npm - orionfold-relay - Versions diffs - 0.22.0 → 0.22.1 - Mend

orionfold-relay 0.22.0 → 0.22.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (13)

package/dist/cli.js +72 -13
package/package.json +1 -1
package/src/app/api/telemetry/route.ts +6 -5
package/src/components/shell/telemetry-rail.tsx +30 -9
package/src/components/shell/telemetry-types.ts +6 -2
package/src/lib/agents/claude-agent.ts +20 -0
package/src/lib/agents/runtime/anthropic-direct.ts +7 -2
package/src/lib/agents/runtime/claude.ts +12 -8
package/src/lib/agents/runtime/model-preference.ts +54 -0
package/src/lib/agents/runtime/openai-direct.ts +7 -2
package/src/lib/chat/ollama-engine.ts +51 -1
package/src/lib/settings/budget-guardrails.ts +29 -0
package/src/lib/usage/pricing.ts +7 -0

package/dist/cli.js CHANGED Viewed

@@ -6910,6 +6910,34 @@ var init_output_scanner = __esm({
   }
 });
+// src/lib/agents/runtime/model-preference.ts
+var model_preference_exports = {};
+__export(model_preference_exports, {
+  resolvePreferredModel: () => resolvePreferredModel
+});
+async function resolvePreferredModel(runtimeId, options) {
+  if (options?.pinnedModelId) {
+    return { modelId: options.pinnedModelId, source: "pin" };
+  }
+  const models = getRuntimeCatalogEntry(runtimeId).models;
+  const preference = await getModelPreference();
+  const tierModel = preference === "balanced" ? models.tiers?.balanced : preference === "cost" ? models.tiers?.fast : preference === "quality" ? models.tiers?.quality : void 0;
+  if (tierModel) {
+    return { modelId: tierModel, source: "preference" };
+  }
+  return {
+    modelId: models.tiers?.quality ?? models.default,
+    source: "default"
+  };
+}
+var init_model_preference = __esm({
+  "src/lib/agents/runtime/model-preference.ts"() {
+    "use strict";
+    init_catalog2();
+    init_helpers();
+  }
+});
 // src/lib/agents/runtime/claude-sdk.ts
 function buildClaudeSdkEnv(authEnv) {
   const { CLAUDECODE, ANTHROPIC_API_KEY, ...cleanEnv } = process.env;
@@ -7679,6 +7707,9 @@ async function deriveUsageCostMicros(input) {
   if (!input.modelId) {
     return { costMicros: null, pricingVersion: null };
   }
+  if (input.providerId === "ollama") {
+    return { costMicros: 0, pricingVersion: "local-free" };
+  }
   if (input.providerId !== "anthropic" && input.providerId !== "openai") {
     return { costMicros: null, pricingVersion: null };
   }
@@ -18074,13 +18105,17 @@ ${learnedCtx}
 You are operating inside a git worktree (branch: ${ws.gitBranch ?? "unknown"}). All file operations MUST use paths relative to the working directory: ${cwd}. Do NOT navigate to or create files in the main repository directory.` : "";
   const systemInstructions = [worktreeNote, profileInstructions, learnedCtxBlock, docContext, tableContext, outputInstructions].filter(Boolean).join("\n\n");
   const maxTurns = profile?.maxTurns ?? DEFAULT_MAX_TURNS;
+  const { modelId } = await resolvePreferredModel("claude-code", {
+    pinnedModelId: profile?.capabilityOverrides?.["claude-code"]?.modelId
+  });
   return {
     userPrompt: basePrompt,
     systemInstructions,
     cwd,
     payload,
     maxTurns,
-    canUseToolPolicy: payload?.canUseToolPolicy
+    canUseToolPolicy: payload?.canUseToolPolicy,
+    modelId
   };
 }
 async function executeClaudeTask(taskId) {
@@ -18126,6 +18161,10 @@ async function executeClaudeTask(taskId) {
       prompt: ctx.userPrompt,
       options: {
         abortController,
+        // Explicit model: profile pin > onboarding preference > quality
+        // default. Omitting this let the SDK pick ITS default (Opus) and
+        // silently bill the wrong tier.
+        model: ctx.modelId,
         includePartialMessages: true,
         cwd: ctx.cwd,
         env: buildClaudeSdkEnv(authEnv),
@@ -18248,6 +18287,9 @@ async function resumeClaudeTask(taskId) {
       options: {
         resume: task.sessionId,
         abortController,
+        // Same model resolution as the original run — a resume must not
+        // silently hop tiers (profile pin > preference > quality default).
+        model: ctx.modelId,
         includePartialMessages: true,
         cwd: ctx.cwd,
         env: buildClaudeSdkEnv(authEnv),
@@ -18366,6 +18408,7 @@ var init_claude_agent = __esm({
     init_context_builder2();
     init_output_scanner();
     init_registry2();
+    init_model_preference();
     init_compatibility();
     init_claude_sdk();
     init_types2();
@@ -18384,9 +18427,8 @@ var init_claude_agent = __esm({
 // src/lib/agents/runtime/claude.ts
 import { query as query2 } from "@anthropic-ai/claude-agent-sdk";
 import { eq as eq34 } from "drizzle-orm";
-function claudeCodeModelAlias() {
-  const models = getRuntimeCatalogEntry("claude-code").models;
-  return models.tiers?.quality ?? models.default;
+async function claudeCodeModelAlias() {
+  return (await resolvePreferredModel("claude-code")).modelId;
 }
 function buildTaskAssistSystemPrompt(profileIds) {
   const profileList = profileIds.length > 0 ? `Available agent profiles: ${profileIds.join(", ")}
@@ -18480,7 +18522,7 @@ Provide a brief analysis (2-3 paragraphs max). Include specific terminology rele
       prompt,
       options: {
         abortController,
-        model: claudeCodeModelAlias(),
+        model: await claudeCodeModelAlias(),
         includePartialMessages: true,
         env: buildClaudeSdkEnv(authEnv),
         allowedTools: []
@@ -18621,7 +18663,7 @@ async function runMetaCompletion(input) {
       prompt: input.prompt,
       options: {
         abortController,
-        model: claudeCodeModelAlias(),
+        model: await claudeCodeModelAlias(),
         includePartialMessages: true,
         cwd: getLaunchCwd(),
         env: buildClaudeSdkEnv(authEnv),
@@ -18801,7 +18843,7 @@ ${userMessage}`;
       prompt,
       options: {
         abortController,
-        model: claudeCodeModelAlias(),
+        model: await claudeCodeModelAlias(),
         includePartialMessages: true,
         cwd: getLaunchCwd(),
         env: buildClaudeSdkEnv(authEnv),
@@ -18876,7 +18918,7 @@ ${userMessage}`;
       prompt,
       options: {
         abortController,
-        model: claudeCodeModelAlias(),
+        model: await claudeCodeModelAlias(),
         includePartialMessages: true,
         cwd: getLaunchCwd(),
         env: buildClaudeSdkEnv(authEnv),
@@ -18941,7 +18983,7 @@ async function testClaudeConnection() {
       prompt: "Reply with exactly: OK",
       options: {
         abortController,
-        model: claudeCodeModelAlias(),
+        model: await claudeCodeModelAlias(),
         maxTurns: 1,
         includePartialMessages: false,
         cwd: getLaunchCwd(),
@@ -18988,6 +19030,7 @@ var init_claude = __esm({
     init_compatibility();
     init_claude_agent();
     init_catalog2();
+    init_model_preference();
     init_claude_sdk();
     init_workspace_context();
     init_helpers();
@@ -20882,7 +20925,8 @@ ${outputInstructions}`;
       initialMessages = [{ role: "user", content: ctx.userPrompt }];
     }
     const { getSetting: getSetting2 } = await Promise.resolve().then(() => (init_helpers(), helpers_exports));
-    const modelId = await getSetting2("anthropic_direct_model") ?? getRuntimeCatalogEntry("anthropic-direct").models.default;
+    const { resolvePreferredModel: resolvePreferredModel2 } = await Promise.resolve().then(() => (init_model_preference(), model_preference_exports));
+    const modelId = await getSetting2("anthropic_direct_model") ?? (await resolvePreferredModel2("anthropic-direct")).modelId;
     const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
     await db.insert(agentLogs).values({
       id: crypto.randomUUID(),
@@ -21353,7 +21397,8 @@ ${outputInstructions}`;
     );
     const pluginMcpTools = mcpServersToOpenAiTools(mergedMcpServers);
     const { getSetting: getSetting2 } = await Promise.resolve().then(() => (init_helpers(), helpers_exports));
-    const modelId = await getSetting2("openai_direct_model") ?? getRuntimeCatalogEntry("openai-direct").models.default;
+    const { resolvePreferredModel: resolvePreferredModel2 } = await Promise.resolve().then(() => (init_model_preference(), model_preference_exports));
+    const modelId = await getSetting2("openai_direct_model") ?? (await resolvePreferredModel2("openai-direct")).modelId;
     const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
     let previousResponseId = null;
     if (isResume) {
@@ -22193,6 +22238,17 @@ async function getUsageAggregates(policy, runtimeStates, now = /* @__PURE__ */ n
       runtimes[runtimeId].daily.totalTokens += row.totalTokens ?? 0;
     }
   });
+  const metered = {
+    daily: { costMicros: 0, totalTokens: 0 },
+    monthly: { costMicros: 0, totalTokens: 0 }
+  };
+  for (const runtimeId of SUPPORTED_AGENT_RUNTIMES) {
+    metered.daily.costMicros += runtimes[runtimeId].daily.costMicros;
+    metered.daily.totalTokens += runtimes[runtimeId].daily.totalTokens;
+    metered.monthly.costMicros += runtimes[runtimeId].monthly.costMicros;
+    metered.monthly.totalTokens += runtimes[runtimeId].monthly.totalTokens;
+  }
+  let planPricedMonthlyMicros = null;
   if (runtimeStates["claude-code"].billingMode === "subscription") {
     const planPriceUsd = await getClaudeOAuthPlanPrice(
       policy.runtimes["claude-code"].claudeOAuthPlan
@@ -22201,6 +22257,7 @@ async function getUsageAggregates(policy, runtimeStates, now = /* @__PURE__ */ n
     const dailyMicros = Math.round(monthlyMicros / daysInMonth(now));
     runtimes["claude-code"].monthly.costMicros = monthlyMicros;
     runtimes["claude-code"].daily.costMicros = dailyMicros;
+    planPricedMonthlyMicros = monthlyMicros;
   }
   const overall = {
     daily: { costMicros: 0, totalTokens: 0 },
@@ -22218,6 +22275,8 @@ async function getUsageAggregates(policy, runtimeStates, now = /* @__PURE__ */ n
   return {
     overall,
     runtimes,
+    metered,
+    planPricedMonthlyMicros,
     ...getBudgetWindowBounds(now)
   };
 }
@@ -25681,8 +25740,8 @@ import { execFileSync as execFileSync3 } from "child_process";
 import yaml12 from "js-yaml";
 import semver from "semver";
 function relayCoreVersion() {
-  if (semver.valid("0.22.0")) {
-    return "0.22.0";
+  if (semver.valid("0.22.1")) {
+    return "0.22.1";
   }
   try {
     const root = getAppRoot(import.meta.dirname, 3);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "orionfold-relay",
-  "version": "0.22.0",
+  "version": "0.22.1",
   "description": "Orionfold Relay — a local-first, multi-agent orchestration runtime and builder scaffold for AI-native work.",
   "keywords": [
     "ai",

package/src/app/api/telemetry/route.ts CHANGED Viewed

@@ -175,9 +175,6 @@ export async function GET() {
       getFailuresByDay(7),
     ]);
-    const overallDaily = budget.statuses.find(
-      (s) => s.scopeId === "overall" && s.window === "daily",
-    );
     const overallMonthly = budget.statuses.find(
       (s) => s.scopeId === "overall" && s.window === "monthly",
     );
@@ -196,8 +193,12 @@ export async function GET() {
       activeProjects: activeProjects?.count ?? 0,
       activeWorkflows: activeWorkflows?.count ?? 0,
       reviewPending,
-      costTodayMicros: overallDaily?.currentValue ?? 0,
-      costToDateMicros: overallMonthly?.currentValue ?? 0,
+      // Metered ledger sums only — the guardrail statuses' plan-priced budget
+      // basis must never render as "cost" (fix-dashboard-budget-vs-cost-labeling).
+      costTodayMicros: budget.meteredSpend.dailyMicros,
+      costToDateMicros: budget.meteredSpend.monthlyMicros,
+      budgetMonthlyCapMicros: overallMonthly?.limitValue ?? null,
+      planPricedMonthlyMicros: budget.planPricedMonthlyMicros,
       runtimeLabel,
       providerId,
       runtimeSdkVersion,

package/src/components/shell/telemetry-rail.tsx CHANGED Viewed

@@ -20,16 +20,19 @@ import {
 } from "lucide-react";
 import { RailCell, formatMicros } from "./rail-cell";
 import { useTelemetry } from "./use-telemetry";
+import type { TelemetrySnapshot } from "./telemetry-types";
 // The standing instrument cluster: a single dense horizontal row beneath the app
 // bar (mirrors `.hp-rail`). A cockpit for a multi-agent harness — eight real
 // cells: HOST (folder · cpu/mem) · RUNTIME (label · sdk version) · TASKS
 // (running + 24h activity spark) · THROUGHPUT (completed today + 7d spark) ·
-// FAILURES (failed + 7d spark, red) · REVIEW (pending) · COST TODAY · COST TO
+// FAILURES (failed + 7d spark, red) · REVIEW (pending) · SPEND TODAY · SPEND TO
 // DATE — plus a live/error status foot. No fabricated data: while loading, cells
 // show "—"; on a poll error the last good snapshot stays visible and the foot
 // flips to an explicit error pip. Static identity (cwd/runtime) is compressed
-// into sub-lines so the live throughput signal owns the foreground.
+// into sub-lines so the live throughput signal owns the foreground. The SPEND
+// cells render real metered ledger sums; the budget cap and any flat plan price
+// live in the sub-line, named as what they are — never presented as spend.
 // Compose the HOST sub-line from whatever live metrics the platform reports;
 // falls back to git branch so the cell is never empty.
@@ -45,6 +48,20 @@ function hostSub(
   return branch ? `git:${branch}` : "no git";
 }
+// Sub-line for SPEND TO DATE: name the flat plan price when billing is
+// subscription (it sits on top of metered spend, it is not spend), else show
+// the monthly budget cap as "budget", else the plain window label.
+function spendToDateSub(data: TelemetrySnapshot | null): string {
+  if (!data) return "—";
+  if (data.planPricedMonthlyMicros != null) {
+    return `+ plan ${formatMicros(data.planPricedMonthlyMicros)}/mo`;
+  }
+  if (data.budgetMonthlyCapMicros != null) {
+    return `of ${formatMicros(data.budgetMonthlyCapMicros)} budget`;
+  }
+  return "monthly";
+}
 export function TelemetryRail() {
   const telemetry = useTelemetry();
   const data = telemetry.data;
@@ -71,9 +88,13 @@ export function TelemetryRail() {
         loading={loading}
         value={data?.runtimeLabel ?? "—"}
         sub={
-          data?.runtimeSdkVersion
-            ? `sdk ${data.runtimeSdkVersion}`
-            : data?.providerId ?? "not configured"
+          // Never fabricate "not configured" while the snapshot is still
+          // loading — only claim it once data has actually said so.
+          data
+            ? data.runtimeSdkVersion
+              ? `sdk ${data.runtimeSdkVersion}`
+              : data.providerId ?? "not configured"
+            : "—"
         }
       />
       <RailCell
@@ -129,18 +150,18 @@ export function TelemetryRail() {
         sub="active"
       />
       <RailCell
-        label="Cost Today"
+        label="Spend Today"
         icon={<Coins aria-hidden />}
         loading={loading}
         value={data ? formatMicros(data.costTodayMicros) : "—"}
-        sub="daily"
+        sub="metered"
       />
       <RailCell
-        label="Cost To Date"
+        label="Spend To Date"
         icon={<Wallet aria-hidden />}
         loading={loading}
         value={data ? formatMicros(data.costToDateMicros) : "—"}
-        sub="monthly"
+        sub={spendToDateSub(data)}
       />
       <div className="ml-auto flex items-center gap-2 px-4 font-mono text-xs text-muted-foreground/60">
         {errored ? (

package/src/components/shell/telemetry-types.ts CHANGED Viewed

@@ -16,10 +16,14 @@ export interface TelemetrySnapshot {
   activeWorkflows: number;
   /** Unread `permission_required` notifications awaiting a human decision. */
   reviewPending: number;
-  /** Overall daily spend in micros (USD * 1e6). */
+  /** Real metered spend today in micros (usage_ledger sum, USD * 1e6) — never a budget/plan figure. */
   costTodayMicros: number;
-  /** Overall monthly (to-date) spend in micros (USD * 1e6). */
+  /** Real metered spend this month in micros (usage_ledger sum, USD * 1e6) — never a budget/plan figure. */
   costToDateMicros: number;
+  /** Overall monthly budget cap in micros, or null when unlimited. */
+  budgetMonthlyCapMicros: number | null;
+  /** Flat subscription plan price in micros when billing is subscription (not metered spend), else null. */
+  planPricedMonthlyMicros: number | null;
   /** Display label of the active runtime (e.g. "Claude Code"), or null if none configured. */
   runtimeLabel: string | null;
   /** Provider behind the active runtime ("anthropic" | "openai" | "ollama"), or null. */

package/src/lib/agents/claude-agent.ts CHANGED Viewed

@@ -13,6 +13,7 @@ import {
   scanTaskOutputDocuments,
 } from "@/lib/documents/output-scanner";
 import { getProfile } from "./profiles/registry";
+import { resolvePreferredModel } from "./runtime/model-preference";
 import { resolveProfileRuntimePayload, type ResolvedProfileRuntimePayload } from "./profiles/compatibility";
 import type { CanUseToolPolicy } from "./profiles/types";
 import {
@@ -483,6 +484,13 @@ export interface TaskQueryContext {
   maxTurns: number;
   /** Profile's canUseToolPolicy */
   canUseToolPolicy?: CanUseToolPolicy;
+  /**
+   * Concrete model to pass to `query()`: profile pin > onboarding model
+   * preference tier > quality default. Without an explicit model the SDK
+   * silently falls back to ITS default — which billed Opus to users who chose
+   * "Balanced" (fix-workflow-model-preference-propagation).
+   */
+  modelId: string;
 }
 export async function buildTaskQueryContext(
@@ -533,6 +541,10 @@ export async function buildTaskQueryContext(
   // F9: Use profile maxTurns or fall back to default
   const maxTurns = profile?.maxTurns ?? DEFAULT_MAX_TURNS;
+  const { modelId } = await resolvePreferredModel("claude-code", {
+    pinnedModelId: profile?.capabilityOverrides?.["claude-code"]?.modelId,
+  });
   return {
     userPrompt: basePrompt,
     systemInstructions,
@@ -540,6 +552,7 @@ export async function buildTaskQueryContext(
     payload,
     maxTurns,
     canUseToolPolicy: payload?.canUseToolPolicy,
+    modelId,
   };
 }
@@ -612,6 +625,10 @@ export async function executeClaudeTask(taskId: string): Promise<void> {
       prompt: ctx.userPrompt,
       options: {
         abortController,
+        // Explicit model: profile pin > onboarding preference > quality
+        // default. Omitting this let the SDK pick ITS default (Opus) and
+        // silently bill the wrong tier.
+        model: ctx.modelId,
         includePartialMessages: true,
         cwd: ctx.cwd,
         env: buildClaudeSdkEnv(authEnv),
@@ -771,6 +788,9 @@ export async function resumeClaudeTask(taskId: string): Promise<void> {
       options: {
         resume: task.sessionId,
         abortController,
+        // Same model resolution as the original run — a resume must not
+        // silently hop tiers (profile pin > preference > quality default).
+        model: ctx.modelId,
         includePartialMessages: true,
         cwd: ctx.cwd,
         env: buildClaudeSdkEnv(authEnv),

package/src/lib/agents/runtime/anthropic-direct.ts CHANGED Viewed

@@ -376,9 +376,14 @@ async function executeAnthropicDirectTask(taskId: string, isResume = false): Pro
       initialMessages = [{ role: "user", content: ctx.userPrompt }];
     }
-    // Resolve model from settings
+    // Resolve model: explicit runtime setting > onboarding model preference
+    // tier > catalog default ("Balanced" means Sonnet on every runtime, not
+    // just chat — fix-workflow-model-preference-propagation).
     const { getSetting } = await import("@/lib/settings/helpers");
-    const modelId = (await getSetting("anthropic_direct_model")) ?? getRuntimeCatalogEntry("anthropic-direct").models.default;
+    const { resolvePreferredModel } = await import("./model-preference");
+    const modelId =
+      (await getSetting("anthropic_direct_model")) ??
+      (await resolvePreferredModel("anthropic-direct")).modelId;
     const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;

package/src/lib/agents/runtime/claude.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { getProfile, listProfiles } from "@/lib/agents/profiles/registry";
 import { resolveProfileRuntimePayload } from "@/lib/agents/profiles/compatibility";
 import { executeClaudeTask, resumeClaudeTask } from "@/lib/agents/claude-agent";
 import { getRuntimeCapabilities, getRuntimeCatalogEntry } from "./catalog";
+import { resolvePreferredModel } from "./model-preference";
 import { buildClaudeSdkEnv } from "./claude-sdk";
 import { getLaunchCwd } from "@/lib/environment/workspace-context";
 import { getSetting } from "@/lib/settings/helpers";
@@ -34,10 +35,13 @@ import {
  * model in that family instead of pinning to a string that silently ages out.
  * Without this, `query()` omits `model` entirely and the SDK falls back to its
  * own default — which is not necessarily the family the chat picker selected.
+ *
+ * Honors the user's onboarding model preference ("Balanced" means Sonnet
+ * everywhere, not just chat); falls back to the quality tier when no
+ * preference is recorded.
  */
-function claudeCodeModelAlias(): string {
-  const models = getRuntimeCatalogEntry("claude-code").models;
-  return models.tiers?.quality ?? models.default;
+async function claudeCodeModelAlias(): Promise<string> {
+  return (await resolvePreferredModel("claude-code")).modelId;
 }
 function buildTaskAssistSystemPrompt(profileIds: string[]): string {
@@ -148,7 +152,7 @@ export async function runSingleProfileTest(
       prompt,
       options: {
         abortController,
-        model: claudeCodeModelAlias(),
+        model: await claudeCodeModelAlias(),
         includePartialMessages: true,
         env: buildClaudeSdkEnv(authEnv),
         allowedTools: [],
@@ -306,7 +310,7 @@ export async function runMetaCompletion(input: {
       prompt: input.prompt,
       options: {
         abortController,
-        model: claudeCodeModelAlias(),
+        model: await claudeCodeModelAlias(),
         includePartialMessages: true,
         cwd: getLaunchCwd(),
         env: buildClaudeSdkEnv(authEnv),
@@ -497,7 +501,7 @@ async function runClaudeProfileAssist(
       prompt,
       options: {
         abortController,
-        model: claudeCodeModelAlias(),
+        model: await claudeCodeModelAlias(),
         includePartialMessages: true,
         cwd: getLaunchCwd(),
         env: buildClaudeSdkEnv(authEnv),
@@ -584,7 +588,7 @@ async function runClaudeTaskAssist(
       prompt,
       options: {
         abortController,
-        model: claudeCodeModelAlias(),
+        model: await claudeCodeModelAlias(),
         includePartialMessages: true,
         cwd: getLaunchCwd(),
         env: buildClaudeSdkEnv(authEnv),
@@ -655,7 +659,7 @@ async function testClaudeConnection(): Promise<RuntimeConnectionResult> {
       prompt: "Reply with exactly: OK",
       options: {
         abortController,
-        model: claudeCodeModelAlias(),
+        model: await claudeCodeModelAlias(),
         maxTurns: 1,
         includePartialMessages: false,
         cwd: getLaunchCwd(),

package/src/lib/agents/runtime/model-preference.ts ADDED Viewed

@@ -0,0 +1,54 @@
+import { getRuntimeCatalogEntry, type AgentRuntimeId } from "./catalog";
+import { getModelPreference } from "@/lib/settings/helpers";
+/**
+ * Resolve the concrete model a claude-code (or any catalog) execution should
+ * run on, honoring the user's onboarding model preference
+ * (`chat.modelPreference`) outside chat. Order:
+ *
+ *   1. an explicit profile/step pin (`pinnedModelId`) — the user said exactly
+ *      what to run, the preference never overrides it;
+ *   2. the preference mapped to the runtime's tier ("balanced" → Sonnet,
+ *      "cost" → fast/Haiku, "quality" → Opus);
+ *   3. the runtime's quality tier / catalog default — the pre-existing
+ *      behavior when no preference is recorded.
+ *
+ * "privacy" is a runtime-level preference (route to Ollama), not a model tier
+ * within a cloud runtime — it resolves as "no within-runtime opinion".
+ *
+ * The `source` field keeps the routing inspectable: surfaces can say WHY a
+ * model was chosen instead of silently swapping tiers.
+ */
+export interface ResolvedPreferredModel {
+  modelId: string;
+  source: "pin" | "preference" | "default";
+}
+export async function resolvePreferredModel(
+  runtimeId: AgentRuntimeId,
+  options?: { pinnedModelId?: string | null },
+): Promise<ResolvedPreferredModel> {
+  if (options?.pinnedModelId) {
+    return { modelId: options.pinnedModelId, source: "pin" };
+  }
+  const models = getRuntimeCatalogEntry(runtimeId).models;
+  const preference = await getModelPreference();
+  const tierModel =
+    preference === "balanced"
+      ? models.tiers?.balanced
+      : preference === "cost"
+        ? models.tiers?.fast
+        : preference === "quality"
+          ? models.tiers?.quality
+          : undefined;
+  if (tierModel) {
+    return { modelId: tierModel, source: "preference" };
+  }
+  return {
+    modelId: models.tiers?.quality ?? models.default,
+    source: "default",
+  };
+}

package/src/lib/agents/runtime/openai-direct.ts CHANGED Viewed

@@ -319,9 +319,14 @@ async function executeOpenAIDirectTask(taskId: string, isResume = false): Promis
     );
     const pluginMcpTools = mcpServersToOpenAiTools(mergedMcpServers);
-    // Resolve model
+    // Resolve model: explicit runtime setting > onboarding model preference
+    // tier > catalog default ("Balanced" means the balanced tier on every
+    // runtime, not just chat — fix-workflow-model-preference-propagation).
     const { getSetting } = await import("@/lib/settings/helpers");
-    const modelId = (await getSetting("openai_direct_model")) ?? getRuntimeCatalogEntry("openai-direct").models.default;
+    const { resolvePreferredModel } = await import("./model-preference");
+    const modelId =
+      (await getSetting("openai_direct_model")) ??
+      (await resolvePreferredModel("openai-direct")).modelId;
     const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
     // For resume: load previous response ID

package/src/lib/chat/ollama-engine.ts CHANGED Viewed

@@ -18,6 +18,7 @@ import {
 } from "@/lib/data/chat";
 import { buildChatContext } from "./context-builder";
 import { getWorkspaceContext } from "@/lib/environment/workspace-context";
+import { recordUsageLedgerEntry } from "@/lib/usage/ledger";
 import type { ChatStreamEvent } from "./types";
 /**
@@ -109,6 +110,47 @@ export async function* sendOllamaMessage(
   // Stream from Ollama
   let accumulated = "";
+  // Meter the turn like every other chat path (main engine writes a
+  // chat_turn row on success, degrade, and error). Ollama's final chunk
+  // reports prompt_eval_count / eval_count; local runs are recorded at $0 —
+  // those rows are what proves blended-cost savings on /costs.
+  const startedAt = new Date();
+  let inputTokens: number | null = null;
+  let outputTokens: number | null = null;
+  let ledgerRecorded = false;
+  const recordTurn = async (status: "completed" | "failed" | "cancelled") => {
+    if (ledgerRecorded) return;
+    ledgerRecorded = true;
+    await recordUsageLedgerEntry({
+      projectId: conversation.projectId ?? null,
+      activityType: "chat_turn",
+      runtimeId: "ollama",
+      providerId: "ollama",
+      modelId,
+      inputTokens,
+      outputTokens,
+      totalTokens:
+        inputTokens != null && outputTokens != null
+          ? inputTokens + outputTokens
+          : null,
+      status,
+      startedAt,
+      finishedAt: new Date(),
+    });
+  };
+  const captureTokenCounts = (parsed: {
+    prompt_eval_count?: unknown;
+    eval_count?: unknown;
+  }) => {
+    if (typeof parsed.prompt_eval_count === "number") {
+      inputTokens = parsed.prompt_eval_count;
+    }
+    if (typeof parsed.eval_count === "number") {
+      outputTokens = parsed.eval_count;
+    }
+  };
   try {
     const response = await fetch(`${baseUrl}/api/chat`, {
       method: "POST",
@@ -125,6 +167,7 @@ export async function* sendOllamaMessage(
       const errorText = await response.text().catch(() => "Unknown error");
       yield { type: "error", message: `Ollama error (${response.status}): ${errorText}` };
       await updateMessageStatus(assistantMsg.id, "complete");
+      await recordTurn("failed");
       return;
     }
@@ -132,6 +175,7 @@ export async function* sendOllamaMessage(
     if (!reader) {
       yield { type: "error", message: "No response stream from Ollama" };
       await updateMessageStatus(assistantMsg.id, "complete");
+      await recordTurn("failed");
       return;
     }
@@ -157,7 +201,10 @@ export async function* sendOllamaMessage(
             accumulated += delta;
             yield { type: "delta", content: delta };
           }
-          if (parsed.done) break;
+          if (parsed.done) {
+            captureTokenCounts(parsed);
+            break;
+          }
         } catch {
           // Skip malformed lines
         }
@@ -173,6 +220,7 @@ export async function* sendOllamaMessage(
           accumulated += delta;
           yield { type: "delta", content: delta };
         }
+        if (parsed.done) captureTokenCounts(parsed);
       } catch {
         // ignore
       }
@@ -181,6 +229,7 @@ export async function* sendOllamaMessage(
     // Persist the complete response
     await updateMessageContent(assistantMsg.id, accumulated);
     await updateMessageStatus(assistantMsg.id, "complete");
+    await recordTurn("completed");
     yield { type: "done", messageId: assistantMsg.id, quickAccess: [] };
   } catch (err) {
@@ -194,5 +243,6 @@ export async function* sendOllamaMessage(
       await updateMessageContent(assistantMsg.id, accumulated);
     }
     await updateMessageStatus(assistantMsg.id, "complete");
+    await recordTurn(signal?.aborted ? "cancelled" : "failed");
   }
 }

package/src/lib/settings/budget-guardrails.ts CHANGED Viewed

@@ -68,6 +68,10 @@ export interface BudgetSnapshot {
   monthlyResetAtIso: string;
   runtimeStates: Record<AgentRuntimeId, RuntimeSetupState>;
   pricing: PricingRegistrySnapshot;
+  /** Real metered spend (usage_ledger sums) — never the plan-priced budget basis. */
+  meteredSpend: { dailyMicros: number; monthlyMicros: number };
+  /** Flat subscription price counted as the budget basis, when billing is subscription. */
+  planPricedMonthlyMicros: number | null;
 }
 interface BudgetGuardInput {
@@ -358,6 +362,23 @@ async function getUsageAggregates(
     }
   });
+  // Real metered spend: the plain usage_ledger sums across every runtime,
+  // captured BEFORE the subscription plan-price substitution below. Guardrail
+  // statuses budget against the plan price (a flat subscription is the real
+  // monthly outlay), but display surfaces must never present that basis as
+  // spend — they read this instead.
+  const metered = {
+    daily: { costMicros: 0, totalTokens: 0 },
+    monthly: { costMicros: 0, totalTokens: 0 },
+  };
+  for (const runtimeId of SUPPORTED_AGENT_RUNTIMES) {
+    metered.daily.costMicros += runtimes[runtimeId].daily.costMicros;
+    metered.daily.totalTokens += runtimes[runtimeId].daily.totalTokens;
+    metered.monthly.costMicros += runtimes[runtimeId].monthly.costMicros;
+    metered.monthly.totalTokens += runtimes[runtimeId].monthly.totalTokens;
+  }
+  let planPricedMonthlyMicros: number | null = null;
   if (runtimeStates["claude-code"].billingMode === "subscription") {
     const planPriceUsd = await getClaudeOAuthPlanPrice(
       policy.runtimes["claude-code"].claudeOAuthPlan
@@ -366,6 +387,7 @@ async function getUsageAggregates(
     const dailyMicros = Math.round(monthlyMicros / daysInMonth(now));
     runtimes["claude-code"].monthly.costMicros = monthlyMicros;
     runtimes["claude-code"].daily.costMicros = dailyMicros;
+    planPricedMonthlyMicros = monthlyMicros;
   }
   const overall = {
@@ -387,6 +409,8 @@ async function getUsageAggregates(
   return {
     overall,
     runtimes,
+    metered,
+    planPricedMonthlyMicros,
     ...getBudgetWindowBounds(now),
   };
 }
@@ -714,5 +738,10 @@ export async function getBudgetGuardrailSnapshot(): Promise<BudgetSnapshot> {
     monthlyResetAtIso: aggregates.monthlyEnd.toISOString(),
     runtimeStates,
     pricing,
+    meteredSpend: {
+      dailyMicros: aggregates.metered.daily.costMicros,
+      monthlyMicros: aggregates.metered.monthly.costMicros,
+    },
+    planPricedMonthlyMicros: aggregates.planPricedMonthlyMicros,
   };
 }

package/src/lib/usage/pricing.ts CHANGED Viewed

@@ -15,6 +15,13 @@ export async function deriveUsageCostMicros(input: {
     return { costMicros: null, pricingVersion: null };
   }
+  // Local inference has no per-token billing: a known-free $0, not unknown
+  // pricing. The $0 rows are the evidence for the blended-cost savings story —
+  // null here would demote every local run to "unknown_pricing" and hide them.
+  if (input.providerId === "ollama") {
+    return { costMicros: 0, pricingVersion: "local-free" };
+  }
   if (input.providerId !== "anthropic" && input.providerId !== "openai") {
     return { costMicros: null, pricingVersion: null };
   }