orionfold-relay 0.22.0 → 0.22.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -6910,6 +6910,34 @@ var init_output_scanner = __esm({
6910
6910
  }
6911
6911
  });
6912
6912
 
6913
+ // src/lib/agents/runtime/model-preference.ts
6914
// Export table for the bundled model-preference module. Each entry is a thunk,
// so the binding is read when it is accessed rather than when the table is
// built (presumably installed as a getter by the bundler's `__export` helper —
// that helper is defined outside this excerpt). Dynamic-import call sites
// elsewhere in the bundle read `resolvePreferredModel` off this object after
// running `init_model_preference()`.
+ var model_preference_exports = {};
6915
+ __export(model_preference_exports, {
6916
+ resolvePreferredModel: () => resolvePreferredModel
6917
+ });
6918
+ async function resolvePreferredModel(runtimeId, options) {
6919
+ if (options?.pinnedModelId) {
6920
+ return { modelId: options.pinnedModelId, source: "pin" };
6921
+ }
6922
+ const models = getRuntimeCatalogEntry(runtimeId).models;
6923
+ const preference = await getModelPreference();
6924
+ const tierModel = preference === "balanced" ? models.tiers?.balanced : preference === "cost" ? models.tiers?.fast : preference === "quality" ? models.tiers?.quality : void 0;
6925
+ if (tierModel) {
6926
+ return { modelId: tierModel, source: "preference" };
6927
+ }
6928
+ return {
6929
+ modelId: models.tiers?.quality ?? models.default,
6930
+ source: "default"
6931
+ };
6932
+ }
6933
// Lazy initializer for the bundled model-preference module. When invoked, its
// body runs the initializers of the two modules the resolver depends on:
// the runtime catalog (`init_catalog2`) and the settings helpers
// (`init_helpers`). `__esm` is the bundler's wrapper and is defined outside
// this excerpt — presumably it makes the body run at most once; confirm there.
+ var init_model_preference = __esm({
6934
+ "src/lib/agents/runtime/model-preference.ts"() {
6935
+ "use strict";
6936
+ init_catalog2();
6937
+ init_helpers();
6938
+ }
6939
+ });
6940
+
6913
6941
  // src/lib/agents/runtime/claude-sdk.ts
6914
6942
  function buildClaudeSdkEnv(authEnv) {
6915
6943
  const { CLAUDECODE, ANTHROPIC_API_KEY, ...cleanEnv } = process.env;
@@ -7679,6 +7707,9 @@ async function deriveUsageCostMicros(input) {
7679
7707
  if (!input.modelId) {
7680
7708
  return { costMicros: null, pricingVersion: null };
7681
7709
  }
7710
+ if (input.providerId === "ollama") {
7711
+ return { costMicros: 0, pricingVersion: "local-free" };
7712
+ }
7682
7713
  if (input.providerId !== "anthropic" && input.providerId !== "openai") {
7683
7714
  return { costMicros: null, pricingVersion: null };
7684
7715
  }
@@ -18074,13 +18105,17 @@ ${learnedCtx}
18074
18105
  You are operating inside a git worktree (branch: ${ws.gitBranch ?? "unknown"}). All file operations MUST use paths relative to the working directory: ${cwd}. Do NOT navigate to or create files in the main repository directory.` : "";
18075
18106
  const systemInstructions = [worktreeNote, profileInstructions, learnedCtxBlock, docContext, tableContext, outputInstructions].filter(Boolean).join("\n\n");
18076
18107
  const maxTurns = profile?.maxTurns ?? DEFAULT_MAX_TURNS;
18108
+ const { modelId } = await resolvePreferredModel("claude-code", {
18109
+ pinnedModelId: profile?.capabilityOverrides?.["claude-code"]?.modelId
18110
+ });
18077
18111
  return {
18078
18112
  userPrompt: basePrompt,
18079
18113
  systemInstructions,
18080
18114
  cwd,
18081
18115
  payload,
18082
18116
  maxTurns,
18083
- canUseToolPolicy: payload?.canUseToolPolicy
18117
+ canUseToolPolicy: payload?.canUseToolPolicy,
18118
+ modelId
18084
18119
  };
18085
18120
  }
18086
18121
  async function executeClaudeTask(taskId) {
@@ -18126,6 +18161,10 @@ async function executeClaudeTask(taskId) {
18126
18161
  prompt: ctx.userPrompt,
18127
18162
  options: {
18128
18163
  abortController,
18164
+ // Explicit model: profile pin > onboarding preference > quality
18165
+ // default. Omitting this let the SDK pick ITS default (Opus) and
18166
+ // silently bill the wrong tier.
18167
+ model: ctx.modelId,
18129
18168
  includePartialMessages: true,
18130
18169
  cwd: ctx.cwd,
18131
18170
  env: buildClaudeSdkEnv(authEnv),
@@ -18248,6 +18287,9 @@ async function resumeClaudeTask(taskId) {
18248
18287
  options: {
18249
18288
  resume: task.sessionId,
18250
18289
  abortController,
18290
+ // Same model resolution as the original run — a resume must not
18291
+ // silently hop tiers (profile pin > preference > quality default).
18292
+ model: ctx.modelId,
18251
18293
  includePartialMessages: true,
18252
18294
  cwd: ctx.cwd,
18253
18295
  env: buildClaudeSdkEnv(authEnv),
@@ -18366,6 +18408,7 @@ var init_claude_agent = __esm({
18366
18408
  init_context_builder2();
18367
18409
  init_output_scanner();
18368
18410
  init_registry2();
18411
+ init_model_preference();
18369
18412
  init_compatibility();
18370
18413
  init_claude_sdk();
18371
18414
  init_types2();
@@ -18384,9 +18427,8 @@ var init_claude_agent = __esm({
18384
18427
  // src/lib/agents/runtime/claude.ts
18385
18428
  import { query as query2 } from "@anthropic-ai/claude-agent-sdk";
18386
18429
  import { eq as eq34 } from "drizzle-orm";
18387
- function claudeCodeModelAlias() {
18388
- const models = getRuntimeCatalogEntry("claude-code").models;
18389
- return models.tiers?.quality ?? models.default;
18430
+ async function claudeCodeModelAlias() {
18431
+ return (await resolvePreferredModel("claude-code")).modelId;
18390
18432
  }
18391
18433
  function buildTaskAssistSystemPrompt(profileIds) {
18392
18434
  const profileList = profileIds.length > 0 ? `Available agent profiles: ${profileIds.join(", ")}
@@ -18480,7 +18522,7 @@ Provide a brief analysis (2-3 paragraphs max). Include specific terminology rele
18480
18522
  prompt,
18481
18523
  options: {
18482
18524
  abortController,
18483
- model: claudeCodeModelAlias(),
18525
+ model: await claudeCodeModelAlias(),
18484
18526
  includePartialMessages: true,
18485
18527
  env: buildClaudeSdkEnv(authEnv),
18486
18528
  allowedTools: []
@@ -18621,7 +18663,7 @@ async function runMetaCompletion(input) {
18621
18663
  prompt: input.prompt,
18622
18664
  options: {
18623
18665
  abortController,
18624
- model: claudeCodeModelAlias(),
18666
+ model: await claudeCodeModelAlias(),
18625
18667
  includePartialMessages: true,
18626
18668
  cwd: getLaunchCwd(),
18627
18669
  env: buildClaudeSdkEnv(authEnv),
@@ -18801,7 +18843,7 @@ ${userMessage}`;
18801
18843
  prompt,
18802
18844
  options: {
18803
18845
  abortController,
18804
- model: claudeCodeModelAlias(),
18846
+ model: await claudeCodeModelAlias(),
18805
18847
  includePartialMessages: true,
18806
18848
  cwd: getLaunchCwd(),
18807
18849
  env: buildClaudeSdkEnv(authEnv),
@@ -18876,7 +18918,7 @@ ${userMessage}`;
18876
18918
  prompt,
18877
18919
  options: {
18878
18920
  abortController,
18879
- model: claudeCodeModelAlias(),
18921
+ model: await claudeCodeModelAlias(),
18880
18922
  includePartialMessages: true,
18881
18923
  cwd: getLaunchCwd(),
18882
18924
  env: buildClaudeSdkEnv(authEnv),
@@ -18941,7 +18983,7 @@ async function testClaudeConnection() {
18941
18983
  prompt: "Reply with exactly: OK",
18942
18984
  options: {
18943
18985
  abortController,
18944
- model: claudeCodeModelAlias(),
18986
+ model: await claudeCodeModelAlias(),
18945
18987
  maxTurns: 1,
18946
18988
  includePartialMessages: false,
18947
18989
  cwd: getLaunchCwd(),
@@ -18988,6 +19030,7 @@ var init_claude = __esm({
18988
19030
  init_compatibility();
18989
19031
  init_claude_agent();
18990
19032
  init_catalog2();
19033
+ init_model_preference();
18991
19034
  init_claude_sdk();
18992
19035
  init_workspace_context();
18993
19036
  init_helpers();
@@ -20882,7 +20925,8 @@ ${outputInstructions}`;
20882
20925
  initialMessages = [{ role: "user", content: ctx.userPrompt }];
20883
20926
  }
20884
20927
  const { getSetting: getSetting2 } = await Promise.resolve().then(() => (init_helpers(), helpers_exports));
20885
- const modelId = await getSetting2("anthropic_direct_model") ?? getRuntimeCatalogEntry("anthropic-direct").models.default;
20928
+ const { resolvePreferredModel: resolvePreferredModel2 } = await Promise.resolve().then(() => (init_model_preference(), model_preference_exports));
20929
+ const modelId = await getSetting2("anthropic_direct_model") ?? (await resolvePreferredModel2("anthropic-direct")).modelId;
20886
20930
  const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
20887
20931
  await db.insert(agentLogs).values({
20888
20932
  id: crypto.randomUUID(),
@@ -21353,7 +21397,8 @@ ${outputInstructions}`;
21353
21397
  );
21354
21398
  const pluginMcpTools = mcpServersToOpenAiTools(mergedMcpServers);
21355
21399
  const { getSetting: getSetting2 } = await Promise.resolve().then(() => (init_helpers(), helpers_exports));
21356
- const modelId = await getSetting2("openai_direct_model") ?? getRuntimeCatalogEntry("openai-direct").models.default;
21400
+ const { resolvePreferredModel: resolvePreferredModel2 } = await Promise.resolve().then(() => (init_model_preference(), model_preference_exports));
21401
+ const modelId = await getSetting2("openai_direct_model") ?? (await resolvePreferredModel2("openai-direct")).modelId;
21357
21402
  const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
21358
21403
  let previousResponseId = null;
21359
21404
  if (isResume) {
@@ -22193,6 +22238,17 @@ async function getUsageAggregates(policy, runtimeStates, now = /* @__PURE__ */ n
22193
22238
  runtimes[runtimeId].daily.totalTokens += row.totalTokens ?? 0;
22194
22239
  }
22195
22240
  });
22241
+ const metered = {
22242
+ daily: { costMicros: 0, totalTokens: 0 },
22243
+ monthly: { costMicros: 0, totalTokens: 0 }
22244
+ };
22245
+ for (const runtimeId of SUPPORTED_AGENT_RUNTIMES) {
22246
+ metered.daily.costMicros += runtimes[runtimeId].daily.costMicros;
22247
+ metered.daily.totalTokens += runtimes[runtimeId].daily.totalTokens;
22248
+ metered.monthly.costMicros += runtimes[runtimeId].monthly.costMicros;
22249
+ metered.monthly.totalTokens += runtimes[runtimeId].monthly.totalTokens;
22250
+ }
22251
+ let planPricedMonthlyMicros = null;
22196
22252
  if (runtimeStates["claude-code"].billingMode === "subscription") {
22197
22253
  const planPriceUsd = await getClaudeOAuthPlanPrice(
22198
22254
  policy.runtimes["claude-code"].claudeOAuthPlan
@@ -22201,6 +22257,7 @@ async function getUsageAggregates(policy, runtimeStates, now = /* @__PURE__ */ n
22201
22257
  const dailyMicros = Math.round(monthlyMicros / daysInMonth(now));
22202
22258
  runtimes["claude-code"].monthly.costMicros = monthlyMicros;
22203
22259
  runtimes["claude-code"].daily.costMicros = dailyMicros;
22260
+ planPricedMonthlyMicros = monthlyMicros;
22204
22261
  }
22205
22262
  const overall = {
22206
22263
  daily: { costMicros: 0, totalTokens: 0 },
@@ -22218,6 +22275,8 @@ async function getUsageAggregates(policy, runtimeStates, now = /* @__PURE__ */ n
22218
22275
  return {
22219
22276
  overall,
22220
22277
  runtimes,
22278
+ metered,
22279
+ planPricedMonthlyMicros,
22221
22280
  ...getBudgetWindowBounds(now)
22222
22281
  };
22223
22282
  }
@@ -25681,8 +25740,8 @@ import { execFileSync as execFileSync3 } from "child_process";
25681
25740
  import yaml12 from "js-yaml";
25682
25741
  import semver from "semver";
25683
25742
  function relayCoreVersion() {
25684
- if (semver.valid("0.22.0")) {
25685
- return "0.22.0";
25743
+ if (semver.valid("0.22.1")) {
25744
+ return "0.22.1";
25686
25745
  }
25687
25746
  try {
25688
25747
  const root = getAppRoot(import.meta.dirname, 3);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "orionfold-relay",
3
- "version": "0.22.0",
3
+ "version": "0.22.1",
4
4
  "description": "Orionfold Relay — a local-first, multi-agent orchestration runtime and builder scaffold for AI-native work.",
5
5
  "keywords": [
6
6
  "ai",
@@ -175,9 +175,6 @@ export async function GET() {
175
175
  getFailuresByDay(7),
176
176
  ]);
177
177
 
178
- const overallDaily = budget.statuses.find(
179
- (s) => s.scopeId === "overall" && s.window === "daily",
180
- );
181
178
  const overallMonthly = budget.statuses.find(
182
179
  (s) => s.scopeId === "overall" && s.window === "monthly",
183
180
  );
@@ -196,8 +193,12 @@ export async function GET() {
196
193
  activeProjects: activeProjects?.count ?? 0,
197
194
  activeWorkflows: activeWorkflows?.count ?? 0,
198
195
  reviewPending,
199
- costTodayMicros: overallDaily?.currentValue ?? 0,
200
- costToDateMicros: overallMonthly?.currentValue ?? 0,
196
+ // Metered ledger sums only — the guardrail statuses' plan-priced budget
197
+ // basis must never render as "cost" (fix-dashboard-budget-vs-cost-labeling).
198
+ costTodayMicros: budget.meteredSpend.dailyMicros,
199
+ costToDateMicros: budget.meteredSpend.monthlyMicros,
200
+ budgetMonthlyCapMicros: overallMonthly?.limitValue ?? null,
201
+ planPricedMonthlyMicros: budget.planPricedMonthlyMicros,
201
202
  runtimeLabel,
202
203
  providerId,
203
204
  runtimeSdkVersion,
@@ -20,16 +20,19 @@ import {
20
20
  } from "lucide-react";
21
21
  import { RailCell, formatMicros } from "./rail-cell";
22
22
  import { useTelemetry } from "./use-telemetry";
23
+ import type { TelemetrySnapshot } from "./telemetry-types";
23
24
 
24
25
  // The standing instrument cluster: a single dense horizontal row beneath the app
25
26
  // bar (mirrors `.hp-rail`). A cockpit for a multi-agent harness — eight real
26
27
  // cells: HOST (folder · cpu/mem) · RUNTIME (label · sdk version) · TASKS
27
28
  // (running + 24h activity spark) · THROUGHPUT (completed today + 7d spark) ·
28
- // FAILURES (failed + 7d spark, red) · REVIEW (pending) · COST TODAY · COST TO
29
+ // FAILURES (failed + 7d spark, red) · REVIEW (pending) · SPEND TODAY · SPEND TO
29
30
  // DATE — plus a live/error status foot. No fabricated data: while loading, cells
30
31
  // show "—"; on a poll error the last good snapshot stays visible and the foot
31
32
  // flips to an explicit error pip. Static identity (cwd/runtime) is compressed
32
- // into sub-lines so the live throughput signal owns the foreground.
33
+ // into sub-lines so the live throughput signal owns the foreground. The SPEND
34
+ // cells render real metered ledger sums; the budget cap and any flat plan price
35
+ // live in the sub-line, named as what they are — never presented as spend.
33
36
 
34
37
  // Compose the HOST sub-line from whatever live metrics the platform reports;
35
38
  // falls back to git branch so the cell is never empty.
@@ -45,6 +48,20 @@ function hostSub(
45
48
  return branch ? `git:${branch}` : "no git";
46
49
  }
47
50
 
51
/**
 * Build the sub-line shown under the SPEND TO DATE cell.
 *
 * Precedence:
 *  - no snapshot yet            → "—"
 *  - a flat plan price is set   → "+ plan <price>/mo" (named as a plan price,
 *                                 shown alongside metered spend, not as spend)
 *  - a monthly budget cap is set → "of <cap> budget"
 *  - otherwise                  → "monthly"
 */
function spendToDateSub(data: TelemetrySnapshot | null): string {
  if (!data) {
    return "—";
  }

  const planPrice = data.planPricedMonthlyMicros;
  if (planPrice != null) {
    return `+ plan ${formatMicros(planPrice)}/mo`;
  }

  const monthlyCap = data.budgetMonthlyCapMicros;
  return monthlyCap != null
    ? `of ${formatMicros(monthlyCap)} budget`
    : "monthly";
}
64
+
48
65
  export function TelemetryRail() {
49
66
  const telemetry = useTelemetry();
50
67
  const data = telemetry.data;
@@ -71,9 +88,13 @@ export function TelemetryRail() {
71
88
  loading={loading}
72
89
  value={data?.runtimeLabel ?? "—"}
73
90
  sub={
74
- data?.runtimeSdkVersion
75
- ? `sdk ${data.runtimeSdkVersion}`
76
- : data?.providerId ?? "not configured"
91
+ // Never fabricate "not configured" while the snapshot is still
92
+ // loading — only claim it once data has actually said so.
93
+ data
94
+ ? data.runtimeSdkVersion
95
+ ? `sdk ${data.runtimeSdkVersion}`
96
+ : data.providerId ?? "not configured"
97
+ : "—"
77
98
  }
78
99
  />
79
100
  <RailCell
@@ -129,18 +150,18 @@ export function TelemetryRail() {
129
150
  sub="active"
130
151
  />
131
152
  <RailCell
132
- label="Cost Today"
153
+ label="Spend Today"
133
154
  icon={<Coins aria-hidden />}
134
155
  loading={loading}
135
156
  value={data ? formatMicros(data.costTodayMicros) : "—"}
136
- sub="daily"
157
+ sub="metered"
137
158
  />
138
159
  <RailCell
139
- label="Cost To Date"
160
+ label="Spend To Date"
140
161
  icon={<Wallet aria-hidden />}
141
162
  loading={loading}
142
163
  value={data ? formatMicros(data.costToDateMicros) : "—"}
143
- sub="monthly"
164
+ sub={spendToDateSub(data)}
144
165
  />
145
166
  <div className="ml-auto flex items-center gap-2 px-4 font-mono text-xs text-muted-foreground/60">
146
167
  {errored ? (
@@ -16,10 +16,14 @@ export interface TelemetrySnapshot {
16
16
  activeWorkflows: number;
17
17
  /** Unread `permission_required` notifications awaiting a human decision. */
18
18
  reviewPending: number;
19
- /** Overall daily spend in micros (USD * 1e6). */
19
+ /** Real metered spend today in micros (usage_ledger sum, USD * 1e6) — never a budget/plan figure. */
20
20
  costTodayMicros: number;
21
- /** Overall monthly (to-date) spend in micros (USD * 1e6). */
21
+ /** Real metered spend this month in micros (usage_ledger sum, USD * 1e6) — never a budget/plan figure. */
22
22
  costToDateMicros: number;
23
+ /** Overall monthly budget cap in micros, or null when unlimited. */
24
+ budgetMonthlyCapMicros: number | null;
25
+ /** Flat subscription plan price in micros when billing is subscription (not metered spend), else null. */
26
+ planPricedMonthlyMicros: number | null;
23
27
  /** Display label of the active runtime (e.g. "Claude Code"), or null if none configured. */
24
28
  runtimeLabel: string | null;
25
29
  /** Provider behind the active runtime ("anthropic" | "openai" | "ollama"), or null. */
@@ -13,6 +13,7 @@ import {
13
13
  scanTaskOutputDocuments,
14
14
  } from "@/lib/documents/output-scanner";
15
15
  import { getProfile } from "./profiles/registry";
16
+ import { resolvePreferredModel } from "./runtime/model-preference";
16
17
  import { resolveProfileRuntimePayload, type ResolvedProfileRuntimePayload } from "./profiles/compatibility";
17
18
  import type { CanUseToolPolicy } from "./profiles/types";
18
19
  import {
@@ -483,6 +484,13 @@ export interface TaskQueryContext {
483
484
  maxTurns: number;
484
485
  /** Profile's canUseToolPolicy */
485
486
  canUseToolPolicy?: CanUseToolPolicy;
487
+ /**
488
+ * Concrete model to pass to `query()`: profile pin > onboarding model
489
+ * preference tier > quality default. Without an explicit model the SDK
490
+ * silently falls back to ITS default — which billed Opus to users who chose
491
+ * "Balanced" (fix-workflow-model-preference-propagation).
492
+ */
493
+ modelId: string;
486
494
  }
487
495
 
488
496
  export async function buildTaskQueryContext(
@@ -533,6 +541,10 @@ export async function buildTaskQueryContext(
533
541
  // F9: Use profile maxTurns or fall back to default
534
542
  const maxTurns = profile?.maxTurns ?? DEFAULT_MAX_TURNS;
535
543
 
544
+ const { modelId } = await resolvePreferredModel("claude-code", {
545
+ pinnedModelId: profile?.capabilityOverrides?.["claude-code"]?.modelId,
546
+ });
547
+
536
548
  return {
537
549
  userPrompt: basePrompt,
538
550
  systemInstructions,
@@ -540,6 +552,7 @@ export async function buildTaskQueryContext(
540
552
  payload,
541
553
  maxTurns,
542
554
  canUseToolPolicy: payload?.canUseToolPolicy,
555
+ modelId,
543
556
  };
544
557
  }
545
558
 
@@ -612,6 +625,10 @@ export async function executeClaudeTask(taskId: string): Promise<void> {
612
625
  prompt: ctx.userPrompt,
613
626
  options: {
614
627
  abortController,
628
+ // Explicit model: profile pin > onboarding preference > quality
629
+ // default. Omitting this let the SDK pick ITS default (Opus) and
630
+ // silently bill the wrong tier.
631
+ model: ctx.modelId,
615
632
  includePartialMessages: true,
616
633
  cwd: ctx.cwd,
617
634
  env: buildClaudeSdkEnv(authEnv),
@@ -771,6 +788,9 @@ export async function resumeClaudeTask(taskId: string): Promise<void> {
771
788
  options: {
772
789
  resume: task.sessionId,
773
790
  abortController,
791
+ // Same model resolution as the original run — a resume must not
792
+ // silently hop tiers (profile pin > preference > quality default).
793
+ model: ctx.modelId,
774
794
  includePartialMessages: true,
775
795
  cwd: ctx.cwd,
776
796
  env: buildClaudeSdkEnv(authEnv),
@@ -376,9 +376,14 @@ async function executeAnthropicDirectTask(taskId: string, isResume = false): Pro
376
376
  initialMessages = [{ role: "user", content: ctx.userPrompt }];
377
377
  }
378
378
 
379
- // Resolve model from settings
379
+ // Resolve model: explicit runtime setting > onboarding model preference
380
+ // tier > catalog default ("Balanced" means Sonnet on every runtime, not
381
+ // just chat — fix-workflow-model-preference-propagation).
380
382
  const { getSetting } = await import("@/lib/settings/helpers");
381
- const modelId = (await getSetting("anthropic_direct_model")) ?? getRuntimeCatalogEntry("anthropic-direct").models.default;
383
+ const { resolvePreferredModel } = await import("./model-preference");
384
+ const modelId =
385
+ (await getSetting("anthropic_direct_model")) ??
386
+ (await resolvePreferredModel("anthropic-direct")).modelId;
382
387
 
383
388
  const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
384
389
 
@@ -8,6 +8,7 @@ import { getProfile, listProfiles } from "@/lib/agents/profiles/registry";
8
8
  import { resolveProfileRuntimePayload } from "@/lib/agents/profiles/compatibility";
9
9
  import { executeClaudeTask, resumeClaudeTask } from "@/lib/agents/claude-agent";
10
10
  import { getRuntimeCapabilities, getRuntimeCatalogEntry } from "./catalog";
11
+ import { resolvePreferredModel } from "./model-preference";
11
12
  import { buildClaudeSdkEnv } from "./claude-sdk";
12
13
  import { getLaunchCwd } from "@/lib/environment/workspace-context";
13
14
  import { getSetting } from "@/lib/settings/helpers";
@@ -34,10 +35,13 @@ import {
34
35
  * model in that family instead of pinning to a string that silently ages out.
35
36
  * Without this, `query()` omits `model` entirely and the SDK falls back to its
36
37
  * own default — which is not necessarily the family the chat picker selected.
38
+ *
39
+ * Honors the user's onboarding model preference ("Balanced" means Sonnet
40
+ * everywhere, not just chat); falls back to the quality tier when no
41
+ * preference is recorded.
37
42
  */
38
- function claudeCodeModelAlias(): string {
39
- const models = getRuntimeCatalogEntry("claude-code").models;
40
- return models.tiers?.quality ?? models.default;
43
/**
 * Model id for claude-code SDK calls that carry no profile pin: asks the
 * shared resolver (called without options) and returns just the resolved id.
 */
async function claudeCodeModelAlias(): Promise<string> {
  const { modelId } = await resolvePreferredModel("claude-code");
  return modelId;
}
42
46
 
43
47
  function buildTaskAssistSystemPrompt(profileIds: string[]): string {
@@ -148,7 +152,7 @@ export async function runSingleProfileTest(
148
152
  prompt,
149
153
  options: {
150
154
  abortController,
151
- model: claudeCodeModelAlias(),
155
+ model: await claudeCodeModelAlias(),
152
156
  includePartialMessages: true,
153
157
  env: buildClaudeSdkEnv(authEnv),
154
158
  allowedTools: [],
@@ -306,7 +310,7 @@ export async function runMetaCompletion(input: {
306
310
  prompt: input.prompt,
307
311
  options: {
308
312
  abortController,
309
- model: claudeCodeModelAlias(),
313
+ model: await claudeCodeModelAlias(),
310
314
  includePartialMessages: true,
311
315
  cwd: getLaunchCwd(),
312
316
  env: buildClaudeSdkEnv(authEnv),
@@ -497,7 +501,7 @@ async function runClaudeProfileAssist(
497
501
  prompt,
498
502
  options: {
499
503
  abortController,
500
- model: claudeCodeModelAlias(),
504
+ model: await claudeCodeModelAlias(),
501
505
  includePartialMessages: true,
502
506
  cwd: getLaunchCwd(),
503
507
  env: buildClaudeSdkEnv(authEnv),
@@ -584,7 +588,7 @@ async function runClaudeTaskAssist(
584
588
  prompt,
585
589
  options: {
586
590
  abortController,
587
- model: claudeCodeModelAlias(),
591
+ model: await claudeCodeModelAlias(),
588
592
  includePartialMessages: true,
589
593
  cwd: getLaunchCwd(),
590
594
  env: buildClaudeSdkEnv(authEnv),
@@ -655,7 +659,7 @@ async function testClaudeConnection(): Promise<RuntimeConnectionResult> {
655
659
  prompt: "Reply with exactly: OK",
656
660
  options: {
657
661
  abortController,
658
- model: claudeCodeModelAlias(),
662
+ model: await claudeCodeModelAlias(),
659
663
  maxTurns: 1,
660
664
  includePartialMessages: false,
661
665
  cwd: getLaunchCwd(),
@@ -0,0 +1,54 @@
1
+ import { getRuntimeCatalogEntry, type AgentRuntimeId } from "./catalog";
2
+ import { getModelPreference } from "@/lib/settings/helpers";
3
+
4
/**
 * Result of {@link resolvePreferredModel}.
 *
 * The `source` field keeps the routing inspectable: surfaces can say WHY a
 * model was chosen instead of silently swapping tiers.
 */
export interface ResolvedPreferredModel {
  /** Concrete model id to hand to the runtime. */
  modelId: string;
  /**
   * Which rule produced `modelId`: an explicit pin, the user's model
   * preference mapped to a catalog tier, or the fallback.
   */
  source: "pin" | "preference" | "default";
}

/**
 * Resolve the concrete model a claude-code (or any catalog) execution should
 * run on, honoring the user's onboarding model preference
 * (`chat.modelPreference`) outside chat. Order:
 *
 *  1. an explicit profile/step pin (`pinnedModelId`) — the user said exactly
 *     what to run, the preference never overrides it;
 *  2. the preference mapped to the runtime's tier ("balanced" → `tiers.balanced`,
 *     "cost" → `tiers.fast`, "quality" → `tiers.quality`);
 *  3. the runtime's quality tier, else the catalog default.
 *
 * "privacy" is a runtime-level preference (route to Ollama), not a model tier
 * within a cloud runtime — it resolves as "no within-runtime opinion" and
 * falls through to step 3, as does any preference whose tier the catalog
 * entry does not define.
 *
 * NOTE(review): step 3 matches what `claudeCodeModelAlias` did before this
 * resolver existed, but the direct runtimes previously fell back to
 * `models.default`, not the quality tier. Confirm the two agree for
 * `anthropic-direct` / `openai-direct`; otherwise their no-preference model
 * changes silently.
 *
 * @param runtimeId - Runtime whose catalog entry supplies the tiers/default.
 * @param options - Optional `pinnedModelId`; a falsy value (null, undefined,
 *   empty string) means "no pin".
 * @returns The chosen model id together with the rule that selected it.
 */
export async function resolvePreferredModel(
  runtimeId: AgentRuntimeId,
  options?: { pinnedModelId?: string | null },
): Promise<ResolvedPreferredModel> {
  const pinned = options?.pinnedModelId;
  if (pinned) {
    return { modelId: pinned, source: "pin" };
  }

  const models = getRuntimeCatalogEntry(runtimeId).models;
  const preference = await getModelPreference();

  // Only these three preferences name a tier; everything else (including
  // "privacy" and an unset preference) deliberately yields no tier model.
  let tierModel: string | undefined;
  switch (preference) {
    case "balanced":
      tierModel = models.tiers?.balanced;
      break;
    case "cost":
      tierModel = models.tiers?.fast;
      break;
    case "quality":
      tierModel = models.tiers?.quality;
      break;
    default:
      tierModel = undefined;
  }

  if (tierModel) {
    return { modelId: tierModel, source: "preference" };
  }

  return {
    modelId: models.tiers?.quality ?? models.default,
    source: "default",
  };
}
@@ -319,9 +319,14 @@ async function executeOpenAIDirectTask(taskId: string, isResume = false): Promis
319
319
  );
320
320
  const pluginMcpTools = mcpServersToOpenAiTools(mergedMcpServers);
321
321
 
322
- // Resolve model
322
+ // Resolve model: explicit runtime setting > onboarding model preference
323
+ // tier > catalog default ("Balanced" means the balanced tier on every
324
+ // runtime, not just chat — fix-workflow-model-preference-propagation).
323
325
  const { getSetting } = await import("@/lib/settings/helpers");
324
- const modelId = (await getSetting("openai_direct_model")) ?? getRuntimeCatalogEntry("openai-direct").models.default;
326
+ const { resolvePreferredModel } = await import("./model-preference");
327
+ const modelId =
328
+ (await getSetting("openai_direct_model")) ??
329
+ (await resolvePreferredModel("openai-direct")).modelId;
325
330
  const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
326
331
 
327
332
  // For resume: load previous response ID
@@ -18,6 +18,7 @@ import {
18
18
  } from "@/lib/data/chat";
19
19
  import { buildChatContext } from "./context-builder";
20
20
  import { getWorkspaceContext } from "@/lib/environment/workspace-context";
21
+ import { recordUsageLedgerEntry } from "@/lib/usage/ledger";
21
22
  import type { ChatStreamEvent } from "./types";
22
23
 
23
24
  /**
@@ -109,6 +110,47 @@ export async function* sendOllamaMessage(
109
110
 
110
111
  // Stream from Ollama
111
112
  let accumulated = "";
113
+
114
+ // Meter the turn like every other chat path (main engine writes a
115
+ // chat_turn row on success, degrade, and error). Ollama's final chunk
116
+ // reports prompt_eval_count / eval_count; local runs are recorded at $0 —
117
+ // those rows are what proves blended-cost savings on /costs.
118
+ const startedAt = new Date();
119
+ let inputTokens: number | null = null;
120
+ let outputTokens: number | null = null;
121
+ let ledgerRecorded = false;
122
// Write one `chat_turn` usage-ledger row for this Ollama turn.
//
// - At most one row per turn: the `ledgerRecorded` latch is set before the
//   write, so a later call (e.g. from the catch path after a success-path
//   call) is a no-op.
// - Token counts are whatever the stream captured so far; `totalTokens` is
//   only reported when both sides are known.
// - Best-effort: a failed ledger write is logged and swallowed. This helper is
//   awaited inside the streaming try block before `done` is yielded and again
//   in the catch path, so letting it throw would turn an already-persisted
//   reply into an error event or escape the generator. Metering must never
//   decide the outcome of the chat turn itself.
const recordTurn = async (status: "completed" | "failed" | "cancelled") => {
  if (ledgerRecorded) return;
  ledgerRecorded = true;

  const totalTokens =
    inputTokens != null && outputTokens != null
      ? inputTokens + outputTokens
      : null;

  try {
    await recordUsageLedgerEntry({
      projectId: conversation.projectId ?? null,
      activityType: "chat_turn",
      runtimeId: "ollama",
      providerId: "ollama",
      modelId,
      inputTokens,
      outputTokens,
      totalTokens,
      status,
      startedAt,
      finishedAt: new Date(),
    });
  } catch (err: unknown) {
    console.error("[ollama] failed to record usage ledger entry", err);
  }
};
142
// Copy Ollama's token counters from a parsed stream chunk into the turn's
// `inputTokens` / `outputTokens`. Each field is taken only when it is actually
// a number; anything else leaves the previously captured value untouched.
const captureTokenCounts = (parsed: {
  prompt_eval_count?: unknown;
  eval_count?: unknown;
}) => {
  const promptCount = parsed.prompt_eval_count;
  if (typeof promptCount === "number") inputTokens = promptCount;

  const completionCount = parsed.eval_count;
  if (typeof completionCount === "number") outputTokens = completionCount;
};
153
+
112
154
  try {
113
155
  const response = await fetch(`${baseUrl}/api/chat`, {
114
156
  method: "POST",
@@ -125,6 +167,7 @@ export async function* sendOllamaMessage(
125
167
  const errorText = await response.text().catch(() => "Unknown error");
126
168
  yield { type: "error", message: `Ollama error (${response.status}): ${errorText}` };
127
169
  await updateMessageStatus(assistantMsg.id, "complete");
170
+ await recordTurn("failed");
128
171
  return;
129
172
  }
130
173
 
@@ -132,6 +175,7 @@ export async function* sendOllamaMessage(
132
175
  if (!reader) {
133
176
  yield { type: "error", message: "No response stream from Ollama" };
134
177
  await updateMessageStatus(assistantMsg.id, "complete");
178
+ await recordTurn("failed");
135
179
  return;
136
180
  }
137
181
 
@@ -157,7 +201,10 @@ export async function* sendOllamaMessage(
157
201
  accumulated += delta;
158
202
  yield { type: "delta", content: delta };
159
203
  }
160
- if (parsed.done) break;
204
+ if (parsed.done) {
205
+ captureTokenCounts(parsed);
206
+ break;
207
+ }
161
208
  } catch {
162
209
  // Skip malformed lines
163
210
  }
@@ -173,6 +220,7 @@ export async function* sendOllamaMessage(
173
220
  accumulated += delta;
174
221
  yield { type: "delta", content: delta };
175
222
  }
223
+ if (parsed.done) captureTokenCounts(parsed);
176
224
  } catch {
177
225
  // ignore
178
226
  }
@@ -181,6 +229,7 @@ export async function* sendOllamaMessage(
181
229
  // Persist the complete response
182
230
  await updateMessageContent(assistantMsg.id, accumulated);
183
231
  await updateMessageStatus(assistantMsg.id, "complete");
232
+ await recordTurn("completed");
184
233
 
185
234
  yield { type: "done", messageId: assistantMsg.id, quickAccess: [] };
186
235
  } catch (err) {
@@ -194,5 +243,6 @@ export async function* sendOllamaMessage(
194
243
  await updateMessageContent(assistantMsg.id, accumulated);
195
244
  }
196
245
  await updateMessageStatus(assistantMsg.id, "complete");
246
+ await recordTurn(signal?.aborted ? "cancelled" : "failed");
197
247
  }
198
248
  }
@@ -68,6 +68,10 @@ export interface BudgetSnapshot {
68
68
  monthlyResetAtIso: string;
69
69
  runtimeStates: Record<AgentRuntimeId, RuntimeSetupState>;
70
70
  pricing: PricingRegistrySnapshot;
71
+ /** Real metered spend (usage_ledger sums) — never the plan-priced budget basis. */
72
+ meteredSpend: { dailyMicros: number; monthlyMicros: number };
73
+ /** Flat subscription price counted as the budget basis, when billing is subscription. */
74
+ planPricedMonthlyMicros: number | null;
71
75
  }
72
76
 
73
77
  interface BudgetGuardInput {
@@ -358,6 +362,23 @@ async function getUsageAggregates(
358
362
  }
359
363
  });
360
364
 
365
+ // Real metered spend: the plain usage_ledger sums across every runtime,
366
+ // captured BEFORE the subscription plan-price substitution below. Guardrail
367
+ // statuses budget against the plan price (a flat subscription is the real
368
+ // monthly outlay), but display surfaces must never present that basis as
369
+ // spend — they read this instead.
370
+ const metered = {
371
+ daily: { costMicros: 0, totalTokens: 0 },
372
+ monthly: { costMicros: 0, totalTokens: 0 },
373
+ };
374
+ for (const runtimeId of SUPPORTED_AGENT_RUNTIMES) {
375
+ metered.daily.costMicros += runtimes[runtimeId].daily.costMicros;
376
+ metered.daily.totalTokens += runtimes[runtimeId].daily.totalTokens;
377
+ metered.monthly.costMicros += runtimes[runtimeId].monthly.costMicros;
378
+ metered.monthly.totalTokens += runtimes[runtimeId].monthly.totalTokens;
379
+ }
380
+
381
+ let planPricedMonthlyMicros: number | null = null;
361
382
  if (runtimeStates["claude-code"].billingMode === "subscription") {
362
383
  const planPriceUsd = await getClaudeOAuthPlanPrice(
363
384
  policy.runtimes["claude-code"].claudeOAuthPlan
@@ -366,6 +387,7 @@ async function getUsageAggregates(
366
387
  const dailyMicros = Math.round(monthlyMicros / daysInMonth(now));
367
388
  runtimes["claude-code"].monthly.costMicros = monthlyMicros;
368
389
  runtimes["claude-code"].daily.costMicros = dailyMicros;
390
+ planPricedMonthlyMicros = monthlyMicros;
369
391
  }
370
392
 
371
393
  const overall = {
@@ -387,6 +409,8 @@ async function getUsageAggregates(
387
409
  return {
388
410
  overall,
389
411
  runtimes,
412
+ metered,
413
+ planPricedMonthlyMicros,
390
414
  ...getBudgetWindowBounds(now),
391
415
  };
392
416
  }
@@ -714,5 +738,10 @@ export async function getBudgetGuardrailSnapshot(): Promise<BudgetSnapshot> {
714
738
  monthlyResetAtIso: aggregates.monthlyEnd.toISOString(),
715
739
  runtimeStates,
716
740
  pricing,
741
+ meteredSpend: {
742
+ dailyMicros: aggregates.metered.daily.costMicros,
743
+ monthlyMicros: aggregates.metered.monthly.costMicros,
744
+ },
745
+ planPricedMonthlyMicros: aggregates.planPricedMonthlyMicros,
717
746
  };
718
747
  }
@@ -15,6 +15,13 @@ export async function deriveUsageCostMicros(input: {
15
15
  return { costMicros: null, pricingVersion: null };
16
16
  }
17
17
 
18
+ // Local inference has no per-token billing: a known-free $0, not unknown
19
+ // pricing. The $0 rows are the evidence for the blended-cost savings story —
20
+ // null here would demote every local run to "unknown_pricing" and hide them.
21
+ if (input.providerId === "ollama") {
22
+ return { costMicros: 0, pricingVersion: "local-free" };
23
+ }
24
+
18
25
  if (input.providerId !== "anthropic" && input.providerId !== "openai") {
19
26
  return { costMicros: null, pricingVersion: null };
20
27
  }