orionfold-relay 0.22.0 → 0.22.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +72 -13
- package/package.json +1 -1
- package/src/app/api/telemetry/route.ts +6 -5
- package/src/components/shell/telemetry-rail.tsx +30 -9
- package/src/components/shell/telemetry-types.ts +6 -2
- package/src/lib/agents/claude-agent.ts +20 -0
- package/src/lib/agents/runtime/anthropic-direct.ts +7 -2
- package/src/lib/agents/runtime/claude.ts +12 -8
- package/src/lib/agents/runtime/model-preference.ts +54 -0
- package/src/lib/agents/runtime/openai-direct.ts +7 -2
- package/src/lib/chat/ollama-engine.ts +51 -1
- package/src/lib/settings/budget-guardrails.ts +29 -0
- package/src/lib/usage/pricing.ts +7 -0
package/dist/cli.js
CHANGED
|
@@ -6910,6 +6910,34 @@ var init_output_scanner = __esm({
|
|
|
6910
6910
|
}
|
|
6911
6911
|
});
|
|
6912
6912
|
|
|
6913
|
+
// src/lib/agents/runtime/model-preference.ts
|
|
6914
|
+
var model_preference_exports = {};
|
|
6915
|
+
__export(model_preference_exports, {
|
|
6916
|
+
resolvePreferredModel: () => resolvePreferredModel
|
|
6917
|
+
});
|
|
6918
|
+
async function resolvePreferredModel(runtimeId, options) {
|
|
6919
|
+
if (options?.pinnedModelId) {
|
|
6920
|
+
return { modelId: options.pinnedModelId, source: "pin" };
|
|
6921
|
+
}
|
|
6922
|
+
const models = getRuntimeCatalogEntry(runtimeId).models;
|
|
6923
|
+
const preference = await getModelPreference();
|
|
6924
|
+
const tierModel = preference === "balanced" ? models.tiers?.balanced : preference === "cost" ? models.tiers?.fast : preference === "quality" ? models.tiers?.quality : void 0;
|
|
6925
|
+
if (tierModel) {
|
|
6926
|
+
return { modelId: tierModel, source: "preference" };
|
|
6927
|
+
}
|
|
6928
|
+
return {
|
|
6929
|
+
modelId: models.tiers?.quality ?? models.default,
|
|
6930
|
+
source: "default"
|
|
6931
|
+
};
|
|
6932
|
+
}
|
|
6933
|
+
var init_model_preference = __esm({
|
|
6934
|
+
"src/lib/agents/runtime/model-preference.ts"() {
|
|
6935
|
+
"use strict";
|
|
6936
|
+
init_catalog2();
|
|
6937
|
+
init_helpers();
|
|
6938
|
+
}
|
|
6939
|
+
});
|
|
6940
|
+
|
|
6913
6941
|
// src/lib/agents/runtime/claude-sdk.ts
|
|
6914
6942
|
function buildClaudeSdkEnv(authEnv) {
|
|
6915
6943
|
const { CLAUDECODE, ANTHROPIC_API_KEY, ...cleanEnv } = process.env;
|
|
@@ -7679,6 +7707,9 @@ async function deriveUsageCostMicros(input) {
|
|
|
7679
7707
|
if (!input.modelId) {
|
|
7680
7708
|
return { costMicros: null, pricingVersion: null };
|
|
7681
7709
|
}
|
|
7710
|
+
if (input.providerId === "ollama") {
|
|
7711
|
+
return { costMicros: 0, pricingVersion: "local-free" };
|
|
7712
|
+
}
|
|
7682
7713
|
if (input.providerId !== "anthropic" && input.providerId !== "openai") {
|
|
7683
7714
|
return { costMicros: null, pricingVersion: null };
|
|
7684
7715
|
}
|
|
@@ -18074,13 +18105,17 @@ ${learnedCtx}
|
|
|
18074
18105
|
You are operating inside a git worktree (branch: ${ws.gitBranch ?? "unknown"}). All file operations MUST use paths relative to the working directory: ${cwd}. Do NOT navigate to or create files in the main repository directory.` : "";
|
|
18075
18106
|
const systemInstructions = [worktreeNote, profileInstructions, learnedCtxBlock, docContext, tableContext, outputInstructions].filter(Boolean).join("\n\n");
|
|
18076
18107
|
const maxTurns = profile?.maxTurns ?? DEFAULT_MAX_TURNS;
|
|
18108
|
+
const { modelId } = await resolvePreferredModel("claude-code", {
|
|
18109
|
+
pinnedModelId: profile?.capabilityOverrides?.["claude-code"]?.modelId
|
|
18110
|
+
});
|
|
18077
18111
|
return {
|
|
18078
18112
|
userPrompt: basePrompt,
|
|
18079
18113
|
systemInstructions,
|
|
18080
18114
|
cwd,
|
|
18081
18115
|
payload,
|
|
18082
18116
|
maxTurns,
|
|
18083
|
-
canUseToolPolicy: payload?.canUseToolPolicy
|
|
18117
|
+
canUseToolPolicy: payload?.canUseToolPolicy,
|
|
18118
|
+
modelId
|
|
18084
18119
|
};
|
|
18085
18120
|
}
|
|
18086
18121
|
async function executeClaudeTask(taskId) {
|
|
@@ -18126,6 +18161,10 @@ async function executeClaudeTask(taskId) {
|
|
|
18126
18161
|
prompt: ctx.userPrompt,
|
|
18127
18162
|
options: {
|
|
18128
18163
|
abortController,
|
|
18164
|
+
// Explicit model: profile pin > onboarding preference > quality
|
|
18165
|
+
// default. Omitting this let the SDK pick ITS default (Opus) and
|
|
18166
|
+
// silently bill the wrong tier.
|
|
18167
|
+
model: ctx.modelId,
|
|
18129
18168
|
includePartialMessages: true,
|
|
18130
18169
|
cwd: ctx.cwd,
|
|
18131
18170
|
env: buildClaudeSdkEnv(authEnv),
|
|
@@ -18248,6 +18287,9 @@ async function resumeClaudeTask(taskId) {
|
|
|
18248
18287
|
options: {
|
|
18249
18288
|
resume: task.sessionId,
|
|
18250
18289
|
abortController,
|
|
18290
|
+
// Same model resolution as the original run — a resume must not
|
|
18291
|
+
// silently hop tiers (profile pin > preference > quality default).
|
|
18292
|
+
model: ctx.modelId,
|
|
18251
18293
|
includePartialMessages: true,
|
|
18252
18294
|
cwd: ctx.cwd,
|
|
18253
18295
|
env: buildClaudeSdkEnv(authEnv),
|
|
@@ -18366,6 +18408,7 @@ var init_claude_agent = __esm({
|
|
|
18366
18408
|
init_context_builder2();
|
|
18367
18409
|
init_output_scanner();
|
|
18368
18410
|
init_registry2();
|
|
18411
|
+
init_model_preference();
|
|
18369
18412
|
init_compatibility();
|
|
18370
18413
|
init_claude_sdk();
|
|
18371
18414
|
init_types2();
|
|
@@ -18384,9 +18427,8 @@ var init_claude_agent = __esm({
|
|
|
18384
18427
|
// src/lib/agents/runtime/claude.ts
|
|
18385
18428
|
import { query as query2 } from "@anthropic-ai/claude-agent-sdk";
|
|
18386
18429
|
import { eq as eq34 } from "drizzle-orm";
|
|
18387
|
-
function claudeCodeModelAlias() {
|
|
18388
|
-
|
|
18389
|
-
return models.tiers?.quality ?? models.default;
|
|
18430
|
+
async function claudeCodeModelAlias() {
|
|
18431
|
+
return (await resolvePreferredModel("claude-code")).modelId;
|
|
18390
18432
|
}
|
|
18391
18433
|
function buildTaskAssistSystemPrompt(profileIds) {
|
|
18392
18434
|
const profileList = profileIds.length > 0 ? `Available agent profiles: ${profileIds.join(", ")}
|
|
@@ -18480,7 +18522,7 @@ Provide a brief analysis (2-3 paragraphs max). Include specific terminology rele
|
|
|
18480
18522
|
prompt,
|
|
18481
18523
|
options: {
|
|
18482
18524
|
abortController,
|
|
18483
|
-
model: claudeCodeModelAlias(),
|
|
18525
|
+
model: await claudeCodeModelAlias(),
|
|
18484
18526
|
includePartialMessages: true,
|
|
18485
18527
|
env: buildClaudeSdkEnv(authEnv),
|
|
18486
18528
|
allowedTools: []
|
|
@@ -18621,7 +18663,7 @@ async function runMetaCompletion(input) {
|
|
|
18621
18663
|
prompt: input.prompt,
|
|
18622
18664
|
options: {
|
|
18623
18665
|
abortController,
|
|
18624
|
-
model: claudeCodeModelAlias(),
|
|
18666
|
+
model: await claudeCodeModelAlias(),
|
|
18625
18667
|
includePartialMessages: true,
|
|
18626
18668
|
cwd: getLaunchCwd(),
|
|
18627
18669
|
env: buildClaudeSdkEnv(authEnv),
|
|
@@ -18801,7 +18843,7 @@ ${userMessage}`;
|
|
|
18801
18843
|
prompt,
|
|
18802
18844
|
options: {
|
|
18803
18845
|
abortController,
|
|
18804
|
-
model: claudeCodeModelAlias(),
|
|
18846
|
+
model: await claudeCodeModelAlias(),
|
|
18805
18847
|
includePartialMessages: true,
|
|
18806
18848
|
cwd: getLaunchCwd(),
|
|
18807
18849
|
env: buildClaudeSdkEnv(authEnv),
|
|
@@ -18876,7 +18918,7 @@ ${userMessage}`;
|
|
|
18876
18918
|
prompt,
|
|
18877
18919
|
options: {
|
|
18878
18920
|
abortController,
|
|
18879
|
-
model: claudeCodeModelAlias(),
|
|
18921
|
+
model: await claudeCodeModelAlias(),
|
|
18880
18922
|
includePartialMessages: true,
|
|
18881
18923
|
cwd: getLaunchCwd(),
|
|
18882
18924
|
env: buildClaudeSdkEnv(authEnv),
|
|
@@ -18941,7 +18983,7 @@ async function testClaudeConnection() {
|
|
|
18941
18983
|
prompt: "Reply with exactly: OK",
|
|
18942
18984
|
options: {
|
|
18943
18985
|
abortController,
|
|
18944
|
-
model: claudeCodeModelAlias(),
|
|
18986
|
+
model: await claudeCodeModelAlias(),
|
|
18945
18987
|
maxTurns: 1,
|
|
18946
18988
|
includePartialMessages: false,
|
|
18947
18989
|
cwd: getLaunchCwd(),
|
|
@@ -18988,6 +19030,7 @@ var init_claude = __esm({
|
|
|
18988
19030
|
init_compatibility();
|
|
18989
19031
|
init_claude_agent();
|
|
18990
19032
|
init_catalog2();
|
|
19033
|
+
init_model_preference();
|
|
18991
19034
|
init_claude_sdk();
|
|
18992
19035
|
init_workspace_context();
|
|
18993
19036
|
init_helpers();
|
|
@@ -20882,7 +20925,8 @@ ${outputInstructions}`;
|
|
|
20882
20925
|
initialMessages = [{ role: "user", content: ctx.userPrompt }];
|
|
20883
20926
|
}
|
|
20884
20927
|
const { getSetting: getSetting2 } = await Promise.resolve().then(() => (init_helpers(), helpers_exports));
|
|
20885
|
-
const
|
|
20928
|
+
const { resolvePreferredModel: resolvePreferredModel2 } = await Promise.resolve().then(() => (init_model_preference(), model_preference_exports));
|
|
20929
|
+
const modelId = await getSetting2("anthropic_direct_model") ?? (await resolvePreferredModel2("anthropic-direct")).modelId;
|
|
20886
20930
|
const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
|
|
20887
20931
|
await db.insert(agentLogs).values({
|
|
20888
20932
|
id: crypto.randomUUID(),
|
|
@@ -21353,7 +21397,8 @@ ${outputInstructions}`;
|
|
|
21353
21397
|
);
|
|
21354
21398
|
const pluginMcpTools = mcpServersToOpenAiTools(mergedMcpServers);
|
|
21355
21399
|
const { getSetting: getSetting2 } = await Promise.resolve().then(() => (init_helpers(), helpers_exports));
|
|
21356
|
-
const
|
|
21400
|
+
const { resolvePreferredModel: resolvePreferredModel2 } = await Promise.resolve().then(() => (init_model_preference(), model_preference_exports));
|
|
21401
|
+
const modelId = await getSetting2("openai_direct_model") ?? (await resolvePreferredModel2("openai-direct")).modelId;
|
|
21357
21402
|
const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
|
|
21358
21403
|
let previousResponseId = null;
|
|
21359
21404
|
if (isResume) {
|
|
@@ -22193,6 +22238,17 @@ async function getUsageAggregates(policy, runtimeStates, now = /* @__PURE__ */ n
|
|
|
22193
22238
|
runtimes[runtimeId].daily.totalTokens += row.totalTokens ?? 0;
|
|
22194
22239
|
}
|
|
22195
22240
|
});
|
|
22241
|
+
const metered = {
|
|
22242
|
+
daily: { costMicros: 0, totalTokens: 0 },
|
|
22243
|
+
monthly: { costMicros: 0, totalTokens: 0 }
|
|
22244
|
+
};
|
|
22245
|
+
for (const runtimeId of SUPPORTED_AGENT_RUNTIMES) {
|
|
22246
|
+
metered.daily.costMicros += runtimes[runtimeId].daily.costMicros;
|
|
22247
|
+
metered.daily.totalTokens += runtimes[runtimeId].daily.totalTokens;
|
|
22248
|
+
metered.monthly.costMicros += runtimes[runtimeId].monthly.costMicros;
|
|
22249
|
+
metered.monthly.totalTokens += runtimes[runtimeId].monthly.totalTokens;
|
|
22250
|
+
}
|
|
22251
|
+
let planPricedMonthlyMicros = null;
|
|
22196
22252
|
if (runtimeStates["claude-code"].billingMode === "subscription") {
|
|
22197
22253
|
const planPriceUsd = await getClaudeOAuthPlanPrice(
|
|
22198
22254
|
policy.runtimes["claude-code"].claudeOAuthPlan
|
|
@@ -22201,6 +22257,7 @@ async function getUsageAggregates(policy, runtimeStates, now = /* @__PURE__ */ n
|
|
|
22201
22257
|
const dailyMicros = Math.round(monthlyMicros / daysInMonth(now));
|
|
22202
22258
|
runtimes["claude-code"].monthly.costMicros = monthlyMicros;
|
|
22203
22259
|
runtimes["claude-code"].daily.costMicros = dailyMicros;
|
|
22260
|
+
planPricedMonthlyMicros = monthlyMicros;
|
|
22204
22261
|
}
|
|
22205
22262
|
const overall = {
|
|
22206
22263
|
daily: { costMicros: 0, totalTokens: 0 },
|
|
@@ -22218,6 +22275,8 @@ async function getUsageAggregates(policy, runtimeStates, now = /* @__PURE__ */ n
|
|
|
22218
22275
|
return {
|
|
22219
22276
|
overall,
|
|
22220
22277
|
runtimes,
|
|
22278
|
+
metered,
|
|
22279
|
+
planPricedMonthlyMicros,
|
|
22221
22280
|
...getBudgetWindowBounds(now)
|
|
22222
22281
|
};
|
|
22223
22282
|
}
|
|
@@ -25681,8 +25740,8 @@ import { execFileSync as execFileSync3 } from "child_process";
|
|
|
25681
25740
|
import yaml12 from "js-yaml";
|
|
25682
25741
|
import semver from "semver";
|
|
25683
25742
|
function relayCoreVersion() {
|
|
25684
|
-
if (semver.valid("0.22.0")) {
|
|
25685
|
-
return "0.22.0";
|
|
25743
|
+
if (semver.valid("0.22.1")) {
|
|
25744
|
+
return "0.22.1";
|
|
25686
25745
|
}
|
|
25687
25746
|
try {
|
|
25688
25747
|
const root = getAppRoot(import.meta.dirname, 3);
|
package/package.json
CHANGED
|
package/src/app/api/telemetry/route.ts
@@ -175,9 +175,6 @@ export async function GET() {
|
|
|
175
175
|
getFailuresByDay(7),
|
|
176
176
|
]);
|
|
177
177
|
|
|
178
|
-
const overallDaily = budget.statuses.find(
|
|
179
|
-
(s) => s.scopeId === "overall" && s.window === "daily",
|
|
180
|
-
);
|
|
181
178
|
const overallMonthly = budget.statuses.find(
|
|
182
179
|
(s) => s.scopeId === "overall" && s.window === "monthly",
|
|
183
180
|
);
|
|
@@ -196,8 +193,12 @@ export async function GET() {
|
|
|
196
193
|
activeProjects: activeProjects?.count ?? 0,
|
|
197
194
|
activeWorkflows: activeWorkflows?.count ?? 0,
|
|
198
195
|
reviewPending,
|
|
199
|
-
|
|
200
|
-
|
|
196
|
+
// Metered ledger sums only — the guardrail statuses' plan-priced budget
|
|
197
|
+
// basis must never render as "cost" (fix-dashboard-budget-vs-cost-labeling).
|
|
198
|
+
costTodayMicros: budget.meteredSpend.dailyMicros,
|
|
199
|
+
costToDateMicros: budget.meteredSpend.monthlyMicros,
|
|
200
|
+
budgetMonthlyCapMicros: overallMonthly?.limitValue ?? null,
|
|
201
|
+
planPricedMonthlyMicros: budget.planPricedMonthlyMicros,
|
|
201
202
|
runtimeLabel,
|
|
202
203
|
providerId,
|
|
203
204
|
runtimeSdkVersion,
|
|
package/src/components/shell/telemetry-rail.tsx
@@ -20,16 +20,19 @@ import {
|
|
|
20
20
|
} from "lucide-react";
|
|
21
21
|
import { RailCell, formatMicros } from "./rail-cell";
|
|
22
22
|
import { useTelemetry } from "./use-telemetry";
|
|
23
|
+
import type { TelemetrySnapshot } from "./telemetry-types";
|
|
23
24
|
|
|
24
25
|
// The standing instrument cluster: a single dense horizontal row beneath the app
|
|
25
26
|
// bar (mirrors `.hp-rail`). A cockpit for a multi-agent harness — eight real
|
|
26
27
|
// cells: HOST (folder · cpu/mem) · RUNTIME (label · sdk version) · TASKS
|
|
27
28
|
// (running + 24h activity spark) · THROUGHPUT (completed today + 7d spark) ·
|
|
28
|
-
// FAILURES (failed + 7d spark, red) · REVIEW (pending) ·
|
|
29
|
+
// FAILURES (failed + 7d spark, red) · REVIEW (pending) · SPEND TODAY · SPEND TO
|
|
29
30
|
// DATE — plus a live/error status foot. No fabricated data: while loading, cells
|
|
30
31
|
// show "—"; on a poll error the last good snapshot stays visible and the foot
|
|
31
32
|
// flips to an explicit error pip. Static identity (cwd/runtime) is compressed
|
|
32
|
-
// into sub-lines so the live throughput signal owns the foreground.
|
|
33
|
+
// into sub-lines so the live throughput signal owns the foreground. The SPEND
|
|
34
|
+
// cells render real metered ledger sums; the budget cap and any flat plan price
|
|
35
|
+
// live in the sub-line, named as what they are — never presented as spend.
|
|
33
36
|
|
|
34
37
|
// Compose the HOST sub-line from whatever live metrics the platform reports;
|
|
35
38
|
// falls back to git branch so the cell is never empty.
|
|
@@ -45,6 +48,20 @@ function hostSub(
|
|
|
45
48
|
return branch ? `git:${branch}` : "no git";
|
|
46
49
|
}
|
|
47
50
|
|
|
51
|
+
// Sub-line for SPEND TO DATE: name the flat plan price when billing is
|
|
52
|
+
// subscription (it sits on top of metered spend, it is not spend), else show
|
|
53
|
+
// the monthly budget cap as "budget", else the plain window label.
|
|
54
|
+
function spendToDateSub(data: TelemetrySnapshot | null): string {
|
|
55
|
+
if (!data) return "—";
|
|
56
|
+
if (data.planPricedMonthlyMicros != null) {
|
|
57
|
+
return `+ plan ${formatMicros(data.planPricedMonthlyMicros)}/mo`;
|
|
58
|
+
}
|
|
59
|
+
if (data.budgetMonthlyCapMicros != null) {
|
|
60
|
+
return `of ${formatMicros(data.budgetMonthlyCapMicros)} budget`;
|
|
61
|
+
}
|
|
62
|
+
return "monthly";
|
|
63
|
+
}
|
|
64
|
+
|
|
48
65
|
export function TelemetryRail() {
|
|
49
66
|
const telemetry = useTelemetry();
|
|
50
67
|
const data = telemetry.data;
|
|
@@ -71,9 +88,13 @@ export function TelemetryRail() {
|
|
|
71
88
|
loading={loading}
|
|
72
89
|
value={data?.runtimeLabel ?? "—"}
|
|
73
90
|
sub={
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
91
|
+
// Never fabricate "not configured" while the snapshot is still
|
|
92
|
+
// loading — only claim it once data has actually said so.
|
|
93
|
+
data
|
|
94
|
+
? data.runtimeSdkVersion
|
|
95
|
+
? `sdk ${data.runtimeSdkVersion}`
|
|
96
|
+
: data.providerId ?? "not configured"
|
|
97
|
+
: "—"
|
|
77
98
|
}
|
|
78
99
|
/>
|
|
79
100
|
<RailCell
|
|
@@ -129,18 +150,18 @@ export function TelemetryRail() {
|
|
|
129
150
|
sub="active"
|
|
130
151
|
/>
|
|
131
152
|
<RailCell
|
|
132
|
-
label="
|
|
153
|
+
label="Spend Today"
|
|
133
154
|
icon={<Coins aria-hidden />}
|
|
134
155
|
loading={loading}
|
|
135
156
|
value={data ? formatMicros(data.costTodayMicros) : "—"}
|
|
136
|
-
sub="
|
|
157
|
+
sub="metered"
|
|
137
158
|
/>
|
|
138
159
|
<RailCell
|
|
139
|
-
label="
|
|
160
|
+
label="Spend To Date"
|
|
140
161
|
icon={<Wallet aria-hidden />}
|
|
141
162
|
loading={loading}
|
|
142
163
|
value={data ? formatMicros(data.costToDateMicros) : "—"}
|
|
143
|
-
sub=
|
|
164
|
+
sub={spendToDateSub(data)}
|
|
144
165
|
/>
|
|
145
166
|
<div className="ml-auto flex items-center gap-2 px-4 font-mono text-xs text-muted-foreground/60">
|
|
146
167
|
{errored ? (
|
|
package/src/components/shell/telemetry-types.ts
@@ -16,10 +16,14 @@ export interface TelemetrySnapshot {
|
|
|
16
16
|
activeWorkflows: number;
|
|
17
17
|
/** Unread `permission_required` notifications awaiting a human decision. */
|
|
18
18
|
reviewPending: number;
|
|
19
|
-
/**
|
|
19
|
+
/** Real metered spend today in micros (usage_ledger sum, USD * 1e6) — never a budget/plan figure. */
|
|
20
20
|
costTodayMicros: number;
|
|
21
|
-
/**
|
|
21
|
+
/** Real metered spend this month in micros (usage_ledger sum, USD * 1e6) — never a budget/plan figure. */
|
|
22
22
|
costToDateMicros: number;
|
|
23
|
+
/** Overall monthly budget cap in micros, or null when unlimited. */
|
|
24
|
+
budgetMonthlyCapMicros: number | null;
|
|
25
|
+
/** Flat subscription plan price in micros when billing is subscription (not metered spend), else null. */
|
|
26
|
+
planPricedMonthlyMicros: number | null;
|
|
23
27
|
/** Display label of the active runtime (e.g. "Claude Code"), or null if none configured. */
|
|
24
28
|
runtimeLabel: string | null;
|
|
25
29
|
/** Provider behind the active runtime ("anthropic" | "openai" | "ollama"), or null. */
|
|
package/src/lib/agents/claude-agent.ts
@@ -13,6 +13,7 @@ import {
|
|
|
13
13
|
scanTaskOutputDocuments,
|
|
14
14
|
} from "@/lib/documents/output-scanner";
|
|
15
15
|
import { getProfile } from "./profiles/registry";
|
|
16
|
+
import { resolvePreferredModel } from "./runtime/model-preference";
|
|
16
17
|
import { resolveProfileRuntimePayload, type ResolvedProfileRuntimePayload } from "./profiles/compatibility";
|
|
17
18
|
import type { CanUseToolPolicy } from "./profiles/types";
|
|
18
19
|
import {
|
|
@@ -483,6 +484,13 @@ export interface TaskQueryContext {
|
|
|
483
484
|
maxTurns: number;
|
|
484
485
|
/** Profile's canUseToolPolicy */
|
|
485
486
|
canUseToolPolicy?: CanUseToolPolicy;
|
|
487
|
+
/**
|
|
488
|
+
* Concrete model to pass to `query()`: profile pin > onboarding model
|
|
489
|
+
* preference tier > quality default. Without an explicit model the SDK
|
|
490
|
+
* silently falls back to ITS default — which billed Opus to users who chose
|
|
491
|
+
* "Balanced" (fix-workflow-model-preference-propagation).
|
|
492
|
+
*/
|
|
493
|
+
modelId: string;
|
|
486
494
|
}
|
|
487
495
|
|
|
488
496
|
export async function buildTaskQueryContext(
|
|
@@ -533,6 +541,10 @@ export async function buildTaskQueryContext(
|
|
|
533
541
|
// F9: Use profile maxTurns or fall back to default
|
|
534
542
|
const maxTurns = profile?.maxTurns ?? DEFAULT_MAX_TURNS;
|
|
535
543
|
|
|
544
|
+
const { modelId } = await resolvePreferredModel("claude-code", {
|
|
545
|
+
pinnedModelId: profile?.capabilityOverrides?.["claude-code"]?.modelId,
|
|
546
|
+
});
|
|
547
|
+
|
|
536
548
|
return {
|
|
537
549
|
userPrompt: basePrompt,
|
|
538
550
|
systemInstructions,
|
|
@@ -540,6 +552,7 @@ export async function buildTaskQueryContext(
|
|
|
540
552
|
payload,
|
|
541
553
|
maxTurns,
|
|
542
554
|
canUseToolPolicy: payload?.canUseToolPolicy,
|
|
555
|
+
modelId,
|
|
543
556
|
};
|
|
544
557
|
}
|
|
545
558
|
|
|
@@ -612,6 +625,10 @@ export async function executeClaudeTask(taskId: string): Promise<void> {
|
|
|
612
625
|
prompt: ctx.userPrompt,
|
|
613
626
|
options: {
|
|
614
627
|
abortController,
|
|
628
|
+
// Explicit model: profile pin > onboarding preference > quality
|
|
629
|
+
// default. Omitting this let the SDK pick ITS default (Opus) and
|
|
630
|
+
// silently bill the wrong tier.
|
|
631
|
+
model: ctx.modelId,
|
|
615
632
|
includePartialMessages: true,
|
|
616
633
|
cwd: ctx.cwd,
|
|
617
634
|
env: buildClaudeSdkEnv(authEnv),
|
|
@@ -771,6 +788,9 @@ export async function resumeClaudeTask(taskId: string): Promise<void> {
|
|
|
771
788
|
options: {
|
|
772
789
|
resume: task.sessionId,
|
|
773
790
|
abortController,
|
|
791
|
+
// Same model resolution as the original run — a resume must not
|
|
792
|
+
// silently hop tiers (profile pin > preference > quality default).
|
|
793
|
+
model: ctx.modelId,
|
|
774
794
|
includePartialMessages: true,
|
|
775
795
|
cwd: ctx.cwd,
|
|
776
796
|
env: buildClaudeSdkEnv(authEnv),
|
|
package/src/lib/agents/runtime/anthropic-direct.ts
@@ -376,9 +376,14 @@ async function executeAnthropicDirectTask(taskId: string, isResume = false): Pro
|
|
|
376
376
|
initialMessages = [{ role: "user", content: ctx.userPrompt }];
|
|
377
377
|
}
|
|
378
378
|
|
|
379
|
-
// Resolve model
|
|
379
|
+
// Resolve model: explicit runtime setting > onboarding model preference
|
|
380
|
+
// tier > catalog default ("Balanced" means Sonnet on every runtime, not
|
|
381
|
+
// just chat — fix-workflow-model-preference-propagation).
|
|
380
382
|
const { getSetting } = await import("@/lib/settings/helpers");
|
|
381
|
-
const
|
|
383
|
+
const { resolvePreferredModel } = await import("./model-preference");
|
|
384
|
+
const modelId =
|
|
385
|
+
(await getSetting("anthropic_direct_model")) ??
|
|
386
|
+
(await resolvePreferredModel("anthropic-direct")).modelId;
|
|
382
387
|
|
|
383
388
|
const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
|
|
384
389
|
|
|
package/src/lib/agents/runtime/claude.ts
@@ -8,6 +8,7 @@ import { getProfile, listProfiles } from "@/lib/agents/profiles/registry";
|
|
|
8
8
|
import { resolveProfileRuntimePayload } from "@/lib/agents/profiles/compatibility";
|
|
9
9
|
import { executeClaudeTask, resumeClaudeTask } from "@/lib/agents/claude-agent";
|
|
10
10
|
import { getRuntimeCapabilities, getRuntimeCatalogEntry } from "./catalog";
|
|
11
|
+
import { resolvePreferredModel } from "./model-preference";
|
|
11
12
|
import { buildClaudeSdkEnv } from "./claude-sdk";
|
|
12
13
|
import { getLaunchCwd } from "@/lib/environment/workspace-context";
|
|
13
14
|
import { getSetting } from "@/lib/settings/helpers";
|
|
@@ -34,10 +35,13 @@ import {
|
|
|
34
35
|
* model in that family instead of pinning to a string that silently ages out.
|
|
35
36
|
* Without this, `query()` omits `model` entirely and the SDK falls back to its
|
|
36
37
|
* own default — which is not necessarily the family the chat picker selected.
|
|
38
|
+
*
|
|
39
|
+
* Honors the user's onboarding model preference ("Balanced" means Sonnet
|
|
40
|
+
* everywhere, not just chat); falls back to the quality tier when no
|
|
41
|
+
* preference is recorded.
|
|
37
42
|
*/
|
|
38
|
-
function claudeCodeModelAlias(): string {
|
|
39
|
-
|
|
40
|
-
return models.tiers?.quality ?? models.default;
|
|
43
|
+
async function claudeCodeModelAlias(): Promise<string> {
|
|
44
|
+
return (await resolvePreferredModel("claude-code")).modelId;
|
|
41
45
|
}
|
|
42
46
|
|
|
43
47
|
function buildTaskAssistSystemPrompt(profileIds: string[]): string {
|
|
@@ -148,7 +152,7 @@ export async function runSingleProfileTest(
|
|
|
148
152
|
prompt,
|
|
149
153
|
options: {
|
|
150
154
|
abortController,
|
|
151
|
-
model: claudeCodeModelAlias(),
|
|
155
|
+
model: await claudeCodeModelAlias(),
|
|
152
156
|
includePartialMessages: true,
|
|
153
157
|
env: buildClaudeSdkEnv(authEnv),
|
|
154
158
|
allowedTools: [],
|
|
@@ -306,7 +310,7 @@ export async function runMetaCompletion(input: {
|
|
|
306
310
|
prompt: input.prompt,
|
|
307
311
|
options: {
|
|
308
312
|
abortController,
|
|
309
|
-
model: claudeCodeModelAlias(),
|
|
313
|
+
model: await claudeCodeModelAlias(),
|
|
310
314
|
includePartialMessages: true,
|
|
311
315
|
cwd: getLaunchCwd(),
|
|
312
316
|
env: buildClaudeSdkEnv(authEnv),
|
|
@@ -497,7 +501,7 @@ async function runClaudeProfileAssist(
|
|
|
497
501
|
prompt,
|
|
498
502
|
options: {
|
|
499
503
|
abortController,
|
|
500
|
-
model: claudeCodeModelAlias(),
|
|
504
|
+
model: await claudeCodeModelAlias(),
|
|
501
505
|
includePartialMessages: true,
|
|
502
506
|
cwd: getLaunchCwd(),
|
|
503
507
|
env: buildClaudeSdkEnv(authEnv),
|
|
@@ -584,7 +588,7 @@ async function runClaudeTaskAssist(
|
|
|
584
588
|
prompt,
|
|
585
589
|
options: {
|
|
586
590
|
abortController,
|
|
587
|
-
model: claudeCodeModelAlias(),
|
|
591
|
+
model: await claudeCodeModelAlias(),
|
|
588
592
|
includePartialMessages: true,
|
|
589
593
|
cwd: getLaunchCwd(),
|
|
590
594
|
env: buildClaudeSdkEnv(authEnv),
|
|
@@ -655,7 +659,7 @@ async function testClaudeConnection(): Promise<RuntimeConnectionResult> {
|
|
|
655
659
|
prompt: "Reply with exactly: OK",
|
|
656
660
|
options: {
|
|
657
661
|
abortController,
|
|
658
|
-
model: claudeCodeModelAlias(),
|
|
662
|
+
model: await claudeCodeModelAlias(),
|
|
659
663
|
maxTurns: 1,
|
|
660
664
|
includePartialMessages: false,
|
|
661
665
|
cwd: getLaunchCwd(),
|
|
package/src/lib/agents/runtime/model-preference.ts
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { getRuntimeCatalogEntry, type AgentRuntimeId } from "./catalog";
|
|
2
|
+
import { getModelPreference } from "@/lib/settings/helpers";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Resolve the concrete model a claude-code (or any catalog) execution should
|
|
6
|
+
* run on, honoring the user's onboarding model preference
|
|
7
|
+
* (`chat.modelPreference`) outside chat. Order:
|
|
8
|
+
*
|
|
9
|
+
* 1. an explicit profile/step pin (`pinnedModelId`) — the user said exactly
|
|
10
|
+
* what to run, the preference never overrides it;
|
|
11
|
+
* 2. the preference mapped to the runtime's tier ("balanced" → Sonnet,
|
|
12
|
+
* "cost" → fast/Haiku, "quality" → Opus);
|
|
13
|
+
* 3. the runtime's quality tier / catalog default — the pre-existing
|
|
14
|
+
* behavior when no preference is recorded.
|
|
15
|
+
*
|
|
16
|
+
* "privacy" is a runtime-level preference (route to Ollama), not a model tier
|
|
17
|
+
* within a cloud runtime — it resolves as "no within-runtime opinion".
|
|
18
|
+
*
|
|
19
|
+
* The `source` field keeps the routing inspectable: surfaces can say WHY a
|
|
20
|
+
* model was chosen instead of silently swapping tiers.
|
|
21
|
+
*/
|
|
22
|
+
export interface ResolvedPreferredModel {
|
|
23
|
+
modelId: string;
|
|
24
|
+
source: "pin" | "preference" | "default";
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export async function resolvePreferredModel(
|
|
28
|
+
runtimeId: AgentRuntimeId,
|
|
29
|
+
options?: { pinnedModelId?: string | null },
|
|
30
|
+
): Promise<ResolvedPreferredModel> {
|
|
31
|
+
if (options?.pinnedModelId) {
|
|
32
|
+
return { modelId: options.pinnedModelId, source: "pin" };
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
const models = getRuntimeCatalogEntry(runtimeId).models;
|
|
36
|
+
const preference = await getModelPreference();
|
|
37
|
+
const tierModel =
|
|
38
|
+
preference === "balanced"
|
|
39
|
+
? models.tiers?.balanced
|
|
40
|
+
: preference === "cost"
|
|
41
|
+
? models.tiers?.fast
|
|
42
|
+
: preference === "quality"
|
|
43
|
+
? models.tiers?.quality
|
|
44
|
+
: undefined;
|
|
45
|
+
|
|
46
|
+
if (tierModel) {
|
|
47
|
+
return { modelId: tierModel, source: "preference" };
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
return {
|
|
51
|
+
modelId: models.tiers?.quality ?? models.default,
|
|
52
|
+
source: "default",
|
|
53
|
+
};
|
|
54
|
+
}
|
|
package/src/lib/agents/runtime/openai-direct.ts
@@ -319,9 +319,14 @@ async function executeOpenAIDirectTask(taskId: string, isResume = false): Promis
|
|
|
319
319
|
);
|
|
320
320
|
const pluginMcpTools = mcpServersToOpenAiTools(mergedMcpServers);
|
|
321
321
|
|
|
322
|
-
// Resolve model
|
|
322
|
+
// Resolve model: explicit runtime setting > onboarding model preference
|
|
323
|
+
// tier > catalog default ("Balanced" means the balanced tier on every
|
|
324
|
+
// runtime, not just chat — fix-workflow-model-preference-propagation).
|
|
323
325
|
const { getSetting } = await import("@/lib/settings/helpers");
|
|
324
|
-
const
|
|
326
|
+
const { resolvePreferredModel } = await import("./model-preference");
|
|
327
|
+
const modelId =
|
|
328
|
+
(await getSetting("openai_direct_model")) ??
|
|
329
|
+
(await resolvePreferredModel("openai-direct")).modelId;
|
|
325
330
|
const maxTurns = ctx.maxTurns ?? DEFAULT_MAX_TURNS;
|
|
326
331
|
|
|
327
332
|
// For resume: load previous response ID
|
|
package/src/lib/chat/ollama-engine.ts
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
} from "@/lib/data/chat";
|
|
19
19
|
import { buildChatContext } from "./context-builder";
|
|
20
20
|
import { getWorkspaceContext } from "@/lib/environment/workspace-context";
|
|
21
|
+
import { recordUsageLedgerEntry } from "@/lib/usage/ledger";
|
|
21
22
|
import type { ChatStreamEvent } from "./types";
|
|
22
23
|
|
|
23
24
|
/**
|
|
@@ -109,6 +110,47 @@ export async function* sendOllamaMessage(
|
|
|
109
110
|
|
|
110
111
|
// Stream from Ollama
|
|
111
112
|
let accumulated = "";
|
|
113
|
+
|
|
114
|
+
// Meter the turn like every other chat path (main engine writes a
|
|
115
|
+
// chat_turn row on success, degrade, and error). Ollama's final chunk
|
|
116
|
+
// reports prompt_eval_count / eval_count; local runs are recorded at $0 —
|
|
117
|
+
// those rows are what proves blended-cost savings on /costs.
|
|
118
|
+
const startedAt = new Date();
|
|
119
|
+
let inputTokens: number | null = null;
|
|
120
|
+
let outputTokens: number | null = null;
|
|
121
|
+
let ledgerRecorded = false;
|
|
122
|
+
const recordTurn = async (status: "completed" | "failed" | "cancelled") => {
|
|
123
|
+
if (ledgerRecorded) return;
|
|
124
|
+
ledgerRecorded = true;
|
|
125
|
+
await recordUsageLedgerEntry({
|
|
126
|
+
projectId: conversation.projectId ?? null,
|
|
127
|
+
activityType: "chat_turn",
|
|
128
|
+
runtimeId: "ollama",
|
|
129
|
+
providerId: "ollama",
|
|
130
|
+
modelId,
|
|
131
|
+
inputTokens,
|
|
132
|
+
outputTokens,
|
|
133
|
+
totalTokens:
|
|
134
|
+
inputTokens != null && outputTokens != null
|
|
135
|
+
? inputTokens + outputTokens
|
|
136
|
+
: null,
|
|
137
|
+
status,
|
|
138
|
+
startedAt,
|
|
139
|
+
finishedAt: new Date(),
|
|
140
|
+
});
|
|
141
|
+
};
|
|
142
|
+
const captureTokenCounts = (parsed: {
|
|
143
|
+
prompt_eval_count?: unknown;
|
|
144
|
+
eval_count?: unknown;
|
|
145
|
+
}) => {
|
|
146
|
+
if (typeof parsed.prompt_eval_count === "number") {
|
|
147
|
+
inputTokens = parsed.prompt_eval_count;
|
|
148
|
+
}
|
|
149
|
+
if (typeof parsed.eval_count === "number") {
|
|
150
|
+
outputTokens = parsed.eval_count;
|
|
151
|
+
}
|
|
152
|
+
};
|
|
153
|
+
|
|
112
154
|
try {
|
|
113
155
|
const response = await fetch(`${baseUrl}/api/chat`, {
|
|
114
156
|
method: "POST",
|
|
@@ -125,6 +167,7 @@ export async function* sendOllamaMessage(
|
|
|
125
167
|
const errorText = await response.text().catch(() => "Unknown error");
|
|
126
168
|
yield { type: "error", message: `Ollama error (${response.status}): ${errorText}` };
|
|
127
169
|
await updateMessageStatus(assistantMsg.id, "complete");
|
|
170
|
+
await recordTurn("failed");
|
|
128
171
|
return;
|
|
129
172
|
}
|
|
130
173
|
|
|
@@ -132,6 +175,7 @@ export async function* sendOllamaMessage(
|
|
|
132
175
|
if (!reader) {
|
|
133
176
|
yield { type: "error", message: "No response stream from Ollama" };
|
|
134
177
|
await updateMessageStatus(assistantMsg.id, "complete");
|
|
178
|
+
await recordTurn("failed");
|
|
135
179
|
return;
|
|
136
180
|
}
|
|
137
181
|
|
|
@@ -157,7 +201,10 @@ export async function* sendOllamaMessage(
|
|
|
157
201
|
accumulated += delta;
|
|
158
202
|
yield { type: "delta", content: delta };
|
|
159
203
|
}
|
|
160
|
-
if (parsed.done)
|
|
204
|
+
if (parsed.done) {
|
|
205
|
+
captureTokenCounts(parsed);
|
|
206
|
+
break;
|
|
207
|
+
}
|
|
161
208
|
} catch {
|
|
162
209
|
// Skip malformed lines
|
|
163
210
|
}
|
|
@@ -173,6 +220,7 @@ export async function* sendOllamaMessage(
|
|
|
173
220
|
accumulated += delta;
|
|
174
221
|
yield { type: "delta", content: delta };
|
|
175
222
|
}
|
|
223
|
+
if (parsed.done) captureTokenCounts(parsed);
|
|
176
224
|
} catch {
|
|
177
225
|
// ignore
|
|
178
226
|
}
|
|
@@ -181,6 +229,7 @@ export async function* sendOllamaMessage(
|
|
|
181
229
|
// Persist the complete response
|
|
182
230
|
await updateMessageContent(assistantMsg.id, accumulated);
|
|
183
231
|
await updateMessageStatus(assistantMsg.id, "complete");
|
|
232
|
+
await recordTurn("completed");
|
|
184
233
|
|
|
185
234
|
yield { type: "done", messageId: assistantMsg.id, quickAccess: [] };
|
|
186
235
|
} catch (err) {
|
|
@@ -194,5 +243,6 @@ export async function* sendOllamaMessage(
|
|
|
194
243
|
await updateMessageContent(assistantMsg.id, accumulated);
|
|
195
244
|
}
|
|
196
245
|
await updateMessageStatus(assistantMsg.id, "complete");
|
|
246
|
+
await recordTurn(signal?.aborted ? "cancelled" : "failed");
|
|
197
247
|
}
|
|
198
248
|
}
|
|
@@ -68,6 +68,10 @@ export interface BudgetSnapshot {
|
|
|
68
68
|
monthlyResetAtIso: string;
|
|
69
69
|
runtimeStates: Record<AgentRuntimeId, RuntimeSetupState>;
|
|
70
70
|
pricing: PricingRegistrySnapshot;
|
|
71
|
+
/** Real metered spend (usage_ledger sums) — never the plan-priced budget basis. */
|
|
72
|
+
meteredSpend: { dailyMicros: number; monthlyMicros: number };
|
|
73
|
+
/** Flat subscription price counted as the budget basis, when billing is subscription. */
|
|
74
|
+
planPricedMonthlyMicros: number | null;
|
|
71
75
|
}
|
|
72
76
|
|
|
73
77
|
interface BudgetGuardInput {
|
|
@@ -358,6 +362,23 @@ async function getUsageAggregates(
|
|
|
358
362
|
}
|
|
359
363
|
});
|
|
360
364
|
|
|
365
|
+
// Real metered spend: the plain usage_ledger sums across every runtime,
|
|
366
|
+
// captured BEFORE the subscription plan-price substitution below. Guardrail
|
|
367
|
+
// statuses budget against the plan price (a flat subscription is the real
|
|
368
|
+
// monthly outlay), but display surfaces must never present that basis as
|
|
369
|
+
// spend — they read this instead.
|
|
370
|
+
const metered = {
|
|
371
|
+
daily: { costMicros: 0, totalTokens: 0 },
|
|
372
|
+
monthly: { costMicros: 0, totalTokens: 0 },
|
|
373
|
+
};
|
|
374
|
+
for (const runtimeId of SUPPORTED_AGENT_RUNTIMES) {
|
|
375
|
+
metered.daily.costMicros += runtimes[runtimeId].daily.costMicros;
|
|
376
|
+
metered.daily.totalTokens += runtimes[runtimeId].daily.totalTokens;
|
|
377
|
+
metered.monthly.costMicros += runtimes[runtimeId].monthly.costMicros;
|
|
378
|
+
metered.monthly.totalTokens += runtimes[runtimeId].monthly.totalTokens;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
let planPricedMonthlyMicros: number | null = null;
|
|
361
382
|
if (runtimeStates["claude-code"].billingMode === "subscription") {
|
|
362
383
|
const planPriceUsd = await getClaudeOAuthPlanPrice(
|
|
363
384
|
policy.runtimes["claude-code"].claudeOAuthPlan
|
|
@@ -366,6 +387,7 @@ async function getUsageAggregates(
|
|
|
366
387
|
const dailyMicros = Math.round(monthlyMicros / daysInMonth(now));
|
|
367
388
|
runtimes["claude-code"].monthly.costMicros = monthlyMicros;
|
|
368
389
|
runtimes["claude-code"].daily.costMicros = dailyMicros;
|
|
390
|
+
planPricedMonthlyMicros = monthlyMicros;
|
|
369
391
|
}
|
|
370
392
|
|
|
371
393
|
const overall = {
|
|
@@ -387,6 +409,8 @@ async function getUsageAggregates(
|
|
|
387
409
|
return {
|
|
388
410
|
overall,
|
|
389
411
|
runtimes,
|
|
412
|
+
metered,
|
|
413
|
+
planPricedMonthlyMicros,
|
|
390
414
|
...getBudgetWindowBounds(now),
|
|
391
415
|
};
|
|
392
416
|
}
|
|
@@ -714,5 +738,10 @@ export async function getBudgetGuardrailSnapshot(): Promise<BudgetSnapshot> {
|
|
|
714
738
|
monthlyResetAtIso: aggregates.monthlyEnd.toISOString(),
|
|
715
739
|
runtimeStates,
|
|
716
740
|
pricing,
|
|
741
|
+
meteredSpend: {
|
|
742
|
+
dailyMicros: aggregates.metered.daily.costMicros,
|
|
743
|
+
monthlyMicros: aggregates.metered.monthly.costMicros,
|
|
744
|
+
},
|
|
745
|
+
planPricedMonthlyMicros: aggregates.planPricedMonthlyMicros,
|
|
717
746
|
};
|
|
718
747
|
}
|
package/src/lib/usage/pricing.ts
CHANGED
|
@@ -15,6 +15,13 @@ export async function deriveUsageCostMicros(input: {
|
|
|
15
15
|
return { costMicros: null, pricingVersion: null };
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
+
// Local inference has no per-token billing: a known-free $0, not unknown
|
|
19
|
+
// pricing. The $0 rows are the evidence for the blended-cost savings story —
|
|
20
|
+
// null here would demote every local run to "unknown_pricing" and hide them.
|
|
21
|
+
if (input.providerId === "ollama") {
|
|
22
|
+
return { costMicros: 0, pricingVersion: "local-free" };
|
|
23
|
+
}
|
|
24
|
+
|
|
18
25
|
if (input.providerId !== "anthropic" && input.providerId !== "openai") {
|
|
19
26
|
return { costMicros: null, pricingVersion: null };
|
|
20
27
|
}
|