@desplega.ai/agent-swarm 1.79.4 → 1.80.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +98 -19
- package/package.json +12 -6
- package/src/be/db.ts +101 -30
- package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
- package/src/be/pricing-normalize.ts +81 -0
- package/src/be/seed-pricing.ts +293 -0
- package/src/commands/claude-managed-setup.ts +19 -3
- package/src/commands/runner.ts +592 -237
- package/src/http/context.ts +6 -2
- package/src/http/index.ts +115 -68
- package/src/http/session-data.ts +74 -23
- package/src/otel-impl.ts +200 -0
- package/src/otel.ts +127 -0
- package/src/providers/claude-adapter.ts +30 -5
- package/src/providers/claude-managed-adapter.ts +43 -17
- package/src/providers/claude-managed-pricing.ts +34 -0
- package/src/providers/codex-adapter.ts +38 -27
- package/src/providers/codex-models.ts +22 -3
- package/src/providers/devin-adapter.ts +11 -0
- package/src/providers/opencode-adapter.ts +31 -7
- package/src/providers/pi-mono-adapter.ts +39 -7
- package/src/providers/pricing-sources.md +52 -0
- package/src/providers/swarm-events-shared.ts +8 -4
- package/src/providers/types.ts +33 -10
- package/src/server.ts +6 -0
- package/src/tests/claude-managed-adapter.test.ts +17 -3
- package/src/tests/claude-managed-setup.test.ts +10 -1
- package/src/tests/codex-adapter.test.ts +20 -19
- package/src/tests/context-snapshot.test.ts +2 -2
- package/src/tests/context-window.test.ts +65 -1
- package/src/tests/devin-adapter.test.ts +2 -0
- package/src/tests/http/context-routes.test.ts +161 -0
- package/src/tests/migration-063-schema-relax.test.ts +109 -0
- package/src/tests/opencode-adapter.test.ts +146 -1
- package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
- package/src/tests/pages-view-count.test.ts +30 -5
- package/src/tests/providers/codex-cost.test.ts +18 -0
- package/src/tests/providers/opencode-cost.test.ts +74 -0
- package/src/tests/providers/pi-cost.test.ts +128 -0
- package/src/tests/secret-scrubber.test.ts +19 -0
- package/src/tests/session-costs-codex-recompute.test.ts +35 -22
- package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
- package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
- package/src/tests/store-progress-cost.test.ts +6 -1
- package/src/tools/store-progress.ts +16 -60
- package/src/tools/utils.ts +65 -12
- package/src/types.ts +62 -9
- package/src/utils/context-window.ts +104 -4
- package/src/utils/secret-scrubber.ts +7 -0
package/src/otel.ts
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
 * A single span/event attribute value: a string, number, or boolean, or a
 * homogeneous array of one of those.
 */
export type AttributeValue = string | number | boolean | string[] | number[] | boolean[];

/**
 * Attribute bag keyed by attribute name. `undefined` values are allowed by
 * the type so callers can pass optional fields without filtering first.
 */
export type Attributes = Record<string, AttributeValue | undefined>;

/**
 * Status payload accepted by `SwarmSpan.setStatus`.
 * NOTE(review): `code` is presumably an OpenTelemetry status code — confirm
 * against the implementation module.
 */
type SpanStatus = {
  code: number;
  message?: string;
};

/**
 * Minimal span surface exposed to the rest of the codebase. Every mutator
 * except `recordException` and `end` returns a `SwarmSpan` so calls can be
 * chained.
 */
export type SwarmSpan = {
  setAttribute: (key: string, value: AttributeValue) => SwarmSpan;
  setAttributes: (attributes: Attributes) => SwarmSpan;
  addEvent: (name: string, attributes?: Attributes) => SwarmSpan;
  recordException: (error: unknown) => void;
  setStatus: (status: SpanStatus) => SwarmSpan;
  end: () => void;
};
|
|
17
|
+
|
|
18
|
+
/**
 * True when `OTEL_EXPORTER_OTLP_ENDPOINT` is set to a non-empty value.
 * Evaluated once, when this module is first loaded.
 */
const enabled = Boolean(process.env.OTEL_EXPORTER_OTLP_ENDPOINT);

/**
 * Inert span handed out whenever tracing is disabled or not yet initialized.
 * Chainable methods return the same shared object; the rest do nothing.
 */
const NOOP_SPAN: SwarmSpan = {
  setAttribute: () => NOOP_SPAN,
  setAttributes: () => NOOP_SPAN,
  addEvent: () => NOOP_SPAN,
  recordException: () => {},
  setStatus: () => NOOP_SPAN,
  end: () => {},
};

/** Flipped to true by the first enabled `initOtel` call, before the implementation loads. */
let initialized = false;

// Slots for the real implementation functions. They stay `undefined` until
// `initOtel` has booted the implementation module successfully; every public
// wrapper in this module falls back to a no-op while its slot is empty.
let realWithSpan:
  | (<T>(
      name: string,
      fn: (span: SwarmSpan) => Promise<T> | T,
      attributes?: Attributes,
    ) => Promise<T>)
  | undefined;
let realStartSpan: ((name: string, attributes?: Attributes) => SwarmSpan) | undefined;
let realWithRemoteContext:
  | (<T>(carrier: Record<string, unknown>, fn: () => Promise<T> | T) => Promise<T>)
  | undefined;
let realWithSpanContext: (<T>(span: SwarmSpan, fn: () => T) => T) | undefined;
let realInjectTraceContext:
  | ((headers: Record<string, string>) => Record<string, string>)
  | undefined;
let realShutdown: (() => Promise<void>) | undefined;
|
|
46
|
+
|
|
47
|
+
/**
 * Reports whether tracing was enabled by environment configuration at module
 * load. This does not indicate whether `initOtel` has run or succeeded.
 *
 * @returns The module-level `enabled` flag.
 */
export function isOtelEnabled(): boolean {
  return enabled;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Lazily loads `./otel-impl`, boots it, and wires its functions into this
 * module's wrapper slots.
 *
 * Returns immediately when tracing is not enabled or a previous call already
 * started initialization. A failure while importing or booting is logged with
 * `console.warn` and swallowed: the wrappers keep their no-op behaviour, and
 * because `initialized` stays true the boot is not retried on later calls.
 *
 * @param serviceRole Role passed to `impl.boot`; defaults to `AGENT_ROLE`, or
 *   `"api"` when that variable is unset or empty.
 */
export async function initOtel(serviceRole = process.env.AGENT_ROLE || "api"): Promise<void> {
  if (!enabled || initialized) return;
  // Mark first so a concurrent second call does not start a second boot.
  initialized = true;

  try {
    const impl = await import("./otel-impl");
    await impl.boot(serviceRole);
    // Slots are only filled after boot succeeds, so a failed boot leaves
    // every wrapper on its no-op path.
    realWithSpan = impl.withSpan;
    realStartSpan = impl.startSpan;
    realWithRemoteContext = impl.withRemoteContext;
    realWithSpanContext = impl.withSpanContext;
    realInjectTraceContext = impl.injectTraceContext;
    realShutdown = impl.shutdown;
    console.log(
      `[OTel] enabled for ${process.env.OTEL_SERVICE_NAME ?? "agent-swarm"} (${serviceRole})`,
    );
  } catch (error) {
    console.warn(`[OTel] disabled after initialization failure: ${error}`);
  }
}
|
|
71
|
+
|
|
72
|
+
/**
 * Runs `fn` with a span. When tracing is disabled or not initialized, `fn`
 * receives the shared no-op span and its result is returned unchanged;
 * otherwise the call is delegated to the implementation's `withSpan`.
 *
 * @param name Span name forwarded to the implementation.
 * @param fn Callback to execute; may be sync or async.
 * @param attributes Optional attributes forwarded to the implementation.
 * @returns A promise for whatever `fn` returns.
 */
export async function withSpan<T>(
  name: string,
  fn: (span: SwarmSpan) => Promise<T> | T,
  attributes?: Attributes,
): Promise<T> {
  if (!enabled || !realWithSpan) {
    return fn(NOOP_SPAN);
  }
  return realWithSpan(name, fn, attributes);
}
|
|
82
|
+
|
|
83
|
+
/**
 * Starts a span that the caller is responsible for ending.
 *
 * @param name Span name forwarded to the implementation.
 * @param attributes Optional attributes forwarded to the implementation.
 * @returns The implementation's span, or the shared no-op span when tracing
 *   is disabled or not initialized.
 */
export function startSpan(name: string, attributes?: Attributes): SwarmSpan {
  if (!enabled || !realStartSpan) {
    return NOOP_SPAN;
  }
  return realStartSpan(name, attributes);
}
|
|
89
|
+
|
|
90
|
+
/**
 * Synchronously runs `fn` through the implementation's `withSpanContext`
 * for the given span. When tracing is disabled or not initialized, `fn` is
 * simply called directly.
 *
 * @param span Span forwarded to the implementation.
 * @param fn Callback to execute.
 * @returns Whatever `fn` returns.
 */
export function withSpanContext<T>(span: SwarmSpan, fn: () => T): T {
  if (!enabled || !realWithSpanContext) {
    return fn();
  }
  return realWithSpanContext(span, fn);
}
|
|
96
|
+
|
|
97
|
+
/**
 * Runs `fn` through the implementation's `withRemoteContext`, passing along
 * `carrier`. When tracing is disabled or not initialized, `carrier` is
 * ignored and `fn` is called directly.
 *
 * @param carrier Key/value carrier forwarded to the implementation
 *   (presumably incoming trace headers — confirm against the implementation).
 * @param fn Callback to execute; may be sync or async.
 * @returns A promise for whatever `fn` returns.
 */
export async function withRemoteContext<T>(
  carrier: Record<string, unknown>,
  fn: () => Promise<T> | T,
): Promise<T> {
  if (!enabled || !realWithRemoteContext) {
    return fn();
  }
  return realWithRemoteContext(carrier, fn);
}
|
|
106
|
+
|
|
107
|
+
/**
 * Passes outgoing headers through the implementation's `injectTraceContext`.
 *
 * @param headers Header map to pass through.
 * @returns The implementation's result, or the very same `headers` object
 *   (not a copy) when tracing is disabled or not initialized.
 */
export function injectTraceContext(headers: Record<string, string>): Record<string, string> {
  if (!enabled || !realInjectTraceContext) {
    return headers;
  }
  return realInjectTraceContext(headers);
}
|
|
113
|
+
|
|
114
|
+
/**
 * Awaits the implementation's shutdown hook if one was installed by a
 * successful `initOtel`; resolves immediately otherwise. Errors thrown by the
 * hook propagate to the caller.
 */
export async function shutdownOtel(): Promise<void> {
  if (!realShutdown) return;
  await realShutdown();
}
|
|
118
|
+
|
|
119
|
+
/**
 * Test-only helper: clears the `initialized` flag and every implementation
 * slot so the wrappers return to their no-op path and `initOtel` can run
 * again. It does not call the shutdown hook and does not change `enabled`,
 * which is fixed at module load.
 */
export function _resetOtelForTests(): void {
  initialized = false;
  realWithSpan = undefined;
  realStartSpan = undefined;
  realWithRemoteContext = undefined;
  realWithSpanContext = undefined;
  realInjectTraceContext = undefined;
  realShutdown = undefined;
}
|
|
@@ -1,7 +1,12 @@
|
|
|
1
1
|
import { readFile, unlink, writeFile } from "node:fs/promises";
|
|
2
2
|
import { homedir } from "node:os";
|
|
3
3
|
import { dirname, join } from "node:path";
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
CONTEXT_FORMULA,
|
|
6
|
+
clampContextPercent,
|
|
7
|
+
computeContextUsedUnified,
|
|
8
|
+
getContextWindowSize,
|
|
9
|
+
} from "../utils/context-window";
|
|
5
10
|
import { validateClaudeCredentials } from "../utils/credentials";
|
|
6
11
|
import {
|
|
7
12
|
parseStderrForErrors,
|
|
@@ -465,6 +470,10 @@ class ClaudeSession implements ProviderSession {
|
|
|
465
470
|
this._sessionId = json.session_id;
|
|
466
471
|
this.emit({ type: "session_init", sessionId: json.session_id, provider: "claude" });
|
|
467
472
|
if (json.model) {
|
|
473
|
+
// Phase 4: the CLI's `init.model` reflects the actual model after any
|
|
474
|
+
// backoff/fallback. Update `this.model` so subsequent CostData rows
|
|
475
|
+
// (and the pricing lookup the API runs) use the right rate.
|
|
476
|
+
this.model = json.model;
|
|
468
477
|
this.contextWindowSize = getContextWindowSize(json.model);
|
|
469
478
|
}
|
|
470
479
|
}
|
|
@@ -487,6 +496,10 @@ class ClaudeSession implements ProviderSession {
|
|
|
487
496
|
output_tokens?: number;
|
|
488
497
|
cache_read_input_tokens?: number;
|
|
489
498
|
cache_creation_input_tokens?: number;
|
|
499
|
+
// Phase 4: claude extended-thinking flows surface this — the
|
|
500
|
+
// CLI emits `thinking_input_tokens` when the model produced
|
|
501
|
+
// thinking content during the turn.
|
|
502
|
+
thinking_input_tokens?: number;
|
|
490
503
|
}
|
|
491
504
|
| undefined;
|
|
492
505
|
|
|
@@ -499,8 +512,12 @@ class ClaudeSession implements ProviderSession {
|
|
|
499
512
|
outputTokens: usage?.output_tokens ?? 0,
|
|
500
513
|
cacheReadTokens: usage?.cache_read_input_tokens ?? 0,
|
|
501
514
|
cacheWriteTokens: usage?.cache_creation_input_tokens ?? 0,
|
|
515
|
+
// Phase 4: surface thinking tokens; previously dropped on the floor.
|
|
516
|
+
thinkingTokens: usage?.thinking_input_tokens ?? 0,
|
|
502
517
|
durationMs: json.duration_ms || 0,
|
|
503
|
-
|
|
518
|
+
// Phase 4: honest null when the CLI omits num_turns instead of a
|
|
519
|
+
// faked `1` (would have under-counted in dashboards).
|
|
520
|
+
numTurns: json.num_turns ?? null,
|
|
504
521
|
model: this.model,
|
|
505
522
|
isError: json.is_error || false,
|
|
506
523
|
provider: "claude",
|
|
@@ -539,18 +556,26 @@ class ClaudeSession implements ProviderSession {
|
|
|
539
556
|
}
|
|
540
557
|
}
|
|
541
558
|
|
|
542
|
-
// Context usage extraction from assistant message usage
|
|
559
|
+
// Context usage extraction from assistant message usage.
|
|
560
|
+
// Phase 9: unified `input + cache + output` formula across every
|
|
561
|
+
// provider so cross-provider percent comparisons are meaningful.
|
|
543
562
|
if (json.message.usage) {
|
|
544
563
|
const usage = json.message.usage;
|
|
545
|
-
const contextUsed =
|
|
564
|
+
const contextUsed = computeContextUsedUnified({
|
|
565
|
+
inputTokens: usage.input_tokens,
|
|
566
|
+
cacheReadTokens: usage.cache_read_input_tokens,
|
|
567
|
+
cacheCreateTokens: usage.cache_creation_input_tokens,
|
|
568
|
+
outputTokens: usage.output_tokens,
|
|
569
|
+
});
|
|
546
570
|
const contextTotal = this.contextWindowSize;
|
|
547
571
|
|
|
548
572
|
this.emit({
|
|
549
573
|
type: "context_usage",
|
|
550
574
|
contextUsedTokens: contextUsed,
|
|
551
575
|
contextTotalTokens: contextTotal,
|
|
552
|
-
contextPercent:
|
|
576
|
+
contextPercent: clampContextPercent(contextUsed, contextTotal) ?? 0,
|
|
553
577
|
outputTokens: usage.output_tokens ?? 0,
|
|
578
|
+
contextFormula: CONTEXT_FORMULA,
|
|
554
579
|
});
|
|
555
580
|
}
|
|
556
581
|
}
|
|
@@ -59,8 +59,15 @@ import type {
|
|
|
59
59
|
import type { SkillCreateResponse as Skill } from "@anthropic-ai/sdk/resources/beta/skills";
|
|
60
60
|
|
|
61
61
|
import { checkToolLoop } from "../hooks/tool-loop-detection";
|
|
62
|
+
import {
|
|
63
|
+
CONTEXT_FORMULA,
|
|
64
|
+
clampContextPercent,
|
|
65
|
+
computeContextUsedUnified,
|
|
66
|
+
getContextWindowSize,
|
|
67
|
+
} from "../utils/context-window";
|
|
62
68
|
import { scrubSecrets } from "../utils/secret-scrubber";
|
|
63
69
|
import { computeClaudeManagedCostUsd } from "./claude-managed-models";
|
|
70
|
+
import { getRuntimeFeePerHour } from "./claude-managed-pricing";
|
|
64
71
|
import { createClaudeManagedSwarmEventHandler } from "./claude-managed-swarm-events";
|
|
65
72
|
import type {
|
|
66
73
|
CostData,
|
|
@@ -113,13 +120,10 @@ const REQUIRED_ENV_VARS = [
|
|
|
113
120
|
"MANAGED_ENVIRONMENT_ID",
|
|
114
121
|
] as const;
|
|
115
122
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
* per-model lookup.
|
|
121
|
-
*/
|
|
122
|
-
const DEFAULT_CONTEXT_TOTAL_TOKENS = 1_000_000;
|
|
123
|
+
// Phase 5: removed the hardcoded `DEFAULT_CONTEXT_TOTAL_TOKENS = 1_000_000`.
|
|
124
|
+
// The adapter now calls `getContextWindowSize(this.model)` from
|
|
125
|
+
// `src/utils/context-window.ts`, which resolves shortnames + dated full ids
|
|
126
|
+
// so haiku-4-5 sessions don't pretend to have a 1M window.
|
|
123
127
|
|
|
124
128
|
/**
|
|
125
129
|
* Compose the per-session user-message content blocks. Returns two blocks:
|
|
@@ -187,6 +191,8 @@ function emptyCost(config: ProviderSessionConfig, model: string): CostData {
|
|
|
187
191
|
numTurns: 0,
|
|
188
192
|
model,
|
|
189
193
|
isError: false,
|
|
194
|
+
// Phase 3 — tag every emitted CostData so the API's recompute path engages.
|
|
195
|
+
provider: "claude-managed",
|
|
190
196
|
};
|
|
191
197
|
}
|
|
192
198
|
|
|
@@ -374,6 +380,11 @@ class ClaudeManagedSession implements ProviderSession {
|
|
|
374
380
|
* 2. Anthropic's $0.08/session-hour runtime fee — billed continuously by
|
|
375
381
|
* Anthropic regardless of model usage, so we add it here to surface in
|
|
376
382
|
* the swarm's per-session cost UI.
|
|
383
|
+
*
|
|
384
|
+
* Phase 5: the harness-local USD is still computed here, but the server-side
|
|
385
|
+
* recompute path (`POST /api/session-costs` after Phase 2) will reprice the
|
|
386
|
+
* row against the seeded pricing-table values and tag `costSource='pricing-table'`.
|
|
387
|
+
* The runtime fee comes from the same table now (`token_class='runtime_hour'`).
|
|
377
388
|
*/
|
|
378
389
|
private snapshotCost(isError: boolean): CostData {
|
|
379
390
|
const durationMs = Date.now() - this.startedAt;
|
|
@@ -384,9 +395,11 @@ class ClaudeManagedSession implements ProviderSession {
|
|
|
384
395
|
this.cost.cacheReadTokens ?? 0,
|
|
385
396
|
this.cost.cacheWriteTokens ?? 0,
|
|
386
397
|
);
|
|
387
|
-
//
|
|
388
|
-
//
|
|
389
|
-
|
|
398
|
+
// Phase 5: the runtime fee comes from `getRuntimeFeePerHour()`, which returns
|
|
399
|
+
// the CLAUDE_MANAGED_RUNTIME_FEE_USD_PER_HOUR env override when that is a
|
|
400
|
+
// valid non-negative number and otherwise the historical $0.08/hr constant.
|
|
401
|
+
// The worker does not read the pricing table; the API-side recompute does.
|
|
402
|
+
const runtimeFeeUsd = (durationMs / 3_600_000) * getRuntimeFeePerHour();
|
|
390
403
|
return {
|
|
391
404
|
...this.cost,
|
|
392
405
|
durationMs,
|
|
@@ -506,12 +519,15 @@ class ClaudeManagedSession implements ProviderSession {
|
|
|
506
519
|
// this event. Emit a `compaction` ProviderEvent with the values we
|
|
507
520
|
// *do* know; consumers that need richer data can subscribe to
|
|
508
521
|
// `raw_log` for the original payload.
|
|
522
|
+
// Phase 5 — pre-compact tokens are an inferred proxy (running input
|
|
523
|
+
// total); flag the compactTrigger as 'auto-inferred' so downstream
|
|
524
|
+
// dashboards can distinguish a real trigger value from our guess.
|
|
509
525
|
const _cc = event as BetaManagedAgentsAgentThreadContextCompactedEvent;
|
|
510
526
|
this.emit({
|
|
511
527
|
type: "compaction",
|
|
512
528
|
preCompactTokens: this.cost.inputTokens ?? 0,
|
|
513
|
-
compactTrigger: "auto",
|
|
514
|
-
contextTotalTokens:
|
|
529
|
+
compactTrigger: "auto-inferred",
|
|
530
|
+
contextTotalTokens: getContextWindowSize(this.cost.model),
|
|
515
531
|
});
|
|
516
532
|
return { terminal: false, isError: false };
|
|
517
533
|
}
|
|
@@ -524,16 +540,26 @@ class ClaudeManagedSession implements ProviderSession {
|
|
|
524
540
|
(this.cost.cacheReadTokens ?? 0) + usage.cache_read_input_tokens;
|
|
525
541
|
this.cost.cacheWriteTokens =
|
|
526
542
|
(this.cost.cacheWriteTokens ?? 0) + usage.cache_creation_input_tokens;
|
|
527
|
-
this.cost.numTurns
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
543
|
+
this.cost.numTurns = (this.cost.numTurns ?? 0) + 1;
|
|
544
|
+
|
|
545
|
+
// Phase 5 + Phase 9: unified `input + cache + output` formula AND a
|
|
546
|
+
// per-model window via `getContextWindowSize`. Previously this used
|
|
547
|
+
// a hardcoded 1M window and ignored cache — fine for sonnet/opus,
|
|
548
|
+
// wrong for haiku and any future smaller-window model.
|
|
549
|
+
const used = computeContextUsedUnified({
|
|
550
|
+
inputTokens: this.cost.inputTokens,
|
|
551
|
+
cacheReadTokens: this.cost.cacheReadTokens,
|
|
552
|
+
cacheCreateTokens: this.cost.cacheWriteTokens,
|
|
553
|
+
outputTokens: this.cost.outputTokens,
|
|
554
|
+
});
|
|
555
|
+
const total = getContextWindowSize(this.cost.model);
|
|
531
556
|
this.emit({
|
|
532
557
|
type: "context_usage",
|
|
533
558
|
contextUsedTokens: used,
|
|
534
559
|
contextTotalTokens: total,
|
|
535
|
-
contextPercent:
|
|
560
|
+
contextPercent: clampContextPercent(used, total),
|
|
536
561
|
outputTokens: this.cost.outputTokens ?? 0,
|
|
562
|
+
contextFormula: CONTEXT_FORMULA,
|
|
537
563
|
});
|
|
538
564
|
return { terminal: false, isError: false };
|
|
539
565
|
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 5 — small adapter-side pricing constants for claude-managed.
|
|
3
|
+
*
|
|
4
|
+
* The API server's pricing table is the canonical store (seeded by
|
|
5
|
+
* `src/be/seed-pricing.ts`). Workers can't touch the DB directly (DB
|
|
6
|
+
* boundary), so the adapter keeps a local constant for the runtime fee
|
|
7
|
+
* and lets the API-side recompute path (Phase 2) override the resulting
|
|
8
|
+
* `totalCostUsd` with the canonical figure. The constant here is what
|
|
9
|
+
* shows up in the worker's local logs before the row hits the server.
|
|
10
|
+
*
|
|
11
|
+
* If/when we plumb pricing through the worker bootstrap (HTTP fetch of
|
|
12
|
+
* `/api/pricing` at session start), this module is the place to swap.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
 * Parses a raw override for the hourly runtime fee.
 *
 * The value is trimmed before conversion because `Number("  ")` evaluates to
 * `0`; without the trim a whitespace-only environment value would silently
 * set the fee to $0 instead of falling back.
 *
 * @param raw Raw string, typically read from the environment.
 * @param fallback Fee used when `raw` is missing, blank, non-numeric,
 *   non-finite, or negative.
 * @returns A finite, non-negative USD-per-hour figure.
 */
function parseRuntimeFeeUsdPerHour(raw: string | undefined, fallback = 0.08): number {
  const trimmed = raw?.trim();
  if (!trimmed) return fallback;
  const parsed = Number(trimmed);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * USD per session-hour for managed claude runtime. Source:
 * https://docs.claude.com/en/api/agent-sdk/managed-runtime#pricing
 * (verified 2026-04-28). Override at runtime via
 * `CLAUDE_MANAGED_RUNTIME_FEE_USD_PER_HOUR` for ops bumps without a redeploy.
 * The variable is read once at module load; invalid or blank values fall
 * back to 0.08.
 */
export const RUNTIME_FEE_USD_PER_HOUR = parseRuntimeFeeUsdPerHour(
  process.env.CLAUDE_MANAGED_RUNTIME_FEE_USD_PER_HOUR,
);
|
|
27
|
+
|
|
28
|
+
/**
 * Adapter helper that exposes the hourly runtime fee behind a function so the
 * lookup can be swapped later without touching call sites.
 *
 * @returns `RUNTIME_FEE_USD_PER_HOUR`, which is validated at module load to be
 *   a finite, non-negative number — so this never crashes the cost snapshot.
 */
export function getRuntimeFeePerHour(): number {
  return RUNTIME_FEE_USD_PER_HOUR;
}
|
|
@@ -66,6 +66,11 @@ import {
|
|
|
66
66
|
type WebSearchItem,
|
|
67
67
|
} from "@openai/codex-sdk";
|
|
68
68
|
import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
|
|
69
|
+
import {
|
|
70
|
+
CONTEXT_FORMULA,
|
|
71
|
+
clampContextPercent,
|
|
72
|
+
computeContextUsedUnified,
|
|
73
|
+
} from "../utils/context-window";
|
|
69
74
|
import { summarizeSession as runSummarize } from "../utils/internal-ai";
|
|
70
75
|
import { scrubSecrets } from "../utils/secret-scrubber";
|
|
71
76
|
import { type CodexAgentsMdHandle, writeCodexAgentsMd } from "./codex-agents-md";
|
|
@@ -523,6 +528,11 @@ class CodexSession implements ProviderSession {
|
|
|
523
528
|
const inputTokens = usage?.input_tokens ?? 0;
|
|
524
529
|
const cachedInputTokens = usage?.cached_input_tokens ?? 0;
|
|
525
530
|
const outputTokens = usage?.output_tokens ?? 0;
|
|
531
|
+
// Phase 6: Codex SDK surfaces `reasoning_output_tokens` separately from
|
|
532
|
+
// `output_tokens` for reasoning models (gpt-5.3-codex, gpt-5.4 thinking).
|
|
533
|
+
// Pre-fix this number was read into `lastUsage` but never reached
|
|
534
|
+
// `CostData`, so reasoning-heavy sessions silently under-billed.
|
|
535
|
+
const reasoningOutputTokens = usage?.reasoning_output_tokens ?? 0;
|
|
526
536
|
return {
|
|
527
537
|
// Runner overrides with its own session id.
|
|
528
538
|
sessionId: "",
|
|
@@ -540,9 +550,12 @@ class CodexSession implements ProviderSession {
|
|
|
540
550
|
),
|
|
541
551
|
inputTokens,
|
|
542
552
|
outputTokens,
|
|
553
|
+
reasoningOutputTokens,
|
|
543
554
|
cacheReadTokens: cachedInputTokens,
|
|
544
|
-
//
|
|
545
|
-
|
|
555
|
+
// Phase 6: undefined (NOT 0). Codex SDK can't honestly report cache
|
|
556
|
+
// writes; leaving it undefined preserves that distinction in the DB
|
|
557
|
+
// instead of mixing genuine zeros with "unknown".
|
|
558
|
+
cacheWriteTokens: undefined,
|
|
546
559
|
durationMs: Date.now() - this.startedAt,
|
|
547
560
|
numTurns: this.numTurns,
|
|
548
561
|
model: this.resolvedModel,
|
|
@@ -760,36 +773,34 @@ class CodexSession implements ProviderSession {
|
|
|
760
773
|
case "turn.completed": {
|
|
761
774
|
this.lastUsage = event.usage;
|
|
762
775
|
if (event.usage) {
|
|
763
|
-
//
|
|
764
|
-
//
|
|
765
|
-
//
|
|
766
|
-
//
|
|
767
|
-
// the model's context window, even though no single model call did.
|
|
776
|
+
// Phase 9: switch from the codex-specific "peak proxy" formula
|
|
777
|
+
// (`uncached_input + output`) to the unified
|
|
778
|
+
// `input + cache_read + cache_create + output` so cross-provider
|
|
779
|
+
// percent comparisons are meaningful.
|
|
768
780
|
//
|
|
769
|
-
//
|
|
770
|
-
//
|
|
771
|
-
//
|
|
772
|
-
//
|
|
773
|
-
//
|
|
774
|
-
//
|
|
775
|
-
//
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
// expecting an integer percent, so a 0-1 fraction would render as
|
|
786
|
-
// "0%" instead of e.g. "40%".
|
|
781
|
+
// Note: Codex's `input_tokens` already includes cached_input_tokens
|
|
782
|
+
// (it's the TOTAL across the turn — see the longer comment that
|
|
783
|
+
// used to live here, preserved in git history). We therefore pass
|
|
784
|
+
// `cacheReadTokens: 0` to avoid double-counting the cached portion.
|
|
785
|
+
// The trade-off the old comment flagged is still real — a chatty
|
|
786
|
+
// turn can over-report because `input_tokens` is the SUM across
|
|
787
|
+
// every model call in the turn — but having the SAME formula
|
|
788
|
+
// everywhere wins over the local optimum. Clamp catches the
|
|
789
|
+
// chatty-turn overshoot at 100%. Old rows tagged 'peak-proxy'
|
|
790
|
+
// remain in `task_context_snapshots`; the UI surfaces both.
|
|
791
|
+
const contextUsed = computeContextUsedUnified({
|
|
792
|
+
inputTokens: event.usage.input_tokens,
|
|
793
|
+
cacheReadTokens: 0,
|
|
794
|
+
cacheCreateTokens: 0,
|
|
795
|
+
outputTokens: event.usage.output_tokens,
|
|
796
|
+
});
|
|
787
797
|
this.emit({
|
|
788
798
|
type: "context_usage",
|
|
789
|
-
contextUsedTokens:
|
|
799
|
+
contextUsedTokens: contextUsed,
|
|
790
800
|
contextTotalTokens: this.contextWindow,
|
|
791
|
-
contextPercent:
|
|
801
|
+
contextPercent: clampContextPercent(contextUsed, this.contextWindow) ?? 0,
|
|
792
802
|
outputTokens: event.usage.output_tokens,
|
|
803
|
+
contextFormula: CONTEXT_FORMULA,
|
|
793
804
|
});
|
|
794
805
|
}
|
|
795
806
|
break;
|
|
@@ -126,12 +126,22 @@ export const CODEX_MODEL_PRICING: Record<CodexModel, CodexModelPricing> = {
|
|
|
126
126
|
},
|
|
127
127
|
};
|
|
128
128
|
|
|
129
|
+
/**
|
|
130
|
+
* Phase 6 — one-warning-per-process tracking so unknown models log once
|
|
131
|
+
* instead of spamming the worker log on every turn.
|
|
132
|
+
*/
|
|
133
|
+
const _warnedUnknownCodexModels = new Set<string>();
|
|
134
|
+
|
|
129
135
|
/**
|
|
130
136
|
* Compute USD cost from a Codex `Usage` payload. The Codex SDK reports
|
|
131
137
|
* `input_tokens` as the TOTAL input fed to the model across the turn (cached
|
|
132
138
|
* + uncached), so we subtract `cached_input_tokens` before billing the
|
|
133
|
-
* uncached portion at the full rate.
|
|
134
|
-
*
|
|
139
|
+
* uncached portion at the full rate.
|
|
140
|
+
*
|
|
141
|
+
* Phase 6: returns 0 for unknown models AND logs a one-time warning, so an
|
|
142
|
+
* operator running `MODEL_OVERRIDE=gpt-future-2027` notices that the worker
|
|
143
|
+
* is silently dropping cost. The server-side recompute path (Phase 2) tags
|
|
144
|
+
* such rows `costSource='unpriced'`, which surfaces as a yellow UI badge.
|
|
135
145
|
*/
|
|
136
146
|
export function computeCodexCostUsd(
|
|
137
147
|
model: string,
|
|
@@ -140,7 +150,16 @@ export function computeCodexCostUsd(
|
|
|
140
150
|
outputTokens: number,
|
|
141
151
|
): number {
|
|
142
152
|
const pricing = CODEX_MODEL_PRICING[model as CodexModel];
|
|
143
|
-
if (!pricing)
|
|
153
|
+
if (!pricing) {
|
|
154
|
+
if (!_warnedUnknownCodexModels.has(model)) {
|
|
155
|
+
_warnedUnknownCodexModels.add(model);
|
|
156
|
+
console.warn(
|
|
157
|
+
`[codex] unpriced model ${JSON.stringify(model)} — adapter cost will report $0; ` +
|
|
158
|
+
"server-side recompute will tag costSource='unpriced' if the pricing table has no rows.",
|
|
159
|
+
);
|
|
160
|
+
}
|
|
161
|
+
return 0;
|
|
162
|
+
}
|
|
144
163
|
const uncachedInput = Math.max(0, inputTokens - cachedInputTokens);
|
|
145
164
|
const inputCost = (uncachedInput / 1_000_000) * pricing.inputPerMillion;
|
|
146
165
|
const cachedCost = (cachedInputTokens / 1_000_000) * pricing.cachedInputPerMillion;
|
|
@@ -279,6 +279,12 @@ class DevinSession implements ProviderSession {
|
|
|
279
279
|
if (this.settled || this.aborted) return;
|
|
280
280
|
this.pollCount += 1;
|
|
281
281
|
|
|
282
|
+
// Phase 8: Devin's session API does NOT report per-poll context-window
|
|
283
|
+
// info (the model is fully managed by Devin). We deliberately don't emit
|
|
284
|
+
// a synthetic `context_usage` event here — faking one with `contextUsedTokens=0`
|
|
285
|
+
// would be misleading. `peakContextTokens` stays null for devin tasks,
|
|
286
|
+
// which the UI surfaces as "not available" rather than "0".
|
|
287
|
+
|
|
282
288
|
let response: DevinSessionResponse;
|
|
283
289
|
try {
|
|
284
290
|
response = await getSession(this.orgId, this.devinApiKey, this._sessionId!);
|
|
@@ -788,6 +794,11 @@ class DevinSession implements ProviderSession {
|
|
|
788
794
|
numTurns: this.pollCount,
|
|
789
795
|
model: "devin",
|
|
790
796
|
isError,
|
|
797
|
+
// Phase 3 — tag CostData so the API recompute path engages. Devin's
|
|
798
|
+
// pricing is ACU-based (one row under `provider='devin', model='*',
|
|
799
|
+
// token_class='acu'`); the harness USD value above is already correct,
|
|
800
|
+
// but tagging the row exposes its source to the UI badge.
|
|
801
|
+
provider: "devin",
|
|
791
802
|
};
|
|
792
803
|
}
|
|
793
804
|
}
|
|
@@ -12,7 +12,11 @@ import { existsSync, mkdirSync } from "node:fs";
|
|
|
12
12
|
import { join } from "node:path";
|
|
13
13
|
import type { AssistantMessage, Config, Event as OpencodeEvent } from "@opencode-ai/sdk";
|
|
14
14
|
import { createOpencode } from "@opencode-ai/sdk";
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
CONTEXT_FORMULA,
|
|
17
|
+
clampContextPercent,
|
|
18
|
+
getContextWindowSize,
|
|
19
|
+
} from "../utils/context-window";
|
|
16
20
|
import { validateOpencodeCredentials } from "../utils/credentials";
|
|
17
21
|
import { fetchInstalledMcpServers } from "../utils/mcp-server-fetcher";
|
|
18
22
|
import { scrubSecrets } from "../utils/secret-scrubber";
|
|
@@ -104,7 +108,7 @@ function resolvePluginPath(): string {
|
|
|
104
108
|
return join(import.meta.dir, "../../plugin/opencode-plugins/agent-swarm.ts");
|
|
105
109
|
}
|
|
106
110
|
|
|
107
|
-
class OpencodeSession implements ProviderSession {
|
|
111
|
+
export class OpencodeSession implements ProviderSession {
|
|
108
112
|
private _sessionId: string;
|
|
109
113
|
private listeners: Array<(event: ProviderEvent) => void> = [];
|
|
110
114
|
// Buffer for events emitted before any listener is attached.
|
|
@@ -115,6 +119,7 @@ class OpencodeSession implements ProviderSession {
|
|
|
115
119
|
// leaving agent_tasks.provider/.model NULL. Buffer + flush on first attach.
|
|
116
120
|
private pendingEvents: ProviderEvent[] = [];
|
|
117
121
|
private completionResolve!: (result: ProviderResult) => void;
|
|
122
|
+
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: reserved for future error-propagation paths; symmetric with completionResolve.
|
|
118
123
|
private completionReject!: (err: Error) => void;
|
|
119
124
|
private completionPromise: Promise<ProviderResult>;
|
|
120
125
|
private server: { url: string; close(): void };
|
|
@@ -237,6 +242,15 @@ class OpencodeSession implements ProviderSession {
|
|
|
237
242
|
case "message.updated": {
|
|
238
243
|
const msg = ev.properties.info;
|
|
239
244
|
if (!isAssistantMessage(msg) || msg.sessionID !== this._sessionId) break;
|
|
245
|
+
// Phase 9 fix: opencode fires `message.updated` repeatedly during a single
|
|
246
|
+
// assistant turn (streaming text deltas, tool transitions, etc.) and only
|
|
247
|
+
// populates `tokens`/`cost` on the FINAL update once `time.completed` is
|
|
248
|
+
// set. Accumulating on every event would either no-op (zero tokens) or —
|
|
249
|
+
// if opencode ever back-fills intermediate snapshots — multi-count. Gate
|
|
250
|
+
// the accumulator AND the context emit on the finalized signal so both
|
|
251
|
+
// paths see the same canonical "this turn is done" moment.
|
|
252
|
+
const messageFinalized = msg.time?.completed != null;
|
|
253
|
+
if (!messageFinalized) break;
|
|
240
254
|
// Accumulate cost from each completed assistant message ("step")
|
|
241
255
|
this.totalCostUsd += msg.cost;
|
|
242
256
|
this.inputTokens += msg.tokens?.input ?? 0;
|
|
@@ -247,21 +261,31 @@ class OpencodeSession implements ProviderSession {
|
|
|
247
261
|
if (!this.model && msg.modelID) this.model = msg.modelID;
|
|
248
262
|
|
|
249
263
|
// Emit context_usage so the runner can POST /api/tasks/:id/context
|
|
250
|
-
// (drives the dashboard's context-usage progress bar)
|
|
251
|
-
//
|
|
264
|
+
// (drives the dashboard's context-usage progress bar). The runner-side
|
|
265
|
+
// throttle (CONTEXT_THROTTLE_MS = 30s) means the FIRST emit wins for any
|
|
266
|
+
// short task — so this MUST carry real numbers, not the zero-tokens
|
|
267
|
+
// placeholder opencode sends on intermediate streaming updates. The
|
|
268
|
+
// `time.completed` gate above (in the accumulator block) guarantees we
|
|
269
|
+
// only land here for finalized messages.
|
|
252
270
|
const turnInput = msg.tokens?.input ?? 0;
|
|
253
271
|
const turnOutput = msg.tokens?.output ?? 0;
|
|
254
272
|
const turnCacheRead = msg.tokens?.cache?.read ?? 0;
|
|
255
273
|
const turnCacheWrite = msg.tokens?.cache?.write ?? 0;
|
|
256
|
-
|
|
274
|
+
// Phase 8 + Phase 9: unified `input + cache + output` formula instead
|
|
275
|
+
// of the previous `input + cache_read + cache_write` (which omitted
|
|
276
|
+
// output and slightly mis-counted vs every other adapter).
|
|
277
|
+
const contextUsed = turnInput + turnCacheRead + turnCacheWrite + turnOutput;
|
|
257
278
|
const contextTotal = getContextWindowSize(this.model || msg.modelID || "default");
|
|
258
|
-
if (contextTotal > 0) {
|
|
279
|
+
if (contextTotal > 0 && contextUsed > 0) {
|
|
259
280
|
this.emit({
|
|
260
281
|
type: "context_usage",
|
|
261
282
|
contextUsedTokens: contextUsed,
|
|
262
283
|
contextTotalTokens: contextTotal,
|
|
263
|
-
|
|
284
|
+
// Phase 8: clamp so a turn that briefly overshoots (e.g. due to
|
|
285
|
+
// a stale total) doesn't render as a 130% gauge in the UI.
|
|
286
|
+
contextPercent: clampContextPercent(contextUsed, contextTotal) ?? 0,
|
|
264
287
|
outputTokens: turnOutput,
|
|
288
|
+
contextFormula: CONTEXT_FORMULA,
|
|
265
289
|
});
|
|
266
290
|
}
|
|
267
291
|
break;
|