@desplega.ai/agent-swarm 1.79.4 → 1.80.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. package/openapi.json +496 -32
  2. package/package.json +14 -6
  3. package/src/artifact-sdk/server.ts +2 -1
  4. package/src/be/db.ts +102 -31
  5. package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
  6. package/src/be/migrations/064_scripts.sql +39 -0
  7. package/src/be/migrations/065_script_embeddings.sql +7 -0
  8. package/src/be/pricing-normalize.ts +81 -0
  9. package/src/be/scripts/db.ts +391 -0
  10. package/src/be/scripts/embeddings.ts +231 -0
  11. package/src/be/scripts/maintenance.ts +9 -0
  12. package/src/be/scripts/typecheck.ts +193 -0
  13. package/src/be/seed-pricing.ts +293 -0
  14. package/src/cli.tsx +22 -5
  15. package/src/commands/artifact.ts +3 -2
  16. package/src/commands/claude-managed-setup.ts +21 -4
  17. package/src/commands/codex-login.ts +5 -3
  18. package/src/commands/onboard.tsx +2 -1
  19. package/src/commands/runner.ts +663 -246
  20. package/src/commands/setup.tsx +5 -3
  21. package/src/hooks/hook.ts +4 -3
  22. package/src/http/context.ts +6 -2
  23. package/src/http/index.ts +126 -68
  24. package/src/http/memory.ts +28 -0
  25. package/src/http/openapi.ts +1 -0
  26. package/src/http/page-proxy.ts +2 -1
  27. package/src/http/route-def.ts +1 -0
  28. package/src/http/schedules.ts +37 -0
  29. package/src/http/scripts.ts +381 -0
  30. package/src/http/session-data.ts +74 -23
  31. package/src/linear/outbound.ts +9 -2
  32. package/src/otel-impl.ts +200 -0
  33. package/src/otel.ts +132 -0
  34. package/src/providers/claude-adapter.ts +52 -6
  35. package/src/providers/claude-managed-adapter.ts +43 -17
  36. package/src/providers/claude-managed-pricing.ts +34 -0
  37. package/src/providers/codex-adapter.ts +38 -27
  38. package/src/providers/codex-models.ts +22 -3
  39. package/src/providers/devin-adapter.ts +11 -0
  40. package/src/providers/opencode-adapter.ts +31 -7
  41. package/src/providers/pi-mono-adapter.ts +39 -7
  42. package/src/providers/pricing-sources.md +52 -0
  43. package/src/providers/swarm-events-shared.ts +8 -4
  44. package/src/providers/types.ts +33 -10
  45. package/src/scripts-runtime/ctx.ts +23 -0
  46. package/src/scripts-runtime/eval-harness.ts +39 -0
  47. package/src/scripts-runtime/executors/native.ts +229 -0
  48. package/src/scripts-runtime/executors/registry.ts +16 -0
  49. package/src/scripts-runtime/executors/types.ts +63 -0
  50. package/src/scripts-runtime/extract-signature.ts +81 -0
  51. package/src/scripts-runtime/import-allowlist.ts +109 -0
  52. package/src/scripts-runtime/loader.ts +96 -0
  53. package/src/scripts-runtime/redacted.ts +48 -0
  54. package/src/scripts-runtime/sdk-allowlist.ts +29 -0
  55. package/src/scripts-runtime/stdlib/fetch.ts +46 -0
  56. package/src/scripts-runtime/stdlib/glob.ts +8 -0
  57. package/src/scripts-runtime/stdlib/grep.ts +34 -0
  58. package/src/scripts-runtime/stdlib/index.ts +16 -0
  59. package/src/scripts-runtime/stdlib/table.ts +17 -0
  60. package/src/scripts-runtime/swarm-config.ts +35 -0
  61. package/src/scripts-runtime/swarm-sdk.ts +197 -0
  62. package/src/scripts-runtime/types/stdlib.d.ts +104 -0
  63. package/src/scripts-runtime/types/swarm-sdk.d.ts +86 -0
  64. package/src/server.ts +18 -0
  65. package/src/tests/api-key.test.ts +33 -0
  66. package/src/tests/claude-managed-adapter.test.ts +17 -3
  67. package/src/tests/claude-managed-setup.test.ts +10 -1
  68. package/src/tests/codex-adapter.test.ts +20 -19
  69. package/src/tests/codex-login.test.ts +1 -1
  70. package/src/tests/context-snapshot.test.ts +2 -2
  71. package/src/tests/context-window.test.ts +65 -1
  72. package/src/tests/devin-adapter.test.ts +2 -0
  73. package/src/tests/http/context-routes.test.ts +161 -0
  74. package/src/tests/linear-outbound-sync.test.ts +109 -0
  75. package/src/tests/mcp-tools.test.ts +69 -0
  76. package/src/tests/migration-063-schema-relax.test.ts +109 -0
  77. package/src/tests/opencode-adapter.test.ts +146 -1
  78. package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
  79. package/src/tests/pages-view-count.test.ts +30 -5
  80. package/src/tests/providers/codex-cost.test.ts +18 -0
  81. package/src/tests/providers/opencode-cost.test.ts +74 -0
  82. package/src/tests/providers/pi-cost.test.ts +128 -0
  83. package/src/tests/redacted.test.ts +29 -0
  84. package/src/tests/runner-tool-spans.test.ts +268 -0
  85. package/src/tests/script-executor-conformance.test.ts +142 -0
  86. package/src/tests/script-executor-registry.test.ts +17 -0
  87. package/src/tests/scripts-db.test.ts +329 -0
  88. package/src/tests/scripts-embeddings.test.ts +291 -0
  89. package/src/tests/scripts-extract-signature.test.ts +47 -0
  90. package/src/tests/scripts-http.test.ts +350 -0
  91. package/src/tests/scripts-import-allowlist.test.ts +55 -0
  92. package/src/tests/scripts-mcp-e2e.test.ts +269 -0
  93. package/src/tests/scripts-runtime-secret-egress.test.ts +44 -0
  94. package/src/tests/scripts-runtime.test.ts +289 -0
  95. package/src/tests/sdk-allowlist.test.ts +59 -0
  96. package/src/tests/secret-scrubber.test.ts +54 -1
  97. package/src/tests/session-costs-codex-recompute.test.ts +35 -22
  98. package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
  99. package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
  100. package/src/tests/store-progress-cost.test.ts +6 -1
  101. package/src/tests/swarm-config.test.ts +38 -0
  102. package/src/tests/tool-annotations.test.ts +2 -2
  103. package/src/tests/tool-call-progress.test.ts +30 -0
  104. package/src/tests/workflow-e2e.test.ts +218 -0
  105. package/src/tests/workflow-executors.test.ts +32 -2
  106. package/src/tests/workflow-input-redaction.test.ts +232 -0
  107. package/src/tests/workflow-swarm-script.test.ts +273 -0
  108. package/src/tools/memory-rate.ts +2 -1
  109. package/src/tools/script-common.ts +88 -0
  110. package/src/tools/script-delete.ts +35 -0
  111. package/src/tools/script-query-types.ts +37 -0
  112. package/src/tools/script-run.ts +43 -0
  113. package/src/tools/script-search.ts +32 -0
  114. package/src/tools/script-upsert.ts +43 -0
  115. package/src/tools/store-progress.ts +16 -60
  116. package/src/tools/tool-config.ts +7 -0
  117. package/src/tools/utils.ts +65 -12
  118. package/src/types.ts +122 -10
  119. package/src/utils/api-key.ts +28 -0
  120. package/src/utils/context-window.ts +104 -4
  121. package/src/utils/page-session.ts +8 -6
  122. package/src/utils/secret-scrubber.ts +29 -1
  123. package/src/workflows/engine.ts +12 -4
  124. package/src/workflows/executors/index.ts +1 -0
  125. package/src/workflows/executors/registry.ts +2 -0
  126. package/src/workflows/executors/script.ts +12 -1
  127. package/src/workflows/executors/swarm-script.ts +170 -0
  128. package/src/workflows/input.ts +65 -0
  129. package/src/workflows/recovery.ts +31 -3
  130. package/src/workflows/resume.ts +43 -5
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@desplega.ai/agent-swarm",
3
- "version": "1.79.4",
3
+ "version": "1.80.1",
4
4
  "description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
5
5
  "license": "MIT",
6
6
  "author": "desplega.sh <contact@desplega.sh>",
@@ -42,8 +42,10 @@
42
42
  },
43
43
  "scripts": {
44
44
  "build:pi-skills": "bun run plugin/build-pi-skills.ts",
45
+ "build:script-types": "bun run scripts/bundle-script-types.ts",
45
46
  "tsc:check": "bun tsc --noEmit",
46
47
  "check:db-boundary": "bash scripts/check-db-boundary.sh",
48
+ "check:api-key-boundary": "bash scripts/check-api-key-boundary.sh",
47
49
  "cli": "bun src/cli.tsx",
48
50
  "hook": "bun src/hooks/hook.ts",
49
51
  "claude": "bun src/cli.tsx claude",
@@ -73,6 +75,7 @@
73
75
  "deploy:docker": "bun deploy/docker-push.ts",
74
76
  "e2e:workflows": "bun scripts/e2e-workflow-test.ts",
75
77
  "e2e:workflows:docker": "bun scripts/e2e-workflow-test.ts --with-docker",
78
+ "e2e:otel:jaeger": "bun scripts/e2e-otel-jaeger.ts",
76
79
  "docs:mcp": "bun scripts/generate-mcp-docs.ts",
77
80
  "docs:openapi": "bun scripts/generate-openapi.ts",
78
81
  "docs:business-use": "bun scripts/generate-business-use-docs.ts",
@@ -104,13 +107,18 @@
104
107
  "@desplega.ai/localtunnel": "^2.2.0",
105
108
  "@inkjs/ui": "^2.0.0",
106
109
  "@linear/sdk": "^77.0.0",
107
- "@earendil-works/pi-agent-core": "^0.74.0",
108
- "@earendil-works/pi-ai": "^0.74.0",
109
- "@earendil-works/pi-coding-agent": "^0.74.0",
110
+ "@earendil-works/pi-agent-core": "^0.75.3",
111
+ "@earendil-works/pi-ai": "^0.75.3",
112
+ "@earendil-works/pi-coding-agent": "^0.75.3",
110
113
  "@modelcontextprotocol/sdk": "^1.25.1",
111
- "@openai/codex-sdk": "^0.128.0",
112
- "@opencode-ai/sdk": "^1.14.30",
114
+ "@openai/codex-sdk": "^0.130.0",
115
+ "@opencode-ai/sdk": "^1.15.4",
113
116
  "@openfort/openfort-node": "^0.9.1",
117
+ "@opentelemetry/api": "^1.9.1",
118
+ "@opentelemetry/exporter-trace-otlp-http": "^0.218.0",
119
+ "@opentelemetry/resources": "^2.7.1",
120
+ "@opentelemetry/sdk-node": "^0.218.0",
121
+ "@opentelemetry/semantic-conventions": "^1.41.1",
114
122
  "@slack/bolt": "^4.6.0",
115
123
  "@types/react": "^19.2.7",
116
124
  "@x402/core": "^2.5.0",
@@ -1,5 +1,6 @@
1
1
  import { Hono } from "hono";
2
2
  import { serveStatic } from "hono/bun";
3
+ import { getApiKey } from "../utils/api-key";
3
4
  import { BROWSER_SDK_JS } from "./browser-sdk";
4
5
  import { getAvailablePort } from "./port";
5
6
  import { createTunnel } from "./tunnel";
@@ -23,7 +24,7 @@ export interface ArtifactServer {
23
24
 
24
25
  export function createArtifactServer(opts: ArtifactServerOptions): ArtifactServer {
25
26
  const agentId = process.env.AGENT_ID || "unknown";
26
- const apiKey = process.env.API_KEY || "";
27
+ const apiKey = getApiKey();
27
28
  const mcpBaseUrl = process.env.MCP_BASE_URL || `http://localhost:${process.env.PORT || "3013"}`;
28
29
 
29
30
  const app = new Hono();
package/src/be/db.ts CHANGED
@@ -362,7 +362,7 @@ function ensureAgentProfileColumns(database: Database): void {
362
362
  }
363
363
  }
364
364
 
365
- function computeContentHash(content: string): string {
365
+ export function computeContentHash(content: string): string {
366
366
  const hasher = new Bun.CryptoHasher("sha256");
367
367
  hasher.update(content);
368
368
  return hasher.digest("hex");
@@ -980,7 +980,7 @@ type AgentTaskRow = {
980
980
  progress: string | null;
981
981
  compactionCount: number | null;
982
982
  peakContextPercent: number | null;
983
- totalContextTokensUsed: number | null;
983
+ peakContextTokens: number | null;
984
984
  contextWindowSize: number | null;
985
985
  was_paused: number;
986
986
  credentialKeySuffix: string | null;
@@ -1036,7 +1036,7 @@ function rowToAgentTask(row: AgentTaskRow): AgentTask {
1036
1036
  contextKey: row.contextKey ?? undefined,
1037
1037
  compactionCount: row.compactionCount ?? undefined,
1038
1038
  peakContextPercent: row.peakContextPercent ?? undefined,
1039
- totalContextTokensUsed: row.totalContextTokensUsed ?? undefined,
1039
+ peakContextTokens: row.peakContextTokens ?? undefined,
1040
1040
  contextWindowSize: row.contextWindowSize ?? undefined,
1041
1041
  createdAt: row.createdAt,
1042
1042
  lastUpdatedAt: row.lastUpdatedAt,
@@ -3761,8 +3761,11 @@ type SessionCostRow = {
3761
3761
  outputTokens: number;
3762
3762
  cacheReadTokens: number;
3763
3763
  cacheWriteTokens: number;
3764
+ // Migration 063 additions:
3765
+ reasoningOutputTokens: number;
3766
+ thinkingTokens: number;
3764
3767
  durationMs: number;
3765
- numTurns: number;
3768
+ numTurns: number | null;
3766
3769
  model: string;
3767
3770
  isError: number;
3768
3771
  costSource: string;
@@ -3780,6 +3783,8 @@ function rowToSessionCost(row: SessionCostRow): SessionCost {
3780
3783
  outputTokens: row.outputTokens,
3781
3784
  cacheReadTokens: row.cacheReadTokens,
3782
3785
  cacheWriteTokens: row.cacheWriteTokens,
3786
+ reasoningOutputTokens: row.reasoningOutputTokens ?? 0,
3787
+ thinkingTokens: row.thinkingTokens ?? 0,
3783
3788
  durationMs: row.durationMs,
3784
3789
  numTurns: row.numTurns,
3785
3790
  model: row.model,
@@ -3803,15 +3808,24 @@ const sessionCostQueries = {
3803
3808
  number,
3804
3809
  number,
3805
3810
  number,
3806
- number,
3807
- number,
3808
- string,
3809
- number,
3810
- string,
3811
+ number, // reasoningOutputTokens
3812
+ number, // thinkingTokens
3813
+ number, // durationMs
3814
+ number | null, // numTurns
3815
+ string, // model
3816
+ number, // isError
3817
+ string, // costSource
3811
3818
  ]
3812
3819
  >(
3813
- `INSERT INTO session_costs (id, sessionId, taskId, agentId, totalCostUsd, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, durationMs, numTurns, model, isError, costSource, createdAt)
3814
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))`,
3820
+ `INSERT INTO session_costs (
3821
+ id, sessionId, taskId, agentId,
3822
+ totalCostUsd, inputTokens, outputTokens,
3823
+ cacheReadTokens, cacheWriteTokens,
3824
+ reasoningOutputTokens, thinkingTokens,
3825
+ durationMs, numTurns, model, isError,
3826
+ costSource, createdAt
3827
+ )
3828
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))`,
3815
3829
  ),
3816
3830
 
3817
3831
  getByTaskId: () =>
@@ -3839,16 +3853,22 @@ export interface CreateSessionCostInput {
3839
3853
  outputTokens?: number;
3840
3854
  cacheReadTokens?: number;
3841
3855
  cacheWriteTokens?: number;
3856
+ // Migration 063 additions — adapters that have these numbers should pass
3857
+ // them; defaulting to 0 preserves the old write shape for callers that don't.
3858
+ reasoningOutputTokens?: number;
3859
+ thinkingTokens?: number;
3842
3860
  durationMs: number;
3843
- numTurns: number;
3861
+ // Nullable: some adapters (claude when num_turns is absent) can't honestly
3862
+ // report a turn count; we prefer null over a faked 1.
3863
+ numTurns: number | null;
3844
3864
  model: string;
3845
3865
  isError?: boolean;
3846
3866
  /**
3847
- * Phase 6: where the recorded `totalCostUsd` came from.
3867
+ * Phase 6 (migration 063 added 'unpriced'): where `totalCostUsd` came from.
3848
3868
  * - 'harness' — value reported by the harness as-is (default).
3849
- * - 'pricing-table' — value recomputed by the API from `pricing` rows
3850
- * (Codex when DB pricing rows exist for all three
3851
- * token classes).
3869
+ * - 'pricing-table' — value recomputed by the API from `pricing` rows.
3870
+ * - 'unpriced' — recompute attempted but no matching pricing rows;
3871
+ * `totalCostUsd` is whatever the worker submitted.
3852
3872
  */
3853
3873
  costSource?: SessionCostSource;
3854
3874
  }
@@ -3856,6 +3876,8 @@ export interface CreateSessionCostInput {
3856
3876
  export function createSessionCost(input: CreateSessionCostInput): SessionCost {
3857
3877
  const id = crypto.randomUUID();
3858
3878
  const costSource: SessionCostSource = input.costSource ?? "harness";
3879
+ const reasoningOutputTokens = input.reasoningOutputTokens ?? 0;
3880
+ const thinkingTokens = input.thinkingTokens ?? 0;
3859
3881
  sessionCostQueries
3860
3882
  .insert()
3861
3883
  .run(
@@ -3868,6 +3890,8 @@ export function createSessionCost(input: CreateSessionCostInput): SessionCost {
3868
3890
  input.outputTokens ?? 0,
3869
3891
  input.cacheReadTokens ?? 0,
3870
3892
  input.cacheWriteTokens ?? 0,
3893
+ reasoningOutputTokens,
3894
+ thinkingTokens,
3871
3895
  input.durationMs,
3872
3896
  input.numTurns,
3873
3897
  input.model,
@@ -3885,6 +3909,8 @@ export function createSessionCost(input: CreateSessionCostInput): SessionCost {
3885
3909
  outputTokens: input.outputTokens ?? 0,
3886
3910
  cacheReadTokens: input.cacheReadTokens ?? 0,
3887
3911
  cacheWriteTokens: input.cacheWriteTokens ?? 0,
3912
+ reasoningOutputTokens,
3913
+ thinkingTokens,
3888
3914
  durationMs: input.durationMs,
3889
3915
  numTurns: input.numTurns,
3890
3916
  model: input.model,
@@ -4110,16 +4136,33 @@ export interface DashboardCostSummary {
4110
4136
  }
4111
4137
 
4112
4138
  export function getDashboardCostSummary(): DashboardCostSummary {
4139
+ // Phase 13: compute the date boundaries in TS and pass them as ISO 8601
4140
+ // strings. `session_costs.createdAt` is a TEXT ISO 8601 column; lexicographic
4141
+ // comparison on ISO 8601 sorts correctly, so the comparison works as long
4142
+ // as both sides are the same shape. The old code compared an ISO string
4143
+ // (`2026-05-15T03:45:12.123Z`) against `date('now')` (which returns the
4144
+ // string `2026-05-15`) — lexicographically `2026-05-15T...` > `2026-05-15`,
4145
+ // so today's rows were counted, BUT only because a longer string that
4146
+ // shares the bare-date prefix sorts after it; the two sides had different
4147
+ // shapes, which makes the comparison fragile. Use a proper ISO-millisecond
4148
+ // boundary for both halves so the comparison is unambiguous.
4149
+ const now = new Date();
4150
+ const startOfDayUtc = new Date(
4151
+ Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()),
4152
+ ).toISOString();
4153
+ const startOfMonthUtc = new Date(
4154
+ Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), 1),
4155
+ ).toISOString();
4113
4156
  type CostRow = { costToday: number; costMtd: number };
4114
4157
  const row = getDb()
4115
- .prepare<CostRow, []>(
4158
+ .prepare<CostRow, [string, string]>(
4116
4159
  `SELECT
4117
- COALESCE(SUM(CASE WHEN createdAt >= date('now') THEN totalCostUsd ELSE 0 END), 0) as costToday,
4160
+ COALESCE(SUM(CASE WHEN createdAt >= ? THEN totalCostUsd ELSE 0 END), 0) as costToday,
4118
4161
  COALESCE(SUM(totalCostUsd), 0) as costMtd
4119
4162
  FROM session_costs
4120
- WHERE createdAt >= date('now', 'start of month')`,
4163
+ WHERE createdAt >= ?`,
4121
4164
  )
4122
- .get();
4165
+ .get(startOfDayUtc, startOfMonthUtc);
4123
4166
 
4124
4167
  return row ?? { costToday: 0, costMtd: 0 };
4125
4168
  }
@@ -8245,6 +8288,8 @@ type ContextSnapshotRow = {
8245
8288
  preCompactTokens: number | null;
8246
8289
  cumulativeInputTokens: number;
8247
8290
  cumulativeOutputTokens: number;
8291
+ // Migration 063 — see ContextFormulaSchema in src/types.ts for the value set.
8292
+ contextFormula: string | null;
8248
8293
  createdAt: string;
8249
8294
  };
8250
8295
 
@@ -8258,10 +8303,11 @@ function rowToContextSnapshot(row: ContextSnapshotRow): ContextSnapshot {
8258
8303
  contextTotalTokens: row.contextTotalTokens ?? undefined,
8259
8304
  contextPercent: row.contextPercent ?? undefined,
8260
8305
  eventType: row.eventType,
8261
- compactTrigger: (row.compactTrigger as "auto" | "manual" | null) ?? undefined,
8306
+ compactTrigger: (row.compactTrigger as "auto" | "manual" | "auto-inferred" | null) ?? undefined,
8262
8307
  preCompactTokens: row.preCompactTokens ?? undefined,
8263
8308
  cumulativeInputTokens: row.cumulativeInputTokens,
8264
8309
  cumulativeOutputTokens: row.cumulativeOutputTokens,
8310
+ contextFormula: (row.contextFormula as ContextSnapshot["contextFormula"]) ?? undefined,
8265
8311
  createdAt: row.createdAt,
8266
8312
  };
8267
8313
  }
@@ -8283,11 +8329,12 @@ const contextSnapshotQueries = {
8283
8329
  number | null,
8284
8330
  number,
8285
8331
  number,
8332
+ string | null, // contextFormula (migration 063)
8286
8333
  string,
8287
8334
  ]
8288
8335
  >(
8289
- `INSERT INTO task_context_snapshots (id, taskId, agentId, sessionId, contextUsedTokens, contextTotalTokens, contextPercent, eventType, compactTrigger, preCompactTokens, cumulativeInputTokens, cumulativeOutputTokens, createdAt)
8290
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
8336
+ `INSERT INTO task_context_snapshots (id, taskId, agentId, sessionId, contextUsedTokens, contextTotalTokens, contextPercent, eventType, compactTrigger, preCompactTokens, cumulativeInputTokens, cumulativeOutputTokens, contextFormula, createdAt)
8337
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
8291
8338
  ),
8292
8339
 
8293
8340
  getByTaskId: () =>
@@ -8309,10 +8356,12 @@ export interface CreateContextSnapshotInput {
8309
8356
  contextTotalTokens?: number;
8310
8357
  contextPercent?: number;
8311
8358
  eventType: ContextSnapshotEventType;
8312
- compactTrigger?: "auto" | "manual";
8359
+ compactTrigger?: "auto" | "manual" | "auto-inferred";
8313
8360
  preCompactTokens?: number;
8314
8361
  cumulativeInputTokens?: number;
8315
8362
  cumulativeOutputTokens?: number;
8363
+ // Migration 063 — adapter-supplied formula tag.
8364
+ contextFormula?: ContextSnapshot["contextFormula"];
8316
8365
  }
8317
8366
 
8318
8367
  export function createContextSnapshot(input: CreateContextSnapshotInput): ContextSnapshot {
@@ -8334,6 +8383,7 @@ export function createContextSnapshot(input: CreateContextSnapshotInput): Contex
8334
8383
  input.preCompactTokens ?? null,
8335
8384
  input.cumulativeInputTokens ?? 0,
8336
8385
  input.cumulativeOutputTokens ?? 0,
8386
+ input.contextFormula ?? null,
8337
8387
  now,
8338
8388
  );
8339
8389
 
@@ -8347,10 +8397,15 @@ export function createContextSnapshot(input: CreateContextSnapshotInput): Contex
8347
8397
  .run(input.contextPercent, input.taskId);
8348
8398
  }
8349
8399
 
8350
- // Keep totalContextTokensUsed up to date with the latest known value
8400
+ // Migration 063: peakContextTokens is monotonic-max across snapshots, not a
8401
+ // rolling latest. Mirrors Claude Code's status-line "peak context" semantic.
8351
8402
  if (input.contextUsedTokens != null) {
8352
8403
  getDb()
8353
- .prepare("UPDATE agent_tasks SET totalContextTokensUsed = ? WHERE id = ?")
8404
+ .prepare(
8405
+ `UPDATE agent_tasks
8406
+ SET peakContextTokens = MAX(COALESCE(peakContextTokens, 0), ?)
8407
+ WHERE id = ?`,
8408
+ )
8354
8409
  .run(input.contextUsedTokens, input.taskId);
8355
8410
  }
8356
8411
 
@@ -8362,9 +8417,17 @@ export function createContextSnapshot(input: CreateContextSnapshotInput): Contex
8362
8417
  .run(input.taskId);
8363
8418
  }
8364
8419
 
8365
- if (input.eventType === "completion" && input.contextTotalTokens != null) {
8420
+ // Phase 10: set contextWindowSize on the FIRST snapshot that carries one
8421
+ // (was previously gated on eventType === 'completion', meaning the UI saw
8422
+ // NULL throughout running tasks). Subsequent snapshots leave it alone — the
8423
+ // window doesn't change mid-session.
8424
+ if (input.contextTotalTokens != null) {
8366
8425
  getDb()
8367
- .prepare("UPDATE agent_tasks SET contextWindowSize = ? WHERE id = ?")
8426
+ .prepare(
8427
+ `UPDATE agent_tasks
8428
+ SET contextWindowSize = ?
8429
+ WHERE id = ? AND contextWindowSize IS NULL`,
8430
+ )
8368
8431
  .run(input.contextTotalTokens, input.taskId);
8369
8432
  }
8370
8433
 
@@ -8381,6 +8444,7 @@ export function createContextSnapshot(input: CreateContextSnapshotInput): Contex
8381
8444
  preCompactTokens: input.preCompactTokens,
8382
8445
  cumulativeInputTokens: input.cumulativeInputTokens ?? 0,
8383
8446
  cumulativeOutputTokens: input.cumulativeOutputTokens ?? 0,
8447
+ contextFormula: input.contextFormula,
8384
8448
  createdAt: now,
8385
8449
  };
8386
8450
  }
@@ -8396,7 +8460,8 @@ export function getContextSnapshotsBySessionId(sessionId: string, limit = 500):
8396
8460
  export interface ContextSummary {
8397
8461
  compactionCount: number;
8398
8462
  peakContextPercent: number | null;
8399
- totalContextTokensUsed: number | null;
8463
+ // Migration 063: renamed from totalContextTokensUsed.
8464
+ peakContextTokens: number | null;
8400
8465
  contextWindowSize: number | null;
8401
8466
  snapshotCount: number;
8402
8467
  }
@@ -8412,7 +8477,7 @@ export function getContextSummaryByTaskId(taskId: string): ContextSummary {
8412
8477
  return {
8413
8478
  compactionCount: task?.compactionCount ?? 0,
8414
8479
  peakContextPercent: task?.peakContextPercent ?? null,
8415
- totalContextTokensUsed: task?.totalContextTokensUsed ?? null,
8480
+ peakContextTokens: task?.peakContextTokens ?? null,
8416
8481
  contextWindowSize: task?.contextWindowSize ?? null,
8417
8482
  snapshotCount: countRow?.cnt ?? 0,
8418
8483
  };
@@ -8635,6 +8700,12 @@ export function getKeyCostSummary(keyType?: string): KeyCostSummary[] {
8635
8700
  }
8636
8701
 
8637
8702
  const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
8703
+ // Phase 13: INNER JOIN -> LEFT JOIN. The `WHERE t.credentialKeySuffix IS NOT NULL`
8704
+ // still filters out rows whose taskId doesn't link to a task with credentials,
8705
+ // but switching to LEFT JOIN means a future change that drops the WHERE
8706
+ // (or a debugging query that wants orphan rows visible) doesn't silently
8707
+ // drop them. Equivalent under the current `WHERE … IS NOT NULL` filter;
8708
+ // makes the query's intent (cost rows owned by a credential) explicit.
8638
8709
  return db
8639
8710
  .prepare<KeyCostSummary, string[]>(
8640
8711
  `SELECT
@@ -8645,7 +8716,7 @@ export function getKeyCostSummary(keyType?: string): KeyCostSummary[] {
8645
8716
  COALESCE(SUM(sc.outputTokens), 0) as totalOutputTokens,
8646
8717
  COUNT(DISTINCT sc.taskId) as taskCount
8647
8718
  FROM session_costs sc
8648
- JOIN agent_tasks t ON sc.taskId = t.id
8719
+ LEFT JOIN agent_tasks t ON sc.taskId = t.id
8649
8720
  ${where}
8650
8721
  GROUP BY t.credentialKeyType, t.credentialKeySuffix`,
8651
8722
  )
@@ -0,0 +1,133 @@
1
+ -- 063_cost_context_schema_relax.sql
2
+ -- Phase 1 of the context & cost tracking fixes plan (2026-05-15).
3
+ --
4
+ -- This migration unblocks every downstream phase by:
5
+ -- * Dropping the brittle CHECK constraints on `pricing.provider` and
6
+ -- `pricing.token_class` so we can seed rows for all 7 providers
7
+ -- (claude, claude-managed, codex, pi, opencode, devin, gemini) and the
8
+ -- extra token classes (`cache_write`, `runtime_hour`, `acu`). Zod
9
+ -- validation at the application boundary (`PricingProviderSchema`,
10
+ -- `PricingTokenClassSchema` in `src/types.ts`) keeps the actual safety
11
+ -- guarantee — the CHECKs added drift risk for no real benefit.
12
+ -- * Renaming the misleading `agent_tasks.totalContextTokensUsed` column
13
+ -- to `peakContextTokens` to match its new monotonic-max semantic
14
+ -- (mirrors Claude Code's status-line "peak context" idea).
15
+ -- * Recording the `contextFormula` used by the adapter that emitted a
16
+ -- given snapshot so we can tell apples from oranges across providers.
17
+ -- * Adding `reasoningOutputTokens` (codex reasoning models) and
18
+ -- `thinkingTokens` (claude extended thinking) columns to `session_costs`
19
+ -- so we stop dropping those numbers on the floor.
20
+ --
21
+ -- SQLite CHECK constraints can't be modified in place, so the `pricing` and
22
+ -- `session_costs` shape changes use the standard
23
+ -- create-new / copy / drop / rename dance. Existing rows are preserved.
24
+ --
25
+ -- Forward-only — no down migration. If you need to revert, write a new
26
+ -- migration that walks the schema forward to the desired state.
27
+
28
+ -- ---------------------------------------------------------------------------
29
+ -- 1. Relax `pricing` CHECK constraints (drop them entirely; Zod validates).
30
+ -- ---------------------------------------------------------------------------
31
+
32
+ CREATE TABLE pricing_new (
33
+ provider TEXT NOT NULL,
34
+ model TEXT NOT NULL,
35
+ token_class TEXT NOT NULL,
36
+ effective_from INTEGER NOT NULL,
37
+ price_per_million_usd REAL NOT NULL,
38
+ createdAt INTEGER NOT NULL,
39
+ lastUpdatedAt INTEGER NOT NULL,
40
+ PRIMARY KEY (provider, model, token_class, effective_from)
41
+ );
42
+
43
+ INSERT INTO pricing_new (provider, model, token_class, effective_from, price_per_million_usd, createdAt, lastUpdatedAt)
44
+ SELECT provider, model, token_class, effective_from, price_per_million_usd, createdAt, lastUpdatedAt
45
+ FROM pricing;
46
+
47
+ DROP TABLE pricing;
48
+ ALTER TABLE pricing_new RENAME TO pricing;
49
+
50
+ -- Re-create the index the original `pricing` table had (matches 046:54-55).
51
+ CREATE INDEX IF NOT EXISTS idx_pricing_lookup
52
+ ON pricing (provider, model, token_class, effective_from DESC);
53
+
54
+ -- ---------------------------------------------------------------------------
55
+ -- 2. Rename agent_tasks.totalContextTokensUsed -> peakContextTokens.
56
+ -- SQLite >= 3.25 supports RENAME COLUMN; bun:sqlite is well past that.
57
+ -- ---------------------------------------------------------------------------
58
+
59
+ ALTER TABLE agent_tasks RENAME COLUMN totalContextTokensUsed TO peakContextTokens;
60
+
61
+ -- ---------------------------------------------------------------------------
62
+ -- 3. Add contextFormula column to task_context_snapshots.
63
+ -- Using a plain TEXT column (no CHECK) so the adapter side can add new
64
+ -- formulas without an accompanying migration; Zod enum validates writes.
65
+ -- Values today:
66
+ -- 'input-cache-output' — unified formula (post-Phase 9)
67
+ -- 'input-cache-no-output' — pre-unification claude formula
68
+ -- 'input-output-no-cache' — pre-unification claude-managed formula
69
+ -- 'peak-proxy' — pre-unification codex formula
70
+ -- 'pi-delegated' — context numbers come from the pi-ai SDK
71
+ -- 'harness-reported' — context numbers come from a harness API (devin)
72
+ -- 'unknown' — pre-migration backfill or adapter didn't tag
73
+ -- ---------------------------------------------------------------------------
74
+
75
+ ALTER TABLE task_context_snapshots ADD COLUMN contextFormula TEXT;
76
+ UPDATE task_context_snapshots SET contextFormula = 'unknown' WHERE contextFormula IS NULL;
77
+
78
+ -- ---------------------------------------------------------------------------
79
+ -- 4. Rewrite session_costs to:
80
+ -- a) drop the costSource CHECK (we need 'unpriced' as a third value);
81
+ -- b) add reasoningOutputTokens + thinkingTokens columns we previously
82
+ -- dropped on the floor.
83
+ -- SQLite can't relax a CHECK in-place — table-rewrite dance, same pattern
84
+ -- as the pricing table above. FKs are redeclared inline; indexes are recreated after the rename.
85
+ -- ---------------------------------------------------------------------------
86
+
87
+ CREATE TABLE session_costs_new (
88
+ id TEXT PRIMARY KEY,
89
+ sessionId TEXT NOT NULL,
90
+ taskId TEXT,
91
+ agentId TEXT NOT NULL,
92
+ totalCostUsd REAL NOT NULL,
93
+ inputTokens INTEGER NOT NULL DEFAULT 0,
94
+ outputTokens INTEGER NOT NULL DEFAULT 0,
95
+ cacheReadTokens INTEGER NOT NULL DEFAULT 0,
96
+ -- Migration 063: nullable, DEFAULT 0 kept. Codex SDK can't surface cache writes,
97
+ -- so an explicit NULL may be stored instead of a faked 0 that mixes with real zeros.
98
+ cacheWriteTokens INTEGER DEFAULT 0,
99
+ durationMs INTEGER NOT NULL,
100
+ -- Migration 063: nullable. Claude when `num_turns` is absent can't honestly
101
+ -- report a turn count; null is preferred over a faked 1.
102
+ numTurns INTEGER,
103
+ model TEXT NOT NULL,
104
+ isError INTEGER NOT NULL DEFAULT 0,
105
+ costSource TEXT NOT NULL DEFAULT 'harness',
106
+ reasoningOutputTokens INTEGER NOT NULL DEFAULT 0,
107
+ thinkingTokens INTEGER NOT NULL DEFAULT 0,
108
+ createdAt TEXT NOT NULL,
109
+ FOREIGN KEY (agentId) REFERENCES agents(id) ON DELETE CASCADE,
110
+ FOREIGN KEY (taskId) REFERENCES agent_tasks(id) ON DELETE SET NULL
111
+ );
112
+
113
+ INSERT INTO session_costs_new (
114
+ id, sessionId, taskId, agentId, totalCostUsd,
115
+ inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens,
116
+ durationMs, numTurns, model, isError, costSource,
117
+ reasoningOutputTokens, thinkingTokens, createdAt
118
+ )
119
+ SELECT
120
+ id, sessionId, taskId, agentId, totalCostUsd,
121
+ inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens,
122
+ durationMs, numTurns, model, isError, costSource,
123
+ 0, 0, createdAt
124
+ FROM session_costs;
125
+
126
+ DROP TABLE session_costs;
127
+ ALTER TABLE session_costs_new RENAME TO session_costs;
128
+
129
+ -- Recreate indexes (mirrors 001_initial.sql:360-363).
130
+ CREATE INDEX IF NOT EXISTS idx_session_costs_createdAt ON session_costs(createdAt);
131
+ CREATE INDEX IF NOT EXISTS idx_session_costs_taskId ON session_costs(taskId);
132
+ CREATE INDEX IF NOT EXISTS idx_session_costs_agentId ON session_costs(agentId);
133
+ CREATE INDEX IF NOT EXISTS idx_session_costs_agent_createdAt ON session_costs(agentId, createdAt);
@@ -0,0 +1,39 @@
1
-- scripts: one row per stored script, keyed by `id`. Column order is kept
-- exactly as declared so positional reads/writes are unaffected.
CREATE TABLE scripts (
  id               TEXT    PRIMARY KEY,
  name             TEXT    NOT NULL,
  -- Only 'global' and 'agent' are accepted; scopeId is nullable.
  scope            TEXT    NOT NULL CHECK(scope IN ('global', 'agent')),
  scopeId          TEXT,
  source           TEXT    NOT NULL,
  description      TEXT    NOT NULL,
  intent           TEXT    NOT NULL,
  signatureJson    TEXT    NOT NULL,
  contentHash      TEXT    NOT NULL,
  version          INTEGER NOT NULL DEFAULT 1,
  isScratch        INTEGER NOT NULL DEFAULT 0,
  typeChecked      INTEGER NOT NULL DEFAULT 0,
  -- Only 'none' and 'workspace-rw' are accepted; defaults to 'none'.
  fsMode           TEXT    NOT NULL DEFAULT 'none' CHECK(fsMode IN ('none', 'workspace-rw')),
  createdByAgentId TEXT,
  -- Both timestamps default to the insertion time as rendered by datetime('now').
  createdAt        TEXT    NOT NULL DEFAULT (datetime('now')),
  updatedAt        TEXT    NOT NULL DEFAULT (datetime('now'))
);

-- A name is unique within (scope, scopeId). COALESCE folds a NULL scopeId to
-- '' so rows without a scopeId still collide with each other; a unique index
-- on the raw column would treat every NULL as distinct.
CREATE UNIQUE INDEX idx_scripts_name_scope
  ON scripts(name, scope, COALESCE(scopeId, ''));

-- Lookup by scope / scopeId.
CREATE INDEX idx_scripts_scope
  ON scripts(scope, scopeId);

-- Lookup by scratch flag, ordered by creation time.
CREATE INDEX idx_scripts_scratch
  ON scripts(isScratch, createdAt);
23
+
24
-- script_versions: per-version snapshot rows for a script. Each row belongs
-- to exactly one scripts row and is deleted with it (ON DELETE CASCADE).
CREATE TABLE script_versions (
  id               TEXT    PRIMARY KEY,
  scriptId         TEXT    NOT NULL REFERENCES scripts(id) ON DELETE CASCADE,
  version          INTEGER NOT NULL,
  source           TEXT    NOT NULL,
  description      TEXT    NOT NULL,
  intent           TEXT    NOT NULL,
  signatureJson    TEXT    NOT NULL,
  contentHash      TEXT    NOT NULL,
  changedByAgentId TEXT,
  changedAt        TEXT    NOT NULL DEFAULT (datetime('now')),
  changeReason     TEXT,
  -- A given script can hold each version number only once.
  UNIQUE(scriptId, version)
);

-- Lookup of version rows by content hash.
CREATE INDEX idx_script_versions_hash
  ON script_versions(contentHash);
@@ -0,0 +1,7 @@
1
-- script_embeddings: at most one embedding row per script (scriptId is the
-- primary key). The row is deleted together with its script via
-- ON DELETE CASCADE.
CREATE TABLE script_embeddings (
  scriptId       TEXT PRIMARY KEY REFERENCES scripts(id) ON DELETE CASCADE,
  embedding      BLOB NOT NULL,
  embeddingModel TEXT NOT NULL,
  embeddedText   TEXT NOT NULL,
  -- Defaults to the insertion time as rendered by datetime('now').
  embeddedAt     TEXT NOT NULL DEFAULT (datetime('now'))
);
@@ -0,0 +1,81 @@
1
+ /**
2
+ * Phase 2 fix — normalize provider model ids before pricing-table lookup.
3
+ *
4
+ * Different harnesses report the same underlying model under different keys:
5
+ *
6
+ * - claude-adapter → `claude-opus-4-7` (bare)
7
+ * - codex-adapter → `gpt-5.4` (bare, dotted)
8
+ * - opencode-adapter → `openrouter/anthropic/claude-sonnet-4.5`
9
+ * - pi-mono-adapter → `github-copilot/gpt-5.4` or
10
+ * `openrouter/anthropic/claude-sonnet-4.5`
11
+ *
12
+ * The pricing seed in `src/be/seed-pricing.ts` keys by what models.dev calls
13
+ * the model (e.g. `anthropic/claude-sonnet-4.5` for openrouter rows,
14
+ * `gpt-5.4` for openai rows). That means harness-emitted ids with extra
15
+ * routing prefixes (`openrouter/`, `github-copilot/`, …) fall through to
16
+ * `costSource='unpriced'` even when we have a perfectly good rate row.
17
+ *
18
+ * Rather than rewriting the adapter outputs (which are the harness's source
19
+ * of truth and useful for debugging), we normalize at the *lookup boundary*:
20
+ * strip noisy routing prefixes so the seeded canonical key resolves.
21
+ *
22
+ * Apply this helper symmetrically: once when seeding rows (so seed keys are
23
+ * canonical) and once when querying (so adapter-emitted keys collapse onto
24
+ * the same canonical form).
25
+ */
26
+
27
+ import type { PricingProvider } from "../types";
28
+
29
/**
 * Routing prefixes that a harness may prepend to the underlying model id but
 * that carry no pricing meaning. Stripping one collapses, for example,
 * `openrouter/anthropic/claude-sonnet-4.5` → `anthropic/claude-sonnet-4.5`.
 *
 * Each provider's list is scanned in order and only the first matching entry
 * is removed, exactly once. A chained id therefore loses a single layer:
 * under `codex`, `github-copilot/openai/gpt-5.4` becomes `openai/gpt-5.4`,
 * not `gpt-5.4`.
 */
const ROUTING_PREFIXES_BY_PROVIDER: Record<PricingProvider, readonly string[]> = {
  // opencode goes through a proxy layer; strip whichever proxy prefix the
  // user picked.
  opencode: ["openrouter/", "github-copilot/"],
  // pi-mono can be pointed at openrouter mirrors or the github-copilot proxy.
  pi: ["openrouter/", "github-copilot/"],
  // codex normally reports a bare id, but an override may supply a prefixed
  // form. Be forgiving on the lookup side.
  codex: ["openai/", "github-copilot/"],
  // These providers emit bare ids today. The empty lists keep normalization
  // to lowercasing only, while the entry-per-provider shape lets a provider
  // opt in later without touching call sites.
  claude: [],
  "claude-managed": [],
  devin: [],
  gemini: [],
};

/**
 * Returns the canonical pricing-table key for a `(provider, model)` pair.
 *
 * Rules, applied in order:
 *   1. An empty `model` is returned unchanged.
 *   2. The id is lowercased, so lookups are case-insensitive.
 *   3. At most one routing prefix is removed: the first entry of the
 *      provider's prefix list that the lowercased id starts with.
 *
 * Idempotence is conditional. Re-normalizing an already-normalized key is a
 * no-op only when that key does not itself start with one of the provider's
 * prefixes. Ids carrying chained prefixes lose one layer per call (see the
 * `codex` example on the prefix table).
 *
 * Minor-version separators are left untouched: `gpt-5.4` stays dotted and
 * `claude-opus-4-7` stays dashed.
 *
 * @param provider - Provider whose prefix list is consulted. A value missing
 *   from the table at runtime is treated as having no prefixes.
 * @param model - Model id as reported by the harness.
 * @returns The lowercased id with at most one leading routing prefix removed.
 */
export function normalizeModelKey(provider: PricingProvider, model: string): string {
  if (!model) return model;

  const lowered = model.toLowerCase();
  // `?? []` guards against a provider value that is absent at runtime even
  // though the static type says every provider has an entry.
  const candidates = ROUTING_PREFIXES_BY_PROVIDER[provider] ?? [];
  const matched = candidates.find((prefix) => lowered.startsWith(prefix));

  return matched === undefined ? lowered : lowered.slice(matched.length);
}