@desplega.ai/agent-swarm 1.79.4 → 1.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/openapi.json +98 -19
  2. package/package.json +12 -6
  3. package/src/be/db.ts +101 -30
  4. package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
  5. package/src/be/pricing-normalize.ts +81 -0
  6. package/src/be/seed-pricing.ts +293 -0
  7. package/src/commands/claude-managed-setup.ts +19 -3
  8. package/src/commands/runner.ts +592 -237
  9. package/src/http/context.ts +6 -2
  10. package/src/http/index.ts +115 -68
  11. package/src/http/session-data.ts +74 -23
  12. package/src/otel-impl.ts +200 -0
  13. package/src/otel.ts +127 -0
  14. package/src/providers/claude-adapter.ts +30 -5
  15. package/src/providers/claude-managed-adapter.ts +43 -17
  16. package/src/providers/claude-managed-pricing.ts +34 -0
  17. package/src/providers/codex-adapter.ts +38 -27
  18. package/src/providers/codex-models.ts +22 -3
  19. package/src/providers/devin-adapter.ts +11 -0
  20. package/src/providers/opencode-adapter.ts +31 -7
  21. package/src/providers/pi-mono-adapter.ts +39 -7
  22. package/src/providers/pricing-sources.md +52 -0
  23. package/src/providers/swarm-events-shared.ts +8 -4
  24. package/src/providers/types.ts +33 -10
  25. package/src/server.ts +6 -0
  26. package/src/tests/claude-managed-adapter.test.ts +17 -3
  27. package/src/tests/claude-managed-setup.test.ts +10 -1
  28. package/src/tests/codex-adapter.test.ts +20 -19
  29. package/src/tests/context-snapshot.test.ts +2 -2
  30. package/src/tests/context-window.test.ts +65 -1
  31. package/src/tests/devin-adapter.test.ts +2 -0
  32. package/src/tests/http/context-routes.test.ts +161 -0
  33. package/src/tests/migration-063-schema-relax.test.ts +109 -0
  34. package/src/tests/opencode-adapter.test.ts +146 -1
  35. package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
  36. package/src/tests/pages-view-count.test.ts +30 -5
  37. package/src/tests/providers/codex-cost.test.ts +18 -0
  38. package/src/tests/providers/opencode-cost.test.ts +74 -0
  39. package/src/tests/providers/pi-cost.test.ts +128 -0
  40. package/src/tests/secret-scrubber.test.ts +19 -0
  41. package/src/tests/session-costs-codex-recompute.test.ts +35 -22
  42. package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
  43. package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
  44. package/src/tests/store-progress-cost.test.ts +6 -1
  45. package/src/tools/store-progress.ts +16 -60
  46. package/src/tools/utils.ts +65 -12
  47. package/src/types.ts +62 -9
  48. package/src/utils/context-window.ts +104 -4
  49. package/src/utils/secret-scrubber.ts +7 -0
@@ -6,7 +6,7 @@ import {
6
6
  getContextSummaryByTaskId,
7
7
  getTaskById,
8
8
  } from "../be/db";
9
- import { ContextSnapshotEventTypeSchema } from "../types";
9
+ import { ContextFormulaSchema, ContextSnapshotEventTypeSchema } from "../types";
10
10
  import { route } from "./route-def";
11
11
  import { json, jsonError } from "./utils";
12
12
 
@@ -25,10 +25,13 @@ const postContext = route({
25
25
  contextUsedTokens: z.number().int().min(0).optional(),
26
26
  contextTotalTokens: z.number().int().min(0).optional(),
27
27
  contextPercent: z.number().min(0).max(100).optional(),
28
- compactTrigger: z.enum(["auto", "manual"]).optional(),
28
+ compactTrigger: z.enum(["auto", "manual", "auto-inferred"]).optional(),
29
29
  preCompactTokens: z.number().int().min(0).optional(),
30
30
  cumulativeInputTokens: z.number().int().min(0).optional(),
31
31
  cumulativeOutputTokens: z.number().int().min(0).optional(),
32
+ // Migration 063: adapters tag the formula they used so cross-provider
33
+ // comparisons can tell apples from oranges.
34
+ contextFormula: ContextFormulaSchema.optional(),
32
35
  }),
33
36
  responses: {
34
37
  200: { description: "Snapshot recorded" },
@@ -91,6 +94,7 @@ export async function handleContext(
91
94
  preCompactTokens: parsed.body.preCompactTokens,
92
95
  cumulativeInputTokens: parsed.body.cumulativeInputTokens,
93
96
  cumulativeOutputTokens: parsed.body.cumulativeOutputTokens,
97
+ contextFormula: parsed.body.contextFormula,
94
98
  });
95
99
 
96
100
  json(res, { ok: true, snapshotId: snapshot.id });
package/src/http/index.ts CHANGED
@@ -14,6 +14,7 @@ import { initGitLab } from "../gitlab";
14
14
  import { stopHeartbeat } from "../heartbeat";
15
15
  import { initJira } from "../jira";
16
16
  import { initLinear } from "../linear";
17
+ import { initOtel, startSpan, withRemoteContext } from "../otel";
17
18
  import { startSlackApp, stopSlackApp } from "../slack";
18
19
  import { initTelemetry, telemetry } from "../telemetry";
19
20
  import { initWorkflows } from "../workflows";
@@ -89,6 +90,7 @@ const transports: Record<string, StreamableHTTPServerTransport> = globalState.__
89
90
  const httpServer = createHttpServer(async (req, res) => {
90
91
  const startTime = performance.now();
91
92
  let statusCode = 200;
93
+ let spanEnded = false;
92
94
 
93
95
  // Wrap writeHead to capture status code
94
96
  const originalWriteHead = res.writeHead.bind(res);
@@ -113,76 +115,108 @@ const httpServer = createHttpServer(async (req, res) => {
113
115
  console.error(`[HTTP] ❌ ${req.method} ${req.url} → Error: ${err.message}`);
114
116
  });
115
117
 
116
- setCorsHeaders(req, res);
117
-
118
- // ── Core routes (OPTIONS, health, auth, /me, /cancelled-tasks, /ping, /close) ──
119
- if (await handleCore(req, res, req.headers["x-agent-id"] as string | undefined, apiKey)) return;
120
-
121
- const pathSegments = getPathSegments(req.url || "");
122
- const queryParams = parseQueryParams(req.url || "");
123
- const myAgentId = req.headers["x-agent-id"] as string | undefined;
124
-
125
- // ── Route handlers (order matters — first match wins) ──
126
- const handlers: (() => Promise<boolean>)[] = [
127
- () => handleAgentRegister(req, res, pathSegments, myAgentId),
128
- () => handlePoll(req, res, pathSegments, queryParams, myAgentId),
129
- () => handleSessionData(req, res, pathSegments, queryParams, myAgentId),
130
- () => handleEcosystem(req, res, pathSegments, myAgentId),
131
- () => handleTrackers(req, res, pathSegments),
132
- () => handleWebhooks(req, res, pathSegments),
133
- () => handleAgentsRest(req, res, pathSegments, queryParams, myAgentId),
134
- () => handleBudgets(req, res, pathSegments, queryParams, myAgentId),
135
- () => handleContext(req, res, pathSegments, queryParams, myAgentId),
136
- () => handleTasks(req, res, pathSegments, queryParams, myAgentId),
137
- () => handleStats(req, res, pathSegments, queryParams),
138
- () => handleStatus(req, res, pathSegments, queryParams),
139
- () => handleActiveSessions(req, res, pathSegments, queryParams, myAgentId),
140
- () => handlePricing(req, res, pathSegments, queryParams, myAgentId),
141
- () => handleSchedules(req, res, pathSegments, queryParams, myAgentId),
142
- () => handleWorkflows(req, res, pathSegments, queryParams, myAgentId),
143
- () => handleWorkflowEvents(req, res, pathSegments, queryParams),
144
- () => handleApprovalRequests(req, res, pathSegments, queryParams),
145
- () => handleConfig(req, res, pathSegments, queryParams),
146
- () => handleKv(req, res, pathSegments, queryParams),
147
- () => handleIntegrations(req, res, pathSegments),
148
- () => handlePromptTemplates(req, res, pathSegments, queryParams),
149
- () => handleDbQuery(req, res, pathSegments, queryParams),
150
- () => handleRepos(req, res, pathSegments, queryParams),
151
- () => handleSkills(req, res, pathSegments, queryParams, myAgentId),
152
- () => handleMcpServers(req, res, pathSegments, queryParams),
153
- () => handleMcpOAuth(req, res, pathSegments, queryParams),
154
- () => handleMemory(req, res, pathSegments, myAgentId),
155
- () => handlePagesPublic(req, res, pathSegments, queryParams),
156
- () => handlePageProxy(req, res),
157
- () => handlePages(req, res, pathSegments, queryParams, myAgentId),
158
- () => handleApiKeys(req, res, pathSegments, queryParams),
159
- () => handleHeartbeat(req, res, pathSegments),
160
- () => handleEvents(req, res, pathSegments, queryParams, myAgentId),
161
- () => handleUsers(req, res, pathSegments, queryParams),
162
- () => handleSessions(req, res, pathSegments, queryParams),
163
- () => handleInboxState(req, res, pathSegments, queryParams),
164
- () => handleTaskTemplates(req, res, pathSegments, queryParams),
165
- () => handleMcp(req, res, transports),
166
- ];
167
-
168
- try {
169
- for (const handler of handlers) {
170
- if (await handler()) return;
171
- }
118
+ await withRemoteContext(req.headers as Record<string, unknown>, async () => {
119
+ const span = startSpan("http.server", {
120
+ "http.request.method": req.method ?? "",
121
+ "url.path": req.url?.split("?")[0] ?? "",
122
+ "agent.id": req.headers["x-agent-id"] as string | undefined,
123
+ "agentswarm.component": "api",
124
+ });
125
+
126
+ res.on("finish", () => {
127
+ if (spanEnded) return;
128
+ spanEnded = true;
129
+ span.setAttributes({
130
+ "http.response.status_code": statusCode,
131
+ "agentswarm.http.duration_ms": Math.round((performance.now() - startTime) * 10) / 10,
132
+ });
133
+ if (statusCode >= 500) {
134
+ span.setStatus({ code: 2, message: `HTTP ${statusCode}` });
135
+ }
136
+ span.end();
137
+ });
138
+
139
+ res.on("error", (err) => {
140
+ if (spanEnded) return;
141
+ spanEnded = true;
142
+ span.recordException(err);
143
+ span.setStatus({ code: 2, message: err.message });
144
+ span.end();
145
+ });
172
146
 
173
- // ── 404 ──
174
- res.writeHead(404);
175
- res.end("Not Found");
176
- } catch (err) {
177
- const message = err instanceof Error ? err.message : String(err);
178
- console.error(`[HTTP] ${req.method} ${req.url} ${message}`);
179
- if (!res.headersSent) {
180
- res.writeHead(500, { "Content-Type": "application/json" });
181
- res.end(JSON.stringify({ error: message }));
182
- } else if (!res.writableEnded) {
183
- res.end();
147
+ setCorsHeaders(req, res);
148
+
149
+ // ── Core routes (OPTIONS, health, auth, /me, /cancelled-tasks, /ping, /close) ──
150
+ if (await handleCore(req, res, req.headers["x-agent-id"] as string | undefined, apiKey)) return;
151
+
152
+ const pathSegments = getPathSegments(req.url || "");
153
+ const queryParams = parseQueryParams(req.url || "");
154
+ const myAgentId = req.headers["x-agent-id"] as string | undefined;
155
+
156
+ // ── Route handlers (order matters — first match wins) ──
157
+ const handlers: (() => Promise<boolean>)[] = [
158
+ () => handleAgentRegister(req, res, pathSegments, myAgentId),
159
+ () => handlePoll(req, res, pathSegments, queryParams, myAgentId),
160
+ () => handleSessionData(req, res, pathSegments, queryParams, myAgentId),
161
+ () => handleEcosystem(req, res, pathSegments, myAgentId),
162
+ () => handleTrackers(req, res, pathSegments),
163
+ () => handleWebhooks(req, res, pathSegments),
164
+ () => handleAgentsRest(req, res, pathSegments, queryParams, myAgentId),
165
+ () => handleBudgets(req, res, pathSegments, queryParams, myAgentId),
166
+ () => handleContext(req, res, pathSegments, queryParams, myAgentId),
167
+ () => handleTasks(req, res, pathSegments, queryParams, myAgentId),
168
+ () => handleStats(req, res, pathSegments, queryParams),
169
+ () => handleStatus(req, res, pathSegments, queryParams),
170
+ () => handleActiveSessions(req, res, pathSegments, queryParams, myAgentId),
171
+ () => handlePricing(req, res, pathSegments, queryParams, myAgentId),
172
+ () => handleSchedules(req, res, pathSegments, queryParams, myAgentId),
173
+ () => handleWorkflows(req, res, pathSegments, queryParams, myAgentId),
174
+ () => handleWorkflowEvents(req, res, pathSegments, queryParams),
175
+ () => handleApprovalRequests(req, res, pathSegments, queryParams),
176
+ () => handleConfig(req, res, pathSegments, queryParams),
177
+ () => handleKv(req, res, pathSegments, queryParams),
178
+ () => handleIntegrations(req, res, pathSegments),
179
+ () => handlePromptTemplates(req, res, pathSegments, queryParams),
180
+ () => handleDbQuery(req, res, pathSegments, queryParams),
181
+ () => handleRepos(req, res, pathSegments, queryParams),
182
+ () => handleSkills(req, res, pathSegments, queryParams, myAgentId),
183
+ () => handleMcpServers(req, res, pathSegments, queryParams),
184
+ () => handleMcpOAuth(req, res, pathSegments, queryParams),
185
+ () => handleMemory(req, res, pathSegments, myAgentId),
186
+ () => handlePagesPublic(req, res, pathSegments, queryParams),
187
+ () => handlePageProxy(req, res),
188
+ () => handlePages(req, res, pathSegments, queryParams, myAgentId),
189
+ () => handleApiKeys(req, res, pathSegments, queryParams),
190
+ () => handleHeartbeat(req, res, pathSegments),
191
+ () => handleEvents(req, res, pathSegments, queryParams, myAgentId),
192
+ () => handleUsers(req, res, pathSegments, queryParams),
193
+ () => handleSessions(req, res, pathSegments, queryParams),
194
+ () => handleInboxState(req, res, pathSegments, queryParams),
195
+ () => handleTaskTemplates(req, res, pathSegments, queryParams),
196
+ () => handleMcp(req, res, transports),
197
+ ];
198
+
199
+ try {
200
+ for (const handler of handlers) {
201
+ if (await handler()) return;
202
+ }
203
+
204
+ // ── 404 ──
205
+ res.writeHead(404);
206
+ res.end("Not Found");
207
+ } catch (err) {
208
+ span.recordException(err);
209
+ span.setStatus({ code: 2, message: err instanceof Error ? err.message : String(err) });
210
+ const message = err instanceof Error ? err.message : String(err);
211
+ console.error(`[HTTP] ❌ ${req.method} ${req.url} → ${message}`);
212
+ if (!res.headersSent) {
213
+ res.writeHead(500, { "Content-Type": "application/json" });
214
+ res.end(JSON.stringify({ error: message }));
215
+ } else if (!res.writableEnded) {
216
+ res.end();
217
+ }
184
218
  }
185
- }
219
+ });
186
220
  });
187
221
 
188
222
  // Store references in globalThis for hot reload persistence
@@ -250,9 +284,22 @@ try {
250
284
  throw err;
251
285
  }
252
286
 
287
+ // Phase 2 of the cost-tracking plan: project the vendored models.dev snapshot
288
+ // into pricing rows at boot. Lazy `getDb()` would also work, but doing it
289
+ // here surfaces the count in the boot log and makes the API ready to recompute
290
+ // USD before the first POST /api/session-costs lands.
291
+ try {
292
+ const { seedPricingFromModelsDev } = await import("../be/seed-pricing");
293
+ seedPricingFromModelsDev();
294
+ } catch (err) {
295
+ console.error("[startup] Failed to seed pricing rows:", err);
296
+ }
297
+
253
298
  // business-use initialization (no-op if envs not set)
254
299
  initialize();
255
300
 
301
+ await initOtel("api");
302
+
256
303
  httpServer
257
304
  .listen(port, async () => {
258
305
  console.log(`MCP HTTP server running on http://localhost:${port}/mcp`);
@@ -13,6 +13,7 @@ import {
13
13
  getSessionLogsByTaskId,
14
14
  getTaskById,
15
15
  } from "../be/db";
16
+ import { normalizeModelKey } from "../be/pricing-normalize";
16
17
  import type { SessionCost, SessionCostSource } from "../types";
17
18
  import { route } from "./route-def";
18
19
  import { json, jsonError } from "./utils";
@@ -65,17 +66,24 @@ const createSessionCostRoute = route({
65
66
  inputTokens: z.number().int().optional(),
66
67
  outputTokens: z.number().int().optional(),
67
68
  cacheReadTokens: z.number().int().optional(),
68
- cacheWriteTokens: z.number().int().optional(),
69
+ // Migration 063: nullable — adapters that can't honestly report cache writes
70
+ // (e.g. Codex SDK) prefer null over a faked 0.
71
+ cacheWriteTokens: z.number().int().nullable().optional(),
72
+ // Migration 063: new token classes previously dropped on the floor.
73
+ reasoningOutputTokens: z.number().int().nonnegative().optional(),
74
+ thinkingTokens: z.number().int().nonnegative().optional(),
69
75
  durationMs: z.number().int().optional(),
70
- numTurns: z.number().int().optional(),
76
+ // Migration 063: nullable for adapters that can't honestly report numTurns.
77
+ numTurns: z.number().int().nullable().optional(),
71
78
  model: z.string().optional(),
72
79
  isError: z.boolean().optional(),
73
80
  /**
74
- * Phase 6: when present, drives the codex pricing-table recompute path.
75
- * Other providers ('claude' / 'pi' / 'opencode') always trust harness-reported USD.
76
- * Optional / undefined keeps back-compat for existing callers.
81
+ * Phase 6 (extended in migration 063): drives the API recompute path. After
82
+ * Phase 2 every provider with seeded pricing rows participates.
77
83
  */
78
- provider: z.enum(["claude", "codex", "pi", "opencode"]).optional(),
84
+ provider: z
85
+ .enum(["claude", "claude-managed", "codex", "pi", "opencode", "devin", "gemini"])
86
+ .optional(),
79
87
  /**
80
88
  * Phase 6: epoch-ms timestamp used as the "active price at time T" lookup
81
89
  * basis. Defaults to `Date.now()` when omitted. Including it lets
@@ -185,35 +193,75 @@ export async function handleSessionData(
185
193
  try {
186
194
  const inputTokens = parsed.body.inputTokens ?? 0;
187
195
  const cachedInputTokens = parsed.body.cacheReadTokens ?? 0;
196
+ const cacheWriteTokens = parsed.body.cacheWriteTokens ?? 0;
188
197
  const outputTokens = parsed.body.outputTokens ?? 0;
189
- const model = parsed.body.model || "opus";
198
+ // Phase 2: don't paper over a missing model with a fake default — that
199
+ // poisoned the pricing-table lookup against the wrong rate. Only the
200
+ // back-compat case (no provider tag) keeps "opus" so old callers don't
201
+ // explode.
202
+ const model = parsed.body.model || (parsed.body.provider ? "" : "opus");
190
203
 
191
- // Phase 6: Codex USD recompute. When the worker reports `provider='codex'`
192
- // and DB pricing rows exist for ALL three token classes at the lookup
193
- // time, recompute `totalCostUsd` from tokens × DB prices and tag the
194
- // row as 'pricing-table'. If any class has no row, fall back to the
195
- // worker-reported value with `costSource='harness'` (back-compat for
196
- // unseeded models). Claude / pi / opencode paths always use 'harness'.
204
+ // Phase 2: widen the recompute branch beyond codex. For any provider
205
+ // with a known model and seeded pricing rows, recompute `totalCostUsd`
206
+ // from tokens × DB prices and tag the row 'pricing-table'. When the
207
+ // (provider, model) pair has no pricing rows at all, tag 'unpriced' so
208
+ // the UI can flag it. When the provider isn't set, fall through with
209
+ // 'harness' (back-compat for older callers).
197
210
  let totalCostUsd = parsed.body.totalCostUsd;
198
211
  let costSource: SessionCostSource = "harness";
199
212
 
200
- if (parsed.body.provider === "codex") {
213
+ if (parsed.body.provider && model) {
201
214
  const lookupTime = parsed.body.createdAt ?? Date.now();
202
- const inputRow = getActivePricingRow("codex", model, "input", lookupTime);
203
- const cachedRow = getActivePricingRow("codex", model, "cached_input", lookupTime);
204
- const outputRow = getActivePricingRow("codex", model, "output", lookupTime);
215
+ // Phase 2 fix: different harnesses prepend routing prefixes
216
+ // (`openrouter/`, `github-copilot/`, …) to the same underlying model
217
+ // id. The pricing seed stores canonical (un-prefixed) keys, so we
218
+ // strip the prefix here before lookup. The original adapter-emitted
219
+ // string is still persisted to `session_costs.model` for debugging.
220
+ const lookupModel = normalizeModelKey(parsed.body.provider, model);
221
+ const inputRow = getActivePricingRow(
222
+ parsed.body.provider,
223
+ lookupModel,
224
+ "input",
225
+ lookupTime,
226
+ );
227
+ const cachedRow = getActivePricingRow(
228
+ parsed.body.provider,
229
+ lookupModel,
230
+ "cached_input",
231
+ lookupTime,
232
+ );
233
+ const outputRow = getActivePricingRow(
234
+ parsed.body.provider,
235
+ lookupModel,
236
+ "output",
237
+ lookupTime,
238
+ );
239
+ const cacheWriteRow = getActivePricingRow(
240
+ parsed.body.provider,
241
+ lookupModel,
242
+ "cache_write",
243
+ lookupTime,
244
+ );
205
245
 
206
- if (inputRow && cachedRow && outputRow) {
207
- // Mirror the existing computeCodexCostUsd logic: subtract cached
208
- // tokens from input before billing the uncached portion at the full
209
- // rate (Codex SDK reports input_tokens as TOTAL across the turn).
246
+ if (inputRow && outputRow) {
247
+ // Mirror the legacy codex semantic: uncached input is billed at the
248
+ // full rate, cached input at the discounted rate. Cache writes are
249
+ // billed separately when the provider's pricing table carries that
250
+ // class (anthropic) and the adapter reports a non-zero value.
210
251
  const uncachedInputTokens = Math.max(0, inputTokens - cachedInputTokens);
252
+ const cachedRate = cachedRow?.pricePerMillionUsd ?? 0;
253
+ const cacheWriteRate = cacheWriteRow?.pricePerMillionUsd ?? 0;
211
254
  totalCostUsd =
212
255
  (uncachedInputTokens * inputRow.pricePerMillionUsd +
213
- cachedInputTokens * cachedRow.pricePerMillionUsd +
256
+ cachedInputTokens * cachedRate +
257
+ cacheWriteTokens * cacheWriteRate +
214
258
  outputTokens * outputRow.pricePerMillionUsd) /
215
259
  1_000_000;
216
260
  costSource = "pricing-table";
261
+ } else {
262
+ // Provider was tagged but we have no pricing rows for it; flag the
263
+ // row so the UI can show an "unpriced" badge instead of pretending.
264
+ costSource = "unpriced";
217
265
  }
218
266
  }
219
267
 
@@ -226,8 +274,11 @@ export async function handleSessionData(
226
274
  outputTokens,
227
275
  cacheReadTokens: cachedInputTokens,
228
276
  cacheWriteTokens: parsed.body.cacheWriteTokens ?? 0,
277
+ reasoningOutputTokens: parsed.body.reasoningOutputTokens ?? 0,
278
+ thinkingTokens: parsed.body.thinkingTokens ?? 0,
229
279
  durationMs: parsed.body.durationMs ?? 0,
230
- numTurns: parsed.body.numTurns ?? 1,
280
+ // Migration 063: pass null through honestly instead of faking a 1.
281
+ numTurns: parsed.body.numTurns ?? null,
231
282
  model,
232
283
  isError: parsed.body.isError ?? false,
233
284
  costSource,
@@ -0,0 +1,200 @@
1
+ import {
2
+ context,
3
+ propagation,
4
+ ROOT_CONTEXT,
5
+ type Span,
6
+ SpanStatusCode,
7
+ trace,
8
+ } from "@opentelemetry/api";
9
+ import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
10
+ import { resourceFromAttributes } from "@opentelemetry/resources";
11
+ import { NodeSDK } from "@opentelemetry/sdk-node";
12
+ import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from "@opentelemetry/semantic-conventions";
13
+ import pkg from "../package.json";
14
+ import type { SwarmSpan } from "./otel";
15
+ import { scrubSecrets } from "./utils/secret-scrubber";
16
+
17
+ type AttributeValue = string | number | boolean | string[] | number[] | boolean[];
18
+ type Attributes = Record<string, AttributeValue | undefined>;
19
+
20
+ const TRACER_NAME = "agent-swarm";
21
+ const RAW_SPAN = Symbol("agent-swarm.raw-span");
22
+
23
+ let sdk: NodeSDK | undefined;
24
+
25
/**
 * Percent-decodes one resource attribute value.
 *
 * @param value - Raw value text, possibly percent-encoded.
 * @returns The decoded text, or `value` unchanged when it is not valid
 *   percent-encoding (`decodeURIComponent` throws in that case).
 */
function decodeResourceAttributeValue(value: string): string {
  let decoded = value;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    // Malformed escape sequence: keep the raw text rather than failing.
  }
  return decoded;
}
32
+
33
/**
 * Parses a comma-separated `key=value` list into an attribute map.
 *
 * Each entry is split on its first `=`; the key and value are trimmed and the
 * value is passed through `decodeResourceAttributeValue`. Entries with an
 * empty key or an empty value (including entries with no `=`) are skipped.
 * When a key repeats, the last occurrence wins.
 *
 * @param value - Text to parse; defaults to `process.env.OTEL_RESOURCE_ATTRIBUTES`.
 * @returns The parsed attributes, or an empty object when `value` is empty/unset.
 */
function parseResourceAttributes(value = process.env.OTEL_RESOURCE_ATTRIBUTES): Attributes {
  const parsed: Attributes = {};
  if (!value) return parsed;

  for (const entry of value.split(",")) {
    const separatorAt = entry.indexOf("=");
    // No "=" means there is no value part, so the entry cannot be kept.
    if (separatorAt < 0) continue;

    const name = entry.slice(0, separatorAt).trim();
    const encoded = entry.slice(separatorAt + 1).trim();
    if (!name || !encoded) continue;

    parsed[name] = decodeResourceAttributeValue(encoded);
  }
  return parsed;
}
46
+
47
/**
 * Returns a copy of `attributes` without the entries whose value is `undefined`.
 *
 * @param attributes - Optional attribute map that may contain `undefined` values.
 * @returns `undefined` when no map was given; otherwise a new object holding
 *   only the defined entries (possibly empty).
 */
function cleanAttributes(attributes?: Attributes): Record<string, AttributeValue> | undefined {
  if (!attributes) return undefined;

  const defined: Record<string, AttributeValue> = {};
  for (const name of Object.keys(attributes)) {
    const candidate = attributes[name];
    if (candidate === undefined) continue;
    defined[name] = candidate;
  }
  return defined;
}
55
+
56
/**
 * Builds a sanitized stand-in for a thrown value before it is recorded on a span.
 *
 * @param error - Whatever was thrown.
 * @returns For `Error` instances, a new `Error` with the same `name` and with
 *   `message` (and `stack`, when present) passed through `scrubSecrets`.
 *   For anything else, `String(error)` passed through `scrubSecrets`.
 */
export function scrubOtelException(error: unknown): Error | string {
  if (error instanceof Error) {
    const sanitized = new Error(scrubSecrets(error.message));
    sanitized.name = error.name;
    if (error.stack) {
      sanitized.stack = scrubSecrets(error.stack);
    }
    return sanitized;
  }

  return scrubSecrets(String(error));
}
68
+
69
/**
 * Passes a span status message through `scrubSecrets`.
 *
 * @param status - Status code plus optional message.
 * @returns The same object when it has no message; otherwise a shallow copy
 *   whose `message` has been run through `scrubSecrets`.
 */
export function scrubOtelStatus(status: { code: number; message?: string }) {
  if (status.message === undefined) {
    return status;
  }
  return { ...status, message: scrubSecrets(status.message) };
}
77
+
78
+ type AdaptedSwarmSpan = SwarmSpan & { [RAW_SPAN]: Span };
79
+
80
/**
 * Wraps an OpenTelemetry `Span` in the project's `SwarmSpan` interface.
 *
 * The wrapper stores the raw span under the `RAW_SPAN` symbol, drops
 * `undefined` attribute values before forwarding them, and routes exceptions
 * and status messages through `scrubOtelException` / `scrubOtelStatus`.
 * Chainable methods return `this`.
 *
 * @param span - The underlying OpenTelemetry span.
 * @returns The adapter object delegating to `span`.
 */
function spanAdapter(span: Span): AdaptedSwarmSpan {
  const adapter: AdaptedSwarmSpan = {
    [RAW_SPAN]: span,

    setAttribute(key, value) {
      span.setAttribute(key, value);
      return this;
    },

    setAttributes(attributes) {
      const defined = cleanAttributes(attributes);
      // cleanAttributes yields undefined when no map was supplied; skip the call then.
      if (defined) {
        span.setAttributes(defined);
      }
      return this;
    },

    addEvent(name, attributes) {
      span.addEvent(name, cleanAttributes(attributes));
      return this;
    },

    recordException(error) {
      span.recordException(scrubOtelException(error));
    },

    setStatus(status) {
      span.setStatus(scrubOtelStatus(status));
      return this;
    },

    end() {
      span.end();
    },
  };
  return adapter;
}
109
+
110
/**
 * Signal listener installed by the most recent successful `boot()`, kept so a
 * later boot can remove it instead of stacking another pair of listeners.
 */
let signalFlushHandler: (() => Promise<void>) | undefined;

/**
 * Starts the OpenTelemetry Node SDK once per process.
 *
 * Resource attributes come from `OTEL_RESOURCE_ATTRIBUTES` (parsed by
 * `parseResourceAttributes`) and are then overlaid with service name, version,
 * namespace, instance id, deployment environment and the swarm service role.
 * Does nothing when an SDK instance is already active.
 *
 * Differences from the previous implementation:
 * - The module-level `sdk` is assigned only after `start()` returns, so a
 *   throwing `start()` no longer leaves a dead instance that turns every later
 *   `boot()` into a no-op.
 * - SIGTERM/SIGINT listeners from an earlier boot are removed before new ones
 *   are registered, so boot → shutdown → boot cycles do not accumulate listeners.
 *
 * NOTE(review): per the Node.js docs, installing a SIGTERM/SIGINT listener
 * removes the default exit-on-signal behavior. This function only flushes; the
 * process exit is presumably driven by another handler — confirm.
 *
 * @param serviceRole - `"api"` selects the `agent-swarm-api` default service
 *   name; any other value selects `agent-swarm-worker`. Also recorded as
 *   `agentswarm.service.role`.
 */
export async function boot(serviceRole: string): Promise<void> {
  if (sdk) return;

  const configuredResourceAttributes = parseResourceAttributes();
  const deploymentEnvironment =
    configuredResourceAttributes["deployment.environment"] || process.env.NODE_ENV || "development";
  const serviceName =
    process.env.OTEL_SERVICE_NAME ||
    (serviceRole === "api" ? "agent-swarm-api" : "agent-swarm-worker");

  const instance = new NodeSDK({
    resource: resourceFromAttributes({
      ...configuredResourceAttributes,
      [ATTR_SERVICE_NAME]: serviceName,
      [ATTR_SERVICE_VERSION]: pkg.version,
      "service.namespace": configuredResourceAttributes["service.namespace"] || "agent-swarm",
      "service.instance.id": process.env.AGENT_ID || crypto.randomUUID(),
      "deployment.environment": deploymentEnvironment,
      env: configuredResourceAttributes.env || deploymentEnvironment,
      "agentswarm.service.role": serviceRole,
    }),
    traceExporter: new OTLPTraceExporter(),
  });

  // Publish the instance only once it has started successfully.
  instance.start();
  sdk = instance;

  // Drop listeners left behind by an earlier boot before arming new ones.
  if (signalFlushHandler) {
    process.removeListener("SIGTERM", signalFlushHandler);
    process.removeListener("SIGINT", signalFlushHandler);
  }

  const flushOnSignal = async () => {
    try {
      // Reads the module-level `sdk` at signal time, not the instance captured here.
      await sdk?.shutdown();
    } catch {
      // Best-effort flush during process shutdown.
    }
  };
  signalFlushHandler = flushOnSignal;

  process.once("SIGTERM", flushOnSignal);
  process.once("SIGINT", flushOnSignal);
}
146
+
147
/**
 * Shuts down the active SDK instance, if any, and clears the module-level
 * reference so `boot()` can start a fresh one.
 *
 * The reference is cleared in `finally`: previously a rejected
 * `sdk.shutdown()` left the failed instance in place, which made every later
 * `boot()` return early. The rejection itself still propagates to the caller.
 *
 * @returns A promise that settles once the SDK shutdown has settled.
 */
export async function shutdown(): Promise<void> {
  try {
    await sdk?.shutdown();
  } finally {
    sdk = undefined;
  }
}
151
+
152
/**
 * Runs `fn` inside a new active span and ends the span when `fn` settles.
 *
 * On success the span status is set to OK and `fn`'s result is returned.
 * On failure the scrubbed exception is recorded, the status is set to ERROR
 * with a scrubbed message, and the original error is rethrown.
 *
 * NOTE(review): the OK status is applied after `fn` resolves, including when
 * `fn` itself set an ERROR status through the adapter without throwing —
 * confirm that the resulting precedence is intended.
 *
 * @param name - Span name.
 * @param fn - Work to run; receives the span wrapped by `spanAdapter`.
 * @param attributes - Optional initial attributes; `undefined` values are dropped.
 * @returns Whatever `fn` resolves to.
 */
export async function withSpan<T>(
  name: string,
  fn: (span: SwarmSpan) => Promise<T> | T,
  attributes?: Attributes,
): Promise<T> {
  const runInSpan = async (span: Span): Promise<T> => {
    try {
      const outcome = await fn(spanAdapter(span));
      span.setStatus({ code: SpanStatusCode.OK });
      return outcome;
    } catch (error) {
      span.recordException(scrubOtelException(error));
      const rawMessage = error instanceof Error ? error.message : String(error);
      span.setStatus({ code: SpanStatusCode.ERROR, message: scrubSecrets(rawMessage) });
      throw error;
    } finally {
      span.end();
    }
  };

  const tracer = trace.getTracer(TRACER_NAME);
  return tracer.startActiveSpan(name, { attributes: cleanAttributes(attributes) }, runInSpan);
}
175
+
176
/**
 * Starts a span via `tracer.startSpan` and returns it wrapped as a `SwarmSpan`.
 * The caller is responsible for calling `end()` on the returned span.
 *
 * @param name - Span name.
 * @param attributes - Optional initial attributes; `undefined` values are dropped.
 * @returns The adapter around the newly started span.
 */
export function startSpan(name: string, attributes?: Attributes): SwarmSpan {
  const tracer = trace.getTracer(TRACER_NAME);
  const options = { attributes: cleanAttributes(attributes) };
  return spanAdapter(tracer.startSpan(name, options));
}
182
+
183
/**
 * Runs `fn` with `span` set as the span of the active context.
 *
 * @param span - A span produced by this module's adapter. If it carries no
 *   raw span under `RAW_SPAN`, `fn` runs in the current context.
 * @param fn - Callback to execute.
 * @returns The value returned by `fn`.
 */
export function withSpanContext<T>(span: SwarmSpan, fn: () => T): T {
  const underlying = (span as Partial<AdaptedSwarmSpan>)[RAW_SPAN];
  if (underlying) {
    const scoped = trace.setSpan(context.active(), underlying);
    return context.with(scoped, fn);
  }
  return fn();
}
188
+
189
/**
 * Runs `fn` inside the context extracted from `carrier` by the configured
 * propagator, starting from `ROOT_CONTEXT`.
 *
 * @param carrier - Key/value bag to extract from (e.g. incoming request headers).
 * @param fn - Callback to run inside the extracted context.
 * @returns Whatever `fn` resolves to.
 */
export async function withRemoteContext<T>(
  carrier: Record<string, unknown>,
  fn: () => Promise<T> | T,
): Promise<T> {
  return context.with(propagation.extract(ROOT_CONTEXT, carrier), fn);
}
196
+
197
/**
 * Injects the active context into `headers` using the configured propagator.
 *
 * @param headers - Header map to mutate in place.
 * @returns The same `headers` object, for call-site convenience.
 */
export function injectTraceContext(headers: Record<string, string>): Record<string, string> {
  const activeContext = context.active();
  propagation.inject(activeContext, headers);
  return headers;
}