@desplega.ai/agent-swarm 1.79.4 → 1.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/openapi.json +98 -19
  2. package/package.json +12 -6
  3. package/src/be/db.ts +101 -30
  4. package/src/be/migrations/063_cost_context_schema_relax.sql +133 -0
  5. package/src/be/pricing-normalize.ts +81 -0
  6. package/src/be/seed-pricing.ts +293 -0
  7. package/src/commands/claude-managed-setup.ts +19 -3
  8. package/src/commands/runner.ts +592 -237
  9. package/src/http/context.ts +6 -2
  10. package/src/http/index.ts +115 -68
  11. package/src/http/session-data.ts +74 -23
  12. package/src/otel-impl.ts +200 -0
  13. package/src/otel.ts +127 -0
  14. package/src/providers/claude-adapter.ts +30 -5
  15. package/src/providers/claude-managed-adapter.ts +43 -17
  16. package/src/providers/claude-managed-pricing.ts +34 -0
  17. package/src/providers/codex-adapter.ts +38 -27
  18. package/src/providers/codex-models.ts +22 -3
  19. package/src/providers/devin-adapter.ts +11 -0
  20. package/src/providers/opencode-adapter.ts +31 -7
  21. package/src/providers/pi-mono-adapter.ts +39 -7
  22. package/src/providers/pricing-sources.md +52 -0
  23. package/src/providers/swarm-events-shared.ts +8 -4
  24. package/src/providers/types.ts +33 -10
  25. package/src/server.ts +6 -0
  26. package/src/tests/claude-managed-adapter.test.ts +17 -3
  27. package/src/tests/claude-managed-setup.test.ts +10 -1
  28. package/src/tests/codex-adapter.test.ts +20 -19
  29. package/src/tests/context-snapshot.test.ts +2 -2
  30. package/src/tests/context-window.test.ts +65 -1
  31. package/src/tests/devin-adapter.test.ts +2 -0
  32. package/src/tests/http/context-routes.test.ts +161 -0
  33. package/src/tests/migration-063-schema-relax.test.ts +109 -0
  34. package/src/tests/opencode-adapter.test.ts +146 -1
  35. package/src/tests/otel-impl-secret-scrubbing.test.ts +33 -0
  36. package/src/tests/pages-view-count.test.ts +30 -5
  37. package/src/tests/providers/codex-cost.test.ts +18 -0
  38. package/src/tests/providers/opencode-cost.test.ts +74 -0
  39. package/src/tests/providers/pi-cost.test.ts +128 -0
  40. package/src/tests/secret-scrubber.test.ts +19 -0
  41. package/src/tests/session-costs-codex-recompute.test.ts +35 -22
  42. package/src/tests/session-costs-model-key-normalize.test.ts +271 -0
  43. package/src/tests/session-costs-recompute-all-providers.test.ts +170 -0
  44. package/src/tests/store-progress-cost.test.ts +6 -1
  45. package/src/tools/store-progress.ts +16 -60
  46. package/src/tools/utils.ts +65 -12
  47. package/src/types.ts +62 -9
  48. package/src/utils/context-window.ts +104 -4
  49. package/src/utils/secret-scrubber.ts +7 -0
@@ -2,6 +2,15 @@ import { existsSync, statSync } from "node:fs";
2
2
  import { mkdir, readFile, stat, writeFile } from "node:fs/promises";
3
3
  import { ensure, initialize } from "@desplega.ai/business-use";
4
4
  import type { TemplateResponse } from "../../templates/schema.ts";
5
+ import {
6
+ type Attributes,
7
+ initOtel,
8
+ injectTraceContext,
9
+ type SwarmSpan,
10
+ startSpan,
11
+ withSpan,
12
+ withSpanContext,
13
+ } from "../otel.ts";
5
14
  import { type BasePromptArgs, getBasePrompt } from "../prompts/base-prompt.ts";
6
15
  import {
7
16
  generateDefaultClaudeMd,
@@ -15,6 +24,7 @@ import { authJsonToCredentialSelection } from "../providers/codex-oauth/auth-jso
15
24
  import {
16
25
  type CostData,
17
26
  createProviderAdapter,
27
+ type ProviderEvent,
18
28
  type ProviderResult,
19
29
  type ProviderSession,
20
30
  type ProviderSessionConfig,
@@ -1503,6 +1513,22 @@ async function registerAgent(opts: {
1503
1513
 
1504
1514
  /** Poll for triggers via HTTP API */
1505
1515
  async function pollForTrigger(opts: PollOptions): Promise<Trigger | null> {
1516
+ return withSpan(
1517
+ "worker.poll",
1518
+ async (span) => {
1519
+ const trigger = await pollForTriggerOnce(opts);
1520
+ span.setAttribute("agentswarm.poll.result", trigger ? trigger.type : "empty");
1521
+ return trigger;
1522
+ },
1523
+ {
1524
+ "agent.id": opts.agentId,
1525
+ "agentswarm.worker.poll_timeout_ms": opts.pollTimeout,
1526
+ "agentswarm.worker.poll_interval_ms": opts.pollInterval,
1527
+ },
1528
+ );
1529
+ }
1530
+
1531
+ async function pollForTriggerOnce(opts: PollOptions): Promise<Trigger | null> {
1506
1532
  const startTime = Date.now();
1507
1533
  const headers: Record<string, string> = {
1508
1534
  "X-Agent-ID": opts.agentId,
@@ -1510,6 +1536,7 @@ async function pollForTrigger(opts: PollOptions): Promise<Trigger | null> {
1510
1536
  if (opts.apiKey) {
1511
1537
  headers.Authorization = `Bearer ${opts.apiKey}`;
1512
1538
  }
1539
+ injectTraceContext(headers);
1513
1540
 
1514
1541
  while (Date.now() - startTime < opts.pollTimeout) {
1515
1542
  try {
@@ -1739,6 +1766,157 @@ function extractToolKey(toolName: string, args: unknown): Record<string, string
1739
1766
  }
1740
1767
  }
1741
1768
 
1769
+ const OTEL_PREVIEW_LIMIT = 500;
1770
+
1771
+ function telemetryPreview(value: unknown): string | undefined {
1772
+ if (value === undefined) return undefined;
1773
+ try {
1774
+ const serialized = typeof value === "string" ? value : JSON.stringify(value);
1775
+ if (!serialized) return undefined;
1776
+ const scrubbed = scrubSecrets(serialized);
1777
+ return scrubbed.length > OTEL_PREVIEW_LIMIT
1778
+ ? `${scrubbed.slice(0, OTEL_PREVIEW_LIMIT)}...`
1779
+ : scrubbed;
1780
+ } catch {
1781
+ return "[unserializable]";
1782
+ }
1783
+ }
1784
+
1785
+ type ToolTelemetry = {
1786
+ kind: "mcp" | "harness" | "skill" | "agent" | "shell" | "file" | "unknown";
1787
+ name: string;
1788
+ normalizedName: string;
1789
+ mcpServer?: string;
1790
+ mcpTool?: string;
1791
+ };
1792
+
1793
+ function classifyTool(toolName: string, args: unknown): ToolTelemetry {
1794
+ const argRecord = args && typeof args === "object" ? (args as Record<string, unknown>) : {};
1795
+
1796
+ if (toolName.startsWith("mcp__")) {
1797
+ const [, server, ...toolParts] = toolName.split("__");
1798
+ const mcpTool = toolParts.join("__") || undefined;
1799
+ return {
1800
+ kind: "mcp",
1801
+ name: toolName,
1802
+ normalizedName: server && mcpTool ? `${server}.${mcpTool}` : toolName,
1803
+ mcpServer: server,
1804
+ mcpTool,
1805
+ };
1806
+ }
1807
+
1808
+ if (typeof argRecord.server === "string" && typeof argRecord.tool === "string") {
1809
+ return {
1810
+ kind: "mcp",
1811
+ name: toolName,
1812
+ normalizedName: `${argRecord.server}.${argRecord.tool}`,
1813
+ mcpServer: argRecord.server,
1814
+ mcpTool: argRecord.tool,
1815
+ };
1816
+ }
1817
+
1818
+ if (toolName.includes(":")) {
1819
+ const [server, ...toolParts] = toolName.split(":");
1820
+ const mcpTool = toolParts.join(":") || undefined;
1821
+ return {
1822
+ kind: "mcp",
1823
+ name: toolName,
1824
+ normalizedName: server && mcpTool ? `${server}.${mcpTool}` : toolName,
1825
+ mcpServer: server,
1826
+ mcpTool,
1827
+ };
1828
+ }
1829
+
1830
+ switch (toolName) {
1831
+ case "Bash":
1832
+ case "bash":
1833
+ case "command_execution":
1834
+ return { kind: "shell", name: toolName, normalizedName: toolName };
1835
+ case "Read":
1836
+ case "Edit":
1837
+ case "Write":
1838
+ case "Delete":
1839
+ case "Grep":
1840
+ case "Glob":
1841
+ case "file_change":
1842
+ return { kind: "file", name: toolName, normalizedName: toolName };
1843
+ case "Skill":
1844
+ return { kind: "skill", name: toolName, normalizedName: toolName };
1845
+ case "Agent":
1846
+ return { kind: "agent", name: toolName, normalizedName: toolName };
1847
+ default:
1848
+ return { kind: "harness", name: toolName, normalizedName: toolName };
1849
+ }
1850
+ }
1851
+
1852
+ function providerEventAttributes(event: ProviderEvent): Attributes {
1853
+ switch (event.type) {
1854
+ case "session_init":
1855
+ return {
1856
+ "agentswarm.provider.session_id": event.sessionId,
1857
+ "agentswarm.provider.name": event.provider,
1858
+ "agentswarm.provider.meta_preview": telemetryPreview(event.providerMeta),
1859
+ };
1860
+ case "message":
1861
+ return {
1862
+ "gen_ai.message.role": event.role,
1863
+ "gen_ai.message.content_preview": telemetryPreview(event.content),
1864
+ };
1865
+ case "tool_start":
1866
+ case "tool_end": {
1867
+ const tool = classifyTool(
1868
+ event.toolName,
1869
+ event.type === "tool_start" ? event.args : undefined,
1870
+ );
1871
+ return {
1872
+ "agentswarm.tool.name": tool.name,
1873
+ "agentswarm.tool.normalized_name": tool.normalizedName,
1874
+ "agentswarm.tool.kind": tool.kind,
1875
+ "agentswarm.tool.call_id": event.toolCallId,
1876
+ "mcp.server.name": tool.mcpServer,
1877
+ "mcp.tool.name": tool.mcpTool,
1878
+ };
1879
+ }
1880
+ case "result":
1881
+ return {
1882
+ "agentswarm.session.outcome": event.isError ? "error" : "ok",
1883
+ "agentswarm.error.category": event.errorCategory,
1884
+ "gen_ai.response.model": event.cost.model,
1885
+ "gen_ai.usage.input_tokens": event.cost.inputTokens ?? 0,
1886
+ "gen_ai.usage.output_tokens": event.cost.outputTokens ?? 0,
1887
+ "agentswarm.cost.total_usd": event.cost.totalCostUsd ?? 0,
1888
+ };
1889
+ case "error":
1890
+ return {
1891
+ "agentswarm.error.category": event.category,
1892
+ "exception.message": telemetryPreview(event.message),
1893
+ };
1894
+ case "progress":
1895
+ return { "agentswarm.progress.message": telemetryPreview(event.message) };
1896
+ case "context_usage":
1897
+ return {
1898
+ "agentswarm.context.used_tokens": event.contextUsedTokens ?? undefined,
1899
+ "agentswarm.context.total_tokens": event.contextTotalTokens ?? undefined,
1900
+ "agentswarm.context.percent": event.contextPercent ?? undefined,
1901
+ "gen_ai.usage.output_tokens": event.outputTokens ?? undefined,
1902
+ };
1903
+ case "compaction":
1904
+ return {
1905
+ "agentswarm.compaction.trigger": event.compactTrigger,
1906
+ "agentswarm.compaction.pre_tokens": event.preCompactTokens,
1907
+ "agentswarm.context.total_tokens": event.contextTotalTokens,
1908
+ };
1909
+ case "custom":
1910
+ return {
1911
+ "agentswarm.provider.event_name": event.name,
1912
+ "agentswarm.provider.event_data_preview": telemetryPreview(event.data),
1913
+ };
1914
+ case "raw_log":
1915
+ case "raw_stderr":
1916
+ return {};
1917
+ }
1918
+ }
1919
+
1742
1920
  async function spawnProviderProcess(
1743
1921
  adapter: ReturnType<typeof createProviderAdapter>,
1744
1922
  opts: {
@@ -1806,7 +1984,25 @@ async function spawnProviderProcess(
1806
1984
  env: freshEnv as Record<string, string>,
1807
1985
  };
1808
1986
 
1809
- const session = await adapter.createSession(config);
1987
+ const session = await withSpan(
1988
+ "worker.session.create",
1989
+ async (span) => {
1990
+ const createdSession = await adapter.createSession(config);
1991
+ span.setAttribute("agentswarm.provider.session_id", createdSession.sessionId || "pending");
1992
+ return createdSession;
1993
+ },
1994
+ {
1995
+ "agent.id": opts.agentId,
1996
+ "agentswarm.task.id": effectiveTaskId,
1997
+ "agentswarm.task.real_id": realTaskId,
1998
+ "agentswarm.agent.role": opts.role,
1999
+ "agentswarm.harness_provider": opts.harnessProvider,
2000
+ "gen_ai.request.model": model || undefined,
2001
+ "agentswarm.session.cwd": config.cwd,
2002
+ "agentswarm.session.vcs_repo": opts.vcsRepo,
2003
+ "agentswarm.session.additional_args_count": opts.additionalArgs?.length ?? 0,
2004
+ },
2005
+ );
1810
2006
  const initialModelReport = buildLatestModelReport({
1811
2007
  model,
1812
2008
  taskModel: opts.model,
@@ -1884,6 +2080,25 @@ async function spawnProviderProcess(
1884
2080
 
1885
2081
  const eventFlushTimer = setInterval(flushEvents, EVENT_FLUSH_INTERVAL_MS);
1886
2082
  const sessionStartTime = Date.now();
2083
+ let providerSessionId = session.sessionId;
2084
+ const activeToolSpans = new Map<
2085
+ string,
2086
+ {
2087
+ span: SwarmSpan;
2088
+ startedAt: number;
2089
+ }
2090
+ >();
2091
+ const sessionSpan = startSpan("worker.session", {
2092
+ "agent.id": opts.agentId,
2093
+ "agentswarm.task.id": effectiveTaskId,
2094
+ "agentswarm.task.real_id": realTaskId,
2095
+ "agentswarm.agent.role": opts.role,
2096
+ "agentswarm.harness_provider": opts.harnessProvider,
2097
+ "agentswarm.provider.session_id": providerSessionId,
2098
+ "agentswarm.session.cwd": config.cwd,
2099
+ "agentswarm.session.vcs_repo": opts.vcsRepo,
2100
+ "gen_ai.request.model": model || undefined,
2101
+ });
1887
2102
 
1888
2103
  // Auto-progress throttle: don't update more than once per 3 seconds
1889
2104
  let lastProgressTime = 0;
@@ -1892,130 +2107,236 @@ async function spawnProviderProcess(
1892
2107
  let lastContextPostTime = 0;
1893
2108
  const CONTEXT_THROTTLE_MS = 30_000;
1894
2109
 
1895
- session.onEvent((event) => {
1896
- switch (event.type) {
1897
- case "session_init":
1898
- if (realTaskId) {
1899
- saveProviderSessionId(
1900
- opts.apiUrl,
1901
- opts.apiKey,
1902
- realTaskId,
1903
- event.sessionId,
1904
- event.provider,
1905
- event.providerMeta,
1906
- model,
1907
- ).catch((err) => console.warn(`[runner] Failed to save session ID: ${err}`));
1908
- } else {
1909
- // Pool task: save provider session ID on active session so it can be
1910
- // propagated to the real task when the agent claims one
1911
- saveProviderSessionIdOnActiveSession(
1912
- opts.apiUrl,
1913
- opts.apiKey,
1914
- effectiveTaskId,
1915
- event.sessionId,
1916
- ).catch((err) =>
1917
- console.warn(`[runner] Failed to save provider session on active session: ${err}`),
1918
- );
1919
- }
1920
-
1921
- // Buffer session start event
1922
- bufferEvent({
1923
- category: "session",
1924
- event: "session.start",
1925
- source: "worker",
1926
- agentId: opts.agentId,
1927
- taskId: effectiveTaskId,
1928
- sessionId: event.sessionId,
1929
- });
1930
- break;
1931
- case "tool_start": {
1932
- // Auto-progress: report tool activity as task progress (throttled)
1933
- const now = Date.now();
1934
- if (effectiveTaskId && opts.apiUrl && now - lastProgressTime >= PROGRESS_THROTTLE_MS) {
1935
- const progress = toolCallToProgress(event.toolName, event.args);
1936
- if (progress) {
1937
- lastProgressTime = now;
1938
- updateProgressViaAPI(opts.apiUrl, opts.apiKey, effectiveTaskId, progress).catch(
1939
- () => {},
2110
+ // Phase 10: accumulate per-turn output tokens across the session so progress
2111
+ // snapshots carry a real `cumulativeOutputTokens` (was 0 until completion).
2112
+ let cumulativeProgressOutputTokens = 0;
2113
+
2114
+ session.onEvent((event) =>
2115
+ withSpanContext(sessionSpan, () => {
2116
+ sessionSpan.addEvent(`provider.${event.type}`, providerEventAttributes(event));
2117
+ switch (event.type) {
2118
+ case "session_init":
2119
+ providerSessionId = event.sessionId;
2120
+ sessionSpan.setAttributes({
2121
+ "agentswarm.provider.session_id": providerSessionId,
2122
+ "agentswarm.provider.name": event.provider,
2123
+ "agentswarm.provider.meta_preview": telemetryPreview(event.providerMeta),
2124
+ });
2125
+ if (realTaskId) {
2126
+ saveProviderSessionId(
2127
+ opts.apiUrl,
2128
+ opts.apiKey,
2129
+ realTaskId,
2130
+ event.sessionId,
2131
+ event.provider,
2132
+ event.providerMeta,
2133
+ model,
2134
+ ).catch((err) => console.warn(`[runner] Failed to save session ID: ${err}`));
2135
+ } else {
2136
+ // Pool task: save provider session ID on active session so it can be
2137
+ // propagated to the real task when the agent claims one
2138
+ saveProviderSessionIdOnActiveSession(
2139
+ opts.apiUrl,
2140
+ opts.apiKey,
2141
+ effectiveTaskId,
2142
+ event.sessionId,
2143
+ ).catch((err) =>
2144
+ console.warn(`[runner] Failed to save provider session on active session: ${err}`),
1940
2145
  );
1941
2146
  }
1942
- }
1943
2147
 
1944
- // Buffer tool event
1945
- bufferEvent({
1946
- category: "tool",
1947
- event: "tool.start",
1948
- source: "worker",
1949
- agentId: opts.agentId,
1950
- taskId: effectiveTaskId,
1951
- sessionId: opts.runnerSessionId,
1952
- data: {
1953
- toolName: event.toolName,
1954
- toolCallId: event.toolCallId,
1955
- ...extractToolKey(event.toolName, event.args),
1956
- clientTimestamp: new Date().toISOString(),
1957
- },
1958
- });
2148
+ // Buffer session start event
2149
+ bufferEvent({
2150
+ category: "session",
2151
+ event: "session.start",
2152
+ source: "worker",
2153
+ agentId: opts.agentId,
2154
+ taskId: effectiveTaskId,
2155
+ sessionId: event.sessionId,
2156
+ });
2157
+ break;
2158
+ case "tool_start": {
2159
+ const tool = classifyTool(event.toolName, event.args);
2160
+ const toolSpan = startSpan(tool.kind === "mcp" ? "worker.mcp.tool" : "worker.tool", {
2161
+ "agent.id": opts.agentId,
2162
+ "agentswarm.task.id": effectiveTaskId,
2163
+ "agentswarm.task.real_id": realTaskId,
2164
+ "agentswarm.agent.role": opts.role,
2165
+ "agentswarm.harness_provider": opts.harnessProvider,
2166
+ "agentswarm.provider.session_id": providerSessionId,
2167
+ "agentswarm.tool.name": tool.name,
2168
+ "agentswarm.tool.normalized_name": tool.normalizedName,
2169
+ "agentswarm.tool.kind": tool.kind,
2170
+ "agentswarm.tool.call_id": event.toolCallId,
2171
+ "mcp.server.name": tool.mcpServer,
2172
+ "mcp.tool.name": tool.mcpTool,
2173
+ "agentswarm.tool.args_preview": telemetryPreview(event.args),
2174
+ });
2175
+ activeToolSpans.set(event.toolCallId, {
2176
+ span: toolSpan,
2177
+ startedAt: Date.now(),
2178
+ });
1959
2179
 
1960
- // Also emit skill event when tool is Skill
1961
- if (event.toolName === "Skill") {
1962
- const args = event.args as Record<string, unknown>;
2180
+ // Auto-progress: report tool activity as task progress (throttled)
2181
+ const now = Date.now();
2182
+ if (effectiveTaskId && opts.apiUrl && now - lastProgressTime >= PROGRESS_THROTTLE_MS) {
2183
+ const progress = toolCallToProgress(event.toolName, event.args);
2184
+ if (progress) {
2185
+ lastProgressTime = now;
2186
+ updateProgressViaAPI(opts.apiUrl, opts.apiKey, effectiveTaskId, progress).catch(
2187
+ () => {},
2188
+ );
2189
+ }
2190
+ }
2191
+
2192
+ // Buffer tool event
1963
2193
  bufferEvent({
1964
- category: "skill",
1965
- event: "skill.invoke",
2194
+ category: "tool",
2195
+ event: "tool.start",
1966
2196
  source: "worker",
1967
2197
  agentId: opts.agentId,
1968
2198
  taskId: effectiveTaskId,
1969
2199
  sessionId: opts.runnerSessionId,
1970
2200
  data: {
1971
- skillName: args.skill as string,
2201
+ toolName: event.toolName,
2202
+ toolCallId: event.toolCallId,
2203
+ ...extractToolKey(event.toolName, event.args),
1972
2204
  clientTimestamp: new Date().toISOString(),
1973
2205
  },
1974
2206
  });
2207
+
2208
+ // Also emit skill event when tool is Skill
2209
+ if (event.toolName === "Skill") {
2210
+ const args = event.args as Record<string, unknown>;
2211
+ bufferEvent({
2212
+ category: "skill",
2213
+ event: "skill.invoke",
2214
+ source: "worker",
2215
+ agentId: opts.agentId,
2216
+ taskId: effectiveTaskId,
2217
+ sessionId: opts.runnerSessionId,
2218
+ data: {
2219
+ skillName: args.skill as string,
2220
+ clientTimestamp: new Date().toISOString(),
2221
+ },
2222
+ });
2223
+ }
2224
+ break;
1975
2225
  }
1976
- break;
1977
- }
1978
- case "result":
1979
- {
1980
- const latestModel = buildLatestModelReport({
1981
- model: event.cost.model,
1982
- taskModel: opts.model,
1983
- configModel,
1984
- taskId: realTaskId,
1985
- harnessProvider: opts.harnessProvider,
2226
+ case "tool_end": {
2227
+ const active = activeToolSpans.get(event.toolCallId);
2228
+ const now = Date.now();
2229
+ if (active) {
2230
+ active.span.setAttributes({
2231
+ "agentswarm.tool.duration_ms": now - active.startedAt,
2232
+ "agentswarm.tool.result_preview": telemetryPreview(event.result),
2233
+ });
2234
+ active.span.setStatus({ code: 1 });
2235
+ active.span.end();
2236
+ activeToolSpans.delete(event.toolCallId);
2237
+ } else {
2238
+ const tool = classifyTool(event.toolName, undefined);
2239
+ const span = startSpan(tool.kind === "mcp" ? "worker.mcp.tool" : "worker.tool", {
2240
+ "agent.id": opts.agentId,
2241
+ "agentswarm.task.id": effectiveTaskId,
2242
+ "agentswarm.task.real_id": realTaskId,
2243
+ "agentswarm.agent.role": opts.role,
2244
+ "agentswarm.harness_provider": opts.harnessProvider,
2245
+ "agentswarm.provider.session_id": providerSessionId,
2246
+ "agentswarm.tool.name": tool.name,
2247
+ "agentswarm.tool.normalized_name": tool.normalizedName,
2248
+ "agentswarm.tool.kind": tool.kind,
2249
+ "agentswarm.tool.call_id": event.toolCallId,
2250
+ "mcp.server.name": tool.mcpServer,
2251
+ "mcp.tool.name": tool.mcpTool,
2252
+ "agentswarm.tool.result_preview": telemetryPreview(event.result),
2253
+ "agentswarm.tool.missing_start": true,
2254
+ });
2255
+ span.end();
2256
+ }
2257
+ break;
2258
+ }
2259
+ case "result":
2260
+ {
2261
+ const latestModel = buildLatestModelReport({
2262
+ model: event.cost.model,
2263
+ taskModel: opts.model,
2264
+ configModel,
2265
+ taskId: realTaskId,
2266
+ harnessProvider: opts.harnessProvider,
2267
+ });
2268
+ if (latestModel) {
2269
+ reportLatestModel(opts.apiUrl, opts.apiKey, opts.agentId, latestModel).catch((err) =>
2270
+ console.warn(`[runner] Failed to report latest model: ${err}`),
2271
+ );
2272
+ }
2273
+ }
2274
+ // Cost save is handled in waitForCompletion().then() to ensure
2275
+ // it completes before the process exits (fire-and-forget here
2276
+ // races with container shutdown).
2277
+
2278
+ // Buffer session end event
2279
+ bufferEvent({
2280
+ category: "session",
2281
+ event: "session.end",
2282
+ source: "worker",
2283
+ agentId: opts.agentId,
2284
+ taskId: effectiveTaskId,
2285
+ sessionId: opts.runnerSessionId,
2286
+ status: event.isError ? "error" : "ok",
2287
+ durationMs: Date.now() - sessionStartTime,
2288
+ data: {
2289
+ model: event.cost.model,
2290
+ totalCostUsd: event.cost.totalCostUsd,
2291
+ inputTokens: event.cost.inputTokens,
2292
+ outputTokens: event.cost.outputTokens,
2293
+ },
1986
2294
  });
1987
- if (latestModel) {
1988
- reportLatestModel(opts.apiUrl, opts.apiKey, opts.agentId, latestModel).catch((err) =>
1989
- console.warn(`[runner] Failed to report latest model: ${err}`),
1990
- );
2295
+ break;
2296
+ case "error":
2297
+ sessionSpan.setStatus({
2298
+ code: 2,
2299
+ message: event.message,
2300
+ });
2301
+ break;
2302
+ case "context_usage": {
2303
+ const now2 = Date.now();
2304
+ if (now2 - lastContextPostTime >= CONTEXT_THROTTLE_MS) {
2305
+ lastContextPostTime = now2;
2306
+ // Phase 10: track cumulative output tokens on the worker side and
2307
+ // forward them on every progress snapshot. Previously these were
2308
+ // 0 until the `completion` snapshot at session end, so the
2309
+ // dashboard's "tokens consumed" line was a flat zero throughout.
2310
+ cumulativeProgressOutputTokens += event.outputTokens ?? 0;
2311
+ // For inputs we don't get a per-turn delta on the `context_usage`
2312
+ // event (the unified formula bakes it into contextUsedTokens), so
2313
+ // we report the latest contextUsedTokens as the running input proxy.
2314
+ // The DB column is `cumulativeInputTokens` but the semantic on
2315
+ // progress rows is "running used-tokens" — both inputs and outputs
2316
+ // contribute, exact decomposition lives on the cost row.
2317
+ fetch(`${opts.apiUrl}/api/tasks/${realTaskId}/context`, {
2318
+ method: "POST",
2319
+ headers: {
2320
+ "Content-Type": "application/json",
2321
+ "X-Agent-ID": opts.agentId,
2322
+ Authorization: `Bearer ${opts.apiKey}`,
2323
+ },
2324
+ body: JSON.stringify({
2325
+ eventType: "progress",
2326
+ sessionId: opts.runnerSessionId,
2327
+ contextUsedTokens: event.contextUsedTokens,
2328
+ contextTotalTokens: event.contextTotalTokens,
2329
+ contextPercent: event.contextPercent,
2330
+ cumulativeInputTokens: event.contextUsedTokens,
2331
+ cumulativeOutputTokens: cumulativeProgressOutputTokens,
2332
+ contextFormula: event.contextFormula,
2333
+ }),
2334
+ }).catch(() => {});
1991
2335
  }
2336
+ break;
1992
2337
  }
1993
- // Cost save is handled in waitForCompletion().then() to ensure
1994
- // it completes before the process exits (fire-and-forget here
1995
- // races with container shutdown).
1996
-
1997
- // Buffer session end event
1998
- bufferEvent({
1999
- category: "session",
2000
- event: "session.end",
2001
- source: "worker",
2002
- agentId: opts.agentId,
2003
- taskId: effectiveTaskId,
2004
- sessionId: opts.runnerSessionId,
2005
- status: event.isError ? "error" : "ok",
2006
- durationMs: Date.now() - sessionStartTime,
2007
- data: {
2008
- model: event.cost.model,
2009
- totalCostUsd: event.cost.totalCostUsd,
2010
- inputTokens: event.cost.inputTokens,
2011
- outputTokens: event.cost.outputTokens,
2012
- },
2013
- });
2014
- break;
2015
- case "context_usage": {
2016
- const now2 = Date.now();
2017
- if (now2 - lastContextPostTime >= CONTEXT_THROTTLE_MS) {
2018
- lastContextPostTime = now2;
2338
+ case "compaction": {
2339
+ // Always record compaction events (no throttle)
2019
2340
  fetch(`${opts.apiUrl}/api/tasks/${realTaskId}/context`, {
2020
2341
  method: "POST",
2021
2342
  headers: {
@@ -2024,153 +2345,186 @@ async function spawnProviderProcess(
2024
2345
  Authorization: `Bearer ${opts.apiKey}`,
2025
2346
  },
2026
2347
  body: JSON.stringify({
2027
- eventType: "progress",
2348
+ eventType: "compaction",
2028
2349
  sessionId: opts.runnerSessionId,
2029
- contextUsedTokens: event.contextUsedTokens,
2350
+ preCompactTokens: event.preCompactTokens,
2351
+ compactTrigger: event.compactTrigger,
2030
2352
  contextTotalTokens: event.contextTotalTokens,
2031
- contextPercent: event.contextPercent,
2032
2353
  }),
2033
2354
  }).catch(() => {});
2355
+ break;
2034
2356
  }
2035
- break;
2036
- }
2037
- case "compaction": {
2038
- // Always record compaction events (no throttle)
2039
- fetch(`${opts.apiUrl}/api/tasks/${realTaskId}/context`, {
2040
- method: "POST",
2041
- headers: {
2042
- "Content-Type": "application/json",
2043
- "X-Agent-ID": opts.agentId,
2044
- Authorization: `Bearer ${opts.apiKey}`,
2045
- },
2046
- body: JSON.stringify({
2047
- eventType: "compaction",
2048
- sessionId: opts.runnerSessionId,
2049
- preCompactTokens: event.preCompactTokens,
2050
- compactTrigger: event.compactTrigger,
2051
- contextTotalTokens: event.contextTotalTokens,
2052
- }),
2053
- }).catch(() => {});
2054
- break;
2055
- }
2056
- case "raw_log":
2057
- prettyPrintLine(event.content, opts.role);
2058
- if (shouldStream) {
2059
- logBuffer.lines.push(event.content);
2060
- const shouldFlush =
2061
- logBuffer.lines.length >= LOG_BUFFER_SIZE ||
2062
- Date.now() - logBuffer.lastFlush >= LOG_FLUSH_INTERVAL_MS;
2063
- if (shouldFlush) {
2064
- flushLogBuffer(logBuffer, {
2065
- apiUrl: opts.apiUrl,
2066
- apiKey: opts.apiKey,
2067
- agentId: opts.agentId,
2068
- sessionId: opts.runnerSessionId,
2069
- iteration: opts.iteration,
2070
- taskId: effectiveTaskId,
2071
- cli: adapter.name,
2072
- }).catch(() => {});
2357
+ case "raw_log":
2358
+ prettyPrintLine(event.content, opts.role);
2359
+ if (shouldStream) {
2360
+ logBuffer.lines.push(event.content);
2361
+ const shouldFlush =
2362
+ logBuffer.lines.length >= LOG_BUFFER_SIZE ||
2363
+ Date.now() - logBuffer.lastFlush >= LOG_FLUSH_INTERVAL_MS;
2364
+ if (shouldFlush) {
2365
+ flushLogBuffer(logBuffer, {
2366
+ apiUrl: opts.apiUrl,
2367
+ apiKey: opts.apiKey,
2368
+ agentId: opts.agentId,
2369
+ sessionId: opts.runnerSessionId,
2370
+ iteration: opts.iteration,
2371
+ taskId: effectiveTaskId,
2372
+ cli: adapter.name,
2373
+ }).catch(() => {});
2374
+ }
2073
2375
  }
2074
- }
2075
- break;
2076
- case "raw_stderr":
2077
- prettyPrintStderr(event.content, opts.role);
2078
- break;
2079
-
2080
- case "progress": {
2081
- if (effectiveTaskId && opts.apiUrl) {
2082
- const now = Date.now();
2083
- if (now - lastProgressTime >= PROGRESS_THROTTLE_MS) {
2084
- lastProgressTime = now;
2085
- updateProgressViaAPI(opts.apiUrl, opts.apiKey, effectiveTaskId, event.message).catch(
2086
- () => {},
2087
- );
2376
+ break;
2377
+ case "raw_stderr":
2378
+ prettyPrintStderr(event.content, opts.role);
2379
+ break;
2380
+
2381
+ case "progress": {
2382
+ if (effectiveTaskId && opts.apiUrl) {
2383
+ const now = Date.now();
2384
+ if (now - lastProgressTime >= PROGRESS_THROTTLE_MS) {
2385
+ lastProgressTime = now;
2386
+ updateProgressViaAPI(opts.apiUrl, opts.apiKey, effectiveTaskId, event.message).catch(
2387
+ () => {},
2388
+ );
2389
+ }
2088
2390
  }
2391
+ break;
2089
2392
  }
2090
- break;
2091
2393
  }
2092
- }
2093
- });
2394
+ }),
2395
+ );
2094
2396
 
2095
- // Create promise that handles completion
2096
- const promise: Promise<ProviderResult> = session.waitForCompletion().then(async (result) => {
2097
- // Stop event flush timer and do a final flush
2098
- clearInterval(eventFlushTimer);
2099
- await flushEvents();
2100
-
2101
- // Final log flush
2102
- if (shouldStream && logBuffer.lines.length > 0) {
2103
- await flushLogBuffer(logBuffer, {
2104
- apiUrl: opts.apiUrl,
2105
- apiKey: opts.apiKey,
2106
- agentId: opts.agentId,
2107
- sessionId: opts.runnerSessionId,
2108
- iteration: opts.iteration,
2109
- taskId: effectiveTaskId,
2110
- cli: adapter.name,
2397
+ function closeActiveToolSpans(status: "ok" | "error", message?: string) {
2398
+ for (const [toolCallId, active] of activeToolSpans) {
2399
+ active.span.setAttributes({
2400
+ "agentswarm.tool.duration_ms": Date.now() - active.startedAt,
2401
+ "agentswarm.tool.unclosed": true,
2402
+ "agentswarm.tool.call_id": toolCallId,
2111
2403
  });
2404
+ if (status === "error") {
2405
+ active.span.setStatus({ code: 2, message: message || "session ended before tool_end" });
2406
+ }
2407
+ active.span.end();
2408
+ activeToolSpans.delete(toolCallId);
2112
2409
  }
2410
+ }
2113
2411
 
2114
- // Error logging for non-zero exit
2115
- if (result.exitCode !== 0) {
2116
- const errorLog = {
2117
- timestamp: new Date().toISOString(),
2118
- iteration: opts.iteration,
2119
- exitCode: result.exitCode,
2120
- taskId: effectiveTaskId,
2121
- error: true,
2122
- };
2412
+ // Create promise that handles completion
2413
+ const promise: Promise<ProviderResult> = session
2414
+ .waitForCompletion()
2415
+ .then((result) =>
2416
+ withSpanContext(sessionSpan, async () => {
2417
+ // Stop event flush timer and do a final flush
2418
+ clearInterval(eventFlushTimer);
2419
+ await flushEvents();
2420
+
2421
+ // Final log flush
2422
+ if (shouldStream && logBuffer.lines.length > 0) {
2423
+ await flushLogBuffer(logBuffer, {
2424
+ apiUrl: opts.apiUrl,
2425
+ apiKey: opts.apiKey,
2426
+ agentId: opts.agentId,
2427
+ sessionId: opts.runnerSessionId,
2428
+ iteration: opts.iteration,
2429
+ taskId: effectiveTaskId,
2430
+ cli: adapter.name,
2431
+ });
2432
+ }
2433
+
2434
+ // Error logging for non-zero exit
2435
+ if (result.exitCode !== 0) {
2436
+ const errorLog = {
2437
+ timestamp: new Date().toISOString(),
2438
+ iteration: opts.iteration,
2439
+ exitCode: result.exitCode,
2440
+ taskId: effectiveTaskId,
2441
+ error: true,
2442
+ };
2123
2443
 
2124
- const errorsFile = `${logDir}/errors.jsonl`;
2125
- const errorsFileRef = Bun.file(errorsFile);
2126
- const existingErrors = (await errorsFileRef.exists()) ? await errorsFileRef.text() : "";
2127
- await Bun.write(errorsFile, `${existingErrors}${JSON.stringify(errorLog)}\n`);
2444
+ const errorsFile = `${logDir}/errors.jsonl`;
2445
+ const errorsFileRef = Bun.file(errorsFile);
2446
+ const existingErrors = (await errorsFileRef.exists()) ? await errorsFileRef.text() : "";
2447
+ await Bun.write(errorsFile, `${existingErrors}${JSON.stringify(errorLog)}\n`);
2128
2448
 
2129
- if (!isYolo) {
2130
- console.error(
2131
- `[${opts.role}] Task ${effectiveTaskId.slice(0, 8)} exited with code ${result.exitCode}.`,
2132
- );
2133
- } else {
2134
- console.warn(
2135
- `[${opts.role}] Task ${effectiveTaskId.slice(0, 8)} exited with code ${result.exitCode}. YOLO mode - continuing...`,
2136
- );
2137
- }
2138
- }
2449
+ if (!isYolo) {
2450
+ console.error(
2451
+ `[${opts.role}] Task ${effectiveTaskId.slice(0, 8)} exited with code ${result.exitCode}.`,
2452
+ );
2453
+ } else {
2454
+ console.warn(
2455
+ `[${opts.role}] Task ${effectiveTaskId.slice(0, 8)} exited with code ${result.exitCode}. YOLO mode - continuing...`,
2456
+ );
2457
+ }
2458
+ }
2139
2459
 
2140
- // Save cost data (awaited to ensure it completes before container exits)
2141
- if (result.cost) {
2142
- try {
2143
- await saveCostData(
2144
- { ...result.cost, taskId: realTaskId, sessionId: opts.runnerSessionId },
2145
- opts.apiUrl,
2146
- opts.apiKey,
2147
- );
2148
- } catch (err) {
2149
- console.warn(`[runner] Failed to save cost: ${err}`);
2150
- }
2151
- }
2460
+ // Save cost data (awaited to ensure it completes before container exits)
2461
+ if (result.cost) {
2462
+ sessionSpan.setAttributes({
2463
+ "gen_ai.response.model": result.cost.model,
2464
+ "gen_ai.usage.input_tokens": result.cost.inputTokens ?? 0,
2465
+ "gen_ai.usage.output_tokens": result.cost.outputTokens ?? 0,
2466
+ "agentswarm.cost.total_usd": result.cost.totalCostUsd ?? 0,
2467
+ });
2468
+ try {
2469
+ await saveCostData(
2470
+ { ...result.cost, taskId: realTaskId, sessionId: opts.runnerSessionId },
2471
+ opts.apiUrl,
2472
+ opts.apiKey,
2473
+ );
2474
+ } catch (err) {
2475
+ console.warn(`[runner] Failed to save cost: ${err}`);
2476
+ }
2477
+ }
2152
2478
 
2153
- // Post completion context usage snapshot
2154
- if (result.cost && realTaskId) {
2155
- fetch(`${opts.apiUrl}/api/tasks/${realTaskId}/context`, {
2156
- method: "POST",
2157
- headers: {
2158
- "Content-Type": "application/json",
2159
- "X-Agent-ID": opts.agentId,
2160
- Authorization: `Bearer ${opts.apiKey}`,
2161
- },
2162
- body: JSON.stringify({
2163
- eventType: "completion",
2164
- sessionId: opts.runnerSessionId,
2165
- cumulativeInputTokens: result.cost.inputTokens ?? 0,
2166
- cumulativeOutputTokens: result.cost.outputTokens ?? 0,
2167
- contextTotalTokens: getContextWindowSize(result.cost.model || "default"),
2168
- }),
2169
- }).catch(() => {});
2170
- }
2479
+ // Post completion context usage snapshot
2480
+ if (result.cost && realTaskId) {
2481
+ fetch(`${opts.apiUrl}/api/tasks/${realTaskId}/context`, {
2482
+ method: "POST",
2483
+ headers: {
2484
+ "Content-Type": "application/json",
2485
+ "X-Agent-ID": opts.agentId,
2486
+ Authorization: `Bearer ${opts.apiKey}`,
2487
+ },
2488
+ body: JSON.stringify({
2489
+ eventType: "completion",
2490
+ sessionId: opts.runnerSessionId,
2491
+ cumulativeInputTokens: result.cost.inputTokens ?? 0,
2492
+ cumulativeOutputTokens: result.cost.outputTokens ?? 0,
2493
+ contextTotalTokens: getContextWindowSize(result.cost.model || "default"),
2494
+ }),
2495
+ }).catch(() => {});
2496
+ }
2171
2497
 
2172
- return result;
2173
- });
2498
+ sessionSpan.setAttributes({
2499
+ "agentswarm.session.duration_ms": Date.now() - sessionStartTime,
2500
+ "agentswarm.session.exit_code": result.exitCode,
2501
+ "agentswarm.session.outcome": result.exitCode === 0 ? "ok" : "error",
2502
+ });
2503
+ if (result.exitCode !== 0) {
2504
+ sessionSpan.setStatus({
2505
+ code: 2,
2506
+ message: result.failureReason || `Provider exited with ${result.exitCode}`,
2507
+ });
2508
+ }
2509
+ closeActiveToolSpans(result.exitCode === 0 ? "ok" : "error", result.failureReason);
2510
+ sessionSpan.end();
2511
+
2512
+ return result;
2513
+ }),
2514
+ )
2515
+ .catch((error) =>
2516
+ withSpanContext(sessionSpan, () => {
2517
+ clearInterval(eventFlushTimer);
2518
+ sessionSpan.recordException(error);
2519
+ sessionSpan.setStatus({
2520
+ code: 2,
2521
+ message: error instanceof Error ? error.message : String(error),
2522
+ });
2523
+ closeActiveToolSpans("error", error instanceof Error ? error.message : String(error));
2524
+ sessionSpan.end();
2525
+ throw error;
2526
+ }),
2527
+ );
2174
2528
 
2175
2529
  // Build credential info for rate limit tracking
2176
2530
  const primarySelection = credentialSelections[0] ?? oauthSelection;
@@ -2402,6 +2756,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
2402
2756
 
2403
2757
  // Initialize Business-Use SDK for worker-side instrumentation
2404
2758
  initialize();
2759
+ await initOtel(role);
2405
2760
 
2406
2761
  const sessionId = process.env.SESSION_ID || crypto.randomUUID().slice(0, 8);
2407
2762
  const baseLogDir = opts.logsDir || process.env.LOG_DIR || "/logs";