bosun 0.42.2 → 0.42.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/.env.example +9 -0
  2. package/agent/agent-event-bus.mjs +10 -0
  3. package/agent/agent-supervisor.mjs +20 -0
  4. package/bosun-tui.mjs +107 -105
  5. package/cli.mjs +10 -0
  6. package/config/config.mjs +25 -0
  7. package/config/executor-config.mjs +124 -1
  8. package/infra/container-runner.mjs +565 -1
  9. package/infra/monitor.mjs +18 -0
  10. package/infra/tracing.mjs +544 -240
  11. package/infra/tui-bridge.mjs +13 -1
  12. package/kanban/kanban-adapter.mjs +128 -4
  13. package/lib/repo-map.mjs +114 -3
  14. package/package.json +11 -4
  15. package/server/ui-server.mjs +3 -0
  16. package/task/task-archiver.mjs +18 -6
  17. package/task/task-attachments.mjs +14 -10
  18. package/task/task-cli.mjs +24 -4
  19. package/task/task-executor.mjs +19 -0
  20. package/task/task-store.mjs +194 -37
  21. package/telegram/telegram-bot.mjs +4 -1
  22. package/tui/app.mjs +131 -171
  23. package/tui/components/status-header.mjs +178 -75
  24. package/tui/lib/header-config.mjs +68 -0
  25. package/tui/lib/ws-bridge.mjs +61 -9
  26. package/tui/screens/agents.mjs +127 -0
  27. package/tui/screens/tasks.mjs +1 -48
  28. package/ui/app.js +8 -5
  29. package/ui/components/kanban-board.js +65 -3
  30. package/ui/components/session-list.js +18 -32
  31. package/ui/demo-defaults.js +52 -2
  32. package/ui/modules/session-api.js +100 -0
  33. package/ui/modules/state.js +71 -15
  34. package/ui/tabs/workflows.js +25 -1
  35. package/ui/tui/App.js +298 -0
  36. package/ui/tui/TasksScreen.js +564 -0
  37. package/ui/tui/constants.js +55 -0
  38. package/ui/tui/tasks-screen-helpers.js +301 -0
  39. package/ui/tui/useTasks.js +61 -0
  40. package/ui/tui/useWebSocket.js +166 -0
  41. package/ui/tui/useWorkflows.js +30 -0
  42. package/workflow/workflow-engine.mjs +412 -7
  43. package/workflow/workflow-nodes.mjs +616 -75
  44. package/workflow-templates/agents.mjs +3 -0
  45. package/workflow-templates/planning.mjs +7 -0
  46. package/workflow-templates/sub-workflows.mjs +5 -0
  47. package/workflow-templates/task-execution.mjs +3 -0
  48. package/workspace/command-diagnostics.mjs +1 -1
  49. package/workspace/context-cache.mjs +182 -9
package/infra/tracing.mjs CHANGED
@@ -1,293 +1,597 @@
1
1
  /**
2
- * OpenTelemetry Tracing for Bosun Agent Observability
2
+ * OpenTelemetry tracing helpers for Bosun orchestration.
3
3
  *
4
- * Provides industry-standard tracing for:
5
- * - Agent sessions
6
- * - Workflow executions
7
- * - Task lifecycle
8
- * - Tool calls
9
- *
10
- * @module tracing
4
+ * Tracing stays external to agent prompts/context. When disabled, helpers
5
+ * degrade to no-op wrappers so there is no agent token impact.
11
6
  */
12
7
 
13
- import { existsSync, mkdirSync, appendFileSync } from "node:fs";
14
- import { resolve, dirname } from "node:path";
15
- import { fileURLToPath } from "node:url";
16
-
17
- const __dirname = dirname(fileURLToPath(import.meta.url));
18
- const TRACE_DIR = resolve(__dirname, "..", ".cache", "traces");
19
-
20
- const DEFAULT_CONFIG = {
21
- enabled: process.env.OTEL_ENABLED === "true",
22
- serviceName: process.env.OTEL_SERVICE_NAME || "bosun",
23
- exporter: process.env.OTEL_EXPORTER || "console",
24
- endpoint: process.env.OTEL_EXPORTER_ENDPOINT || "http://localhost:4318/v1/traces",
25
- traceRatio: parseFloat(process.env.OTEL_TRACE_RATIO) || 1.0,
26
- consoleExport: process.env.OTEL_CONSOLE_EXPORT === "true",
27
- fileExport: process.env.OTEL_FILE_EXPORT === "true",
28
- filePath: process.env.OTEL_FILE_PATH || resolve(TRACE_DIR, "traces.jsonl"),
29
- };
30
-
31
- let _config = { ...DEFAULT_CONFIG };
32
- let _tracer = null;
33
- let _spans = new Map();
34
- let _spanIdCounter = 0;
35
-
36
- function generateSpanId() {
37
- return `span-${++_spanIdCounter}-${Date.now()}`;
8
+ import { AsyncLocalStorage } from "node:async_hooks";
9
+
10
+ const DEFAULT_SERVICE_NAME = "bosun";
11
+ const DEFAULT_SERVICE_VERSION = process.env.npm_package_version || "0.42.0";
12
+ const TRACE_ID_BYTES = 16;
13
+ const SPAN_ID_BYTES = 8;
14
+ const DEFAULT_EXPORT_TIMEOUT_MS = 1000;
15
+
16
+ const contextStore = new AsyncLocalStorage();
17
+
18
+ const NOOP_METER = Object.freeze({
19
+ createHistogram() {
20
+ return { record() {} };
21
+ },
22
+ createCounter() {
23
+ return { add() {} };
24
+ },
25
+ createUpDownCounter() {
26
+ return { add() {} };
27
+ },
28
+ });
29
+
30
+ function randomId(bytes = 8) {
31
+ let value = "";
32
+ while (value.length < bytes * 2) {
33
+ value += Math.random().toString(16).slice(2);
34
+ }
35
+ return value.slice(0, bytes * 2);
38
36
  }
39
37
 
40
- function generateTraceId() {
41
- return `trace-${Date.now()}-${Math.random().toString(36).slice(2, 15)}`;
38
+ function nowHrTime() {
39
+ return process.hrtime.bigint();
42
40
  }
43
41
 
44
- function initTracing(config = {}) {
45
- _config = { ..._config, ...config };
42
+ function durationMs(startTime) {
43
+ return Number(process.hrtime.bigint() - startTime) / 1_000_000;
44
+ }
46
45
 
47
- if (!_config.enabled) {
48
- console.log("[tracing] OpenTelemetry tracing disabled");
49
- return null;
50
- }
46
+ function omitUndefined(object) {
47
+ return Object.fromEntries(
48
+ Object.entries(object).filter(([, value]) => value !== undefined && value !== null && value !== ""),
49
+ );
50
+ }
51
51
 
52
- try {
53
- mkdirSync(TRACE_DIR, { recursive: true });
54
- } catch { /* best effort */ }
52
+ function inferProtocol(endpoint) {
53
+ if (typeof endpoint !== "string" || endpoint.length === 0) {
54
+ return "otlp-http";
55
+ }
56
+ return endpoint.startsWith("http") ? "otlp-http" : "otlp-grpc";
57
+ }
55
58
 
56
- console.log(`[tracing] Initialized with exporter: ${_config.exporter}`);
57
- _tracer = {
58
- serviceName: _config.serviceName,
59
- startTime: Date.now(),
60
- spans: _spans,
61
- };
59
+ function createMetricStore() {
60
+ const counters = new Map();
61
+ const gauges = new Map();
62
+ const histograms = new Map();
63
+
64
+ return {
65
+ counters,
66
+ gauges,
67
+ histograms,
68
+ meter: {
69
+ createCounter(name) {
70
+ return {
71
+ add(value, attributes = {}) {
72
+ const bucket = counters.get(name) || [];
73
+ bucket.push({ value, attributes });
74
+ counters.set(name, bucket);
75
+ },
76
+ };
77
+ },
78
+ createUpDownCounter(name) {
79
+ return {
80
+ add(value, attributes = {}) {
81
+ const bucket = gauges.get(name) || [];
82
+ bucket.push({ value, attributes });
83
+ gauges.set(name, bucket);
84
+ },
85
+ };
86
+ },
87
+ createHistogram(name) {
88
+ return {
89
+ record(value, attributes = {}) {
90
+ const bucket = histograms.get(name) || [];
91
+ bucket.push({ value, attributes });
92
+ histograms.set(name, bucket);
93
+ },
94
+ };
95
+ },
96
+ },
97
+ };
98
+ }
62
99
 
63
- return _tracer;
100
+ function createNoopState() {
101
+ return {
102
+ enabled: false,
103
+ endpoint: null,
104
+ sampleRate: 0,
105
+ serviceName: DEFAULT_SERVICE_NAME,
106
+ serviceVersion: DEFAULT_SERVICE_VERSION,
107
+ exporter: null,
108
+ meter: NOOP_METER,
109
+ metrics: createMetricStore(),
110
+ provider: null,
111
+ tracer: null,
112
+ sdk: null,
113
+ api: null,
114
+ statusCodes: null,
115
+ spanKind: null,
116
+ activeSpans: new Map(),
117
+ finishedSpans: [],
118
+ };
64
119
  }
65
120
 
66
- function shouldTrace() {
67
- if (!_config.enabled) return false;
68
- return Math.random() < _config.traceRatio;
121
+ let tracingState = createNoopState();
122
+ let metricInstruments = null;
123
+
124
+ function clampSampleRate(value) {
125
+ const numeric = Number(value);
126
+ if (!Number.isFinite(numeric)) return 1;
127
+ return Math.min(1, Math.max(0, numeric));
69
128
  }
70
129
 
71
- function startSpan(name, attributes = {}, parentSpanId = null) {
72
- if (!shouldTrace()) return null;
73
-
74
- const span = {
75
- name,
76
- traceId: generateTraceId(),
77
- spanId: generateSpanId(),
78
- parentSpanId,
79
- attributes: { ...attributes },
80
- startTime: Date.now(),
81
- endTime: null,
82
- status: "ok",
83
- events: [],
84
- };
85
-
86
- _spans.set(span.spanId, span);
87
- return span;
130
+ function getCurrentContext() {
131
+ return contextStore.getStore() || null;
88
132
  }
89
133
 
90
- function endSpan(spanId, status = "ok", error = null) {
91
- const span = _spans.get(spanId);
92
- if (!span) return;
134
+ function ensureMetricInstruments() {
135
+ const meter = tracingState.meter || NOOP_METER;
136
+ metricInstruments = {
137
+ taskDuration: meter.createHistogram("bosun.task.duration"),
138
+ taskTokensTotal: meter.createCounter("bosun.task.tokens.total"),
139
+ taskCostUsd: meter.createCounter("bosun.task.cost.usd"),
140
+ agentSessionsActive: meter.createUpDownCounter("bosun.agent.sessions.active"),
141
+ agentErrors: meter.createCounter("bosun.agent.errors"),
142
+ agentInterventions: meter.createCounter("bosun.agent.interventions"),
143
+ };
144
+ }
93
145
 
94
- span.endTime = Date.now();
95
- span.durationMs = span.endTime - span.startTime;
96
- span.status = status;
146
+ function recordMetric(name, type, value, attributes = {}) {
147
+ if (!metricInstruments) return;
148
+ const instrument = metricInstruments[name];
149
+ if (!instrument) return;
150
+ if (type === "histogram") {
151
+ instrument.record(value, attributes);
152
+ return;
153
+ }
154
+ instrument.add(value, attributes);
155
+ }
97
156
 
98
- if (error) {
99
- span.status = "error";
100
- span.attributes.error = true;
101
- span.attributes["error.message"] = error.message || String(error);
102
- span.attributes["error.stack"] = error.stack || "";
103
- }
157
+ function createLocalSpan(name, attributes = {}) {
158
+ const parent = getCurrentContext();
159
+ const traceId = parent?.traceId || randomId(TRACE_ID_BYTES);
160
+ const spanId = randomId(SPAN_ID_BYTES);
161
+ return {
162
+ name,
163
+ traceId,
164
+ spanId,
165
+ parentSpanId: parent?.spanId || null,
166
+ startTime: nowHrTime(),
167
+ endTime: null,
168
+ durationMs: null,
169
+ attributes: { ...attributes },
170
+ status: { code: "OK" },
171
+ events: [],
172
+ exceptions: [],
173
+ otelSpan: null,
174
+ };
175
+ }
176
+
177
+ function attachOtelAttributes(otelSpan, attributes = {}) {
178
+ if (!otelSpan || typeof otelSpan.setAttributes !== "function") return;
179
+ const normalized = omitUndefined(attributes);
180
+ if (Object.keys(normalized).length > 0) {
181
+ otelSpan.setAttributes(normalized);
182
+ }
183
+ }
104
184
 
105
- exportSpan(span);
106
- return span;
185
+ function syncSpanContext(span) {
186
+ const spanContext = span?.otelSpan?.spanContext?.();
187
+ if (!spanContext) return;
188
+ span.traceId = spanContext.traceId || span.traceId;
189
+ span.spanId = spanContext.spanId || span.spanId;
107
190
  }
108
191
 
109
- function addSpanEvent(spanId, name, attributes = {}) {
110
- const span = _spans.get(spanId);
111
- if (!span) return;
192
+ function finalizeLocalSpan(span) {
193
+ span.endTime = nowHrTime();
194
+ span.durationMs = durationMs(span.startTime);
195
+ tracingState.activeSpans.delete(span.spanId);
196
+ tracingState.finishedSpans.push({
197
+ ...span,
198
+ attributes: { ...span.attributes },
199
+ events: [...span.events],
200
+ exceptions: [...span.exceptions],
201
+ status: { ...span.status },
202
+ });
203
+ }
112
204
 
113
- span.events.push({
114
- name,
115
- time: Date.now(),
116
- attributes,
117
- });
205
+ async function loadOtelBindings() {
206
+ const [api, sdkNode, exporterTraceHttp, exporterMetricsHttp, resources, semantic] = await Promise.all([
207
+ import("@opentelemetry/api"),
208
+ import("@opentelemetry/sdk-node"),
209
+ import("@opentelemetry/exporter-trace-otlp-http"),
210
+ import("@opentelemetry/exporter-metrics-otlp-http"),
211
+ import("@opentelemetry/resources"),
212
+ import("@opentelemetry/semantic-conventions"),
213
+ ]);
214
+
215
+ return {
216
+ api,
217
+ NodeSDK: sdkNode.NodeSDK,
218
+ OTLPTraceExporter: exporterTraceHttp.OTLPTraceExporter,
219
+ OTLPMetricExporter: exporterMetricsHttp.OTLPMetricExporter,
220
+ resourceFromAttributes:
221
+ resources.resourceFromAttributes ||
222
+ ((attributes) => new resources.Resource(attributes)),
223
+ semantic,
224
+ };
118
225
  }
119
226
 
120
- function setSpanAttribute(spanId, key, value) {
121
- const span = _spans.get(spanId);
122
- if (!span) return;
227
+ async function shutdownSdk(sdk) {
228
+ if (!sdk?.shutdown) return;
229
+ try {
230
+ await sdk.shutdown();
231
+ } catch {
232
+ }
233
+ }
234
+
235
+ export async function setupTracing(endpointOrConfig = null) {
236
+ const inputConfig =
237
+ typeof endpointOrConfig === "string"
238
+ ? { endpoint: endpointOrConfig }
239
+ : (endpointOrConfig ?? {});
240
+
241
+ const endpoint = inputConfig.endpoint || process.env.BOSUN_OTEL_ENDPOINT || null;
242
+ const enabled = inputConfig.enabled ?? Boolean(endpoint);
243
+ const sampleRate = Number(inputConfig.sampleRate ?? 1);
244
+ const exportTimeoutMillis = Math.max(
245
+ 1,
246
+ Number(inputConfig.exportTimeoutMillis ?? DEFAULT_EXPORT_TIMEOUT_MS),
247
+ );
248
+
249
+ await shutdownSdk(tracingState.sdk);
250
+
251
+ if (!enabled || !endpoint) {
252
+ tracingState = createNoopState();
253
+ ensureMetricInstruments();
254
+ return { enabled: false, endpoint: null, sampleRate: 0 };
255
+ }
256
+
257
+ const serviceName = inputConfig.serviceName || DEFAULT_SERVICE_NAME;
258
+ const serviceVersion = inputConfig.serviceVersion || DEFAULT_SERVICE_VERSION;
259
+ const resolvedSampleRate = clampSampleRate(sampleRate);
260
+ const metrics = createMetricStore();
261
+
262
+ let sdk = null;
263
+ let tracer = null;
264
+ let api = null;
265
+ let statusCodes = null;
266
+ let spanKind = null;
267
+ const exporter = {
268
+ protocol: inferProtocol(endpoint),
269
+ processor: "batch",
270
+ };
271
+
272
+ try {
273
+ const otel = await loadOtelBindings();
274
+ api = otel.api;
275
+ statusCodes = otel.api.SpanStatusCode;
276
+ spanKind = otel.api.SpanKind;
277
+ const resource = otel.resourceFromAttributes({
278
+ [otel.semantic.SEMRESATTRS_SERVICE_NAME || "service.name"]: serviceName,
279
+ [otel.semantic.SEMRESATTRS_SERVICE_VERSION || "service.version"]: serviceVersion,
280
+ });
281
+
282
+ const metricReader = new otel.PeriodicExportingMetricReader({
283
+ exporter: new otel.OTLPMetricExporter({
284
+ url: deriveMetricsEndpoint(endpoint),
285
+ timeoutMillis: exportTimeoutMillis,
286
+ }),
287
+ exportIntervalMillis: 60_000,
288
+ exportTimeoutMillis,
289
+ });
290
+ sdk = new otel.NodeSDK({
291
+ resource,
292
+ traceExporter: new otel.OTLPTraceExporter({
293
+ url: endpoint,
294
+ timeoutMillis: exportTimeoutMillis,
295
+ }),
296
+ metricReader,
297
+ sampler: new otel.ParentBasedSampler({
298
+ root: new otel.TraceIdRatioBasedSampler(resolvedSampleRate),
299
+ }),
300
+ });
301
+
302
+ if (typeof sdk.start === "function") {
303
+ await sdk.start();
304
+ }
305
+
306
+ tracer = otel.api.trace.getTracer(serviceName, serviceVersion);
307
+ } catch {
308
+ sdk = null;
309
+ tracer = null;
310
+ api = null;
311
+ statusCodes = null;
312
+ spanKind = null;
313
+ }
314
+
315
+ tracingState = {
316
+ enabled: true,
317
+ endpoint,
318
+ sampleRate: resolvedSampleRate,
319
+ serviceName,
320
+ serviceVersion,
321
+ exporter,
322
+ meter: metrics.meter,
323
+ metrics,
324
+ provider: sdk,
325
+ tracer,
326
+ sdk,
327
+ api,
328
+ statusCodes,
329
+ spanKind,
330
+ activeSpans: new Map(),
331
+ finishedSpans: [],
332
+ };
333
+ ensureMetricInstruments();
334
+
335
+ return {
336
+ enabled: true,
337
+ endpoint,
338
+ sampleRate: resolvedSampleRate,
339
+ serviceName,
340
+ serviceVersion,
341
+ exporter,
342
+ };
343
+ }
123
344
 
124
- span.attributes[key] = value;
345
+ export function getTracingState() {
346
+ return {
347
+ enabled: tracingState.enabled,
348
+ endpoint: tracingState.endpoint,
349
+ sampleRate: tracingState.sampleRate,
350
+ serviceName: tracingState.serviceName,
351
+ serviceVersion: tracingState.serviceVersion,
352
+ exporter: tracingState.exporter,
353
+ };
125
354
  }
126
355
 
127
- function exportSpan(span) {
128
- if (_config.consoleExport || _config.exporter === "console") {
129
- console.log(
130
- `[tracing] ${span.name} ${span.spanId.slice(0, 8)} ${span.durationMs}ms [${span.status}]`,
131
- );
132
- }
133
-
134
- if (_config.fileExport || _config.exporter === "file") {
135
- try {
136
- const line = JSON.stringify({
137
- name: span.name,
138
- traceId: span.traceId,
139
- spanId: span.spanId,
140
- parentSpanId: span.parentSpanId,
141
- startTime: new Date(span.startTime).toISOString(),
142
- endTime: new Date(span.endTime).toISOString(),
143
- durationMs: span.durationMs,
144
- status: span.status,
145
- attributes: span.attributes,
146
- events: span.events,
147
- });
148
- appendFileSync(_config.filePath, line + "\n", "utf8");
149
- } catch { /* best effort */ }
150
- }
151
-
152
- if (_config.exporter === "otlp" || _config.exporter === "http") {
153
- exportToOtlp(span);
154
- }
356
+ export function getFinishedSpans() {
357
+ return [...tracingState.finishedSpans];
155
358
  }
156
359
 
157
- async function exportToOtlp(span) {
158
- try {
159
- const payload = {
160
- resourceSpans: [{
161
- resource: {
162
- attributes: [
163
- { key: "service.name", value: { stringValue: _config.serviceName } },
164
- ],
165
- },
166
- scopeSpans: [{
167
- spans: [{
168
- name: span.name,
169
- traceId: span.traceId,
170
- spanId: span.spanId,
171
- parentSpanId: span.parentSpanId || undefined,
172
- startTimeUnixNano: span.startTime * 1_000_000,
173
- endTimeUnixNano: span.endTime * 1_000_000,
174
- status: { code: span.status === "error" ? 2 : 1 },
175
- attributes: Object.entries(span.attributes).map(([key, value]) => ({
176
- key,
177
- value: { stringValue: String(value) },
178
- })),
179
- }],
180
- }],
181
- }],
182
- };
183
-
184
- await fetch(_config.endpoint, {
185
- method: "POST",
186
- headers: { "Content-Type": "application/json" },
187
- body: JSON.stringify(payload),
188
- }).catch(() => {});
189
- } catch { /* best effort */ }
360
+ export function getMetricSnapshot() {
361
+ return {
362
+ counters: new Map(tracingState.metrics.counters),
363
+ gauges: new Map(tracingState.metrics.gauges),
364
+ histograms: new Map(tracingState.metrics.histograms),
365
+ };
190
366
  }
191
367
 
192
- function getActiveSpans() {
193
- return Array.from(_spans.values()).filter((s) => !s.endTime);
368
+ export async function shutdownTracing() {
369
+ await shutdownSdk(tracingState.sdk);
370
+ tracingState = createNoopState();
371
+ metricInstruments = null;
372
+ ensureMetricInstruments();
194
373
  }
195
374
 
196
- function getSpan(spanId) {
197
- return _spans.get(spanId);
375
+ export function resetTracingForTests() {
376
+ tracingState = createNoopState();
377
+ metricInstruments = null;
378
+ ensureMetricInstruments();
198
379
  }
199
380
 
200
- function clearSpans() {
201
- _spans.clear();
381
+ export function addSpanEvent(name, attributes = {}) {
382
+ const current = getCurrentContext();
383
+ if (!current) return;
384
+ const normalized = omitUndefined(attributes);
385
+ current.events.push({ name, attributes: normalized, at: Date.now() });
386
+ if (current.otelSpan?.addEvent) {
387
+ current.otelSpan.addEvent(name, normalized);
388
+ }
202
389
  }
203
390
 
204
- function flush() {
205
- for (const span of _spans.values()) {
206
- if (!span.endTime) {
207
- endSpan(span.spanId, "ok");
208
- }
209
- }
391
+ export function recordIntervention(type, attributes = {}) {
392
+ recordMetric("agentInterventions", "counter", 1, {
393
+ "bosun.intervention.type": type,
394
+ ...attributes,
395
+ });
210
396
  }
211
397
 
212
- function getTracer() {
213
- return _tracer;
398
+ export function recordAgentError(type, attributes = {}) {
399
+ recordMetric("agentErrors", "counter", 1, {
400
+ "bosun.error.type": type,
401
+ ...attributes,
402
+ });
214
403
  }
215
404
 
216
- function wrapFunction(fn, name, attributes = {}) {
217
- return function (...args) {
218
- const span = startSpan(name, attributes);
219
- try {
220
- const result = fn.apply(this, args);
221
- if (result && typeof result.then === "function") {
222
- return result
223
- .then((res) => {
224
- endSpan(span?.spanId, "ok");
225
- return res;
226
- })
227
- .catch((err) => {
228
- endSpan(span?.spanId, "error", err);
229
- throw err;
230
- });
231
- }
232
- endSpan(span?.spanId, "ok");
233
- return result;
234
- } catch (err) {
235
- endSpan(span?.spanId, "error", err);
236
- throw err;
237
- }
238
- };
405
+ async function withSpan(name, attributes, fn, hooks = {}) {
406
+ if (!tracingState.enabled) {
407
+ return fn();
408
+ }
409
+
410
+ const span = createLocalSpan(name, omitUndefined(attributes));
411
+ const parent = getCurrentContext();
412
+ const otelOptions = { attributes: span.attributes };
413
+ if (tracingState.spanKind && name === "bosun.llm.call") {
414
+ otelOptions.kind = tracingState.spanKind.CLIENT;
415
+ }
416
+
417
+ if (tracingState.tracer?.startSpan) {
418
+ const parentContext = parent?.otelSpan && tracingState.api?.trace?.setSpan
419
+ ? tracingState.api.trace.setSpan(tracingState.api.context.active(), parent.otelSpan)
420
+ : undefined;
421
+ span.otelSpan = tracingState.tracer.startSpan(name, otelOptions, parentContext);
422
+ syncSpanContext(span);
423
+ }
424
+
425
+ tracingState.activeSpans.set(span.spanId, span);
426
+
427
+ return contextStore.run(span, async () => {
428
+ try {
429
+ hooks.onStart?.(span);
430
+ const result = await fn(span);
431
+ hooks.onSuccess?.(span, result);
432
+ span.status = { code: "OK" };
433
+ if (span.otelSpan?.setStatus) {
434
+ span.otelSpan.setStatus({
435
+ code: tracingState.statusCodes?.OK ?? 1,
436
+ });
437
+ }
438
+ attachOtelAttributes(span.otelSpan, span.attributes);
439
+ return result;
440
+ } catch (error) {
441
+ span.status = { code: "ERROR" };
442
+ const exception = {
443
+ message: error?.message || String(error),
444
+ stack: error?.stack || "",
445
+ };
446
+ span.exceptions.push(exception);
447
+ if (span.otelSpan?.recordException) {
448
+ span.otelSpan.recordException(error);
449
+ }
450
+ if (span.otelSpan?.setStatus) {
451
+ span.otelSpan.setStatus({
452
+ code: tracingState.statusCodes?.ERROR ?? 2,
453
+ message: exception.message,
454
+ });
455
+ }
456
+ hooks.onError?.(span, error);
457
+ throw error;
458
+ } finally {
459
+ hooks.onFinally?.(span);
460
+ attachOtelAttributes(span.otelSpan, span.attributes);
461
+ if (span.otelSpan?.end) {
462
+ span.otelSpan.end();
463
+ }
464
+ finalizeLocalSpan(span);
465
+ }
466
+ });
239
467
  }
240
468
 
241
- async function wrapAsyncFunction(fn, name, attributes = {}) {
242
- return async function (...args) {
243
- const span = startSpan(name, attributes);
244
- try {
245
- const result = await fn.apply(this, args);
246
- endSpan(span?.spanId, "ok");
247
- return result;
248
- } catch (err) {
249
- endSpan(span?.spanId, "error", err);
250
- throw err;
251
- }
252
- };
469
+ export async function traceTaskExecution(task = {}, fn) {
470
+ return withSpan(
471
+ "bosun.task.execute",
472
+ {
473
+ "bosun.task.id": task.taskId || task.id,
474
+ "bosun.task.title": task.title,
475
+ "bosun.task.priority": task.priority,
476
+ "bosun.task.assignee": task.assignee,
477
+ "bosun.agent.sdk": task.sdk,
478
+ "llm.model": task.model,
479
+ "git.branch": task.branch,
480
+ },
481
+ fn,
482
+ {
483
+ onSuccess(span, result) {
484
+ const inputTokens = Number(result?.inputTokens || result?.tokens?.input || 0);
485
+ const outputTokens = Number(result?.outputTokens || result?.tokens?.output || 0);
486
+ const totalTokens = Number(result?.totalTokens || inputTokens + outputTokens || 0);
487
+ const costUsd = Number(result?.costUsd || result?.cost?.usd || 0);
488
+ const metricAttributes = {
489
+ "bosun.task.id": span.attributes["bosun.task.id"],
490
+ "llm.model": span.attributes["llm.model"],
491
+ "trace.span_id": span.spanId,
492
+ "trace.trace_id": span.traceId,
493
+ };
494
+ if (totalTokens > 0) {
495
+ recordMetric("taskTokensTotal", "counter", totalTokens, metricAttributes);
496
+ }
497
+ if (costUsd > 0) {
498
+ recordMetric("taskCostUsd", "counter", costUsd, metricAttributes);
499
+ }
500
+ },
501
+ onError(span, error) {
502
+ recordMetric("agentErrors", "counter", 1, {
503
+ "bosun.error.type": error?.name || "Error",
504
+ "trace.span_id": span.spanId,
505
+ "trace.trace_id": span.traceId,
506
+ });
507
+ },
508
+ onFinally(span) {
509
+ const metricAttributes = {
510
+ "bosun.task.id": span.attributes["bosun.task.id"],
511
+ "llm.model": span.attributes["llm.model"],
512
+ "trace.span_id": span.spanId,
513
+ "trace.trace_id": span.traceId,
514
+ };
515
+ recordMetric("taskDuration", "histogram", span.durationMs ?? 0, metricAttributes);
516
+ },
517
+ },
518
+ );
253
519
  }
254
520
 
255
- function createAgentSpan(agentId, sessionId, type = "agent") {
256
- return startSpan(`agent.${type}`, {
257
- "agent.id": agentId,
258
- "session.id": sessionId,
259
- "agent.type": type,
260
- });
521
+ export async function traceAgentSession(session = {}, fn) {
522
+ return withSpan(
523
+ "bosun.agent.session",
524
+ {
525
+ "bosun.session.id": session.sessionId,
526
+ "bosun.agent.sdk": session.sdk,
527
+ "bosun.thread.key": session.threadKey,
528
+ "bosun.session.start_time": session.startTime,
529
+ "bosun.tokens.budget": session.tokensBudget,
530
+ },
531
+ fn,
532
+ {
533
+ onStart(span) {
534
+ recordMetric("agentSessionsActive", "gauge", 1, {
535
+ "bosun.session.id": span.attributes["bosun.session.id"],
536
+ "trace.span_id": span.spanId,
537
+ "trace.trace_id": span.traceId,
538
+ });
539
+ },
540
+ onFinally(span) {
541
+ recordMetric("agentSessionsActive", "gauge", -1, {
542
+ "bosun.session.id": span.attributes["bosun.session.id"],
543
+ "trace.span_id": span.spanId,
544
+ "trace.trace_id": span.traceId,
545
+ });
546
+ },
547
+ },
548
+ );
261
549
  }
262
550
 
263
- function createWorkflowSpan(workflowId, runId) {
264
- return startSpan(`workflow.${workflowId}`, {
265
- "workflow.id": workflowId,
266
- "workflow.run.id": runId,
267
- });
551
+ export async function traceToolCall(tool = {}, fn) {
552
+ return withSpan(
553
+ "bosun.tool.call",
554
+ {
555
+ "bosun.tool.name": tool.toolName,
556
+ "bosun.tool.tokens_used": tool.tokensUsed,
557
+ },
558
+ async (span) => {
559
+ const startedAt = nowHrTime();
560
+ const result = await fn(span);
561
+ span.attributes["bosun.tool.success"] = result?.success ?? true;
562
+ span.attributes["bosun.tool.duration_ms"] = durationMs(startedAt);
563
+ if (result?.error) {
564
+ span.attributes["bosun.tool.error"] = result.error;
565
+ }
566
+ return result;
567
+ },
568
+ );
268
569
  }
269
570
 
270
- function createTaskSpan(taskId, taskTitle) {
271
- return startSpan(`task.${taskId}`, {
272
- "task.id": taskId,
273
- "task.title": taskTitle,
274
- });
571
+ export async function traceLLMCall(call = {}, fn) {
572
+ return withSpan(
573
+ "bosun.llm.call",
574
+ {
575
+ "llm.model": call.model,
576
+ "llm.provider": call.provider,
577
+ "llm.input_tokens": call.inputTokens,
578
+ "llm.output_tokens": call.outputTokens,
579
+ "llm.cost_usd": call.costUsd,
580
+ "llm.latency_ms": call.latency,
581
+ },
582
+ async (span) => {
583
+ const startedAt = nowHrTime();
584
+ const result = await fn(span);
585
+ const inputTokens = Number(result?.inputTokens ?? call.inputTokens ?? 0);
586
+ const outputTokens = Number(result?.outputTokens ?? call.outputTokens ?? 0);
587
+ const costUsd = Number(result?.costUsd ?? call.costUsd ?? 0);
588
+ span.attributes["llm.input_tokens"] = inputTokens;
589
+ span.attributes["llm.output_tokens"] = outputTokens;
590
+ span.attributes["llm.cost_usd"] = costUsd;
591
+ span.attributes["llm.latency_ms"] = Number(result?.latency ?? call.latency ?? durationMs(startedAt));
592
+ return result;
593
+ },
594
+ );
275
595
  }
276
596
 
277
- export {
278
- initTracing,
279
- startSpan,
280
- endSpan,
281
- addSpanEvent,
282
- setSpanAttribute,
283
- getActiveSpans,
284
- getSpan,
285
- clearSpans,
286
- flush,
287
- getTracer,
288
- wrapFunction,
289
- wrapAsyncFunction,
290
- createAgentSpan,
291
- createWorkflowSpan,
292
- createTaskSpan,
293
- };
597
+ ensureMetricInstruments();