bosun 0.42.2 → 0.42.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +9 -0
- package/agent/agent-event-bus.mjs +10 -0
- package/agent/agent-supervisor.mjs +20 -0
- package/bosun-tui.mjs +107 -105
- package/cli.mjs +10 -0
- package/config/config.mjs +25 -0
- package/config/executor-config.mjs +124 -1
- package/infra/container-runner.mjs +565 -1
- package/infra/monitor.mjs +18 -0
- package/infra/tracing.mjs +544 -240
- package/infra/tui-bridge.mjs +13 -1
- package/kanban/kanban-adapter.mjs +128 -4
- package/lib/repo-map.mjs +114 -3
- package/package.json +11 -4
- package/server/ui-server.mjs +3 -0
- package/task/task-archiver.mjs +18 -6
- package/task/task-attachments.mjs +14 -10
- package/task/task-cli.mjs +24 -4
- package/task/task-executor.mjs +19 -0
- package/task/task-store.mjs +194 -37
- package/telegram/telegram-bot.mjs +4 -1
- package/tui/app.mjs +131 -171
- package/tui/components/status-header.mjs +178 -75
- package/tui/lib/header-config.mjs +68 -0
- package/tui/lib/ws-bridge.mjs +61 -9
- package/tui/screens/agents.mjs +127 -0
- package/tui/screens/tasks.mjs +1 -48
- package/ui/app.js +8 -5
- package/ui/components/kanban-board.js +65 -3
- package/ui/components/session-list.js +18 -32
- package/ui/demo-defaults.js +52 -2
- package/ui/modules/session-api.js +100 -0
- package/ui/modules/state.js +71 -15
- package/ui/tabs/workflows.js +25 -1
- package/ui/tui/App.js +298 -0
- package/ui/tui/TasksScreen.js +564 -0
- package/ui/tui/constants.js +55 -0
- package/ui/tui/tasks-screen-helpers.js +301 -0
- package/ui/tui/useTasks.js +61 -0
- package/ui/tui/useWebSocket.js +166 -0
- package/ui/tui/useWorkflows.js +30 -0
- package/workflow/workflow-engine.mjs +412 -7
- package/workflow/workflow-nodes.mjs +616 -75
- package/workflow-templates/agents.mjs +3 -0
- package/workflow-templates/planning.mjs +7 -0
- package/workflow-templates/sub-workflows.mjs +5 -0
- package/workflow-templates/task-execution.mjs +3 -0
- package/workspace/command-diagnostics.mjs +1 -1
- package/workspace/context-cache.mjs +182 -9
package/infra/tracing.mjs
CHANGED
|
@@ -1,293 +1,597 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* OpenTelemetry
|
|
2
|
+
* OpenTelemetry tracing helpers for Bosun orchestration.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* -
|
|
6
|
-
* - Workflow executions
|
|
7
|
-
* - Task lifecycle
|
|
8
|
-
* - Tool calls
|
|
9
|
-
*
|
|
10
|
-
* @module tracing
|
|
4
|
+
* Tracing stays external to agent prompts/context. When disabled, helpers
|
|
5
|
+
* degrade to no-op wrappers so there is no agent token impact.
|
|
11
6
|
*/
|
|
12
7
|
|
|
13
|
-
import {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
const
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
8
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
9
|
+
|
|
10
|
+
const DEFAULT_SERVICE_NAME = "bosun";
|
|
11
|
+
const DEFAULT_SERVICE_VERSION = process.env.npm_package_version || "0.42.0";
|
|
12
|
+
const TRACE_ID_BYTES = 16;
|
|
13
|
+
const SPAN_ID_BYTES = 8;
|
|
14
|
+
const DEFAULT_EXPORT_TIMEOUT_MS = 1000;
|
|
15
|
+
|
|
16
|
+
const contextStore = new AsyncLocalStorage();
|
|
17
|
+
|
|
18
|
+
const NOOP_METER = Object.freeze({
|
|
19
|
+
createHistogram() {
|
|
20
|
+
return { record() {} };
|
|
21
|
+
},
|
|
22
|
+
createCounter() {
|
|
23
|
+
return { add() {} };
|
|
24
|
+
},
|
|
25
|
+
createUpDownCounter() {
|
|
26
|
+
return { add() {} };
|
|
27
|
+
},
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
function randomId(bytes = 8) {
|
|
31
|
+
let value = "";
|
|
32
|
+
while (value.length < bytes * 2) {
|
|
33
|
+
value += Math.random().toString(16).slice(2);
|
|
34
|
+
}
|
|
35
|
+
return value.slice(0, bytes * 2);
|
|
38
36
|
}
|
|
39
37
|
|
|
40
|
-
function
|
|
41
|
-
|
|
38
|
+
function nowHrTime() {
|
|
39
|
+
return process.hrtime.bigint();
|
|
42
40
|
}
|
|
43
41
|
|
|
44
|
-
function
|
|
45
|
-
|
|
42
|
+
function durationMs(startTime) {
|
|
43
|
+
return Number(process.hrtime.bigint() - startTime) / 1_000_000;
|
|
44
|
+
}
|
|
46
45
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
46
|
+
function omitUndefined(object) {
|
|
47
|
+
return Object.fromEntries(
|
|
48
|
+
Object.entries(object).filter(([, value]) => value !== undefined && value !== null && value !== ""),
|
|
49
|
+
);
|
|
50
|
+
}
|
|
51
51
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
52
|
+
function inferProtocol(endpoint) {
|
|
53
|
+
if (typeof endpoint !== "string" || endpoint.length === 0) {
|
|
54
|
+
return "otlp-http";
|
|
55
|
+
}
|
|
56
|
+
return endpoint.startsWith("http") ? "otlp-http" : "otlp-grpc";
|
|
57
|
+
}
|
|
55
58
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
59
|
+
function createMetricStore() {
|
|
60
|
+
const counters = new Map();
|
|
61
|
+
const gauges = new Map();
|
|
62
|
+
const histograms = new Map();
|
|
63
|
+
|
|
64
|
+
return {
|
|
65
|
+
counters,
|
|
66
|
+
gauges,
|
|
67
|
+
histograms,
|
|
68
|
+
meter: {
|
|
69
|
+
createCounter(name) {
|
|
70
|
+
return {
|
|
71
|
+
add(value, attributes = {}) {
|
|
72
|
+
const bucket = counters.get(name) || [];
|
|
73
|
+
bucket.push({ value, attributes });
|
|
74
|
+
counters.set(name, bucket);
|
|
75
|
+
},
|
|
76
|
+
};
|
|
77
|
+
},
|
|
78
|
+
createUpDownCounter(name) {
|
|
79
|
+
return {
|
|
80
|
+
add(value, attributes = {}) {
|
|
81
|
+
const bucket = gauges.get(name) || [];
|
|
82
|
+
bucket.push({ value, attributes });
|
|
83
|
+
gauges.set(name, bucket);
|
|
84
|
+
},
|
|
85
|
+
};
|
|
86
|
+
},
|
|
87
|
+
createHistogram(name) {
|
|
88
|
+
return {
|
|
89
|
+
record(value, attributes = {}) {
|
|
90
|
+
const bucket = histograms.get(name) || [];
|
|
91
|
+
bucket.push({ value, attributes });
|
|
92
|
+
histograms.set(name, bucket);
|
|
93
|
+
},
|
|
94
|
+
};
|
|
95
|
+
},
|
|
96
|
+
},
|
|
97
|
+
};
|
|
98
|
+
}
|
|
62
99
|
|
|
63
|
-
|
|
100
|
+
function createNoopState() {
|
|
101
|
+
return {
|
|
102
|
+
enabled: false,
|
|
103
|
+
endpoint: null,
|
|
104
|
+
sampleRate: 0,
|
|
105
|
+
serviceName: DEFAULT_SERVICE_NAME,
|
|
106
|
+
serviceVersion: DEFAULT_SERVICE_VERSION,
|
|
107
|
+
exporter: null,
|
|
108
|
+
meter: NOOP_METER,
|
|
109
|
+
metrics: createMetricStore(),
|
|
110
|
+
provider: null,
|
|
111
|
+
tracer: null,
|
|
112
|
+
sdk: null,
|
|
113
|
+
api: null,
|
|
114
|
+
statusCodes: null,
|
|
115
|
+
spanKind: null,
|
|
116
|
+
activeSpans: new Map(),
|
|
117
|
+
finishedSpans: [],
|
|
118
|
+
};
|
|
64
119
|
}
|
|
65
120
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
121
|
+
let tracingState = createNoopState();
|
|
122
|
+
let metricInstruments = null;
|
|
123
|
+
|
|
124
|
+
function clampSampleRate(value) {
|
|
125
|
+
const numeric = Number(value);
|
|
126
|
+
if (!Number.isFinite(numeric)) return 1;
|
|
127
|
+
return Math.min(1, Math.max(0, numeric));
|
|
69
128
|
}
|
|
70
129
|
|
|
71
|
-
function
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
const span = {
|
|
75
|
-
name,
|
|
76
|
-
traceId: generateTraceId(),
|
|
77
|
-
spanId: generateSpanId(),
|
|
78
|
-
parentSpanId,
|
|
79
|
-
attributes: { ...attributes },
|
|
80
|
-
startTime: Date.now(),
|
|
81
|
-
endTime: null,
|
|
82
|
-
status: "ok",
|
|
83
|
-
events: [],
|
|
84
|
-
};
|
|
85
|
-
|
|
86
|
-
_spans.set(span.spanId, span);
|
|
87
|
-
return span;
|
|
130
|
+
function getCurrentContext() {
|
|
131
|
+
return contextStore.getStore() || null;
|
|
88
132
|
}
|
|
89
133
|
|
|
90
|
-
function
|
|
91
|
-
|
|
92
|
-
|
|
134
|
+
function ensureMetricInstruments() {
|
|
135
|
+
const meter = tracingState.meter || NOOP_METER;
|
|
136
|
+
metricInstruments = {
|
|
137
|
+
taskDuration: meter.createHistogram("bosun.task.duration"),
|
|
138
|
+
taskTokensTotal: meter.createCounter("bosun.task.tokens.total"),
|
|
139
|
+
taskCostUsd: meter.createCounter("bosun.task.cost.usd"),
|
|
140
|
+
agentSessionsActive: meter.createUpDownCounter("bosun.agent.sessions.active"),
|
|
141
|
+
agentErrors: meter.createCounter("bosun.agent.errors"),
|
|
142
|
+
agentInterventions: meter.createCounter("bosun.agent.interventions"),
|
|
143
|
+
};
|
|
144
|
+
}
|
|
93
145
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
146
|
+
function recordMetric(name, type, value, attributes = {}) {
|
|
147
|
+
if (!metricInstruments) return;
|
|
148
|
+
const instrument = metricInstruments[name];
|
|
149
|
+
if (!instrument) return;
|
|
150
|
+
if (type === "histogram") {
|
|
151
|
+
instrument.record(value, attributes);
|
|
152
|
+
return;
|
|
153
|
+
}
|
|
154
|
+
instrument.add(value, attributes);
|
|
155
|
+
}
|
|
97
156
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
157
|
+
function createLocalSpan(name, attributes = {}) {
|
|
158
|
+
const parent = getCurrentContext();
|
|
159
|
+
const traceId = parent?.traceId || randomId(TRACE_ID_BYTES);
|
|
160
|
+
const spanId = randomId(SPAN_ID_BYTES);
|
|
161
|
+
return {
|
|
162
|
+
name,
|
|
163
|
+
traceId,
|
|
164
|
+
spanId,
|
|
165
|
+
parentSpanId: parent?.spanId || null,
|
|
166
|
+
startTime: nowHrTime(),
|
|
167
|
+
endTime: null,
|
|
168
|
+
durationMs: null,
|
|
169
|
+
attributes: { ...attributes },
|
|
170
|
+
status: { code: "OK" },
|
|
171
|
+
events: [],
|
|
172
|
+
exceptions: [],
|
|
173
|
+
otelSpan: null,
|
|
174
|
+
};
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
function attachOtelAttributes(otelSpan, attributes = {}) {
|
|
178
|
+
if (!otelSpan || typeof otelSpan.setAttributes !== "function") return;
|
|
179
|
+
const normalized = omitUndefined(attributes);
|
|
180
|
+
if (Object.keys(normalized).length > 0) {
|
|
181
|
+
otelSpan.setAttributes(normalized);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
104
184
|
|
|
105
|
-
|
|
106
|
-
|
|
185
|
+
function syncSpanContext(span) {
|
|
186
|
+
const spanContext = span?.otelSpan?.spanContext?.();
|
|
187
|
+
if (!spanContext) return;
|
|
188
|
+
span.traceId = spanContext.traceId || span.traceId;
|
|
189
|
+
span.spanId = spanContext.spanId || span.spanId;
|
|
107
190
|
}
|
|
108
191
|
|
|
109
|
-
function
|
|
110
|
-
|
|
111
|
-
|
|
192
|
+
function finalizeLocalSpan(span) {
|
|
193
|
+
span.endTime = nowHrTime();
|
|
194
|
+
span.durationMs = durationMs(span.startTime);
|
|
195
|
+
tracingState.activeSpans.delete(span.spanId);
|
|
196
|
+
tracingState.finishedSpans.push({
|
|
197
|
+
...span,
|
|
198
|
+
attributes: { ...span.attributes },
|
|
199
|
+
events: [...span.events],
|
|
200
|
+
exceptions: [...span.exceptions],
|
|
201
|
+
status: { ...span.status },
|
|
202
|
+
});
|
|
203
|
+
}
|
|
112
204
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
205
|
+
async function loadOtelBindings() {
|
|
206
|
+
const [api, sdkNode, exporterTraceHttp, exporterMetricsHttp, resources, semantic] = await Promise.all([
|
|
207
|
+
import("@opentelemetry/api"),
|
|
208
|
+
import("@opentelemetry/sdk-node"),
|
|
209
|
+
import("@opentelemetry/exporter-trace-otlp-http"),
|
|
210
|
+
import("@opentelemetry/exporter-metrics-otlp-http"),
|
|
211
|
+
import("@opentelemetry/resources"),
|
|
212
|
+
import("@opentelemetry/semantic-conventions"),
|
|
213
|
+
]);
|
|
214
|
+
|
|
215
|
+
return {
|
|
216
|
+
api,
|
|
217
|
+
NodeSDK: sdkNode.NodeSDK,
|
|
218
|
+
OTLPTraceExporter: exporterTraceHttp.OTLPTraceExporter,
|
|
219
|
+
OTLPMetricExporter: exporterMetricsHttp.OTLPMetricExporter,
|
|
220
|
+
resourceFromAttributes:
|
|
221
|
+
resources.resourceFromAttributes ||
|
|
222
|
+
((attributes) => new resources.Resource(attributes)),
|
|
223
|
+
semantic,
|
|
224
|
+
};
|
|
118
225
|
}
|
|
119
226
|
|
|
120
|
-
function
|
|
121
|
-
|
|
122
|
-
|
|
227
|
+
async function shutdownSdk(sdk) {
|
|
228
|
+
if (!sdk?.shutdown) return;
|
|
229
|
+
try {
|
|
230
|
+
await sdk.shutdown();
|
|
231
|
+
} catch {
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
export async function setupTracing(endpointOrConfig = null) {
|
|
236
|
+
const inputConfig =
|
|
237
|
+
typeof endpointOrConfig === "string"
|
|
238
|
+
? { endpoint: endpointOrConfig }
|
|
239
|
+
: (endpointOrConfig ?? {});
|
|
240
|
+
|
|
241
|
+
const endpoint = inputConfig.endpoint || process.env.BOSUN_OTEL_ENDPOINT || null;
|
|
242
|
+
const enabled = inputConfig.enabled ?? Boolean(endpoint);
|
|
243
|
+
const sampleRate = Number(inputConfig.sampleRate ?? 1);
|
|
244
|
+
const exportTimeoutMillis = Math.max(
|
|
245
|
+
1,
|
|
246
|
+
Number(inputConfig.exportTimeoutMillis ?? DEFAULT_EXPORT_TIMEOUT_MS),
|
|
247
|
+
);
|
|
248
|
+
|
|
249
|
+
await shutdownSdk(tracingState.sdk);
|
|
250
|
+
|
|
251
|
+
if (!enabled || !endpoint) {
|
|
252
|
+
tracingState = createNoopState();
|
|
253
|
+
ensureMetricInstruments();
|
|
254
|
+
return { enabled: false, endpoint: null, sampleRate: 0 };
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
const serviceName = inputConfig.serviceName || DEFAULT_SERVICE_NAME;
|
|
258
|
+
const serviceVersion = inputConfig.serviceVersion || DEFAULT_SERVICE_VERSION;
|
|
259
|
+
const resolvedSampleRate = clampSampleRate(sampleRate);
|
|
260
|
+
const metrics = createMetricStore();
|
|
261
|
+
|
|
262
|
+
let sdk = null;
|
|
263
|
+
let tracer = null;
|
|
264
|
+
let api = null;
|
|
265
|
+
let statusCodes = null;
|
|
266
|
+
let spanKind = null;
|
|
267
|
+
const exporter = {
|
|
268
|
+
protocol: inferProtocol(endpoint),
|
|
269
|
+
processor: "batch",
|
|
270
|
+
};
|
|
271
|
+
|
|
272
|
+
try {
|
|
273
|
+
const otel = await loadOtelBindings();
|
|
274
|
+
api = otel.api;
|
|
275
|
+
statusCodes = otel.api.SpanStatusCode;
|
|
276
|
+
spanKind = otel.api.SpanKind;
|
|
277
|
+
const resource = otel.resourceFromAttributes({
|
|
278
|
+
[otel.semantic.SEMRESATTRS_SERVICE_NAME || "service.name"]: serviceName,
|
|
279
|
+
[otel.semantic.SEMRESATTRS_SERVICE_VERSION || "service.version"]: serviceVersion,
|
|
280
|
+
});
|
|
281
|
+
|
|
282
|
+
const metricReader = new otel.PeriodicExportingMetricReader({
|
|
283
|
+
exporter: new otel.OTLPMetricExporter({
|
|
284
|
+
url: deriveMetricsEndpoint(endpoint),
|
|
285
|
+
timeoutMillis: exportTimeoutMillis,
|
|
286
|
+
}),
|
|
287
|
+
exportIntervalMillis: 60_000,
|
|
288
|
+
exportTimeoutMillis,
|
|
289
|
+
});
|
|
290
|
+
sdk = new otel.NodeSDK({
|
|
291
|
+
resource,
|
|
292
|
+
traceExporter: new otel.OTLPTraceExporter({
|
|
293
|
+
url: endpoint,
|
|
294
|
+
timeoutMillis: exportTimeoutMillis,
|
|
295
|
+
}),
|
|
296
|
+
metricReader,
|
|
297
|
+
sampler: new otel.ParentBasedSampler({
|
|
298
|
+
root: new otel.TraceIdRatioBasedSampler(resolvedSampleRate),
|
|
299
|
+
}),
|
|
300
|
+
});
|
|
301
|
+
|
|
302
|
+
if (typeof sdk.start === "function") {
|
|
303
|
+
await sdk.start();
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
tracer = otel.api.trace.getTracer(serviceName, serviceVersion);
|
|
307
|
+
} catch {
|
|
308
|
+
sdk = null;
|
|
309
|
+
tracer = null;
|
|
310
|
+
api = null;
|
|
311
|
+
statusCodes = null;
|
|
312
|
+
spanKind = null;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
tracingState = {
|
|
316
|
+
enabled: true,
|
|
317
|
+
endpoint,
|
|
318
|
+
sampleRate: resolvedSampleRate,
|
|
319
|
+
serviceName,
|
|
320
|
+
serviceVersion,
|
|
321
|
+
exporter,
|
|
322
|
+
meter: metrics.meter,
|
|
323
|
+
metrics,
|
|
324
|
+
provider: sdk,
|
|
325
|
+
tracer,
|
|
326
|
+
sdk,
|
|
327
|
+
api,
|
|
328
|
+
statusCodes,
|
|
329
|
+
spanKind,
|
|
330
|
+
activeSpans: new Map(),
|
|
331
|
+
finishedSpans: [],
|
|
332
|
+
};
|
|
333
|
+
ensureMetricInstruments();
|
|
334
|
+
|
|
335
|
+
return {
|
|
336
|
+
enabled: true,
|
|
337
|
+
endpoint,
|
|
338
|
+
sampleRate: resolvedSampleRate,
|
|
339
|
+
serviceName,
|
|
340
|
+
serviceVersion,
|
|
341
|
+
exporter,
|
|
342
|
+
};
|
|
343
|
+
}
|
|
123
344
|
|
|
124
|
-
|
|
345
|
+
export function getTracingState() {
|
|
346
|
+
return {
|
|
347
|
+
enabled: tracingState.enabled,
|
|
348
|
+
endpoint: tracingState.endpoint,
|
|
349
|
+
sampleRate: tracingState.sampleRate,
|
|
350
|
+
serviceName: tracingState.serviceName,
|
|
351
|
+
serviceVersion: tracingState.serviceVersion,
|
|
352
|
+
exporter: tracingState.exporter,
|
|
353
|
+
};
|
|
125
354
|
}
|
|
126
355
|
|
|
127
|
-
function
|
|
128
|
-
|
|
129
|
-
console.log(
|
|
130
|
-
`[tracing] ${span.name} ${span.spanId.slice(0, 8)} ${span.durationMs}ms [${span.status}]`,
|
|
131
|
-
);
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
if (_config.fileExport || _config.exporter === "file") {
|
|
135
|
-
try {
|
|
136
|
-
const line = JSON.stringify({
|
|
137
|
-
name: span.name,
|
|
138
|
-
traceId: span.traceId,
|
|
139
|
-
spanId: span.spanId,
|
|
140
|
-
parentSpanId: span.parentSpanId,
|
|
141
|
-
startTime: new Date(span.startTime).toISOString(),
|
|
142
|
-
endTime: new Date(span.endTime).toISOString(),
|
|
143
|
-
durationMs: span.durationMs,
|
|
144
|
-
status: span.status,
|
|
145
|
-
attributes: span.attributes,
|
|
146
|
-
events: span.events,
|
|
147
|
-
});
|
|
148
|
-
appendFileSync(_config.filePath, line + "\n", "utf8");
|
|
149
|
-
} catch { /* best effort */ }
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
if (_config.exporter === "otlp" || _config.exporter === "http") {
|
|
153
|
-
exportToOtlp(span);
|
|
154
|
-
}
|
|
356
|
+
export function getFinishedSpans() {
|
|
357
|
+
return [...tracingState.finishedSpans];
|
|
155
358
|
}
|
|
156
359
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
{ key: "service.name", value: { stringValue: _config.serviceName } },
|
|
164
|
-
],
|
|
165
|
-
},
|
|
166
|
-
scopeSpans: [{
|
|
167
|
-
spans: [{
|
|
168
|
-
name: span.name,
|
|
169
|
-
traceId: span.traceId,
|
|
170
|
-
spanId: span.spanId,
|
|
171
|
-
parentSpanId: span.parentSpanId || undefined,
|
|
172
|
-
startTimeUnixNano: span.startTime * 1_000_000,
|
|
173
|
-
endTimeUnixNano: span.endTime * 1_000_000,
|
|
174
|
-
status: { code: span.status === "error" ? 2 : 1 },
|
|
175
|
-
attributes: Object.entries(span.attributes).map(([key, value]) => ({
|
|
176
|
-
key,
|
|
177
|
-
value: { stringValue: String(value) },
|
|
178
|
-
})),
|
|
179
|
-
}],
|
|
180
|
-
}],
|
|
181
|
-
}],
|
|
182
|
-
};
|
|
183
|
-
|
|
184
|
-
await fetch(_config.endpoint, {
|
|
185
|
-
method: "POST",
|
|
186
|
-
headers: { "Content-Type": "application/json" },
|
|
187
|
-
body: JSON.stringify(payload),
|
|
188
|
-
}).catch(() => {});
|
|
189
|
-
} catch { /* best effort */ }
|
|
360
|
+
export function getMetricSnapshot() {
|
|
361
|
+
return {
|
|
362
|
+
counters: new Map(tracingState.metrics.counters),
|
|
363
|
+
gauges: new Map(tracingState.metrics.gauges),
|
|
364
|
+
histograms: new Map(tracingState.metrics.histograms),
|
|
365
|
+
};
|
|
190
366
|
}
|
|
191
367
|
|
|
192
|
-
function
|
|
193
|
-
|
|
368
|
+
export async function shutdownTracing() {
|
|
369
|
+
await shutdownSdk(tracingState.sdk);
|
|
370
|
+
tracingState = createNoopState();
|
|
371
|
+
metricInstruments = null;
|
|
372
|
+
ensureMetricInstruments();
|
|
194
373
|
}
|
|
195
374
|
|
|
196
|
-
function
|
|
197
|
-
|
|
375
|
+
export function resetTracingForTests() {
|
|
376
|
+
tracingState = createNoopState();
|
|
377
|
+
metricInstruments = null;
|
|
378
|
+
ensureMetricInstruments();
|
|
198
379
|
}
|
|
199
380
|
|
|
200
|
-
function
|
|
201
|
-
|
|
381
|
+
export function addSpanEvent(name, attributes = {}) {
|
|
382
|
+
const current = getCurrentContext();
|
|
383
|
+
if (!current) return;
|
|
384
|
+
const normalized = omitUndefined(attributes);
|
|
385
|
+
current.events.push({ name, attributes: normalized, at: Date.now() });
|
|
386
|
+
if (current.otelSpan?.addEvent) {
|
|
387
|
+
current.otelSpan.addEvent(name, normalized);
|
|
388
|
+
}
|
|
202
389
|
}
|
|
203
390
|
|
|
204
|
-
function
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
}
|
|
391
|
+
export function recordIntervention(type, attributes = {}) {
|
|
392
|
+
recordMetric("agentInterventions", "counter", 1, {
|
|
393
|
+
"bosun.intervention.type": type,
|
|
394
|
+
...attributes,
|
|
395
|
+
});
|
|
210
396
|
}
|
|
211
397
|
|
|
212
|
-
function
|
|
213
|
-
|
|
398
|
+
export function recordAgentError(type, attributes = {}) {
|
|
399
|
+
recordMetric("agentErrors", "counter", 1, {
|
|
400
|
+
"bosun.error.type": type,
|
|
401
|
+
...attributes,
|
|
402
|
+
});
|
|
214
403
|
}
|
|
215
404
|
|
|
216
|
-
function
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
405
|
+
async function withSpan(name, attributes, fn, hooks = {}) {
|
|
406
|
+
if (!tracingState.enabled) {
|
|
407
|
+
return fn();
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
const span = createLocalSpan(name, omitUndefined(attributes));
|
|
411
|
+
const parent = getCurrentContext();
|
|
412
|
+
const otelOptions = { attributes: span.attributes };
|
|
413
|
+
if (tracingState.spanKind && name === "bosun.llm.call") {
|
|
414
|
+
otelOptions.kind = tracingState.spanKind.CLIENT;
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
if (tracingState.tracer?.startSpan) {
|
|
418
|
+
const parentContext = parent?.otelSpan && tracingState.api?.trace?.setSpan
|
|
419
|
+
? tracingState.api.trace.setSpan(tracingState.api.context.active(), parent.otelSpan)
|
|
420
|
+
: undefined;
|
|
421
|
+
span.otelSpan = tracingState.tracer.startSpan(name, otelOptions, parentContext);
|
|
422
|
+
syncSpanContext(span);
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
tracingState.activeSpans.set(span.spanId, span);
|
|
426
|
+
|
|
427
|
+
return contextStore.run(span, async () => {
|
|
428
|
+
try {
|
|
429
|
+
hooks.onStart?.(span);
|
|
430
|
+
const result = await fn(span);
|
|
431
|
+
hooks.onSuccess?.(span, result);
|
|
432
|
+
span.status = { code: "OK" };
|
|
433
|
+
if (span.otelSpan?.setStatus) {
|
|
434
|
+
span.otelSpan.setStatus({
|
|
435
|
+
code: tracingState.statusCodes?.OK ?? 1,
|
|
436
|
+
});
|
|
437
|
+
}
|
|
438
|
+
attachOtelAttributes(span.otelSpan, span.attributes);
|
|
439
|
+
return result;
|
|
440
|
+
} catch (error) {
|
|
441
|
+
span.status = { code: "ERROR" };
|
|
442
|
+
const exception = {
|
|
443
|
+
message: error?.message || String(error),
|
|
444
|
+
stack: error?.stack || "",
|
|
445
|
+
};
|
|
446
|
+
span.exceptions.push(exception);
|
|
447
|
+
if (span.otelSpan?.recordException) {
|
|
448
|
+
span.otelSpan.recordException(error);
|
|
449
|
+
}
|
|
450
|
+
if (span.otelSpan?.setStatus) {
|
|
451
|
+
span.otelSpan.setStatus({
|
|
452
|
+
code: tracingState.statusCodes?.ERROR ?? 2,
|
|
453
|
+
message: exception.message,
|
|
454
|
+
});
|
|
455
|
+
}
|
|
456
|
+
hooks.onError?.(span, error);
|
|
457
|
+
throw error;
|
|
458
|
+
} finally {
|
|
459
|
+
hooks.onFinally?.(span);
|
|
460
|
+
attachOtelAttributes(span.otelSpan, span.attributes);
|
|
461
|
+
if (span.otelSpan?.end) {
|
|
462
|
+
span.otelSpan.end();
|
|
463
|
+
}
|
|
464
|
+
finalizeLocalSpan(span);
|
|
465
|
+
}
|
|
466
|
+
});
|
|
239
467
|
}
|
|
240
468
|
|
|
241
|
-
async function
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
469
|
+
export async function traceTaskExecution(task = {}, fn) {
|
|
470
|
+
return withSpan(
|
|
471
|
+
"bosun.task.execute",
|
|
472
|
+
{
|
|
473
|
+
"bosun.task.id": task.taskId || task.id,
|
|
474
|
+
"bosun.task.title": task.title,
|
|
475
|
+
"bosun.task.priority": task.priority,
|
|
476
|
+
"bosun.task.assignee": task.assignee,
|
|
477
|
+
"bosun.agent.sdk": task.sdk,
|
|
478
|
+
"llm.model": task.model,
|
|
479
|
+
"git.branch": task.branch,
|
|
480
|
+
},
|
|
481
|
+
fn,
|
|
482
|
+
{
|
|
483
|
+
onSuccess(span, result) {
|
|
484
|
+
const inputTokens = Number(result?.inputTokens || result?.tokens?.input || 0);
|
|
485
|
+
const outputTokens = Number(result?.outputTokens || result?.tokens?.output || 0);
|
|
486
|
+
const totalTokens = Number(result?.totalTokens || inputTokens + outputTokens || 0);
|
|
487
|
+
const costUsd = Number(result?.costUsd || result?.cost?.usd || 0);
|
|
488
|
+
const metricAttributes = {
|
|
489
|
+
"bosun.task.id": span.attributes["bosun.task.id"],
|
|
490
|
+
"llm.model": span.attributes["llm.model"],
|
|
491
|
+
"trace.span_id": span.spanId,
|
|
492
|
+
"trace.trace_id": span.traceId,
|
|
493
|
+
};
|
|
494
|
+
if (totalTokens > 0) {
|
|
495
|
+
recordMetric("taskTokensTotal", "counter", totalTokens, metricAttributes);
|
|
496
|
+
}
|
|
497
|
+
if (costUsd > 0) {
|
|
498
|
+
recordMetric("taskCostUsd", "counter", costUsd, metricAttributes);
|
|
499
|
+
}
|
|
500
|
+
},
|
|
501
|
+
onError(span, error) {
|
|
502
|
+
recordMetric("agentErrors", "counter", 1, {
|
|
503
|
+
"bosun.error.type": error?.name || "Error",
|
|
504
|
+
"trace.span_id": span.spanId,
|
|
505
|
+
"trace.trace_id": span.traceId,
|
|
506
|
+
});
|
|
507
|
+
},
|
|
508
|
+
onFinally(span) {
|
|
509
|
+
const metricAttributes = {
|
|
510
|
+
"bosun.task.id": span.attributes["bosun.task.id"],
|
|
511
|
+
"llm.model": span.attributes["llm.model"],
|
|
512
|
+
"trace.span_id": span.spanId,
|
|
513
|
+
"trace.trace_id": span.traceId,
|
|
514
|
+
};
|
|
515
|
+
recordMetric("taskDuration", "histogram", span.durationMs ?? 0, metricAttributes);
|
|
516
|
+
},
|
|
517
|
+
},
|
|
518
|
+
);
|
|
253
519
|
}
|
|
254
520
|
|
|
255
|
-
function
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
521
|
+
export async function traceAgentSession(session = {}, fn) {
|
|
522
|
+
return withSpan(
|
|
523
|
+
"bosun.agent.session",
|
|
524
|
+
{
|
|
525
|
+
"bosun.session.id": session.sessionId,
|
|
526
|
+
"bosun.agent.sdk": session.sdk,
|
|
527
|
+
"bosun.thread.key": session.threadKey,
|
|
528
|
+
"bosun.session.start_time": session.startTime,
|
|
529
|
+
"bosun.tokens.budget": session.tokensBudget,
|
|
530
|
+
},
|
|
531
|
+
fn,
|
|
532
|
+
{
|
|
533
|
+
onStart(span) {
|
|
534
|
+
recordMetric("agentSessionsActive", "gauge", 1, {
|
|
535
|
+
"bosun.session.id": span.attributes["bosun.session.id"],
|
|
536
|
+
"trace.span_id": span.spanId,
|
|
537
|
+
"trace.trace_id": span.traceId,
|
|
538
|
+
});
|
|
539
|
+
},
|
|
540
|
+
onFinally(span) {
|
|
541
|
+
recordMetric("agentSessionsActive", "gauge", -1, {
|
|
542
|
+
"bosun.session.id": span.attributes["bosun.session.id"],
|
|
543
|
+
"trace.span_id": span.spanId,
|
|
544
|
+
"trace.trace_id": span.traceId,
|
|
545
|
+
});
|
|
546
|
+
},
|
|
547
|
+
},
|
|
548
|
+
);
|
|
261
549
|
}
|
|
262
550
|
|
|
263
|
-
function
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
551
|
+
export async function traceToolCall(tool = {}, fn) {
|
|
552
|
+
return withSpan(
|
|
553
|
+
"bosun.tool.call",
|
|
554
|
+
{
|
|
555
|
+
"bosun.tool.name": tool.toolName,
|
|
556
|
+
"bosun.tool.tokens_used": tool.tokensUsed,
|
|
557
|
+
},
|
|
558
|
+
async (span) => {
|
|
559
|
+
const startedAt = nowHrTime();
|
|
560
|
+
const result = await fn(span);
|
|
561
|
+
span.attributes["bosun.tool.success"] = result?.success ?? true;
|
|
562
|
+
span.attributes["bosun.tool.duration_ms"] = durationMs(startedAt);
|
|
563
|
+
if (result?.error) {
|
|
564
|
+
span.attributes["bosun.tool.error"] = result.error;
|
|
565
|
+
}
|
|
566
|
+
return result;
|
|
567
|
+
},
|
|
568
|
+
);
|
|
268
569
|
}
|
|
269
570
|
|
|
270
|
-
function
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
571
|
+
export async function traceLLMCall(call = {}, fn) {
|
|
572
|
+
return withSpan(
|
|
573
|
+
"bosun.llm.call",
|
|
574
|
+
{
|
|
575
|
+
"llm.model": call.model,
|
|
576
|
+
"llm.provider": call.provider,
|
|
577
|
+
"llm.input_tokens": call.inputTokens,
|
|
578
|
+
"llm.output_tokens": call.outputTokens,
|
|
579
|
+
"llm.cost_usd": call.costUsd,
|
|
580
|
+
"llm.latency_ms": call.latency,
|
|
581
|
+
},
|
|
582
|
+
async (span) => {
|
|
583
|
+
const startedAt = nowHrTime();
|
|
584
|
+
const result = await fn(span);
|
|
585
|
+
const inputTokens = Number(result?.inputTokens ?? call.inputTokens ?? 0);
|
|
586
|
+
const outputTokens = Number(result?.outputTokens ?? call.outputTokens ?? 0);
|
|
587
|
+
const costUsd = Number(result?.costUsd ?? call.costUsd ?? 0);
|
|
588
|
+
span.attributes["llm.input_tokens"] = inputTokens;
|
|
589
|
+
span.attributes["llm.output_tokens"] = outputTokens;
|
|
590
|
+
span.attributes["llm.cost_usd"] = costUsd;
|
|
591
|
+
span.attributes["llm.latency_ms"] = Number(result?.latency ?? call.latency ?? durationMs(startedAt));
|
|
592
|
+
return result;
|
|
593
|
+
},
|
|
594
|
+
);
|
|
275
595
|
}
|
|
276
596
|
|
|
277
|
-
|
|
278
|
-
initTracing,
|
|
279
|
-
startSpan,
|
|
280
|
-
endSpan,
|
|
281
|
-
addSpanEvent,
|
|
282
|
-
setSpanAttribute,
|
|
283
|
-
getActiveSpans,
|
|
284
|
-
getSpan,
|
|
285
|
-
clearSpans,
|
|
286
|
-
flush,
|
|
287
|
-
getTracer,
|
|
288
|
-
wrapFunction,
|
|
289
|
-
wrapAsyncFunction,
|
|
290
|
-
createAgentSpan,
|
|
291
|
-
createWorkflowSpan,
|
|
292
|
-
createTaskSpan,
|
|
293
|
-
};
|
|
597
|
+
ensureMetricInstruments();
|