@gakr-gakr/diagnostics-prometheus 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/api.js ADDED
@@ -0,0 +1,3 @@
1
+ import { emptyPluginConfigSchema } from "autobot/plugin-sdk/plugin-entry";
2
+ import { redactSensitiveText } from "autobot/plugin-sdk/security-runtime";
3
+ export { emptyPluginConfigSchema, redactSensitiveText };
package/dist/index.js ADDED
@@ -0,0 +1,488 @@
1
+ import { redactSensitiveText } from "./api.js";
2
+ import { definePluginEntry } from "autobot/plugin-sdk/plugin-entry";
3
+ //#region extensions/diagnostics-prometheus/src/service.ts
4
// Default histogram bucket upper bounds, in seconds (5 ms up to 10 minutes).
const DURATION_BUCKETS_SECONDS = [
  0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1,
  2.5, 5, 10, 30, 60, 120, 300, 600
];
// Token-count bucket upper bounds: successive powers of four, 1 through 4^10 (1048576).
const TOKEN_BUCKETS = Array.from({ length: 11 }, (_, exponent) => 4 ** exponent);
// Byte-size bucket upper bounds: 1024 times successive powers of four, 1024 through 2^34.
const BYTE_BUCKETS = Array.from({ length: 13 }, (_, exponent) => 1024 * 4 ** exponent);
// A label value is accepted only if it is 1-120 characters drawn from this set.
const LOW_CARDINALITY_VALUE_RE = /^[A-Za-z0-9_.:-]{1,120}$/u;
// Upper limit on the combined number of counter, gauge and histogram series.
const MAX_PROMETHEUS_SERIES = 2048;
// Name of the synthetic counter that reports observations rejected by the series cap.
const DROPPED_SERIES_COUNTER_NAME = "autobot_prometheus_series_dropped_total";
53
/**
 * Turns an event field into a value that is safe to use as a Prometheus label.
 *
 * The value is trimmed, passed through redactSensitiveText, and accepted only
 * if the result matches LOW_CARDINALITY_VALUE_RE; anything else collapses to
 * `fallback`.
 *
 * @param {unknown} value - Raw event field; expected to be a string.
 * @param {string} [fallback="unknown"] - Label used when the value is missing or rejected.
 * @returns {string} The redacted value, or `fallback`.
 */
function lowCardinalityLabel(value, fallback = "unknown") {
  // The bundled output has no type checks, so a truthy non-string (number,
  // object, ...) could reach this point; it has no .trim() and used to throw.
  // Treat it like a missing value instead.
  if (typeof value !== "string" || value === "") return fallback;
  const redacted = redactSensitiveText(value.trim());
  return LOW_CARDINALITY_VALUE_RE.test(redacted) ? redacted : fallback;
}
58
/**
 * Accepts only finite, non-negative numbers.
 *
 * @param {unknown} value - Candidate measurement.
 * @returns {number|undefined} `value` unchanged when acceptable, otherwise `undefined`.
 */
function numericValue(value) {
  if (typeof value !== "number") return undefined;
  if (!Number.isFinite(value) || value < 0) return undefined;
  return value;
}
61
/**
 * Converts a millisecond measurement to seconds.
 *
 * @param {unknown} ms - Duration in milliseconds.
 * @returns {number|undefined} Seconds, or `undefined` when numericValue rejects the input.
 */
function seconds(ms) {
  const millis = numericValue(ms);
  if (millis === undefined) return undefined;
  return millis / 1e3;
}
65
/**
 * Lists a label set as `[name, value]` pairs ordered by label name.
 *
 * @param {Record<string, string>} labels - Label set.
 * @returns {[string, string][]} A new array of pairs sorted with localeCompare on the name.
 */
function sortedLabels(labels) {
  const pairs = Object.entries(labels);
  // Object.entries already returned a fresh array, so sorting it in place
  // cannot affect the caller's object.
  pairs.sort((a, b) => a[0].localeCompare(b[0]));
  return pairs;
}
68
/**
 * Builds the map key that identifies one series: the metric name, a `|`
 * separator, and the JSON form of the name-sorted label pairs.
 *
 * @param {string} name - Metric name.
 * @param {Record<string, string>} labels - Label set of the series.
 * @returns {string} Key that is independent of label insertion order.
 */
function metricKey(name, labels) {
  const serializedLabels = JSON.stringify(sortedLabels(labels));
  return `${name}|${serializedLabels}`;
}
71
/**
 * Escapes text for a `# HELP` line: backslashes are doubled and line feeds
 * become the two-character sequence `\n`.
 *
 * @param {string} value - Help text.
 * @returns {string} Escaped help text.
 */
function escapeHelp(value) {
  // Backslashes first, so the backslash added for a line feed is not doubled.
  const backslashesEscaped = value.replaceAll("\\", "\\\\");
  return backslashesEscaped.replaceAll("\n", "\\n");
}
74
/**
 * Escapes a label value for use inside double quotes: backslash, line feed
 * and double quote each get a backslash escape.
 *
 * @param {string} value - Raw label value.
 * @returns {string} Escaped label value.
 */
function escapeLabelValue(value) {
  // Single pass: every special character maps to its escape independently,
  // which yields the same text as escaping backslashes, then line feeds, then quotes.
  return value.replace(/[\\\n"]/g, (character) => {
    switch (character) {
      case "\\": return "\\\\";
      case "\n": return "\\n";
      default: return "\\\"";
    }
  });
}
77
/**
 * Renders a label set as `{name="value",...}` with labels ordered by name.
 *
 * @param {Record<string, string>} labels - Label set.
 * @returns {string} The braced label list, or an empty string when there are no labels.
 */
function formatLabels(labels) {
  const pairs = sortedLabels(labels);
  if (pairs.length === 0) return "";
  const rendered = [];
  for (const [labelName, labelValue] of pairs) {
    rendered.push(`${labelName}="${escapeLabelValue(labelValue)}"`);
  }
  return `{${rendered.join(",")}}`;
}
82
/**
 * Formats a sample value for the exposition text.
 *
 * @param {number} value - Sample value.
 * @returns {string} `"0"` for non-finite input, the plain string form for
 *   integers, and otherwise the value rounded to 12 significant digits.
 */
function formatPrometheusNumber(value) {
  if (!Number.isFinite(value)) return "0";
  if (Number.isInteger(value)) return String(value);
  // The round trip through toPrecision trims binary floating-point noise
  // such as 0.30000000000000004.
  const rounded = Number(value.toPrecision(12));
  return String(rounded);
}
86
/**
 * Creates an in-memory store for counter, gauge and histogram series.
 *
 * Each series is keyed by metricKey(name, labels). The combined number of
 * series is capped at MAX_PROMETHEUS_SERIES; an observation that would create
 * a series beyond the cap is discarded and counted, and that count is exposed
 * in snapshots as DROPPED_SERIES_COUNTER_NAME.
 *
 * @returns {{counter: Function, gauge: Function, histogram: Function, reset: Function, snapshot: Function}}
 */
function createPrometheusMetricStore() {
  const counters = new Map();
  const gauges = new Map();
  const histograms = new Map();
  let droppedSeries = 0;

  const totalSeries = () => counters.size + gauges.size + histograms.size;

  // Existing series may always be updated; new ones only while under the cap.
  // Every rejected observation bumps the dropped tally.
  const canCreateSeries = (map, key, metricName) => {
    const allowed = map.has(key)
      || metricName === DROPPED_SERIES_COUNTER_NAME
      || totalSeries() < MAX_PROMETHEUS_SERIES;
    if (!allowed) droppedSeries += 1;
    return allowed;
  };

  // Adds `amount` (default 1) to a counter; non-finite or non-positive amounts are ignored.
  const counter = (name, help, labels, amount = 1) => {
    if (!Number.isFinite(amount) || amount <= 0) return;
    const key = metricKey(name, labels);
    if (!canCreateSeries(counters, key, name)) return;
    const existing = counters.get(key);
    if (!existing) {
      counters.set(key, { help, labels, value: amount });
      return;
    }
    existing.value += amount;
  };

  // Overwrites a gauge with the latest finite value; undefined or non-finite values are ignored.
  const gauge = (name, help, labels, value) => {
    if (value === undefined || !Number.isFinite(value)) return;
    const key = metricKey(name, labels);
    if (!canCreateSeries(gauges, key, name)) return;
    gauges.set(key, { help, labels, value });
  };

  // Records one observation. The bucket layout is fixed by the first
  // observation of a series; `buckets` is ignored for existing series.
  const histogram = (name, help, labels, value, buckets = DURATION_BUCKETS_SECONDS) => {
    if (value === undefined || !Number.isFinite(value) || value < 0) return;
    const key = metricKey(name, labels);
    if (!canCreateSeries(histograms, key, name)) return;
    let sample = histograms.get(key);
    if (!sample) {
      sample = {
        buckets,
        counts: buckets.map(() => 0),
        count: 0,
        help,
        labels,
        sum: 0
      };
      histograms.set(key, sample);
    }
    sample.count += 1;
    sample.sum += value;
    // Counts are cumulative: every bucket whose bound is >= value is incremented.
    sample.buckets.forEach((upperBound, index) => {
      if (upperBound !== undefined && value <= upperBound) {
        sample.counts[index] = (sample.counts[index] ?? 0) + 1;
      }
    });
  };

  // Returns copies of the three maps (the sample objects themselves are
  // shared), adding the dropped tally as a counter when it is non-zero.
  const snapshot = () => {
    const counterSnapshot = new Map(counters);
    if (droppedSeries > 0) {
      counterSnapshot.set(metricKey(DROPPED_SERIES_COUNTER_NAME, {}), {
        help: "Prometheus metric series dropped because the exporter series cap was reached.",
        labels: {},
        value: droppedSeries
      });
    }
    return {
      counters: counterSnapshot,
      gauges: new Map(gauges),
      histograms: new Map(histograms)
    };
  };

  // Forgets every series and zeroes the dropped tally.
  const reset = () => {
    for (const series of [counters, gauges, histograms]) series.clear();
    droppedSeries = 0;
  };

  return { counter, gauge, histogram, reset, snapshot };
}
173
/**
 * Produces a single-line, redacted, length-limited description of a thrown
 * value for use in a log line.
 *
 * @param {unknown} err - The caught value; may be an Error or anything else.
 * @returns {string} At most 500 characters, with NUL, CR, LF, TAB and the
 *   Unicode line/paragraph separators replaced by spaces.
 */
function safeErrorMessage(err) {
  // `||` rather than `??`: an Error built without a message has message "",
  // which is not nullish, so `??` could never fall back to the error name.
  const raw = err instanceof Error ? String(err.message || err.name) : String(err);
  return redactSensitiveText(raw)
    .replaceAll("\0", " ")
    .replace(/[\r\n\t\u2028\u2029]/gu, " ")
    .slice(0, 500);
}
176
/**
 * Serializes a snapshot of the store as exposition text: counters first, then
 * gauges, then histograms, each group ordered by series key.
 *
 * A `# HELP` / `# TYPE` header pair is written once per metric name, before
 * the first series of that name.
 *
 * @param {{snapshot: Function}} store - Store created by createPrometheusMetricStore.
 * @returns {string} Newline-joined exposition text ending with a newline.
 */
function renderPrometheusMetrics(store) {
  const { counters, gauges, histograms } = store.snapshot();
  const output = [];
  const announced = new Set();

  const byKey = (a, b) => a[0].localeCompare(b[0]);
  const orderedEntries = (series) => [...series.entries()].toSorted(byKey);
  // Series keys start with the metric name, followed by "|" and the labels.
  const metricNameOf = (key) => key.split("|", 1)[0] ?? "";

  const announce = (name, type, help) => {
    if (announced.has(name)) return;
    announced.add(name);
    output.push(`# HELP ${name} ${escapeHelp(help)}`, `# TYPE ${name} ${type}`);
  };

  // Counters and gauges share one line shape: name, labels, value.
  const emitScalarFamily = (series, type) => {
    for (const [key, sample] of orderedEntries(series)) {
      const name = metricNameOf(key);
      announce(name, type, sample.help);
      output.push(`${name}${formatLabels(sample.labels)} ${formatPrometheusNumber(sample.value)}`);
    }
  };

  emitScalarFamily(counters, "counter");
  emitScalarFamily(gauges, "gauge");

  for (const [key, sample] of orderedEntries(histograms)) {
    const name = metricNameOf(key);
    announce(name, "histogram", sample.help);
    sample.buckets.forEach((upperBound, index) => {
      if (upperBound === undefined) return;
      const bucketLabels = formatLabels({ ...sample.labels, le: String(upperBound) });
      output.push(`${name}_bucket${bucketLabels} ${formatPrometheusNumber(sample.counts[index] ?? 0)}`);
    });
    // The +Inf bucket always equals the total observation count.
    const observationCount = formatPrometheusNumber(sample.count);
    output.push(`${name}_bucket${formatLabels({ ...sample.labels, le: "+Inf" })} ${observationCount}`);
    const plainLabels = formatLabels(sample.labels);
    output.push(`${name}_sum${plainLabels} ${formatPrometheusNumber(sample.sum)}`);
    output.push(`${name}_count${plainLabels} ${observationCount}`);
  }

  // Trailing empty element so the joined text ends with a newline.
  output.push("");
  return output.join("\n");
}
220
/**
 * Builds the label set for agent-run metrics.
 *
 * @param {object} evt - Run event; reads blockedBy, channel, model, outcome, provider, trigger.
 * @returns {Record<string, string>} Labels; `blocked_by` is present only when evt.blockedBy is truthy.
 */
function runLabels(evt) {
  const labels = {};
  if (evt.blockedBy) labels.blocked_by = lowCardinalityLabel(evt.blockedBy);
  labels.channel = lowCardinalityLabel(evt.channel);
  labels.model = lowCardinalityLabel(evt.model);
  labels.outcome = lowCardinalityLabel(evt.outcome, "unknown");
  labels.provider = lowCardinalityLabel(evt.provider);
  labels.trigger = lowCardinalityLabel(evt.trigger);
  return labels;
}
230
/**
 * Builds the label set for provider model-call metrics.
 *
 * @param {object} evt - A "model.call.completed" or "model.call.error" event.
 * @returns {Record<string, string>} Labels; `outcome` is "error" or "completed",
 *   and `error_category` is "none" unless the event is an error.
 */
function modelCallLabels(evt) {
  const failed = evt.type === "model.call.error";
  const errorCategory = failed ? lowCardinalityLabel(evt.errorCategory, "other") : "none";
  return {
    api: lowCardinalityLabel(evt.api),
    error_category: errorCategory,
    model: lowCardinalityLabel(evt.model),
    outcome: failed ? "error" : "completed",
    provider: lowCardinalityLabel(evt.provider),
    transport: lowCardinalityLabel(evt.transport)
  };
}
240
/**
 * Builds the label set for tool-execution metrics.
 *
 * @param {object} evt - A "tool.execution.completed" or "tool.execution.error" event.
 * @returns {Record<string, string>} Labels; `tool` falls back to "tool" and
 *   `error_category` is "none" unless the event is an error.
 */
function toolExecutionLabels(evt) {
  const failed = evt.type === "tool.execution.error";
  const errorCategory = failed ? lowCardinalityLabel(evt.errorCategory, "other") : "none";
  return {
    error_category: errorCategory,
    outcome: failed ? "error" : "completed",
    params_kind: lowCardinalityLabel(evt.paramsSummary?.kind),
    tool: lowCardinalityLabel(evt.toolName, "tool")
  };
}
248
/**
 * Builds the label set for agent-harness run metrics.
 *
 * @param {object} evt - A "harness.run.completed" or "harness.run.error" event.
 * @returns {Record<string, string>} Labels; for error events `outcome` is
 *   "error" and `phase` comes from the event, otherwise `phase` and
 *   `error_category` are "none" and `outcome` comes from the event.
 */
function harnessLabels(evt) {
  const failed = evt.type === "harness.run.error";
  const errorCategory = failed ? lowCardinalityLabel(evt.errorCategory, "other") : "none";
  const outcome = failed ? "error" : lowCardinalityLabel(evt.outcome);
  const phase = failed ? lowCardinalityLabel(evt.phase) : "none";
  return {
    channel: lowCardinalityLabel(evt.channel),
    error_category: errorCategory,
    harness: lowCardinalityLabel(evt.harnessId),
    model: lowCardinalityLabel(evt.model),
    outcome,
    phase,
    plugin: lowCardinalityLabel(evt.pluginId),
    provider: lowCardinalityLabel(evt.provider)
  };
}
260
/**
 * Builds the label set for session-recovery metrics.
 *
 * @param {object} evt - A "session.recovery.requested" or "session.recovery.completed" event.
 * @returns {Record<string, string>} Labels. For completed events `action` and
 *   `status` come from the event; for requested events `status` is
 *   "requested" and `action` is "abort" or "recover" depending on
 *   evt.allowActiveAbort.
 */
function sessionRecoveryLabels(evt) {
  const completed = evt.type === "session.recovery.completed";
  let action;
  if (completed) {
    action = lowCardinalityLabel(evt.action, "unknown");
  } else {
    action = evt.allowActiveAbort ? "abort" : "recover";
  }
  return {
    action,
    active_work_kind: lowCardinalityLabel(evt.activeWorkKind, "none"),
    // state and status are normalized like every other event-derived label.
    // Stored raw, a missing value would become an `undefined` label that makes
    // escapeLabelValue throw on every later render.
    state: lowCardinalityLabel(evt.state),
    status: completed ? lowCardinalityLabel(evt.status) : "requested"
  };
}
268
/**
 * Builds the label set for talk-event metrics.
 *
 * @param {object} evt - Talk event; reads brain, talkEventType, mode, provider, transport.
 * @returns {Record<string, string>} Labels, each normalized by lowCardinalityLabel.
 */
function talkLabels(evt) {
  const { brain, talkEventType, mode, provider, transport } = evt;
  return {
    brain: lowCardinalityLabel(brain),
    event_type: lowCardinalityLabel(talkEventType),
    mode: lowCardinalityLabel(mode),
    provider: lowCardinalityLabel(provider),
    transport: lowCardinalityLabel(transport)
  };
}
277
/**
 * Records token, cost and duration metrics for a "model.usage" event.
 *
 * @param {object} store - Metric store (uses counter and histogram).
 * @param {object} evt - Usage event; reads agentId, channel, model, provider,
 *   usage, costUsd and durationMs.
 */
function recordModelUsage(store, evt) {
  const labels = {
    agent: lowCardinalityLabel(evt.agentId),
    channel: lowCardinalityLabel(evt.channel),
    model: lowCardinalityLabel(evt.model),
    provider: lowCardinalityLabel(evt.provider)
  };
  const usage = evt.usage;

  // Only input and output tokens feed the usage-distribution histogram.
  const hasDistribution = (tokenType) => tokenType === "input" || tokenType === "output";

  const recordTokens = (tokenType, value) => {
    const amount = numericValue(value);
    // Zero is skipped so that it never lands in the histogram.
    if (amount === undefined || amount === 0) return;
    store.counter(
      "autobot_model_tokens_total",
      "Model tokens reported by diagnostic usage events.",
      { ...labels, token_type: tokenType },
      amount
    );
    if (!hasDistribution(tokenType)) return;
    store.histogram(
      "autobot_gen_ai_client_token_usage",
      "GenAI token usage distribution for input and output tokens.",
      { model: labels.model, provider: labels.provider, token_type: tokenType },
      amount,
      TOKEN_BUCKETS
    );
  };

  const reportedTokens = [
    ["input", usage.input],
    ["output", usage.output],
    ["cache_read", usage.cacheRead],
    ["cache_write", usage.cacheWrite],
    ["prompt", usage.promptTokens],
    ["total", usage.total]
  ];
  for (const [tokenType, value] of reportedTokens) recordTokens(tokenType, value);

  // A missing or invalid cost becomes 0, which the counter ignores.
  const cost = numericValue(evt.costUsd) ?? 0;
  store.counter("autobot_model_cost_usd_total", "Estimated model cost in USD reported by diagnostic usage events.", labels, cost);
  store.histogram("autobot_model_usage_duration_seconds", "Model usage event duration in seconds.", labels, seconds(evt.durationMs));
}
307
/**
 * Translates one diagnostic event into metric updates on the store.
 *
 * Events whose metadata is not marked trusted are ignored, as are unknown
 * event types and the heartbeat / liveness-warning events.
 *
 * Every label value taken from the event goes through lowCardinalityLabel.
 * A raw `undefined` or non-string label would be accepted by the store and
 * then make escapeLabelValue throw on every later render.
 *
 * @param {object} store - Metric store (uses counter, gauge and histogram).
 * @param {object} evt - Diagnostic event; dispatched on `evt.type`.
 * @param {object} metadata - Event metadata; only `trusted` is read.
 */
function recordDiagnosticEvent(store, evt, metadata) {
  if (!metadata.trusted) return;
  switch (evt.type) {
    case "model.usage":
      recordModelUsage(store, evt);
      return;
    case "run.completed": {
      // Label sets are built once per event and shared by the paired metrics.
      const labels = runLabels(evt);
      store.histogram("autobot_run_duration_seconds", "Agent run duration in seconds.", labels, seconds(evt.durationMs));
      store.counter("autobot_run_completed_total", "Agent runs completed by outcome.", labels);
      return;
    }
    case "model.call.completed":
    case "model.call.error": {
      const labels = modelCallLabels(evt);
      store.histogram("autobot_model_call_duration_seconds", "Provider model call duration in seconds.", labels, seconds(evt.durationMs));
      store.counter("autobot_model_call_total", "Provider model calls completed by outcome.", labels);
      return;
    }
    case "tool.execution.completed":
    case "tool.execution.error": {
      const labels = toolExecutionLabels(evt);
      store.histogram("autobot_tool_execution_duration_seconds", "Tool execution duration in seconds.", labels, seconds(evt.durationMs));
      store.counter("autobot_tool_execution_total", "Tool executions completed by outcome.", labels);
      return;
    }
    case "harness.run.completed":
    case "harness.run.error": {
      const labels = harnessLabels(evt);
      store.histogram("autobot_harness_run_duration_seconds", "Agent harness run duration in seconds.", labels, seconds(evt.durationMs));
      store.counter("autobot_harness_run_total", "Agent harness runs completed by outcome.", labels);
      return;
    }
    case "message.processed": {
      const labels = {
        channel: lowCardinalityLabel(evt.channel),
        outcome: lowCardinalityLabel(evt.outcome),
        reason: lowCardinalityLabel(evt.reason, "none")
      };
      store.counter("autobot_message_processed_total", "Inbound messages processed by outcome.", labels);
      store.histogram("autobot_message_processed_duration_seconds", "Inbound message processing duration in seconds.", labels, seconds(evt.durationMs));
      return;
    }
    case "message.delivery.started":
      store.counter("autobot_message_delivery_started_total", "Outbound message delivery attempts started.", {
        channel: lowCardinalityLabel(evt.channel),
        delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other")
      });
      return;
    case "message.delivery.completed":
    case "message.delivery.error": {
      const failed = evt.type === "message.delivery.error";
      const labels = {
        channel: lowCardinalityLabel(evt.channel),
        delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other"),
        error_category: failed ? lowCardinalityLabel(evt.errorCategory, "other") : "none",
        outcome: failed ? "error" : "completed"
      };
      store.counter("autobot_message_delivery_total", "Outbound message delivery attempts by outcome.", labels);
      store.histogram("autobot_message_delivery_duration_seconds", "Outbound message delivery duration in seconds.", labels, seconds(evt.durationMs));
      return;
    }
    case "talk.event": {
      const labels = talkLabels(evt);
      store.counter("autobot_talk_event_total", "Talk events emitted by type.", labels);
      store.histogram("autobot_talk_event_duration_seconds", "Talk event duration in seconds when reported.", labels, seconds(evt.durationMs));
      store.histogram("autobot_talk_audio_bytes", "Talk audio frame byte lengths.", labels, numericValue(evt.byteLength), BYTE_BUCKETS);
      return;
    }
    case "session.recovery.requested":
    case "session.recovery.completed": {
      const labels = sessionRecoveryLabels(evt);
      store.counter("autobot_session_recovery_total", "Session recovery observations by status and action.", labels);
      store.histogram("autobot_session_recovery_age_seconds", "Age of sessions selected for recovery in seconds.", labels, seconds(evt.ageMs));
      return;
    }
    case "queue.lane.enqueue":
    case "queue.lane.dequeue": {
      const labels = { lane: lowCardinalityLabel(evt.lane) };
      store.gauge("autobot_queue_lane_size", "Current diagnostic queue lane size.", labels, numericValue(evt.queueSize));
      // Wait time is only observed on dequeue events.
      if (evt.type === "queue.lane.dequeue") {
        store.histogram("autobot_queue_lane_wait_seconds", "Queue lane wait time in seconds.", labels, seconds(evt.waitMs));
      }
      return;
    }
    case "session.state": {
      const state = lowCardinalityLabel(evt.state);
      store.counter("autobot_session_state_total", "Session state observations.", {
        reason: lowCardinalityLabel(evt.reason, "none"),
        state
      });
      if (evt.queueDepth !== undefined) {
        store.gauge("autobot_session_queue_depth", "Latest observed session queue depth.", { state }, numericValue(evt.queueDepth));
      }
      return;
    }
    case "diagnostic.memory.sample":
      // The gauge itself rejects undefined and non-finite values.
      store.gauge("autobot_memory_bytes", "Latest process memory usage by memory kind.", { kind: "rss" }, evt.memory.rssBytes);
      store.gauge("autobot_memory_bytes", "Latest process memory usage by memory kind.", { kind: "heap_total" }, evt.memory.heapTotalBytes);
      store.gauge("autobot_memory_bytes", "Latest process memory usage by memory kind.", { kind: "heap_used" }, evt.memory.heapUsedBytes);
      store.histogram("autobot_memory_rss_bytes", "RSS memory sample distribution in bytes.", {}, numericValue(evt.memory.rssBytes), BYTE_BUCKETS);
      return;
    case "diagnostic.memory.pressure":
      store.counter("autobot_memory_pressure_total", "Memory pressure events by level and reason.", {
        level: lowCardinalityLabel(evt.level),
        reason: lowCardinalityLabel(evt.reason)
      });
      return;
    case "diagnostic.heartbeat":
    case "diagnostic.liveness.warning":
      return;
    case "telemetry.exporter":
      store.counter("autobot_telemetry_exporter_total", "Telemetry exporter lifecycle events.", {
        exporter: lowCardinalityLabel(evt.exporter),
        reason: lowCardinalityLabel(evt.reason, "none"),
        signal: lowCardinalityLabel(evt.signal),
        status: lowCardinalityLabel(evt.status)
      });
      return;
    default:
      return;
  }
}
412
/**
 * Creates the HTTP handler that serves the store as Prometheus exposition text.
 *
 * GET answers 200 with the rendered metrics. HEAD answers 200 with the same
 * headers and no body. Any other method answers 405 with an `Allow` header.
 *
 * @param {{snapshot: Function}} store - Metric store to render.
 * @returns {(req: object, res: object) => boolean} Handler; always returns
 *   `true` (presumably "request handled" — confirm against the route contract).
 */
function createMetricsHandler(store) {
  return (req, res) => {
    const { method } = req;
    if (method !== "GET" && method !== "HEAD") {
      res.statusCode = 405;
      res.setHeader("Allow", "GET, HEAD");
      res.end("Method Not Allowed");
      return true;
    }
    res.statusCode = 200;
    res.setHeader("Cache-Control", "no-store");
    res.setHeader("Content-Type", "text/plain; version=0.0.4; charset=utf-8");
    if (method === "HEAD") {
      // HEAD sends no body, so skip the snapshot, sort and render work.
      res.end();
      return true;
    }
    res.end(renderPrometheusMetrics(store));
    return true;
  };
}
432
/**
 * Creates the exporter: one metric store, the HTTP handler that serves it,
 * a `render` shortcut, and the service that feeds the store from diagnostic
 * events.
 *
 * @returns {{handler: Function, render: Function, service: {id: string, start: Function, stop: Function}}}
 */
function createDiagnosticsPrometheusExporter() {
  const store = createPrometheusMetricStore();
  let unsubscribe;
  return {
    handler: createMetricsHandler(store),
    render: () => renderPrometheusMetrics(store),
    service: {
      id: "diagnostics-prometheus",
      /**
       * Subscribes to internal diagnostic events and announces the exporter.
       * Logs an error and does nothing when the capability is unavailable.
       */
      start(ctx) {
        const diagnostics = ctx.internalDiagnostics;
        if (typeof diagnostics?.onEvent !== "function") {
          ctx.logger.error("diagnostics-prometheus: internal diagnostics capability unavailable");
          return;
        }
        // A repeated start() without stop() would otherwise overwrite the
        // only reference to the previous subscription and leak it.
        unsubscribe?.();
        // Called as a method so onEvent keeps its `this` binding.
        unsubscribe = diagnostics.onEvent((event, metadata) => {
          try {
            recordDiagnosticEvent(store, event, metadata);
          } catch (err) {
            // A failure on one event must not break the subscription.
            ctx.logger.error(`diagnostics-prometheus: event handler failed (${event?.type}): ${safeErrorMessage(err)}`);
          }
        });
        diagnostics.emit({
          type: "telemetry.exporter",
          exporter: "diagnostics-prometheus",
          signal: "metrics",
          status: "started",
          reason: "configured"
        });
      },
      /** Drops the subscription, if any, and clears all recorded metrics. */
      stop() {
        unsubscribe?.();
        unsubscribe = undefined;
        store.reset();
      }
    }
  };
}
469
+ //#endregion
470
+ //#region extensions/diagnostics-prometheus/index.ts
471
// A single exporter is created when the module loads; its service and HTTP
// handler share one metric store.
const exporter = createDiagnosticsPrometheusExporter();

// Plugin entry: registers the event-collecting service and the metrics route.
const diagnostics_prometheus_default = definePluginEntry({
  id: "diagnostics-prometheus",
  name: "Diagnostics Prometheus",
  description: "Expose AutoBot diagnostics metrics in Prometheus text format",
  register(api) {
    const { service, handler } = exporter;
    api.registerService(service);
    api.registerHttpRoute({
      path: "/api/diagnostics/prometheus",
      auth: "gateway",
      match: "exact",
      gatewayRuntimeScopeSurface: "trusted-operator",
      handler
    });
  }
});
487
+ //#endregion
488
+ export { diagnostics_prometheus_default as default };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gakr-gakr/diagnostics-prometheus",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "AutoBot diagnostics Prometheus exporter",
5
5
  "repository": {
6
6
  "type": "git",
@@ -29,6 +29,21 @@
29
29
  "release": {
30
30
  "publishToClawHub": true,
31
31
  "publishToNpm": true
32
+ },
33
+ "runtimeExtensions": [
34
+ "./dist/index.js"
35
+ ]
36
+ },
37
+ "files": [
38
+ "dist/**",
39
+ "autobot.plugin.json"
40
+ ],
41
+ "peerDependencies": {
42
+ "autobot": ">=2026.5.19"
43
+ },
44
+ "peerDependenciesMeta": {
45
+ "autobot": {
46
+ "optional": true
32
47
  }
33
48
  }
34
49
  }
package/api.ts DELETED
@@ -1,12 +0,0 @@
1
- export type {
2
- DiagnosticEventMetadata,
3
- DiagnosticEventPayload,
4
- } from "autobot/plugin-sdk/diagnostic-runtime";
5
- export {
6
- emptyPluginConfigSchema,
7
- type AutoBotPluginApi,
8
- type AutoBotPluginHttpRouteHandler,
9
- type AutoBotPluginService,
10
- type AutoBotPluginServiceContext,
11
- } from "autobot/plugin-sdk/plugin-entry";
12
- export { redactSensitiveText } from "autobot/plugin-sdk/security-runtime";
package/index.ts DELETED
@@ -1,20 +0,0 @@
1
- import { definePluginEntry } from "autobot/plugin-sdk/plugin-entry";
2
- import { createDiagnosticsPrometheusExporter } from "./src/service.js";
3
-
4
- const exporter = createDiagnosticsPrometheusExporter();
5
-
6
- export default definePluginEntry({
7
- id: "diagnostics-prometheus",
8
- name: "Diagnostics Prometheus",
9
- description: "Expose AutoBot diagnostics metrics in Prometheus text format",
10
- register(api) {
11
- api.registerService(exporter.service);
12
- api.registerHttpRoute({
13
- path: "/api/diagnostics/prometheus",
14
- auth: "gateway",
15
- match: "exact",
16
- gatewayRuntimeScopeSurface: "trusted-operator",
17
- handler: exporter.handler,
18
- });
19
- },
20
- });
package/src/service.ts DELETED
@@ -1,759 +0,0 @@
1
- import type { IncomingMessage, ServerResponse } from "node:http";
2
- import type {
3
- DiagnosticEventMetadata,
4
- DiagnosticEventPayload,
5
- AutoBotPluginHttpRouteHandler,
6
- AutoBotPluginService,
7
- } from "../api.js";
8
- import { redactSensitiveText } from "../api.js";
9
-
10
- type LabelSet = Record<string, string>;
11
-
12
- type CounterSample = {
13
- help: string;
14
- labels: LabelSet;
15
- value: number;
16
- };
17
-
18
- type HistogramSample = {
19
- buckets: number[];
20
- counts: number[];
21
- count: number;
22
- help: string;
23
- labels: LabelSet;
24
- sum: number;
25
- };
26
-
27
- type GaugeSample = {
28
- help: string;
29
- labels: LabelSet;
30
- value: number;
31
- };
32
-
33
- type MetricSnapshot = {
34
- counters: Map<string, CounterSample>;
35
- gauges: Map<string, GaugeSample>;
36
- histograms: Map<string, HistogramSample>;
37
- };
38
-
39
- type PrometheusMetricStore = ReturnType<typeof createPrometheusMetricStore>;
40
-
41
- const DURATION_BUCKETS_SECONDS = [
42
- 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 300, 600,
43
- ];
44
- const TOKEN_BUCKETS = [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576];
45
- const BYTE_BUCKETS = [
46
- 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864, 268435456, 1073741824,
47
- 4294967296, 17179869184,
48
- ];
49
- const LOW_CARDINALITY_VALUE_RE = /^[A-Za-z0-9_.:-]{1,120}$/u;
50
- const MAX_PROMETHEUS_SERIES = 2048;
51
- const DROPPED_SERIES_COUNTER_NAME = "autobot_prometheus_series_dropped_total";
52
-
53
- function lowCardinalityLabel(value: string | undefined, fallback = "unknown"): string {
54
- if (!value) {
55
- return fallback;
56
- }
57
- const redacted = redactSensitiveText(value.trim());
58
- return LOW_CARDINALITY_VALUE_RE.test(redacted) ? redacted : fallback;
59
- }
60
-
61
- function numericValue(value: number | undefined): number | undefined {
62
- return typeof value === "number" && Number.isFinite(value) && value >= 0 ? value : undefined;
63
- }
64
-
65
- function seconds(ms: number | undefined): number | undefined {
66
- const value = numericValue(ms);
67
- return value === undefined ? undefined : value / 1000;
68
- }
69
-
70
- function sortedLabels(labels: LabelSet): [string, string][] {
71
- return Object.entries(labels).toSorted(([left], [right]) => left.localeCompare(right));
72
- }
73
-
74
- function metricKey(name: string, labels: LabelSet): string {
75
- return `${name}|${JSON.stringify(sortedLabels(labels))}`;
76
- }
77
-
78
- function escapeHelp(value: string): string {
79
- return value.replace(/\\/g, "\\\\").replace(/\n/g, "\\n");
80
- }
81
-
82
- function escapeLabelValue(value: string): string {
83
- return value.replace(/\\/g, "\\\\").replace(/\n/g, "\\n").replace(/"/g, '\\"');
84
- }
85
-
86
- function formatLabels(labels: LabelSet): string {
87
- const entries = sortedLabels(labels);
88
- if (entries.length === 0) {
89
- return "";
90
- }
91
- return `{${entries.map(([key, value]) => `${key}="${escapeLabelValue(value)}"`).join(",")}}`;
92
- }
93
-
94
- function formatPrometheusNumber(value: number): string {
95
- if (!Number.isFinite(value)) {
96
- return "0";
97
- }
98
- return Number.isInteger(value) ? String(value) : String(Number(value.toPrecision(12)));
99
- }
100
-
101
- function createPrometheusMetricStore() {
102
- const counters = new Map<string, CounterSample>();
103
- const gauges = new Map<string, GaugeSample>();
104
- const histograms = new Map<string, HistogramSample>();
105
- let droppedSeries = 0;
106
-
107
- const canCreateSeries = <T>(map: Map<string, T>, key: string, metricName: string): boolean => {
108
- if (map.has(key)) {
109
- return true;
110
- }
111
- if (metricName === DROPPED_SERIES_COUNTER_NAME) {
112
- return true;
113
- }
114
- if (counters.size + gauges.size + histograms.size < MAX_PROMETHEUS_SERIES) {
115
- return true;
116
- }
117
- droppedSeries += 1;
118
- return false;
119
- };
120
-
121
- const counter = (name: string, help: string, labels: LabelSet, amount = 1) => {
122
- if (!Number.isFinite(amount) || amount <= 0) {
123
- return;
124
- }
125
- const key = metricKey(name, labels);
126
- if (!canCreateSeries(counters, key, name)) {
127
- return;
128
- }
129
- const existing = counters.get(key);
130
- if (existing) {
131
- existing.value += amount;
132
- return;
133
- }
134
- counters.set(key, { help, labels, value: amount });
135
- };
136
-
137
- const gauge = (name: string, help: string, labels: LabelSet, value: number | undefined) => {
138
- if (value === undefined || !Number.isFinite(value)) {
139
- return;
140
- }
141
- const key = metricKey(name, labels);
142
- if (!canCreateSeries(gauges, key, name)) {
143
- return;
144
- }
145
- gauges.set(key, { help, labels, value });
146
- };
147
-
148
- const histogram = (
149
- name: string,
150
- help: string,
151
- labels: LabelSet,
152
- value: number | undefined,
153
- buckets = DURATION_BUCKETS_SECONDS,
154
- ) => {
155
- if (value === undefined || !Number.isFinite(value) || value < 0) {
156
- return;
157
- }
158
- const key = metricKey(name, labels);
159
- if (!canCreateSeries(histograms, key, name)) {
160
- return;
161
- }
162
- let sample = histograms.get(key);
163
- if (!sample) {
164
- sample = {
165
- buckets,
166
- counts: buckets.map(() => 0),
167
- count: 0,
168
- help,
169
- labels,
170
- sum: 0,
171
- };
172
- histograms.set(key, sample);
173
- }
174
- sample.count += 1;
175
- sample.sum += value;
176
- for (let index = 0; index < sample.buckets.length; index += 1) {
177
- const bucket = sample.buckets[index];
178
- if (bucket !== undefined && value <= bucket) {
179
- sample.counts[index] = (sample.counts[index] ?? 0) + 1;
180
- }
181
- }
182
- };
183
-
184
- const snapshot = (): MetricSnapshot => {
185
- const counterSnapshot = new Map(counters);
186
- if (droppedSeries > 0) {
187
- counterSnapshot.set(metricKey(DROPPED_SERIES_COUNTER_NAME, {}), {
188
- help: "Prometheus metric series dropped because the exporter series cap was reached.",
189
- labels: {},
190
- value: droppedSeries,
191
- });
192
- }
193
- return {
194
- counters: counterSnapshot,
195
- gauges: new Map(gauges),
196
- histograms: new Map(histograms),
197
- };
198
- };
199
-
200
- const reset = () => {
201
- counters.clear();
202
- gauges.clear();
203
- histograms.clear();
204
- droppedSeries = 0;
205
- };
206
-
207
- return { counter, gauge, histogram, reset, snapshot };
208
- }
209
-
210
- function safeErrorMessage(err: unknown): string {
211
- const message = err instanceof Error ? (err.message ?? err.name) : String(err);
212
- return redactSensitiveText(message)
213
- .replaceAll("\u0000", " ")
214
- .replace(/[\r\n\t\u2028\u2029]/gu, " ")
215
- .slice(0, 500);
216
- }
217
-
218
- function renderPrometheusMetrics(store: PrometheusMetricStore): string {
219
- const snapshot = store.snapshot();
220
- const lines: string[] = [];
221
- const emitted = new Set<string>();
222
-
223
- const emitHeader = (name: string, type: "counter" | "gauge" | "histogram", help: string) => {
224
- if (emitted.has(name)) {
225
- return;
226
- }
227
- emitted.add(name);
228
- lines.push(`# HELP ${name} ${escapeHelp(help)}`);
229
- lines.push(`# TYPE ${name} ${type}`);
230
- };
231
-
232
- const counterEntries = [...snapshot.counters.entries()].toSorted(([left], [right]) =>
233
- left.localeCompare(right),
234
- );
235
- for (const [key, sample] of counterEntries) {
236
- const name = key.split("|", 1)[0] ?? "";
237
- emitHeader(name, "counter", sample.help);
238
- lines.push(`${name}${formatLabels(sample.labels)} ${formatPrometheusNumber(sample.value)}`);
239
- }
240
-
241
- const gaugeEntries = [...snapshot.gauges.entries()].toSorted(([left], [right]) =>
242
- left.localeCompare(right),
243
- );
244
- for (const [key, sample] of gaugeEntries) {
245
- const name = key.split("|", 1)[0] ?? "";
246
- emitHeader(name, "gauge", sample.help);
247
- lines.push(`${name}${formatLabels(sample.labels)} ${formatPrometheusNumber(sample.value)}`);
248
- }
249
-
250
- const histogramEntries = [...snapshot.histograms.entries()].toSorted(([left], [right]) =>
251
- left.localeCompare(right),
252
- );
253
- for (const [key, sample] of histogramEntries) {
254
- const name = key.split("|", 1)[0] ?? "";
255
- emitHeader(name, "histogram", sample.help);
256
- for (let index = 0; index < sample.buckets.length; index += 1) {
257
- const bucket = sample.buckets[index];
258
- if (bucket === undefined) {
259
- continue;
260
- }
261
- lines.push(
262
- `${name}_bucket${formatLabels({ ...sample.labels, le: String(bucket) })} ${formatPrometheusNumber(sample.counts[index] ?? 0)}`,
263
- );
264
- }
265
- lines.push(
266
- `${name}_bucket${formatLabels({ ...sample.labels, le: "+Inf" })} ${formatPrometheusNumber(sample.count)}`,
267
- );
268
- lines.push(`${name}_sum${formatLabels(sample.labels)} ${formatPrometheusNumber(sample.sum)}`);
269
- lines.push(
270
- `${name}_count${formatLabels(sample.labels)} ${formatPrometheusNumber(sample.count)}`,
271
- );
272
- }
273
-
274
- lines.push("");
275
- return lines.join("\n");
276
- }
277
-
278
- function runLabels(evt: {
279
- blockedBy?: string;
280
- channel?: string;
281
- model?: string;
282
- outcome?: string;
283
- provider?: string;
284
- trigger?: string;
285
- }): LabelSet {
286
- return {
287
- ...(evt.blockedBy ? { blocked_by: lowCardinalityLabel(evt.blockedBy) } : {}),
288
- channel: lowCardinalityLabel(evt.channel),
289
- model: lowCardinalityLabel(evt.model),
290
- outcome: lowCardinalityLabel(evt.outcome, "unknown"),
291
- provider: lowCardinalityLabel(evt.provider),
292
- trigger: lowCardinalityLabel(evt.trigger),
293
- };
294
- }
295
-
296
- function modelCallLabels(evt: {
297
- api?: string;
298
- errorCategory?: string;
299
- model?: string;
300
- provider?: string;
301
- transport?: string;
302
- type: string;
303
- }): LabelSet {
304
- return {
305
- api: lowCardinalityLabel(evt.api),
306
- error_category:
307
- evt.type === "model.call.error" ? lowCardinalityLabel(evt.errorCategory, "other") : "none",
308
- model: lowCardinalityLabel(evt.model),
309
- outcome: evt.type === "model.call.error" ? "error" : "completed",
310
- provider: lowCardinalityLabel(evt.provider),
311
- transport: lowCardinalityLabel(evt.transport),
312
- };
313
- }
314
-
315
- function toolExecutionLabels(evt: {
316
- errorCategory?: string;
317
- paramsSummary?: { kind: string };
318
- toolName: string;
319
- type: string;
320
- }): LabelSet {
321
- return {
322
- error_category:
323
- evt.type === "tool.execution.error"
324
- ? lowCardinalityLabel(evt.errorCategory, "other")
325
- : "none",
326
- outcome: evt.type === "tool.execution.error" ? "error" : "completed",
327
- params_kind: lowCardinalityLabel(evt.paramsSummary?.kind),
328
- tool: lowCardinalityLabel(evt.toolName, "tool"),
329
- };
330
- }
331
-
332
- function harnessLabels(evt: {
333
- channel?: string;
334
- errorCategory?: string;
335
- harnessId: string;
336
- model?: string;
337
- outcome?: string;
338
- phase?: string;
339
- pluginId?: string;
340
- provider?: string;
341
- type: string;
342
- }): LabelSet {
343
- return {
344
- channel: lowCardinalityLabel(evt.channel),
345
- error_category:
346
- evt.type === "harness.run.error" ? lowCardinalityLabel(evt.errorCategory, "other") : "none",
347
- harness: lowCardinalityLabel(evt.harnessId),
348
- model: lowCardinalityLabel(evt.model),
349
- outcome: evt.type === "harness.run.error" ? "error" : lowCardinalityLabel(evt.outcome),
350
- phase: evt.type === "harness.run.error" ? lowCardinalityLabel(evt.phase) : "none",
351
- plugin: lowCardinalityLabel(evt.pluginId),
352
- provider: lowCardinalityLabel(evt.provider),
353
- };
354
- }
355
-
356
- function sessionRecoveryLabels(
357
- evt: Extract<
358
- DiagnosticEventPayload,
359
- { type: "session.recovery.requested" | "session.recovery.completed" }
360
- >,
361
- ): LabelSet {
362
- return {
363
- action:
364
- evt.type === "session.recovery.completed"
365
- ? lowCardinalityLabel(evt.action, "unknown")
366
- : evt.allowActiveAbort
367
- ? "abort"
368
- : "recover",
369
- active_work_kind: lowCardinalityLabel(evt.activeWorkKind, "none"),
370
- state: evt.state,
371
- status: evt.type === "session.recovery.completed" ? evt.status : "requested",
372
- };
373
- }
374
-
375
- function talkLabels(evt: Extract<DiagnosticEventPayload, { type: "talk.event" }>): LabelSet {
376
- return {
377
- brain: lowCardinalityLabel(evt.brain),
378
- event_type: lowCardinalityLabel(evt.talkEventType),
379
- mode: lowCardinalityLabel(evt.mode),
380
- provider: lowCardinalityLabel(evt.provider),
381
- transport: lowCardinalityLabel(evt.transport),
382
- };
383
- }
384
-
385
- function recordModelUsage(
386
- store: PrometheusMetricStore,
387
- evt: Extract<DiagnosticEventPayload, { type: "model.usage" }>,
388
- ) {
389
- const labels = {
390
- agent: lowCardinalityLabel(evt.agentId),
391
- channel: lowCardinalityLabel(evt.channel),
392
- model: lowCardinalityLabel(evt.model),
393
- provider: lowCardinalityLabel(evt.provider),
394
- };
395
- const usage = evt.usage;
396
- const recordTokens = (tokenType: string, value: number | undefined) => {
397
- const amount = numericValue(value);
398
- if (amount === undefined || amount === 0) {
399
- return;
400
- }
401
- store.counter(
402
- "autobot_model_tokens_total",
403
- "Model tokens reported by diagnostic usage events.",
404
- {
405
- ...labels,
406
- token_type: tokenType,
407
- },
408
- amount,
409
- );
410
- if (tokenType === "input" || tokenType === "output") {
411
- store.histogram(
412
- "autobot_gen_ai_client_token_usage",
413
- "GenAI token usage distribution for input and output tokens.",
414
- {
415
- model: labels.model,
416
- provider: labels.provider,
417
- token_type: tokenType,
418
- },
419
- amount,
420
- TOKEN_BUCKETS,
421
- );
422
- }
423
- };
424
-
425
- recordTokens("input", usage.input);
426
- recordTokens("output", usage.output);
427
- recordTokens("cache_read", usage.cacheRead);
428
- recordTokens("cache_write", usage.cacheWrite);
429
- recordTokens("prompt", usage.promptTokens);
430
- recordTokens("total", usage.total);
431
-
432
- store.counter(
433
- "autobot_model_cost_usd_total",
434
- "Estimated model cost in USD reported by diagnostic usage events.",
435
- labels,
436
- numericValue(evt.costUsd) ?? 0,
437
- );
438
- store.histogram(
439
- "autobot_model_usage_duration_seconds",
440
- "Model usage event duration in seconds.",
441
- labels,
442
- seconds(evt.durationMs),
443
- );
444
- }
445
-
446
- function recordDiagnosticEvent(
447
- store: PrometheusMetricStore,
448
- evt: DiagnosticEventPayload,
449
- metadata: DiagnosticEventMetadata,
450
- ): void {
451
- if (!metadata.trusted) {
452
- return;
453
- }
454
-
455
- switch (evt.type) {
456
- case "model.usage":
457
- recordModelUsage(store, evt);
458
- return;
459
- case "run.completed":
460
- store.histogram(
461
- "autobot_run_duration_seconds",
462
- "Agent run duration in seconds.",
463
- runLabels(evt),
464
- seconds(evt.durationMs),
465
- );
466
- store.counter(
467
- "autobot_run_completed_total",
468
- "Agent runs completed by outcome.",
469
- runLabels(evt),
470
- );
471
- return;
472
- case "model.call.completed":
473
- case "model.call.error":
474
- store.histogram(
475
- "autobot_model_call_duration_seconds",
476
- "Provider model call duration in seconds.",
477
- modelCallLabels(evt),
478
- seconds(evt.durationMs),
479
- );
480
- store.counter(
481
- "autobot_model_call_total",
482
- "Provider model calls completed by outcome.",
483
- modelCallLabels(evt),
484
- );
485
- return;
486
- case "tool.execution.completed":
487
- case "tool.execution.error":
488
- store.histogram(
489
- "autobot_tool_execution_duration_seconds",
490
- "Tool execution duration in seconds.",
491
- toolExecutionLabels(evt),
492
- seconds(evt.durationMs),
493
- );
494
- store.counter(
495
- "autobot_tool_execution_total",
496
- "Tool executions completed by outcome.",
497
- toolExecutionLabels(evt),
498
- );
499
- return;
500
- case "harness.run.completed":
501
- case "harness.run.error":
502
- store.histogram(
503
- "autobot_harness_run_duration_seconds",
504
- "Agent harness run duration in seconds.",
505
- harnessLabels(evt),
506
- seconds(evt.durationMs),
507
- );
508
- store.counter(
509
- "autobot_harness_run_total",
510
- "Agent harness runs completed by outcome.",
511
- harnessLabels(evt),
512
- );
513
- return;
514
- case "message.processed":
515
- store.counter("autobot_message_processed_total", "Inbound messages processed by outcome.", {
516
- channel: lowCardinalityLabel(evt.channel),
517
- outcome: evt.outcome,
518
- reason: lowCardinalityLabel(evt.reason, "none"),
519
- });
520
- store.histogram(
521
- "autobot_message_processed_duration_seconds",
522
- "Inbound message processing duration in seconds.",
523
- {
524
- channel: lowCardinalityLabel(evt.channel),
525
- outcome: evt.outcome,
526
- reason: lowCardinalityLabel(evt.reason, "none"),
527
- },
528
- seconds(evt.durationMs),
529
- );
530
- return;
531
- case "message.delivery.started":
532
- store.counter(
533
- "autobot_message_delivery_started_total",
534
- "Outbound message delivery attempts started.",
535
- {
536
- channel: lowCardinalityLabel(evt.channel),
537
- delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other"),
538
- },
539
- );
540
- return;
541
- case "message.delivery.completed":
542
- case "message.delivery.error":
543
- store.counter(
544
- "autobot_message_delivery_total",
545
- "Outbound message delivery attempts by outcome.",
546
- {
547
- channel: lowCardinalityLabel(evt.channel),
548
- delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other"),
549
- error_category:
550
- evt.type === "message.delivery.error"
551
- ? lowCardinalityLabel(evt.errorCategory, "other")
552
- : "none",
553
- outcome: evt.type === "message.delivery.error" ? "error" : "completed",
554
- },
555
- );
556
- store.histogram(
557
- "autobot_message_delivery_duration_seconds",
558
- "Outbound message delivery duration in seconds.",
559
- {
560
- channel: lowCardinalityLabel(evt.channel),
561
- delivery_kind: lowCardinalityLabel(evt.deliveryKind, "other"),
562
- error_category:
563
- evt.type === "message.delivery.error"
564
- ? lowCardinalityLabel(evt.errorCategory, "other")
565
- : "none",
566
- outcome: evt.type === "message.delivery.error" ? "error" : "completed",
567
- },
568
- seconds(evt.durationMs),
569
- );
570
- return;
571
- case "talk.event":
572
- store.counter("autobot_talk_event_total", "Talk events emitted by type.", talkLabels(evt));
573
- store.histogram(
574
- "autobot_talk_event_duration_seconds",
575
- "Talk event duration in seconds when reported.",
576
- talkLabels(evt),
577
- seconds(evt.durationMs),
578
- );
579
- store.histogram(
580
- "autobot_talk_audio_bytes",
581
- "Talk audio frame byte lengths.",
582
- talkLabels(evt),
583
- numericValue(evt.byteLength),
584
- BYTE_BUCKETS,
585
- );
586
- return;
587
- case "session.recovery.requested":
588
- case "session.recovery.completed":
589
- store.counter(
590
- "autobot_session_recovery_total",
591
- "Session recovery observations by status and action.",
592
- sessionRecoveryLabels(evt),
593
- );
594
- store.histogram(
595
- "autobot_session_recovery_age_seconds",
596
- "Age of sessions selected for recovery in seconds.",
597
- sessionRecoveryLabels(evt),
598
- seconds(evt.ageMs),
599
- );
600
- return;
601
- case "queue.lane.enqueue":
602
- case "queue.lane.dequeue":
603
- store.gauge(
604
- "autobot_queue_lane_size",
605
- "Current diagnostic queue lane size.",
606
- {
607
- lane: lowCardinalityLabel(evt.lane),
608
- },
609
- numericValue(evt.queueSize),
610
- );
611
- if (evt.type === "queue.lane.dequeue") {
612
- store.histogram(
613
- "autobot_queue_lane_wait_seconds",
614
- "Queue lane wait time in seconds.",
615
- { lane: lowCardinalityLabel(evt.lane) },
616
- seconds(evt.waitMs),
617
- );
618
- }
619
- return;
620
- case "session.state":
621
- store.counter("autobot_session_state_total", "Session state observations.", {
622
- reason: lowCardinalityLabel(evt.reason, "none"),
623
- state: evt.state,
624
- });
625
- if (evt.queueDepth !== undefined) {
626
- store.gauge(
627
- "autobot_session_queue_depth",
628
- "Latest observed session queue depth.",
629
- {
630
- state: evt.state,
631
- },
632
- numericValue(evt.queueDepth),
633
- );
634
- }
635
- return;
636
- case "diagnostic.memory.sample":
637
- store.gauge(
638
- "autobot_memory_bytes",
639
- "Latest process memory usage by memory kind.",
640
- { kind: "rss" },
641
- evt.memory.rssBytes,
642
- );
643
- store.gauge(
644
- "autobot_memory_bytes",
645
- "Latest process memory usage by memory kind.",
646
- { kind: "heap_total" },
647
- evt.memory.heapTotalBytes,
648
- );
649
- store.gauge(
650
- "autobot_memory_bytes",
651
- "Latest process memory usage by memory kind.",
652
- { kind: "heap_used" },
653
- evt.memory.heapUsedBytes,
654
- );
655
- store.histogram(
656
- "autobot_memory_rss_bytes",
657
- "RSS memory sample distribution in bytes.",
658
- {},
659
- numericValue(evt.memory.rssBytes),
660
- BYTE_BUCKETS,
661
- );
662
- return;
663
- case "diagnostic.memory.pressure":
664
- store.counter(
665
- "autobot_memory_pressure_total",
666
- "Memory pressure events by level and reason.",
667
- {
668
- level: evt.level,
669
- reason: evt.reason,
670
- },
671
- );
672
- return;
673
- case "diagnostic.heartbeat":
674
- case "diagnostic.liveness.warning":
675
- return;
676
- case "telemetry.exporter":
677
- store.counter("autobot_telemetry_exporter_total", "Telemetry exporter lifecycle events.", {
678
- exporter: lowCardinalityLabel(evt.exporter),
679
- reason: lowCardinalityLabel(evt.reason, "none"),
680
- signal: evt.signal,
681
- status: evt.status,
682
- });
683
- return;
684
- default:
685
- return;
686
- }
687
- }
688
-
689
- function createMetricsHandler(store: PrometheusMetricStore): AutoBotPluginHttpRouteHandler {
690
- return (req: IncomingMessage, res: ServerResponse) => {
691
- if (req.method !== "GET" && req.method !== "HEAD") {
692
- res.statusCode = 405;
693
- res.setHeader("Allow", "GET, HEAD");
694
- res.end("Method Not Allowed");
695
- return true;
696
- }
697
-
698
- const body = renderPrometheusMetrics(store);
699
- res.statusCode = 200;
700
- res.setHeader("Cache-Control", "no-store");
701
- res.setHeader("Content-Type", "text/plain; version=0.0.4; charset=utf-8");
702
- if (req.method === "HEAD") {
703
- res.end();
704
- return true;
705
- }
706
- res.end(body);
707
- return true;
708
- };
709
- }
710
-
711
- export function createDiagnosticsPrometheusExporter() {
712
- const store = createPrometheusMetricStore();
713
- let unsubscribe: (() => void) | undefined;
714
-
715
- const service = {
716
- id: "diagnostics-prometheus",
717
- start(ctx) {
718
- const subscribe = ctx.internalDiagnostics?.onEvent;
719
- if (!subscribe) {
720
- ctx.logger.error("diagnostics-prometheus: internal diagnostics capability unavailable");
721
- return;
722
- }
723
- unsubscribe = subscribe((event, metadata) => {
724
- try {
725
- recordDiagnosticEvent(store, event, metadata);
726
- } catch (err) {
727
- ctx.logger.error(
728
- `diagnostics-prometheus: event handler failed (${event.type}): ${safeErrorMessage(err)}`,
729
- );
730
- }
731
- });
732
- ctx.internalDiagnostics?.emit({
733
- type: "telemetry.exporter",
734
- exporter: "diagnostics-prometheus",
735
- signal: "metrics",
736
- status: "started",
737
- reason: "configured",
738
- });
739
- },
740
- stop() {
741
- unsubscribe?.();
742
- unsubscribe = undefined;
743
- store.reset();
744
- },
745
- } satisfies AutoBotPluginService;
746
-
747
- return {
748
- handler: createMetricsHandler(store),
749
- render: () => renderPrometheusMetrics(store),
750
- service,
751
- };
752
- }
753
-
754
- export const testApi = {
755
- createPrometheusMetricStore,
756
- recordDiagnosticEvent,
757
- renderPrometheusMetrics,
758
- };
759
- export { testApi as __test__ };
package/tsconfig.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- "extends": "../tsconfig.package-boundary.base.json",
3
- "compilerOptions": {
4
- "rootDir": "."
5
- },
6
- "include": ["./*.ts", "./src/**/*.ts"],
7
- "exclude": [
8
- "./**/*.test.ts",
9
- "./dist/**",
10
- "./node_modules/**",
11
- "./src/test-support/**",
12
- "./src/**/*test-helpers.ts",
13
- "./src/**/*test-harness.ts",
14
- "./src/**/*test-support.ts"
15
- ]
16
- }