pi-crew 0.1.34 → 0.1.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -0
- package/docs/architecture.md +8 -1
- package/docs/research-phase9-observability-reliability-plan.md +42 -42
- package/docs/research-source-pi-crew-reference.md +174 -0
- package/package.json +1 -1
- package/schema.json +42 -0
- package/src/config/config.ts +101 -0
- package/src/extension/register.ts +66 -3
- package/src/extension/registration/commands.ts +14 -3
- package/src/extension/registration/team-tool.ts +3 -1
- package/src/extension/team-tool/api.ts +27 -2
- package/src/extension/team-tool/context.ts +2 -0
- package/src/extension/team-tool/run.ts +2 -2
- package/src/extension/team-tool.ts +1 -1
- package/src/observability/correlation.ts +35 -0
- package/src/observability/event-to-metric.ts +54 -0
- package/src/observability/exporters/adapter.ts +24 -0
- package/src/observability/exporters/otlp-exporter.ts +65 -0
- package/src/observability/exporters/prometheus-exporter.ts +47 -0
- package/src/observability/metric-registry.ts +72 -0
- package/src/observability/metric-retention.ts +46 -0
- package/src/observability/metric-sink.ts +51 -0
- package/src/observability/metrics-primitives.ts +166 -0
- package/src/runtime/child-pi.ts +5 -1
- package/src/runtime/crash-recovery.ts +56 -0
- package/src/runtime/deadletter.ts +36 -0
- package/src/runtime/diagnostic-export.ts +8 -1
- package/src/runtime/heartbeat-gradient.ts +28 -0
- package/src/runtime/heartbeat-watcher.ts +80 -0
- package/src/runtime/retry-executor.ts +59 -0
- package/src/runtime/team-runner.ts +57 -5
- package/src/schema/config-schema.ts +29 -0
- package/src/state/event-log.ts +3 -2
- package/src/state/types.ts +7 -0
- package/src/ui/dashboard-panes/agents-pane.ts +4 -1
- package/src/ui/dashboard-panes/metrics-pane.ts +34 -0
- package/src/ui/heartbeat-aggregator.ts +14 -4
- package/src/ui/keybinding-map.ts +4 -2
- package/src/ui/live-run-sidebar.ts +5 -4
- package/src/ui/run-action-dispatcher.ts +3 -2
- package/src/ui/run-dashboard.ts +17 -6
- package/src/ui/spinner.ts +17 -0
|
@@ -27,6 +27,13 @@ import { NotificationRouter, type NotificationDescriptor } from "./notification-
|
|
|
27
27
|
import { createJsonlSink, type NotificationSink } from "./notification-sink.ts";
|
|
28
28
|
import { projectCrewRoot } from "../utils/paths.ts";
|
|
29
29
|
import { summarizeHeartbeats } from "../ui/heartbeat-aggregator.ts";
|
|
30
|
+
import { createMetricRegistry, type MetricRegistry } from "../observability/metric-registry.ts";
|
|
31
|
+
import { wireEventToMetrics, type EventToMetricSubscription } from "../observability/event-to-metric.ts";
|
|
32
|
+
import { createMetricFileSink, type MetricSink } from "../observability/metric-sink.ts";
|
|
33
|
+
import { OTLPExporter } from "../observability/exporters/otlp-exporter.ts";
|
|
34
|
+
import { HeartbeatWatcher } from "../runtime/heartbeat-watcher.ts";
|
|
35
|
+
import { appendDeadletter } from "../runtime/deadletter.ts";
|
|
36
|
+
import { detectInterruptedRuns } from "../runtime/crash-recovery.ts";
|
|
30
37
|
|
|
31
38
|
export { __test__subagentSpawnParams };
|
|
32
39
|
|
|
@@ -68,6 +75,11 @@ export function registerPiTeams(pi: ExtensionAPI): void {
|
|
|
68
75
|
const widgetState: CrewWidgetState = { frame: 0 };
|
|
69
76
|
let notificationSink: NotificationSink | undefined;
|
|
70
77
|
let notificationRouter: NotificationRouter | undefined;
|
|
78
|
+
let metricRegistry: MetricRegistry | undefined;
|
|
79
|
+
let eventMetricSub: EventToMetricSubscription | undefined;
|
|
80
|
+
let metricSink: MetricSink | undefined;
|
|
81
|
+
let heartbeatWatcher: HeartbeatWatcher | undefined;
|
|
82
|
+
let otlpExporter: OTLPExporter | undefined;
|
|
71
83
|
const configureNotifications = (ctx: ExtensionContext): void => {
|
|
72
84
|
notificationRouter?.dispose();
|
|
73
85
|
notificationSink?.dispose();
|
|
@@ -92,6 +104,46 @@ export function registerPiTeams(pi: ExtensionAPI): void {
|
|
|
92
104
|
}
|
|
93
105
|
});
|
|
94
106
|
};
|
|
107
|
+
/**
 * (Re)builds the observability stack for the session rooted at `ctx.cwd`.
 *
 * Any previously configured watcher, sink, event subscription, OTLP exporter
 * and registry are disposed first, so calling this repeatedly never leaves
 * two stacks running. When `observability.enabled` is explicitly `false`
 * nothing is recreated.
 *
 * @param ctx Extension context whose `cwd` selects the config and crew root.
 */
const configureObservability = (ctx: ExtensionContext): void => {
  // Tear down the previous stack before building a new one.
  heartbeatWatcher?.dispose();
  metricSink?.dispose();
  eventMetricSub?.dispose();
  otlpExporter?.dispose();
  metricRegistry?.dispose();
  heartbeatWatcher = undefined;
  metricSink = undefined;
  eventMetricSub = undefined;
  otlpExporter = undefined;
  metricRegistry = undefined;

  const config = loadConfig(ctx.cwd).config;
  if (config.observability?.enabled === false) return;

  metricRegistry = createMetricRegistry();
  eventMetricSub = wireEventToMetrics(pi.events, metricRegistry);

  // The file sink is on unless telemetry is explicitly disabled.
  if (config.telemetry?.enabled !== false) {
    metricSink = createMetricFileSink({
      crewRoot: projectCrewRoot(ctx.cwd),
      registry: metricRegistry,
      retentionDays: config.observability?.metricRetentionDays ?? 7,
    });
  }

  // OTLP export is opt-in and needs an endpoint.
  if (config.otlp?.enabled === true && config.otlp.endpoint) {
    otlpExporter = new OTLPExporter(
      { endpoint: config.otlp.endpoint, headers: config.otlp.headers, intervalMs: config.otlp.intervalMs },
      metricRegistry,
    );
    otlpExporter.start();
  }

  heartbeatWatcher = new HeartbeatWatcher({
    cwd: ctx.cwd,
    pollIntervalMs: config.observability?.pollIntervalMs ?? 5000,
    manifestCache: getManifestCache(ctx.cwd),
    registry: metricRegistry,
    router: { enqueue: (notification) => { notifyOperator(notification); return true; } },
    deadletterTickThreshold: config.reliability?.deadletterThreshold ?? 3,
    onDeadletterTrigger: (manifest, taskId) => {
      appendDeadletter(manifest, { taskId, runId: manifest.runId, reason: "heartbeat-dead", attempts: 0, timestamp: new Date().toISOString() });
      // wireEventToMetrics already counts every "crew.task.deadletter" event
      // into crew.task.deadletter_total on this same registry. Incrementing
      // here as well would count each trigger twice, so the direct increment
      // is only a fallback for when the event bus cannot emit.
      // NOTE(review): assumes a bus exposing `emit` also delivers to the
      // `on` subscribers registered by wireEventToMetrics — confirm.
      if (typeof pi.events?.emit === "function") {
        pi.events?.emit?.("crew.task.deadletter", { runId: manifest.runId, taskId, reason: "heartbeat-dead" });
      } else {
        metricRegistry?.counter("crew.task.deadletter_total", "Deadletter triggers by reason").inc({ reason: "heartbeat-dead" });
      }
    },
  });
  heartbeatWatcher.start();

  // With auto-recover enabled, interrupted runs are only surfaced to the
  // operator here; nothing is resumed from this function.
  if (config.reliability?.autoRecover === true) {
    for (const plan of detectInterruptedRuns(ctx.cwd, getManifestCache(ctx.cwd))) {
      notifyOperator({ id: `recovery_prompt_${plan.runId}`, severity: "warning", source: "crash-recovery", runId: plan.runId, title: `Run ${plan.runId} was interrupted`, body: `${plan.resumableTasks.length} tasks pending recovery. Open dashboard to inspect before resuming.` });
    }
  }
};
|
|
95
147
|
const autoRecoveryLast = new Map<string, number>();
|
|
96
148
|
const notifyOperator = (notification: NotificationDescriptor): void => {
|
|
97
149
|
try {
|
|
@@ -245,6 +297,16 @@ export function registerPiTeams(pi: ExtensionAPI): void {
|
|
|
245
297
|
stopAsyncRunNotifier(notifierState);
|
|
246
298
|
stopCrewWidget(currentCtx, widgetState, currentCtx ? loadConfig(currentCtx.cwd).config.ui : undefined);
|
|
247
299
|
clearPiCrewPowerbar(pi.events, currentCtx);
|
|
300
|
+
heartbeatWatcher?.dispose();
|
|
301
|
+
metricSink?.dispose();
|
|
302
|
+
eventMetricSub?.dispose();
|
|
303
|
+
otlpExporter?.dispose();
|
|
304
|
+
metricRegistry?.dispose();
|
|
305
|
+
heartbeatWatcher = undefined;
|
|
306
|
+
metricSink = undefined;
|
|
307
|
+
eventMetricSub = undefined;
|
|
308
|
+
otlpExporter = undefined;
|
|
309
|
+
metricRegistry = undefined;
|
|
248
310
|
manifestCache.dispose();
|
|
249
311
|
runSnapshotCache.dispose?.();
|
|
250
312
|
renderScheduler?.dispose();
|
|
@@ -272,6 +334,7 @@ export function registerPiTeams(pi: ExtensionAPI): void {
|
|
|
272
334
|
const loadedConfig = loadConfig(ctx.cwd);
|
|
273
335
|
autoRecoveryLast.clear();
|
|
274
336
|
configureNotifications(ctx);
|
|
337
|
+
configureObservability(ctx);
|
|
275
338
|
registerPiCrewPowerbarSegments(pi.events, loadedConfig.config.ui);
|
|
276
339
|
startAsyncRunNotifier(ctx, notifierState, loadedConfig.config.notifierIntervalMs ?? DEFAULT_UI.notifierIntervalMs);
|
|
277
340
|
const cache = getManifestCache(ctx.cwd);
|
|
@@ -318,7 +381,7 @@ export function registerPiTeams(pi: ExtensionAPI): void {
|
|
|
318
381
|
}
|
|
319
382
|
};
|
|
320
383
|
renderScheduler = new RenderScheduler(pi.events, renderTick, {
|
|
321
|
-
fallbackMs: loadedConfig.config.ui?.dashboardLiveRefreshMs ??
|
|
384
|
+
fallbackMs: loadedConfig.config.ui?.dashboardLiveRefreshMs ?? 250,
|
|
322
385
|
onInvalidate: () => getRunSnapshotCache(ctx.cwd).invalidate(),
|
|
323
386
|
});
|
|
324
387
|
});
|
|
@@ -343,11 +406,11 @@ export function registerPiTeams(pi: ExtensionAPI): void {
|
|
|
343
406
|
};
|
|
344
407
|
});
|
|
345
408
|
|
|
346
|
-
registerTeamTool(pi, { foregroundControllers, startForegroundRun, openLiveSidebar, getManifestCache, getRunSnapshotCache, widgetState });
|
|
409
|
+
registerTeamTool(pi, { foregroundControllers, startForegroundRun, openLiveSidebar, getManifestCache, getRunSnapshotCache, getMetricRegistry: () => metricRegistry, widgetState });
|
|
347
410
|
registerSubagentTools(pi, subagentManager);
|
|
348
411
|
time("register.tools");
|
|
349
412
|
|
|
350
|
-
registerTeamCommands(pi, { startForegroundRun, openLiveSidebar, getManifestCache, getRunSnapshotCache, dismissNotifications: () => {
|
|
413
|
+
registerTeamCommands(pi, { startForegroundRun, openLiveSidebar, getManifestCache, getRunSnapshotCache, getMetricRegistry: () => metricRegistry, dismissNotifications: () => {
|
|
351
414
|
widgetState.notificationCount = 0;
|
|
352
415
|
if (currentCtx) {
|
|
353
416
|
const uiConfig = loadConfig(currentCtx.cwd).config.ui;
|
|
@@ -21,12 +21,14 @@ import { openTranscriptViewer, selectAgentTask } from "./viewers.ts";
|
|
|
21
21
|
import { printTimings, time } from "../../utils/timings.ts";
|
|
22
22
|
import { requestRenderTarget } from "../../ui/pi-ui-compat.ts";
|
|
23
23
|
import type { createRunSnapshotCache } from "../../ui/run-snapshot-cache.ts";
|
|
24
|
+
import type { MetricRegistry } from "../../observability/metric-registry.ts";
|
|
24
25
|
|
|
25
26
|
/** Collaborators injected into `registerTeamCommands` by the extension entry point. */
export interface RegisterTeamCommandsDeps {
  /** Starts `runner` as a foreground run for `ctx`, optionally tagged with `runId`. */
  startForegroundRun: (
    ctx: ExtensionContext,
    runner: (signal?: AbortSignal) => Promise<void>,
    runId?: string,
  ) => void;
  /** Opens the live sidebar for the given run. */
  openLiveSidebar: (ctx: ExtensionContext, runId: string) => void;
  /** Returns a manifest cache for `cwd`; only `list` is required by the commands. */
  getManifestCache: (cwd: string) => { list(max?: number): TeamRunManifest[] };
  /** Optional accessor for the per-cwd run snapshot cache. */
  getRunSnapshotCache?: (cwd: string) => ReturnType<typeof createRunSnapshotCache>;
  /** Optional accessor for the current metric registry; may return `undefined`. */
  getMetricRegistry?: () => MetricRegistry | undefined;
  /** Optional hook invoked when the operator dismisses notifications. */
  dismissNotifications?: () => void;
}
|
|
32
34
|
|
|
@@ -106,12 +108,15 @@ async function handleHealthDashboardAction(ctx: ExtensionCommandContext, selecti
|
|
|
106
108
|
const confirmed = await openConfirm(ctx, { title: "Recent diagnostic exists", body: `File ${recent} was created <1min ago. Export another diagnostic?`, defaultAction: "cancel" });
|
|
107
109
|
if (!confirmed) return;
|
|
108
110
|
}
|
|
109
|
-
const result = await dispatchDiagnosticExport(ctx as ExtensionContext, selection.runId);
|
|
111
|
+
const result = await dispatchDiagnosticExport(ctx as ExtensionContext, selection.runId, { registry: depsRef?.getMetricRegistry?.() });
|
|
110
112
|
depsNotify(ctx, result.message, result.ok ? "info" : "error");
|
|
111
113
|
}
|
|
112
114
|
}
|
|
113
115
|
|
|
116
|
+
let depsRef: RegisterTeamCommandsDeps | undefined;
|
|
117
|
+
|
|
114
118
|
export function registerTeamCommands(pi: ExtensionAPI, deps: RegisterTeamCommandsDeps): void {
|
|
119
|
+
depsRef = deps;
|
|
115
120
|
pi.registerCommand("teams", {
|
|
116
121
|
description: "List pi-crew teams, workflows, and agents",
|
|
117
122
|
handler: async (_args: string, ctx: ExtensionCommandContext) => {
|
|
@@ -123,7 +128,7 @@ export function registerTeamCommands(pi: ExtensionAPI, deps: RegisterTeamCommand
|
|
|
123
128
|
pi.registerCommand("team-run", {
|
|
124
129
|
description: "Manually start a pi-crew run (agent may also use the team tool autonomously)",
|
|
125
130
|
handler: async (args: string, ctx: ExtensionCommandContext) => {
|
|
126
|
-
const result = await handleTeamTool(parseRunArgs(args), { ...ctx, startForegroundRun: (runner, runId) => deps.startForegroundRun(ctx as ExtensionContext, runner, runId), onRunStarted: (runId) => deps.openLiveSidebar(ctx as ExtensionContext, runId) });
|
|
131
|
+
const result = await handleTeamTool(parseRunArgs(args), { ...ctx, metricRegistry: deps.getMetricRegistry?.(), startForegroundRun: (runner, runId) => deps.startForegroundRun(ctx as ExtensionContext, runner, runId), onRunStarted: (runId) => deps.openLiveSidebar(ctx as ExtensionContext, runId) });
|
|
127
132
|
await notifyCommandResult(ctx, commandText(result));
|
|
128
133
|
},
|
|
129
134
|
});
|
|
@@ -161,6 +166,12 @@ export function registerTeamCommands(pi: ExtensionAPI, deps: RegisterTeamCommand
|
|
|
161
166
|
},
|
|
162
167
|
});
|
|
163
168
|
|
|
169
|
+
pi.registerCommand("team-metrics", { description: "Show pi-crew metrics snapshot: [filter]", handler: async (args: string, ctx: ExtensionCommandContext) => {
|
|
170
|
+
const filter = args.trim() || undefined;
|
|
171
|
+
const result = await handleTeamTool({ action: "api", config: { operation: "metrics-snapshot", filter } }, { ...ctx, metricRegistry: deps.getMetricRegistry?.() });
|
|
172
|
+
await notifyCommandResult(ctx, commandText(result));
|
|
173
|
+
} });
|
|
174
|
+
|
|
164
175
|
pi.registerCommand("team-imports", { description: "List imported pi-crew run bundles", handler: async (_args: string, ctx: ExtensionCommandContext) => {
|
|
165
176
|
const result = await handleTeamTool({ action: "imports" }, ctx);
|
|
166
177
|
await notifyCommandResult(ctx, commandText(result));
|
|
@@ -225,7 +236,7 @@ export function registerTeamCommands(pi: ExtensionAPI, deps: RegisterTeamCommand
|
|
|
225
236
|
const uiConfig = loadConfig(ctx.cwd).config.ui;
|
|
226
237
|
const rightPanel = uiConfig?.dashboardPlacement !== "center";
|
|
227
238
|
const width = rightPanel ? Math.min(90, Math.max(40, uiConfig?.dashboardWidth ?? 56)) : "90%";
|
|
228
|
-
const selection = await ctx.ui.custom<RunDashboardSelection | undefined>((_tui, theme, _keybindings, done) => new RunDashboard(runs, done, theme, { placement: rightPanel ? "right" : "center", showModel: uiConfig?.showModel, showTokens: uiConfig?.showTokens, showTools: uiConfig?.showTools, snapshotCache: deps.getRunSnapshotCache?.(ctx.cwd), runProvider: () => deps.getManifestCache(ctx.cwd).list(50) }), { overlay: true, overlayOptions: rightPanel ? { width, minWidth: 40, maxHeight: "100%", anchor: "top-right", offsetX: 0, offsetY: 0, margin: { top: 0, right: 0, bottom: 0, left: 0 } } : { width, maxHeight: "90%", anchor: "center", margin: 2 } });
|
|
239
|
+
const selection = await ctx.ui.custom<RunDashboardSelection | undefined>((_tui, theme, _keybindings, done) => new RunDashboard(runs, done, theme, { placement: rightPanel ? "right" : "center", showModel: uiConfig?.showModel, showTokens: uiConfig?.showTokens, showTools: uiConfig?.showTools, snapshotCache: deps.getRunSnapshotCache?.(ctx.cwd), runProvider: () => deps.getManifestCache(ctx.cwd).list(50), registry: deps.getMetricRegistry?.() }), { overlay: true, overlayOptions: rightPanel ? { width, minWidth: 40, maxHeight: "100%", anchor: "top-right", offsetX: 0, offsetY: 0, margin: { top: 0, right: 0, bottom: 0, left: 0 } } : { width, maxHeight: "90%", anchor: "center", margin: 2 } });
|
|
229
240
|
if (!selection) return;
|
|
230
241
|
if (selection.action === "reload") continue;
|
|
231
242
|
if (selection.action === "notifications-dismiss") {
|
|
@@ -6,6 +6,7 @@ import { updateCrewWidget } from "../../ui/crew-widget.ts";
|
|
|
6
6
|
import { updatePiCrewPowerbar } from "../../ui/powerbar-publisher.ts";
|
|
7
7
|
import type { createManifestCache } from "../../runtime/manifest-cache.ts";
|
|
8
8
|
import type { createRunSnapshotCache } from "../../ui/run-snapshot-cache.ts";
|
|
9
|
+
import type { MetricRegistry } from "../../observability/metric-registry.ts";
|
|
9
10
|
import { handleTeamTool } from "../team-tool.ts";
|
|
10
11
|
|
|
11
12
|
export interface RegisterTeamToolDeps {
|
|
@@ -14,6 +15,7 @@ export interface RegisterTeamToolDeps {
|
|
|
14
15
|
openLiveSidebar: (ctx: ExtensionContext, runId: string) => void;
|
|
15
16
|
getManifestCache: (cwd: string) => ReturnType<typeof createManifestCache>;
|
|
16
17
|
getRunSnapshotCache?: (cwd: string) => ReturnType<typeof createRunSnapshotCache>;
|
|
18
|
+
getMetricRegistry?: () => MetricRegistry | undefined;
|
|
17
19
|
widgetState: CrewWidgetState;
|
|
18
20
|
}
|
|
19
21
|
|
|
@@ -36,7 +38,7 @@ export function registerTeamTool(pi: ExtensionAPI, deps: RegisterTeamToolDeps):
|
|
|
36
38
|
const runLabel = resolved.team ?? resolved.agent ?? "direct";
|
|
37
39
|
pi.setSessionName(`pi-crew: ${runLabel}/${resolved.workflow ?? "default"} — ${resolved.goal.slice(0, 60)}`);
|
|
38
40
|
}
|
|
39
|
-
const output = await handleTeamTool(resolved, { ...ctx, signal: controller.signal, startForegroundRun: (runner, runId) => deps.startForegroundRun(ctx, runner, runId), onRunStarted: (runId) => deps.openLiveSidebar(ctx, runId) });
|
|
41
|
+
const output = await handleTeamTool(resolved, { ...ctx, signal: controller.signal, metricRegistry: deps.getMetricRegistry?.(), startForegroundRun: (runner, runId) => deps.startForegroundRun(ctx, runner, runId), onRunStarted: (runId) => deps.openLiveSidebar(ctx, runId) });
|
|
40
42
|
if (resolved.action === "run") {
|
|
41
43
|
pi.appendEntry("crew:run-started", {
|
|
42
44
|
runId: output.details?.runId,
|
|
@@ -19,12 +19,37 @@ import { liveControlRealtimeMessage, publishLiveControlRealtime } from "../../su
|
|
|
19
19
|
import type { PiTeamsToolResult } from "../tool-result.ts";
|
|
20
20
|
import { configRecord, result, type TeamContext } from "./context.ts";
|
|
21
21
|
|
|
22
|
+
/**
 * Tests `value` against a minimal glob `pattern`.
 *
 * `*` is the only wildcard and matches any run of characters (including
 * none); every other character is matched literally. The match is anchored
 * at both ends.
 *
 * @param value   Candidate string, e.g. a metric name.
 * @param pattern Glob pattern, e.g. `crew.task.*`.
 * @returns `true` when the whole of `value` matches `pattern`.
 */
function globMatch(value: string, pattern: string): boolean {
  // `?` must be escaped with the other metacharacters: left raw it becomes a
  // regex quantifier, so "a?" would match "a" and a leading "?" would make
  // the RegExp constructor throw on operator-supplied input.
  const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*");
  return new RegExp(`^${escaped}$`).test(value);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Reports whether any entry of `snapshot.values` carries a `labels.runId`
 * strictly equal to `runId`.
 *
 * The snapshot is inspected defensively: a non-array `values`, non-object
 * entries, and missing or non-object `labels` are all treated as "no match".
 *
 * @param snapshot Object whose optional `values` is expected to be an array of labelled samples.
 * @param runId    Run identifier to look for.
 * @returns `true` if at least one sample is labelled with `runId`.
 */
function snapshotHasRunId(snapshot: { values?: unknown }, runId: string): boolean {
  if (!Array.isArray(snapshot.values)) return false;
  for (const entry of snapshot.values) {
    if (entry === null || typeof entry !== "object" || Array.isArray(entry)) continue;
    const labels = (entry as { labels?: unknown }).labels;
    if (labels === null || typeof labels !== "object" || Array.isArray(labels)) continue;
    if ((labels as Record<string, unknown>).runId === runId) return true;
  }
  return false;
}
|
|
35
|
+
|
|
22
36
|
export async function handleApi(params: TeamToolParamsValue, ctx: TeamContext): Promise<PiTeamsToolResult> {
|
|
37
|
+
const cfg = configRecord(params.config);
|
|
38
|
+
const operation = typeof cfg.operation === "string" ? cfg.operation : "read-manifest";
|
|
39
|
+
if (operation === "metrics-snapshot") {
|
|
40
|
+
const filter = typeof cfg.filter === "string" ? cfg.filter : undefined;
|
|
41
|
+
const runIdFilter = typeof cfg.runId === "string" ? cfg.runId : params.runId;
|
|
42
|
+
const snapshots = ctx.metricRegistry?.snapshot() ?? [];
|
|
43
|
+
const filtered = snapshots.filter((snapshot) => {
|
|
44
|
+
if (filter && !globMatch(snapshot.name, filter)) return false;
|
|
45
|
+
if (runIdFilter && !snapshotHasRunId(snapshot, runIdFilter)) return false;
|
|
46
|
+
return true;
|
|
47
|
+
});
|
|
48
|
+
return result(JSON.stringify(filtered, null, 2), { action: "api", status: "ok", ...(runIdFilter ? { runId: runIdFilter } : {}) });
|
|
49
|
+
}
|
|
23
50
|
if (!params.runId) return result("API requires runId.", { action: "api", status: "error" }, true);
|
|
24
51
|
const loaded = loadRunManifestById(ctx.cwd, params.runId);
|
|
25
52
|
if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "api", status: "error" }, true);
|
|
26
|
-
const cfg = configRecord(params.config);
|
|
27
|
-
const operation = typeof cfg.operation === "string" ? cfg.operation : "read-manifest";
|
|
28
53
|
if (operation === "read-manifest") {
|
|
29
54
|
return result(JSON.stringify(loaded.manifest, null, 2), { action: "api", status: "ok", runId: loaded.manifest.runId, artifactsRoot: loaded.manifest.artifactsRoot });
|
|
30
55
|
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
2
|
+
import type { MetricRegistry } from "../../observability/metric-registry.ts";
|
|
2
3
|
import type { TeamToolDetails } from "../team-tool-types.ts";
|
|
3
4
|
import { toolResult, type PiTeamsToolResult } from "../tool-result.ts";
|
|
4
5
|
|
|
@@ -6,6 +7,7 @@ export type TeamContext = Pick<ExtensionContext, "cwd"> & Partial<Pick<Extension
|
|
|
6
7
|
modelRegistry?: unknown;
|
|
7
8
|
sessionManager?: { getBranch?: () => unknown[] };
|
|
8
9
|
events?: { emit?: (event: string, data: unknown) => void };
|
|
10
|
+
metricRegistry?: MetricRegistry;
|
|
9
11
|
signal?: AbortSignal;
|
|
10
12
|
startForegroundRun?: (runner: (signal?: AbortSignal) => Promise<void>, runId?: string) => void;
|
|
11
13
|
onRunStarted?: (runId: string) => void;
|
|
@@ -134,7 +134,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
|
|
|
134
134
|
if (executeWorkers && ctx.startForegroundRun) {
|
|
135
135
|
ctx.onRunStarted?.(updatedManifest.runId);
|
|
136
136
|
ctx.startForegroundRun(async (signal) => {
|
|
137
|
-
await executeTeamRun({ manifest: updatedManifest, tasks, team, workflow, agents, executeWorkers, limits: executedConfig.limits, runtime, runtimeConfig: executedConfig.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal });
|
|
137
|
+
await executeTeamRun({ manifest: updatedManifest, tasks, team, workflow, agents, executeWorkers, limits: executedConfig.limits, runtime, runtimeConfig: executedConfig.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal, reliability: executedConfig.reliability, metricRegistry: ctx.metricRegistry });
|
|
138
138
|
}, updatedManifest.runId);
|
|
139
139
|
const text = [
|
|
140
140
|
`Started foreground pi-crew run ${updatedManifest.runId}.`,
|
|
@@ -150,7 +150,7 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
|
|
|
150
150
|
].join("\n");
|
|
151
151
|
return result(text, { action: "run", status: "ok", runId: updatedManifest.runId, artifactsRoot: updatedManifest.artifactsRoot });
|
|
152
152
|
}
|
|
153
|
-
const executed = await executeTeamRun({ manifest: updatedManifest, tasks, team, workflow, agents, executeWorkers, limits: executedConfig.limits, runtime, runtimeConfig: executedConfig.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal: ctx.signal });
|
|
153
|
+
const executed = await executeTeamRun({ manifest: updatedManifest, tasks, team, workflow, agents, executeWorkers, limits: executedConfig.limits, runtime, runtimeConfig: executedConfig.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal: ctx.signal, reliability: executedConfig.reliability, metricRegistry: ctx.metricRegistry });
|
|
154
154
|
const text = [
|
|
155
155
|
`Created pi-crew run ${executed.manifest.runId}.`,
|
|
156
156
|
`Team: ${team.name}`,
|
|
@@ -185,7 +185,7 @@ export async function handleResume(params: TeamToolParamsValue, ctx: TeamContext
|
|
|
185
185
|
const loadedConfig = loadConfig(ctx.cwd);
|
|
186
186
|
const runtime = await resolveCrewRuntime(loadedConfig.config);
|
|
187
187
|
const executeWorkers = runtime.kind !== "scaffold";
|
|
188
|
-
const executed = await executeTeamRun({ manifest: resumeManifest, tasks: resetTasks, team, workflow, agents, executeWorkers, limits: loadedConfig.config.limits, runtime, runtimeConfig: loadedConfig.config.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal: ctx.signal });
|
|
188
|
+
const executed = await executeTeamRun({ manifest: resumeManifest, tasks: resetTasks, team, workflow, agents, executeWorkers, limits: loadedConfig.config.limits, runtime, runtimeConfig: loadedConfig.config.runtime, parentContext: buildParentContext(ctx), parentModel: ctx.model, modelRegistry: ctx.modelRegistry, modelOverride: params.model, signal: ctx.signal, reliability: loadedConfig.config.reliability, metricRegistry: ctx.metricRegistry });
|
|
189
189
|
return result([`Resumed run ${executed.manifest.runId}.`, `Status: ${executed.manifest.status}`, `Tasks: ${executed.tasks.length}`, `Artifacts: ${executed.manifest.artifactsRoot}`].join("\n"), { action: "resume", status: executed.manifest.status === "failed" ? "error" : "ok", runId: executed.manifest.runId, artifactsRoot: executed.manifest.artifactsRoot }, executed.manifest.status === "failed");
|
|
190
190
|
});
|
|
191
191
|
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
2
|
+
|
|
3
|
+
/** Identifiers that tie an event to its trace and to the span that produced it. */
export interface CorrelationContext {
  traceId: string;
  parentSpanId?: string;
  spanId: string;
}

// Async-local store holding the correlation context of the current call chain.
const storage = new AsyncLocalStorage<CorrelationContext>();
// Module-wide sequence number appended to every span id.
let spanCounter = 0;

/** Runs `fn` with `ctx` as the active correlation context and returns its result. */
export function withCorrelation<T>(ctx: CorrelationContext, fn: () => T): T {
  return storage.run(ctx, fn);
}

/** Returns the correlation context active for the current call chain, if any. */
export function getCurrentContext(): CorrelationContext | undefined {
  return storage.getStore();
}

/**
 * Produces the next span id in the form `runId:taskId:sequence`.
 *
 * @param runId  Run the span belongs to.
 * @param taskId Task the span belongs to; defaults to `"main"`.
 */
export function newSpanId(runId: string, taskId = "main"): string {
  const sequence = ++spanCounter;
  return [runId, taskId, sequence].join(":");
}

/**
 * Builds a context for a new span. Inside an active context the new span
 * inherits its trace id and records it as parent; otherwise the new span id
 * doubles as the trace id.
 */
export function childCorrelation(runId: string, taskId: string): CorrelationContext {
  const active = getCurrentContext();
  const spanId = newSpanId(runId, taskId);
  if (active === undefined) {
    return { traceId: spanId, parentSpanId: undefined, spanId };
  }
  return { traceId: active.traceId ?? spanId, parentSpanId: active.spanId, spanId };
}

/**
 * Returns a copy of `event` whose `data` additionally carries the active
 * trace/span identifiers. Outside any context the event is returned as-is.
 */
export function correlatedEvent<T extends { runId?: string; data?: Record<string, unknown> }>(event: T): T {
  const active = getCurrentContext();
  if (active === undefined) return event;
  const { traceId, spanId, parentSpanId } = active;
  const data = { ...(event.data ?? {}), traceId, spanId, parentSpanId };
  return { ...event, data };
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
2
|
+
import { MetricRegistry } from "./metric-registry.ts";
|
|
3
|
+
|
|
4
|
+
/** Returns `value` when it is a plain (non-null, non-array) object, otherwise a fresh empty record. */
function recordValue(value: unknown): Record<string, unknown> {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
|
|
7
|
+
|
|
8
|
+
/** Returns `value` when it is a non-empty string, otherwise `fallback`. */
function stringValue(value: unknown, fallback: string): string {
  if (typeof value !== "string" || value.length === 0) {
    return fallback;
  }
  return value;
}
|
|
11
|
+
|
|
12
|
+
/** Returns `value` when it is a finite number, otherwise `fallback` (default `0`). */
function numberValue(value: unknown, fallback = 0): number {
  if (typeof value !== "number") return fallback;
  return Number.isFinite(value) ? value : fallback;
}
|
|
15
|
+
|
|
16
|
+
/** Handle returned by `wireEventToMetrics`; disposing it removes every listener it added. */
export interface EventToMetricSubscription {
  dispose(): void;
}

/**
 * Registers the crew metrics on `registry` and subscribes to crew lifecycle
 * events on `events`, translating each event into counter increments and
 * histogram observations.
 *
 * @param events   Event bus to listen on; when it is missing (or has no `on`) only the metrics are registered.
 * @param registry Registry that owns the metrics.
 * @returns A subscription whose `dispose` is idempotent.
 */
export function wireEventToMetrics(events: ExtensionAPI["events"] | undefined, registry: MetricRegistry): EventToMetricSubscription {
  // Metrics are registered up front, in this order, including those that are
  // only written elsewhere (deadletter counter, staleness gauge, retry histogram).
  const runCount = registry.counter("crew.run.count", "Total runs by status");
  const taskCount = registry.counter("crew.task.count", "Total tasks by status");
  const subagentCount = registry.counter("crew.subagent.count", "Total subagent records by status");
  const mailboxCount = registry.counter("crew.mailbox.count", "Total mailbox messages by direction");
  registry.counter("crew.task.deadletter_total", "Deadletter triggers by reason");
  registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds");
  const runDuration = registry.histogram("crew.run.duration_ms", "Run end-to-end duration, milliseconds");
  const taskDuration = registry.histogram("crew.task.duration_ms", "Task duration, milliseconds");
  registry.histogram("crew.task.retry_count", "Retries per task", [0, 1, 2, 3, 5, 10]);
  const tokenUsage = registry.histogram("crew.task.tokens_total", "Token usage per task");

  // Event name -> metric update. Insertion order is the subscription order.
  const listeners: Record<string, (payload: unknown) => void> = {
    "crew.run.completed": (payload) => {
      const fields = recordValue(payload);
      runCount.inc({ status: "completed" });
      runDuration.observe({ team: stringValue(fields.team, "unknown") }, numberValue(fields.durationMs));
    },
    "crew.run.failed": () => {
      runCount.inc({ status: "failed" });
    },
    "crew.run.cancelled": () => {
      runCount.inc({ status: "cancelled" });
    },
    "crew.task.completed": (payload) => {
      const fields = recordValue(payload);
      taskCount.inc({ status: "completed" });
      taskDuration.observe({ role: stringValue(fields.role, "unknown") }, numberValue(fields.durationMs));
      tokenUsage.observe({ role: stringValue(fields.role, "unknown") }, numberValue(fields.tokens));
    },
    "crew.task.failed": () => {
      taskCount.inc({ status: "failed" });
    },
    "crew.task.retry_attempt": (payload) => {
      const fields = recordValue(payload);
      taskCount.inc({ status: "retry" });
      // Looked up on demand, so this counter is first registered on the first retry event.
      const retryAttempts = registry.counter("crew.task.retry_attempt_total", "Retry attempts by run and task");
      retryAttempts.inc({ runId: stringValue(fields.runId, "unknown"), taskId: stringValue(fields.taskId, "unknown") });
    },
    "crew.task.deadletter": (payload) => {
      const fields = recordValue(payload);
      const deadletters = registry.counter("crew.task.deadletter_total", "Deadletter triggers by reason");
      deadletters.inc({ reason: stringValue(fields.reason, "unknown") });
    },
    "crew.subagent.completed": (payload) => {
      const fields = recordValue(payload);
      subagentCount.inc({ status: stringValue(fields.status, "completed") });
    },
    "crew.subagent.failed": () => {
      subagentCount.inc({ status: "failed" });
    },
    "crew.mailbox.message": (payload) => {
      const fields = recordValue(payload);
      mailboxCount.inc({ direction: stringValue(fields.direction, "unknown") });
    },
  };

  const unsubscribers: Array<() => void> = [];
  for (const [eventName, listener] of Object.entries(listeners)) {
    const guarded = (data: unknown): void => {
      try {
        listener(data);
      } catch {
        /* metric handlers must never break event delivery */
      }
    };
    const unsubscribe = events?.on?.(eventName, guarded);
    if (typeof unsubscribe === "function") unsubscribers.push(unsubscribe);
  }

  let disposed = false;
  return {
    dispose() {
      if (disposed) return;
      disposed = true;
      // splice(0) empties the list, so each unsubscriber runs at most once.
      for (const unsubscribe of unsubscribers.splice(0)) unsubscribe();
    },
  };
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { MetricSnapshot } from "../metrics-primitives.ts";
|
|
2
|
+
|
|
3
|
+
/** Destination that metric snapshots can be pushed to. */
export interface MetricExporter {
  /** Short identifier of the exporter. */
  name: string;
  /** Delivers `snapshots` to the destination. */
  push(snapshots: MetricSnapshot[]): Promise<void>;
  /** Releases whatever the exporter holds. */
  dispose(): void;
}

/**
 * Fans `push` and `dispose` out to a fixed list of exporters, isolating each
 * child so that one failing exporter cannot starve the others.
 */
export class CompositeExporter implements MetricExporter {
  name = "composite";
  private readonly exporters: MetricExporter[];

  constructor(exporters: MetricExporter[]) {
    this.exporters = exporters;
  }

  /**
   * Pushes `snapshots` to every child exporter. Resolves once all children
   * have settled; individual failures are not propagated.
   */
  async push(snapshots: MetricSnapshot[]): Promise<void> {
    // The async callback turns a synchronous throw from a child's push() into
    // a rejection, so allSettled isolates it instead of it aborting the map.
    await Promise.allSettled(this.exporters.map(async (exporter) => exporter.push(snapshots)));
  }

  /**
   * Disposes every child exporter, even if some of them throw.
   *
   * @throws The first error raised by a child, after all children have been disposed.
   */
  dispose(): void {
    let failed = false;
    let firstError: unknown;
    for (const exporter of this.exporters) {
      try {
        exporter.dispose();
      } catch (error) {
        // Keep going so later exporters are not leaked; remember the first failure.
        if (!failed) {
          failed = true;
          firstError = error;
        }
      }
    }
    if (failed) throw firstError;
  }
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { logInternalError } from "../../utils/internal-error.ts";
|
|
2
|
+
import type { MetricRegistry } from "../metric-registry.ts";
|
|
3
|
+
import type { MetricSnapshot } from "../metrics-primitives.ts";
|
|
4
|
+
import type { MetricExporter } from "./adapter.ts";
|
|
5
|
+
|
|
6
|
+
/**
 * Settings accepted by the OTLP exporter.
 */
export interface OTLPExporterOptions {
	/** URL the metrics payload is POSTed to. */
	endpoint: string;
	/** Extra HTTP headers sent with each request, merged after the JSON content type. */
	headers?: { [name: string]: string };
	/** Milliseconds between periodic pushes; the exporter uses 60000 when this is omitted. */
	intervalMs?: number;
	/** Milliseconds before an in-flight request is aborted; the exporter uses 10000 when this is omitted. */
	timeoutMs?: number;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Converts the values of one metric snapshot into OTLP-style data points.
 *
 * Every point carries its labels as string attributes plus whichever of
 * `asDouble`, `count` and `sum` the source value provides. Histogram values
 * (those with `buckets` and `counts`) additionally get `explicitBounds` and
 * `bucketCounts`, so the bucket distribution is no longer dropped on export.
 *
 * @param snapshot Snapshot whose `values` are converted.
 * @returns One data point object per snapshot value, in the same order.
 */
function pointValues(snapshot: MetricSnapshot): unknown[] {
	return snapshot.values.map((value) => {
		const point: Record<string, unknown> = {
			attributes: Object.entries(value.labels).map(([key, item]) => ({ key, value: { stringValue: String(item) } })),
			asDouble: "value" in value ? value.value : undefined,
			count: "count" in value ? value.count : undefined,
			sum: "sum" in value ? value.sum : undefined,
		};
		if ("buckets" in value && "counts" in value) {
			// Assumes counts[i] is the per-bucket (non-cumulative) count for buckets[i]
			// and buckets are ascending - TODO confirm against the Histogram primitive.
			const explicitBounds: number[] = [];
			const bucketCounts: number[] = [];
			let bounded = 0;
			for (const [index, bound] of value.buckets.entries()) {
				// OTLP bounds must be finite; an infinite bound is represented by the
				// trailing overflow bucket instead.
				if (!Number.isFinite(bound)) continue;
				const observed = value.counts[index] ?? 0;
				explicitBounds.push(bound);
				bucketCounts.push(observed);
				bounded += observed;
			}
			// OTLP expects exactly one more count than bounds: everything above the
			// last finite bound.
			bucketCounts.push(Math.max(0, value.count - bounded));
			point.explicitBounds = explicitBounds;
			point.bucketCounts = bucketCounts;
		}
		return point;
	});
}
|
|
16
|
+
|
|
17
|
+
/**
 * Wraps metric snapshots in an OTLP-shaped `resourceMetrics` envelope.
 *
 * All metrics are placed under a single resource whose `service.name` is
 * "pi-crew" and a single scope named "pi-crew". Each metric stores its data
 * points under `histogram`, `gauge` or `sum`, chosen from the snapshot type.
 *
 * @param snapshots Snapshots to convert.
 * @returns The envelope object, ready to be serialized.
 */
export function convertToOTLP(snapshots: MetricSnapshot[]): unknown {
	const metrics = snapshots.map((snapshot) => {
		// Any type other than histogram or gauge is exported as a sum.
		let dataKey = "sum";
		if (snapshot.type === "histogram") dataKey = "histogram";
		else if (snapshot.type === "gauge") dataKey = "gauge";
		return {
			name: snapshot.name,
			description: snapshot.description,
			[dataKey]: { dataPoints: pointValues(snapshot) },
		};
	});
	const resource = { attributes: [{ key: "service.name", value: { stringValue: "pi-crew" } }] };
	const scopeEntry = { scope: { name: "pi-crew" }, metrics };
	return { resourceMetrics: [{ resource, scopeMetrics: [scopeEntry] }] };
}
|
|
28
|
+
|
|
29
|
+
/**
 * Exporter that POSTs metric snapshots as JSON to an OTLP endpoint, either on
 * demand via `push` or periodically once `start` has been called.
 */
export class OTLPExporter implements MetricExporter {
	name = "otlp";
	private timer?: ReturnType<typeof setInterval>;
	private readonly opts: OTLPExporterOptions;
	private readonly registry: MetricRegistry;

	/**
	 * @param opts Endpoint, headers and timing settings.
	 * @param registry Registry whose snapshot is pushed on every interval tick.
	 */
	constructor(opts: OTLPExporterOptions, registry: MetricRegistry) {
		this.opts = opts;
		this.registry = registry;
	}

	/**
	 * Starts (or restarts) the periodic push of the registry snapshot.
	 *
	 * Any previously running timer is cleared first. The interval defaults to
	 * 60000 ms and is unref'd where supported.
	 */
	start(): void {
		this.dispose();
		this.timer = setInterval(() => { void this.push(this.registry.snapshot()); }, this.opts.intervalMs ?? 60_000);
		this.timer.unref?.();
	}

	/**
	 * Sends the snapshots to the configured endpoint.
	 *
	 * The request is aborted after `timeoutMs` (default 10000 ms). Failures,
	 * including non-2xx HTTP responses, are logged via `logInternalError` and
	 * never thrown to the caller.
	 *
	 * @param snapshots Snapshots to serialize and send.
	 */
	async push(snapshots: MetricSnapshot[]): Promise<void> {
		try {
			const timeoutMs = this.opts.timeoutMs ?? 10_000;
			const controller = new AbortController();
			const timer = setTimeout(() => controller.abort(), timeoutMs);
			try {
				const response = await fetch(this.opts.endpoint, { method: "POST", headers: { "content-type": "application/json", ...(this.opts.headers ?? {}) }, body: JSON.stringify(convertToOTLP(snapshots)), signal: controller.signal });
				// The body is not needed; cancel it (best effort) rather than leave
				// it unread.
				await response.body?.cancel().catch(() => undefined);
				// fetch only rejects on network errors, so an HTTP error status has to
				// be turned into a failure explicitly or it would pass as success.
				if (!response.ok) throw new Error(`OTLP export failed with HTTP ${response.status}`);
			} finally {
				clearTimeout(timer);
			}
		} catch (error) {
			logInternalError("otlp-export", error);
		}
	}

	/** Stops the periodic push if it is running; safe to call repeatedly. */
	dispose(): void {
		if (this.timer) clearInterval(this.timer);
		this.timer = undefined;
	}
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { HistogramPoint, MetricLabels, MetricPoint, MetricSnapshot } from "../metrics-primitives.ts";
|
|
2
|
+
|
|
3
|
+
/**
 * Turns a metric name into a valid Prometheus metric name.
 *
 * Dots and any other character outside `[a-zA-Z0-9_:]` become underscores, and
 * a leading digit is prefixed with an underscore because Prometheus names may
 * not start with one. Names made only of letters, digits, dots and underscores
 * that do not start with a digit are converted exactly as before.
 *
 * @param name Metric name, e.g. "crew.task.count".
 * @returns The sanitized name, e.g. "crew_task_count".
 */
function prometheusName(name: string): string {
	const sanitized = name.replace(/[^a-zA-Z0-9_:]/g, "_");
	return /^[0-9]/.test(sanitized) ? `_${sanitized}` : sanitized;
}
|
|
6
|
+
|
|
7
|
+
/**
 * Escapes a label value for use inside double quotes.
 *
 * Backslashes and double quotes are prefixed with a backslash; a line feed is
 * written as the two characters `\n`.
 *
 * @param value Raw label value.
 * @returns The escaped value.
 */
function escapeLabel(value: string): string {
	return value.replace(/[\\\n"]/g, (char) => (char === "\n" ? "\\n" : `\\${char}`));
}
|
|
10
|
+
|
|
11
|
+
/**
 * Renders a label set as a brace-wrapped, comma-separated list of
 * `key="value"` pairs.
 *
 * Values are stringified and escaped; keys are emitted as-is.
 *
 * @param labels Labels to render.
 * @returns The rendered label block, or an empty string when there are no labels.
 */
function labelsText(labels: MetricLabels): string {
	const pairs: string[] = [];
	for (const [key, value] of Object.entries(labels)) {
		pairs.push(`${key}="${escapeLabel(String(value))}"`);
	}
	return pairs.length === 0 ? "" : `{${pairs.join(",")}}`;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Maps a snapshot type to the keyword used on a `# TYPE` line.
 *
 * @param type Snapshot type.
 * @returns "histogram" or "gauge" for those types, "counter" for anything else.
 */
function metricType(type: MetricSnapshot["type"]): string {
	switch (type) {
		case "histogram":
		case "gauge":
			return type;
		default:
			return "counter";
	}
}
|
|
20
|
+
|
|
21
|
+
/**
 * Type guard telling histogram points apart from plain metric points.
 *
 * @param value Point to inspect.
 * @returns True when the point has both a `buckets` and a `counts` property.
 */
function isHistogramPoint(value: MetricPoint | HistogramPoint): value is HistogramPoint {
	const hasBuckets = Reflect.has(value, "buckets");
	return hasBuckets && Reflect.has(value, "counts");
}
|
|
24
|
+
|
|
25
|
+
/**
 * Renders metric snapshots in the Prometheus text exposition format.
 *
 * Each snapshot produces a `# HELP` and a `# TYPE` line followed by its
 * samples. Histogram points are expanded into cumulative `_bucket` samples plus
 * `_sum` and `_count`; every other point becomes a single sample line.
 *
 * @param snapshots Snapshots to render.
 * @returns The exposition text, always terminated by a newline.
 */
export function formatPrometheus(snapshots: MetricSnapshot[]): string {
	const lines: string[] = [];
	for (const snapshot of snapshots) {
		const name = prometheusName(snapshot.name);
		// HELP text has to stay on one line: the format requires backslashes and
		// line feeds in it to be escaped, otherwise the rest of the description
		// would be parsed as a sample line.
		const help = String(snapshot.description).replace(/\\/g, "\\\\").replace(/\n/g, "\\n");
		lines.push(`# HELP ${name} ${help}`);
		lines.push(`# TYPE ${name} ${metricType(snapshot.type)}`);
		for (const value of snapshot.values) {
			if (!isHistogramPoint(value)) {
				lines.push(`${name}${labelsText(value.labels)} ${value.value}`);
				continue;
			}
			// counts[] is accumulated here, so it is treated as per-bucket counts and
			// turned into the cumulative values Prometheus expects.
			let cumulative = 0;
			let sawInfinite = false;
			for (let index = 0; index < value.buckets.length; index += 1) {
				cumulative += value.counts[index] ?? 0;
				const bound = value.buckets[index];
				const finite = Number.isFinite(bound);
				if (!finite) sawInfinite = true;
				const le = finite ? String(bound) : "+Inf";
				lines.push(`${name}_bucket${labelsText({ ...value.labels, le })} ${cumulative}`);
			}
			// A histogram must expose an le="+Inf" bucket equal to its total count;
			// add it when the configured buckets contain no infinite bound.
			if (!sawInfinite) {
				lines.push(`${name}_bucket${labelsText({ ...value.labels, le: "+Inf" })} ${value.count}`);
			}
			lines.push(`${name}_sum${labelsText(value.labels)} ${value.sum}`);
			lines.push(`${name}_count${labelsText(value.labels)} ${value.count}`);
		}
	}
	return `${lines.join("\n")}\n`;
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { Counter, Gauge, Histogram, type Metric, type MetricSnapshot } from "./metrics-primitives.ts";
|
|
2
|
+
|
|
3
|
+
/**
 * Accepted metric name shape: `crew.<domain>.<measure>`, where the domain is
 * lowercase letters and the measure is lowercase letters and underscores
 * starting with a letter.
 */
const METRIC_NAME_PATTERN = /^crew\.[a-z]+\.[a-z][a-z_]*$/;

/**
 * Validates a metric name against the naming pattern.
 *
 * @param name Candidate metric name.
 * @throws Error when the name does not match `crew.<domain>.<measure>`.
 */
function assertMetricName(name: string): void {
	const valid = METRIC_NAME_PATTERN.test(name);
	if (valid) return;
	throw new Error(`Invalid metric name '${name}'. Expected crew.<domain>.<measure>.`);
}
|
|
8
|
+
|
|
9
|
+
/**
 * Name-keyed collection of counters, gauges and histograms.
 *
 * The `register*` methods create a new metric and fail when the name is invalid
 * or already taken. The `counter` / `gauge` / `histogram` methods return the
 * existing metric of that kind or create it on first use.
 */
export class MetricRegistry {
	private metrics = new Map<string, Metric>();

	/**
	 * Creates and stores a counter.
	 *
	 * @throws Error when the name is invalid or already registered.
	 */
	registerCounter(name: string, description: string): Counter {
		return this.store(name, () => new Counter(name, description));
	}

	/**
	 * Creates and stores a gauge.
	 *
	 * @throws Error when the name is invalid or already registered.
	 */
	registerGauge(name: string, description: string): Gauge {
		return this.store(name, () => new Gauge(name, description));
	}

	/**
	 * Creates and stores a histogram.
	 *
	 * @param buckets Optional bucket list handed to the Histogram constructor.
	 * @throws Error when the name is invalid or already registered.
	 */
	registerHistogram(name: string, description: string, buckets?: number[]): Histogram {
		return this.store(name, () => new Histogram(name, description, buckets));
	}

	/**
	 * Returns the counter registered under `name`, creating it if absent.
	 *
	 * @throws Error when the name belongs to a metric that is not a counter.
	 */
	counter(name: string, description: string): Counter {
		const found = this.metrics.get(name);
		if (found === undefined) return this.registerCounter(name, description);
		if (!(found instanceof Counter)) throw new Error(`Metric '${name}' is not a counter.`);
		return found;
	}

	/**
	 * Returns the gauge registered under `name`, creating it if absent.
	 *
	 * @throws Error when the name belongs to a metric that is not a gauge.
	 */
	gauge(name: string, description: string): Gauge {
		const found = this.metrics.get(name);
		if (found === undefined) return this.registerGauge(name, description);
		if (!(found instanceof Gauge)) throw new Error(`Metric '${name}' is not a gauge.`);
		return found;
	}

	/**
	 * Returns the histogram registered under `name`, creating it if absent.
	 * The `buckets` argument is only used when the histogram is created.
	 *
	 * @throws Error when the name belongs to a metric that is not a histogram.
	 */
	histogram(name: string, description: string, buckets?: number[]): Histogram {
		const found = this.metrics.get(name);
		if (found === undefined) return this.registerHistogram(name, description, buckets);
		if (!(found instanceof Histogram)) throw new Error(`Metric '${name}' is not a histogram.`);
		return found;
	}

	/** Looks up a metric by name without creating it. */
	get(name: string): Metric | undefined {
		return this.metrics.get(name);
	}

	/** Collects one snapshot per registered metric, in registration order. */
	snapshot(): MetricSnapshot[] {
		return Array.from(this.metrics.values(), (metric) => metric.snapshot());
	}

	/** Removes every registered metric. */
	dispose(): void {
		this.metrics.clear();
	}

	/**
	 * Shared registration path: validates the name, rejects duplicates, then
	 * builds and stores the metric.
	 */
	private store<T extends Metric>(name: string, build: () => T): T {
		assertMetricName(name);
		if (this.metrics.has(name)) throw new Error(`Metric '${name}' is already registered.`);
		const metric = build();
		this.metrics.set(name, metric);
		return metric;
	}
}
|
|
69
|
+
|
|
70
|
+
/**
 * Factory for a new, empty metric registry.
 *
 * @returns A MetricRegistry with no metrics registered.
 */
export function createMetricRegistry(): MetricRegistry {
	const registry = new MetricRegistry();
	return registry;
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { labelKey, type MetricLabels } from "./metrics-primitives.ts";
|
|
2
|
+
|
|
3
|
+
/**
 * A single increment remembered by the time-windowed counter.
 */
interface WindowEvent {
	/** Clock reading taken when the increment was recorded. */
	timestamp: number;
	/** Labels the increment was recorded with. */
	labels: MetricLabels;
	/** Amount added by this increment. */
	delta: number;
}
|
|
8
|
+
|
|
9
|
+
/**
 * Counter that remembers individual increments and forgets those older than a
 * fixed retention window.
 */
export class TimeWindowedCounter {
	private events: WindowEvent[] = [];
	private readonly windowMs: number;
	private readonly now: () => number;

	/**
	 * @param windowMs Retention window in milliseconds (default: one hour).
	 * @param now Clock used for timestamps; defaults to `Date.now`.
	 */
	constructor(windowMs = 3_600_000, now: () => number = () => Date.now()) {
		this.now = now;
		this.windowMs = windowMs;
	}

	/**
	 * Records an increment with the current clock reading.
	 *
	 * @param labels Labels stored (as a shallow copy) with the increment.
	 * @param delta Amount to add; non-finite values are ignored.
	 */
	inc(labels: MetricLabels = {}, delta = 1): void {
		if (Number.isFinite(delta)) {
			const recorded: WindowEvent = { timestamp: this.now(), labels: { ...labels }, delta };
			this.events.push(recorded);
			this.prune();
		}
	}

	/**
	 * Sums the increments recorded for exactly this label set within the last
	 * `durationMs` milliseconds.
	 *
	 * @param labels Label set to match, compared via `labelKey`.
	 * @param durationMs Look-back span; defaults to the retention window.
	 * @returns The summed deltas of the matching retained events.
	 */
	count(labels: MetricLabels = {}, durationMs = this.windowMs): number {
		this.prune();
		const wanted = labelKey(labels);
		const oldest = this.now() - durationMs;
		let total = 0;
		for (const event of this.events) {
			if (event.timestamp >= oldest && labelKey(event.labels) === wanted) {
				total += event.delta;
			}
		}
		return total;
	}

	/**
	 * Average increments per second over the last `durationMs` milliseconds.
	 *
	 * @returns 0 when `durationMs` is zero or negative, otherwise count / seconds.
	 */
	rate(labels: MetricLabels = {}, durationMs = this.windowMs): number {
		return durationMs <= 0 ? 0 : this.count(labels, durationMs) / (durationMs / 1000);
	}

	/** Number of events currently retained, after dropping expired ones. */
	size(): number {
		this.prune();
		return this.events.length;
	}

	/** Drops every event whose timestamp is older than the retention window. */
	private prune(): void {
		const cutoff = this.now() - this.windowMs;
		const kept: WindowEvent[] = [];
		for (const event of this.events) {
			if (event.timestamp >= cutoff) kept.push(event);
		}
		this.events = kept;
	}
}
|