@smithers-orchestrator/observability 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/package.json +44 -0
- package/src/MetricName.ts +1 -0
- package/src/MetricsServiceLive.js +9 -0
- package/src/ResolvedSmithersObservabilityOptions.ts +10 -0
- package/src/SmithersEvent.ts +603 -0
- package/src/SmithersLogFormat.ts +1 -0
- package/src/SmithersMetricDefinition.ts +15 -0
- package/src/SmithersMetricType.ts +1 -0
- package/src/SmithersMetricUnit.ts +8 -0
- package/src/SmithersObservability.js +6 -0
- package/src/SmithersObservabilityOptions.ts +10 -0
- package/src/SmithersObservabilityService.ts +15 -0
- package/src/_coreCorrelation/CorrelationContext.ts +10 -0
- package/src/_coreCorrelation/CorrelationContextLive.js +11 -0
- package/src/_coreCorrelation/CorrelationContextService.js +6 -0
- package/src/_coreCorrelation/CorrelationContextServiceShape.ts +14 -0
- package/src/_coreCorrelation/CorrelationPatch.ts +3 -0
- package/src/_coreCorrelation/_correlationStorage.js +4 -0
- package/src/_coreCorrelation/correlationContextFiberRef.js +2 -0
- package/src/_coreCorrelation/correlationContextToLogAnnotations.js +28 -0
- package/src/_coreCorrelation/getCurrentCorrelationContext.js +9 -0
- package/src/_coreCorrelation/getCurrentCorrelationContextEffect.js +11 -0
- package/src/_coreCorrelation/index.js +14 -0
- package/src/_coreCorrelation/mergeCorrelationContext.js +61 -0
- package/src/_coreCorrelation/runWithCorrelationContext.js +14 -0
- package/src/_coreCorrelation/updateCurrentCorrelationContext.js +19 -0
- package/src/_coreCorrelation/withCorrelationContext.js +15 -0
- package/src/_coreCorrelation/withCurrentCorrelationContext.js +13 -0
- package/src/_coreMetrics.js +510 -0
- package/src/_coreMetricsShape.ts +55 -0
- package/src/_corePrometheus.js +93 -0
- package/src/_corePrometheusShape.ts +11 -0
- package/src/_coreTracing.js +142 -0
- package/src/_coreTracingShape.ts +17 -0
- package/src/_smithersSpanAttributeAliases.js +19 -0
- package/src/_smithersTraceSpanStorage.js +3 -0
- package/src/annotateSmithersTrace.js +11 -0
- package/src/correlation.js +20 -0
- package/src/createSmithersObservabilityLayer.js +49 -0
- package/src/createSmithersOtelLayer.js +21 -0
- package/src/createSmithersRuntimeLayer.js +2 -0
- package/src/getCurrentSmithersTraceAnnotations.js +14 -0
- package/src/getCurrentSmithersTraceSpan.js +7 -0
- package/src/index.d.ts +1032 -0
- package/src/index.js +35 -0
- package/src/logging.js +91 -0
- package/src/makeSmithersSpanAttributes.js +20 -0
- package/src/metrics/SmithersMetricDefinition.ts +17 -0
- package/src/metrics/SmithersMetricType.ts +1 -0
- package/src/metrics/SmithersMetricUnit.ts +8 -0
- package/src/metrics/_asyncExternalWaitCounts.js +4 -0
- package/src/metrics/_buckets.js +43 -0
- package/src/metrics/_processStartMs.js +1 -0
- package/src/metrics/activeNodes.js +2 -0
- package/src/metrics/activeRuns.js +2 -0
- package/src/metrics/agentActionsTotal.js +2 -0
- package/src/metrics/agentDurationMs.js +3 -0
- package/src/metrics/agentErrorsTotal.js +2 -0
- package/src/metrics/agentEventsTotal.js +2 -0
- package/src/metrics/agentInvocationsTotal.js +2 -0
- package/src/metrics/agentRetriesTotal.js +2 -0
- package/src/metrics/agentSessionsTotal.js +2 -0
- package/src/metrics/agentTokensTotal.js +2 -0
- package/src/metrics/alertDeliveriesAttempted.js +2 -0
- package/src/metrics/alertDeliveriesSuppressed.js +2 -0
- package/src/metrics/alertsAcknowledgedTotal.js +2 -0
- package/src/metrics/alertsActive.js +2 -0
- package/src/metrics/alertsEscalatedTotal.js +2 -0
- package/src/metrics/alertsFiredTotal.js +2 -0
- package/src/metrics/alertsReopenedTotal.js +2 -0
- package/src/metrics/alertsResolvedTotal.js +2 -0
- package/src/metrics/alertsSilencedTotal.js +2 -0
- package/src/metrics/approvalPending.js +2 -0
- package/src/metrics/approvalWaitDuration.js +3 -0
- package/src/metrics/approvalsDenied.js +2 -0
- package/src/metrics/approvalsGranted.js +2 -0
- package/src/metrics/approvalsRequested.js +2 -0
- package/src/metrics/attemptDuration.js +3 -0
- package/src/metrics/attentionBacklog.js +2 -0
- package/src/metrics/cacheHits.js +2 -0
- package/src/metrics/cacheMisses.js +2 -0
- package/src/metrics/dbQueryDuration.js +3 -0
- package/src/metrics/dbRetries.js +2 -0
- package/src/metrics/dbTransactionDuration.js +3 -0
- package/src/metrics/dbTransactionRetries.js +2 -0
- package/src/metrics/dbTransactionRollbacks.js +2 -0
- package/src/metrics/devtoolsActiveSubscribers.js +2 -0
- package/src/metrics/devtoolsBackpressureDisconnectTotal.js +2 -0
- package/src/metrics/devtoolsDeltaBuildMs.js +3 -0
- package/src/metrics/devtoolsEventBytes.js +3 -0
- package/src/metrics/devtoolsEventTotal.js +2 -0
- package/src/metrics/devtoolsSnapshotBuildMs.js +3 -0
- package/src/metrics/devtoolsSubscribeTotal.js +2 -0
- package/src/metrics/errorsTotal.js +2 -0
- package/src/metrics/eventsEmittedTotal.js +2 -0
- package/src/metrics/externalWaitAsyncPending.js +2 -0
- package/src/metrics/gatewayApprovalDecisionsTotal.js +2 -0
- package/src/metrics/gatewayAuthEventsTotal.js +2 -0
- package/src/metrics/gatewayConnectionsActive.js +2 -0
- package/src/metrics/gatewayConnectionsClosedTotal.js +2 -0
- package/src/metrics/gatewayConnectionsTotal.js +2 -0
- package/src/metrics/gatewayCronTriggersTotal.js +2 -0
- package/src/metrics/gatewayErrorsTotal.js +2 -0
- package/src/metrics/gatewayHeartbeatTicksTotal.js +2 -0
- package/src/metrics/gatewayMessagesReceivedTotal.js +2 -0
- package/src/metrics/gatewayMessagesSentTotal.js +2 -0
- package/src/metrics/gatewayRpcCallsTotal.js +2 -0
- package/src/metrics/gatewayRpcDuration.js +3 -0
- package/src/metrics/gatewayRunsCompletedTotal.js +2 -0
- package/src/metrics/gatewayRunsStartedTotal.js +2 -0
- package/src/metrics/gatewaySignalsTotal.js +2 -0
- package/src/metrics/gatewayWebhooksReceivedTotal.js +2 -0
- package/src/metrics/gatewayWebhooksRejectedTotal.js +2 -0
- package/src/metrics/gatewayWebhooksVerifiedTotal.js +2 -0
- package/src/metrics/heartbeatDataSizeBytes.js +3 -0
- package/src/metrics/heartbeatIntervalMs.js +3 -0
- package/src/metrics/hotReloadDuration.js +3 -0
- package/src/metrics/hotReloadFailures.js +2 -0
- package/src/metrics/hotReloads.js +2 -0
- package/src/metrics/httpRequestDuration.js +3 -0
- package/src/metrics/httpRequests.js +2 -0
- package/src/metrics/index.js +151 -0
- package/src/metrics/metricsServiceAdapter.js +188 -0
- package/src/metrics/nodeDuration.js +3 -0
- package/src/metrics/nodeRetriesTotal.js +2 -0
- package/src/metrics/nodesFailed.js +2 -0
- package/src/metrics/nodesFinished.js +2 -0
- package/src/metrics/nodesStarted.js +2 -0
- package/src/metrics/processHeapUsedBytes.js +2 -0
- package/src/metrics/processMemoryRssBytes.js +2 -0
- package/src/metrics/processUptimeSeconds.js +2 -0
- package/src/metrics/promptSizeBytes.js +3 -0
- package/src/metrics/responseSizeBytes.js +3 -0
- package/src/metrics/rewindDurationMs.js +3 -0
- package/src/metrics/rewindFramesDeleted.js +3 -0
- package/src/metrics/rewindRollbackTotal.js +2 -0
- package/src/metrics/rewindSandboxesReverted.js +3 -0
- package/src/metrics/rewindTotal.js +2 -0
- package/src/metrics/runDuration.js +3 -0
- package/src/metrics/runsAncestryDepth.js +3 -0
- package/src/metrics/runsCancelledTotal.js +2 -0
- package/src/metrics/runsCarriedStateBytes.js +3 -0
- package/src/metrics/runsContinuedTotal.js +2 -0
- package/src/metrics/runsFailedTotal.js +2 -0
- package/src/metrics/runsFinishedTotal.js +2 -0
- package/src/metrics/runsResumedTotal.js +2 -0
- package/src/metrics/runsTotal.js +2 -0
- package/src/metrics/sandboxActive.js +2 -0
- package/src/metrics/sandboxBundleSizeBytes.js +3 -0
- package/src/metrics/sandboxCompletedTotal.js +2 -0
- package/src/metrics/sandboxCreatedTotal.js +2 -0
- package/src/metrics/sandboxDurationMs.js +3 -0
- package/src/metrics/sandboxPatchCount.js +3 -0
- package/src/metrics/sandboxTransportDurationMs.js +3 -0
- package/src/metrics/schedulerConcurrencyUtilization.js +2 -0
- package/src/metrics/schedulerQueueDepth.js +2 -0
- package/src/metrics/schedulerWaitDuration.js +3 -0
- package/src/metrics/scorerEventsFailed.js +2 -0
- package/src/metrics/scorerEventsFinished.js +2 -0
- package/src/metrics/scorerEventsStarted.js +2 -0
- package/src/metrics/smithersMetricCatalog.js +484 -0
- package/src/metrics/smithersMetricCatalogByKey.js +2 -0
- package/src/metrics/smithersMetricCatalogByName.js +2 -0
- package/src/metrics/smithersMetricCatalogByPrometheusName.js +2 -0
- package/src/metrics/supervisorPollDuration.js +3 -0
- package/src/metrics/supervisorPollsTotal.js +2 -0
- package/src/metrics/supervisorResumeLag.js +3 -0
- package/src/metrics/supervisorResumedTotal.js +2 -0
- package/src/metrics/supervisorSkippedTotal.js +2 -0
- package/src/metrics/supervisorStaleDetected.js +2 -0
- package/src/metrics/taskHeartbeatTimeoutTotal.js +2 -0
- package/src/metrics/taskHeartbeatsTotal.js +2 -0
- package/src/metrics/timerDelayDuration.js +3 -0
- package/src/metrics/timersCancelled.js +2 -0
- package/src/metrics/timersCreated.js +2 -0
- package/src/metrics/timersFired.js +2 -0
- package/src/metrics/timersPending.js +2 -0
- package/src/metrics/toPrometheusMetricName.js +8 -0
- package/src/metrics/tokensCacheReadTotal.js +2 -0
- package/src/metrics/tokensCacheWriteTotal.js +2 -0
- package/src/metrics/tokensContextWindowBucketTotal.js +2 -0
- package/src/metrics/tokensContextWindowPerCall.js +3 -0
- package/src/metrics/tokensInputPerCall.js +3 -0
- package/src/metrics/tokensInputTotal.js +2 -0
- package/src/metrics/tokensOutputPerCall.js +3 -0
- package/src/metrics/tokensOutputTotal.js +2 -0
- package/src/metrics/tokensReasoningTotal.js +2 -0
- package/src/metrics/toolCallErrorsTotal.js +2 -0
- package/src/metrics/toolCallsTotal.js +2 -0
- package/src/metrics/toolDuration.js +3 -0
- package/src/metrics/toolOutputTruncatedTotal.js +2 -0
- package/src/metrics/trackEvent.js +604 -0
- package/src/metrics/updateAsyncExternalWaitPending.js +14 -0
- package/src/metrics/updateProcessMetrics.js +17 -0
- package/src/metrics/vcsDuration.js +3 -0
- package/src/prometheusContentType.js +1 -0
- package/src/renderPrometheusMetrics.js +205 -0
- package/src/resolveSmithersObservabilityOptions.js +79 -0
- package/src/smithersMetrics.js +2 -0
- package/src/smithersSpanNames.js +6 -0
- package/src/withSmithersSpan.js +15 -0
|
@@ -0,0 +1,510 @@
|
|
|
1
|
+
import { Context, Effect, Layer } from "effect";
|
|
2
|
+
import { renderPrometheusSamples, toPrometheusMetricName, } from "./_corePrometheus.js";
|
|
3
|
+
/** @typedef {import("./MetricName.ts").MetricName} MetricName */
|
|
4
|
+
/** @typedef {import("./SmithersMetricType.ts").SmithersMetricType} SmithersMetricType */
|
|
5
|
+
/** @typedef {import("./SmithersMetricUnit.ts").SmithersMetricUnit} SmithersMetricUnit */
|
|
6
|
+
/** @typedef {import("./_corePrometheusShape.ts").MetricLabels} MetricLabels */
|
|
7
|
+
/** @typedef {import("./_corePrometheusShape.ts").PrometheusSample} PrometheusSample */
|
|
8
|
+
/** @typedef {import("./_coreMetricsShape.ts").CounterEntry} CounterEntry */
|
|
9
|
+
/** @typedef {import("./_coreMetricsShape.ts").GaugeEntry} GaugeEntry */
|
|
10
|
+
/** @typedef {import("./_coreMetricsShape.ts").HistogramEntry} HistogramEntry */
|
|
11
|
+
/** @typedef {import("./_coreMetricsShape.ts").MetricsSnapshot} MetricsSnapshot */
|
|
12
|
+
/** @typedef {import("./_coreMetricsShape.ts").MetricsServiceShape} MetricsServiceShape */
|
|
13
|
+
/** @typedef {import("./_coreMetricsShape.ts").SmithersMetricEvent} SmithersMetricEvent */
|
|
14
|
+
/** @typedef {import("./SmithersMetricDefinition.ts").SmithersMetricDefinition} SmithersMetricDefinition */
|
|
15
|
+
|
|
16
|
+
/**
 * Assembles a single metric definition record.
 *
 * The Prometheus-facing name is derived from `name` through
 * `toPrometheusMetricName`; every remaining field comes from `options`.
 *
 * @param {string} key Identifier stored in the record's `key` field.
 * @param {string} name Metric name stored in the record's `name` field.
 * @param {SmithersMetricType} type Metric type stored in the record's `type` field.
 * @param {Omit<SmithersMetricDefinition, "key" | "name" | "prometheusName" | "type">} options Remaining definition fields.
 * @returns {SmithersMetricDefinition}
 */
function metricDefinition(key, name, type, options) {
    const prometheusName = toPrometheusMetricName(name);
    // `options` is spread last, so its fields are appended after the four fixed ones.
    const definition = { key, name, prometheusName, type, ...options };
    return definition;
}
|
|
32
|
+
// Doubling histogram boundaries: 100, 200, 400, ... up to 204_800 (12 values).
const DURATION_BUCKETS = Array.from({ length: 12 }, (_, step) => 100 * 2 ** step);
|
|
46
|
+
// Powers of two from 1 (2^0) through 2_048 (2^11), 12 values.
const FAST_BUCKETS = Array.from({ length: 12 }, (_, exponent) => 2 ** exponent);
|
|
47
|
+
// Doubling histogram boundaries: 10, 20, 40, ... up to 1_310_720 (18 values).
const TOKEN_BUCKETS = Array.from({ length: 18 }, (_, step) => 10 * 2 ** step);
|
|
67
|
+
// Doubling histogram boundaries: 100, 200, 400, ... up to 3_276_800 (16 values).
const SIZE_BUCKETS = Array.from({ length: 16 }, (_, step) => 100 * 2 ** step);
|
|
85
|
+
/**
 * Ordered list of metric definitions, each built with `metricDefinition(key, name, type, options)`.
 * Entries are grouped by type: counters first, then gauges, then histograms.
 * Some entries declare `labels` (label names) and/or `defaultLabels`; histogram
 * entries additionally declare their bucket `boundaries`.
 */
export const smithersMetricCatalog = [
|
|
86
|
+
// --- Counters ---
metricDefinition("runsTotal", "smithers.runs.total", "counter", { label: "Runs started", unit: "count" }),
|
|
87
|
+
metricDefinition("nodesStarted", "smithers.nodes.started", "counter", { label: "Nodes started", unit: "count" }),
|
|
88
|
+
metricDefinition("nodesFinished", "smithers.nodes.finished", "counter", { label: "Nodes finished", unit: "count" }),
|
|
89
|
+
metricDefinition("nodesFailed", "smithers.nodes.failed", "counter", { label: "Nodes failed", unit: "count" }),
|
|
90
|
+
metricDefinition("toolCallsTotal", "smithers.tool_calls.total", "counter", { label: "Tool calls", unit: "count" }),
|
|
91
|
+
metricDefinition("cacheHits", "smithers.cache.hits", "counter", { label: "Cache hits", unit: "count" }),
|
|
92
|
+
metricDefinition("cacheMisses", "smithers.cache.misses", "counter", { label: "Cache misses", unit: "count" }),
|
|
93
|
+
metricDefinition("dbRetries", "smithers.db.retries", "counter", { label: "DB retries", unit: "count" }),
|
|
94
|
+
metricDefinition("dbTransactionRollbacks", "smithers.db.transaction_rollbacks", "counter", { label: "DB transaction rollbacks", unit: "count" }),
|
|
95
|
+
metricDefinition("dbTransactionRetries", "smithers.db.transaction_retries", "counter", { label: "DB transaction retries", unit: "count" }),
|
|
96
|
+
metricDefinition("hotReloads", "smithers.hot.reloads", "counter", { label: "Hot reloads", unit: "count" }),
|
|
97
|
+
metricDefinition("hotReloadFailures", "smithers.hot.reload_failures", "counter", { label: "Hot reload failures", unit: "count" }),
|
|
98
|
+
metricDefinition("httpRequests", "smithers.http.requests", "counter", { label: "HTTP requests", unit: "count", labels: ["method", "route", "status_code", "status_class"] }),
|
|
99
|
+
metricDefinition("approvalsRequested", "smithers.approvals.requested", "counter", { label: "Approvals requested", unit: "count" }),
|
|
100
|
+
metricDefinition("approvalsGranted", "smithers.approvals.granted", "counter", { label: "Approvals granted", unit: "count" }),
|
|
101
|
+
metricDefinition("approvalsDenied", "smithers.approvals.denied", "counter", { label: "Approvals denied", unit: "count" }),
|
|
102
|
+
metricDefinition("timersCreated", "smithers.timers.created", "counter", { label: "Timers created", unit: "count" }),
|
|
103
|
+
metricDefinition("timersFired", "smithers.timers.fired", "counter", { label: "Timers fired", unit: "count" }),
|
|
104
|
+
metricDefinition("timersCancelled", "smithers.timers.cancelled", "counter", { label: "Timers cancelled", unit: "count" }),
|
|
105
|
+
metricDefinition("sandboxCreatedTotal", "smithers.sandbox.created_total", "counter", { label: "Sandboxes created", unit: "count", labels: ["runtime"] }),
|
|
106
|
+
metricDefinition("sandboxCompletedTotal", "smithers.sandbox.completed_total", "counter", { label: "Sandboxes completed", unit: "count", labels: ["runtime", "status"] }),
|
|
107
|
+
metricDefinition("alertsFiredTotal", "smithers.alerts.fired_total", "counter", { label: "Alerts fired", unit: "count", labels: ["policy"] }),
|
|
108
|
+
metricDefinition("alertsAcknowledgedTotal", "smithers.alerts.acknowledged_total", "counter", { label: "Alerts acknowledged", unit: "count", labels: ["policy"] }),
|
|
109
|
+
metricDefinition("scorerEventsStarted", "smithers.scorer_events.started", "counter", { label: "Scorer events started", unit: "count" }),
|
|
110
|
+
metricDefinition("scorerEventsFinished", "smithers.scorer_events.finished", "counter", { label: "Scorer events finished", unit: "count" }),
|
|
111
|
+
metricDefinition("scorerEventsFailed", "smithers.scorer_events.failed", "counter", { label: "Scorer events failed", unit: "count" }),
|
|
112
|
+
metricDefinition("tokensInputTotal", "smithers.tokens.input_total", "counter", { label: "Input tokens", unit: "tokens", labels: ["agent", "model"] }),
|
|
113
|
+
metricDefinition("tokensOutputTotal", "smithers.tokens.output_total", "counter", { label: "Output tokens", unit: "tokens", labels: ["agent", "model"] }),
|
|
114
|
+
metricDefinition("tokensCacheReadTotal", "smithers.tokens.cache_read_total", "counter", { label: "Cache read tokens", unit: "tokens", labels: ["agent", "model"] }),
|
|
115
|
+
metricDefinition("tokensCacheWriteTotal", "smithers.tokens.cache_write_total", "counter", { label: "Cache write tokens", unit: "tokens", labels: ["agent", "model"] }),
|
|
116
|
+
metricDefinition("tokensReasoningTotal", "smithers.tokens.reasoning_total", "counter", { label: "Reasoning tokens", unit: "tokens", labels: ["agent", "model"] }),
|
|
117
|
+
metricDefinition("tokensContextWindowBucketTotal", "smithers.tokens.context_window_bucket_total", "counter", { label: "Context window bucket hits", unit: "count", labels: ["agent", "bucket", "model"] }),
|
|
118
|
+
metricDefinition("runsFinishedTotal", "smithers.runs.finished_total", "counter", { label: "Runs finished", unit: "count" }),
|
|
119
|
+
metricDefinition("runsFailedTotal", "smithers.runs.failed_total", "counter", { label: "Runs failed", unit: "count" }),
|
|
120
|
+
metricDefinition("runsCancelledTotal", "smithers.runs.cancelled_total", "counter", { label: "Runs cancelled", unit: "count" }),
|
|
121
|
+
metricDefinition("runsResumedTotal", "smithers.runs.resumed_total", "counter", { label: "Runs resumed", unit: "count" }),
|
|
122
|
+
metricDefinition("runsContinuedTotal", "smithers.runs.continued_total", "counter", { label: "Runs continued", unit: "count" }),
|
|
123
|
+
metricDefinition("supervisorPollsTotal", "smithers.supervisor.polls_total", "counter", { label: "Supervisor polls", unit: "count" }),
|
|
124
|
+
metricDefinition("supervisorStaleDetected", "smithers.supervisor.stale_detected", "counter", { label: "Supervisor stale runs detected", unit: "count" }),
|
|
125
|
+
metricDefinition("supervisorResumedTotal", "smithers.supervisor.resumed_total", "counter", { label: "Supervisor auto-resumes", unit: "count" }),
|
|
126
|
+
metricDefinition("supervisorSkippedTotal", "smithers.supervisor.skipped_total", "counter", { label: "Supervisor skipped auto-resumes", unit: "count", labels: ["reason"] }),
|
|
127
|
+
metricDefinition("errorsTotal", "smithers.errors.total", "counter", { label: "Errors", unit: "count" }),
|
|
128
|
+
metricDefinition("nodeRetriesTotal", "smithers.node.retries_total", "counter", { label: "Node retries", unit: "count" }),
|
|
129
|
+
metricDefinition("toolCallErrorsTotal", "smithers.tool_calls.errors_total", "counter", { label: "Tool call errors", unit: "count" }),
|
|
130
|
+
metricDefinition("toolOutputTruncatedTotal", "smithers.tool.output_truncated_total", "counter", { label: "Tool outputs truncated", unit: "count" }),
|
|
131
|
+
metricDefinition("agentInvocationsTotal", "smithers.agent_invocations_total", "counter", { label: "Agent invocations", unit: "count", labels: ["engine", "model"] }),
|
|
132
|
+
metricDefinition("agentTokensTotal", "smithers.agent_tokens_total", "counter", { label: "Agent tokens", unit: "tokens", labels: ["engine", "model", "kind", "source"] }),
|
|
133
|
+
metricDefinition("agentErrorsTotal", "smithers.agent_errors_total", "counter", { label: "Agent errors", unit: "count", labels: ["engine", "model", "reason", "source"] }),
|
|
134
|
+
metricDefinition("agentRetriesTotal", "smithers.agent_retries_total", "counter", { label: "Agent retries", unit: "count", labels: ["engine", "model", "reason", "source"] }),
|
|
135
|
+
metricDefinition("agentEventsTotal", "smithers.agent_events_total", "counter", { label: "Agent events", unit: "count", labels: ["engine", "event_type", "source"] }),
|
|
136
|
+
metricDefinition("agentSessionsTotal", "smithers.agent_sessions_total", "counter", { label: "Agent sessions", unit: "count", labels: ["engine", "model", "resume", "source", "status"] }),
|
|
137
|
+
metricDefinition("agentActionsTotal", "smithers.agent_actions_total", "counter", { label: "Agent actions", unit: "count", labels: ["action_name", "action_type", "engine", "source"] }),
|
|
138
|
+
metricDefinition("gatewayConnectionsTotal", "smithers.gateway.connections_total", "counter", { label: "Gateway connections opened", unit: "count", labels: ["transport"] }),
|
|
139
|
+
metricDefinition("gatewayConnectionsClosedTotal", "smithers.gateway.connections_closed_total", "counter", { label: "Gateway connections closed", unit: "count", labels: ["code", "reason", "transport"] }),
|
|
140
|
+
metricDefinition("gatewayMessagesReceivedTotal", "smithers.gateway.messages_received_total", "counter", { label: "Gateway messages received", unit: "count", labels: ["kind", "transport"] }),
|
|
141
|
+
metricDefinition("gatewayMessagesSentTotal", "smithers.gateway.messages_sent_total", "counter", { label: "Gateway messages sent", unit: "count", labels: ["kind", "transport"] }),
|
|
142
|
+
metricDefinition("gatewayRpcCallsTotal", "smithers.gateway.rpc_calls_total", "counter", { label: "Gateway RPC calls", unit: "count", labels: ["method", "transport"] }),
|
|
143
|
+
metricDefinition("gatewayErrorsTotal", "smithers.gateway.errors_total", "counter", { label: "Gateway errors", unit: "count", labels: ["code", "stage", "transport"] }),
|
|
144
|
+
metricDefinition("gatewayRunsStartedTotal", "smithers.gateway.runs_started_total", "counter", { label: "Gateway runs started", unit: "count", labels: ["transport"] }),
|
|
145
|
+
metricDefinition("gatewayRunsCompletedTotal", "smithers.gateway.runs_completed_total", "counter", { label: "Gateway runs completed", unit: "count", labels: ["status", "transport"] }),
|
|
146
|
+
metricDefinition("gatewayApprovalDecisionsTotal", "smithers.gateway.approval_decisions_total", "counter", { label: "Gateway approval decisions", unit: "count", labels: ["decision", "transport"] }),
|
|
147
|
+
metricDefinition("gatewaySignalsTotal", "smithers.gateway.signals_total", "counter", { label: "Gateway signals", unit: "count", labels: ["outcome", "transport"] }),
|
|
148
|
+
metricDefinition("gatewayAuthEventsTotal", "smithers.gateway.auth_events_total", "counter", { label: "Gateway auth events", unit: "count", labels: ["outcome", "transport"] }),
|
|
149
|
+
metricDefinition("gatewayHeartbeatTicksTotal", "smithers.gateway.heartbeat_ticks_total", "counter", { label: "Gateway heartbeats", unit: "count" }),
|
|
150
|
+
metricDefinition("gatewayCronTriggersTotal", "smithers.gateway.cron_triggers_total", "counter", { label: "Gateway cron triggers", unit: "count", labels: ["source"] }),
|
|
151
|
+
metricDefinition("gatewayWebhooksReceivedTotal", "smithers.gateway.webhooks_received_total", "counter", { label: "Gateway webhooks received", unit: "count", labels: ["provider"] }),
|
|
152
|
+
metricDefinition("gatewayWebhooksVerifiedTotal", "smithers.gateway.webhooks_verified_total", "counter", { label: "Gateway webhooks verified", unit: "count", labels: ["provider"] }),
|
|
153
|
+
metricDefinition("gatewayWebhooksRejectedTotal", "smithers.gateway.webhooks_rejected_total", "counter", { label: "Gateway webhooks rejected", unit: "count", labels: ["provider", "reason"] }),
|
|
154
|
+
metricDefinition("eventsEmittedTotal", "smithers.events.emitted_total", "counter", { label: "Events emitted", unit: "count" }),
|
|
155
|
+
metricDefinition("taskHeartbeatsTotal", "smithers.heartbeats.total", "counter", { label: "Task heartbeats", unit: "count" }),
|
|
156
|
+
metricDefinition("taskHeartbeatTimeoutTotal", "smithers.heartbeats.timeout_total", "counter", { label: "Task heartbeat timeouts", unit: "count" }),
|
|
157
|
+
// --- Gauges ---
metricDefinition("activeRuns", "smithers.runs.active", "gauge", { label: "Active runs", unit: "count" }),
|
|
158
|
+
metricDefinition("activeNodes", "smithers.nodes.active", "gauge", { label: "Active nodes", unit: "count" }),
|
|
159
|
+
metricDefinition("schedulerQueueDepth", "smithers.scheduler.queue_depth", "gauge", { label: "Scheduler queue depth", unit: "count" }),
|
|
160
|
+
metricDefinition("sandboxActive", "smithers.sandbox.active", "gauge", { label: "Active sandboxes", unit: "count", labels: ["runtime"] }),
|
|
161
|
+
metricDefinition("alertsActive", "smithers.alerts.active", "gauge", { label: "Active alerts", unit: "count", labels: ["policy"] }),
|
|
162
|
+
metricDefinition("gatewayConnectionsActive", "smithers.gateway.connections_active", "gauge", { label: "Active gateway connections", unit: "count", labels: ["transport"] }),
|
|
163
|
+
metricDefinition("approvalPending", "smithers.approval.pending", "gauge", { label: "Pending approvals", unit: "count" }),
|
|
164
|
+
metricDefinition("externalWaitAsyncPending", "smithers.external_wait.async_pending", "gauge", { label: "Pending external waits", unit: "count", labels: ["kind"], defaultLabels: [{ kind: "approval" }, { kind: "event" }] }),
|
|
165
|
+
metricDefinition("timersPending", "smithers.timers.pending", "gauge", { label: "Pending timers", unit: "count" }),
|
|
166
|
+
metricDefinition("schedulerConcurrencyUtilization", "smithers.scheduler.concurrency_utilization", "gauge", { label: "Scheduler concurrency utilization", unit: "ratio" }),
|
|
167
|
+
metricDefinition("processUptimeSeconds", "smithers.process.uptime_seconds", "gauge", { label: "Process uptime", unit: "seconds" }),
|
|
168
|
+
metricDefinition("processMemoryRssBytes", "smithers.process.memory_rss_bytes", "gauge", { label: "Process RSS memory", unit: "bytes" }),
|
|
169
|
+
metricDefinition("processHeapUsedBytes", "smithers.process.heap_used_bytes", "gauge", { label: "Process heap used", unit: "bytes" }),
|
|
170
|
+
// --- Histograms (each declares explicit bucket `boundaries`) ---
metricDefinition("nodeDuration", "smithers.node.duration_ms", "histogram", { label: "Node duration", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
171
|
+
metricDefinition("attemptDuration", "smithers.attempt.duration_ms", "histogram", { label: "Attempt duration", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
172
|
+
metricDefinition("toolDuration", "smithers.tool.duration_ms", "histogram", { label: "Tool duration", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
173
|
+
metricDefinition("dbQueryDuration", "smithers.db.query_ms", "histogram", { label: "DB query duration", unit: "milliseconds", boundaries: FAST_BUCKETS }),
|
|
174
|
+
metricDefinition("dbTransactionDuration", "smithers.db.transaction_ms", "histogram", { label: "DB transaction duration", unit: "milliseconds", boundaries: FAST_BUCKETS }),
|
|
175
|
+
metricDefinition("httpRequestDuration", "smithers.http.request_duration_ms", "histogram", { label: "HTTP request duration", unit: "milliseconds", labels: ["method", "route", "status_code", "status_class"], boundaries: FAST_BUCKETS }),
|
|
176
|
+
metricDefinition("hotReloadDuration", "smithers.hot.reload_duration_ms", "histogram", { label: "Hot reload duration", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
177
|
+
metricDefinition("vcsDuration", "smithers.vcs.duration_ms", "histogram", { label: "VCS duration", unit: "milliseconds", boundaries: FAST_BUCKETS }),
|
|
178
|
+
metricDefinition("agentDurationMs", "smithers.agent_duration_ms", "histogram", { label: "Agent duration", unit: "milliseconds", labels: ["engine", "model"], boundaries: DURATION_BUCKETS }),
|
|
179
|
+
metricDefinition("tokensInputPerCall", "smithers.tokens.input_per_call", "histogram", { label: "Input tokens per call", unit: "tokens", labels: ["agent", "model"], boundaries: TOKEN_BUCKETS }),
|
|
180
|
+
metricDefinition("tokensOutputPerCall", "smithers.tokens.output_per_call", "histogram", { label: "Output tokens per call", unit: "tokens", labels: ["agent", "model"], boundaries: TOKEN_BUCKETS }),
|
|
181
|
+
metricDefinition("tokensContextWindowPerCall", "smithers.tokens.context_window_per_call", "histogram", { label: "Context window per call", unit: "tokens", labels: ["agent", "model"], boundaries: [50_000, 100_000, 200_000, 500_000, 1_000_000] }),
|
|
182
|
+
metricDefinition("runDuration", "smithers.run.duration_ms", "histogram", { label: "Run duration", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
183
|
+
metricDefinition("promptSizeBytes", "smithers.prompt.size_bytes", "histogram", { label: "Prompt size", unit: "bytes", boundaries: SIZE_BUCKETS }),
|
|
184
|
+
metricDefinition("responseSizeBytes", "smithers.response.size_bytes", "histogram", { label: "Response size", unit: "bytes", boundaries: SIZE_BUCKETS }),
|
|
185
|
+
metricDefinition("approvalWaitDuration", "smithers.approval.wait_duration_ms", "histogram", { label: "Approval wait duration", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
186
|
+
metricDefinition("timerDelayDuration", "smithers.timers.delay_ms", "histogram", { label: "Timer delay", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
187
|
+
metricDefinition("gatewayRpcDuration", "smithers.gateway.rpc_duration_ms", "histogram", { label: "Gateway RPC duration", unit: "milliseconds", labels: ["method", "transport"], boundaries: DURATION_BUCKETS }),
|
|
188
|
+
metricDefinition("schedulerWaitDuration", "smithers.scheduler.wait_duration_ms", "histogram", { label: "Scheduler wait duration", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
189
|
+
metricDefinition("supervisorPollDuration", "smithers.supervisor.poll_duration_ms", "histogram", { label: "Supervisor poll duration", unit: "milliseconds", boundaries: FAST_BUCKETS }),
|
|
190
|
+
metricDefinition("supervisorResumeLag", "smithers.supervisor.resume_lag_ms", "histogram", { label: "Supervisor resume lag", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
191
|
+
metricDefinition("runsAncestryDepth", "smithers.runs.ancestry_depth", "histogram", { label: "Run ancestry depth", unit: "depth", boundaries: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1_024, 2_048] }),
|
|
192
|
+
metricDefinition("runsCarriedStateBytes", "smithers.runs.carried_state_bytes", "histogram", { label: "Run carried state size", unit: "bytes", boundaries: SIZE_BUCKETS }),
|
|
193
|
+
metricDefinition("sandboxDurationMs", "smithers.sandbox.duration_ms", "histogram", { label: "Sandbox duration", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
194
|
+
metricDefinition("sandboxBundleSizeBytes", "smithers.sandbox.bundle_size_bytes", "histogram", { label: "Sandbox bundle size", unit: "bytes", boundaries: SIZE_BUCKETS }),
|
|
195
|
+
metricDefinition("sandboxTransportDurationMs", "smithers.sandbox.transport_duration_ms", "histogram", { label: "Sandbox transport duration", unit: "milliseconds", boundaries: DURATION_BUCKETS }),
|
|
196
|
+
metricDefinition("sandboxPatchCount", "smithers.sandbox.patch_count", "histogram", { label: "Sandbox patch count", unit: "count", boundaries: TOKEN_BUCKETS }),
|
|
197
|
+
metricDefinition("heartbeatDataSizeBytes", "smithers.heartbeats.data_size_bytes", "histogram", { label: "Heartbeat data size", unit: "bytes", boundaries: SIZE_BUCKETS }),
|
|
198
|
+
metricDefinition("heartbeatIntervalMs", "smithers.heartbeats.interval_ms", "histogram", { label: "Heartbeat interval", unit: "milliseconds", boundaries: FAST_BUCKETS }),
|
|
199
|
+
];
|
|
200
|
+
/** Catalog definitions indexed by their `key` property. */
export const smithersMetricCatalogByKey = new Map(
    smithersMetricCatalog.map((definition) => [definition.key, definition]),
);

/** Catalog definitions indexed by their `prometheusName` property. */
export const smithersMetricCatalogByPrometheusName = new Map(
    smithersMetricCatalog.map((definition) => [definition.prometheusName, definition]),
);

/** Catalog definitions indexed by their `name` property. */
export const smithersMetricCatalogByName = new Map(
    smithersMetricCatalog.map((definition) => [definition.name, definition]),
);

/** Frozen plain object mapping each catalog `key` to the corresponding metric `name`. */
export const smithersMetrics = Object.freeze(
    Object.fromEntries(smithersMetricCatalog.map(({ key, name }) => [key, name])),
);
|
|
204
|
+
// The value produced by `Context.Tag("MetricsService")()` is cast through
// `unknown` so that the class extending it carries the JSDoc tag-class type.
const _MetricsServiceBase = /** @type {Context.TagClass<MetricsService, "MetricsService", MetricsServiceShape>} */ (/** @type {unknown} */ (Context.Tag("MetricsService")()));

/**
 * Context tag identified by the string "MetricsService"; layers such as
 * `MetricsServiceLive` and `MetricsServiceNoop` provide its implementation.
 */
export class MetricsService extends _MetricsServiceBase {}
|
|
207
|
+
// Upper bounds used for every histogram created by the in-memory service,
// listed in ascending order.
const DEFAULT_HISTOGRAM_BUCKETS = [
    1, 5,
    10, 25, 50,
    100, 250, 500,
    1_000, 2_500, 5_000,
    10_000, 30_000,
];
|
|
222
|
+
/**
 * Serialises a label set into a string that does not depend on the order in
 * which the labels were written: the `[name, value]` pairs are sorted by label
 * name (using `localeCompare`) and then JSON-encoded.
 *
 * @param {MetricLabels} [labels] Label set; defaults to an empty object.
 * @returns {string} JSON text of the sorted pairs, e.g. `[["a","1"],["b","2"]]`.
 */
function labelsKey(labels = {}) {
    const pairs = Object.entries(labels);
    pairs.sort((first, second) => first[0].localeCompare(second[0]));
    return JSON.stringify(pairs);
}
|
|
229
|
+
/**
 * Builds the registry key for one metric series: the metric name, a `|`
 * separator, and the serialised label set produced by `labelsKey`.
 *
 * @param {string} name Metric name.
 * @param {MetricLabels} [labels] Optional label set of the series.
 * @returns {string} Key of the form `<name>|<labels JSON>`.
 */
function metricKey(name, labels) {
    const serialisedLabels = labelsKey(labels);
    return `${name}|${serialisedLabels}`;
}
|
|
237
|
+
/**
 * Produces a frozen shallow copy of a label set so that a stored entry cannot
 * be affected by later changes to the caller's object.
 *
 * @param {MetricLabels} [labels] Label set; defaults to an empty object.
 * @returns {MetricLabels} A new, frozen object with the same own enumerable properties.
 */
function cloneLabels(labels = {}) {
    const copy = { ...labels };
    return Object.freeze(copy);
}
|
|
244
|
+
/**
 * Creates a metrics service whose state lives entirely in a per-instance `Map`.
 *
 * Every series is stored under `metricKey(name, labels)`, so one metric name
 * with different label sets yields independent entries. Mutations are wrapped
 * in `Effect.sync`, so nothing changes until the returned effect is run.
 *
 * NOTE(review): histograms here always use `DEFAULT_HISTOGRAM_BUCKETS`; the
 * `boundaries` passed to catalog definitions are not consulted — confirm that
 * this is intended.
 *
 * @returns {Context.Tag.Service<MetricsService>}
 */
export function makeInMemoryMetricsService() {
    // Series storage: key is `<name>|<labels JSON>`, value is the mutable entry.
    const registry = new Map();
    // Captured when the service is created; baseline for the uptime gauge.
    const processStartMs = Date.now();
    // Running totals behind the `smithers.external_wait.async_pending` gauge.
    const asyncExternalWaitCounts = {
        approval: 0,
        event: 0,
    };
    /**
     * Returns the counter stored for `name`/`labels`, creating it at zero when
     * the key is empty or currently holds an entry of another type (which is
     * then replaced).
     *
     * @param {string} name
     * @param {MetricLabels} [labels]
     * @returns {CounterEntry}
     */
    function upsertCounter(name, labels) {
        const key = metricKey(name, labels);
        const existing = registry.get(key);
        if (existing?.type === "counter")
            return existing;
        const created = {
            type: "counter",
            value: 0,
            labels: cloneLabels(labels),
        };
        registry.set(key, created);
        return created;
    }
    /**
     * Returns the gauge stored for `name`/`labels`, creating it at zero when
     * the key is empty or currently holds an entry of another type (which is
     * then replaced).
     *
     * @param {string} name
     * @param {MetricLabels} [labels]
     * @returns {GaugeEntry}
     */
    function upsertGauge(name, labels) {
        const key = metricKey(name, labels);
        const existing = registry.get(key);
        if (existing?.type === "gauge")
            return existing;
        const created = {
            type: "gauge",
            value: 0,
            labels: cloneLabels(labels),
        };
        registry.set(key, created);
        return created;
    }
    /**
     * Returns the histogram stored for `name`/`labels`, creating an empty one
     * (all default buckets at zero) when the key is empty or currently holds an
     * entry of another type (which is then replaced).
     *
     * @param {string} name
     * @param {MetricLabels} [labels]
     * @returns {HistogramEntry}
     */
    function upsertHistogram(name, labels) {
        const key = metricKey(name, labels);
        const existing = registry.get(key);
        if (existing?.type === "histogram")
            return existing;
        const created = {
            type: "histogram",
            sum: 0,
            count: 0,
            labels: cloneLabels(labels),
            buckets: new Map(DEFAULT_HISTOGRAM_BUCKETS.map((bucket) => [bucket, 0])),
        };
        registry.set(key, created);
        return created;
    }
    /**
     * Copies one registry entry so the copy no longer shares mutable state
     * with the registry. Labels are already frozen and can be shared.
     *
     * @param {MetricEntry} entry
     * @returns {MetricEntry}
     */
    function copyEntry(entry) {
        return entry.type === "histogram"
            ? { ...entry, buckets: new Map(entry.buckets) }
            : { ...entry };
    }
    /**
     * Converts the registry into the sample list handed to
     * `renderPrometheusSamples`.
     *
     * @returns {PrometheusSample[]}
     */
    function samples() {
        return [...registry.entries()].map(([key, entry]) => {
            // Keys are `<name>|<labels JSON>`; the name is the text before the first "|".
            const name = key.slice(0, key.indexOf("|"));
            if (entry.type === "histogram") {
                return {
                    name,
                    type: entry.type,
                    labels: entry.labels,
                    // Copy so the rendered sample is detached from live state.
                    buckets: new Map(entry.buckets),
                    sum: entry.sum,
                    count: entry.count,
                };
            }
            return {
                name,
                type: entry.type,
                labels: entry.labels,
                value: entry.value,
            };
        });
    }
    const service = {
        // Shorthand for `incrementBy(name, 1, labels)`.
        increment: (name, labels) => service.incrementBy(name, 1, labels),
        incrementBy: (name, value, labels) => Effect.sync(() => {
            const key = metricKey(name, labels);
            const existing = registry.get(key);
            const definition = smithersMetricCatalogByName.get(name);
            // A series that already is a gauge, or whose catalog definition says
            // "gauge", is adjusted in place; this is how the `*.active` and
            // `*.pending` metrics move up and down. Everything else is a counter.
            if (existing?.type === "gauge" || definition?.type === "gauge") {
                upsertGauge(name, labels).value += value;
                return;
            }
            upsertCounter(name, labels).value += value;
        }),
        // Overwrites the gauge with an absolute value.
        gauge: (name, value, labels) => Effect.sync(() => {
            upsertGauge(name, labels).value = value;
        }),
        // Records one observation; buckets are cumulative, so every boundary
        // greater than or equal to the value is incremented.
        histogram: (name, value, labels) => Effect.sync(() => {
            const entry = upsertHistogram(name, labels);
            entry.count += 1;
            entry.sum += value;
            for (const boundary of DEFAULT_HISTOGRAM_BUCKETS) {
                if (value <= boundary) {
                    entry.buckets.set(boundary, (entry.buckets.get(boundary) ?? 0) + 1);
                }
            }
        }),
        // Translates a lifecycle event into metric updates. Every event bumps
        // `smithers.events.emitted_total` labelled with its type; recognised
        // types additionally update their dedicated counters and gauges.
        recordEvent: (event) => {
            const eventType = String(event.type);
            const countEvent = service.increment("smithers.events.emitted_total", {
                type: eventType,
            });
            switch (event.type) {
                case "RunStarted":
                    return Effect.all([
                        countEvent,
                        service.increment("smithers.runs.total"),
                        service.incrementBy("smithers.runs.active", 1),
                    ], { discard: true });
                case "RunFinished":
                    return Effect.all([
                        countEvent,
                        service.incrementBy("smithers.runs.active", -1),
                        service.increment("smithers.runs.finished_total"),
                    ], { discard: true });
                case "RunFailed":
                    return Effect.all([
                        countEvent,
                        service.incrementBy("smithers.runs.active", -1),
                        service.increment("smithers.runs.failed_total"),
                        service.increment("smithers.errors.total"),
                    ], { discard: true });
                case "RunCancelled":
                    return Effect.all([
                        countEvent,
                        service.incrementBy("smithers.runs.active", -1),
                        service.increment("smithers.runs.cancelled_total"),
                    ], { discard: true });
                case "RunContinuedAsNew":
                    return Effect.all([countEvent, service.increment("smithers.runs.continued_total")], { discard: true });
                case "NodeStarted":
                    return Effect.all([
                        countEvent,
                        service.increment("smithers.nodes.started"),
                        service.incrementBy("smithers.nodes.active", 1),
                    ], { discard: true });
                case "NodeFinished":
                    return Effect.all([
                        countEvent,
                        service.increment("smithers.nodes.finished"),
                        service.incrementBy("smithers.nodes.active", -1),
                        // Skip NaN/Infinity: a single non-finite observation would
                        // make the histogram sum non-finite for good.
                        typeof event.durationMs === "number" && Number.isFinite(event.durationMs)
                            ? service.histogram("smithers.node.duration_ms", event.durationMs)
                            : Effect.void,
                    ], { discard: true });
                case "NodeFailed":
                    return Effect.all([
                        countEvent,
                        service.increment("smithers.nodes.failed"),
                        service.increment("smithers.errors.total"),
                        service.incrementBy("smithers.nodes.active", -1),
                    ], { discard: true });
                case "CacheHit":
                    return Effect.all([countEvent, service.increment("smithers.cache.hits")], { discard: true });
                case "CacheMiss":
                    return Effect.all([countEvent, service.increment("smithers.cache.misses")], { discard: true });
                case "ApprovalRequested":
                    return Effect.all([
                        countEvent,
                        service.increment("smithers.approvals.requested"),
                        service.incrementBy("smithers.approval.pending", 1),
                    ], { discard: true });
                case "ApprovalResolved": {
                    // Either field marks an approval; anything else counts as denied.
                    const approved = event.approved === true || event.status === "approved";
                    return Effect.all([
                        countEvent,
                        service.increment(approved
                            ? "smithers.approvals.granted"
                            : "smithers.approvals.denied"),
                        service.incrementBy("smithers.approval.pending", -1),
                    ], { discard: true });
                }
                case "TimerCreated":
                    return Effect.all([
                        countEvent,
                        service.increment("smithers.timers.created"),
                        service.incrementBy("smithers.timers.pending", 1),
                    ], { discard: true });
                case "TimerFired":
                    return Effect.all([
                        countEvent,
                        service.increment("smithers.timers.fired"),
                        service.incrementBy("smithers.timers.pending", -1),
                    ], { discard: true });
                case "TaskHeartbeat":
                    return Effect.all([countEvent, service.increment("smithers.heartbeats.total")], { discard: true });
                case "TaskHeartbeatTimeout":
                    return Effect.all([
                        countEvent,
                        service.increment("smithers.heartbeats.timeout_total"),
                        service.increment("smithers.errors.total"),
                    ], { discard: true });
                case "TokenUsageReported": {
                    const effects = [countEvent];
                    // Only string-valued agent/model fields become labels.
                    const labels = {
                        ...(typeof event.agent === "string" ? { agent: event.agent } : {}),
                        ...(typeof event.model === "string" ? { model: event.model } : {}),
                    };
                    /**
                     * Queues an increment when `value` is a positive finite number;
                     * anything else is ignored.
                     *
                     * @param {string} name
                     * @param {unknown} value
                     */
                    const push = (name, value) => {
                        if (typeof value === "number" && Number.isFinite(value) && value > 0) {
                            effects.push(service.incrementBy(name, value, labels));
                        }
                    };
                    push("smithers.tokens.input_total", event.inputTokens);
                    push("smithers.tokens.output_total", event.outputTokens);
                    push("smithers.tokens.cache_read_total", event.cacheReadTokens);
                    push("smithers.tokens.cache_write_total", event.cacheWriteTokens);
                    push("smithers.tokens.reasoning_total", event.reasoningTokens);
                    return Effect.all(effects, { discard: true });
                }
                default:
                    return countEvent;
            }
        },
        // Refreshes the uptime and memory gauges from the current process state.
        updateProcessMetrics: () => Effect.sync(() => {
            const uptimeS = (Date.now() - processStartMs) / 1000;
            const mem = process.memoryUsage();
            upsertGauge("smithers.process.uptime_seconds").value = uptimeS;
            upsertGauge("smithers.process.memory_rss_bytes").value = mem.rss;
            upsertGauge("smithers.process.heap_used_bytes").value = mem.heapUsed;
        }),
        // Applies `delta` to the pending count for `kind`, never letting it drop
        // below zero, and mirrors the result into the labelled gauge.
        updateAsyncExternalWaitPending: (kind, delta) => Effect.sync(() => {
            asyncExternalWaitCounts[kind] = Math.max(0, asyncExternalWaitCounts[kind] + delta);
            upsertGauge("smithers.external_wait.async_pending", { kind }).value =
                asyncExternalWaitCounts[kind];
        }),
        renderPrometheus: () => Effect.sync(() => renderPrometheusSamples(samples())),
        // Entries are copied so the snapshot is a point-in-time view: later
        // updates do not alter it and callers cannot mutate registry state.
        snapshot: () => Effect.sync(() => new Map([...registry].map(([key, entry]) => [key, copyEntry(entry)]))),
    };
    return service;
}
|
|
497
|
+
/**
 * Layer that provides `MetricsService` with a fresh in-memory implementation,
 * constructed lazily when the layer is built.
 *
 * @type {Layer.Layer<MetricsService, never, never>}
 */
export const MetricsServiceLive = Layer.sync(MetricsService, () => makeInMemoryMetricsService());
|
|
499
|
+
/**
 * Layer that provides a `MetricsService` which records nothing: every
 * mutating operation is `Effect.void`, rendering yields an empty string and
 * snapshots are empty maps.
 *
 * @type {Layer.Layer<MetricsService, never, never>}
 */
export const MetricsServiceNoop = Layer.succeed(MetricsService, {
    // Recording operations: accepted and discarded.
    increment: () => Effect.void,
    incrementBy: () => Effect.void,
    gauge: () => Effect.void,
    histogram: () => Effect.void,
    recordEvent: () => Effect.void,
    updateProcessMetrics: () => Effect.void,
    updateAsyncExternalWaitPending: () => Effect.void,
    // Read operations: always empty.
    renderPrometheus: () => Effect.succeed(""),
    snapshot: () => Effect.succeed(new Map()),
});
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import type { Effect } from "effect";
|
|
2
|
+
import type { MetricLabels } from "./_corePrometheusShape.ts";
|
|
3
|
+
import type { MetricName } from "./MetricName.ts";
|
|
4
|
+
|
|
5
|
+
/**
 * Loosely typed event accepted by `MetricsServiceShape.recordEvent`.
 * Only the string `type` discriminator is required; any further fields are
 * `unknown` and must be narrowed by the consumer before use.
 */
export type SmithersMetricEvent = Readonly<{
  type: string;
  [key: string]: unknown;
}>;
|
|
9
|
+
|
|
10
|
+
/** Stored state of one counter series. */
export type CounterEntry = {
  /** Discriminator within `MetricEntry`. */
  readonly type: "counter";
  /** Current total; mutable so it can be updated in place. */
  value: number;
  /** Labels identifying this series. */
  readonly labels: MetricLabels;
};
|
|
15
|
+
|
|
16
|
+
/** Stored state of one gauge series. */
export type GaugeEntry = {
  /** Discriminator within `MetricEntry`. */
  readonly type: "gauge";
  /** Current reading; mutable so it can be set or adjusted in place. */
  value: number;
  /** Labels identifying this series. */
  readonly labels: MetricLabels;
};
|
|
21
|
+
|
|
22
|
+
/** Stored state of one histogram series. */
export type HistogramEntry = {
  /** Discriminator within `MetricEntry`. */
  readonly type: "histogram";
  /** Sum of all observed values. */
  sum: number;
  /** Number of observations recorded. */
  count: number;
  /** Labels identifying this series. */
  readonly labels: MetricLabels;
  /** Observation count per bucket, keyed by the bucket's numeric boundary. */
  readonly buckets: Map<number, number>;
};
|
|
29
|
+
|
|
30
|
+
/** Any stored metric series; discriminate on the `type` field. */
export type MetricEntry =
  | CounterEntry
  | GaugeEntry
  | HistogramEntry;
|
|
31
|
+
|
|
32
|
+
/** Read-only view of metric entries keyed by a string series key. */
export type MetricsSnapshot = ReadonlyMap<string, MetricEntry>;
|
|
33
|
+
|
|
34
|
+
/**
 * Operations every metrics service implementation provides. Each operation
 * returns an `Effect`, so nothing happens until that effect is run.
 */
export type MetricsServiceShape = {
  /** Increments the named metric, optionally scoped to a label set. */
  readonly increment: (name: MetricName, labels?: MetricLabels) => Effect.Effect<void>;

  /** Adds `value` to the named metric, optionally scoped to a label set. */
  readonly incrementBy: (
    name: MetricName,
    value: number,
    labels?: MetricLabels,
  ) => Effect.Effect<void>;

  /** Sets the named gauge to `value`. */
  readonly gauge: (name: MetricName, value: number, labels?: MetricLabels) => Effect.Effect<void>;

  /** Records one observation of `value` in the named histogram. */
  readonly histogram: (
    name: MetricName,
    value: number,
    labels?: MetricLabels,
  ) => Effect.Effect<void>;

  /** Updates metrics in response to an event. */
  readonly recordEvent: (event: SmithersMetricEvent) => Effect.Effect<void>;

  /** Refreshes process-level metrics. */
  readonly updateProcessMetrics: () => Effect.Effect<void>;

  /** Adjusts the pending async external-wait count for `kind` by `delta`. */
  readonly updateAsyncExternalWaitPending: (
    kind: "approval" | "event",
    delta: number,
  ) => Effect.Effect<void>;

  /** Produces the current metrics as Prometheus text. */
  readonly renderPrometheus: () => Effect.Effect<string>;

  /** Produces the current metric entries as a read-only map. */
  readonly snapshot: () => Effect.Effect<MetricsSnapshot>;
};
|