@smithers-orchestrator/observability 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. package/LICENSE +21 -0
  2. package/package.json +44 -0
  3. package/src/MetricName.ts +1 -0
  4. package/src/MetricsServiceLive.js +9 -0
  5. package/src/ResolvedSmithersObservabilityOptions.ts +10 -0
  6. package/src/SmithersEvent.ts +603 -0
  7. package/src/SmithersLogFormat.ts +1 -0
  8. package/src/SmithersMetricDefinition.ts +15 -0
  9. package/src/SmithersMetricType.ts +1 -0
  10. package/src/SmithersMetricUnit.ts +8 -0
  11. package/src/SmithersObservability.js +6 -0
  12. package/src/SmithersObservabilityOptions.ts +10 -0
  13. package/src/SmithersObservabilityService.ts +15 -0
  14. package/src/_coreCorrelation/CorrelationContext.ts +10 -0
  15. package/src/_coreCorrelation/CorrelationContextLive.js +11 -0
  16. package/src/_coreCorrelation/CorrelationContextService.js +6 -0
  17. package/src/_coreCorrelation/CorrelationContextServiceShape.ts +14 -0
  18. package/src/_coreCorrelation/CorrelationPatch.ts +3 -0
  19. package/src/_coreCorrelation/_correlationStorage.js +4 -0
  20. package/src/_coreCorrelation/correlationContextFiberRef.js +2 -0
  21. package/src/_coreCorrelation/correlationContextToLogAnnotations.js +28 -0
  22. package/src/_coreCorrelation/getCurrentCorrelationContext.js +9 -0
  23. package/src/_coreCorrelation/getCurrentCorrelationContextEffect.js +11 -0
  24. package/src/_coreCorrelation/index.js +14 -0
  25. package/src/_coreCorrelation/mergeCorrelationContext.js +61 -0
  26. package/src/_coreCorrelation/runWithCorrelationContext.js +14 -0
  27. package/src/_coreCorrelation/updateCurrentCorrelationContext.js +19 -0
  28. package/src/_coreCorrelation/withCorrelationContext.js +15 -0
  29. package/src/_coreCorrelation/withCurrentCorrelationContext.js +13 -0
  30. package/src/_coreMetrics.js +510 -0
  31. package/src/_coreMetricsShape.ts +55 -0
  32. package/src/_corePrometheus.js +93 -0
  33. package/src/_corePrometheusShape.ts +11 -0
  34. package/src/_coreTracing.js +142 -0
  35. package/src/_coreTracingShape.ts +17 -0
  36. package/src/_smithersSpanAttributeAliases.js +19 -0
  37. package/src/_smithersTraceSpanStorage.js +3 -0
  38. package/src/annotateSmithersTrace.js +11 -0
  39. package/src/correlation.js +20 -0
  40. package/src/createSmithersObservabilityLayer.js +49 -0
  41. package/src/createSmithersOtelLayer.js +21 -0
  42. package/src/createSmithersRuntimeLayer.js +2 -0
  43. package/src/getCurrentSmithersTraceAnnotations.js +14 -0
  44. package/src/getCurrentSmithersTraceSpan.js +7 -0
  45. package/src/index.d.ts +1032 -0
  46. package/src/index.js +35 -0
  47. package/src/logging.js +91 -0
  48. package/src/makeSmithersSpanAttributes.js +20 -0
  49. package/src/metrics/SmithersMetricDefinition.ts +17 -0
  50. package/src/metrics/SmithersMetricType.ts +1 -0
  51. package/src/metrics/SmithersMetricUnit.ts +8 -0
  52. package/src/metrics/_asyncExternalWaitCounts.js +4 -0
  53. package/src/metrics/_buckets.js +43 -0
  54. package/src/metrics/_processStartMs.js +1 -0
  55. package/src/metrics/activeNodes.js +2 -0
  56. package/src/metrics/activeRuns.js +2 -0
  57. package/src/metrics/agentActionsTotal.js +2 -0
  58. package/src/metrics/agentDurationMs.js +3 -0
  59. package/src/metrics/agentErrorsTotal.js +2 -0
  60. package/src/metrics/agentEventsTotal.js +2 -0
  61. package/src/metrics/agentInvocationsTotal.js +2 -0
  62. package/src/metrics/agentRetriesTotal.js +2 -0
  63. package/src/metrics/agentSessionsTotal.js +2 -0
  64. package/src/metrics/agentTokensTotal.js +2 -0
  65. package/src/metrics/alertDeliveriesAttempted.js +2 -0
  66. package/src/metrics/alertDeliveriesSuppressed.js +2 -0
  67. package/src/metrics/alertsAcknowledgedTotal.js +2 -0
  68. package/src/metrics/alertsActive.js +2 -0
  69. package/src/metrics/alertsEscalatedTotal.js +2 -0
  70. package/src/metrics/alertsFiredTotal.js +2 -0
  71. package/src/metrics/alertsReopenedTotal.js +2 -0
  72. package/src/metrics/alertsResolvedTotal.js +2 -0
  73. package/src/metrics/alertsSilencedTotal.js +2 -0
  74. package/src/metrics/approvalPending.js +2 -0
  75. package/src/metrics/approvalWaitDuration.js +3 -0
  76. package/src/metrics/approvalsDenied.js +2 -0
  77. package/src/metrics/approvalsGranted.js +2 -0
  78. package/src/metrics/approvalsRequested.js +2 -0
  79. package/src/metrics/attemptDuration.js +3 -0
  80. package/src/metrics/attentionBacklog.js +2 -0
  81. package/src/metrics/cacheHits.js +2 -0
  82. package/src/metrics/cacheMisses.js +2 -0
  83. package/src/metrics/dbQueryDuration.js +3 -0
  84. package/src/metrics/dbRetries.js +2 -0
  85. package/src/metrics/dbTransactionDuration.js +3 -0
  86. package/src/metrics/dbTransactionRetries.js +2 -0
  87. package/src/metrics/dbTransactionRollbacks.js +2 -0
  88. package/src/metrics/devtoolsActiveSubscribers.js +2 -0
  89. package/src/metrics/devtoolsBackpressureDisconnectTotal.js +2 -0
  90. package/src/metrics/devtoolsDeltaBuildMs.js +3 -0
  91. package/src/metrics/devtoolsEventBytes.js +3 -0
  92. package/src/metrics/devtoolsEventTotal.js +2 -0
  93. package/src/metrics/devtoolsSnapshotBuildMs.js +3 -0
  94. package/src/metrics/devtoolsSubscribeTotal.js +2 -0
  95. package/src/metrics/errorsTotal.js +2 -0
  96. package/src/metrics/eventsEmittedTotal.js +2 -0
  97. package/src/metrics/externalWaitAsyncPending.js +2 -0
  98. package/src/metrics/gatewayApprovalDecisionsTotal.js +2 -0
  99. package/src/metrics/gatewayAuthEventsTotal.js +2 -0
  100. package/src/metrics/gatewayConnectionsActive.js +2 -0
  101. package/src/metrics/gatewayConnectionsClosedTotal.js +2 -0
  102. package/src/metrics/gatewayConnectionsTotal.js +2 -0
  103. package/src/metrics/gatewayCronTriggersTotal.js +2 -0
  104. package/src/metrics/gatewayErrorsTotal.js +2 -0
  105. package/src/metrics/gatewayHeartbeatTicksTotal.js +2 -0
  106. package/src/metrics/gatewayMessagesReceivedTotal.js +2 -0
  107. package/src/metrics/gatewayMessagesSentTotal.js +2 -0
  108. package/src/metrics/gatewayRpcCallsTotal.js +2 -0
  109. package/src/metrics/gatewayRpcDuration.js +3 -0
  110. package/src/metrics/gatewayRunsCompletedTotal.js +2 -0
  111. package/src/metrics/gatewayRunsStartedTotal.js +2 -0
  112. package/src/metrics/gatewaySignalsTotal.js +2 -0
  113. package/src/metrics/gatewayWebhooksReceivedTotal.js +2 -0
  114. package/src/metrics/gatewayWebhooksRejectedTotal.js +2 -0
  115. package/src/metrics/gatewayWebhooksVerifiedTotal.js +2 -0
  116. package/src/metrics/heartbeatDataSizeBytes.js +3 -0
  117. package/src/metrics/heartbeatIntervalMs.js +3 -0
  118. package/src/metrics/hotReloadDuration.js +3 -0
  119. package/src/metrics/hotReloadFailures.js +2 -0
  120. package/src/metrics/hotReloads.js +2 -0
  121. package/src/metrics/httpRequestDuration.js +3 -0
  122. package/src/metrics/httpRequests.js +2 -0
  123. package/src/metrics/index.js +151 -0
  124. package/src/metrics/metricsServiceAdapter.js +188 -0
  125. package/src/metrics/nodeDuration.js +3 -0
  126. package/src/metrics/nodeRetriesTotal.js +2 -0
  127. package/src/metrics/nodesFailed.js +2 -0
  128. package/src/metrics/nodesFinished.js +2 -0
  129. package/src/metrics/nodesStarted.js +2 -0
  130. package/src/metrics/processHeapUsedBytes.js +2 -0
  131. package/src/metrics/processMemoryRssBytes.js +2 -0
  132. package/src/metrics/processUptimeSeconds.js +2 -0
  133. package/src/metrics/promptSizeBytes.js +3 -0
  134. package/src/metrics/responseSizeBytes.js +3 -0
  135. package/src/metrics/rewindDurationMs.js +3 -0
  136. package/src/metrics/rewindFramesDeleted.js +3 -0
  137. package/src/metrics/rewindRollbackTotal.js +2 -0
  138. package/src/metrics/rewindSandboxesReverted.js +3 -0
  139. package/src/metrics/rewindTotal.js +2 -0
  140. package/src/metrics/runDuration.js +3 -0
  141. package/src/metrics/runsAncestryDepth.js +3 -0
  142. package/src/metrics/runsCancelledTotal.js +2 -0
  143. package/src/metrics/runsCarriedStateBytes.js +3 -0
  144. package/src/metrics/runsContinuedTotal.js +2 -0
  145. package/src/metrics/runsFailedTotal.js +2 -0
  146. package/src/metrics/runsFinishedTotal.js +2 -0
  147. package/src/metrics/runsResumedTotal.js +2 -0
  148. package/src/metrics/runsTotal.js +2 -0
  149. package/src/metrics/sandboxActive.js +2 -0
  150. package/src/metrics/sandboxBundleSizeBytes.js +3 -0
  151. package/src/metrics/sandboxCompletedTotal.js +2 -0
  152. package/src/metrics/sandboxCreatedTotal.js +2 -0
  153. package/src/metrics/sandboxDurationMs.js +3 -0
  154. package/src/metrics/sandboxPatchCount.js +3 -0
  155. package/src/metrics/sandboxTransportDurationMs.js +3 -0
  156. package/src/metrics/schedulerConcurrencyUtilization.js +2 -0
  157. package/src/metrics/schedulerQueueDepth.js +2 -0
  158. package/src/metrics/schedulerWaitDuration.js +3 -0
  159. package/src/metrics/scorerEventsFailed.js +2 -0
  160. package/src/metrics/scorerEventsFinished.js +2 -0
  161. package/src/metrics/scorerEventsStarted.js +2 -0
  162. package/src/metrics/smithersMetricCatalog.js +484 -0
  163. package/src/metrics/smithersMetricCatalogByKey.js +2 -0
  164. package/src/metrics/smithersMetricCatalogByName.js +2 -0
  165. package/src/metrics/smithersMetricCatalogByPrometheusName.js +2 -0
  166. package/src/metrics/supervisorPollDuration.js +3 -0
  167. package/src/metrics/supervisorPollsTotal.js +2 -0
  168. package/src/metrics/supervisorResumeLag.js +3 -0
  169. package/src/metrics/supervisorResumedTotal.js +2 -0
  170. package/src/metrics/supervisorSkippedTotal.js +2 -0
  171. package/src/metrics/supervisorStaleDetected.js +2 -0
  172. package/src/metrics/taskHeartbeatTimeoutTotal.js +2 -0
  173. package/src/metrics/taskHeartbeatsTotal.js +2 -0
  174. package/src/metrics/timerDelayDuration.js +3 -0
  175. package/src/metrics/timersCancelled.js +2 -0
  176. package/src/metrics/timersCreated.js +2 -0
  177. package/src/metrics/timersFired.js +2 -0
  178. package/src/metrics/timersPending.js +2 -0
  179. package/src/metrics/toPrometheusMetricName.js +8 -0
  180. package/src/metrics/tokensCacheReadTotal.js +2 -0
  181. package/src/metrics/tokensCacheWriteTotal.js +2 -0
  182. package/src/metrics/tokensContextWindowBucketTotal.js +2 -0
  183. package/src/metrics/tokensContextWindowPerCall.js +3 -0
  184. package/src/metrics/tokensInputPerCall.js +3 -0
  185. package/src/metrics/tokensInputTotal.js +2 -0
  186. package/src/metrics/tokensOutputPerCall.js +3 -0
  187. package/src/metrics/tokensOutputTotal.js +2 -0
  188. package/src/metrics/tokensReasoningTotal.js +2 -0
  189. package/src/metrics/toolCallErrorsTotal.js +2 -0
  190. package/src/metrics/toolCallsTotal.js +2 -0
  191. package/src/metrics/toolDuration.js +3 -0
  192. package/src/metrics/toolOutputTruncatedTotal.js +2 -0
  193. package/src/metrics/trackEvent.js +604 -0
  194. package/src/metrics/updateAsyncExternalWaitPending.js +14 -0
  195. package/src/metrics/updateProcessMetrics.js +17 -0
  196. package/src/metrics/vcsDuration.js +3 -0
  197. package/src/prometheusContentType.js +1 -0
  198. package/src/renderPrometheusMetrics.js +205 -0
  199. package/src/resolveSmithersObservabilityOptions.js +79 -0
  200. package/src/smithersMetrics.js +2 -0
  201. package/src/smithersSpanNames.js +6 -0
  202. package/src/withSmithersSpan.js +15 -0
@@ -0,0 +1,2 @@
1
+ import { Metric } from "effect";
2
+ export const toolCallsTotal = Metric.counter("smithers.tool_calls.total"); // Counter registered under the name "smithers.tool_calls.total".
@@ -0,0 +1,3 @@
1
+ import { Metric } from "effect";
2
+ import { toolBuckets } from "./_buckets.js";
3
+ export const toolDuration = Metric.histogram("smithers.tool.duration_ms", toolBuckets); // Histogram "smithers.tool.duration_ms"; bucket boundaries come from toolBuckets in ./_buckets.js (unit presumably milliseconds, per the name).
@@ -0,0 +1,2 @@
1
+ import { Metric } from "effect";
2
+ export const toolOutputTruncatedTotal = Metric.counter("smithers.tool.output_truncated_total"); // Counter registered under the name "smithers.tool.output_truncated_total".
@@ -0,0 +1,604 @@
1
+ import { Effect, Metric } from "effect";
2
+ import { memoryFactWrites, memoryRecallQueries, memoryMessageSaves, } from "@smithers-orchestrator/memory/metrics";
3
+ import { openApiToolCallsTotal, openApiToolCallErrorsTotal, openApiToolDuration, } from "@smithers-orchestrator/openapi/metrics";
4
+ import { runsTotal } from "./runsTotal.js";
5
+ import { nodesStarted } from "./nodesStarted.js";
6
+ import { nodesFinished } from "./nodesFinished.js";
7
+ import { nodesFailed } from "./nodesFailed.js";
8
+ import { toolCallsTotal } from "./toolCallsTotal.js";
9
+ import { toolCallErrorsTotal } from "./toolCallErrorsTotal.js";
10
+ import { errorsTotal } from "./errorsTotal.js";
11
+ import { nodeRetriesTotal } from "./nodeRetriesTotal.js";
12
+ import { eventsEmittedTotal } from "./eventsEmittedTotal.js";
13
+ import { activeRuns } from "./activeRuns.js";
14
+ import { activeNodes } from "./activeNodes.js";
15
+ import { runsFinishedTotal } from "./runsFinishedTotal.js";
16
+ import { runsFailedTotal } from "./runsFailedTotal.js";
17
+ import { runsCancelledTotal } from "./runsCancelledTotal.js";
18
+ import { runsContinuedTotal } from "./runsContinuedTotal.js";
19
+ import { runsAncestryDepth } from "./runsAncestryDepth.js";
20
+ import { runsCarriedStateBytes } from "./runsCarriedStateBytes.js";
21
+ import { approvalsRequested } from "./approvalsRequested.js";
22
+ import { approvalsGranted } from "./approvalsGranted.js";
23
+ import { approvalsDenied } from "./approvalsDenied.js";
24
+ import { approvalPending } from "./approvalPending.js";
25
+ import { timersCreated } from "./timersCreated.js";
26
+ import { timersFired } from "./timersFired.js";
27
+ import { timersCancelled } from "./timersCancelled.js";
28
+ import { timersPending } from "./timersPending.js";
29
+ import { timerDelayDuration } from "./timerDelayDuration.js";
30
+ import { tokensInputTotal } from "./tokensInputTotal.js";
31
+ import { tokensOutputTotal } from "./tokensOutputTotal.js";
32
+ import { tokensCacheReadTotal } from "./tokensCacheReadTotal.js";
33
+ import { tokensCacheWriteTotal } from "./tokensCacheWriteTotal.js";
34
+ import { tokensReasoningTotal } from "./tokensReasoningTotal.js";
35
+ import { tokensContextWindowBucketTotal } from "./tokensContextWindowBucketTotal.js";
36
+ import { tokensInputPerCall } from "./tokensInputPerCall.js";
37
+ import { tokensOutputPerCall } from "./tokensOutputPerCall.js";
38
+ import { tokensContextWindowPerCall } from "./tokensContextWindowPerCall.js";
39
+ import { scorerEventsStarted } from "./scorerEventsStarted.js";
40
+ import { scorerEventsFinished } from "./scorerEventsFinished.js";
41
+ import { scorerEventsFailed } from "./scorerEventsFailed.js";
42
+ import { supervisorPollsTotal } from "./supervisorPollsTotal.js";
43
+ import { supervisorStaleDetected } from "./supervisorStaleDetected.js";
44
+ import { supervisorResumedTotal } from "./supervisorResumedTotal.js";
45
+ import { supervisorSkippedTotal } from "./supervisorSkippedTotal.js";
46
+ import { supervisorPollDuration } from "./supervisorPollDuration.js";
47
+ import { supervisorResumeLag } from "./supervisorResumeLag.js";
48
+ import { sandboxCreatedTotal } from "./sandboxCreatedTotal.js";
49
+ import { sandboxCompletedTotal } from "./sandboxCompletedTotal.js";
50
+ import { sandboxActive } from "./sandboxActive.js";
51
+ import { sandboxBundleSizeBytes } from "./sandboxBundleSizeBytes.js";
52
+ import { sandboxDurationMs } from "./sandboxDurationMs.js";
53
+ import { sandboxPatchCount } from "./sandboxPatchCount.js";
54
+ import { taskHeartbeatsTotal } from "./taskHeartbeatsTotal.js";
55
+ import { taskHeartbeatTimeoutTotal } from "./taskHeartbeatTimeoutTotal.js";
56
+ import { heartbeatDataSizeBytes } from "./heartbeatDataSizeBytes.js";
57
+ import { heartbeatIntervalMs } from "./heartbeatIntervalMs.js";
58
+ import { agentEventsTotal } from "./agentEventsTotal.js";
59
+ import { agentSessionsTotal } from "./agentSessionsTotal.js";
60
+ import { agentActionsTotal } from "./agentActionsTotal.js";
61
+ import { agentErrorsTotal } from "./agentErrorsTotal.js";
62
+ import { agentRetriesTotal } from "./agentRetriesTotal.js";
63
+ import { agentTokensTotal } from "./agentTokensTotal.js";
64
+ /** @typedef {import("@smithers-orchestrator/observability/SmithersEvent").SmithersEvent} SmithersEvent */
65
+
/**
 * Cleans up a candidate metric tag value.
 *
 * @param {unknown} value - Candidate tag value of any type.
 * @returns {string | undefined} The whitespace-trimmed string, or `undefined`
 *   when `value` is not a string or is empty after trimming.
 */
function normalizeMetricTag(value) {
    if (typeof value === "string") {
        const text = value.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return undefined;
}
/**
 * Applies every tag that has a truthy value to `metric` via `Metric.tagged`,
 * in the entry order of `tags`. Entries whose value is `undefined` or an
 * empty string are skipped, so optional tags can be passed unconditionally.
 *
 * @template A
 * @param {A} metric - Metric to decorate.
 * @param {Record<string, string | undefined>} tags - Tag name to tag value.
 * @returns {A} The tagged metric, or `metric` itself when no tag applies.
 */
function tagMetricWithTags(metric, tags) {
    return Object.entries(tags).reduce(
        (current, [name, tagValue]) => (tagValue ? Metric.tagged(current, name, tagValue) : current),
        metric,
    );
}
/**
 * Accepts a value only when it is usable as a positive metric amount.
 *
 * @param {unknown} value - Candidate count.
 * @returns {number | undefined} `value` when it is a finite number strictly
 *   greater than zero; otherwise `undefined`.
 */
function asFiniteMetricCount(value) {
    if (typeof value !== "number") {
        return undefined;
    }
    const isPositiveFinite = Number.isFinite(value) && value > 0;
    return isPositiveFinite ? value : undefined;
}
/**
 * Picks the token count used for the context-window metrics of a
 * `TokenUsageReported` event.
 *
 * A positive `inputTokens` wins outright. Otherwise the positive cache-read
 * and cache-write counts are summed and used instead.
 *
 * @param {Extract<SmithersEvent, { type: "TokenUsageReported" }>} event
 * @returns {number | undefined} The resolved count, or `undefined` when no
 *   positive count is available.
 */
function resolveContextWindowTokens(event) {
    const directInput = asFiniteMetricCount(event.inputTokens);
    if (directInput !== undefined) {
        return directInput;
    }
    const cacheTotal = [event.cacheReadTokens, event.cacheWriteTokens]
        .map((count) => asFiniteMetricCount(count) ?? 0)
        .reduce((sum, count) => sum + count, 0);
    return cacheTotal > 0 ? cacheTotal : undefined;
}
/**
 * Maps a token count onto the label of its context-window size bucket.
 *
 * @param {number} tokens - Token count to classify.
 * @returns {string} The label of the first bucket whose exclusive upper
 *   bound is greater than `tokens`, or `"gte_1m"` when none is.
 */
function classifyContextWindowBucket(tokens) {
    // Ordered by ascending exclusive upper bound; the first match wins.
    const bucketsByUpperBound = [
        [50_000, "lt_50k"],
        [100_000, "gte_50k_lt_100k"],
        [200_000, "gte_100k_lt_200k"],
        [500_000, "gte_200k_lt_500k"],
        [1_000_000, "gte_500k_lt_1m"],
    ];
    for (const [upperBound, label] of bucketsByUpperBound) {
        if (tokens < upperBound) {
            return label;
        }
    }
    return "gte_1m";
}
/**
 * Normalizes an agent usage payload into a fixed set of token totals.
 *
 * Each field is looked up under several spellings (camelCase, snake_case and
 * nested `inputTokenDetails` / `outputTokenDetails`); the first spelling that
 * holds a positive finite number wins. A field with no usable value is
 * `undefined`.
 *
 * The total prefers a value reported by the payload itself (`totalTokens`,
 * then `total_tokens`). Only when neither is present is it derived by summing
 * the individual fields. The snake_case alias matters: payloads that use
 * snake_case for the other fields would otherwise have their reported total
 * ignored and re-derived.
 *
 * @param {Record<string, unknown> | undefined} usage - Raw usage payload.
 * @returns {AgentUsageTotals} Object with optional numeric `inputTokens`,
 *   `outputTokens`, `cacheReadTokens`, `cacheWriteTokens`, `reasoningTokens`
 *   and `totalTokens`; an empty object when `usage` is missing.
 */
function extractAgentUsageTotals(usage) {
    if (!usage) {
        return {};
    }
    const inputTokens = asFiniteMetricCount(usage.inputTokens)
        ?? asFiniteMetricCount(usage.input_tokens)
        ?? asFiniteMetricCount(usage.prompt_tokens);
    const outputTokens = asFiniteMetricCount(usage.outputTokens)
        ?? asFiniteMetricCount(usage.output_tokens)
        ?? asFiniteMetricCount(usage.completion_tokens);
    const cacheReadTokens = asFiniteMetricCount(usage.cacheReadTokens)
        ?? asFiniteMetricCount(usage.cache_read_input_tokens)
        ?? asFiniteMetricCount(usage.cached_input_tokens)
        ?? asFiniteMetricCount(usage.inputTokenDetails?.cacheReadTokens);
    const cacheWriteTokens = asFiniteMetricCount(usage.cacheWriteTokens)
        ?? asFiniteMetricCount(usage.cache_creation_input_tokens)
        ?? asFiniteMetricCount(usage.inputTokenDetails?.cacheWriteTokens);
    const reasoningTokens = asFiniteMetricCount(usage.reasoningTokens)
        ?? asFiniteMetricCount(usage.reasoning_tokens)
        ?? asFiniteMetricCount(usage.outputTokenDetails?.reasoningTokens);
    // Fallback only: summing the parts yields `undefined` when every part is missing.
    const summedTokens = (inputTokens ?? 0)
        + (outputTokens ?? 0)
        + (cacheReadTokens ?? 0)
        + (cacheWriteTokens ?? 0)
        + (reasoningTokens ?? 0);
    const totalTokens = asFiniteMetricCount(usage.totalTokens)
        ?? asFiniteMetricCount(usage.total_tokens)
        ?? asFiniteMetricCount(summedTokens);
    return {
        inputTokens,
        outputTokens,
        cacheReadTokens,
        cacheWriteTokens,
        reasoningTokens,
        totalTokens,
    };
}
/**
 * Builds the effect that adds an agent's token usage to `agentTokensTotal`.
 *
 * The usage payload is normalized with `extractAgentUsageTotals`. Every token
 * kind with a positive count yields one `Metric.incrementBy` on the counter,
 * tagged with the caller's `tags` plus a `kind` tag.
 *
 * @param {Record<string, string | undefined>} tags - Tags shared by all increments.
 * @param {Record<string, unknown> | undefined} usage - Raw usage payload.
 * @returns {Effect.Effect<void>} Combined increments, or `Effect.void` when
 *   there is nothing to record.
 */
function recordAgentUsageMetrics(tags, usage) {
    const {
        inputTokens,
        outputTokens,
        cacheReadTokens,
        cacheWriteTokens,
        reasoningTokens,
        totalTokens,
    } = extractAgentUsageTotals(usage);
    // The order here is the order in which the increments are emitted.
    const countsByKind = [
        ["input", inputTokens],
        ["output", outputTokens],
        ["cache_read", cacheReadTokens],
        ["cache_write", cacheWriteTokens],
        ["reasoning", reasoningTokens],
        ["total", totalTokens],
    ];
    const increments = countsByKind
        .filter(([, count]) => Boolean(count) && count > 0)
        .map(([kind, count]) => Metric.incrementBy(tagMetricWithTags(agentTokensTotal, { ...tags, kind }), count));
    if (increments.length === 0) {
        return Effect.void;
    }
    return Effect.all(increments, { discard: true });
}
/**
 * Heuristically decides whether an agent event indicates a retry.
 *
 * - `"started"` events never signal a retry.
 * - `"action"` events signal a retry when the action's `detail` object
 *   contains one of the known retry keys, or when the action title / event
 *   message matches the retry pattern.
 * - `"completed"` events signal a retry when the error text matches the
 *   retry pattern.
 * - Any other event type is reported as `false`.
 *
 * @param {AgentEventPayload} event - Agent event to inspect.
 * @returns {boolean} `true` when a retry signal was found.
 */
function hasAgentRetrySignal(event) {
    const retryPattern = /\bretry(?:ing|able| after)?\b|\bbackoff\b|\brate limit\b/i;
    switch (event.type) {
        case "started":
            return false;
        case "action": {
            const detail = event.action.detail;
            // `in` throws a TypeError on primitives, so only probe real objects.
            if (detail !== null && typeof detail === "object") {
                const retryKeys = [
                    "retryAfter",
                    "retryAttempt",
                    "retryDelayMs",
                    "retryable",
                    "backoffMs",
                ];
                if (retryKeys.some((key) => key in detail)) {
                    return true;
                }
            }
            return retryPattern.test(`${event.action.title} ${event.message ?? ""}`);
        }
        case "completed":
            return retryPattern.test(event.error ?? "");
        default:
            // Unknown event types carry no retry signal; keep the boolean contract.
            return false;
    }
}
// ---------------------------------------------------------------------------
// Event-driven metric tracking
// ---------------------------------------------------------------------------
/**
 * Translates a Smithers event into the metric updates it implies.
 *
 * Every event increments `eventsEmittedTotal` once (the counter is not tagged
 * by event type). Recognized event types additionally update their dedicated
 * counters, histograms and running totals. Unrecognized types only bump the
 * shared counter.
 *
 * Nothing is recorded until the returned effect is run.
 *
 * @param {SmithersEvent} event - Event to account for.
 * @returns {Effect.Effect<void>} Effect performing all metric updates.
 */
export function trackEvent(event) {
    // Shared by every branch: one untagged increment per event.
    const countEvent = Metric.increment(eventsEmittedTotal);
    /**
     * Combines the shared event counter with branch-specific updates.
     * @param {...Effect.Effect<unknown>} updates
     * @returns {Effect.Effect<void>}
     */
    const withCount = (...updates) => Effect.all([countEvent, ...updates], { discard: true });
    switch (event.type) {
        case "SupervisorPollCompleted":
            return withCount(
                Metric.increment(supervisorPollsTotal),
                Metric.incrementBy(supervisorStaleDetected, event.staleCount),
                Metric.update(supervisorPollDuration, event.durationMs),
            );
        case "RunAutoResumed":
            return withCount(
                Metric.increment(supervisorResumedTotal),
                Metric.update(supervisorResumeLag, event.staleDurationMs),
            );
        case "RunAutoResumeSkipped":
            return withCount(
                Metric.increment(Metric.tagged(supervisorSkippedTotal, "reason", event.reason)),
            );
        case "RunStarted":
            return withCount(
                Metric.increment(runsTotal),
                Metric.incrementBy(activeRuns, 1),
            );
        case "SandboxCreated": {
            // The runtime tag is optional; tagMetricWithTags skips it when empty.
            const runtimeTags = { runtime: event.runtime };
            return withCount(
                Metric.increment(tagMetricWithTags(sandboxCreatedTotal, runtimeTags)),
                Metric.incrementBy(tagMetricWithTags(sandboxActive, runtimeTags), 1),
            );
        }
        case "SandboxShipped":
            return withCount(
                Metric.update(sandboxBundleSizeBytes, event.bundleSizeBytes),
            );
        case "SandboxBundleReceived":
            return withCount(
                Metric.update(sandboxBundleSizeBytes, event.bundleSizeBytes),
                Metric.update(sandboxPatchCount, event.patchCount),
            );
        case "SandboxCompleted":
            return withCount(
                // Tag status even when the runtime is unknown, so the outcome
                // is never lost just because the runtime tag is missing.
                Metric.increment(tagMetricWithTags(sandboxCompletedTotal, {
                    runtime: event.runtime,
                    status: event.status,
                })),
                Metric.incrementBy(tagMetricWithTags(sandboxActive, { runtime: event.runtime }), -1),
                Metric.update(sandboxDurationMs, event.durationMs),
            );
        case "SandboxFailed":
        case "SandboxDiffRejected":
            return withCount(
                Metric.increment(errorsTotal),
            );
        case "SandboxDiffReviewRequested":
        case "SandboxDiffAccepted":
            return withCount(
                Metric.update(sandboxPatchCount, event.patchCount),
            );
        case "RunFinished":
            return withCount(
                Metric.incrementBy(activeRuns, -1),
                Metric.increment(runsFinishedTotal),
            );
        case "RunFailed":
            return withCount(
                Metric.incrementBy(activeRuns, -1),
                Metric.increment(runsFailedTotal),
                Metric.increment(errorsTotal),
            );
        case "RunCancelled":
            return withCount(
                Metric.incrementBy(activeRuns, -1),
                Metric.increment(runsCancelledTotal),
            );
        case "RunContinuedAsNew":
            return withCount(
                Metric.incrementBy(activeRuns, -1),
                Metric.increment(runsContinuedTotal),
                Metric.update(runsCarriedStateBytes, event.carriedStateSize),
                // Ancestry depth is optional on the event.
                ...(typeof event.ancestryDepth === "number"
                    ? [Metric.update(runsAncestryDepth, event.ancestryDepth)]
                    : []),
            );
        case "NodeStarted":
            return withCount(
                Metric.increment(nodesStarted),
                Metric.incrementBy(activeNodes, 1),
            );
        case "TaskHeartbeat":
            return withCount(
                Metric.increment(taskHeartbeatsTotal),
                Metric.update(heartbeatDataSizeBytes, event.dataSizeBytes),
                // The interval is optional on the event.
                ...(typeof event.intervalMs === "number"
                    ? [Metric.update(heartbeatIntervalMs, event.intervalMs)]
                    : []),
            );
        case "TaskHeartbeatTimeout":
            return withCount(
                Metric.increment(taskHeartbeatTimeoutTotal),
            );
        case "NodeFinished":
            return withCount(
                Metric.increment(nodesFinished),
                Metric.incrementBy(activeNodes, -1),
            );
        case "NodeFailed":
            return withCount(
                Metric.increment(nodesFailed),
                Metric.incrementBy(activeNodes, -1),
                Metric.increment(errorsTotal),
            );
        case "NodeCancelled":
            return withCount(
                Metric.incrementBy(activeNodes, -1),
            );
        case "NodeRetrying":
            return withCount(
                Metric.increment(nodeRetriesTotal),
            );
        case "ToolCallStarted":
            return withCount(
                Metric.increment(toolCallsTotal),
            );
        case "ToolCallFinished":
            return event.status === "error"
                ? withCount(Metric.increment(toolCallErrorsTotal))
                : countEvent;
        case "ApprovalRequested":
            return withCount(
                Metric.increment(approvalsRequested),
                Metric.incrementBy(approvalPending, 1),
            );
        case "ApprovalGranted":
            return withCount(
                Metric.increment(approvalsGranted),
                Metric.incrementBy(approvalPending, -1),
            );
        case "ApprovalAutoApproved":
            // Counted as granted; the pending total is left untouched here.
            return withCount(
                Metric.increment(approvalsGranted),
            );
        case "ApprovalDenied":
            return withCount(
                Metric.increment(approvalsDenied),
                Metric.incrementBy(approvalPending, -1),
            );
        case "TimerCreated":
            return withCount(
                Metric.increment(timersCreated),
                Metric.incrementBy(timersPending, 1),
            );
        case "TimerFired":
            return withCount(
                Metric.increment(timersFired),
                Metric.incrementBy(timersPending, -1),
                Metric.update(timerDelayDuration, event.delayMs),
            );
        case "TimerCancelled":
            return withCount(
                Metric.increment(timersCancelled),
                Metric.incrementBy(timersPending, -1),
            );
        case "TokenUsageReported": {
            const effects = [countEvent];
            // "unknown" placeholders are dropped rather than emitted as tag values.
            const tags = {
                model: event.model && event.model !== "unknown" ? event.model : undefined,
                agent: event.agent && event.agent !== "unknown" ? event.agent : undefined,
            };
            /**
             * @template A
             * @param {A} metric
             * @returns {A}
             */
            const tagMetric = (metric) => tagMetricWithTags(metric, tags);
            if (event.inputTokens > 0) {
                effects.push(
                    Metric.incrementBy(tagMetric(tokensInputTotal), event.inputTokens),
                    Metric.update(tagMetric(tokensInputPerCall), event.inputTokens),
                );
            }
            if (event.outputTokens > 0) {
                effects.push(
                    Metric.incrementBy(tagMetric(tokensOutputTotal), event.outputTokens),
                    Metric.update(tagMetric(tokensOutputPerCall), event.outputTokens),
                );
            }
            if (event.cacheReadTokens && event.cacheReadTokens > 0) {
                effects.push(Metric.incrementBy(tagMetric(tokensCacheReadTotal), event.cacheReadTokens));
            }
            if (event.cacheWriteTokens && event.cacheWriteTokens > 0) {
                effects.push(Metric.incrementBy(tagMetric(tokensCacheWriteTotal), event.cacheWriteTokens));
            }
            if (event.reasoningTokens && event.reasoningTokens > 0) {
                effects.push(Metric.incrementBy(tagMetric(tokensReasoningTotal), event.reasoningTokens));
            }
            const contextWindowTokens = resolveContextWindowTokens(event);
            if (contextWindowTokens) {
                const bucket = classifyContextWindowBucket(contextWindowTokens);
                effects.push(
                    Metric.update(tagMetric(tokensContextWindowPerCall), contextWindowTokens),
                    Metric.increment(tagMetric(Metric.tagged(tokensContextWindowBucketTotal, "bucket", bucket))),
                );
            }
            return Effect.all(effects, { discard: true });
        }
        case "AgentEvent": {
            const agentEvent = event.event;
            // Prefer the engine on the inner agent event, then the outer event.
            const engine = normalizeMetricTag(agentEvent.engine)
                ?? normalizeMetricTag(event.engine)
                ?? "unknown";
            const baseTags = {
                engine,
                source: "event",
            };
            const effects = [
                countEvent,
                Metric.increment(tagMetricWithTags(agentEventsTotal, {
                    ...baseTags,
                    event_type: agentEvent.type,
                })),
            ];
            switch (agentEvent.type) {
                case "started":
                    effects.push(Metric.increment(tagMetricWithTags(agentSessionsTotal, {
                        ...baseTags,
                        status: "started",
                        resume: agentEvent.resume ? "true" : "false",
                    })));
                    break;
                case "action":
                    effects.push(Metric.increment(tagMetricWithTags(agentActionsTotal, {
                        ...baseTags,
                        action_kind: agentEvent.action.kind,
                        phase: agentEvent.phase,
                        level: agentEvent.level,
                        entry_type: agentEvent.entryType,
                        ok: typeof agentEvent.ok === "boolean" ? String(agentEvent.ok) : undefined,
                    })));
                    if (agentEvent.level === "error" || agentEvent.ok === false) {
                        effects.push(Metric.increment(tagMetricWithTags(agentErrorsTotal, {
                            ...baseTags,
                            event_type: agentEvent.type,
                            action_kind: agentEvent.action.kind,
                        })));
                    }
                    break;
                case "completed":
                    effects.push(Metric.increment(tagMetricWithTags(agentSessionsTotal, {
                        ...baseTags,
                        status: agentEvent.ok ? "completed" : "failed",
                        resume: agentEvent.resume ? "true" : "false",
                    })));
                    effects.push(recordAgentUsageMetrics(baseTags, agentEvent.usage));
                    if (!agentEvent.ok) {
                        effects.push(Metric.increment(tagMetricWithTags(agentErrorsTotal, {
                            ...baseTags,
                            event_type: agentEvent.type,
                        })));
                    }
                    break;
            }
            // The retry signal is recorded last for both "action" and "completed";
            // hasAgentRetrySignal is falsy for every other agent event type.
            if (hasAgentRetrySignal(agentEvent)) {
                effects.push(Metric.increment(tagMetricWithTags(agentRetriesTotal, {
                    ...baseTags,
                    reason: "event_signal",
                })));
            }
            return Effect.all(effects, { discard: true });
        }
        case "ScorerStarted":
            return withCount(
                Metric.increment(scorerEventsStarted),
            );
        case "ScorerFinished":
            return withCount(
                Metric.increment(scorerEventsFinished),
            );
        case "ScorerFailed":
            return withCount(
                Metric.increment(scorerEventsFailed),
                Metric.increment(errorsTotal),
            );
        case "MemoryFactSet":
            return withCount(
                Metric.increment(memoryFactWrites),
            );
        case "MemoryRecalled":
            return withCount(
                Metric.increment(memoryRecallQueries),
            );
        case "MemoryMessageSaved":
            return withCount(
                Metric.increment(memoryMessageSaves),
            );
        case "OpenApiToolCalled":
            return withCount(
                Metric.increment(openApiToolCallsTotal),
                ...(event.status === "error" ? [Metric.increment(openApiToolCallErrorsTotal)] : []),
                Metric.update(openApiToolDuration, event.durationMs),
            );
        // Known events with no dedicated metric, plus anything unrecognized:
        // only the shared event counter is incremented.
        case "SupervisorStarted":
        case "SnapshotCaptured":
        case "RunForked":
        case "ReplayStarted":
        default:
            return countEvent;
    }
}
@@ -0,0 +1,14 @@
1
+ import { Effect, Metric } from "effect";
2
+ import { asyncExternalWaitCounts } from "./_asyncExternalWaitCounts.js";
3
+ import { externalWaitAsyncPending } from "./externalWaitAsyncPending.js";
/**
 * Adjusts the pending count kept in `asyncExternalWaitCounts` for one kind of
 * external wait and publishes the new value on `externalWaitAsyncPending`,
 * tagged with `kind`.
 *
 * The stored count is clamped so it never drops below zero. The update runs
 * only when the returned effect is executed.
 *
 * @param {"approval" | "event"} kind - Which wait counter to adjust.
 * @param {number} delta - Amount to add (negative to subtract).
 * @returns {Effect.Effect<void>} Effect that applies the change and sets the metric.
 */
export function updateAsyncExternalWaitPending(kind, delta) {
    const applyDelta = Effect.sync(() => {
        const next = Math.max(0, asyncExternalWaitCounts[kind] + delta);
        asyncExternalWaitCounts[kind] = next;
        return next;
    });
    return Effect.flatMap(applyDelta, (pending) => Metric.set(Metric.tagged(externalWaitAsyncPending, "kind", kind), pending));
}
@@ -0,0 +1,17 @@
1
+ import { Effect, Metric } from "effect";
2
+ import { processStartMs } from "./_processStartMs.js";
3
+ import { processUptimeSeconds } from "./processUptimeSeconds.js";
4
+ import { processMemoryRssBytes } from "./processMemoryRssBytes.js";
5
+ import { processHeapUsedBytes } from "./processHeapUsedBytes.js";
/**
 * Publishes process-level metrics: uptime in seconds (time elapsed since
 * `processStartMs`), resident set size and used heap as reported by
 * `process.memoryUsage()`.
 *
 * Sampling is deferred with `Effect.suspend`, so the values are read each
 * time the returned effect runs rather than once when this function is
 * called. A single returned effect can therefore be repeated or scheduled
 * without reporting stale numbers.
 *
 * @returns {Effect.Effect<void>} Effect that samples and sets the three metrics.
 */
export function updateProcessMetrics() {
    return Effect.suspend(() => {
        const uptimeS = (Date.now() - processStartMs) / 1000;
        const mem = process.memoryUsage();
        return Effect.all([
            Metric.set(processUptimeSeconds, uptimeS),
            Metric.set(processMemoryRssBytes, mem.rss),
            Metric.set(processHeapUsedBytes, mem.heapUsed),
        ], { discard: true });
    });
}
@@ -0,0 +1,3 @@
1
+ import { Metric } from "effect";
2
+ import { fastBuckets } from "./_buckets.js";
3
+ export const vcsDuration = Metric.histogram("smithers.vcs.duration_ms", fastBuckets); // Histogram "smithers.vcs.duration_ms"; bucket boundaries come from fastBuckets in ./_buckets.js (unit presumably milliseconds, per the name).
@@ -0,0 +1 @@
1
+ export const prometheusContentType = "text/plain; version=0.0.4; charset=utf-8"; // Content-Type value: plain text, version 0.0.4, UTF-8 (presumably sent with the rendered Prometheus metrics output — confirm at the call site).