autotel 2.26.3 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. package/README.md +50 -23
  2. package/dist/attribute-redacting-processor.cjs +8 -8
  3. package/dist/attribute-redacting-processor.d.cts +10 -1
  4. package/dist/attribute-redacting-processor.d.ts +10 -1
  5. package/dist/attribute-redacting-processor.js +1 -1
  6. package/dist/attributes.cjs +21 -21
  7. package/dist/attributes.d.cts +3 -3
  8. package/dist/attributes.d.ts +3 -3
  9. package/dist/attributes.js +2 -2
  10. package/dist/auto.cjs +3 -3
  11. package/dist/auto.js +2 -2
  12. package/dist/business-baggage.d.cts +1 -1
  13. package/dist/business-baggage.d.ts +1 -1
  14. package/dist/chunk-4P6ZOARG.cjs +33 -0
  15. package/dist/chunk-4P6ZOARG.cjs.map +1 -0
  16. package/dist/{chunk-U54FTVFH.js → chunk-52PUSFC2.js} +3 -3
  17. package/dist/{chunk-U54FTVFH.js.map → chunk-52PUSFC2.js.map} +1 -1
  18. package/dist/{chunk-YEVCD6DR.cjs → chunk-7SMNC4LS.cjs} +7 -7
  19. package/dist/{chunk-YEVCD6DR.cjs.map → chunk-7SMNC4LS.cjs.map} +1 -1
  20. package/dist/{chunk-563EL6O6.cjs → chunk-BPO2PQ3T.cjs} +12 -8
  21. package/dist/chunk-BPO2PQ3T.cjs.map +1 -0
  22. package/dist/{chunk-WZOKY3PW.cjs → chunk-DAZ7EGR4.cjs} +19 -19
  23. package/dist/{chunk-WZOKY3PW.cjs.map → chunk-DAZ7EGR4.cjs.map} +1 -1
  24. package/dist/{chunk-ER43K7ES.js → chunk-DDXIUZEG.js} +3 -3
  25. package/dist/{chunk-ER43K7ES.js.map → chunk-DDXIUZEG.js.map} +1 -1
  26. package/dist/{chunk-JKIMEPI2.cjs → chunk-DQ2SUROF.cjs} +4 -4
  27. package/dist/{chunk-JKIMEPI2.cjs.map → chunk-DQ2SUROF.cjs.map} +1 -1
  28. package/dist/{chunk-B3ZHLLMP.js → chunk-DSMSIVTG.js} +2 -2
  29. package/dist/chunk-DSMSIVTG.js.map +1 -0
  30. package/dist/{chunk-OBWXM4NN.cjs → chunk-HKZHUGGN.cjs} +15 -14
  31. package/dist/chunk-HKZHUGGN.cjs.map +1 -0
  32. package/dist/{chunk-TDNKIHKT.js → chunk-JVWJDHDB.js} +13 -4
  33. package/dist/chunk-JVWJDHDB.js.map +1 -0
  34. package/dist/{chunk-YN7USLHW.js → chunk-K7HSRLP5.js} +11 -10
  35. package/dist/chunk-K7HSRLP5.js.map +1 -0
  36. package/dist/chunk-KIL5CUN6.js +31 -0
  37. package/dist/chunk-KIL5CUN6.js.map +1 -0
  38. package/dist/chunk-KKGM42RQ.cjs +1207 -0
  39. package/dist/chunk-KKGM42RQ.cjs.map +1 -0
  40. package/dist/{chunk-6YGUN7IY.cjs → chunk-MOO75VE4.cjs} +18 -17
  41. package/dist/chunk-MOO75VE4.cjs.map +1 -0
  42. package/dist/{chunk-GML3FBOT.cjs → chunk-NCSMD3TK.cjs} +2 -2
  43. package/dist/chunk-NCSMD3TK.cjs.map +1 -0
  44. package/dist/{chunk-CMNGGTQL.cjs → chunk-NXLRY2CE.cjs} +13 -4
  45. package/dist/chunk-NXLRY2CE.cjs.map +1 -0
  46. package/dist/{chunk-BJ2XPN77.js → chunk-OM4OSBOP.js} +5 -5
  47. package/dist/{chunk-BJ2XPN77.js.map → chunk-OM4OSBOP.js.map} +1 -1
  48. package/dist/{chunk-HPUGKUMZ.js → chunk-PMRWMRXY.js} +13 -640
  49. package/dist/chunk-PMRWMRXY.js.map +1 -0
  50. package/dist/{chunk-UTZR7P7E.cjs → chunk-QPH5ZKP5.cjs} +43 -673
  51. package/dist/chunk-QPH5ZKP5.cjs.map +1 -0
  52. package/dist/chunk-SEO6NAQT.js +14 -0
  53. package/dist/chunk-SEO6NAQT.js.map +1 -0
  54. package/dist/{chunk-QC5MNKVF.js → chunk-TFRZOUTV.js} +13 -12
  55. package/dist/chunk-TFRZOUTV.js.map +1 -0
  56. package/dist/chunk-VQTCQKHQ.cjs +17 -0
  57. package/dist/chunk-VQTCQKHQ.cjs.map +1 -0
  58. package/dist/chunk-Z7VAOK5X.js +1183 -0
  59. package/dist/chunk-Z7VAOK5X.js.map +1 -0
  60. package/dist/{chunk-W35FVJBC.js → chunk-ZDPIWKWD.js} +9 -5
  61. package/dist/chunk-ZDPIWKWD.js.map +1 -0
  62. package/dist/correlation-id.cjs +22 -10
  63. package/dist/correlation-id.js +14 -2
  64. package/dist/decorators.cjs +7 -8
  65. package/dist/decorators.cjs.map +1 -1
  66. package/dist/decorators.d.cts +1 -1
  67. package/dist/decorators.d.ts +1 -1
  68. package/dist/decorators.js +6 -7
  69. package/dist/decorators.js.map +1 -1
  70. package/dist/event.cjs +8 -9
  71. package/dist/event.js +5 -6
  72. package/dist/functional.cjs +13 -14
  73. package/dist/functional.d.cts +1 -1
  74. package/dist/functional.d.ts +1 -1
  75. package/dist/functional.js +6 -7
  76. package/dist/http.cjs +13 -2
  77. package/dist/http.cjs.map +1 -1
  78. package/dist/http.js +12 -1
  79. package/dist/http.js.map +1 -1
  80. package/dist/index.cjs +305 -280
  81. package/dist/index.cjs.map +1 -1
  82. package/dist/index.d.cts +89 -10
  83. package/dist/index.d.ts +89 -10
  84. package/dist/index.js +180 -181
  85. package/dist/index.js.map +1 -1
  86. package/dist/instrumentation.cjs +9 -9
  87. package/dist/instrumentation.js +2 -2
  88. package/dist/messaging-adapters.d.cts +1 -1
  89. package/dist/messaging-adapters.d.ts +1 -1
  90. package/dist/messaging-testing.d.cts +1 -1
  91. package/dist/messaging-testing.d.ts +1 -1
  92. package/dist/messaging.cjs +11 -11
  93. package/dist/messaging.d.cts +1 -1
  94. package/dist/messaging.d.ts +1 -1
  95. package/dist/messaging.js +8 -8
  96. package/dist/semantic-helpers.cjs +11 -12
  97. package/dist/semantic-helpers.d.cts +1 -1
  98. package/dist/semantic-helpers.d.ts +1 -1
  99. package/dist/semantic-helpers.js +7 -8
  100. package/dist/{trace-context-t5X1AP-e.d.cts → trace-context-DbGKd1Rn.d.cts} +18 -5
  101. package/dist/{trace-context-t5X1AP-e.d.ts → trace-context-DbGKd1Rn.d.ts} +18 -5
  102. package/dist/trace-helpers.cjs +13 -13
  103. package/dist/trace-helpers.d.cts +2 -2
  104. package/dist/trace-helpers.d.ts +2 -2
  105. package/dist/trace-helpers.js +1 -1
  106. package/dist/{utils-CbUkl8r1.d.cts → utils-BahBCFtJ.d.cts} +1 -1
  107. package/dist/{utils-Buel3cj0.d.ts → utils-CLKwaUlG.d.ts} +1 -1
  108. package/dist/webhook.cjs +21 -12
  109. package/dist/webhook.cjs.map +1 -1
  110. package/dist/webhook.d.cts +1 -1
  111. package/dist/webhook.d.ts +1 -1
  112. package/dist/webhook.js +20 -11
  113. package/dist/webhook.js.map +1 -1
  114. package/dist/workflow-distributed.cjs +25 -21
  115. package/dist/workflow-distributed.cjs.map +1 -1
  116. package/dist/workflow-distributed.d.cts +1 -1
  117. package/dist/workflow-distributed.d.ts +1 -1
  118. package/dist/workflow-distributed.js +23 -19
  119. package/dist/workflow-distributed.js.map +1 -1
  120. package/dist/workflow.cjs +12 -12
  121. package/dist/workflow.d.cts +1 -1
  122. package/dist/workflow.d.ts +1 -1
  123. package/dist/workflow.js +8 -8
  124. package/package.json +43 -45
  125. package/skills/analyze-traces/SKILL.md +178 -0
  126. package/skills/autotel-core/SKILL.md +2 -7
  127. package/skills/autotel-events/SKILL.md +2 -6
  128. package/skills/autotel-frameworks/SKILL.md +2 -9
  129. package/skills/autotel-instrumentation/SKILL.md +2 -7
  130. package/skills/autotel-request-logging/SKILL.md +2 -8
  131. package/skills/autotel-structured-errors/SKILL.md +2 -7
  132. package/skills/build-audit-trails/SKILL.md +302 -0
  133. package/skills/debug-missing-spans/SKILL.md +248 -0
  134. package/skills/migrate-to-autotel/SKILL.md +268 -0
  135. package/skills/review-otel-patterns/SKILL.md +488 -0
  136. package/skills/review-otel-patterns/references/code-review.md +75 -0
  137. package/skills/review-otel-patterns/references/processor-pipeline.md +205 -0
  138. package/skills/review-otel-patterns/references/structured-errors.md +102 -0
  139. package/skills/review-otel-patterns/references/wide-spans.md +85 -0
  140. package/skills/tune-sampling/SKILL.md +210 -0
  141. package/src/attribute-redacting-processor.test.ts +6 -4
  142. package/src/attribute-redacting-processor.ts +11 -2
  143. package/src/correlated-events.test.ts +151 -0
  144. package/src/correlated-events.ts +47 -0
  145. package/src/drain-toolkit.test.ts +113 -0
  146. package/src/drain-toolkit.ts +129 -0
  147. package/src/enricher-toolkit.test.ts +67 -0
  148. package/src/enricher-toolkit.ts +79 -0
  149. package/src/functional.ts +2 -0
  150. package/src/gen-ai-events.ts +14 -5
  151. package/src/index.ts +39 -4
  152. package/src/messaging.ts +10 -9
  153. package/src/redact-values.test.ts +24 -10
  154. package/src/redact-values.ts +9 -2
  155. package/src/request-logger.test.ts +91 -0
  156. package/src/request-logger.ts +40 -5
  157. package/src/structured-error.test.ts +86 -1
  158. package/src/structured-error.ts +9 -2
  159. package/src/trace-context.ts +39 -11
  160. package/src/trace-helpers.ts +2 -2
  161. package/src/trace-hybrid.test.ts +42 -0
  162. package/src/trace-hybrid.ts +37 -0
  163. package/src/webhook.ts +16 -7
  164. package/src/workflow-distributed.ts +18 -13
  165. package/src/workflow.ts +7 -6
  166. package/bin/intent.js +0 -6
  167. package/dist/chunk-563EL6O6.cjs.map +0 -1
  168. package/dist/chunk-6YGUN7IY.cjs.map +0 -1
  169. package/dist/chunk-B3ZHLLMP.js.map +0 -1
  170. package/dist/chunk-BBBWDIYQ.js +0 -211
  171. package/dist/chunk-BBBWDIYQ.js.map +0 -1
  172. package/dist/chunk-CMNGGTQL.cjs.map +0 -1
  173. package/dist/chunk-D5LMF53P.cjs +0 -150
  174. package/dist/chunk-D5LMF53P.cjs.map +0 -1
  175. package/dist/chunk-GML3FBOT.cjs.map +0 -1
  176. package/dist/chunk-HPUGKUMZ.js.map +0 -1
  177. package/dist/chunk-HZ3FYBJG.cjs +0 -217
  178. package/dist/chunk-HZ3FYBJG.cjs.map +0 -1
  179. package/dist/chunk-JSNUWSBH.cjs +0 -62
  180. package/dist/chunk-JSNUWSBH.cjs.map +0 -1
  181. package/dist/chunk-OBWXM4NN.cjs.map +0 -1
  182. package/dist/chunk-QC5MNKVF.js.map +0 -1
  183. package/dist/chunk-S4OFEXLA.js +0 -53
  184. package/dist/chunk-S4OFEXLA.js.map +0 -1
  185. package/dist/chunk-TDNKIHKT.js.map +0 -1
  186. package/dist/chunk-UTZR7P7E.cjs.map +0 -1
  187. package/dist/chunk-W35FVJBC.js.map +0 -1
  188. package/dist/chunk-WD4RP6IV.js +0 -146
  189. package/dist/chunk-WD4RP6IV.js.map +0 -1
  190. package/dist/chunk-YN7USLHW.js.map +0 -1
  191. package/src/package-manifest.test.ts +0 -24
@@ -2,19 +2,14 @@
2
2
  name: autotel-core
3
3
  description: >
4
4
  When to use trace vs span vs request logger vs events in Autotel. Init once at startup, package exports (autotel, autotel/event, autotel/testing). Use for setup and choosing the right API.
5
- type: core
6
- library: autotel
7
- library_version: '2.23.0'
8
- sources:
9
- - jagreehal/autotel:AGENTS.md
10
- - jagreehal/autotel:docs/AGENT-GUIDE.md
11
- - jagreehal/autotel:packages/autotel/CLAUDE.md
12
5
  ---
13
6
 
14
7
  # Autotel — Core
15
8
 
16
9
  OpenTelemetry instrumentation for Node.js and edge. Instrument once; stream to any OTLP backend. Use `trace()`/`span()` for spans, `getRequestLogger()` for one snapshot per request, `createStructuredError`/`parseError` for errors, `track()` for product events.
17
10
 
11
+ Event guidance: for new instrumentation, emit events as correlated logs (via request logger or logging pipeline bridged to OTel Logs API). Do not introduce new direct span-event dependencies for business/exception events.
12
+
18
13
  ## When to Use What
19
14
 
20
15
  | Need | API | Import |
@@ -2,18 +2,14 @@
2
2
  name: autotel-events
3
3
  description: >
4
4
  track(), Event API, subscribers (e.g. PostHog). Configure subscribers in init(); use track() or Event for product/analytics events.
5
- type: core
6
- library: autotel
7
- library_version: '2.23.0'
8
- sources:
9
- - jagreehal/autotel:packages/autotel/src/event.ts
10
- - jagreehal/autotel:packages/autotel/src/event-subscriber.ts
11
5
  ---
12
6
 
13
7
  # Autotel — Events
14
8
 
15
9
  Send product and analytics events with `track(name, attributes)` or the `Event` class from `autotel/event`. Configure subscribers (e.g. PostHog) in `init()`; they receive events automatically.
16
10
 
11
+ For observability events in new code, prefer log-based correlated events (OTel Logs API model) over introducing new direct span-event instrumentation.
12
+
17
13
  ## Setup
18
14
 
19
15
  ```typescript
@@ -2,15 +2,6 @@
2
2
  name: autotel-frameworks
3
3
  description: >
4
4
  Hono, Fastify, TanStack Start, Cloudflare Workers. Middleware and init; getRequestLogger() in handlers. Load when adding Autotel to a web framework.
5
- type: framework
6
- library: autotel
7
- library_version: '2.23.0'
8
- requires:
9
- - autotel-instrumentation
10
- sources:
11
- - jagreehal/autotel:packages/autotel-hono/src/index.ts
12
- - jagreehal/autotel:docs/AGENT-GUIDE.md
13
- - jagreehal/autotel:AGENTS.md
14
5
  ---
15
6
 
16
7
  # Autotel — Framework Integration
@@ -19,6 +10,8 @@ This skill builds on autotel-instrumentation. Read it first for init() and span
19
10
 
20
11
  Use framework-specific middleware or wrappers to create a span per request; then call `getRequestLogger()` inside handlers. Each framework package (autotel-hono, autotel-tanstack, autotel-cloudflare) provides the glue.
21
12
 
13
+ When adding new request/exception events in framework handlers, prefer correlated logs (`getRequestLogger().info/warn/error`) instead of introducing new `span.addEvent()` usage.
14
+
22
15
  ## Setup
23
16
 
24
17
  ### Hono
@@ -2,19 +2,14 @@
2
2
  name: autotel-instrumentation
3
3
  description: >
4
4
  trace(), span(), instrument(), init(). Factory vs direct pattern, name inference. Sync init; use node-require for optional deps. Load when wrapping handlers or functions with spans.
5
- type: core
6
- library: autotel
7
- library_version: '2.23.0'
8
- sources:
9
- - jagreehal/autotel:docs/ARCHITECTURE.md
10
- - jagreehal/autotel:packages/autotel/src/functional.ts
11
- - jagreehal/autotel:packages/autotel/CLAUDE.md
12
5
  ---
13
6
 
14
7
  # Autotel — Instrumentation
15
8
 
16
9
  Wrap functions and handlers with `trace()`, `span()`, or `instrument()`. Call `init()` once at app startup. Keep init synchronous; use `safeRequire`/`requireModule` for optional dependencies.
17
10
 
11
+ For new event emission, prefer correlated logs (OTel Logs API path) over adding new direct span-event calls.
12
+
18
13
  ## Setup
19
14
 
20
15
  ```typescript
@@ -2,14 +2,6 @@
2
2
  name: autotel-request-logging
3
3
  description: >
4
4
  getRequestLogger(), set(), info/warn/error, emitNow(). One snapshot per request; requires active span. Use when adding request-scoped context or replacing scattered console.log.
5
- type: core
6
- library: autotel
7
- library_version: '2.23.0'
8
- requires:
9
- - autotel-instrumentation
10
- sources:
11
- - jagreehal/autotel:packages/autotel/src/request-logger.ts
12
- - jagreehal/autotel:docs/AGENT-GUIDE.md
13
5
  ---
14
6
 
15
7
  # Autotel — Request Logging
@@ -18,6 +10,8 @@ This skill builds on autotel-instrumentation. Read it first for init and span cr
18
10
 
19
11
  Accumulate context with `getRequestLogger(ctx)`, `.set()`, and `.info()`/`.warn()`/`.error()`. Call `.emitNow()` (or rely on middleware) to emit one snapshot per request. Request logger requires an active span — use inside `trace()` or framework middleware.
20
12
 
13
+ Preferred event model: treat request logger emissions as the default way to capture request-correlated events in new code. If a backend still expects span-event rendering, keep compatibility at export/processor level rather than adding new `span.addEvent()` calls in application code.
14
+
21
15
  ## Setup
22
16
 
23
17
  ```typescript
@@ -2,13 +2,6 @@
2
2
  name: autotel-structured-errors
3
3
  description: >
4
4
  createStructuredError, parseError, recordStructuredError. API errors with message, why, fix, link; client parsing for UI. Use in API routes and client catch blocks.
5
- type: core
6
- library: autotel
7
- library_version: '2.23.0'
8
- sources:
9
- - jagreehal/autotel:packages/autotel/src/structured-error.ts
10
- - jagreehal/autotel:packages/autotel/src/parse-error.ts
11
- - jagreehal/autotel:docs/AGENT-GUIDE.md
12
5
  ---
13
6
 
14
7
  # Autotel — Structured Errors
@@ -69,6 +62,8 @@ try {
69
62
 
70
63
  **Record on current span:** Use `recordStructuredError(ctx, error)` or the request logger's `.error(error, fields)` so the span gets error attributes and status.
71
64
 
65
+ For new exception event flows, prefer request-logger/log-based correlation and keep span-event compatibility as an implementation detail (processors/export path), not a new app-level dependency.
66
+
72
67
  **parseError** handles FetchError (ofetch), nested `data.data`, and plain Error. Returns `{ message, status, why?, fix?, link?, raw }`.
73
68
 
74
69
  ## Common Mistakes
@@ -0,0 +1,302 @@
1
+ ---
2
+ name: build-audit-trails
3
+ description: >
4
+ Design tamper-aware audit trails on top of OpenTelemetry spans using
5
+ autotel. Covers what counts as auditable, the audit-only span discipline,
6
+ signing and tamper-detection, denial logging, redaction, retention,
7
+ separation of concerns from operational telemetry, and framework wiring
8
+ (Next.js, Nuxt, Hono, Express, Cloudflare Workers).
9
+ license: MIT
10
+ ---
11
+
12
+ # Build audit trails
13
+
14
+ An _audit trail_ is a record of who did what to which resource, when, and whether it was permitted — durable, tamper-evident, and admissible. Operational telemetry (latency, errors, span shapes) is for engineers; audit trails are for compliance, security, and forensics. They overlap technically but differ on every other axis.
15
+
16
+ autotel lets you express both with the same primitive — a span — but you should keep them on **separate processors** so an audit event never gets dropped by sampling, never gets redacted by a debug rule, and never goes to the same backend as your ops data.
17
+
18
+ ## When to use
19
+
20
+ - Implementing GDPR / HIPAA / SOC2 / PCI-DSS / ISO 27001 / GxP compliance
21
+ - Adding "who did what" trails for admin actions, access reviews, payments
22
+ - Recording authorization decisions (allow + deny)
23
+ - Building immutable evidence for incident response
24
+
25
+ ## The audit span discipline
26
+
27
+ An auditable event has six required parts:
28
+
29
+ | Field | OTel attribute | Example |
30
+ | ------------------- | --------------------------------------------------------------- | --------------------------------------------- |
31
+ | When | (span timestamp) | `2026-05-04T17:23:11.412Z` |
32
+ | Who | `enduser.id` + `enduser.role` | `usr_42`, `admin` |
33
+ | Where (acting from) | `client.address`, `network.peer.address`, `user_agent.original` | `203.0.113.5`, `Chrome 121` |
34
+ | What | `audit.action` | `secret.read`, `policy.update`, `user.delete` |
35
+ | Which resource | `audit.resource.type` + `audit.resource.id` | `secret`, `sec_abc` |
36
+ | Outcome | `audit.outcome` (`allow` / `deny`) + `audit.reason` | `deny`, `MFA required` |
37
+
38
+ Plus useful optional fields: `audit.policy.id` (which policy made the call), `audit.evidence.id` (id of a linked artefact), `audit.actor.session.id` (the actor's session).
39
+
40
+ ## Step 1: Define a typed `audit()` helper
41
+
42
+ Centralise the schema in one place so every site gets it right:
43
+
44
+ ```typescript
45
+ import { trace, SpanKind } from '@opentelemetry/api';
46
+
47
+ type AuditAction =
48
+ | 'secret.read'
49
+ | 'secret.write'
50
+ | 'secret.delete'
51
+ | 'policy.update'
52
+ | 'user.create'
53
+ | 'user.delete'
54
+ | 'data.export'
55
+ | 'session.assume';
56
+
57
+ interface AuditPayload {
58
+ action: AuditAction;
59
+ resource: { type: string; id: string };
60
+ outcome: 'allow' | 'deny';
61
+ reason?: string;
62
+ actor?: { id: string; role?: string; sessionId?: string };
63
+ policy?: { id: string };
64
+ evidence?: { id: string };
65
+ }
66
+
67
+ const tracer = trace.getTracer('autotel-audit', '1.0.0');
68
+
69
+ export function audit(payload: AuditPayload): void {
70
+ const span = tracer.startSpan(`audit.${payload.action}`, {
71
+ kind: SpanKind.INTERNAL,
72
+ attributes: {
73
+ audit: true,
74
+ 'audit.action': payload.action,
75
+ 'audit.outcome': payload.outcome,
76
+ 'audit.resource.type': payload.resource.type,
77
+ 'audit.resource.id': payload.resource.id,
78
+ ...(payload.reason && { 'audit.reason': payload.reason }),
79
+ ...(payload.actor?.id && { 'enduser.id': payload.actor.id }),
80
+ ...(payload.actor?.role && { 'enduser.role': payload.actor.role }),
81
+ ...(payload.actor?.sessionId && {
82
+ 'audit.actor.session.id': payload.actor.sessionId,
83
+ }),
84
+ ...(payload.policy?.id && { 'audit.policy.id': payload.policy.id }),
85
+ ...(payload.evidence?.id && { 'audit.evidence.id': payload.evidence.id }),
86
+ },
87
+ });
88
+ span.end();
89
+ }
90
+ ```
91
+
92
+ ## Step 2: Always log denials
93
+
94
+ A frequent compliance failure is "we logged what users did but not what we **stopped** them doing." Wrap the authorization decision so both branches go through `audit()`:
95
+
96
+ ```typescript
97
+ export async function withAuthz<T>(
98
+ payload: Omit<AuditPayload, 'outcome'>,
99
+ decide: () => Promise<{ allow: boolean; reason?: string }>,
100
+ body: () => Promise<T>,
101
+ ): Promise<T> {
102
+ const decision = await decide();
103
+ if (!decision.allow) {
104
+ audit({ ...payload, outcome: 'deny', reason: decision.reason });
105
+ throw createStructuredError({
106
+ status: 403,
107
+ code: 'FORBIDDEN',
108
+ message: 'Not allowed',
109
+ why: decision.reason ?? 'Insufficient permissions',
110
+ });
111
+ }
112
+ audit({ ...payload, outcome: 'allow' });
113
+ return body();
114
+ }
115
+ ```
116
+
117
+ ## Step 3: Separate the audit pipeline
118
+
119
+ Critical: route audit spans to a **different processor and backend** so:
120
+
121
+ - They are never dropped by head or tail sampling.
122
+ - They are not subject to development-mode debug exporters.
123
+ - They go to a write-once / append-only store (S3 Object Lock, immutable bucket, dedicated audit DB).
124
+
125
+ ```typescript
126
+ import {
127
+ composeSpanProcessors,
128
+ composeSubscribers,
129
+ defineConfig,
130
+ } from 'autotel-edge';
131
+ import { BatchSpanProcessor, FilteringSpanProcessor } from 'autotel/processors';
132
+
133
+ const auditExporter = new BatchSpanProcessor(
134
+ new OTLPHttpJsonExporter({
135
+ url: process.env.AUDIT_OTLP!,
136
+ headers: { authorization: `Bearer ${process.env.AUDIT_TOKEN!}` },
137
+ }),
138
+ );
139
+ const opsExporter = new BatchSpanProcessor(
140
+ new OTLPHttpJsonExporter({ url: process.env.OPS_OTLP! }),
141
+ );
142
+
143
+ // Only audit spans reach the audit pipeline.
144
+ const auditOnly = new FilteringSpanProcessor({
145
+ include: (span) => span.attributes['audit'] === true,
146
+ next: auditExporter,
147
+ });
148
+
149
+ // Conversely, ops never sees audit spans (avoid leaking PII to dashboards).
150
+ const opsOnly = new FilteringSpanProcessor({
151
+ exclude: (span) => span.attributes['audit'] === true,
152
+ next: opsExporter,
153
+ });
154
+
155
+ export const otelConfig = defineConfig({
156
+ service: { name: 'app' },
157
+ spanProcessors: composeSpanProcessors([auditOnly, opsOnly]),
158
+ });
159
+ ```
160
+
161
+ ## Step 4: Tamper detection
162
+
163
+ For environments where audit storage is shared with the producing service (no append-only bucket), sign each span:
164
+
165
+ ```typescript
166
+ import { createHmac, randomUUID } from 'node:crypto'
167
+
168
+ function signAuditAttributes(attrs: Record<string, unknown>): string {
169
+ const key = process.env.AUDIT_HMAC_KEY!
170
+ const payload = JSON.stringify(Object.fromEntries(Object.entries(attrs).sort()))
171
+ return createHmac('sha256', key).update(payload).digest('hex')
172
+ }
173
+
174
+ export function audit(payload: AuditPayload): void {
175
+ const id = randomUUID()
176
+ const attributes = { /* … as before … */, 'audit.id': id }
177
+ const signature = signAuditAttributes(attributes)
178
+ attributes['audit.signature.alg'] = 'HMAC-SHA256'
179
+ attributes['audit.signature.value'] = signature
180
+ // … startSpan …
181
+ }
182
+ ```
183
+
184
+ Verify on the read side: recompute the HMAC over the same sorted attribute set, excluding `audit.signature.alg` and `audit.signature.value` (both are added after the signature is computed); a mismatch means the span was tampered with.
185
+
186
+ For multi-tenant or extra-strict environments (e.g. HIPAA), sign with Ed25519 using per-environment keys, and rotate those keys regularly.
187
+
188
+ ## Step 5: Redaction — what stays and what goes
189
+
190
+ | Field | In audit span? | Notes |
191
+ | ------------------------ | -------------- | ------------------------------------------------------------------------------------- |
192
+ | `enduser.id` | ✅ | Internal user id; never the email |
193
+ | `audit.resource.id` | ✅ | Required for forensics |
194
+ | `client.address` | ✅ | Last-octet redaction acceptable for IPv4 |
195
+ | Free-form payload bodies | ❌ | Never inline raw input — link by id (`audit.evidence.id`) |
196
+ | Secret values | ❌ | Use `audit.action=secret.read` + `audit.resource.id=sec_abc`, never the secret itself |
197
+ | Authorization headers | ❌ | Token names ok (`bearer.*`), values never |
198
+
199
+ The `attributeRedactor` defaults are too aggressive for audit spans (you may need the literal `enduser.id` value, not a masked one). Disable redaction selectively:
200
+
201
+ ```typescript
202
+ spanProcessors: composeSpanProcessors([
203
+ // No redactor on the audit branch — keys are already conservative
204
+ auditOnly,
205
+ // Strict redactor on ops
206
+ new AttributeRedactingProcessor(opsOnly, { redactor: 'strict' }),
207
+ ]);
208
+ ```
209
+
210
+ ## Step 6: Retention
211
+
212
+ Audit retention is set by regulation, not engineering taste. Common minimums:
213
+
214
+ | Regulation | Minimum retention |
215
+ | -------------------- | -------------------------------------------- |
216
+ | GDPR | No fixed period — keep only as long as the purpose requires; typical: 6 years (financial), 12 months (operational) |
217
+ | HIPAA | 6 years |
218
+ | PCI-DSS | 12 months total, with the most recent 3 months immediately available |
219
+ | SOX | 7 years |
220
+ | GxP / 21 CFR Part 11 | Lifetime of product + 10 years |
221
+
222
+ Express retention as a backend lifecycle policy (S3 Object Lock COMPLIANCE mode, BigQuery `--time_partitioning_expiration`), not application code.
223
+
224
+ ## Step 7: Framework wiring
225
+
226
+ ### Next.js
227
+
228
+ ```typescript
229
+ // app/admin/users/[id]/route.ts
230
+ import { withAuthz, audit } from '@/lib/audit';
231
+
232
+ export async function DELETE(
233
+ req: Request,
234
+ { params }: { params: { id: string } },
235
+ ) {
236
+ return withAuthz(
237
+ {
238
+ action: 'user.delete',
239
+ resource: { type: 'user', id: params.id },
240
+ actor: { id: req.headers.get('x-user-id')!, role: 'admin' },
241
+ },
242
+ async () => ({ allow: await canDelete(req, params.id) }),
243
+ async () => {
244
+ await db.user.delete({ where: { id: params.id } });
245
+ return Response.json({ ok: true });
246
+ },
247
+ );
248
+ }
249
+ ```
250
+
251
+ ### Hono
252
+
253
+ ```typescript
254
+ import { audit, withAuthz } from './audit';
255
+ app.post('/secrets/:id/read', async (c) => {
256
+ return withAuthz(
257
+ {
258
+ action: 'secret.read',
259
+ resource: { type: 'secret', id: c.req.param('id') },
260
+ actor: { id: c.var.user.id, role: c.var.user.role },
261
+ },
262
+ () => requireScope(c, 'secrets:read'),
263
+ async () => c.json({ value: await secrets.read(c.req.param('id')) }),
264
+ );
265
+ });
266
+ ```
267
+
268
+ ### Cloudflare Workers
269
+
270
+ Call `audit()` from inside `defineWorkerFetch` — `ctx.waitUntil` keeps the Worker alive until the audit span has been exported, even after the response has been returned:
271
+
272
+ ```typescript
273
+ export default defineWorkerFetch(
274
+ { service: { name: 'admin-api' } },
275
+ async (request, env, ctx, log) => {
276
+ return withAuthz(
277
+ {
278
+ action: 'data.export',
279
+ resource: { type: 'project', id: 'p_123' },
280
+ actor: { id: 'usr_42' },
281
+ },
282
+ async () => ({ allow: true }),
283
+ async () => Response.json({ ok: true }),
284
+ );
285
+ },
286
+ );
287
+ ```
288
+
289
+ ## Anti-patterns
290
+
291
+ | Anti-pattern | Fix |
292
+ | ---------------------------------------------- | -------------------------------------------------------------- |
293
+ | Audit logs in `console.log` / unstructured | Use `audit()` so every event has the same shape |
294
+ | Same backend for audit and ops | Separate processors, separate retention |
295
+ | Audit subject to sampling | `FilteringSpanProcessor` with `include: span.attributes.audit` |
296
+ | Logging only successes | Always log denials too |
297
+ | Putting secrets / payloads in audit attributes | Reference by id only (`audit.evidence.id`) |
298
+ | No tamper detection | HMAC signature on critical environments |
299
+ | Custom retention in code | Express via storage-layer lifecycle policy |
300
+ | Audit on every read of harmless data | Audit _meaningful_ events; not every list call |
301
+ | Audit row tied to a specific framework | The `audit()` function is framework-agnostic |
302
+ | `enduser.id` = email | Use the internal id; emails go in a separate identity table |
@@ -0,0 +1,248 @@
1
+ ---
2
+ name: debug-missing-spans
3
+ description: >
4
+ Troubleshoot when expected OpenTelemetry spans don't reach the backend.
5
+ Walks the chain top-to-bottom — code → SDK init → processor → exporter →
6
+ network → backend ingest — with concrete tests at each step. Covers head
7
+ sampling, ctx.waitUntil drops on Cloudflare, init-order races, runtime
8
+ detection failures, propagation breaks, exporter auth errors, and
9
+ silent rate limits.
10
+ license: MIT
11
+ ---
12
+
13
+ # Debug missing spans
14
+
15
+ When a span you expect isn't in the backend, the cause is somewhere in this chain:
16
+
17
+ ```
18
+ code → SDK init → head sampler → processor → exporter → network → backend ingest → backend index
19
+ ```
20
+
21
+ This skill walks each link in order with a quick check you can run. Don't skip steps — the cause is rarely where you'd guess.
22
+
23
+ ## Step 0: Reproduce locally with the pretty exporter
24
+
25
+ Before chasing remote backends, confirm the span exists at all:
26
+
27
+ ```typescript
28
+ init({
29
+ service: 'my-app',
30
+ debug: 'pretty', // hierarchical colourised output to stdout
31
+ });
32
+ ```
33
+
34
+ If you see the span in stdout, the SDK + sampler are fine — skip to "Step 5: Exporter" and "Step 6: Network / TLS". If you don't, keep reading.
35
+
36
+ ## Step 1: Is the SDK actually initialised?
37
+
38
+ Common failure: `init()` runs after the first request because of import-order.
39
+
40
+ ```typescript
41
+ import { trace } from '@opentelemetry/api';
42
+
43
+ const tracer = trace.getTracer('autotel-debug');
44
+ console.log(
45
+ '[autotel-debug] tracer is no-op:',
46
+ tracer.constructor.name === 'NoopTracer',
47
+ );
48
+ ```
49
+
50
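+ If `true` — or if the name printed is `ProxyTracer`, which is what `@opentelemetry/api` hands out while no provider has been registered — `init()` ran too late. Move it to the very top of the entry file (or to `instrumentation.ts` for Next.js).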
51
+
52
+ ## Step 2: Head sampler
53
+
54
+ Print the effective head rate:
55
+
56
+ ```typescript
57
+ import { getActiveConfig } from 'autotel-edge';
58
+ console.log('[autotel-debug] sampling:', getActiveConfig()?.sampling);
59
+ ```
60
+
61
+ Common gotchas:
62
+
63
+ - `sampling.rates: { server: 5 }` — 5 % means 95 % of spans never start.
64
+ - Inheriting `OTEL_TRACES_SAMPLER_ARG=0.01` from the environment via the OTel default sampler.
65
+ - Your test happens to hit the unsampled branch — instrument with `sampling: { rates: { server: 100 } }` while reproducing.
66
+
67
+ To force sampling for one request, send a `traceparent` with the sampled flag set:
68
+
69
+ ```
70
+ traceparent: 00-<traceid>-<spanid>-01
71
+ ```
72
+
73
+ (`-01` at the end = sampled.) autotel's parent-based sampler will respect it.
74
+
75
+ ## Step 3: Cloudflare Workers — `ctx.waitUntil`
76
+
77
+ The single biggest cause of missing spans on the edge: **the response returned before the exporter flushed**.
78
+
79
+ If you're using `addEventListener('fetch', …)` or a hand-rolled `fetch` in a module worker without wiring `ctx.waitUntil(…)` to the export call, async drains drop silently.
80
+
81
+ Fix — switch to `defineWorkerFetch` or `wrapModule`, both of which wire `waitUntil` automatically:
82
+
83
+ ```typescript
84
+ import { defineWorkerFetch } from 'autotel-cloudflare';
85
+
86
+ export default defineWorkerFetch(
87
+ { service: { name: 'edge' } },
88
+ async (request, env, ctx, log) => {
89
+ // log.set / spans here are all flushed via ctx.waitUntil, which keeps the Worker alive until the export finishes
90
+ return new Response('ok');
91
+ },
92
+ );
93
+ ```
94
+
95
+ ## Step 4: Processor pipeline
96
+
97
+ Print what's wired:
98
+
99
+ ```typescript
100
+ import { trace } from '@opentelemetry/api';
101
+ const provider = trace.getTracerProvider();
102
+ console.log('[autotel-debug] provider:', provider.constructor.name);
103
+ console.log(
104
+ '[autotel-debug] processors:',
105
+ (provider as any)._registeredSpanProcessors?.map(
106
+ (p: any) => p.constructor.name,
107
+ ),
108
+ );
109
+ ```
110
+
111
+ Common issues:
112
+
113
+ - **A `FilteringSpanProcessor` excludes your span.** Check the `include` / `exclude` predicates.
114
+ - **A `TailSamplingProcessor` dropped the trace** (no error, no slow root, no debug header).
115
+ - **A `composePostProcessors` step returns `[]` for your span.**
116
+
117
+ To bisect, temporarily strip post-processors:
118
+
119
+ ```typescript
120
+ init({
121
+ service: 'my-app',
122
+ exporter: { url: process.env.OTLP_ENDPOINT! },
123
+ // no postProcessor, no tail sampler, no filter
124
+ });
125
+ ```
126
+
127
+ If the span shows up now, add back the processors one at a time.
128
+
129
+ ## Step 5: Exporter
130
+
131
+ Tail the SDK's diagnostic log:
132
+
133
+ ```typescript
134
+ import { diag, DiagConsoleLogger, DiagLogLevel } from '@opentelemetry/api';
135
+ diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG);
136
+ ```
137
+
138
+ Look for:
139
+
140
+ ```
141
+ @opentelemetry/api: ... OTLPExporter: failed to send 4 traces, status: 401, error: ...
142
+ ```
143
+
144
+ Common exporter errors:
145
+
146
+ | Status | Meaning | Fix |
147
+ | ------------- | ------------------------- | ----------------------------------------------------------- |
148
+ | `401` | Bad / missing auth header | Check `OTLP_HEADERS` / vendor token name |
149
+ | `403` | Token has no write scope | Issue a token with the right scope |
150
+ | `404` | Wrong endpoint URL | Check region (`api.honeycomb.io` vs `api.eu1.honeycomb.io`) |
151
+ | `413` | Batch too big | Lower `BatchSpanProcessor` `maxExportBatchSize` |
152
+ | `429` | Rate-limited | Reduce head/tail rates; honour `retry-after` |
153
+ | `502/503/504` | Upstream unhealthy | Often transient; add retries; check backend status |
154
+ | Network error | DNS / firewall | `curl -v <url>` from the same network |
155
+
156
+ ## Step 6: Network / TLS
157
+
158
+ For self-hosted Collectors:
159
+
160
+ ```bash
161
+ curl -v -X POST $OTLP_ENDPOINT \
162
+ -H 'content-type: application/json' \
163
+ -H "$AUTH_HEADER" \
164
+ -d '{"resourceSpans":[]}'
165
+ ```
166
+
167
+ Should return `200`. If it doesn't, the problem is between you and the Collector — not autotel.
168
+
169
+ For Cloudflare Workers, run `wrangler tail` and look for `OTLPExporter` errors.
170
+
171
+ ## Step 7: Backend ingest — silent rejection
172
+
173
+ Some backends accept the request with a 200 but drop the events:
174
+
175
+ - **Honeycomb**: dataset must exist _and_ the API key must have write access to it. Mismatched key/dataset → silent drop.
176
+ - **Datadog**: check `service` is set (resource attribute `service.name`) — they ignore spans without it.
177
+ - **Sentry**: SDK version mismatch on envelope → 200 but events disappear.
178
+ - **Grafana Cloud Tempo**: spans without `service.name` go to a fallback service called `unknown_service`.
179
+
180
+ For each backend, the dataset / index / project where you'd expect the span:
181
+
182
+ | Backend | Where the span lands |
183
+ | ------------- | --------------------------------------- |
184
+ | Honeycomb | dataset = `service.name` (auto-created) |
185
+ | Datadog | `service:<name>` filter |
186
+ | Grafana Tempo | search by `traceId` |
187
+ | Jaeger | service dropdown = `service.name` |
188
+ | Sentry | project linked to the DSN |
189
+
190
+ ## Step 8: Backend index lag
191
+
192
+ After a 200, expect ingestion lag of:
193
+
194
+ | Backend | Typical lag |
195
+ | ------------------ | ----------- |
196
+ | Honeycomb | < 5 s |
197
+ | Datadog | 30–60 s |
198
+ | Grafana Tempo | 10–30 s |
199
+ | Sentry | 30–120 s |
200
+ | Self-hosted Jaeger | < 1 s |
201
+
202
+ Don't conclude the span is missing until you've waited > 2× the expected lag.
203
+
204
+ ## Step-by-step checklist
205
+
206
+ ```
207
+ [ ] Span shows in `debug: 'pretty'` stdout
208
+ [ ] `tracer.constructor.name !== 'NoopTracer'` (SDK initialised)
209
+ [ ] Head rate is high enough to allow the request
210
+ [ ] Workers handler uses defineWorkerFetch / wrapModule
211
+ [ ] No post-processor / tail sampler / filter strips it
212
+ [ ] Exporter logs no 4xx/5xx
213
+ [ ] Curl to OTLP endpoint returns 200
214
+ [ ] Backend has the right service.name / dataset / project
215
+ [ ] Waited 2× expected ingest lag
216
+ ```
217
+
218
+ ## When the trace partially shows up
219
+
220
+ Some spans land, some don't:
221
+
222
+ - **Trace context broken between services** — outbound HTTP calls aren't propagating `traceparent`. Confirm autotel's global fetch instrumentation is on (`instrumentation.instrumentGlobalFetch: true`, default).
223
+ - **Async boundary loses context** — a `setTimeout` / queue callback ran outside the AsyncLocalStorage scope. Wrap with `trace()` or use `context.with()`.
224
+ - **Cross-runtime call** — Node service → Workers → browser; verify `traceparent` arrives at each leg via response headers / network panel.
225
+
226
+ ## When the SDK itself crashes
227
+
228
+ ```
229
+ TypeError: Cannot read properties of undefined (reading 'startActiveSpan')
230
+ ```
231
+
232
+ Usually means the API version (`@opentelemetry/api`) and SDK version (`@opentelemetry/sdk-trace-base`) drifted. Run:
233
+
234
+ ```bash
235
+ pnpm why @opentelemetry/api
236
+ ```
237
+
238
+ There should be exactly one resolved version. If there is more than one, pin a single version via `pnpm.overrides`.
239
+
240
+ ## Anti-patterns to fix as you debug
241
+
242
+ | Anti-pattern | Why it loses spans |
243
+ | --------------------------------------------------------- | ------------------------------------------------------------------ |
244
+ | `init()` after the first import that uses tracing | Spans before `init()` are no-ops |
245
+ | `addEventListener('fetch', …)` on Workers | Pre-module-worker style; no `ctx.waitUntil` to wire |
246
+ | Single `OTLP_ENDPOINT` env var with its `?` URL-encoded (`%3F`) | The query string — including any auth token in it — gets parsed as part of the path |
247
+ | Importing both `@sentry/tracing` and `autotel` | Double-instrumentation eats spans |
248
+ | `process.exit(0)` immediately after the work | The exporter never flushed; call `await provider.shutdown()` first |