agentfootprint 6.16.0 → 6.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/dist/adapters/observability/audit.js +559 -0
  2. package/dist/adapters/observability/audit.js.map +1 -0
  3. package/dist/adapters/observability/otel.js +545 -56
  4. package/dist/adapters/observability/otel.js.map +1 -1
  5. package/dist/adapters/observability/xray.js +97 -13
  6. package/dist/adapters/observability/xray.js.map +1 -1
  7. package/dist/esm/adapters/observability/audit.js +554 -0
  8. package/dist/esm/adapters/observability/audit.js.map +1 -0
  9. package/dist/esm/adapters/observability/otel.js +545 -56
  10. package/dist/esm/adapters/observability/otel.js.map +1 -1
  11. package/dist/esm/adapters/observability/xray.js +97 -13
  12. package/dist/esm/adapters/observability/xray.js.map +1 -1
  13. package/dist/esm/lib/canonicalJson.js +125 -0
  14. package/dist/esm/lib/canonicalJson.js.map +1 -0
  15. package/dist/esm/observability-providers.js +5 -0
  16. package/dist/esm/observability-providers.js.map +1 -1
  17. package/dist/lib/canonicalJson.js +129 -0
  18. package/dist/lib/canonicalJson.js.map +1 -0
  19. package/dist/observability-providers.js +13 -1
  20. package/dist/observability-providers.js.map +1 -1
  21. package/dist/types/adapters/observability/audit.d.ts +255 -0
  22. package/dist/types/adapters/observability/audit.d.ts.map +1 -0
  23. package/dist/types/adapters/observability/otel.d.ts +143 -20
  24. package/dist/types/adapters/observability/otel.d.ts.map +1 -1
  25. package/dist/types/adapters/observability/xray.d.ts +7 -1
  26. package/dist/types/adapters/observability/xray.d.ts.map +1 -1
  27. package/dist/types/lib/canonicalJson.d.ts +57 -0
  28. package/dist/types/lib/canonicalJson.d.ts.map +1 -0
  29. package/dist/types/observability-providers.d.ts +3 -1
  30. package/dist/types/observability-providers.d.ts.map +1 -1
  31. package/package.json +3 -2
@@ -1,10 +1,12 @@
1
1
  /**
2
2
  * otelObservability — OpenTelemetry distributed-tracing adapter.
3
3
  *
4
- * Ships every agentfootprint event as OpenTelemetry spans + log
5
- * records via a consumer-supplied OTel API. Same hierarchical
6
- * mapping as the X-Ray adapter, but the destination is whichever
7
- * OTel-compat backend the consumer's SDK exports to:
4
+ * Ships every agentfootprint event as OpenTelemetry spans + span events
5
+ * via a consumer-supplied OTel API, following the OpenTelemetry **GenAI
6
+ * semantic conventions** (`gen_ai.*` attribute namespace) plus
7
+ * agentfootprint-specific explainability attributes (`agentfootprint.*`).
8
+ * Same hierarchical mapping as the X-Ray adapter, but the destination is
9
+ * whichever OTel-compat backend the consumer's SDK exports to:
8
10
  *
9
11
  * - **Honeycomb** (OTLP/HTTP)
10
12
  * - **Grafana Cloud / Tempo / Mimir** (OTLP)
@@ -27,24 +29,67 @@
27
29
  * configure the SDK + exporter once at app startup; we just speak
28
30
  * the typed OTel API.
29
31
  *
30
- * Mapping:
32
+ * ## Event → span/attribute mapping
31
33
  *
32
- * agent.turn_start ↦ start root span (one trace per turn)
33
- * agent.turn_end ↦ end root span
34
+ * agent.turn_start ↦ start root span (one trace per turn)
35
+ * `gen_ai.operation.name: 'invoke_agent'`
36
+ * agent.turn_end ↦ end root span (+ turn-total `gen_ai.usage.*`)
34
37
  * agent.iteration_start ↦ start child span under root
35
38
  * agent.iteration_end ↦ end iteration span
36
- * stream.llm_start ↦ start child span (model call)
37
- * stream.llm_end ↦ end llm span
38
- * stream.tool_start ↦ start child span (tool call)
39
- * stream.tool_end ↦ end tool span (with `error: true` if errored)
39
+ * stream.llm_start ↦ start child span (inference) — `gen_ai.*`
40
+ * request attrs (`chat` operation)
41
+ * stream.llm_end ↦ end llm span (+ `gen_ai.usage.*`,
42
+ * `gen_ai.response.*`)
43
+ * stream.tool_start ↦ start child span — `execute_tool` operation,
44
+ * `gen_ai.tool.name` / `gen_ai.tool.call.id`
45
+ * stream.tool_end ↦ end tool span (ERROR status + `error.type`
46
+ * if errored). Correlated by toolCallId so
47
+ * PARALLEL tool calls close the right span.
40
48
  * cost.tick ↦ setAttribute on topmost active span
49
+ * error.fatal ↦ ERROR status on root + defensive unwind
50
+ * context.evaluated ↦ N span events `agentfootprint.skill.routing`
51
+ * — SYNTHESIZED name (one per routing entry),
52
+ * not a registry-verbatim forward; all other
53
+ * span events use the registry name verbatim
54
+ *
55
+ * ## Decisions = SPAN EVENTS, not attributes (design decision)
56
+ *
57
+ * Explainability signals (route decisions, skill routing, validation
58
+ * rejections, permission checks, credential lifecycle) are emitted as
59
+ * **span events** on the currently-active span rather than attributes:
60
+ *
61
+ * 1. MULTIPLICITY — an iteration span can carry several decisions
62
+ * (route + N skill routings + M permission checks). Attributes are
63
+ * last-write-wins and would clobber; span events accumulate.
64
+ * 2. ORDERING — span events carry their own timestamps, preserving the
65
+ * decision sequence inside one span. Compliance review (EU AI Act
66
+ * Art. 12 record-keeping) needs the order decisions were made.
67
+ * 3. ROUND-TRIP — OTLP backends (and agentThinkingUI's `fromOTLP`
68
+ * ingestion) surface span events as first-class timeline entries.
69
+ *
70
+ * When the consumer-injected tracer's spans don't implement `addEvent`
71
+ * (minimal test doubles), the adapter falls back to flattened
72
+ * `${eventName}.${key}` attributes — degraded (last-write-wins) but
73
+ * never silently dropped.
74
+ *
75
+ * ## PII discipline
76
+ *
77
+ * Mirrors the #9 validation contract: attribute values NEVER echo
78
+ * runtime VALUES that can carry PII —
79
+ * - tool args → top-level key NAMES only (`agentfootprint.tool.args.keys`)
80
+ * - tool results → `typeof` only (`agentfootprint.tool.result.type`)
81
+ * - validation issues → path / expected / got TYPES (bounded upstream)
82
+ * - decide() evidence → rule labels, operators, thresholds (developer
83
+ * constants) and the engine's redaction-aware value SUMMARIES
84
+ * - userPrompt / llm content / thinking → never emitted
85
+ * - error.fatal → stage + scope only (error MESSAGES can echo values)
86
+ * - credential events carry no secrets by construction (registry contract)
41
87
  *
42
88
  * @example Basic — Honeycomb via OTLP
43
89
  * ```ts
44
90
  * import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
45
91
  * import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
46
92
  * import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
47
- * import { trace } from '@opentelemetry/api';
48
93
  * import { otelObservability } from 'agentfootprint/observability-providers';
49
94
  *
50
95
  * // Set up OTel ONCE at app startup.
@@ -55,12 +100,13 @@
55
100
  * })));
56
101
  * provider.register();
57
102
  *
58
- * agent.enable.observability({
59
- * strategy: otelObservability({
60
- * serviceName: 'my-agent',
61
- * // tracer optional — defaults to trace.getTracer('agentfootprint').
62
- * }),
103
+ * const otel = otelObservability({
104
+ * serviceName: 'my-agent',
105
+ * // genAiSpanNames: true, // opt-in spec span names ('chat gpt-4', …)
63
106
  * });
107
+ * agent.enable.observability({ strategy: otel });
108
+ * // Optional — operator-level decide()/select() evidence as span events:
109
+ * // Agent.create({...}).recorder(otel.decisionEvidenceRecorder())
64
110
  * ```
65
111
  *
66
112
  * @example Test injection
@@ -72,6 +118,65 @@
72
118
  * ```
73
119
  */
74
120
  import { lazyRequire } from '../../lib/lazyRequire.js';
121
+ // ─── Bounding helpers (PII / cardinality discipline) ─────────────────
122
+ /** Hard caps for attribute payloads. Evidence is bounded upstream
123
+ * (#5 `maxFieldChars`); these are defense-in-depth for the OTLP wire. */
124
+ const MAX_ATTR_CHARS = 256;
125
+ const MAX_LIST_ITEMS = 20;
126
+ function bound(value) {
127
+ const s = typeof value === 'string' ? value : JSON.stringify(value) ?? String(value);
128
+ return s.length > MAX_ATTR_CHARS ? `${s.slice(0, MAX_ATTR_CHARS - 1)}…` : s;
129
+ }
130
+ function boundList(items) {
131
+ const capped = items.slice(0, MAX_LIST_ITEMS).map(bound);
132
+ return items.length > MAX_LIST_ITEMS
133
+ ? [...capped, `…+${items.length - MAX_LIST_ITEMS} more`]
134
+ : capped;
135
+ }
136
+ /** Render one rule's operator-level conditions as compact strings:
137
+ * `creditScore gt 700 → 750 (true)`. Value summaries come from the
138
+ * engine already bounded + redaction-aware — we only re-cap length. */
139
+ function renderConditions(rule) {
140
+ if (rule.conditions !== undefined && rule.conditions.length > 0) {
141
+ return boundList(rule.conditions.map((c) => `${c.key} ${c.op} ${bound(c.threshold)} → ${c.actualSummary} (${c.result})`));
142
+ }
143
+ if (rule.inputs !== undefined && rule.inputs.length > 0) {
144
+ return boundList(rule.inputs.map((i) => `${i.key} = ${i.valueSummary}`));
145
+ }
146
+ return [];
147
+ }
148
+ /** Flatten decide()/select() evidence into span-event attributes. */
149
+ function renderEvidenceAttrs(evidence) {
150
+ const attrs = {};
151
+ if (evidence.chosen !== undefined)
152
+ attrs['agentfootprint.decision.chosen'] = bound(evidence.chosen);
153
+ if (evidence.default !== undefined)
154
+ attrs['agentfootprint.decision.default'] = bound(evidence.default);
155
+ if (evidence.selected !== undefined)
156
+ attrs['agentfootprint.decision.selected'] = boundList(evidence.selected.map(String));
157
+ const rules = evidence.rules ?? [];
158
+ if (rules.length > 0)
159
+ attrs['agentfootprint.decision.rules_evaluated'] = rules.length;
160
+ const matched = rules.find((r) => r.matched === true);
161
+ if (matched !== undefined) {
162
+ if (matched.label !== undefined)
163
+ attrs['agentfootprint.decision.rule.label'] = bound(matched.label);
164
+ if (matched.ruleIndex !== undefined)
165
+ attrs['agentfootprint.decision.rule.index'] = matched.ruleIndex;
166
+ if (matched.branch !== undefined)
167
+ attrs['agentfootprint.decision.rule.branch'] = bound(matched.branch);
168
+ const conditions = renderConditions(matched);
169
+ if (conditions.length > 0)
170
+ attrs['agentfootprint.decision.conditions'] = conditions;
171
+ }
172
+ return attrs;
173
+ }
174
+ /** Is this object shaped like decide()/select() evidence? */
175
+ function looksLikeDecideEvidence(value) {
176
+ return (typeof value === 'object' &&
177
+ value !== null &&
178
+ Array.isArray(value.rules));
179
+ }
75
180
  // ─── Strategy factory ────────────────────────────────────────────────
76
181
  export function otelObservability(opts) {
77
182
  if (!opts.serviceName) {
@@ -79,6 +184,8 @@ export function otelObservability(opts) {
79
184
  `Pass an identifier visible in your OTel backend's service map, e.g. 'my-agent-prod'.`);
80
185
  }
81
186
  const sampleRate = opts.sampleRate ?? 1;
187
+ const genAiNames = opts.genAiSpanNames === true;
188
+ const explainability = opts.explainability !== false;
82
189
  // Lazy-resolve tracer if not injected. Defer the API import until
83
190
  // first event so consumers who don't actually fire events (no agent
84
191
  // run yet) don't even hit the OTel API surface.
@@ -105,12 +212,23 @@ export function otelObservability(opts) {
105
212
  tracer = otelApi.trace.getTracer('agentfootprint');
106
213
  return tracer;
107
214
  }
108
- // Per-turn state — same pattern as xrayObservability. Events for
109
- // multiple in-flight turns interleave correctly because we key by
110
- // `runId` from the event payload.
111
215
  const activeTurns = new Map();
112
216
  let stopped = false;
113
217
  let onErrorHook;
218
+ /**
219
+ * Resolve the run anchor for an event.
220
+ *
221
+ * Real runtime events are dispatcher envelopes — the run id lives on
222
+ * `event.meta.runId` (built by `bridge/eventMeta.ts`). The legacy
223
+ * `payload.runId` read is kept as a fallback for consumers feeding
224
+ * hand-built events (the pre-6.17 shape this adapter's own tests
225
+ * used). Without the meta read, NO span ever opened on a real agent
226
+ * run — the bug the fabricated test shapes masked.
227
+ */
228
+ function anchorRunId(event) {
229
+ const meta = event.meta;
230
+ return meta?.runId ?? event.payload?.runId;
231
+ }
114
232
  function pushSpan(turnState, name, attrs) {
115
233
  // OTel parent-context wiring: we capture the parent in a context
116
234
  // and start the new span under it. (For BYO SDK setups, the
@@ -126,12 +244,13 @@ export function otelObservability(opts) {
126
244
  turnState.stack.push({ name, span });
127
245
  return span;
128
246
  }
129
- function popSpan(turnState, expectedName) {
247
+ function popSpan(turnState, match) {
130
248
  let idx = turnState.stack.length - 1;
131
- if (expectedName) {
132
- // idx >= 0 guard above guarantees stack[idx] exists.
249
+ if (match !== undefined) {
250
+ const matches = typeof match === 'string' ? (name) => name === match : match;
251
+ // idx >= 0 guard guarantees stack[idx] exists.
133
252
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
134
- while (idx >= 0 && turnState.stack[idx].name !== expectedName)
253
+ while (idx >= 0 && !matches(turnState.stack[idx].name))
135
254
  idx--;
136
255
  }
137
256
  if (idx < 0)
@@ -140,8 +259,8 @@ export function otelObservability(opts) {
140
259
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
141
260
  return turnState.stack.splice(idx, 1)[0].span;
142
261
  }
143
- function endSpan(span, opts) {
144
- if (opts?.error) {
262
+ function endSpan(span, endOpts) {
263
+ if (endOpts?.error) {
145
264
  const code = otelApi?.SpanStatusCode?.ERROR ?? 2;
146
265
  try {
147
266
  span.setStatus({ code });
@@ -152,26 +271,229 @@ export function otelObservability(opts) {
152
271
  }
153
272
  span.end();
154
273
  }
274
+ function setAttrs(span, attrs) {
275
+ for (const [key, value] of Object.entries(attrs)) {
276
+ try {
277
+ span.setAttribute(key, value);
278
+ }
279
+ catch {
280
+ /* ignore — never break the agent loop on a sink error */
281
+ }
282
+ }
283
+ }
284
+ /** Emit a span event (preferred) or flattened-attribute fallback —
285
+ * see "Decisions = SPAN EVENTS" in the module docs. */
286
+ function recordSpanEvent(span, name, attrs) {
287
+ if (typeof span.addEvent === 'function') {
288
+ try {
289
+ span.addEvent(name, attrs);
290
+ return;
291
+ }
292
+ catch {
293
+ /* fall through to attribute fallback */
294
+ }
295
+ }
296
+ const flattened = {};
297
+ for (const [key, value] of Object.entries(attrs))
298
+ flattened[`${name}.${key}`] = value;
299
+ setAttrs(span, flattened);
300
+ }
301
+ function topSpan(t) {
302
+ return t?.stack[t.stack.length - 1]?.span;
303
+ }
304
+ /** Single-active-turn resolution for FlowRecorder evidence (which has
305
+ * no dispatcher runId to join on). One agent = one turn in flight is
306
+ * the norm; with >1 concurrent turn we can't attribute the decision
307
+ * safely, so we skip rather than risk cross-run contamination. */
308
+ function soleActiveTurn() {
309
+ if (activeTurns.size !== 1)
310
+ return undefined;
311
+ const [t] = activeTurns.values();
312
+ return t;
313
+ }
314
+ // ─── Explainability span events (typed-event side) ─────────────────
315
+ function handleExplainability(event, t) {
316
+ const top = topSpan(t);
317
+ if (!top)
318
+ return;
319
+ const p = event.payload;
320
+ switch (event.type) {
321
+ // The ReAct loop's own decision: tool-calls vs final.
322
+ case 'agentfootprint.agent.route_decided': {
323
+ recordSpanEvent(top, 'agentfootprint.agent.route_decided', {
324
+ 'agentfootprint.decision.stage': 'react-route',
325
+ 'agentfootprint.decision.chosen': bound(p.chosen),
326
+ ...(typeof p.rationale === 'string' && {
327
+ 'agentfootprint.decision.rationale': bound(p.rationale),
328
+ }),
329
+ ...(typeof p.iterIndex === 'number' && {
330
+ 'agentfootprint.iteration.index': p.iterIndex,
331
+ }),
332
+ });
333
+ break;
334
+ }
335
+ // Conditional core-flow routing. `evidence` (when an emitter
336
+ // populates it with decide() output) renders at operator level.
337
+ case 'agentfootprint.composition.route_decided': {
338
+ const attrs = {
339
+ 'agentfootprint.decision.stage': bound(p.conditionalId),
340
+ 'agentfootprint.decision.chosen': bound(p.chosen),
341
+ ...(typeof p.rationale === 'string' && {
342
+ 'agentfootprint.decision.rationale': bound(p.rationale),
343
+ }),
344
+ };
345
+ if (looksLikeDecideEvidence(p.evidence))
346
+ Object.assign(attrs, renderEvidenceAttrs(p.evidence));
347
+ recordSpanEvent(top, 'agentfootprint.composition.route_decided', attrs);
348
+ break;
349
+ }
350
+ // Skill-graph routing provenance — one span event per routed
351
+ // injection: the decision path (predicate labels + branch taken),
352
+ // the route edge, and the tools the route unlocked.
353
+ case 'agentfootprint.context.evaluated': {
354
+ const routing = p.routing;
355
+ if (!Array.isArray(routing))
356
+ break; // no skill routing this iteration — no event
357
+ for (const r of routing) {
358
+ recordSpanEvent(top, 'agentfootprint.skill.routing', {
359
+ 'agentfootprint.skill.injection_id': bound(r.injectionId),
360
+ ...(r.via !== undefined && { 'agentfootprint.skill.via': bound(r.via) }),
361
+ ...(r.label !== undefined && { 'agentfootprint.skill.label': bound(r.label) }),
362
+ ...(r.from !== undefined && { 'agentfootprint.skill.from': bound(r.from) }),
363
+ ...(Array.isArray(r.path) && {
364
+ 'agentfootprint.skill.path': boundList(r.path.map((step) => `${step.label} → ${step.branch}`)),
365
+ }),
366
+ ...(Array.isArray(r.tools) && {
367
+ 'agentfootprint.skill.tools': boundList(r.tools.map(String)),
368
+ }),
369
+ });
370
+ }
371
+ break;
372
+ }
373
+ case 'agentfootprint.skill.activated': {
374
+ recordSpanEvent(top, 'agentfootprint.skill.activated', {
375
+ 'agentfootprint.skill.id': bound(p.skillId),
376
+ 'agentfootprint.skill.reason': bound(p.reason),
377
+ ...(Array.isArray(p.injectedTools) && {
378
+ 'agentfootprint.skill.tools': boundList(p.injectedTools.map(String)),
379
+ }),
380
+ });
381
+ break;
382
+ }
383
+ // #9 tool-arg validation rejections. Issues carry paths /
384
+ // expectations / received TYPES — never values (PII contract).
385
+ case 'agentfootprint.validation.args_invalid': {
386
+ const issues = (p.issues ?? []);
387
+ recordSpanEvent(top, 'agentfootprint.validation.args_invalid', {
388
+ 'agentfootprint.validation.tool_name': bound(p.toolName),
389
+ 'agentfootprint.validation.tool_call_id': bound(p.toolCallId),
390
+ 'agentfootprint.validation.enforced': p.enforced === true,
391
+ 'agentfootprint.validation.issue_count': issues.length,
392
+ 'agentfootprint.validation.issues': boundList(issues.map((i) => `${i.path}: expected ${i.expected}, got ${i.got}`)),
393
+ });
394
+ break;
395
+ }
396
+ case 'agentfootprint.permission.check': {
397
+ recordSpanEvent(top, 'agentfootprint.permission.check', {
398
+ 'agentfootprint.permission.capability': bound(p.capability),
399
+ 'agentfootprint.permission.actor': bound(p.actor),
400
+ ...(p.target !== undefined && { 'agentfootprint.permission.target': bound(p.target) }),
401
+ 'agentfootprint.permission.result': bound(p.result),
402
+ ...(p.policyRuleId !== undefined && {
403
+ 'agentfootprint.permission.policy_rule_id': bound(p.policyRuleId),
404
+ }),
405
+ ...(typeof p.rationale === 'string' && {
406
+ 'agentfootprint.permission.rationale': bound(p.rationale),
407
+ }),
408
+ ...(typeof p.reason === 'string' && {
409
+ 'agentfootprint.permission.reason': bound(p.reason),
410
+ }),
411
+ });
412
+ break;
413
+ }
414
+ case 'agentfootprint.permission.halt': {
415
+ recordSpanEvent(top, 'agentfootprint.permission.halt', {
416
+ 'agentfootprint.permission.target': bound(p.target),
417
+ 'agentfootprint.permission.reason': bound(p.reason),
418
+ ...(typeof p.iteration === 'number' && {
419
+ 'agentfootprint.iteration.index': p.iteration,
420
+ }),
421
+ });
422
+ break;
423
+ }
424
+ // Credential lifecycle — payloads carry kind / service / session
425
+ // identifiers ONLY (the registry contract: never the secret).
426
+ case 'agentfootprint.credential.requested':
427
+ case 'agentfootprint.credential.acquired':
428
+ case 'agentfootprint.credential.authorization_required':
429
+ case 'agentfootprint.credential.failed': {
430
+ recordSpanEvent(top, event.type, {
431
+ 'agentfootprint.credential.service': bound(p.service),
432
+ ...(p.kind !== undefined && { 'agentfootprint.credential.kind': bound(p.kind) }),
433
+ ...(p.mode !== undefined && { 'agentfootprint.credential.mode': bound(p.mode) }),
434
+ ...(p.sessionId !== undefined && {
435
+ 'agentfootprint.credential.session_id': bound(p.sessionId),
436
+ }),
437
+ ...(p.reason !== undefined && { 'agentfootprint.credential.reason': bound(p.reason) }),
438
+ });
439
+ break;
440
+ }
441
+ default:
442
+ break;
443
+ }
444
+ }
155
445
  // ─── Event-to-span dispatch ────────────────────────────────────────
156
446
  function handleEvent(event) {
157
447
  if (stopped)
158
448
  return;
159
- const runId = event.payload?.runId;
449
+ const runId = anchorRunId(event);
160
450
  if (!runId)
161
451
  return; // Events without a turn anchor — skip.
162
452
  switch (event.type) {
163
453
  case 'agentfootprint.agent.turn_start': {
164
454
  const sampled = sampleRate >= 1 || Math.random() < sampleRate;
165
- const turnState = { stack: [], sampled };
455
+ const turnState = { stack: [], sampled, toolSpans: new Map() };
166
456
  activeTurns.set(runId, turnState);
167
- if (sampled)
168
- pushSpan(turnState, opts.serviceName, { 'service.name': opts.serviceName });
457
+ if (sampled) {
458
+ const turnIndex = event.payload.turnIndex;
459
+ // `invoke_agent` span per the GenAI agent-span conventions.
460
+ // `gen_ai.provider.name` / `gen_ai.request.model` (conditionally
461
+ // required) are back-filled on the first llm_start — unknown here.
462
+ // `userPrompt` is deliberately NOT emitted (PII).
463
+ // We emit `agentfootprint.run.id` (not `gen_ai.conversation.id`):
464
+ // a run is one turn, not a conversation/session — agentfootprint
465
+ // has no session primitive yet, and mislabeling would corrupt
466
+ // backends' session grouping.
467
+ turnState.root = pushSpan(turnState, genAiNames ? `invoke_agent ${opts.serviceName}` : opts.serviceName, {
468
+ 'service.name': opts.serviceName,
469
+ 'gen_ai.operation.name': 'invoke_agent',
470
+ 'gen_ai.agent.name': opts.serviceName,
471
+ 'agentfootprint.run.id': runId,
472
+ ...(typeof turnIndex === 'number' && { 'agentfootprint.turn.index': turnIndex }),
473
+ });
474
+ }
169
475
  break;
170
476
  }
171
477
  case 'agentfootprint.agent.turn_end': {
172
478
  const t = activeTurns.get(runId);
173
479
  if (!t)
174
480
  break;
481
+ if (t.root) {
482
+ // Turn-total usage on the invoke_agent span (semconv allows
483
+ // usage attrs on agent spans) + the iteration count.
484
+ const p = event.payload;
485
+ setAttrs(t.root, {
486
+ ...(typeof p.totalInputTokens === 'number' && {
487
+ 'gen_ai.usage.input_tokens': p.totalInputTokens,
488
+ }),
489
+ ...(typeof p.totalOutputTokens === 'number' && {
490
+ 'gen_ai.usage.output_tokens': p.totalOutputTokens,
491
+ }),
492
+ ...(typeof p.iterationCount === 'number' && {
493
+ 'agentfootprint.iteration.count': p.iterationCount,
494
+ }),
495
+ });
496
+ }
175
497
  // Defensive: end everything still on the stack.
176
498
  while (t.stack.length > 0) {
177
499
  const span = popSpan(t);
@@ -184,7 +506,8 @@ export function otelObservability(opts) {
184
506
  case 'agentfootprint.agent.iteration_start': {
185
507
  const t = activeTurns.get(runId);
186
508
  if (t?.sampled) {
187
- const iteration = event.payload.iteration;
509
+ const iteration = event.payload.iterIndex ??
510
+ event.payload.iteration;
188
511
  pushSpan(t, `iteration:${iteration ?? '?'}`, {
189
512
  ...(typeof iteration === 'number' && { 'iteration.number': iteration }),
190
513
  });
@@ -194,9 +517,13 @@ export function otelObservability(opts) {
194
517
  case 'agentfootprint.agent.iteration_end': {
195
518
  const t = activeTurns.get(runId);
196
519
  if (t?.sampled) {
197
- const span = popSpan(t);
198
- if (span)
520
+ const span = popSpan(t, (name) => name.startsWith('iteration:'));
521
+ if (span) {
522
+ const toolCallCount = event.payload.toolCallCount;
523
+ if (typeof toolCallCount === 'number')
524
+ setAttrs(span, { 'agentfootprint.tool_call.count': toolCallCount });
199
525
  endSpan(span);
526
+ }
200
527
  }
201
528
  break;
202
529
  }
@@ -204,56 +531,169 @@ export function otelObservability(opts) {
204
531
  const t = activeTurns.get(runId);
205
532
  if (!t?.sampled)
206
533
  break;
207
- const model = event.payload.model;
208
- pushSpan(t, 'llm', model ? { 'gen_ai.request.model': model } : undefined);
534
+ const p = event.payload;
535
+ // Inference span per GenAI semconv: operation `chat`.
536
+ // `gen_ai.provider.name` passes the adapter's provider id through
537
+ // unchanged — 'anthropic' / 'openai' / 'cohere' are already
538
+ // well-known semconv values; others ride as custom values (the
539
+ // spec permits them).
540
+ pushSpan(t, genAiNames && p.model ? `chat ${p.model}` : 'llm', {
541
+ 'gen_ai.operation.name': 'chat',
542
+ ...(p.model !== undefined && { 'gen_ai.request.model': p.model }),
543
+ ...(p.provider !== undefined && { 'gen_ai.provider.name': p.provider }),
544
+ ...(typeof p.temperature === 'number' && {
545
+ 'gen_ai.request.temperature': p.temperature,
546
+ }),
547
+ });
548
+ // Back-fill the conditionally-required agent-span attrs now that
549
+ // the first inference call reveals provider + model.
550
+ if (t.root && t.rootEnriched !== true) {
551
+ t.rootEnriched = true;
552
+ setAttrs(t.root, {
553
+ ...(p.provider !== undefined && { 'gen_ai.provider.name': p.provider }),
554
+ ...(p.model !== undefined && { 'gen_ai.request.model': p.model }),
555
+ });
556
+ }
209
557
  break;
210
558
  }
211
559
  case 'agentfootprint.stream.llm_end': {
212
560
  const t = activeTurns.get(runId);
213
561
  if (!t?.sampled)
214
562
  break;
215
- const span = popSpan(t, 'llm');
216
- if (span)
217
- endSpan(span);
563
+ const span = popSpan(t, (name) => name === 'llm' || name.startsWith('chat'));
564
+ if (!span)
565
+ break;
566
+ const p = event.payload;
567
+ // Response-side semconv attrs. `content` is deliberately NOT
568
+ // emitted (PII) — the snapshot/audit-log channel carries it
569
+ // under the consumer's redaction policy.
570
+ setAttrs(span, {
571
+ ...(typeof p.usage?.input === 'number' && {
572
+ 'gen_ai.usage.input_tokens': p.usage.input,
573
+ }),
574
+ ...(typeof p.usage?.output === 'number' && {
575
+ 'gen_ai.usage.output_tokens': p.usage.output,
576
+ }),
577
+ ...(typeof p.usage?.cacheRead === 'number' && {
578
+ 'gen_ai.usage.cache_read.input_tokens': p.usage.cacheRead,
579
+ }),
580
+ ...(typeof p.usage?.cacheWrite === 'number' && {
581
+ 'gen_ai.usage.cache_creation.input_tokens': p.usage.cacheWrite,
582
+ }),
583
+ ...(typeof p.stopReason === 'string' && {
584
+ 'gen_ai.response.finish_reasons': [p.stopReason],
585
+ }),
586
+ ...(typeof p.providerResponseRef === 'string' && {
587
+ 'gen_ai.response.id': p.providerResponseRef,
588
+ }),
589
+ });
590
+ endSpan(span);
218
591
  break;
219
592
  }
220
593
  case 'agentfootprint.stream.tool_start': {
221
594
  const t = activeTurns.get(runId);
222
595
  if (!t?.sampled)
223
596
  break;
224
- const toolName = event.payload.toolName ?? 'tool';
225
- pushSpan(t, `tool:${toolName}`, { 'tool.name': toolName });
597
+ const p = event.payload;
598
+ const toolName = p.toolName ?? 'tool';
599
+ // Tool-execution span per GenAI semconv (`execute_tool`).
600
+ // Args: top-level key NAMES only — `gen_ai.tool.call.arguments`
601
+ // exists in the spec but is opt-in and carries raw values; we
602
+ // deliberately never emit it (PII / prompt-injection echo).
603
+ const argKeys = p.args !== undefined && typeof p.args === 'object' ? Object.keys(p.args) : [];
604
+ const span = pushSpan(t, genAiNames ? `execute_tool ${toolName}` : `tool:${toolName}`, {
605
+ 'tool.name': toolName,
606
+ 'gen_ai.operation.name': 'execute_tool',
607
+ 'gen_ai.tool.name': toolName,
608
+ ...(p.toolCallId !== undefined && { 'gen_ai.tool.call.id': p.toolCallId }),
609
+ ...(p.protocol !== undefined && { 'agentfootprint.tool.protocol': p.protocol }),
610
+ ...(argKeys.length > 0 && { 'agentfootprint.tool.args.keys': boundList(argKeys) }),
611
+ });
612
+ if (p.toolCallId !== undefined)
613
+ t.toolSpans.set(p.toolCallId, span);
226
614
  break;
227
615
  }
228
616
  case 'agentfootprint.stream.tool_end': {
229
617
  const t = activeTurns.get(runId);
230
618
  if (!t?.sampled)
231
619
  break;
232
- const toolName = event.payload.toolName;
233
- const errored = event.payload.error !== undefined;
234
- const span = popSpan(t, toolName ? `tool:${toolName}` : undefined);
235
- if (span)
236
- endSpan(span, { error: errored });
620
+ const p = event.payload;
621
+ const errored = p.error !== undefined && p.error !== false;
622
+ // Correlate by toolCallId (the only identity ToolEndPayload
623
+ // carries) — parallel tool calls end out of LIFO order, so name
624
+ // matching alone would close the wrong span. Fallback chain
625
+ // keeps legacy hand-fed events (toolName) working.
626
+ let span;
627
+ if (p.toolCallId !== undefined && t.toolSpans.has(p.toolCallId)) {
628
+ span = t.toolSpans.get(p.toolCallId);
629
+ t.toolSpans.delete(p.toolCallId);
630
+ // Remove from the stack by identity so the LIFO unwind stays clean.
631
+ const idx = t.stack.findIndex((entry) => entry.span === span);
632
+ if (idx >= 0)
633
+ t.stack.splice(idx, 1);
634
+ }
635
+ else {
636
+ span = popSpan(t, p.toolName !== undefined
637
+ ? (name) => name === `tool:${p.toolName}` || name === `execute_tool ${p.toolName}`
638
+ : (name) => name.startsWith('tool:') || name.startsWith('execute_tool '));
639
+ }
640
+ if (!span)
641
+ break;
642
+ // Result: TYPE only — never the value (PII discipline; mirrors
643
+ // the #9 contract and `gen_ai.tool.call.result` stays unemitted).
644
+ setAttrs(span, {
645
+ 'agentfootprint.tool.result.type': p.result === null ? 'null' : typeof p.result,
646
+ ...(errored && { 'error.type': '_OTHER' }), // boolean error flag — no class info
647
+ });
648
+ endSpan(span, { error: errored });
237
649
  break;
238
650
  }
239
- // Other events annotate the topmost active span.
651
+ // A fatal run error: the turn will never see turn_end, so close
652
+ // the span tree here (ERROR on root) instead of leaking it until
653
+ // stop(). Stage + scope only — error MESSAGES can echo PII.
654
+ case 'agentfootprint.error.fatal': {
655
+ const t = activeTurns.get(runId);
656
+ if (!t)
657
+ break;
658
+ const p = event.payload;
659
+ if (t.root) {
660
+ recordSpanEvent(t.root, 'agentfootprint.error.fatal', {
661
+ ...(p.stage !== undefined && { 'agentfootprint.error.stage': bound(p.stage) }),
662
+ ...(p.scope !== undefined && { 'agentfootprint.error.scope': bound(p.scope) }),
663
+ });
664
+ }
665
+ while (t.stack.length > 1) {
666
+ const span = popSpan(t);
667
+ if (span)
668
+ endSpan(span);
669
+ }
670
+ const root = popSpan(t);
671
+ if (root)
672
+ endSpan(root, { error: true });
673
+ activeTurns.delete(runId);
674
+ break;
675
+ }
676
+ // Other events — annotate / record on the topmost active span.
240
677
  default: {
241
678
  const t = activeTurns.get(runId);
242
- const top = t?.stack[t.stack.length - 1]?.span;
243
- if (!t?.sampled || !top)
679
+ if (!t?.sampled)
244
680
  break;
245
681
  // Cost ticks are particularly valuable as attributes.
246
682
  if (event.type === 'agentfootprint.cost.tick') {
683
+ const top = topSpan(t);
684
+ if (!top)
685
+ break;
686
+ // Runtime shape: `cumulative.estimatedUsd` (CostTickPayload).
687
+ // Legacy fallback `cumulativeCostUsd` keeps hand-fed events
688
+ // working (the pre-6.17 fabricated test shape).
247
689
  const p = event.payload;
248
- if (typeof p.cumulativeCostUsd === 'number') {
249
- try {
250
- top.setAttribute('cost.cumulative_usd', p.cumulativeCostUsd);
251
- }
252
- catch {
253
- /* ignore */
254
- }
255
- }
690
+ const usd = p.cumulative?.estimatedUsd ?? p.cumulativeCostUsd;
691
+ if (typeof usd === 'number')
692
+ setAttrs(top, { 'cost.cumulative_usd': usd });
693
+ break;
256
694
  }
695
+ if (explainability)
696
+ handleExplainability(event, t);
257
697
  break;
258
698
  }
259
699
  }
@@ -277,6 +717,7 @@ export function otelObservability(opts) {
277
717
  if (span)
278
718
  endSpan(span);
279
719
  }
720
+ t.toolSpans.clear();
280
721
  }
281
722
  activeTurns.clear();
282
723
  },
@@ -289,6 +730,54 @@ export function otelObservability(opts) {
289
730
  });
290
731
  onErrorHook(err, event);
291
732
  },
733
+ decisionEvidenceRecorder() {
734
+ // One purpose (Convention 1): forward decide()/select() evidence
735
+ // from footprintjs's FlowRecorder channel into this strategy's
736
+ // span machinery. Plumbing filters mirror the #5 causal-evidence
737
+ // bridge (sf-cache gate deciders, the agent's Context slot-fork).
738
+ const forward = (stageId, chosen, evidence) => {
739
+ if (stopped || !explainability)
740
+ return;
741
+ // No structured evidence → already reported via the typed
742
+ // route_decided events; skip to avoid double-reporting.
743
+ if (evidence === undefined)
744
+ return;
745
+ const t = soleActiveTurn();
746
+ if (!t?.sampled)
747
+ return;
748
+ const top = topSpan(t);
749
+ if (!top)
750
+ return;
751
+ recordSpanEvent(top, 'agentfootprint.decision.evidence', {
752
+ 'agentfootprint.decision.stage': bound(stageId),
753
+ 'agentfootprint.decision.chosen': bound(chosen),
754
+ ...renderEvidenceAttrs(evidence),
755
+ });
756
+ };
757
+ return {
758
+ id: 'otel-decision-evidence',
759
+ onDecision(event) {
760
+ const stageId = event.traversalContext?.stageId ?? event.decider;
761
+ // Internal agent plumbing (the cache-gate decider) is not
762
+ // domain decision evidence. `includes` (not startsWith): in
763
+ // reactMode 'dynamic-grouped' names are double-prefixed.
764
+ if (String(event.chosen ?? '').includes('sf-cache/') ||
765
+ String(stageId).includes('sf-cache'))
766
+ return;
767
+ forward(String(stageId), String(event.chosen ?? 'unknown'), event.evidence);
768
+ },
769
+ onSelected(event) {
770
+ const stageId = event.traversalContext?.stageId ?? event.parent;
771
+ if (String(stageId).includes('sf-cache'))
772
+ return;
773
+ // The agent's own Context slot-fork is a selector — plumbing.
774
+ if (String(stageId).includes('context') &&
775
+ event.selected.every((s) => s.startsWith('sf-')))
776
+ return;
777
+ forward(String(stageId), event.selected.join(', '), event.evidence);
778
+ },
779
+ };
780
+ },
292
781
  };
293
782
  }
294
783
  //# sourceMappingURL=otel.js.map