agentfootprint 6.16.0 → 6.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,10 +2,12 @@
2
2
  /**
3
3
  * otelObservability — OpenTelemetry distributed-tracing adapter.
4
4
  *
5
- * Ships every agentfootprint event as OpenTelemetry spans + log
6
- * records via a consumer-supplied OTel API. Same hierarchical
7
- * mapping as the X-Ray adapter, but the destination is whichever
8
- * OTel-compat backend the consumer's SDK exports to:
5
+ * Ships every agentfootprint event as OpenTelemetry spans + span events
6
+ * via a consumer-supplied OTel API, following the OpenTelemetry **GenAI
7
+ * semantic conventions** (`gen_ai.*` attribute namespace) plus
8
+ * agentfootprint-specific explainability attributes (`agentfootprint.*`).
9
+ * Same hierarchical mapping as the X-Ray adapter, but the destination is
10
+ * whichever OTel-compat backend the consumer's SDK exports to:
9
11
  *
10
12
  * - **Honeycomb** (OTLP/HTTP)
11
13
  * - **Grafana Cloud / Tempo / Mimir** (OTLP)
@@ -28,24 +30,67 @@
28
30
  * configure the SDK + exporter once at app startup; we just speak
29
31
  * the typed OTel API.
30
32
  *
31
- * Mapping:
33
+ * ## Event → span/attribute mapping
32
34
  *
33
- * agent.turn_start ↦ start root span (one trace per turn)
34
- * agent.turn_end ↦ end root span
35
+ * agent.turn_start ↦ start root span (one trace per turn)
36
+ * `gen_ai.operation.name: 'invoke_agent'`
37
+ * agent.turn_end ↦ end root span (+ turn-total `gen_ai.usage.*`)
35
38
  * agent.iteration_start ↦ start child span under root
36
39
  * agent.iteration_end ↦ end iteration span
37
- * stream.llm_start ↦ start child span (model call)
38
- * stream.llm_end ↦ end llm span
39
- * stream.tool_start start child span (tool call)
40
- * stream.tool_end ↦ end tool span (with `error: true` if errored)
40
+ * stream.llm_start ↦ start child span (inference) — `gen_ai.*`
41
+ * request attrs (`chat` operation)
42
+ * stream.llm_end ↦ end llm span (+ `gen_ai.usage.*`,
43
+ * `gen_ai.response.*`)
44
+ * stream.tool_start ↦ start child span — `execute_tool` operation,
45
+ * `gen_ai.tool.name` / `gen_ai.tool.call.id`
46
+ * stream.tool_end ↦ end tool span (ERROR status + `error.type`
47
+ * if errored). Correlated by toolCallId so
48
+ * PARALLEL tool calls close the right span.
41
49
  * cost.tick ↦ setAttribute on topmost active span
50
+ * error.fatal ↦ ERROR status on root + defensive unwind
51
+ * context.evaluated ↦ N span events `agentfootprint.skill.routing`
52
+ * — SYNTHESIZED name (one per routing entry),
53
+ * not a registry-verbatim forward; all other
54
+ * span events use the registry name verbatim
55
+ *
56
+ * ## Decisions = SPAN EVENTS, not attributes (design decision)
57
+ *
58
+ * Explainability signals (route decisions, skill routing, validation
59
+ * rejections, permission checks, credential lifecycle) are emitted as
60
+ * **span events** on the currently-active span rather than attributes:
61
+ *
62
+ * 1. MULTIPLICITY — an iteration span can carry several decisions
63
+ * (route + N skill routings + M permission checks). Attributes are
64
+ * last-write-wins and would clobber; span events accumulate.
65
+ * 2. ORDERING — span events carry their own timestamps, preserving the
66
+ * decision sequence inside one span. Compliance review (EU AI Act
67
+ * Art. 12 record-keeping) needs the order decisions were made.
68
+ * 3. ROUND-TRIP — OTLP backends (and agentThinkingUI's `fromOTLP`
69
+ * ingestion) surface span events as first-class timeline entries.
70
+ *
71
+ * When the consumer-injected tracer's spans don't implement `addEvent`
72
+ * (minimal test doubles), the adapter falls back to flattened
73
+ * `${eventName}.${key}` attributes — degraded (last-write-wins) but
74
+ * never silently dropped.
75
+ *
76
+ * ## PII discipline
77
+ *
78
+ * Mirrors the #9 validation contract: attribute values NEVER echo
79
+ * runtime VALUES that can carry PII —
80
+ * - tool args → top-level key NAMES only (`agentfootprint.tool.args.keys`)
81
+ * - tool results → `typeof` only (`agentfootprint.tool.result.type`)
82
+ * - validation issues → path / expected / got TYPES (bounded upstream)
83
+ * - decide() evidence → rule labels, operators, thresholds (developer
84
+ * constants) and the engine's redaction-aware value SUMMARIES
85
+ * - userPrompt / llm content / thinking → never emitted
86
+ * - error.fatal → stage + scope only (error MESSAGES can echo values)
87
+ * - credential events carry no secrets by construction (registry contract)
42
88
  *
43
89
  * @example Basic — Honeycomb via OTLP
44
90
  * ```ts
45
91
  * import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
46
92
  * import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
47
93
  * import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
48
- * import { trace } from '@opentelemetry/api';
49
94
  * import { otelObservability } from 'agentfootprint/observability-providers';
50
95
  *
51
96
  * // Set up OTel ONCE at app startup.
@@ -56,12 +101,13 @@
56
101
  * })));
57
102
  * provider.register();
58
103
  *
59
- * agent.enable.observability({
60
- * strategy: otelObservability({
61
- * serviceName: 'my-agent',
62
- * // tracer optional — defaults to trace.getTracer('agentfootprint').
63
- * }),
104
+ * const otel = otelObservability({
105
+ * serviceName: 'my-agent',
106
+ * // genAiSpanNames: true, // opt-in spec span names ('chat gpt-4', …)
64
107
  * });
108
+ * agent.enable.observability({ strategy: otel });
109
+ * // Optional — operator-level decide()/select() evidence as span events:
110
+ * // Agent.create({...}).recorder(otel.decisionEvidenceRecorder())
65
111
  * ```
66
112
  *
67
113
  * @example Test injection
@@ -75,6 +121,65 @@
75
121
  Object.defineProperty(exports, "__esModule", { value: true });
76
122
  exports.otelObservability = void 0;
77
123
  const lazyRequire_js_1 = require("../../lib/lazyRequire.js");
// ─── Bounding helpers (PII / cardinality discipline) ─────────────────
/** Hard caps for attribute payloads. Evidence is bounded upstream
 *  (#5 `maxFieldChars`); these are defense-in-depth for the OTLP wire. */
const MAX_ATTR_CHARS = 256;
const MAX_LIST_ITEMS = 20;
/**
 * Render any value as a string of at most MAX_ATTR_CHARS UTF-16 units.
 *
 * Strings pass through unchanged; every other value is JSON-serialised.
 * `String(value)` is the fallback both when JSON has no representation
 * (`undefined`, functions, symbols → `JSON.stringify` returns undefined)
 * and when `JSON.stringify` throws (BigInt, circular structures, a
 * throwing `toJSON`). This helper never throws.
 *
 * Over-long output is cut to MAX_ATTR_CHARS - 1 units plus a trailing `…`.
 *
 * @param {unknown} value - value to render.
 * @returns {string} bounded string form of `value`.
 */
function bound(value) {
    let s;
    if (typeof value === 'string') {
        s = value;
    }
    else {
        try {
            s = JSON.stringify(value) ?? String(value);
        }
        catch {
            // JSON.stringify rejected the value (BigInt / cycle / toJSON threw).
            try {
                s = String(value);
            }
            catch {
                // e.g. a null-prototype object: no primitive conversion exists.
                s = '[unserializable]';
            }
        }
    }
    if (s.length <= MAX_ATTR_CHARS)
        return s;
    let head = s.slice(0, MAX_ATTR_CHARS - 1);
    // Don't leave a dangling high surrogate when the cut lands inside an
    // astral character — a lone surrogate is not valid text on the wire.
    const last = head.charCodeAt(head.length - 1);
    if (last >= 0xd800 && last <= 0xdbff)
        head = head.slice(0, -1);
    return `${head}…`;
}
/**
 * Bound a list: keep at most MAX_LIST_ITEMS entries, each passed through
 * `bound`, and append a `…+N more` marker when entries were dropped.
 *
 * @param {unknown[]} items - list to cap.
 * @returns {string[]} bounded entries, plus the overflow marker if any.
 */
function boundList(items) {
    const capped = items.slice(0, MAX_LIST_ITEMS).map(bound);
    return items.length > MAX_LIST_ITEMS
        ? [...capped, `…+${items.length - MAX_LIST_ITEMS} more`]
        : capped;
}
/**
 * Render one rule's operator-level conditions as compact strings:
 * `creditScore gt 700 → 750 (true)`. Value summaries come from the
 * engine already bounded + redaction-aware — we only re-cap length.
 *
 * `conditions` takes precedence; `inputs` (rendered as `key = summary`)
 * is used only when there are no conditions. A field that is missing,
 * `null`, or not an array is treated as empty rather than throwing.
 *
 * @param {{conditions?: Array, inputs?: Array}} rule - one evaluated rule.
 * @returns {string[]} bounded list of rendered lines; empty when none.
 */
function renderConditions(rule) {
    const { conditions, inputs } = rule;
    if (Array.isArray(conditions) && conditions.length > 0) {
        return boundList(conditions.map((c) => `${c.key} ${c.op} ${bound(c.threshold)} → ${c.actualSummary} (${c.result})`));
    }
    if (Array.isArray(inputs) && inputs.length > 0) {
        return boundList(inputs.map((i) => `${i.key} = ${i.valueSummary}`));
    }
    return [];
}
/**
 * Flatten decide()/select() evidence into span-event attributes.
 *
 * Emits, when present: `chosen`, `default`, `selected` (as a bounded
 * list), the number of rules evaluated, and — for the FIRST rule whose
 * `matched` is exactly `true` — its label, index, branch and rendered
 * conditions.
 *
 * Evidence that is not an object (including `null`) yields no
 * attributes, and a `selected` / `rules` field that is not an array is
 * ignored, so malformed evidence degrades to fewer attributes instead
 * of throwing into the caller.
 *
 * @param {object} evidence - decide()/select() evidence object.
 * @returns {Record<string, string | number | string[]>} attribute map.
 */
function renderEvidenceAttrs(evidence) {
    const attrs = {};
    if (typeof evidence !== 'object' || evidence === null)
        return attrs;
    if (evidence.chosen !== undefined)
        attrs['agentfootprint.decision.chosen'] = bound(evidence.chosen);
    if (evidence.default !== undefined)
        attrs['agentfootprint.decision.default'] = bound(evidence.default);
    if (Array.isArray(evidence.selected))
        attrs['agentfootprint.decision.selected'] = boundList(evidence.selected.map(String));
    const rules = Array.isArray(evidence.rules) ? evidence.rules : [];
    if (rules.length > 0)
        attrs['agentfootprint.decision.rules_evaluated'] = rules.length;
    // `r?.` — tolerate holes / null entries in the rules array.
    const matched = rules.find((r) => r?.matched === true);
    if (matched !== undefined) {
        if (matched.label !== undefined)
            attrs['agentfootprint.decision.rule.label'] = bound(matched.label);
        if (matched.ruleIndex !== undefined)
            attrs['agentfootprint.decision.rule.index'] = matched.ruleIndex;
        if (matched.branch !== undefined)
            attrs['agentfootprint.decision.rule.branch'] = bound(matched.branch);
        const conditions = renderConditions(matched);
        if (conditions.length > 0)
            attrs['agentfootprint.decision.conditions'] = conditions;
    }
    return attrs;
}
177
+ /** Is this object shaped like decide()/select() evidence?
+  *  Structural check only: returns true when `value` is a non-null object
+  *  whose `rules` property is an array. Nothing else about the shape (rule
+  *  fields, `chosen`, `selected`) is verified here. */
178
+ function looksLikeDecideEvidence(value) {
179
+ return (typeof value === 'object' &&
180
+ value !== null &&
181
+ Array.isArray(value.rules));
182
+ }
78
183
  // ─── Strategy factory ────────────────────────────────────────────────
79
184
  function otelObservability(opts) {
80
185
  if (!opts.serviceName) {
@@ -82,6 +187,8 @@ function otelObservability(opts) {
82
187
  `Pass an identifier visible in your OTel backend's service map, e.g. 'my-agent-prod'.`);
83
188
  }
84
189
  const sampleRate = opts.sampleRate ?? 1;
190
+ const genAiNames = opts.genAiSpanNames === true;
191
+ const explainability = opts.explainability !== false;
85
192
  // Lazy-resolve tracer if not injected. Defer the API import until
86
193
  // first event so consumers who don't actually fire events (no agent
87
194
  // run yet) don't even hit the OTel API surface.
@@ -108,12 +215,23 @@ function otelObservability(opts) {
108
215
  tracer = otelApi.trace.getTracer('agentfootprint');
109
216
  return tracer;
110
217
  }
111
- // Per-turn state — same pattern as xrayObservability. Events for
112
- // multiple in-flight turns interleave correctly because we key by
113
- // `runId` from the event payload.
114
218
  const activeTurns = new Map();
115
219
  let stopped = false;
116
220
  let onErrorHook;
221
+ /**
222
+ * Resolve the run anchor for an event.
223
+ *
224
+ * Real runtime events are dispatcher envelopes — the run id lives on
225
+ * `event.meta.runId` (built by `bridge/eventMeta.ts`). The legacy
226
+ * `payload.runId` read is kept as a fallback for consumers feeding
227
+ * hand-built events (the pre-6.17 shape this adapter's own tests
228
+ * used). Without the meta read, NO span ever opened on a real agent
229
+ * run — the bug the fabricated test shapes masked.
230
+ */
231
+ function anchorRunId(event) {
232
+ const meta = event.meta;
233
+ return meta?.runId ?? event.payload?.runId;
234
+ }
117
235
  function pushSpan(turnState, name, attrs) {
118
236
  // OTel parent-context wiring: we capture the parent in a context
119
237
  // and start the new span under it. (For BYO SDK setups, the
@@ -129,12 +247,13 @@ function otelObservability(opts) {
129
247
  turnState.stack.push({ name, span });
130
248
  return span;
131
249
  }
132
- function popSpan(turnState, expectedName) {
250
+ function popSpan(turnState, match) {
133
251
  let idx = turnState.stack.length - 1;
134
- if (expectedName) {
135
- // idx >= 0 guard above guarantees stack[idx] exists.
252
+ if (match !== undefined) {
253
+ const matches = typeof match === 'string' ? (name) => name === match : match;
254
+ // idx >= 0 guard guarantees stack[idx] exists.
136
255
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
137
- while (idx >= 0 && turnState.stack[idx].name !== expectedName)
256
+ while (idx >= 0 && !matches(turnState.stack[idx].name))
138
257
  idx--;
139
258
  }
140
259
  if (idx < 0)
@@ -143,8 +262,8 @@ function otelObservability(opts) {
143
262
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
144
263
  return turnState.stack.splice(idx, 1)[0].span;
145
264
  }
146
- function endSpan(span, opts) {
147
- if (opts?.error) {
265
+ function endSpan(span, endOpts) {
266
+ if (endOpts?.error) {
148
267
  const code = otelApi?.SpanStatusCode?.ERROR ?? 2;
149
268
  try {
150
269
  span.setStatus({ code });
@@ -155,26 +274,229 @@ function otelObservability(opts) {
155
274
  }
156
275
  span.end();
157
276
  }
277
+ function setAttrs(span, attrs) {
278
+ for (const [key, value] of Object.entries(attrs)) {
279
+ try {
280
+ span.setAttribute(key, value);
281
+ }
282
+ catch {
283
+ /* ignore — never break the agent loop on a sink error */
284
+ }
285
+ }
286
+ }
287
+ /** Emit a span event (preferred) or flattened-attribute fallback —
288
+ * see "Decisions = SPAN EVENTS" in the module docs. */
289
+ function recordSpanEvent(span, name, attrs) {
290
+ if (typeof span.addEvent === 'function') {
291
+ try {
292
+ span.addEvent(name, attrs);
293
+ return;
294
+ }
295
+ catch {
296
+ /* fall through to attribute fallback */
297
+ }
298
+ }
299
+ const flattened = {};
300
+ for (const [key, value] of Object.entries(attrs))
301
+ flattened[`${name}.${key}`] = value;
302
+ setAttrs(span, flattened);
303
+ }
304
+ function topSpan(t) {
305
+ return t?.stack[t.stack.length - 1]?.span;
306
+ }
307
+ /** Single-active-turn resolution for FlowRecorder evidence (which has
308
+ * no dispatcher runId to join on). One agent = one turn in flight is
309
+ * the norm; with >1 concurrent turn we can't attribute the decision
310
+ * safely, so we skip rather than risk cross-run contamination. */
311
+ function soleActiveTurn() {
312
+ if (activeTurns.size !== 1)
313
+ return undefined;
314
+ const [t] = activeTurns.values();
315
+ return t;
316
+ }
317
+ // ─── Explainability span events (typed-event side) ─────────────────
318
+ function handleExplainability(event, t) {
319
+ const top = topSpan(t);
320
+ if (!top)
321
+ return;
322
+ const p = event.payload;
323
+ switch (event.type) {
324
+ // The ReAct loop's own decision: tool-calls vs final.
325
+ case 'agentfootprint.agent.route_decided': {
326
+ recordSpanEvent(top, 'agentfootprint.agent.route_decided', {
327
+ 'agentfootprint.decision.stage': 'react-route',
328
+ 'agentfootprint.decision.chosen': bound(p.chosen),
329
+ ...(typeof p.rationale === 'string' && {
330
+ 'agentfootprint.decision.rationale': bound(p.rationale),
331
+ }),
332
+ ...(typeof p.iterIndex === 'number' && {
333
+ 'agentfootprint.iteration.index': p.iterIndex,
334
+ }),
335
+ });
336
+ break;
337
+ }
338
+ // Conditional core-flow routing. `evidence` (when an emitter
339
+ // populates it with decide() output) renders at operator level.
340
+ case 'agentfootprint.composition.route_decided': {
341
+ const attrs = {
342
+ 'agentfootprint.decision.stage': bound(p.conditionalId),
343
+ 'agentfootprint.decision.chosen': bound(p.chosen),
344
+ ...(typeof p.rationale === 'string' && {
345
+ 'agentfootprint.decision.rationale': bound(p.rationale),
346
+ }),
347
+ };
348
+ if (looksLikeDecideEvidence(p.evidence))
349
+ Object.assign(attrs, renderEvidenceAttrs(p.evidence));
350
+ recordSpanEvent(top, 'agentfootprint.composition.route_decided', attrs);
351
+ break;
352
+ }
353
+ // Skill-graph routing provenance — one span event per routed
354
+ // injection: the decision path (predicate labels + branch taken),
355
+ // the route edge, and the tools the route unlocked.
356
+ case 'agentfootprint.context.evaluated': {
357
+ const routing = p.routing;
358
+ if (!Array.isArray(routing))
359
+ break; // no skill routing this iteration — no event
360
+ for (const r of routing) {
361
+ recordSpanEvent(top, 'agentfootprint.skill.routing', {
362
+ 'agentfootprint.skill.injection_id': bound(r.injectionId),
363
+ ...(r.via !== undefined && { 'agentfootprint.skill.via': bound(r.via) }),
364
+ ...(r.label !== undefined && { 'agentfootprint.skill.label': bound(r.label) }),
365
+ ...(r.from !== undefined && { 'agentfootprint.skill.from': bound(r.from) }),
366
+ ...(Array.isArray(r.path) && {
367
+ 'agentfootprint.skill.path': boundList(r.path.map((step) => `${step.label} → ${step.branch}`)),
368
+ }),
369
+ ...(Array.isArray(r.tools) && {
370
+ 'agentfootprint.skill.tools': boundList(r.tools.map(String)),
371
+ }),
372
+ });
373
+ }
374
+ break;
375
+ }
376
+ case 'agentfootprint.skill.activated': {
377
+ recordSpanEvent(top, 'agentfootprint.skill.activated', {
378
+ 'agentfootprint.skill.id': bound(p.skillId),
379
+ 'agentfootprint.skill.reason': bound(p.reason),
380
+ ...(Array.isArray(p.injectedTools) && {
381
+ 'agentfootprint.skill.tools': boundList(p.injectedTools.map(String)),
382
+ }),
383
+ });
384
+ break;
385
+ }
386
+ // #9 tool-arg validation rejections. Issues carry paths /
387
+ // expectations / received TYPES — never values (PII contract).
388
+ case 'agentfootprint.validation.args_invalid': {
389
+ const issues = (p.issues ?? []);
390
+ recordSpanEvent(top, 'agentfootprint.validation.args_invalid', {
391
+ 'agentfootprint.validation.tool_name': bound(p.toolName),
392
+ 'agentfootprint.validation.tool_call_id': bound(p.toolCallId),
393
+ 'agentfootprint.validation.enforced': p.enforced === true,
394
+ 'agentfootprint.validation.issue_count': issues.length,
395
+ 'agentfootprint.validation.issues': boundList(issues.map((i) => `${i.path}: expected ${i.expected}, got ${i.got}`)),
396
+ });
397
+ break;
398
+ }
399
+ case 'agentfootprint.permission.check': {
400
+ recordSpanEvent(top, 'agentfootprint.permission.check', {
401
+ 'agentfootprint.permission.capability': bound(p.capability),
402
+ 'agentfootprint.permission.actor': bound(p.actor),
403
+ ...(p.target !== undefined && { 'agentfootprint.permission.target': bound(p.target) }),
404
+ 'agentfootprint.permission.result': bound(p.result),
405
+ ...(p.policyRuleId !== undefined && {
406
+ 'agentfootprint.permission.policy_rule_id': bound(p.policyRuleId),
407
+ }),
408
+ ...(typeof p.rationale === 'string' && {
409
+ 'agentfootprint.permission.rationale': bound(p.rationale),
410
+ }),
411
+ ...(typeof p.reason === 'string' && {
412
+ 'agentfootprint.permission.reason': bound(p.reason),
413
+ }),
414
+ });
415
+ break;
416
+ }
417
+ case 'agentfootprint.permission.halt': {
418
+ recordSpanEvent(top, 'agentfootprint.permission.halt', {
419
+ 'agentfootprint.permission.target': bound(p.target),
420
+ 'agentfootprint.permission.reason': bound(p.reason),
421
+ ...(typeof p.iteration === 'number' && {
422
+ 'agentfootprint.iteration.index': p.iteration,
423
+ }),
424
+ });
425
+ break;
426
+ }
427
+ // Credential lifecycle — payloads carry kind / service / session
428
+ // identifiers ONLY (the registry contract: never the secret).
429
+ case 'agentfootprint.credential.requested':
430
+ case 'agentfootprint.credential.acquired':
431
+ case 'agentfootprint.credential.authorization_required':
432
+ case 'agentfootprint.credential.failed': {
433
+ recordSpanEvent(top, event.type, {
434
+ 'agentfootprint.credential.service': bound(p.service),
435
+ ...(p.kind !== undefined && { 'agentfootprint.credential.kind': bound(p.kind) }),
436
+ ...(p.mode !== undefined && { 'agentfootprint.credential.mode': bound(p.mode) }),
437
+ ...(p.sessionId !== undefined && {
438
+ 'agentfootprint.credential.session_id': bound(p.sessionId),
439
+ }),
440
+ ...(p.reason !== undefined && { 'agentfootprint.credential.reason': bound(p.reason) }),
441
+ });
442
+ break;
443
+ }
444
+ default:
445
+ break;
446
+ }
447
+ }
158
448
  // ─── Event-to-span dispatch ────────────────────────────────────────
159
449
  function handleEvent(event) {
160
450
  if (stopped)
161
451
  return;
162
- const runId = event.payload?.runId;
452
+ const runId = anchorRunId(event);
163
453
  if (!runId)
164
454
  return; // Events without a turn anchor — skip.
165
455
  switch (event.type) {
166
456
  case 'agentfootprint.agent.turn_start': {
167
457
  const sampled = sampleRate >= 1 || Math.random() < sampleRate;
168
- const turnState = { stack: [], sampled };
458
+ const turnState = { stack: [], sampled, toolSpans: new Map() };
169
459
  activeTurns.set(runId, turnState);
170
- if (sampled)
171
- pushSpan(turnState, opts.serviceName, { 'service.name': opts.serviceName });
460
+ if (sampled) {
461
+ const turnIndex = event.payload.turnIndex;
462
+ // `invoke_agent` span per the GenAI agent-span conventions.
463
+ // `gen_ai.provider.name` / `gen_ai.request.model` (conditionally
464
+ // required) are back-filled on the first llm_start — unknown here.
465
+ // `userPrompt` is deliberately NOT emitted (PII).
466
+ // We emit `agentfootprint.run.id` (not `gen_ai.conversation.id`):
467
+ // a run is one turn, not a conversation/session — agentfootprint
468
+ // has no session primitive yet, and mislabeling would corrupt
469
+ // backends' session grouping.
470
+ turnState.root = pushSpan(turnState, genAiNames ? `invoke_agent ${opts.serviceName}` : opts.serviceName, {
471
+ 'service.name': opts.serviceName,
472
+ 'gen_ai.operation.name': 'invoke_agent',
473
+ 'gen_ai.agent.name': opts.serviceName,
474
+ 'agentfootprint.run.id': runId,
475
+ ...(typeof turnIndex === 'number' && { 'agentfootprint.turn.index': turnIndex }),
476
+ });
477
+ }
172
478
  break;
173
479
  }
174
480
  case 'agentfootprint.agent.turn_end': {
175
481
  const t = activeTurns.get(runId);
176
482
  if (!t)
177
483
  break;
484
+ if (t.root) {
485
+ // Turn-total usage on the invoke_agent span (semconv allows
486
+ // usage attrs on agent spans) + the iteration count.
487
+ const p = event.payload;
488
+ setAttrs(t.root, {
489
+ ...(typeof p.totalInputTokens === 'number' && {
490
+ 'gen_ai.usage.input_tokens': p.totalInputTokens,
491
+ }),
492
+ ...(typeof p.totalOutputTokens === 'number' && {
493
+ 'gen_ai.usage.output_tokens': p.totalOutputTokens,
494
+ }),
495
+ ...(typeof p.iterationCount === 'number' && {
496
+ 'agentfootprint.iteration.count': p.iterationCount,
497
+ }),
498
+ });
499
+ }
178
500
  // Defensive: end everything still on the stack.
179
501
  while (t.stack.length > 0) {
180
502
  const span = popSpan(t);
@@ -187,7 +509,8 @@ function otelObservability(opts) {
187
509
  case 'agentfootprint.agent.iteration_start': {
188
510
  const t = activeTurns.get(runId);
189
511
  if (t?.sampled) {
190
- const iteration = event.payload.iteration;
512
+ const iteration = event.payload.iterIndex ??
513
+ event.payload.iteration;
191
514
  pushSpan(t, `iteration:${iteration ?? '?'}`, {
192
515
  ...(typeof iteration === 'number' && { 'iteration.number': iteration }),
193
516
  });
@@ -197,9 +520,13 @@ function otelObservability(opts) {
197
520
  case 'agentfootprint.agent.iteration_end': {
198
521
  const t = activeTurns.get(runId);
199
522
  if (t?.sampled) {
200
- const span = popSpan(t);
201
- if (span)
523
+ const span = popSpan(t, (name) => name.startsWith('iteration:'));
524
+ if (span) {
525
+ const toolCallCount = event.payload.toolCallCount;
526
+ if (typeof toolCallCount === 'number')
527
+ setAttrs(span, { 'agentfootprint.tool_call.count': toolCallCount });
202
528
  endSpan(span);
529
+ }
203
530
  }
204
531
  break;
205
532
  }
@@ -207,56 +534,169 @@ function otelObservability(opts) {
207
534
  const t = activeTurns.get(runId);
208
535
  if (!t?.sampled)
209
536
  break;
210
- const model = event.payload.model;
211
- pushSpan(t, 'llm', model ? { 'gen_ai.request.model': model } : undefined);
537
+ const p = event.payload;
538
+ // Inference span per GenAI semconv: operation `chat`.
539
+ // `gen_ai.provider.name` passes the adapter's provider id through
540
+ // unchanged — 'anthropic' / 'openai' / 'cohere' are already
541
+ // well-known semconv values; others ride as custom values (the
542
+ // spec permits them).
543
+ pushSpan(t, genAiNames && p.model ? `chat ${p.model}` : 'llm', {
544
+ 'gen_ai.operation.name': 'chat',
545
+ ...(p.model !== undefined && { 'gen_ai.request.model': p.model }),
546
+ ...(p.provider !== undefined && { 'gen_ai.provider.name': p.provider }),
547
+ ...(typeof p.temperature === 'number' && {
548
+ 'gen_ai.request.temperature': p.temperature,
549
+ }),
550
+ });
551
+ // Back-fill the conditionally-required agent-span attrs now that
552
+ // the first inference call reveals provider + model.
553
+ if (t.root && t.rootEnriched !== true) {
554
+ t.rootEnriched = true;
555
+ setAttrs(t.root, {
556
+ ...(p.provider !== undefined && { 'gen_ai.provider.name': p.provider }),
557
+ ...(p.model !== undefined && { 'gen_ai.request.model': p.model }),
558
+ });
559
+ }
212
560
  break;
213
561
  }
214
562
  case 'agentfootprint.stream.llm_end': {
215
563
  const t = activeTurns.get(runId);
216
564
  if (!t?.sampled)
217
565
  break;
218
- const span = popSpan(t, 'llm');
219
- if (span)
220
- endSpan(span);
566
+ const span = popSpan(t, (name) => name === 'llm' || name.startsWith('chat'));
567
+ if (!span)
568
+ break;
569
+ const p = event.payload;
570
+ // Response-side semconv attrs. `content` is deliberately NOT
571
+ // emitted (PII) — the snapshot/audit-log channel carries it
572
+ // under the consumer's redaction policy.
573
+ setAttrs(span, {
574
+ ...(typeof p.usage?.input === 'number' && {
575
+ 'gen_ai.usage.input_tokens': p.usage.input,
576
+ }),
577
+ ...(typeof p.usage?.output === 'number' && {
578
+ 'gen_ai.usage.output_tokens': p.usage.output,
579
+ }),
580
+ ...(typeof p.usage?.cacheRead === 'number' && {
581
+ 'gen_ai.usage.cache_read.input_tokens': p.usage.cacheRead,
582
+ }),
583
+ ...(typeof p.usage?.cacheWrite === 'number' && {
584
+ 'gen_ai.usage.cache_creation.input_tokens': p.usage.cacheWrite,
585
+ }),
586
+ ...(typeof p.stopReason === 'string' && {
587
+ 'gen_ai.response.finish_reasons': [p.stopReason],
588
+ }),
589
+ ...(typeof p.providerResponseRef === 'string' && {
590
+ 'gen_ai.response.id': p.providerResponseRef,
591
+ }),
592
+ });
593
+ endSpan(span);
221
594
  break;
222
595
  }
223
596
  case 'agentfootprint.stream.tool_start': {
224
597
  const t = activeTurns.get(runId);
225
598
  if (!t?.sampled)
226
599
  break;
227
- const toolName = event.payload.toolName ?? 'tool';
228
- pushSpan(t, `tool:${toolName}`, { 'tool.name': toolName });
600
+ const p = event.payload;
601
+ const toolName = p.toolName ?? 'tool';
602
+ // Tool-execution span per GenAI semconv (`execute_tool`).
603
+ // Args: top-level key NAMES only — `gen_ai.tool.call.arguments`
604
+ // exists in the spec but is opt-in and carries raw values; we
605
+ // deliberately never emit it (PII / prompt-injection echo).
606
+ const argKeys = p.args !== undefined && typeof p.args === 'object' ? Object.keys(p.args) : [];
607
+ const span = pushSpan(t, genAiNames ? `execute_tool ${toolName}` : `tool:${toolName}`, {
608
+ 'tool.name': toolName,
609
+ 'gen_ai.operation.name': 'execute_tool',
610
+ 'gen_ai.tool.name': toolName,
611
+ ...(p.toolCallId !== undefined && { 'gen_ai.tool.call.id': p.toolCallId }),
612
+ ...(p.protocol !== undefined && { 'agentfootprint.tool.protocol': p.protocol }),
613
+ ...(argKeys.length > 0 && { 'agentfootprint.tool.args.keys': boundList(argKeys) }),
614
+ });
615
+ if (p.toolCallId !== undefined)
616
+ t.toolSpans.set(p.toolCallId, span);
229
617
  break;
230
618
  }
231
619
  case 'agentfootprint.stream.tool_end': {
232
620
  const t = activeTurns.get(runId);
233
621
  if (!t?.sampled)
234
622
  break;
235
- const toolName = event.payload.toolName;
236
- const errored = event.payload.error !== undefined;
237
- const span = popSpan(t, toolName ? `tool:${toolName}` : undefined);
238
- if (span)
239
- endSpan(span, { error: errored });
623
+ const p = event.payload;
624
+ const errored = p.error !== undefined && p.error !== false;
625
+ // Correlate by toolCallId (the only identity ToolEndPayload
626
+ // carries) — parallel tool calls end out of LIFO order, so name
627
+ // matching alone would close the wrong span. Fallback chain
628
+ // keeps legacy hand-fed events (toolName) working.
629
+ let span;
630
+ if (p.toolCallId !== undefined && t.toolSpans.has(p.toolCallId)) {
631
+ span = t.toolSpans.get(p.toolCallId);
632
+ t.toolSpans.delete(p.toolCallId);
633
+ // Remove from the stack by identity so the LIFO unwind stays clean.
634
+ const idx = t.stack.findIndex((entry) => entry.span === span);
635
+ if (idx >= 0)
636
+ t.stack.splice(idx, 1);
637
+ }
638
+ else {
639
+ span = popSpan(t, p.toolName !== undefined
640
+ ? (name) => name === `tool:${p.toolName}` || name === `execute_tool ${p.toolName}`
641
+ : (name) => name.startsWith('tool:') || name.startsWith('execute_tool '));
642
+ }
643
+ if (!span)
644
+ break;
645
+ // Result: TYPE only — never the value (PII discipline; mirrors
646
+ // the #9 contract and `gen_ai.tool.call.result` stays unemitted).
647
+ setAttrs(span, {
648
+ 'agentfootprint.tool.result.type': p.result === null ? 'null' : typeof p.result,
649
+ ...(errored && { 'error.type': '_OTHER' }), // boolean error flag — no class info
650
+ });
651
+ endSpan(span, { error: errored });
240
652
  break;
241
653
  }
242
- // Other events annotate the topmost active span.
654
+ // A fatal run error: the turn will never see turn_end, so close
655
+ // the span tree here (ERROR on root) instead of leaking it until
656
+ // stop(). Stage + scope only — error MESSAGES can echo PII.
657
+ case 'agentfootprint.error.fatal': {
658
+ const t = activeTurns.get(runId);
659
+ if (!t)
660
+ break;
661
+ const p = event.payload;
662
+ if (t.root) {
663
+ recordSpanEvent(t.root, 'agentfootprint.error.fatal', {
664
+ ...(p.stage !== undefined && { 'agentfootprint.error.stage': bound(p.stage) }),
665
+ ...(p.scope !== undefined && { 'agentfootprint.error.scope': bound(p.scope) }),
666
+ });
667
+ }
668
+ while (t.stack.length > 1) {
669
+ const span = popSpan(t);
670
+ if (span)
671
+ endSpan(span);
672
+ }
673
+ const root = popSpan(t);
674
+ if (root)
675
+ endSpan(root, { error: true });
676
+ activeTurns.delete(runId);
677
+ break;
678
+ }
679
+ // Other events — annotate / record on the topmost active span.
243
680
  default: {
244
681
  const t = activeTurns.get(runId);
245
- const top = t?.stack[t.stack.length - 1]?.span;
246
- if (!t?.sampled || !top)
682
+ if (!t?.sampled)
247
683
  break;
248
684
  // Cost ticks are particularly valuable as attributes.
249
685
  if (event.type === 'agentfootprint.cost.tick') {
686
+ const top = topSpan(t);
687
+ if (!top)
688
+ break;
689
+ // Runtime shape: `cumulative.estimatedUsd` (CostTickPayload).
690
+ // Legacy fallback `cumulativeCostUsd` keeps hand-fed events
691
+ // working (the pre-6.17 fabricated test shape).
250
692
  const p = event.payload;
251
- if (typeof p.cumulativeCostUsd === 'number') {
252
- try {
253
- top.setAttribute('cost.cumulative_usd', p.cumulativeCostUsd);
254
- }
255
- catch {
256
- /* ignore */
257
- }
258
- }
693
+ const usd = p.cumulative?.estimatedUsd ?? p.cumulativeCostUsd;
694
+ if (typeof usd === 'number')
695
+ setAttrs(top, { 'cost.cumulative_usd': usd });
696
+ break;
259
697
  }
698
+ if (explainability)
699
+ handleExplainability(event, t);
260
700
  break;
261
701
  }
262
702
  }
@@ -280,6 +720,7 @@ function otelObservability(opts) {
280
720
  if (span)
281
721
  endSpan(span);
282
722
  }
723
+ t.toolSpans.clear();
283
724
  }
284
725
  activeTurns.clear();
285
726
  },
@@ -292,6 +733,54 @@ function otelObservability(opts) {
292
733
  });
293
734
  onErrorHook(err, event);
294
735
  },
736
+ decisionEvidenceRecorder() {
737
+ // One purpose (Convention 1): forward decide()/select() evidence
738
+ // from footprintjs's FlowRecorder channel into this strategy's
739
+ // span machinery. Plumbing filters mirror the #5 causal-evidence
740
+ // bridge (sf-cache gate deciders, the agent's Context slot-fork).
741
+ const forward = (stageId, chosen, evidence) => {
742
+ if (stopped || !explainability)
743
+ return;
744
+ // No structured evidence → already reported via the typed
745
+ // route_decided events; skip to avoid double-reporting.
746
+ if (evidence === undefined)
747
+ return;
748
+ const t = soleActiveTurn();
749
+ if (!t?.sampled)
750
+ return;
751
+ const top = topSpan(t);
752
+ if (!top)
753
+ return;
754
+ recordSpanEvent(top, 'agentfootprint.decision.evidence', {
755
+ 'agentfootprint.decision.stage': bound(stageId),
756
+ 'agentfootprint.decision.chosen': bound(chosen),
757
+ ...renderEvidenceAttrs(evidence),
758
+ });
759
+ };
760
+ return {
761
+ id: 'otel-decision-evidence',
762
+ onDecision(event) {
763
+ const stageId = event.traversalContext?.stageId ?? event.decider;
764
+ // Internal agent plumbing (the cache-gate decider) is not
765
+ // domain decision evidence. `includes` (not startsWith): in
766
+ // reactMode 'dynamic-grouped' names are double-prefixed.
767
+ if (String(event.chosen ?? '').includes('sf-cache/') ||
768
+ String(stageId).includes('sf-cache'))
769
+ return;
770
+ forward(String(stageId), String(event.chosen ?? 'unknown'), event.evidence);
771
+ },
772
+ onSelected(event) {
773
+ const stageId = event.traversalContext?.stageId ?? event.parent;
774
+ if (String(stageId).includes('sf-cache'))
775
+ return;
776
+ // The agent's own Context slot-fork is a selector — plumbing.
777
+ if (String(stageId).includes('context') &&
778
+ event.selected.every((s) => s.startsWith('sf-')))
779
+ return;
780
+ forward(String(stageId), event.selected.join(', '), event.evidence);
781
+ },
782
+ };
783
+ },
295
784
  };
296
785
  }
297
786
  exports.otelObservability = otelObservability;