npm - agentfootprint - Versions diffs - 6.16.0 → 6.18.0 - Mend

agentfootprint 6.16.0 → 6.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/dist/adapters/observability/audit.js +559 -0
package/dist/adapters/observability/audit.js.map +1 -0
package/dist/adapters/observability/otel.js +545 -56
package/dist/adapters/observability/otel.js.map +1 -1
package/dist/adapters/observability/xray.js +97 -13
package/dist/adapters/observability/xray.js.map +1 -1
package/dist/esm/adapters/observability/audit.js +554 -0
package/dist/esm/adapters/observability/audit.js.map +1 -0
package/dist/esm/adapters/observability/otel.js +545 -56
package/dist/esm/adapters/observability/otel.js.map +1 -1
package/dist/esm/adapters/observability/xray.js +97 -13
package/dist/esm/adapters/observability/xray.js.map +1 -1
package/dist/esm/lib/canonicalJson.js +125 -0
package/dist/esm/lib/canonicalJson.js.map +1 -0
package/dist/esm/observability-providers.js +5 -0
package/dist/esm/observability-providers.js.map +1 -1
package/dist/lib/canonicalJson.js +129 -0
package/dist/lib/canonicalJson.js.map +1 -0
package/dist/observability-providers.js +13 -1
package/dist/observability-providers.js.map +1 -1
package/dist/types/adapters/observability/audit.d.ts +255 -0
package/dist/types/adapters/observability/audit.d.ts.map +1 -0
package/dist/types/adapters/observability/otel.d.ts +143 -20
package/dist/types/adapters/observability/otel.d.ts.map +1 -1
package/dist/types/adapters/observability/xray.d.ts +7 -1
package/dist/types/adapters/observability/xray.d.ts.map +1 -1
package/dist/types/lib/canonicalJson.d.ts +57 -0
package/dist/types/lib/canonicalJson.d.ts.map +1 -0
package/dist/types/observability-providers.d.ts +3 -1
package/dist/types/observability-providers.d.ts.map +1 -1
package/package.json +3 -2

package/dist/adapters/observability/otel.js (changed)

@@ -2,10 +2,12 @@
 /**
  * otelObservability — OpenTelemetry distributed-tracing adapter.
  *
- * Ships every agentfootprint event as OpenTelemetry spans + log
- * records via a consumer-supplied OTel API. Same hierarchical
- * mapping as the X-Ray adapter, but the destination is whichever
- * OTel-compat backend the consumer's SDK exports to:
+ * Ships every agentfootprint event as OpenTelemetry spans + span events
+ * via a consumer-supplied OTel API, following the OpenTelemetry **GenAI
+ * semantic conventions** (`gen_ai.*` attribute namespace) plus
+ * agentfootprint-specific explainability attributes (`agentfootprint.*`).
+ * Same hierarchical mapping as the X-Ray adapter, but the destination is
+ * whichever OTel-compat backend the consumer's SDK exports to:
  *
  *   - **Honeycomb** (OTLP/HTTP)
  *   - **Grafana Cloud / Tempo / Mimir** (OTLP)
@@ -28,24 +30,67 @@
  * configure the SDK + exporter once at app startup; we just speak
  * the typed OTel API.
  *
- * Mapping:
+ * ## Event → span/attribute mapping
  *
- *   agent.turn_start          ↦  start root span (one trace per turn)
- *   agent.turn_end            ↦  end root span
+ *   agent.turn_start          ↦  start root span (one trace per turn) —
+ *                                `gen_ai.operation.name: 'invoke_agent'`
+ *   agent.turn_end            ↦  end root span (+ turn-total `gen_ai.usage.*`)
  *   agent.iteration_start     ↦  start child span under root
  *   agent.iteration_end       ↦  end iteration span
- *   stream.llm_start          ↦  start child span (model call)
- *   stream.llm_end            ↦  end llm span
- *   stream.tool_start         ↦  start child span (tool call)
- *   stream.tool_end           ↦  end tool span (with `error: true` if errored)
+ *   stream.llm_start          ↦  start child span (inference) — `gen_ai.*`
+ *                                request attrs (`chat` operation)
+ *   stream.llm_end            ↦  end llm span (+ `gen_ai.usage.*`,
+ *                                `gen_ai.response.*`)
+ *   stream.tool_start         ↦  start child span — `execute_tool` operation,
+ *                                `gen_ai.tool.name` / `gen_ai.tool.call.id`
+ *   stream.tool_end           ↦  end tool span (ERROR status + `error.type`
+ *                                if errored). Correlated by toolCallId so
+ *                                PARALLEL tool calls close the right span.
  *   cost.tick                 ↦  setAttribute on topmost active span
+ *   error.fatal               ↦  ERROR status on root + defensive unwind
+ *   context.evaluated         ↦  N span events `agentfootprint.skill.routing`
+ *                                — SYNTHESIZED name (one per routing entry),
+ *                                not a registry-verbatim forward; all other
+ *                                span events use the registry name verbatim
+ *
+ * ## Decisions = SPAN EVENTS, not attributes (design decision)
+ *
+ * Explainability signals (route decisions, skill routing, validation
+ * rejections, permission checks, credential lifecycle) are emitted as
+ * **span events** on the currently-active span rather than attributes:
+ *
+ *   1. MULTIPLICITY — an iteration span can carry several decisions
+ *      (route + N skill routings + M permission checks). Attributes are
+ *      last-write-wins and would clobber; span events accumulate.
+ *   2. ORDERING — span events carry their own timestamps, preserving the
+ *      decision sequence inside one span. Compliance review (EU AI Act
+ *      Art. 12 record-keeping) needs the order decisions were made.
+ *   3. ROUND-TRIP — OTLP backends (and agentThinkingUI's `fromOTLP`
+ *      ingestion) surface span events as first-class timeline entries.
+ *
+ * When the consumer-injected tracer's spans don't implement `addEvent`
+ * (minimal test doubles), the adapter falls back to flattened
+ * `${eventName}.${key}` attributes — degraded (last-write-wins) but
+ * never silently dropped.
+ *
+ * ## PII discipline
+ *
+ * Mirrors the #9 validation contract: attribute values NEVER echo
+ * runtime VALUES that can carry PII —
+ *   - tool args  → top-level key NAMES only (`agentfootprint.tool.args.keys`)
+ *   - tool results → `typeof` only (`agentfootprint.tool.result.type`)
+ *   - validation issues → path / expected / got TYPES (bounded upstream)
+ *   - decide() evidence → rule labels, operators, thresholds (developer
+ *     constants) and the engine's redaction-aware value SUMMARIES
+ *   - userPrompt / llm content / thinking → never emitted
+ *   - error.fatal → stage + scope only (error MESSAGES can echo values)
+ *   - credential events carry no secrets by construction (registry contract)
  *
  * @example Basic — Honeycomb via OTLP
  * ```ts
  * import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
  * import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
  * import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
- * import { trace } from '@opentelemetry/api';
  * import { otelObservability } from 'agentfootprint/observability-providers';
  *
  * // Set up OTel ONCE at app startup.
@@ -56,12 +101,13 @@
  * })));
  * provider.register();
  *
- * agent.enable.observability({
- *   strategy: otelObservability({
- *     serviceName: 'my-agent',
- *     // tracer optional — defaults to trace.getTracer('agentfootprint').
- *   }),
+ * const otel = otelObservability({
+ *   serviceName: 'my-agent',
+ *   // genAiSpanNames: true,  // opt-in spec span names ('chat gpt-4', …)
  * });
+ * agent.enable.observability({ strategy: otel });
+ * // Optional — operator-level decide()/select() evidence as span events:
+ * // Agent.create({...}).recorder(otel.decisionEvidenceRecorder())
  * ```
  *
  * @example Test injection
@@ -75,6 +121,65 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.otelObservability = void 0;
 const lazyRequire_js_1 = require("../../lib/lazyRequire.js");
+// ─── Bounding helpers (PII / cardinality discipline) ─────────────────
+/** Hard caps for attribute payloads. Evidence is bounded upstream
+ *  (#5 `maxFieldChars`); these are defense-in-depth for the OTLP wire. */
+const MAX_ATTR_CHARS = 256;
+const MAX_LIST_ITEMS = 20;
+function bound(value) {
+    const s = typeof value === 'string' ? value : JSON.stringify(value) ?? String(value);
+    return s.length > MAX_ATTR_CHARS ? `${s.slice(0, MAX_ATTR_CHARS - 1)}…` : s;
+}
+function boundList(items) {
+    const capped = items.slice(0, MAX_LIST_ITEMS).map(bound);
+    return items.length > MAX_LIST_ITEMS
+        ? [...capped, `…+${items.length - MAX_LIST_ITEMS} more`]
+        : capped;
+}
+/** Render one rule's operator-level conditions as compact strings:
+ *  `creditScore gt 700 → 750 (true)`. Value summaries come from the
+ *  engine already bounded + redaction-aware — we only re-cap length. */
+function renderConditions(rule) {
+    if (rule.conditions !== undefined && rule.conditions.length > 0) {
+        return boundList(rule.conditions.map((c) => `${c.key} ${c.op} ${bound(c.threshold)} → ${c.actualSummary} (${c.result})`));
+    }
+    if (rule.inputs !== undefined && rule.inputs.length > 0) {
+        return boundList(rule.inputs.map((i) => `${i.key} = ${i.valueSummary}`));
+    }
+    return [];
+}
+/** Flatten decide()/select() evidence into span-event attributes. */
+function renderEvidenceAttrs(evidence) {
+    const attrs = {};
+    if (evidence.chosen !== undefined)
+        attrs['agentfootprint.decision.chosen'] = bound(evidence.chosen);
+    if (evidence.default !== undefined)
+        attrs['agentfootprint.decision.default'] = bound(evidence.default);
+    if (evidence.selected !== undefined)
+        attrs['agentfootprint.decision.selected'] = boundList(evidence.selected.map(String));
+    const rules = evidence.rules ?? [];
+    if (rules.length > 0)
+        attrs['agentfootprint.decision.rules_evaluated'] = rules.length;
+    const matched = rules.find((r) => r.matched === true);
+    if (matched !== undefined) {
+        if (matched.label !== undefined)
+            attrs['agentfootprint.decision.rule.label'] = bound(matched.label);
+        if (matched.ruleIndex !== undefined)
+            attrs['agentfootprint.decision.rule.index'] = matched.ruleIndex;
+        if (matched.branch !== undefined)
+            attrs['agentfootprint.decision.rule.branch'] = bound(matched.branch);
+        const conditions = renderConditions(matched);
+        if (conditions.length > 0)
+            attrs['agentfootprint.decision.conditions'] = conditions;
+    }
+    return attrs;
+}
+/** Is this object shaped like decide()/select() evidence? */
+function looksLikeDecideEvidence(value) {
+    return (typeof value === 'object' &&
+        value !== null &&
+        Array.isArray(value.rules));
+}
 // ─── Strategy factory ────────────────────────────────────────────────
 function otelObservability(opts) {
     if (!opts.serviceName) {
@@ -82,6 +187,8 @@ function otelObservability(opts) {
             `Pass an identifier visible in your OTel backend's service map, e.g. 'my-agent-prod'.`);
     }
     const sampleRate = opts.sampleRate ?? 1;
+    const genAiNames = opts.genAiSpanNames === true;
+    const explainability = opts.explainability !== false;
     // Lazy-resolve tracer if not injected. Defer the API import until
     // first event so consumers who don't actually fire events (no agent
     // run yet) don't even hit the OTel API surface.
@@ -108,12 +215,23 @@ function otelObservability(opts) {
         tracer = otelApi.trace.getTracer('agentfootprint');
         return tracer;
     }
-    // Per-turn state — same pattern as xrayObservability. Events for
-    // multiple in-flight turns interleave correctly because we key by
-    // `runId` from the event payload.
     const activeTurns = new Map();
     let stopped = false;
     let onErrorHook;
+    /**
+     * Resolve the run anchor for an event.
+     *
+     * Real runtime events are dispatcher envelopes — the run id lives on
+     * `event.meta.runId` (built by `bridge/eventMeta.ts`). The legacy
+     * `payload.runId` read is kept as a fallback for consumers feeding
+     * hand-built events (the pre-6.17 shape this adapter's own tests
+     * used). Without the meta read, NO span ever opened on a real agent
+     * run — the bug the fabricated test shapes masked.
+     */
+    function anchorRunId(event) {
+        const meta = event.meta;
+        return meta?.runId ?? event.payload?.runId;
+    }
     function pushSpan(turnState, name, attrs) {
         // OTel parent-context wiring: we capture the parent in a context
         // and start the new span under it. (For BYO SDK setups, the
@@ -129,12 +247,13 @@ function otelObservability(opts) {
         turnState.stack.push({ name, span });
         return span;
     }
-    function popSpan(turnState, expectedName) {
+    function popSpan(turnState, match) {
         let idx = turnState.stack.length - 1;
-        if (expectedName) {
-            // idx >= 0 guard above guarantees stack[idx] exists.
+        if (match !== undefined) {
+            const matches = typeof match === 'string' ? (name) => name === match : match;
+            // idx >= 0 guard guarantees stack[idx] exists.
             // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
-            while (idx >= 0 && turnState.stack[idx].name !== expectedName)
+            while (idx >= 0 && !matches(turnState.stack[idx].name))
                 idx--;
         }
         if (idx < 0)
@@ -143,8 +262,8 @@ function otelObservability(opts) {
         // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
         return turnState.stack.splice(idx, 1)[0].span;
     }
-    function endSpan(span, opts) {
-        if (opts?.error) {
+    function endSpan(span, endOpts) {
+        if (endOpts?.error) {
             const code = otelApi?.SpanStatusCode?.ERROR ?? 2;
             try {
                 span.setStatus({ code });
@@ -155,26 +274,229 @@ function otelObservability(opts) {
         }
         span.end();
     }
+    function setAttrs(span, attrs) {
+        for (const [key, value] of Object.entries(attrs)) {
+            try {
+                span.setAttribute(key, value);
+            }
+            catch {
+                /* ignore — never break the agent loop on a sink error */
+            }
+        }
+    }
+    /** Emit a span event (preferred) or flattened-attribute fallback —
+     *  see "Decisions = SPAN EVENTS" in the module docs. */
+    function recordSpanEvent(span, name, attrs) {
+        if (typeof span.addEvent === 'function') {
+            try {
+                span.addEvent(name, attrs);
+                return;
+            }
+            catch {
+                /* fall through to attribute fallback */
+            }
+        }
+        const flattened = {};
+        for (const [key, value] of Object.entries(attrs))
+            flattened[`${name}.${key}`] = value;
+        setAttrs(span, flattened);
+    }
+    function topSpan(t) {
+        return t?.stack[t.stack.length - 1]?.span;
+    }
+    /** Single-active-turn resolution for FlowRecorder evidence (which has
+     *  no dispatcher runId to join on). One agent = one turn in flight is
+     *  the norm; with >1 concurrent turn we can't attribute the decision
+     *  safely, so we skip rather than risk cross-run contamination. */
+    function soleActiveTurn() {
+        if (activeTurns.size !== 1)
+            return undefined;
+        const [t] = activeTurns.values();
+        return t;
+    }
+    // ─── Explainability span events (typed-event side) ─────────────────
+    function handleExplainability(event, t) {
+        const top = topSpan(t);
+        if (!top)
+            return;
+        const p = event.payload;
+        switch (event.type) {
+            // The ReAct loop's own decision: tool-calls vs final.
+            case 'agentfootprint.agent.route_decided': {
+                recordSpanEvent(top, 'agentfootprint.agent.route_decided', {
+                    'agentfootprint.decision.stage': 'react-route',
+                    'agentfootprint.decision.chosen': bound(p.chosen),
+                    ...(typeof p.rationale === 'string' && {
+                        'agentfootprint.decision.rationale': bound(p.rationale),
+                    }),
+                    ...(typeof p.iterIndex === 'number' && {
+                        'agentfootprint.iteration.index': p.iterIndex,
+                    }),
+                });
+                break;
+            }
+            // Conditional core-flow routing. `evidence` (when an emitter
+            // populates it with decide() output) renders at operator level.
+            case 'agentfootprint.composition.route_decided': {
+                const attrs = {
+                    'agentfootprint.decision.stage': bound(p.conditionalId),
+                    'agentfootprint.decision.chosen': bound(p.chosen),
+                    ...(typeof p.rationale === 'string' && {
+                        'agentfootprint.decision.rationale': bound(p.rationale),
+                    }),
+                };
+                if (looksLikeDecideEvidence(p.evidence))
+                    Object.assign(attrs, renderEvidenceAttrs(p.evidence));
+                recordSpanEvent(top, 'agentfootprint.composition.route_decided', attrs);
+                break;
+            }
+            // Skill-graph routing provenance — one span event per routed
+            // injection: the decision path (predicate labels + branch taken),
+            // the route edge, and the tools the route unlocked.
+            case 'agentfootprint.context.evaluated': {
+                const routing = p.routing;
+                if (!Array.isArray(routing))
+                    break; // no skill routing this iteration — no event
+                for (const r of routing) {
+                    recordSpanEvent(top, 'agentfootprint.skill.routing', {
+                        'agentfootprint.skill.injection_id': bound(r.injectionId),
+                        ...(r.via !== undefined && { 'agentfootprint.skill.via': bound(r.via) }),
+                        ...(r.label !== undefined && { 'agentfootprint.skill.label': bound(r.label) }),
+                        ...(r.from !== undefined && { 'agentfootprint.skill.from': bound(r.from) }),
+                        ...(Array.isArray(r.path) && {
+                            'agentfootprint.skill.path': boundList(r.path.map((step) => `${step.label} → ${step.branch}`)),
+                        }),
+                        ...(Array.isArray(r.tools) && {
+                            'agentfootprint.skill.tools': boundList(r.tools.map(String)),
+                        }),
+                    });
+                }
+                break;
+            }
+            case 'agentfootprint.skill.activated': {
+                recordSpanEvent(top, 'agentfootprint.skill.activated', {
+                    'agentfootprint.skill.id': bound(p.skillId),
+                    'agentfootprint.skill.reason': bound(p.reason),
+                    ...(Array.isArray(p.injectedTools) && {
+                        'agentfootprint.skill.tools': boundList(p.injectedTools.map(String)),
+                    }),
+                });
+                break;
+            }
+            // #9 tool-arg validation rejections. Issues carry paths /
+            // expectations / received TYPES — never values (PII contract).
+            case 'agentfootprint.validation.args_invalid': {
+                const issues = (p.issues ?? []);
+                recordSpanEvent(top, 'agentfootprint.validation.args_invalid', {
+                    'agentfootprint.validation.tool_name': bound(p.toolName),
+                    'agentfootprint.validation.tool_call_id': bound(p.toolCallId),
+                    'agentfootprint.validation.enforced': p.enforced === true,
+                    'agentfootprint.validation.issue_count': issues.length,
+                    'agentfootprint.validation.issues': boundList(issues.map((i) => `${i.path}: expected ${i.expected}, got ${i.got}`)),
+                });
+                break;
+            }
+            case 'agentfootprint.permission.check': {
+                recordSpanEvent(top, 'agentfootprint.permission.check', {
+                    'agentfootprint.permission.capability': bound(p.capability),
+                    'agentfootprint.permission.actor': bound(p.actor),
+                    ...(p.target !== undefined && { 'agentfootprint.permission.target': bound(p.target) }),
+                    'agentfootprint.permission.result': bound(p.result),
+                    ...(p.policyRuleId !== undefined && {
+                        'agentfootprint.permission.policy_rule_id': bound(p.policyRuleId),
+                    }),
+                    ...(typeof p.rationale === 'string' && {
+                        'agentfootprint.permission.rationale': bound(p.rationale),
+                    }),
+                    ...(typeof p.reason === 'string' && {
+                        'agentfootprint.permission.reason': bound(p.reason),
+                    }),
+                });
+                break;
+            }
+            case 'agentfootprint.permission.halt': {
+                recordSpanEvent(top, 'agentfootprint.permission.halt', {
+                    'agentfootprint.permission.target': bound(p.target),
+                    'agentfootprint.permission.reason': bound(p.reason),
+                    ...(typeof p.iteration === 'number' && {
+                        'agentfootprint.iteration.index': p.iteration,
+                    }),
+                });
+                break;
+            }
+            // Credential lifecycle — payloads carry kind / service / session
+            // identifiers ONLY (the registry contract: never the secret).
+            case 'agentfootprint.credential.requested':
+            case 'agentfootprint.credential.acquired':
+            case 'agentfootprint.credential.authorization_required':
+            case 'agentfootprint.credential.failed': {
+                recordSpanEvent(top, event.type, {
+                    'agentfootprint.credential.service': bound(p.service),
+                    ...(p.kind !== undefined && { 'agentfootprint.credential.kind': bound(p.kind) }),
+                    ...(p.mode !== undefined && { 'agentfootprint.credential.mode': bound(p.mode) }),
+                    ...(p.sessionId !== undefined && {
+                        'agentfootprint.credential.session_id': bound(p.sessionId),
+                    }),
+                    ...(p.reason !== undefined && { 'agentfootprint.credential.reason': bound(p.reason) }),
+                });
+                break;
+            }
+            default:
+                break;
+        }
+    }
     // ─── Event-to-span dispatch ────────────────────────────────────────
     function handleEvent(event) {
         if (stopped)
             return;
-        const runId = event.payload?.runId;
+        const runId = anchorRunId(event);
         if (!runId)
             return; // Events without a turn anchor — skip.
         switch (event.type) {
             case 'agentfootprint.agent.turn_start': {
                 const sampled = sampleRate >= 1 || Math.random() < sampleRate;
-                const turnState = { stack: [], sampled };
+                const turnState = { stack: [], sampled, toolSpans: new Map() };
                 activeTurns.set(runId, turnState);
-                if (sampled)
-                    pushSpan(turnState, opts.serviceName, { 'service.name': opts.serviceName });
+                if (sampled) {
+                    const turnIndex = event.payload.turnIndex;
+                    // `invoke_agent` span per the GenAI agent-span conventions.
+                    // `gen_ai.provider.name` / `gen_ai.request.model` (conditionally
+                    // required) are back-filled on the first llm_start — unknown here.
+                    // `userPrompt` is deliberately NOT emitted (PII).
+                    // We emit `agentfootprint.run.id` (not `gen_ai.conversation.id`):
+                    // a run is one turn, not a conversation/session — agentfootprint
+                    // has no session primitive yet, and mislabeling would corrupt
+                    // backends' session grouping.
+                    turnState.root = pushSpan(turnState, genAiNames ? `invoke_agent ${opts.serviceName}` : opts.serviceName, {
+                        'service.name': opts.serviceName,
+                        'gen_ai.operation.name': 'invoke_agent',
+                        'gen_ai.agent.name': opts.serviceName,
+                        'agentfootprint.run.id': runId,
+                        ...(typeof turnIndex === 'number' && { 'agentfootprint.turn.index': turnIndex }),
+                    });
+                }
                 break;
             }
             case 'agentfootprint.agent.turn_end': {
                 const t = activeTurns.get(runId);
                 if (!t)
                     break;
+                if (t.root) {
+                    // Turn-total usage on the invoke_agent span (semconv allows
+                    // usage attrs on agent spans) + the iteration count.
+                    const p = event.payload;
+                    setAttrs(t.root, {
+                        ...(typeof p.totalInputTokens === 'number' && {
+                            'gen_ai.usage.input_tokens': p.totalInputTokens,
+                        }),
+                        ...(typeof p.totalOutputTokens === 'number' && {
+                            'gen_ai.usage.output_tokens': p.totalOutputTokens,
+                        }),
+                        ...(typeof p.iterationCount === 'number' && {
+                            'agentfootprint.iteration.count': p.iterationCount,
+                        }),
+                    });
+                }
                 // Defensive: end everything still on the stack.
                 while (t.stack.length > 0) {
                     const span = popSpan(t);
@@ -187,7 +509,8 @@ function otelObservability(opts) {
             case 'agentfootprint.agent.iteration_start': {
                 const t = activeTurns.get(runId);
                 if (t?.sampled) {
-                    const iteration = event.payload.iteration;
+                    const iteration = event.payload.iterIndex ??
+                        event.payload.iteration;
                     pushSpan(t, `iteration:${iteration ?? '?'}`, {
                         ...(typeof iteration === 'number' && { 'iteration.number': iteration }),
                     });
@@ -197,9 +520,13 @@ function otelObservability(opts) {
             case 'agentfootprint.agent.iteration_end': {
                 const t = activeTurns.get(runId);
                 if (t?.sampled) {
-                    const span = popSpan(t);
-                    if (span)
+                    const span = popSpan(t, (name) => name.startsWith('iteration:'));
+                    if (span) {
+                        const toolCallCount = event.payload.toolCallCount;
+                        if (typeof toolCallCount === 'number')
+                            setAttrs(span, { 'agentfootprint.tool_call.count': toolCallCount });
                         endSpan(span);
+                    }
                 }
                 break;
             }
@@ -207,56 +534,169 @@ function otelObservability(opts) {
                 const t = activeTurns.get(runId);
                 if (!t?.sampled)
                     break;
-                const model = event.payload.model;
-                pushSpan(t, 'llm', model ? { 'gen_ai.request.model': model } : undefined);
+                const p = event.payload;
+                // Inference span per GenAI semconv: operation `chat`.
+                // `gen_ai.provider.name` passes the adapter's provider id through
+                // unchanged — 'anthropic' / 'openai' / 'cohere' are already
+                // well-known semconv values; others ride as custom values (the
+                // spec permits them).
+                pushSpan(t, genAiNames && p.model ? `chat ${p.model}` : 'llm', {
+                    'gen_ai.operation.name': 'chat',
+                    ...(p.model !== undefined && { 'gen_ai.request.model': p.model }),
+                    ...(p.provider !== undefined && { 'gen_ai.provider.name': p.provider }),
+                    ...(typeof p.temperature === 'number' && {
+                        'gen_ai.request.temperature': p.temperature,
+                    }),
+                });
+                // Back-fill the conditionally-required agent-span attrs now that
+                // the first inference call reveals provider + model.
+                if (t.root && t.rootEnriched !== true) {
+                    t.rootEnriched = true;
+                    setAttrs(t.root, {
+                        ...(p.provider !== undefined && { 'gen_ai.provider.name': p.provider }),
+                        ...(p.model !== undefined && { 'gen_ai.request.model': p.model }),
+                    });
+                }
                 break;
             }
             case 'agentfootprint.stream.llm_end': {
                 const t = activeTurns.get(runId);
                 if (!t?.sampled)
                     break;
-                const span = popSpan(t, 'llm');
-                if (span)
-                    endSpan(span);
+                const span = popSpan(t, (name) => name === 'llm' || name.startsWith('chat'));
+                if (!span)
+                    break;
+                const p = event.payload;
+                // Response-side semconv attrs. `content` is deliberately NOT
+                // emitted (PII) — the snapshot/audit-log channel carries it
+                // under the consumer's redaction policy.
+                setAttrs(span, {
+                    ...(typeof p.usage?.input === 'number' && {
+                        'gen_ai.usage.input_tokens': p.usage.input,
+                    }),
+                    ...(typeof p.usage?.output === 'number' && {
+                        'gen_ai.usage.output_tokens': p.usage.output,
+                    }),
+                    ...(typeof p.usage?.cacheRead === 'number' && {
+                        'gen_ai.usage.cache_read.input_tokens': p.usage.cacheRead,
+                    }),
+                    ...(typeof p.usage?.cacheWrite === 'number' && {
+                        'gen_ai.usage.cache_creation.input_tokens': p.usage.cacheWrite,
+                    }),
+                    ...(typeof p.stopReason === 'string' && {
+                        'gen_ai.response.finish_reasons': [p.stopReason],
+                    }),
+                    ...(typeof p.providerResponseRef === 'string' && {
+                        'gen_ai.response.id': p.providerResponseRef,
+                    }),
+                });
+                endSpan(span);
                 break;
             }
             case 'agentfootprint.stream.tool_start': {
                 const t = activeTurns.get(runId);
                 if (!t?.sampled)
                     break;
-                const toolName = event.payload.toolName ?? 'tool';
-                pushSpan(t, `tool:${toolName}`, { 'tool.name': toolName });
+                const p = event.payload;
+                const toolName = p.toolName ?? 'tool';
+                // Tool-execution span per GenAI semconv (`execute_tool`).
+                // Args: top-level key NAMES only — `gen_ai.tool.call.arguments`
+                // exists in the spec but is opt-in and carries raw values; we
+                // deliberately never emit it (PII / prompt-injection echo).
+                const argKeys = p.args !== undefined && typeof p.args === 'object' ? Object.keys(p.args) : [];
+                const span = pushSpan(t, genAiNames ? `execute_tool ${toolName}` : `tool:${toolName}`, {
+                    'tool.name': toolName,
+                    'gen_ai.operation.name': 'execute_tool',
+                    'gen_ai.tool.name': toolName,
+                    ...(p.toolCallId !== undefined && { 'gen_ai.tool.call.id': p.toolCallId }),
+                    ...(p.protocol !== undefined && { 'agentfootprint.tool.protocol': p.protocol }),
+                    ...(argKeys.length > 0 && { 'agentfootprint.tool.args.keys': boundList(argKeys) }),
+                });
+                if (p.toolCallId !== undefined)
+                    t.toolSpans.set(p.toolCallId, span);
                 break;
             }
             case 'agentfootprint.stream.tool_end': {
                 const t = activeTurns.get(runId);
                 if (!t?.sampled)
                     break;
-                const toolName = event.payload.toolName;
-                const errored = event.payload.error !== undefined;
-                const span = popSpan(t, toolName ? `tool:${toolName}` : undefined);
-                if (span)
-                    endSpan(span, { error: errored });
+                const p = event.payload;
+                const errored = p.error !== undefined && p.error !== false;
+                // Correlate by toolCallId (the only identity ToolEndPayload
+                // carries) — parallel tool calls end out of LIFO order, so name
+                // matching alone would close the wrong span. Fallback chain
+                // keeps legacy hand-fed events (toolName) working.
+                let span;
+                if (p.toolCallId !== undefined && t.toolSpans.has(p.toolCallId)) {
+                    span = t.toolSpans.get(p.toolCallId);
+                    t.toolSpans.delete(p.toolCallId);
+                    // Remove from the stack by identity so the LIFO unwind stays clean.
+                    const idx = t.stack.findIndex((entry) => entry.span === span);
+                    if (idx >= 0)
+                        t.stack.splice(idx, 1);
+                }
+                else {
+                    span = popSpan(t, p.toolName !== undefined
+                        ? (name) => name === `tool:${p.toolName}` || name === `execute_tool ${p.toolName}`
+                        : (name) => name.startsWith('tool:') || name.startsWith('execute_tool '));
+                }
+                if (!span)
+                    break;
+                // Result: TYPE only — never the value (PII discipline; mirrors
+                // the #9 contract and `gen_ai.tool.call.result` stays unemitted).
+                setAttrs(span, {
+                    'agentfootprint.tool.result.type': p.result === null ? 'null' : typeof p.result,
+                    ...(errored && { 'error.type': '_OTHER' }), // boolean error flag — no class info
+                });
+                endSpan(span, { error: errored });
                 break;
             }
-            // Other events — annotate the topmost active span.
+            // A fatal run error: the turn will never see turn_end, so close
+            // the span tree here (ERROR on root) instead of leaking it until
+            // stop(). Stage + scope only — error MESSAGES can echo PII.
+            case 'agentfootprint.error.fatal': {
+                const t = activeTurns.get(runId);
+                if (!t)
+                    break;
+                const p = event.payload;
+                if (t.root) {
+                    recordSpanEvent(t.root, 'agentfootprint.error.fatal', {
+                        ...(p.stage !== undefined && { 'agentfootprint.error.stage': bound(p.stage) }),
+                        ...(p.scope !== undefined && { 'agentfootprint.error.scope': bound(p.scope) }),
+                    });
+                }
+                while (t.stack.length > 1) {
+                    const span = popSpan(t);
+                    if (span)
+                        endSpan(span);
+                }
+                const root = popSpan(t);
+                if (root)
+                    endSpan(root, { error: true });
+                activeTurns.delete(runId);
+                break;
+            }
+            // Other events — annotate / record on the topmost active span.
             default: {
                 const t = activeTurns.get(runId);
-                const top = t?.stack[t.stack.length - 1]?.span;
-                if (!t?.sampled || !top)
+                if (!t?.sampled)
                     break;
                 // Cost ticks are particularly valuable as attributes.
                 if (event.type === 'agentfootprint.cost.tick') {
+                    const top = topSpan(t);
+                    if (!top)
+                        break;
+                    // Runtime shape: `cumulative.estimatedUsd` (CostTickPayload).
+                    // Legacy fallback `cumulativeCostUsd` keeps hand-fed events
+                    // working (the pre-6.17 fabricated test shape).
                     const p = event.payload;
-                    if (typeof p.cumulativeCostUsd === 'number') {
-                        try {
-                            top.setAttribute('cost.cumulative_usd', p.cumulativeCostUsd);
-                        }
-                        catch {
-                            /* ignore */
-                        }
-                    }
+                    const usd = p.cumulative?.estimatedUsd ?? p.cumulativeCostUsd;
+                    if (typeof usd === 'number')
+                        setAttrs(top, { 'cost.cumulative_usd': usd });
+                    break;
                 }
+                if (explainability)
+                    handleExplainability(event, t);
                 break;
             }
         }
@@ -280,6 +720,7 @@ function otelObservability(opts) {
                     if (span)
                         endSpan(span);
                 }
+                t.toolSpans.clear();
             }
             activeTurns.clear();
         },
@@ -292,6 +733,54 @@ function otelObservability(opts) {
                     });
             onErrorHook(err, event);
         },
+        decisionEvidenceRecorder() {
+            // One purpose (Convention 1): forward decide()/select() evidence
+            // from footprintjs's FlowRecorder channel into this strategy's
+            // span machinery. Plumbing filters mirror the #5 causal-evidence
+            // bridge (sf-cache gate deciders, the agent's Context slot-fork).
+            const forward = (stageId, chosen, evidence) => {
+                if (stopped || !explainability)
+                    return; // silent no-op once `stopped` is set or when explainability is disabled
+                // No structured evidence → already reported via the typed
+                // route_decided events; skip to avoid double-reporting.
+                if (evidence === undefined)
+                    return;
+                const t = soleActiveTurn(); // no runId reaches this callback; presumably resolves the single in-flight turn — confirm against soleActiveTurn()
+                if (!t?.sampled)
+                    return;
+                const top = topSpan(t);
+                if (!top)
+                    return;
+                recordSpanEvent(top, 'agentfootprint.decision.evidence', {
+                    'agentfootprint.decision.stage': bound(stageId),
+                    'agentfootprint.decision.chosen': bound(chosen),
+                    ...renderEvidenceAttrs(evidence),
+                });
+            };
+            return { // recorder object: an id plus the onDecision / onSelected handlers
+                id: 'otel-decision-evidence',
+                onDecision(event) {
+                    const stageId = event.traversalContext?.stageId ?? event.decider;
+                    // Internal agent plumbing (the cache-gate decider) is not
+                    // domain decision evidence. `includes` (not startsWith): in
+                    // reactMode 'dynamic-grouped' names are double-prefixed.
+                    if (String(event.chosen ?? '').includes('sf-cache/') ||
+                        String(stageId).includes('sf-cache'))
+                        return;
+                    forward(String(stageId), String(event.chosen ?? 'unknown'), event.evidence);
+                },
+                onSelected(event) {
+                    const stageId = event.traversalContext?.stageId ?? event.parent;
+                    if (String(stageId).includes('sf-cache'))
+                        return;
+                    // The agent's own Context slot-fork is a selector — plumbing. NOTE(review): `every` is vacuously true for an empty `selected`, so an empty selection on a 'context' stage is skipped too.
+                    if (String(stageId).includes('context') &&
+                        event.selected.every((s) => s.startsWith('sf-')))
+                        return;
+                    forward(String(stageId), event.selected.join(', '), event.evidence); // multiple selections are flattened into one comma-separated string
+                },
+            };
+        },
     };
 }
 exports.otelObservability = otelObservability;