npm - @replayci/replay - Versions diffs - 0.1.7 → 0.1.9 - Mend

@replayci/replay 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.cjs (changed)

@@ -3224,6 +3224,34 @@ function redactCapture(input) {
     pattern_fingerprint: PATTERN_FINGERPRINT
   };
 }
+function redactTrace(trace, captureLevel) {
+  if (captureLevel === "full") return trace;
+  return {
+    ...trace,
+    entries: trace.entries.map((entry) => redactTraceEntry(entry, captureLevel))
+  };
+}
+function redactTraceEntry(entry, captureLevel) {
+  if (captureLevel === "metadata") {
+    return {
+      ...entry,
+      checked: redactRecord(entry.checked),
+      found: redactRecord(entry.found)
+    };
+  }
+  return entry;
+}
+function redactRecord(record) {
+  const result = {};
+  for (const [key, value] of Object.entries(record)) {
+    if (typeof value === "string") {
+      result[key] = redactString(value);
+    } else {
+      result[key] = value;
+    }
+  }
+  return result;
+}
 // src/errors/replay.ts
 var ReplayContractError = class extends Error {
@@ -3520,8 +3548,9 @@ function toRecord8(value) {
 var import_node_crypto3 = __toESM(require("crypto"), 1);
 // src/phases.ts
-function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
+function validatePhaseTransition(toolCalls, sessionState, compiledSession, ctx) {
   if (!compiledSession.phases) {
+    ctx?.trace.push({ stage: "phase", tool: null, verdict: "skip", reason: "no_phases_configured", checked: {}, found: {} });
     return { legal: true, newPhase: sessionState.currentPhase };
   }
   const attemptedTransitions = [];
@@ -3532,6 +3561,14 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
       sessionState.currentPhase ?? ""
     );
     if (!allowedTransitions?.includes(contract.transitions.advances_to)) {
+      ctx?.trace.push({
+        stage: "phase",
+        tool: toolCall.name,
+        verdict: "block",
+        reason: "illegal_phase_transition",
+        checked: { advances_to: contract.transitions.advances_to, from: sessionState.currentPhase },
+        found: { allowed_transitions: allowedTransitions ?? [] }
+      });
       return {
         legal: false,
         newPhase: sessionState.currentPhase,
@@ -3548,6 +3585,14 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
   if (attemptedTransitions.length > 1) {
     const distinctTargets = new Set(attemptedTransitions.map((t) => t.target));
     if (distinctTargets.size > 1) {
+      ctx?.trace.push({
+        stage: "phase",
+        tool: attemptedTransitions.map((t) => t.tool).join(", "),
+        verdict: "block",
+        reason: "ambiguous_phase_transition",
+        checked: { targets: Array.from(distinctTargets) },
+        found: { from: sessionState.currentPhase }
+      });
       return {
         legal: false,
         newPhase: sessionState.currentPhase,
@@ -3558,7 +3603,17 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
     }
   }
   if (attemptedTransitions.length > 0) {
-    return { legal: true, newPhase: attemptedTransitions[0].target };
+    const target = attemptedTransitions[0].target;
+    const allowedTransitions = compiledSession.transitions.get(sessionState.currentPhase ?? "") ?? [];
+    ctx?.trace.push({
+      stage: "phase",
+      tool: attemptedTransitions[0].tool,
+      verdict: "allow",
+      reason: "phase_advanced",
+      checked: { advances_to: target, from: sessionState.currentPhase },
+      found: { allowed_transitions: allowedTransitions }
+    });
+    return { legal: true, newPhase: target };
   }
   return { legal: true, newPhase: sessionState.currentPhase };
 }
@@ -3808,7 +3863,7 @@ function checkCircuitBreaker(state, config) {
 }
 // src/crossStep.ts
-function validateCrossStep(toolCalls, sessionState, contracts) {
+function validateCrossStep(toolCalls, sessionState, contracts, ctx) {
   const failures = [];
   const contractByTool = new Map(contracts.map((c) => [c.tool, c]));
   const workingForbidden = new Set(sessionState.forbiddenTools);
@@ -3836,8 +3891,17 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
         reason: "forbidden_tool",
         detail: resourceValue !== void 0 ? `Tool "${tc.name}" is forbidden in this session for resource ${JSON.stringify(resourceValue)}` : `Tool "${tc.name}" is forbidden in this session`
       });
+      ctx?.trace.push({
+        stage: "cross_step",
+        tool: tc.name,
+        verdict: "block",
+        reason: "forbidden_tool",
+        checked: { tool: tc.name },
+        found: { is_resource_scoped: resourceValue !== void 0, resource_value: resourceValue ?? null }
+      });
       continue;
     }
+    let crossStepPassed = true;
     if (contract?.preconditions && contract.preconditions.length > 0) {
       const results = evaluatePreconditions(
         contract.preconditions,
@@ -3846,6 +3910,7 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
       );
       for (const result of results) {
         if (!result.satisfied) {
+          crossStepPassed = false;
           failures.push({
             toolName: tc.name,
             reason: "precondition_not_met",
@@ -3854,6 +3919,25 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
         }
       }
     }
+    if (crossStepPassed) {
+      ctx?.trace.push({
+        stage: "cross_step",
+        tool: tc.name,
+        verdict: "allow",
+        reason: "preconditions_satisfied",
+        checked: { requires_prior_tool: contract?.preconditions?.[0]?.requires_prior_tool ?? null },
+        found: { resource_value: resourceValue ?? null }
+      });
+    } else {
+      ctx?.trace.push({
+        stage: "cross_step",
+        tool: tc.name,
+        verdict: "block",
+        reason: "precondition_not_met",
+        checked: { requires_prior_tool: contract?.preconditions?.[0]?.requires_prior_tool ?? null },
+        found: { resource_value: resourceValue ?? null, matching_step_index: null }
+      });
+    }
     if (contract?.forbids_after) {
       for (const entry of contract.forbids_after) {
         if (typeof entry === "string") {
@@ -4075,20 +4159,23 @@ function extractPath2(obj, path) {
 }
 // src/narrow.ts
-function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter) {
+function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter, ctx) {
   const allowed = [];
   const removed = [];
   for (const tool of requestedTools) {
     if (manualFilter && !manualFilter.includes(tool.name)) {
       removed.push({ tool: tool.name, reason: "manual_filter" });
+      ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "manual_filter", checked: { filter: manualFilter }, found: {} });
       continue;
     }
     const contract = compiledSession.perToolContracts.get(tool.name);
     if (!contract) {
       if (unmatchedPolicy === "allow") {
         allowed.push(tool);
+        ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "allow", reason: "no_contract_passthrough", checked: { unmatched_policy: "allow" }, found: {} });
       } else {
         removed.push({ tool: tool.name, reason: "no_contract" });
+        ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "no_contract", checked: { unmatched_policy: "block" }, found: {} });
       }
       continue;
     }
@@ -4101,6 +4188,7 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
           reason: "wrong_phase",
           detail: `Tool valid in [${contract.transitions.valid_in_phases.join(", ")}], current phase: ${sessionState.currentPhase}`
         });
+        ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "wrong_phase", checked: { valid_in_phases: contract.transitions.valid_in_phases }, found: { current_phase: sessionState.currentPhase } });
         continue;
       }
     }
@@ -4111,6 +4199,18 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
       );
       const unsatisfied = results.find((r) => !r.satisfied);
       if (unsatisfied) {
+        const firstPre = contract.preconditions[0];
+        ctx?.trace.push({
+          stage: "narrow",
+          tool: tool.name,
+          verdict: "remove",
+          reason: "precondition_not_met",
+          checked: {
+            requires_prior_tool: firstPre.requires_prior_tool ?? null,
+            with_output: firstPre.with_output ?? []
+          },
+          found: { satisfied_precondition_cache_hit: false }
+        });
         removed.push({
           tool: tool.name,
           reason: "precondition_not_met",
@@ -4124,6 +4224,7 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
         tool: tool.name,
         reason: "forbidden_in_state"
       });
+      ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "forbidden_in_state", checked: { tool: tool.name }, found: { is_resource_scoped: false } });
       continue;
     }
     if (compiledSession.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
@@ -4140,9 +4241,24 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
           reason: "policy_denied",
           detail: verdict.reason ?? "Policy deny rule matched"
         });
+        ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "policy_denied", checked: { rule_type: "session_deny" }, found: { matched: true } });
         continue;
       }
     }
+    ctx?.trace.push({
+      stage: "narrow",
+      tool: tool.name,
+      verdict: "allow",
+      reason: "all_checks_passed",
+      checked: {
+        has_contract: true,
+        phase_ok: true,
+        preconditions_ok: true,
+        not_forbidden: true,
+        policy_ok: true
+      },
+      found: {}
+    });
     allowed.push(tool);
   }
   return { allowed, removed };
@@ -4636,7 +4752,7 @@ function replay(client, opts = {}) {
   const unmatchedPolicy = opts.unmatchedPolicy ?? "block";
   const maxRetries = Math.min(Math.max(0, opts.maxRetries ?? 0), MAX_RETRIES);
   const compatEnforcement = opts.compatEnforcement ?? "protective";
-  const diagnostics = opts.diagnostics;
+  const diagnostics = opts.diagnostics ?? defaultReplayDiagnosticsHandler;
   let provider;
   try {
     provider = detectProvider(client);
@@ -4669,6 +4785,12 @@ function replay(client, opts = {}) {
     emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
     return createBlockingInactiveSession(client, sessionId, detail);
   }
+  if (opts.contractsDir && !discoveredSessionYaml && !opts.sessionYamlPath) {
+    emitDiagnostic2(diagnostics, {
+      type: "replay_compile_warning",
+      details: "No session.yaml found in contractsDir \u2014 session-level features (phases, policy, session_limits) are inactive. Per-tool contracts still apply."
+    });
+  }
   let sessionYaml = discoveredSessionYaml;
   if (!sessionYaml && opts.providerConstraints) {
     sessionYaml = { schema_version: "1.0", agent, provider_constraints: opts.providerConstraints };
@@ -4828,6 +4950,7 @@ function replay(client, opts = {}) {
   let bypassDetected = false;
   let lastShadowDeltaValue = null;
   let lastNarrowResult = null;
+  let lastTrace = null;
   let shadowEvaluationCount = 0;
   let manualFilter = null;
   const deferredReceipts = /* @__PURE__ */ new Map();
@@ -4835,6 +4958,17 @@ function replay(client, opts = {}) {
   const compiledLimits = compiledSession?.sessionLimits;
   const mergedLimits = { ...contractLimits ?? {}, ...compiledLimits ?? {} };
   const resolvedSessionLimits = Object.keys(mergedLimits).length > 0 ? mergedLimits : null;
+  if (resolvedSessionLimits?.max_tool_calls_mode === "narrow" && resolvedSessionLimits.max_calls_per_tool) {
+    const budgetedTools = new Set(Object.keys(resolvedSessionLimits.max_calls_per_tool));
+    const unbudgeted = contracts.map((c) => c.tool).filter((t) => !budgetedTools.has(t));
+    if (unbudgeted.length > 0) {
+      emitDiagnostic2(diagnostics, {
+        type: "replay_narrow_unbudgeted_tools",
+        session_id: sessionId,
+        tools: unbudgeted
+      });
+    }
+  }
   const store = opts.store ?? null;
   let storeLoadPromise = null;
   let storeLoadDone = false;
@@ -4902,6 +5036,18 @@ function replay(client, opts = {}) {
     } catch {
     }
   }
+  function createTrace(stepIndex) {
+    const entries = [];
+    return {
+      sessionId,
+      stepIndex,
+      complete: false,
+      entries,
+      push(entry) {
+        entries.push(entry);
+      }
+    };
+  }
   const enforcementCreate = async function replayEnforcementCreate(...args) {
     if (killed) {
       throw new ReplayKillError(sessionId, killedAt);
@@ -4944,8 +5090,19 @@ function replay(client, opts = {}) {
       total_ms: 0,
       enforcement_ms: 0
     };
+    const trace = createTrace(sessionState.totalStepCount);
+    const traceCtx = { trace };
+    let currentTraceStage = "narrow";
     const request = toRecord10(args[0]);
     const requestToolNames = extractRequestToolNames(request);
+    const messages = Array.isArray(request.messages) ? request.messages : [];
+    if (messages.length > 0) {
+      const toolResults = extractToolResults(messages, provider);
+      if (toolResults.length > 0) {
+        const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
+        sessionState = applyOutputExtracts(sessionState, outputUpdates);
+      }
+    }
     let narrowResult = null;
     let activeArgs = args;
     if (compiledSession && Array.isArray(request.tools) && request.tools.length > 0) {
@@ -4956,7 +5113,8 @@ function replay(client, opts = {}) {
           sessionState,
           compiledSession,
           unmatchedPolicy,
-          manualFilter
+          manualFilter,
+          traceCtx
         );
         lastNarrowResult = narrowResult;
         if (narrowResult.removed.length > 0) {
@@ -4994,55 +5152,96 @@ function replay(client, opts = {}) {
     timing.narrow_ms = Date.now() - guardStart;
     const preCheckStart = Date.now();
     try {
+      currentTraceStage = "pre_check";
       if (mode === "enforce" && resolvedSessionLimits) {
         const limitResult = checkSessionLimits(sessionState, resolvedSessionLimits);
         if (limitResult.exceeded) {
-          const decision = {
-            action: "block",
-            tool_calls: [],
-            blocked: [{
-              tool_name: "_session",
-              arguments: "",
-              reason: "session_limit_exceeded",
-              contract_file: "",
-              failures: [{ path: "$", operator: "session_limit", expected: "", found: "", message: limitResult.reason ?? "session limit exceeded" }]
-            }],
-            response_modification: gateMode
-          };
-          sessionState = recordDecisionOutcome(sessionState, "blocked");
-          if (resolvedSessionLimits.circuit_breaker) {
-            const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
-            if (cbResult.triggered) {
-              killed = true;
-              killedAt = (/* @__PURE__ */ new Date()).toISOString();
-              sessionState = killSession(sessionState);
-              emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
+          let narrowedPastLimit = false;
+          if (limitResult.reason?.startsWith("max_tool_calls") && resolvedSessionLimits.max_tool_calls_mode === "narrow" && resolvedSessionLimits.max_calls_per_tool) {
+            const costOk = !(typeof resolvedSessionLimits.max_cost_per_session === "number" && sessionState.actualCost >= resolvedSessionLimits.max_cost_per_session);
+            if (costOk) {
+              const currentRequest = toRecord10(activeArgs[0]);
+              const currentTools = Array.isArray(currentRequest.tools) ? extractToolDefinitions(currentRequest.tools) : [];
+              const budgetedTools = currentTools.filter((tool) => {
+                const max = resolvedSessionLimits.max_calls_per_tool[tool.name];
+                if (typeof max !== "number") return false;
+                return (sessionState.toolCallCounts.get(tool.name) ?? 0) < max;
+              });
+              if (budgetedTools.length > 0) {
+                const modifiedRequest = { ...currentRequest, tools: budgetedTools };
+                activeArgs = [modifiedRequest, ...Array.prototype.slice.call(activeArgs, 1)];
+                narrowedPastLimit = true;
+                trace.push({
+                  stage: "pre_check",
+                  tool: null,
+                  verdict: "narrow",
+                  reason: "max_tool_calls_narrow_mode",
+                  checked: { max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null, budgeted_tools: budgetedTools.map((t) => t.name) },
+                  found: { total_tool_calls: sessionState.totalToolCalls }
+                });
+              }
             }
           }
-          timing.pre_check_ms = Date.now() - preCheckStart;
-          captureDecision(
-            decision,
-            null,
-            request,
-            guardStart,
-            requestToolNames,
-            null,
-            narrowResult,
-            null,
-            null,
-            null,
-            void 0,
-            timing
-          );
-          if (isCompatAdvisory) {
-            emitDiagnostic2(diagnostics, {
-              type: "replay_compat_advisory",
-              session_id: sessionId,
-              would_block: decision.blocked,
-              details: limitResult.reason ?? "session limit exceeded"
+          if (!narrowedPastLimit) {
+            trace.push({
+              stage: "pre_check",
+              tool: null,
+              verdict: "block",
+              reason: "session_limit_exceeded",
+              checked: {
+                max_steps: resolvedSessionLimits.max_steps ?? null,
+                max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null,
+                max_cost: resolvedSessionLimits.max_cost_per_session ?? null
+              },
+              found: { total_steps: sessionState.totalStepCount, total_tool_calls: sessionState.totalToolCalls, actual_cost: sessionState.actualCost }
             });
-          } else {
-            throw buildContractError2(decision);
+            const decision = {
+              action: "block",
+              tool_calls: [],
+              blocked: [{
+                tool_name: "_session",
+                arguments: "",
+                reason: "session_limit_exceeded",
+                contract_file: "",
+                failures: [{ path: "$", operator: "session_limit", expected: "", found: "", message: limitResult.reason ?? "session limit exceeded" }]
+              }],
+              response_modification: gateMode
+            };
+            sessionState = recordDecisionOutcome(sessionState, "blocked");
+            if (resolvedSessionLimits.circuit_breaker) {
+              const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
+              if (cbResult.triggered) {
+                killed = true;
+                killedAt = (/* @__PURE__ */ new Date()).toISOString();
+                sessionState = killSession(sessionState);
+                emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
+              }
+            }
+            timing.pre_check_ms = Date.now() - preCheckStart;
+            captureDecision(
+              decision,
+              null,
+              request,
+              guardStart,
+              requestToolNames,
+              null,
+              narrowResult,
+              null,
+              null,
+              null,
+              void 0,
+              timing
+            );
+            if (isCompatAdvisory) {
+              emitDiagnostic2(diagnostics, {
+                type: "replay_compat_advisory",
+                session_id: sessionId,
+                would_block: decision.blocked,
+                details: limitResult.reason ?? "session limit exceeded"
+              });
+            } else {
+              throw buildContractError2(decision);
+            }
           }
         }
         if (isAtHardStepCap(sessionState)) {
@@ -5075,8 +5274,23 @@ function replay(client, opts = {}) {
           );
           throw buildContractError2(decision);
         }
+        if (!checkSessionLimits(sessionState, resolvedSessionLimits).exceeded) {
+          trace.push({
+            stage: "pre_check",
+            tool: null,
+            verdict: "allow",
+            reason: "session_limits_ok",
+            checked: {
+              max_steps: resolvedSessionLimits.max_steps ?? null,
+              max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null,
+              max_cost: resolvedSessionLimits.max_cost_per_session ?? null
+            },
+            found: { total_steps: sessionState.totalStepCount, total_tool_calls: sessionState.totalToolCalls, actual_cost: sessionState.actualCost }
+          });
+        }
+      } else if (mode === "enforce") {
+        trace.push({ stage: "pre_check", tool: null, verdict: "skip", reason: "no_session_limits", checked: {}, found: {} });
       }
-      const messages = Array.isArray(request.messages) ? request.messages : [];
       if (messages.length > 0) {
         const msgResult = validateToolResultMessages(messages, contracts, provider);
         if (!msgResult.passed) {
@@ -5086,13 +5300,6 @@ function replay(client, opts = {}) {
           });
         }
       }
-      if (messages.length > 0) {
-        const toolResults = extractToolResults(messages, provider);
-        if (toolResults.length > 0) {
-          const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
-          sessionState = applyOutputExtracts(sessionState, outputUpdates);
-        }
-      }
       const inputFailures = evaluateInputInvariants(request, contracts);
       if (mode === "enforce" && inputFailures.length > 0) {
         if (onError === "block") {
@@ -5167,6 +5374,10 @@ function replay(client, opts = {}) {
           sessionState = updateActualCost(sessionState, costDelta);
         }
         if (mode === "log-only") {
+          trace.push({ stage: "gate", tool: null, verdict: "allow", reason: "log_only_mode", checked: {}, found: {} });
+          trace.complete = true;
+          lastTrace = trace;
+          emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
           captureDecision(
             { action: "allow", tool_calls: extractToolCalls(response, provider) },
             response,
@@ -5179,13 +5390,26 @@ function replay(client, opts = {}) {
             null,
             null,
             void 0,
-            timing
+            timing,
+            trace
           );
           return response;
         }
+        currentTraceStage = "validate";
         const toolCalls = extractToolCalls(response, provider);
         const validateStart = Date.now();
         const validation = validateResponse2(response, toolCalls, contracts, requestToolNames, unmatchedPolicy, provider);
+        for (const f of validation.failures) {
+          const toolName = extractToolNameFromFailure(f, toolCalls);
+          trace.push({
+            stage: "validate",
+            tool: toolName === "_response" ? null : toolName,
+            verdict: "block",
+            reason: f.operator === "response_format" ? "response_format_failed" : "output_invariant_failed",
+            checked: { path: f.path, operator: f.operator, invariant_type: f.operator === "response_format" ? "response_format" : "output" },
+            found: { value: f.found }
+          });
+        }
         timing.validate_ms += Date.now() - validateStart;
         if (isActiveGovern && !attemptDegraded && attemptPreparedRequestId) {
           const rtProposalStart = Date.now();
@@ -5220,9 +5444,10 @@ function replay(client, opts = {}) {
           }
           timing.runtime_ms += Date.now() - rtProposalStart;
         }
+        currentTraceStage = "cross_step";
         const crossStepStart = Date.now();
         const crossStepContracts = compiledSession ? Array.from(compiledSession.perToolContracts.values()) : contracts;
-        const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts);
+        const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts, traceCtx);
         if (!crossStepResult.passed) {
           for (const f of crossStepResult.failures) {
             validation.failures.push({
@@ -5236,10 +5461,11 @@ function replay(client, opts = {}) {
           }
         }
         timing.cross_step_ms += Date.now() - crossStepStart;
+        currentTraceStage = "phase";
         let phaseResult = null;
         const phaseStart = Date.now();
         if (compiledSession) {
-          phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession);
+          phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession, traceCtx);
           if (!phaseResult.legal) {
             validation.failures.push({
               path: `$.tool_calls.${phaseResult.blockedTool}`,
@@ -5269,7 +5495,7 @@ function replay(client, opts = {}) {
               for (const f of avResult.failures) {
                 validation.failures.push({
                   path: f.path,
-                  operator: f.operator,
+                  operator: "argument_value_mismatch",
                   expected: String(f.expected),
                   found: String(f.actual),
                   message: f.detail,
@@ -5280,10 +5506,12 @@ function replay(client, opts = {}) {
               }
             }
           }
+          currentTraceStage = "limit";
           if (resolvedSessionLimits) {
             const workingState = { ...sessionState, toolCallCounts: workingToolCallCounts };
             const perToolResult = checkPerToolLimits(workingState, tc.name, resolvedSessionLimits);
             if (perToolResult.exceeded) {
+              trace.push({ stage: "limit", tool: tc.name, verdict: "block", reason: "per_tool_limit_exceeded", checked: { max_calls: resolvedSessionLimits.max_calls_per_tool?.[tc.name] ?? null }, found: { current_calls: workingToolCallCounts.get(tc.name) ?? 0 } });
               validation.failures.push({
                 path: `$.tool_calls.${tc.name}`,
                 operator: "session_limit",
@@ -5292,6 +5520,8 @@ function replay(client, opts = {}) {
                 message: perToolResult.reason ?? "per-tool limit exceeded",
                 contract_file: ""
               });
+            } else {
+              trace.push({ stage: "limit", tool: tc.name, verdict: "allow", reason: "per_tool_limit_ok", checked: { max_calls: resolvedSessionLimits.max_calls_per_tool?.[tc.name] ?? null }, found: { current_calls: workingToolCallCounts.get(tc.name) ?? 0 } });
             }
           }
           workingToolCallCounts.set(tc.name, (workingToolCallCounts.get(tc.name) ?? 0) + 1);
@@ -5308,6 +5538,7 @@ function replay(client, opts = {}) {
             ).length;
             const totalMatches = loopResult.matchCount + intraMatches;
             if (totalMatches >= resolvedSessionLimits.loop_detection.threshold) {
+              trace.push({ stage: "limit", tool: tc.name, verdict: "block", reason: "loop_detected", checked: { window: resolvedSessionLimits.loop_detection.window, threshold: resolvedSessionLimits.loop_detection.threshold }, found: { match_count: totalMatches, arguments_hash: argsHash } });
               validation.failures.push({
                 path: `$.tool_calls.${tc.name}`,
                 operator: "loop_detected",
@@ -5321,6 +5552,7 @@ function replay(client, opts = {}) {
           }
         }
         timing.argument_values_ms += Date.now() - argValuesStart;
+        currentTraceStage = "policy";
         let policyVerdicts = null;
         const policyStart = Date.now();
         if (compiledSession?.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
@@ -5341,6 +5573,14 @@ function replay(client, opts = {}) {
             );
             policyVerdicts.set(tc.name, verdict);
             if (!verdict.allowed) {
+              trace.push({
+                stage: "policy",
+                tool: tc.name,
+                verdict: "block",
+                reason: verdict.reason?.startsWith("Session deny") ? "session_deny_matched" : verdict.reason?.startsWith("default_deny") ? "default_deny_no_allow" : "policy_denied",
+                checked: { has_policy: true, default_deny: compiledSession.policyProgram.defaultDeny },
+                found: { matched: true }
+              });
               validation.failures.push({
                 path: `$.tool_calls.${tc.name}`,
                 operator: "policy_denied",
@@ -5349,10 +5589,22 @@ function replay(client, opts = {}) {
                 message: `Policy denied: ${tc.name} \u2014 ${verdict.reason}`,
                 contract_file: ""
               });
+            } else {
+              trace.push({
+                stage: "policy",
+                tool: tc.name,
+                verdict: "allow",
+                reason: "policy_allowed",
+                checked: { has_policy: true, default_deny: compiledSession.policyProgram.defaultDeny },
+                found: { session_deny_matched: false, tool_deny_matched: false }
+              });
             }
           }
+        } else {
+          trace.push({ stage: "policy", tool: null, verdict: "skip", reason: "no_policy_configured", checked: {}, found: {} });
         }
         timing.policy_ms += Date.now() - policyStart;
+        currentTraceStage = "gate";
         if (mode === "shadow") {
           const shadowGateStart = Date.now();
           const shadowDecision = validation.failures.length > 0 ? {
@@ -5361,6 +5613,15 @@ function replay(client, opts = {}) {
             blocked: buildBlockedCalls(toolCalls, validation.failures, validation.unmatchedBlocked),
             response_modification: gateMode
           } : { action: "allow", tool_calls: toolCalls };
+          const blockedTools = shadowDecision.action === "block" ? shadowDecision.blocked.map((b) => b.tool_name) : [];
+          trace.push({
+            stage: "gate",
+            tool: null,
+            verdict: blockedTools.length > 0 ? "info" : "allow",
+            reason: blockedTools.length > 0 ? "violations_found" : "no_violations",
+            checked: { gate_mode: gateMode },
+            found: { blocked_count: blockedTools.length, action: shadowDecision.action, ...blockedTools.length > 0 ? { blocked_tools: blockedTools } : {} }
+          });
           const shadowDelta = {
             would_have_blocked: shadowDecision.action === "block" ? shadowDecision.blocked : [],
             would_have_narrowed: narrowResult?.removed ?? [],
@@ -5370,7 +5631,11 @@ function replay(client, opts = {}) {
           lastShadowDeltaValue = shadowDelta;
           shadowEvaluationCount++;
           timing.gate_ms += Date.now() - shadowGateStart;
-          captureDecision(shadowDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, shadowDelta, timing);
+          trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: sessionState.currentPhase, tools_committed: [], tools_blocked: blockedTools, killed: false, step_index: sessionState.totalStepCount } });
+          trace.complete = true;
+          lastTrace = trace;
+          emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
+          captureDecision(shadowDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, shadowDelta, timing, trace);
           return response;
         }
         if (isCompatAdvisory) {
@@ -5411,7 +5676,21 @@ function replay(client, opts = {}) {
             sessionState = recordDecisionOutcome(sessionState, "allowed");
           }
           timing.finalize_ms += Date.now() - advisoryFinalizeStart;
-          captureDecision(advisoryDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing);
+          const advisoryBlockedTools = advisoryDecision.action === "block" ? advisoryDecision.blocked.map((b) => b.tool_name) : [];
+          trace.push({
+            stage: "gate",
+            tool: null,
+            verdict: advisoryBlockedTools.length > 0 ? "info" : "allow",
+            reason: advisoryBlockedTools.length > 0 ? "violations_found" : "no_violations",
+            checked: { gate_mode: gateMode },
+            found: { blocked_count: advisoryBlockedTools.length, action: advisoryDecision.action, ...advisoryBlockedTools.length > 0 ? { blocked_tools: advisoryBlockedTools } : {} }
+          });
+          const advisoryNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
+          trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: advisoryNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: advisoryBlockedTools, killed: false, step_index: sessionState.totalStepCount } });
+          trace.complete = true;
+          lastTrace = trace;
+          emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
+          captureDecision(advisoryDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
           return response;
         }
         const enforceGateStart = Date.now();
@@ -5449,7 +5728,20 @@ function replay(client, opts = {}) {
               });
             }
           }
-          captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing);
+          trace.push({
+            stage: "gate",
+            tool: null,
+            verdict: "allow",
+            reason: "no_violations",
+            checked: { gate_mode: gateMode },
+            found: { blocked_count: 0, action: "allow" }
+          });
+          const allowNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
+          trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: completedStep.phase, phase_after: allowNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: [], killed: false, step_index: sessionState.totalStepCount } });
+          trace.complete = true;
+          lastTrace = trace;
+          emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
+          captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
           return response;
         }
         sessionState = recordDecisionOutcome(sessionState, "blocked");
@@ -5514,15 +5806,42 @@ function replay(client, opts = {}) {
           );
           continue;
         }
-        captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing);
+        const blockBlockedTools = decision.action === "block" ? decision.blocked.map((b) => b.tool_name) : [];
+        trace.push({
+          stage: "gate",
+          tool: null,
+          verdict: "block",
+          reason: "violations_found",
+          checked: { gate_mode: gateMode },
+          found: { blocked_count: blockBlockedTools.length, action: "block", blocked_tools: blockBlockedTools }
+        });
+        trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: sessionState.currentPhase, tools_committed: [], tools_blocked: blockBlockedTools, killed, step_index: sessionState.totalStepCount } });
+        trace.complete = true;
+        lastTrace = trace;
+        emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
+        captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
         return applyGateDecision(decision, response, provider, gateMode, opts.onBlock);
       }
       if (lastError) throw lastError;
       throw new ReplayInternalError("Retry loop exhausted without result", { sessionId });
     } catch (err) {
       if (err instanceof ReplayContractError || err instanceof ReplayKillError) {
+        if (!trace.complete) {
+          lastTrace = trace;
+          emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
+        }
         throw err;
       }
+      trace.push({
+        stage: currentTraceStage,
+        tool: null,
+        verdict: "error",
+        reason: "stage_threw",
+        checked: {},
+        found: { error: err instanceof Error ? err.message : String(err) }
+      });
+      lastTrace = trace;
+      emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
       sessionState = recordDecisionOutcome(sessionState, "error");
       if (resolvedSessionLimits?.circuit_breaker) {
         const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
@@ -5648,6 +5967,9 @@ function replay(client, opts = {}) {
     getLastShadowDelta() {
       return lastShadowDeltaValue;
     },
+    getLastTrace() { // Accessor: returns whatever trace object was most recently stored in lastTrace.
+      return lastTrace;
+    },
     /**
      * v3: Manually restrict available tools within compiled legal space.
      * @see specs/replay-v3.md § narrow() / widen()
@@ -5783,7 +6105,7 @@ function replay(client, opts = {}) {
     }
     return wrapped;
   }
-  function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam) {
+  function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam, traceParam) {
     if (!buffer && !store) return;
     if (timingParam) {
       timingParam.total_ms = Date.now() - guardStart;
@@ -5818,6 +6140,7 @@ function replay(client, opts = {}) {
       phase: sessionState.currentPhase,
       phase_transition: phaseTransitionStr,
       shadow_delta: shadowDelta,
+      trace: traceParam ? redactTrace(traceParam, opts.captureLevel ?? "full") : void 0,
       receipt: null
     };
     const capturedCall = {
@@ -6375,6 +6698,7 @@ function resolveSessionLimits(contracts) {
     const sl = c.session_limits;
     if (sl.max_steps !== void 0 && merged.max_steps === void 0) merged.max_steps = sl.max_steps;
     if (sl.max_tool_calls !== void 0 && merged.max_tool_calls === void 0) merged.max_tool_calls = sl.max_tool_calls;
+    if (sl.max_tool_calls_mode !== void 0 && merged.max_tool_calls_mode === void 0) merged.max_tool_calls_mode = sl.max_tool_calls_mode;
     if (sl.max_cost_per_session !== void 0 && merged.max_cost_per_session === void 0) merged.max_cost_per_session = sl.max_cost_per_session;
     if (sl.loop_detection && !merged.loop_detection) merged.loop_detection = sl.loop_detection;
     if (sl.circuit_breaker && !merged.circuit_breaker) merged.circuit_breaker = sl.circuit_breaker;
@@ -6482,6 +6806,7 @@ function createInactiveSession(client, sessionId, reason) {
     getState: () => EMPTY_STATE_SNAPSHOT,
     getLastNarrowing: () => null,
     getLastShadowDelta: () => null,
+    getLastTrace: () => null,
     narrow() {
     },
     widen() {
@@ -6523,6 +6848,7 @@ function createBlockingInactiveSession(client, sessionId, detail, configError) {
     getState: () => EMPTY_STATE_SNAPSHOT,
     getLastNarrowing: () => null,
     getLastShadowDelta: () => null,
+    getLastTrace: () => null,
     narrow() {
     },
     widen() {
@@ -6607,6 +6933,83 @@ function generateSessionId2() {
 function stripHashPrefix(hash) { // Remove a leading sha256: label from a hash string, if one is present.
   return hash.startsWith("sha256:") ? hash.slice(7) : hash; // 7 is the length of the sha256: label; unprefixed input is returned unchanged
 }
+function resolveLogLevel() { // Map the REPLAYCI_LOG environment variable to one of three levels: trace, warn or silent.
+  const raw = typeof process !== "undefined" ? process.env.REPLAYCI_LOG : void 0; // typeof guard avoids a ReferenceError when no process global exists
+  if (!raw) return "warn"; // unset or empty value: default level
+  const lower = raw.toLowerCase(); // the comparisons below are case-insensitive
+  if (lower === "trace" || lower === "debug") return "trace"; // debug is accepted as an alias of trace
+  if (lower === "silent" || lower === "off" || lower === "none") return "silent"; // off and none are aliases of silent
+  return "warn"; // any unrecognised value also falls back to warn
+}
+function defaultReplayDiagnosticsHandler(event) { // Print one diagnostics event through console.warn, filtered by resolveLogLevel(); returns nothing.
+  const level = resolveLogLevel(); // the level is re-resolved on every event
+  if (level === "silent") return; // silent suppresses all output
+  switch (event.type) {
+    case "replay_inactive":
+      console.warn(`[replayci] replay() inactive: ${event.reason}${event.error_message ? ` \u2014 ${event.error_message}` : ""}`);
+      break;
+    case "replay_compile_error":
+      console.warn(`[replayci] compile error: ${event.details}`);
+      break;
+    case "replay_compile_warning":
+      console.warn(`[replayci] compile warning: ${event.details}`);
+      break;
+    case "replay_bypass_detected":
+      console.warn(`[replayci] bypass detected on session ${event.session_id}`);
+      break;
+    case "replay_kill":
+      console.warn(`[replayci] session ${event.session_id} killed`);
+      break;
+    case "replay_block":
+      console.warn(`[replayci] blocked ${event.tool_name}: ${event.reason}`);
+      break;
+    case "replay_narrow": { // one output line per entry of event.removed
+      for (const r of event.removed) {
+        console.warn(`[replayci] removed ${r.tool} \u2192 ${r.reason}${r.detail ? ` (${r.detail})` : ""}`);
+      }
+      break;
+    }
+    case "replay_trace": {
+      const t = event.trace;
+      if (level === "trace") { // trace level: print every entry together with its checked/found payloads
+        for (const entry of t.entries) {
+          const toolStr = entry.tool ? ` ${entry.tool}` : "";
+          const detail = entry.reason !== entry.verdict ? ` \u2014 ${entry.reason}` : ""; // the reason is omitted when it equals the verdict
+          const checkedStr = Object.keys(entry.checked).length > 0 ? ` checked=${JSON.stringify(entry.checked)}` : ""; // an empty object is left out of the line
+          const foundStr = Object.keys(entry.found).length > 0 ? ` found=${JSON.stringify(entry.found)}` : ""; // same rule as checked
+          console.warn(`[replayci] ${entry.stage}${toolStr}: ${entry.verdict}${detail}${checkedStr}${foundStr}`);
+        }
+        if (!t.complete) { // falsy complete flag: report the trace as partial
+          console.warn(`[replayci] trace INCOMPLETE (fault in pipeline)`);
+        }
+      } else { // remaining level (warn): print only the entries whose verdict is block
+        const blocks = t.entries.filter((e) => e.verdict === "block");
+        for (const b of blocks) {
+          const toolStr = b.tool ?? "session"; // entries with a null or undefined tool are labelled session
+          console.warn(`[replayci] blocked ${toolStr} at ${b.stage} \u2192 ${b.reason}`);
+        }
+        if (!t.complete) { // falsy complete flag: point the reader at session.getLastTrace()
+          console.warn(`[replayci] enforcement cycle incomplete (fault) \u2014 session.getLastTrace() for partial trace`);
+        }
+      }
+      break;
+    }
+    case "replay_workflow_error":
+      console.warn(`[replayci] workflow error: ${event.details}`);
+      break;
+    case "replay_state_sync_error":
+      console.warn(`[replayci] state sync error: ${event.details}`);
+      break;
+    case "replay_receipt_error":
+      console.warn(`[replayci] receipt error (${event.tool_name}): ${event.details}`);
+      break;
+    case "replay_capture_error":
+      console.warn(`[replayci] capture error: ${event.details}`);
+      break;
+    default: // event types not listed above produce no output
+      break;
+  }
+}
 function emitDiagnostic2(diagnostics, event) {
   try {
     diagnostics?.(event);