@replayci/replay 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +458 -66
- package/dist/index.d.cts +48 -1
- package/dist/index.d.ts +48 -1
- package/dist/index.js +458 -66
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -3213,6 +3213,34 @@ function redactCapture(input) {
|
|
|
3213
3213
|
pattern_fingerprint: PATTERN_FINGERPRINT
|
|
3214
3214
|
};
|
|
3215
3215
|
}
|
|
3216
|
+
/**
 * Return a trace suitable for the requested capture level.
 *
 * @param {object} trace - Trace object exposing an `entries` array.
 * @param {string} captureLevel - Capture level; "full" means no redaction.
 * @returns {object} `trace` itself when `captureLevel` is "full"; otherwise a
 *   shallow copy whose `entries` were each passed through `redactTraceEntry`.
 */
function redactTrace(trace, captureLevel) {
  const keepVerbatim = captureLevel === "full";
  if (keepVerbatim) {
    return trace;
  }
  // Shallow copy first so every other trace field is carried over unchanged.
  const redacted = { ...trace };
  redacted.entries = trace.entries.map((item) => redactTraceEntry(item, captureLevel));
  return redacted;
}
|
|
3223
|
+
/**
 * Redact a single trace entry for the given capture level.
 *
 * Only the "metadata" level triggers redaction here: the entry's `checked`
 * and `found` records are run through `redactRecord`. Every other level
 * returns the entry untouched.
 *
 * @param {object} entry - Trace entry with `checked` and `found` records.
 * @param {string} captureLevel - Capture level controlling redaction.
 * @returns {object} The original entry, or a shallow copy with redacted
 *   `checked` / `found` fields.
 */
function redactTraceEntry(entry, captureLevel) {
  if (captureLevel !== "metadata") {
    return entry;
  }
  const checked = redactRecord(entry.checked);
  const found = redactRecord(entry.found);
  return { ...entry, checked, found };
}
|
|
3233
|
+
/**
 * Build a redacted copy of a trace record.
 *
 * Every string value is passed through `redactString` (referenced here but
 * defined outside this block). Unlike a top-level-only pass, strings nested
 * inside arrays or plain objects are redacted too, because trace records
 * carry nested data such as tool-name lists and extracted resource values.
 * Non-string primitives and non-plain objects (Map, Date, class instances)
 * are copied by reference unchanged.
 *
 * @param {object | null | undefined} record - Record to redact. A missing
 *   record yields an empty object instead of throwing.
 * @returns {object} A new object with the same keys and redacted values.
 */
function redactRecord(record) {
  // A trace entry without `checked`/`found` must not abort redaction.
  if (record === null || record === undefined) {
    return {};
  }
  const redactValue = (value) => {
    if (typeof value === "string") {
      return redactString(value);
    }
    if (Array.isArray(value)) {
      return value.map(redactValue);
    }
    if (value !== null && typeof value === "object") {
      const proto = Object.getPrototypeOf(value);
      // Only descend into plain objects; rebuilding a Map/Date from its
      // enumerable entries would silently destroy it.
      if (proto === Object.prototype || proto === null) {
        return redactRecord(value);
      }
    }
    return value;
  };
  const result = {};
  for (const [key, value] of Object.entries(record)) {
    result[key] = redactValue(value);
  }
  return result;
}
|
|
3216
3244
|
|
|
3217
3245
|
// src/errors/replay.ts
|
|
3218
3246
|
var ReplayContractError = class extends Error {
|
|
@@ -3509,8 +3537,9 @@ function toRecord8(value) {
|
|
|
3509
3537
|
import crypto2 from "crypto";
|
|
3510
3538
|
|
|
3511
3539
|
// src/phases.ts
|
|
3512
|
-
function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
3540
|
+
function validatePhaseTransition(toolCalls, sessionState, compiledSession, ctx) {
|
|
3513
3541
|
if (!compiledSession.phases) {
|
|
3542
|
+
ctx?.trace.push({ stage: "phase", tool: null, verdict: "skip", reason: "no_phases_configured", checked: {}, found: {} });
|
|
3514
3543
|
return { legal: true, newPhase: sessionState.currentPhase };
|
|
3515
3544
|
}
|
|
3516
3545
|
const attemptedTransitions = [];
|
|
@@ -3521,6 +3550,14 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3521
3550
|
sessionState.currentPhase ?? ""
|
|
3522
3551
|
);
|
|
3523
3552
|
if (!allowedTransitions?.includes(contract.transitions.advances_to)) {
|
|
3553
|
+
ctx?.trace.push({
|
|
3554
|
+
stage: "phase",
|
|
3555
|
+
tool: toolCall.name,
|
|
3556
|
+
verdict: "block",
|
|
3557
|
+
reason: "illegal_phase_transition",
|
|
3558
|
+
checked: { advances_to: contract.transitions.advances_to, from: sessionState.currentPhase },
|
|
3559
|
+
found: { allowed_transitions: allowedTransitions ?? [] }
|
|
3560
|
+
});
|
|
3524
3561
|
return {
|
|
3525
3562
|
legal: false,
|
|
3526
3563
|
newPhase: sessionState.currentPhase,
|
|
@@ -3537,6 +3574,14 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3537
3574
|
if (attemptedTransitions.length > 1) {
|
|
3538
3575
|
const distinctTargets = new Set(attemptedTransitions.map((t) => t.target));
|
|
3539
3576
|
if (distinctTargets.size > 1) {
|
|
3577
|
+
ctx?.trace.push({
|
|
3578
|
+
stage: "phase",
|
|
3579
|
+
tool: attemptedTransitions.map((t) => t.tool).join(", "),
|
|
3580
|
+
verdict: "block",
|
|
3581
|
+
reason: "ambiguous_phase_transition",
|
|
3582
|
+
checked: { targets: Array.from(distinctTargets) },
|
|
3583
|
+
found: { from: sessionState.currentPhase }
|
|
3584
|
+
});
|
|
3540
3585
|
return {
|
|
3541
3586
|
legal: false,
|
|
3542
3587
|
newPhase: sessionState.currentPhase,
|
|
@@ -3547,7 +3592,17 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3547
3592
|
}
|
|
3548
3593
|
}
|
|
3549
3594
|
if (attemptedTransitions.length > 0) {
|
|
3550
|
-
|
|
3595
|
+
const target = attemptedTransitions[0].target;
|
|
3596
|
+
const allowedTransitions = compiledSession.transitions.get(sessionState.currentPhase ?? "") ?? [];
|
|
3597
|
+
ctx?.trace.push({
|
|
3598
|
+
stage: "phase",
|
|
3599
|
+
tool: attemptedTransitions[0].tool,
|
|
3600
|
+
verdict: "allow",
|
|
3601
|
+
reason: "phase_advanced",
|
|
3602
|
+
checked: { advances_to: target, from: sessionState.currentPhase },
|
|
3603
|
+
found: { allowed_transitions: allowedTransitions }
|
|
3604
|
+
});
|
|
3605
|
+
return { legal: true, newPhase: target };
|
|
3551
3606
|
}
|
|
3552
3607
|
return { legal: true, newPhase: sessionState.currentPhase };
|
|
3553
3608
|
}
|
|
@@ -3797,7 +3852,7 @@ function checkCircuitBreaker(state, config) {
|
|
|
3797
3852
|
}
|
|
3798
3853
|
|
|
3799
3854
|
// src/crossStep.ts
|
|
3800
|
-
function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
3855
|
+
function validateCrossStep(toolCalls, sessionState, contracts, ctx) {
|
|
3801
3856
|
const failures = [];
|
|
3802
3857
|
const contractByTool = new Map(contracts.map((c) => [c.tool, c]));
|
|
3803
3858
|
const workingForbidden = new Set(sessionState.forbiddenTools);
|
|
@@ -3825,8 +3880,17 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3825
3880
|
reason: "forbidden_tool",
|
|
3826
3881
|
detail: resourceValue !== void 0 ? `Tool "${tc.name}" is forbidden in this session for resource ${JSON.stringify(resourceValue)}` : `Tool "${tc.name}" is forbidden in this session`
|
|
3827
3882
|
});
|
|
3883
|
+
ctx?.trace.push({
|
|
3884
|
+
stage: "cross_step",
|
|
3885
|
+
tool: tc.name,
|
|
3886
|
+
verdict: "block",
|
|
3887
|
+
reason: "forbidden_tool",
|
|
3888
|
+
checked: { tool: tc.name },
|
|
3889
|
+
found: { is_resource_scoped: resourceValue !== void 0, resource_value: resourceValue ?? null }
|
|
3890
|
+
});
|
|
3828
3891
|
continue;
|
|
3829
3892
|
}
|
|
3893
|
+
let crossStepPassed = true;
|
|
3830
3894
|
if (contract?.preconditions && contract.preconditions.length > 0) {
|
|
3831
3895
|
const results = evaluatePreconditions(
|
|
3832
3896
|
contract.preconditions,
|
|
@@ -3835,6 +3899,7 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3835
3899
|
);
|
|
3836
3900
|
for (const result of results) {
|
|
3837
3901
|
if (!result.satisfied) {
|
|
3902
|
+
crossStepPassed = false;
|
|
3838
3903
|
failures.push({
|
|
3839
3904
|
toolName: tc.name,
|
|
3840
3905
|
reason: "precondition_not_met",
|
|
@@ -3843,6 +3908,25 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3843
3908
|
}
|
|
3844
3909
|
}
|
|
3845
3910
|
}
|
|
3911
|
+
if (crossStepPassed) {
|
|
3912
|
+
ctx?.trace.push({
|
|
3913
|
+
stage: "cross_step",
|
|
3914
|
+
tool: tc.name,
|
|
3915
|
+
verdict: "allow",
|
|
3916
|
+
reason: "preconditions_satisfied",
|
|
3917
|
+
checked: { requires_prior_tool: contract?.preconditions?.[0]?.requires_prior_tool ?? null },
|
|
3918
|
+
found: { resource_value: resourceValue ?? null }
|
|
3919
|
+
});
|
|
3920
|
+
} else {
|
|
3921
|
+
ctx?.trace.push({
|
|
3922
|
+
stage: "cross_step",
|
|
3923
|
+
tool: tc.name,
|
|
3924
|
+
verdict: "block",
|
|
3925
|
+
reason: "precondition_not_met",
|
|
3926
|
+
checked: { requires_prior_tool: contract?.preconditions?.[0]?.requires_prior_tool ?? null },
|
|
3927
|
+
found: { resource_value: resourceValue ?? null, matching_step_index: null }
|
|
3928
|
+
});
|
|
3929
|
+
}
|
|
3846
3930
|
if (contract?.forbids_after) {
|
|
3847
3931
|
for (const entry of contract.forbids_after) {
|
|
3848
3932
|
if (typeof entry === "string") {
|
|
@@ -4066,20 +4150,23 @@ function extractPath2(obj, path) {
|
|
|
4066
4150
|
}
|
|
4067
4151
|
|
|
4068
4152
|
// src/narrow.ts
|
|
4069
|
-
function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter) {
|
|
4153
|
+
function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter, ctx) {
|
|
4070
4154
|
const allowed = [];
|
|
4071
4155
|
const removed = [];
|
|
4072
4156
|
for (const tool of requestedTools) {
|
|
4073
4157
|
if (manualFilter && !manualFilter.includes(tool.name)) {
|
|
4074
4158
|
removed.push({ tool: tool.name, reason: "manual_filter" });
|
|
4159
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "manual_filter", checked: { filter: manualFilter }, found: {} });
|
|
4075
4160
|
continue;
|
|
4076
4161
|
}
|
|
4077
4162
|
const contract = compiledSession.perToolContracts.get(tool.name);
|
|
4078
4163
|
if (!contract) {
|
|
4079
4164
|
if (unmatchedPolicy === "allow") {
|
|
4080
4165
|
allowed.push(tool);
|
|
4166
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "allow", reason: "no_contract_passthrough", checked: { unmatched_policy: "allow" }, found: {} });
|
|
4081
4167
|
} else {
|
|
4082
4168
|
removed.push({ tool: tool.name, reason: "no_contract" });
|
|
4169
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "no_contract", checked: { unmatched_policy: "block" }, found: {} });
|
|
4083
4170
|
}
|
|
4084
4171
|
continue;
|
|
4085
4172
|
}
|
|
@@ -4092,6 +4179,7 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4092
4179
|
reason: "wrong_phase",
|
|
4093
4180
|
detail: `Tool valid in [${contract.transitions.valid_in_phases.join(", ")}], current phase: ${sessionState.currentPhase}`
|
|
4094
4181
|
});
|
|
4182
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "wrong_phase", checked: { valid_in_phases: contract.transitions.valid_in_phases }, found: { current_phase: sessionState.currentPhase } });
|
|
4095
4183
|
continue;
|
|
4096
4184
|
}
|
|
4097
4185
|
}
|
|
@@ -4102,6 +4190,18 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4102
4190
|
);
|
|
4103
4191
|
const unsatisfied = results.find((r) => !r.satisfied);
|
|
4104
4192
|
if (unsatisfied) {
|
|
4193
|
+
const firstPre = contract.preconditions[0];
|
|
4194
|
+
ctx?.trace.push({
|
|
4195
|
+
stage: "narrow",
|
|
4196
|
+
tool: tool.name,
|
|
4197
|
+
verdict: "remove",
|
|
4198
|
+
reason: "precondition_not_met",
|
|
4199
|
+
checked: {
|
|
4200
|
+
requires_prior_tool: firstPre.requires_prior_tool ?? null,
|
|
4201
|
+
with_output: firstPre.with_output ?? []
|
|
4202
|
+
},
|
|
4203
|
+
found: { satisfied_precondition_cache_hit: false }
|
|
4204
|
+
});
|
|
4105
4205
|
removed.push({
|
|
4106
4206
|
tool: tool.name,
|
|
4107
4207
|
reason: "precondition_not_met",
|
|
@@ -4115,6 +4215,7 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4115
4215
|
tool: tool.name,
|
|
4116
4216
|
reason: "forbidden_in_state"
|
|
4117
4217
|
});
|
|
4218
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "forbidden_in_state", checked: { tool: tool.name }, found: { is_resource_scoped: false } });
|
|
4118
4219
|
continue;
|
|
4119
4220
|
}
|
|
4120
4221
|
if (compiledSession.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
|
|
@@ -4131,9 +4232,24 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4131
4232
|
reason: "policy_denied",
|
|
4132
4233
|
detail: verdict.reason ?? "Policy deny rule matched"
|
|
4133
4234
|
});
|
|
4235
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "policy_denied", checked: { rule_type: "session_deny" }, found: { matched: true } });
|
|
4134
4236
|
continue;
|
|
4135
4237
|
}
|
|
4136
4238
|
}
|
|
4239
|
+
ctx?.trace.push({
|
|
4240
|
+
stage: "narrow",
|
|
4241
|
+
tool: tool.name,
|
|
4242
|
+
verdict: "allow",
|
|
4243
|
+
reason: "all_checks_passed",
|
|
4244
|
+
checked: {
|
|
4245
|
+
has_contract: true,
|
|
4246
|
+
phase_ok: true,
|
|
4247
|
+
preconditions_ok: true,
|
|
4248
|
+
not_forbidden: true,
|
|
4249
|
+
policy_ok: true
|
|
4250
|
+
},
|
|
4251
|
+
found: {}
|
|
4252
|
+
});
|
|
4137
4253
|
allowed.push(tool);
|
|
4138
4254
|
}
|
|
4139
4255
|
return { allowed, removed };
|
|
@@ -4627,7 +4743,7 @@ function replay(client, opts = {}) {
|
|
|
4627
4743
|
const unmatchedPolicy = opts.unmatchedPolicy ?? "block";
|
|
4628
4744
|
const maxRetries = Math.min(Math.max(0, opts.maxRetries ?? 0), MAX_RETRIES);
|
|
4629
4745
|
const compatEnforcement = opts.compatEnforcement ?? "protective";
|
|
4630
|
-
const diagnostics = opts.diagnostics;
|
|
4746
|
+
const diagnostics = opts.diagnostics ?? defaultReplayDiagnosticsHandler;
|
|
4631
4747
|
let provider;
|
|
4632
4748
|
try {
|
|
4633
4749
|
provider = detectProvider(client);
|
|
@@ -4660,6 +4776,12 @@ function replay(client, opts = {}) {
|
|
|
4660
4776
|
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4661
4777
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4662
4778
|
}
|
|
4779
|
+
if (opts.contractsDir && !discoveredSessionYaml && !opts.sessionYamlPath) {
|
|
4780
|
+
emitDiagnostic2(diagnostics, {
|
|
4781
|
+
type: "replay_compile_warning",
|
|
4782
|
+
details: "No session.yaml found in contractsDir \u2014 session-level features (phases, policy, session_limits) are inactive. Per-tool contracts still apply."
|
|
4783
|
+
});
|
|
4784
|
+
}
|
|
4663
4785
|
let sessionYaml = discoveredSessionYaml;
|
|
4664
4786
|
if (!sessionYaml && opts.providerConstraints) {
|
|
4665
4787
|
sessionYaml = { schema_version: "1.0", agent, provider_constraints: opts.providerConstraints };
|
|
@@ -4819,6 +4941,7 @@ function replay(client, opts = {}) {
|
|
|
4819
4941
|
let bypassDetected = false;
|
|
4820
4942
|
let lastShadowDeltaValue = null;
|
|
4821
4943
|
let lastNarrowResult = null;
|
|
4944
|
+
let lastTrace = null;
|
|
4822
4945
|
let shadowEvaluationCount = 0;
|
|
4823
4946
|
let manualFilter = null;
|
|
4824
4947
|
const deferredReceipts = /* @__PURE__ */ new Map();
|
|
@@ -4893,6 +5016,18 @@ function replay(client, opts = {}) {
|
|
|
4893
5016
|
} catch {
|
|
4894
5017
|
}
|
|
4895
5018
|
}
|
|
5019
|
+
/**
 * Create an empty, incomplete trace for one step.
 *
 * `sessionId` comes from the enclosing scope. The returned object exposes
 * its `entries` array directly and a `push` helper that appends to that
 * same array.
 *
 * @param {number} stepIndex - Step index recorded on the trace.
 * @returns {object} Trace with `sessionId`, `stepIndex`, `complete: false`,
 *   `entries`, and `push(entry)`.
 */
function createTrace(stepIndex) {
  const collected = [];
  const trace = {
    sessionId,
    stepIndex,
    complete: false,
    entries: collected,
    push: (entry) => {
      collected.push(entry);
    }
  };
  return trace;
}
|
|
4896
5031
|
const enforcementCreate = async function replayEnforcementCreate(...args) {
|
|
4897
5032
|
if (killed) {
|
|
4898
5033
|
throw new ReplayKillError(sessionId, killedAt);
|
|
@@ -4935,8 +5070,19 @@ function replay(client, opts = {}) {
|
|
|
4935
5070
|
total_ms: 0,
|
|
4936
5071
|
enforcement_ms: 0
|
|
4937
5072
|
};
|
|
5073
|
+
const trace = createTrace(sessionState.totalStepCount);
|
|
5074
|
+
const traceCtx = { trace };
|
|
5075
|
+
let currentTraceStage = "narrow";
|
|
4938
5076
|
const request = toRecord10(args[0]);
|
|
4939
5077
|
const requestToolNames = extractRequestToolNames(request);
|
|
5078
|
+
const messages = Array.isArray(request.messages) ? request.messages : [];
|
|
5079
|
+
if (messages.length > 0) {
|
|
5080
|
+
const toolResults = extractToolResults(messages, provider);
|
|
5081
|
+
if (toolResults.length > 0) {
|
|
5082
|
+
const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
|
|
5083
|
+
sessionState = applyOutputExtracts(sessionState, outputUpdates);
|
|
5084
|
+
}
|
|
5085
|
+
}
|
|
4940
5086
|
let narrowResult = null;
|
|
4941
5087
|
let activeArgs = args;
|
|
4942
5088
|
if (compiledSession && Array.isArray(request.tools) && request.tools.length > 0) {
|
|
@@ -4947,7 +5093,8 @@ function replay(client, opts = {}) {
|
|
|
4947
5093
|
sessionState,
|
|
4948
5094
|
compiledSession,
|
|
4949
5095
|
unmatchedPolicy,
|
|
4950
|
-
manualFilter
|
|
5096
|
+
manualFilter,
|
|
5097
|
+
traceCtx
|
|
4951
5098
|
);
|
|
4952
5099
|
lastNarrowResult = narrowResult;
|
|
4953
5100
|
if (narrowResult.removed.length > 0) {
|
|
@@ -4985,55 +5132,96 @@ function replay(client, opts = {}) {
|
|
|
4985
5132
|
timing.narrow_ms = Date.now() - guardStart;
|
|
4986
5133
|
const preCheckStart = Date.now();
|
|
4987
5134
|
try {
|
|
5135
|
+
currentTraceStage = "pre_check";
|
|
4988
5136
|
if (mode === "enforce" && resolvedSessionLimits) {
|
|
4989
5137
|
const limitResult = checkSessionLimits(sessionState, resolvedSessionLimits);
|
|
4990
5138
|
if (limitResult.exceeded) {
|
|
4991
|
-
|
|
4992
|
-
|
|
4993
|
-
|
|
4994
|
-
|
|
4995
|
-
|
|
4996
|
-
|
|
4997
|
-
|
|
4998
|
-
|
|
4999
|
-
|
|
5000
|
-
|
|
5001
|
-
|
|
5002
|
-
|
|
5003
|
-
|
|
5004
|
-
|
|
5005
|
-
|
|
5006
|
-
|
|
5007
|
-
|
|
5008
|
-
|
|
5009
|
-
|
|
5010
|
-
|
|
5139
|
+
let narrowedPastLimit = false;
|
|
5140
|
+
if (limitResult.reason?.startsWith("max_tool_calls") && resolvedSessionLimits.max_tool_calls_mode === "narrow" && resolvedSessionLimits.max_calls_per_tool) {
|
|
5141
|
+
const costOk = !(typeof resolvedSessionLimits.max_cost_per_session === "number" && sessionState.actualCost >= resolvedSessionLimits.max_cost_per_session);
|
|
5142
|
+
if (costOk) {
|
|
5143
|
+
const currentRequest = toRecord10(activeArgs[0]);
|
|
5144
|
+
const currentTools = Array.isArray(currentRequest.tools) ? extractToolDefinitions(currentRequest.tools) : [];
|
|
5145
|
+
const budgetedTools = currentTools.filter((tool) => {
|
|
5146
|
+
const max = resolvedSessionLimits.max_calls_per_tool[tool.name];
|
|
5147
|
+
if (typeof max !== "number") return false;
|
|
5148
|
+
return (sessionState.toolCallCounts.get(tool.name) ?? 0) < max;
|
|
5149
|
+
});
|
|
5150
|
+
if (budgetedTools.length > 0) {
|
|
5151
|
+
const modifiedRequest = { ...currentRequest, tools: budgetedTools };
|
|
5152
|
+
activeArgs = [modifiedRequest, ...Array.prototype.slice.call(activeArgs, 1)];
|
|
5153
|
+
narrowedPastLimit = true;
|
|
5154
|
+
trace.push({
|
|
5155
|
+
stage: "pre_check",
|
|
5156
|
+
tool: null,
|
|
5157
|
+
verdict: "narrow",
|
|
5158
|
+
reason: "max_tool_calls_narrow_mode",
|
|
5159
|
+
checked: { max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null, budgeted_tools: budgetedTools.map((t) => t.name) },
|
|
5160
|
+
found: { total_tool_calls: sessionState.totalToolCalls }
|
|
5161
|
+
});
|
|
5162
|
+
}
|
|
5011
5163
|
}
|
|
5012
5164
|
}
|
|
5013
|
-
|
|
5014
|
-
|
|
5015
|
-
|
|
5016
|
-
|
|
5017
|
-
|
|
5018
|
-
|
|
5019
|
-
|
|
5020
|
-
|
|
5021
|
-
|
|
5022
|
-
|
|
5023
|
-
|
|
5024
|
-
|
|
5025
|
-
void 0,
|
|
5026
|
-
timing
|
|
5027
|
-
);
|
|
5028
|
-
if (isCompatAdvisory) {
|
|
5029
|
-
emitDiagnostic2(diagnostics, {
|
|
5030
|
-
type: "replay_compat_advisory",
|
|
5031
|
-
session_id: sessionId,
|
|
5032
|
-
would_block: decision.blocked,
|
|
5033
|
-
details: limitResult.reason ?? "session limit exceeded"
|
|
5165
|
+
if (!narrowedPastLimit) {
|
|
5166
|
+
trace.push({
|
|
5167
|
+
stage: "pre_check",
|
|
5168
|
+
tool: null,
|
|
5169
|
+
verdict: "block",
|
|
5170
|
+
reason: "session_limit_exceeded",
|
|
5171
|
+
checked: {
|
|
5172
|
+
max_steps: resolvedSessionLimits.max_steps ?? null,
|
|
5173
|
+
max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null,
|
|
5174
|
+
max_cost: resolvedSessionLimits.max_cost_per_session ?? null
|
|
5175
|
+
},
|
|
5176
|
+
found: { total_steps: sessionState.totalStepCount, total_tool_calls: sessionState.totalToolCalls, actual_cost: sessionState.actualCost }
|
|
5034
5177
|
});
|
|
5035
|
-
|
|
5036
|
-
|
|
5178
|
+
const decision = {
|
|
5179
|
+
action: "block",
|
|
5180
|
+
tool_calls: [],
|
|
5181
|
+
blocked: [{
|
|
5182
|
+
tool_name: "_session",
|
|
5183
|
+
arguments: "",
|
|
5184
|
+
reason: "session_limit_exceeded",
|
|
5185
|
+
contract_file: "",
|
|
5186
|
+
failures: [{ path: "$", operator: "session_limit", expected: "", found: "", message: limitResult.reason ?? "session limit exceeded" }]
|
|
5187
|
+
}],
|
|
5188
|
+
response_modification: gateMode
|
|
5189
|
+
};
|
|
5190
|
+
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
5191
|
+
if (resolvedSessionLimits.circuit_breaker) {
|
|
5192
|
+
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
5193
|
+
if (cbResult.triggered) {
|
|
5194
|
+
killed = true;
|
|
5195
|
+
killedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
5196
|
+
sessionState = killSession(sessionState);
|
|
5197
|
+
emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
|
|
5198
|
+
}
|
|
5199
|
+
}
|
|
5200
|
+
timing.pre_check_ms = Date.now() - preCheckStart;
|
|
5201
|
+
captureDecision(
|
|
5202
|
+
decision,
|
|
5203
|
+
null,
|
|
5204
|
+
request,
|
|
5205
|
+
guardStart,
|
|
5206
|
+
requestToolNames,
|
|
5207
|
+
null,
|
|
5208
|
+
narrowResult,
|
|
5209
|
+
null,
|
|
5210
|
+
null,
|
|
5211
|
+
null,
|
|
5212
|
+
void 0,
|
|
5213
|
+
timing
|
|
5214
|
+
);
|
|
5215
|
+
if (isCompatAdvisory) {
|
|
5216
|
+
emitDiagnostic2(diagnostics, {
|
|
5217
|
+
type: "replay_compat_advisory",
|
|
5218
|
+
session_id: sessionId,
|
|
5219
|
+
would_block: decision.blocked,
|
|
5220
|
+
details: limitResult.reason ?? "session limit exceeded"
|
|
5221
|
+
});
|
|
5222
|
+
} else {
|
|
5223
|
+
throw buildContractError2(decision);
|
|
5224
|
+
}
|
|
5037
5225
|
}
|
|
5038
5226
|
}
|
|
5039
5227
|
if (isAtHardStepCap(sessionState)) {
|
|
@@ -5066,8 +5254,23 @@ function replay(client, opts = {}) {
|
|
|
5066
5254
|
);
|
|
5067
5255
|
throw buildContractError2(decision);
|
|
5068
5256
|
}
|
|
5257
|
+
if (!checkSessionLimits(sessionState, resolvedSessionLimits).exceeded) {
|
|
5258
|
+
trace.push({
|
|
5259
|
+
stage: "pre_check",
|
|
5260
|
+
tool: null,
|
|
5261
|
+
verdict: "allow",
|
|
5262
|
+
reason: "session_limits_ok",
|
|
5263
|
+
checked: {
|
|
5264
|
+
max_steps: resolvedSessionLimits.max_steps ?? null,
|
|
5265
|
+
max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null,
|
|
5266
|
+
max_cost: resolvedSessionLimits.max_cost_per_session ?? null
|
|
5267
|
+
},
|
|
5268
|
+
found: { total_steps: sessionState.totalStepCount, total_tool_calls: sessionState.totalToolCalls, actual_cost: sessionState.actualCost }
|
|
5269
|
+
});
|
|
5270
|
+
}
|
|
5271
|
+
} else if (mode === "enforce") {
|
|
5272
|
+
trace.push({ stage: "pre_check", tool: null, verdict: "skip", reason: "no_session_limits", checked: {}, found: {} });
|
|
5069
5273
|
}
|
|
5070
|
-
const messages = Array.isArray(request.messages) ? request.messages : [];
|
|
5071
5274
|
if (messages.length > 0) {
|
|
5072
5275
|
const msgResult = validateToolResultMessages(messages, contracts, provider);
|
|
5073
5276
|
if (!msgResult.passed) {
|
|
@@ -5077,13 +5280,6 @@ function replay(client, opts = {}) {
|
|
|
5077
5280
|
});
|
|
5078
5281
|
}
|
|
5079
5282
|
}
|
|
5080
|
-
if (messages.length > 0) {
|
|
5081
|
-
const toolResults = extractToolResults(messages, provider);
|
|
5082
|
-
if (toolResults.length > 0) {
|
|
5083
|
-
const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
|
|
5084
|
-
sessionState = applyOutputExtracts(sessionState, outputUpdates);
|
|
5085
|
-
}
|
|
5086
|
-
}
|
|
5087
5283
|
const inputFailures = evaluateInputInvariants(request, contracts);
|
|
5088
5284
|
if (mode === "enforce" && inputFailures.length > 0) {
|
|
5089
5285
|
if (onError === "block") {
|
|
@@ -5158,6 +5354,10 @@ function replay(client, opts = {}) {
|
|
|
5158
5354
|
sessionState = updateActualCost(sessionState, costDelta);
|
|
5159
5355
|
}
|
|
5160
5356
|
if (mode === "log-only") {
|
|
5357
|
+
trace.push({ stage: "gate", tool: null, verdict: "allow", reason: "log_only_mode", checked: {}, found: {} });
|
|
5358
|
+
trace.complete = true;
|
|
5359
|
+
lastTrace = trace;
|
|
5360
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5161
5361
|
captureDecision(
|
|
5162
5362
|
{ action: "allow", tool_calls: extractToolCalls(response, provider) },
|
|
5163
5363
|
response,
|
|
@@ -5170,13 +5370,26 @@ function replay(client, opts = {}) {
|
|
|
5170
5370
|
null,
|
|
5171
5371
|
null,
|
|
5172
5372
|
void 0,
|
|
5173
|
-
timing
|
|
5373
|
+
timing,
|
|
5374
|
+
trace
|
|
5174
5375
|
);
|
|
5175
5376
|
return response;
|
|
5176
5377
|
}
|
|
5378
|
+
currentTraceStage = "validate";
|
|
5177
5379
|
const toolCalls = extractToolCalls(response, provider);
|
|
5178
5380
|
const validateStart = Date.now();
|
|
5179
5381
|
const validation = validateResponse2(response, toolCalls, contracts, requestToolNames, unmatchedPolicy, provider);
|
|
5382
|
+
for (const f of validation.failures) {
|
|
5383
|
+
const toolName = extractToolNameFromFailure(f, toolCalls);
|
|
5384
|
+
trace.push({
|
|
5385
|
+
stage: "validate",
|
|
5386
|
+
tool: toolName === "_response" ? null : toolName,
|
|
5387
|
+
verdict: "block",
|
|
5388
|
+
reason: f.operator === "response_format" ? "response_format_failed" : "output_invariant_failed",
|
|
5389
|
+
checked: { path: f.path, operator: f.operator, invariant_type: f.operator === "response_format" ? "response_format" : "output" },
|
|
5390
|
+
found: { value: f.found }
|
|
5391
|
+
});
|
|
5392
|
+
}
|
|
5180
5393
|
timing.validate_ms += Date.now() - validateStart;
|
|
5181
5394
|
if (isActiveGovern && !attemptDegraded && attemptPreparedRequestId) {
|
|
5182
5395
|
const rtProposalStart = Date.now();
|
|
@@ -5211,9 +5424,10 @@ function replay(client, opts = {}) {
|
|
|
5211
5424
|
}
|
|
5212
5425
|
timing.runtime_ms += Date.now() - rtProposalStart;
|
|
5213
5426
|
}
|
|
5427
|
+
currentTraceStage = "cross_step";
|
|
5214
5428
|
const crossStepStart = Date.now();
|
|
5215
5429
|
const crossStepContracts = compiledSession ? Array.from(compiledSession.perToolContracts.values()) : contracts;
|
|
5216
|
-
const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts);
|
|
5430
|
+
const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts, traceCtx);
|
|
5217
5431
|
if (!crossStepResult.passed) {
|
|
5218
5432
|
for (const f of crossStepResult.failures) {
|
|
5219
5433
|
validation.failures.push({
|
|
@@ -5227,10 +5441,11 @@ function replay(client, opts = {}) {
|
|
|
5227
5441
|
}
|
|
5228
5442
|
}
|
|
5229
5443
|
timing.cross_step_ms += Date.now() - crossStepStart;
|
|
5444
|
+
currentTraceStage = "phase";
|
|
5230
5445
|
let phaseResult = null;
|
|
5231
5446
|
const phaseStart = Date.now();
|
|
5232
5447
|
if (compiledSession) {
|
|
5233
|
-
phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession);
|
|
5448
|
+
phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession, traceCtx);
|
|
5234
5449
|
if (!phaseResult.legal) {
|
|
5235
5450
|
validation.failures.push({
|
|
5236
5451
|
path: `$.tool_calls.${phaseResult.blockedTool}`,
|
|
@@ -5260,7 +5475,7 @@ function replay(client, opts = {}) {
|
|
|
5260
5475
|
for (const f of avResult.failures) {
|
|
5261
5476
|
validation.failures.push({
|
|
5262
5477
|
path: f.path,
|
|
5263
|
-
operator:
|
|
5478
|
+
operator: "argument_value_mismatch",
|
|
5264
5479
|
expected: String(f.expected),
|
|
5265
5480
|
found: String(f.actual),
|
|
5266
5481
|
message: f.detail,
|
|
@@ -5271,10 +5486,12 @@ function replay(client, opts = {}) {
|
|
|
5271
5486
|
}
|
|
5272
5487
|
}
|
|
5273
5488
|
}
|
|
5489
|
+
currentTraceStage = "limit";
|
|
5274
5490
|
if (resolvedSessionLimits) {
|
|
5275
5491
|
const workingState = { ...sessionState, toolCallCounts: workingToolCallCounts };
|
|
5276
5492
|
const perToolResult = checkPerToolLimits(workingState, tc.name, resolvedSessionLimits);
|
|
5277
5493
|
if (perToolResult.exceeded) {
|
|
5494
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "block", reason: "per_tool_limit_exceeded", checked: { max_calls: resolvedSessionLimits.max_calls_per_tool?.[tc.name] ?? null }, found: { current_calls: workingToolCallCounts.get(tc.name) ?? 0 } });
|
|
5278
5495
|
validation.failures.push({
|
|
5279
5496
|
path: `$.tool_calls.${tc.name}`,
|
|
5280
5497
|
operator: "session_limit",
|
|
@@ -5283,6 +5500,8 @@ function replay(client, opts = {}) {
|
|
|
5283
5500
|
message: perToolResult.reason ?? "per-tool limit exceeded",
|
|
5284
5501
|
contract_file: ""
|
|
5285
5502
|
});
|
|
5503
|
+
} else {
|
|
5504
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "allow", reason: "per_tool_limit_ok", checked: { max_calls: resolvedSessionLimits.max_calls_per_tool?.[tc.name] ?? null }, found: { current_calls: workingToolCallCounts.get(tc.name) ?? 0 } });
|
|
5286
5505
|
}
|
|
5287
5506
|
}
|
|
5288
5507
|
workingToolCallCounts.set(tc.name, (workingToolCallCounts.get(tc.name) ?? 0) + 1);
|
|
@@ -5299,6 +5518,7 @@ function replay(client, opts = {}) {
|
|
|
5299
5518
|
).length;
|
|
5300
5519
|
const totalMatches = loopResult.matchCount + intraMatches;
|
|
5301
5520
|
if (totalMatches >= resolvedSessionLimits.loop_detection.threshold) {
|
|
5521
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "block", reason: "loop_detected", checked: { window: resolvedSessionLimits.loop_detection.window, threshold: resolvedSessionLimits.loop_detection.threshold }, found: { match_count: totalMatches, arguments_hash: argsHash } });
|
|
5302
5522
|
validation.failures.push({
|
|
5303
5523
|
path: `$.tool_calls.${tc.name}`,
|
|
5304
5524
|
operator: "loop_detected",
|
|
@@ -5312,6 +5532,7 @@ function replay(client, opts = {}) {
|
|
|
5312
5532
|
}
|
|
5313
5533
|
}
|
|
5314
5534
|
timing.argument_values_ms += Date.now() - argValuesStart;
|
|
5535
|
+
currentTraceStage = "policy";
|
|
5315
5536
|
let policyVerdicts = null;
|
|
5316
5537
|
const policyStart = Date.now();
|
|
5317
5538
|
if (compiledSession?.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
|
|
@@ -5332,6 +5553,14 @@ function replay(client, opts = {}) {
|
|
|
5332
5553
|
);
|
|
5333
5554
|
policyVerdicts.set(tc.name, verdict);
|
|
5334
5555
|
if (!verdict.allowed) {
|
|
5556
|
+
trace.push({
|
|
5557
|
+
stage: "policy",
|
|
5558
|
+
tool: tc.name,
|
|
5559
|
+
verdict: "block",
|
|
5560
|
+
reason: verdict.reason?.startsWith("Session deny") ? "session_deny_matched" : verdict.reason?.startsWith("default_deny") ? "default_deny_no_allow" : "policy_denied",
|
|
5561
|
+
checked: { has_policy: true, default_deny: compiledSession.policyProgram.defaultDeny },
|
|
5562
|
+
found: { matched: true }
|
|
5563
|
+
});
|
|
5335
5564
|
validation.failures.push({
|
|
5336
5565
|
path: `$.tool_calls.${tc.name}`,
|
|
5337
5566
|
operator: "policy_denied",
|
|
@@ -5340,10 +5569,22 @@ function replay(client, opts = {}) {
|
|
|
5340
5569
|
message: `Policy denied: ${tc.name} \u2014 ${verdict.reason}`,
|
|
5341
5570
|
contract_file: ""
|
|
5342
5571
|
});
|
|
5572
|
+
} else {
|
|
5573
|
+
trace.push({
|
|
5574
|
+
stage: "policy",
|
|
5575
|
+
tool: tc.name,
|
|
5576
|
+
verdict: "allow",
|
|
5577
|
+
reason: "policy_allowed",
|
|
5578
|
+
checked: { has_policy: true, default_deny: compiledSession.policyProgram.defaultDeny },
|
|
5579
|
+
found: { session_deny_matched: false, tool_deny_matched: false }
|
|
5580
|
+
});
|
|
5343
5581
|
}
|
|
5344
5582
|
}
|
|
5583
|
+
} else {
|
|
5584
|
+
trace.push({ stage: "policy", tool: null, verdict: "skip", reason: "no_policy_configured", checked: {}, found: {} });
|
|
5345
5585
|
}
|
|
5346
5586
|
timing.policy_ms += Date.now() - policyStart;
|
|
5587
|
+
currentTraceStage = "gate";
|
|
5347
5588
|
if (mode === "shadow") {
|
|
5348
5589
|
const shadowGateStart = Date.now();
|
|
5349
5590
|
const shadowDecision = validation.failures.length > 0 ? {
|
|
@@ -5352,6 +5593,15 @@ function replay(client, opts = {}) {
|
|
|
5352
5593
|
blocked: buildBlockedCalls(toolCalls, validation.failures, validation.unmatchedBlocked),
|
|
5353
5594
|
response_modification: gateMode
|
|
5354
5595
|
} : { action: "allow", tool_calls: toolCalls };
|
|
5596
|
+
const blockedTools = shadowDecision.action === "block" ? shadowDecision.blocked.map((b) => b.tool_name) : [];
|
|
5597
|
+
trace.push({
|
|
5598
|
+
stage: "gate",
|
|
5599
|
+
tool: null,
|
|
5600
|
+
verdict: blockedTools.length > 0 ? "info" : "allow",
|
|
5601
|
+
reason: blockedTools.length > 0 ? "violations_found" : "no_violations",
|
|
5602
|
+
checked: { gate_mode: gateMode },
|
|
5603
|
+
found: { blocked_count: blockedTools.length, action: shadowDecision.action, ...blockedTools.length > 0 ? { blocked_tools: blockedTools } : {} }
|
|
5604
|
+
});
|
|
5355
5605
|
const shadowDelta = {
|
|
5356
5606
|
would_have_blocked: shadowDecision.action === "block" ? shadowDecision.blocked : [],
|
|
5357
5607
|
would_have_narrowed: narrowResult?.removed ?? [],
|
|
@@ -5361,7 +5611,11 @@ function replay(client, opts = {}) {
|
|
|
5361
5611
|
lastShadowDeltaValue = shadowDelta;
|
|
5362
5612
|
shadowEvaluationCount++;
|
|
5363
5613
|
timing.gate_ms += Date.now() - shadowGateStart;
|
|
5364
|
-
|
|
5614
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: sessionState.currentPhase, tools_committed: [], tools_blocked: blockedTools, killed: false, step_index: sessionState.totalStepCount } });
|
|
5615
|
+
trace.complete = true;
|
|
5616
|
+
lastTrace = trace;
|
|
5617
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5618
|
+
captureDecision(shadowDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, shadowDelta, timing, trace);
|
|
5365
5619
|
return response;
|
|
5366
5620
|
}
|
|
5367
5621
|
if (isCompatAdvisory) {
|
|
@@ -5402,7 +5656,21 @@ function replay(client, opts = {}) {
|
|
|
5402
5656
|
sessionState = recordDecisionOutcome(sessionState, "allowed");
|
|
5403
5657
|
}
|
|
5404
5658
|
timing.finalize_ms += Date.now() - advisoryFinalizeStart;
|
|
5405
|
-
|
|
5659
|
+
const advisoryBlockedTools = advisoryDecision.action === "block" ? advisoryDecision.blocked.map((b) => b.tool_name) : [];
|
|
5660
|
+
trace.push({
|
|
5661
|
+
stage: "gate",
|
|
5662
|
+
tool: null,
|
|
5663
|
+
verdict: advisoryBlockedTools.length > 0 ? "info" : "allow",
|
|
5664
|
+
reason: advisoryBlockedTools.length > 0 ? "violations_found" : "no_violations",
|
|
5665
|
+
checked: { gate_mode: gateMode },
|
|
5666
|
+
found: { blocked_count: advisoryBlockedTools.length, action: advisoryDecision.action, ...advisoryBlockedTools.length > 0 ? { blocked_tools: advisoryBlockedTools } : {} }
|
|
5667
|
+
});
|
|
5668
|
+
const advisoryNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
|
|
5669
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: advisoryNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: advisoryBlockedTools, killed: false, step_index: sessionState.totalStepCount } });
|
|
5670
|
+
trace.complete = true;
|
|
5671
|
+
lastTrace = trace;
|
|
5672
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5673
|
+
captureDecision(advisoryDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5406
5674
|
return response;
|
|
5407
5675
|
}
|
|
5408
5676
|
const enforceGateStart = Date.now();
|
|
@@ -5440,7 +5708,20 @@ function replay(client, opts = {}) {
|
|
|
5440
5708
|
});
|
|
5441
5709
|
}
|
|
5442
5710
|
}
|
|
5443
|
-
|
|
5711
|
+
trace.push({
|
|
5712
|
+
stage: "gate",
|
|
5713
|
+
tool: null,
|
|
5714
|
+
verdict: "allow",
|
|
5715
|
+
reason: "no_violations",
|
|
5716
|
+
checked: { gate_mode: gateMode },
|
|
5717
|
+
found: { blocked_count: 0, action: "allow" }
|
|
5718
|
+
});
|
|
5719
|
+
const allowNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
|
|
5720
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: completedStep.phase, phase_after: allowNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: [], killed: false, step_index: sessionState.totalStepCount } });
|
|
5721
|
+
trace.complete = true;
|
|
5722
|
+
lastTrace = trace;
|
|
5723
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5724
|
+
captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5444
5725
|
return response;
|
|
5445
5726
|
}
|
|
5446
5727
|
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
@@ -5505,15 +5786,42 @@ function replay(client, opts = {}) {
|
|
|
5505
5786
|
);
|
|
5506
5787
|
continue;
|
|
5507
5788
|
}
|
|
5508
|
-
|
|
5789
|
+
const blockBlockedTools = decision.action === "block" ? decision.blocked.map((b) => b.tool_name) : [];
|
|
5790
|
+
trace.push({
|
|
5791
|
+
stage: "gate",
|
|
5792
|
+
tool: null,
|
|
5793
|
+
verdict: "block",
|
|
5794
|
+
reason: "violations_found",
|
|
5795
|
+
checked: { gate_mode: gateMode },
|
|
5796
|
+
found: { blocked_count: blockBlockedTools.length, action: "block", blocked_tools: blockBlockedTools }
|
|
5797
|
+
});
|
|
5798
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: sessionState.currentPhase, tools_committed: [], tools_blocked: blockBlockedTools, killed, step_index: sessionState.totalStepCount } });
|
|
5799
|
+
trace.complete = true;
|
|
5800
|
+
lastTrace = trace;
|
|
5801
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5802
|
+
captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5509
5803
|
return applyGateDecision(decision, response, provider, gateMode, opts.onBlock);
|
|
5510
5804
|
}
|
|
5511
5805
|
if (lastError) throw lastError;
|
|
5512
5806
|
throw new ReplayInternalError("Retry loop exhausted without result", { sessionId });
|
|
5513
5807
|
} catch (err) {
|
|
5514
5808
|
if (err instanceof ReplayContractError || err instanceof ReplayKillError) {
|
|
5809
|
+
if (!trace.complete) {
|
|
5810
|
+
lastTrace = trace;
|
|
5811
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5812
|
+
}
|
|
5515
5813
|
throw err;
|
|
5516
5814
|
}
|
|
5815
|
+
trace.push({
|
|
5816
|
+
stage: currentTraceStage,
|
|
5817
|
+
tool: null,
|
|
5818
|
+
verdict: "error",
|
|
5819
|
+
reason: "stage_threw",
|
|
5820
|
+
checked: {},
|
|
5821
|
+
found: { error: err instanceof Error ? err.message : String(err) }
|
|
5822
|
+
});
|
|
5823
|
+
lastTrace = trace;
|
|
5824
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5517
5825
|
sessionState = recordDecisionOutcome(sessionState, "error");
|
|
5518
5826
|
if (resolvedSessionLimits?.circuit_breaker) {
|
|
5519
5827
|
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
@@ -5639,6 +5947,9 @@ function replay(client, opts = {}) {
|
|
|
5639
5947
|
getLastShadowDelta() {
|
|
5640
5948
|
return lastShadowDeltaValue;
|
|
5641
5949
|
},
|
|
5950
|
+
getLastTrace() {
|
|
5951
|
+
return lastTrace;
|
|
5952
|
+
},
|
|
5642
5953
|
/**
|
|
5643
5954
|
* v3: Manually restrict available tools within compiled legal space.
|
|
5644
5955
|
* @see specs/replay-v3.md § narrow() / widen()
|
|
@@ -5774,7 +6085,7 @@ function replay(client, opts = {}) {
|
|
|
5774
6085
|
}
|
|
5775
6086
|
return wrapped;
|
|
5776
6087
|
}
|
|
5777
|
-
function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam) {
|
|
6088
|
+
function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam, traceParam) {
|
|
5778
6089
|
if (!buffer && !store) return;
|
|
5779
6090
|
if (timingParam) {
|
|
5780
6091
|
timingParam.total_ms = Date.now() - guardStart;
|
|
@@ -5809,6 +6120,7 @@ function replay(client, opts = {}) {
|
|
|
5809
6120
|
phase: sessionState.currentPhase,
|
|
5810
6121
|
phase_transition: phaseTransitionStr,
|
|
5811
6122
|
shadow_delta: shadowDelta,
|
|
6123
|
+
trace: traceParam ? redactTrace(traceParam, opts.captureLevel ?? "full") : void 0,
|
|
5812
6124
|
receipt: null
|
|
5813
6125
|
};
|
|
5814
6126
|
const capturedCall = {
|
|
@@ -6366,6 +6678,7 @@ function resolveSessionLimits(contracts) {
|
|
|
6366
6678
|
const sl = c.session_limits;
|
|
6367
6679
|
if (sl.max_steps !== void 0 && merged.max_steps === void 0) merged.max_steps = sl.max_steps;
|
|
6368
6680
|
if (sl.max_tool_calls !== void 0 && merged.max_tool_calls === void 0) merged.max_tool_calls = sl.max_tool_calls;
|
|
6681
|
+
if (sl.max_tool_calls_mode !== void 0 && merged.max_tool_calls_mode === void 0) merged.max_tool_calls_mode = sl.max_tool_calls_mode;
|
|
6369
6682
|
if (sl.max_cost_per_session !== void 0 && merged.max_cost_per_session === void 0) merged.max_cost_per_session = sl.max_cost_per_session;
|
|
6370
6683
|
if (sl.loop_detection && !merged.loop_detection) merged.loop_detection = sl.loop_detection;
|
|
6371
6684
|
if (sl.circuit_breaker && !merged.circuit_breaker) merged.circuit_breaker = sl.circuit_breaker;
|
|
@@ -6473,6 +6786,7 @@ function createInactiveSession(client, sessionId, reason) {
|
|
|
6473
6786
|
getState: () => EMPTY_STATE_SNAPSHOT,
|
|
6474
6787
|
getLastNarrowing: () => null,
|
|
6475
6788
|
getLastShadowDelta: () => null,
|
|
6789
|
+
getLastTrace: () => null,
|
|
6476
6790
|
narrow() {
|
|
6477
6791
|
},
|
|
6478
6792
|
widen() {
|
|
@@ -6514,6 +6828,7 @@ function createBlockingInactiveSession(client, sessionId, detail, configError) {
|
|
|
6514
6828
|
getState: () => EMPTY_STATE_SNAPSHOT,
|
|
6515
6829
|
getLastNarrowing: () => null,
|
|
6516
6830
|
getLastShadowDelta: () => null,
|
|
6831
|
+
getLastTrace: () => null,
|
|
6517
6832
|
narrow() {
|
|
6518
6833
|
},
|
|
6519
6834
|
widen() {
|
|
@@ -6598,6 +6913,83 @@ function generateSessionId2() {
|
|
|
6598
6913
|
/**
 * Drop a leading "sha256:" algorithm tag from a hash string.
 *
 * @param {string} hash - Hash text, with or without the "sha256:" prefix.
 * @returns {string} The text after the prefix, or `hash` itself when the
 *   prefix is not present at the very start.
 */
function stripHashPrefix(hash) {
  const prefix = "sha256:";
  if (!hash.startsWith(prefix)) {
    return hash;
  }
  // prefix.length === 7, the same offset the literal slice used.
  return hash.slice(prefix.length);
}
|
|
6916
|
+
/**
 * Resolve the console verbosity from the REPLAYCI_LOG environment variable.
 *
 * Matching is case-insensitive and ignores surrounding whitespace:
 *   - "trace" / "debug"          -> "trace"
 *   - "silent" / "off" / "none"  -> "silent"
 *   - anything else, or unset    -> "warn"
 *
 * The variable is re-read on every call, so changes made at runtime take
 * effect on the next call.
 *
 * @returns {"trace" | "warn" | "silent"} The effective log level.
 */
function resolveLogLevel() {
  // `process` may be absent, or be a shim object without `env`; neither case
  // is allowed to throw out of a logging helper.
  const raw = typeof process !== "undefined" ? process.env?.REPLAYCI_LOG : void 0;
  if (typeof raw !== "string") return "warn";
  // Trim so values such as "trace " or "debug\n" (common when the variable is
  // set from a file or CI template) are still recognized.
  const normalized = raw.trim().toLowerCase();
  if (normalized === "trace" || normalized === "debug") return "trace";
  if (normalized === "silent" || normalized === "off" || normalized === "none") return "silent";
  return "warn";
}
|
|
6924
|
+
/**
 * Default diagnostics sink: writes a one-line, human-readable summary of a
 * replay diagnostic event to `console.warn`.
 *
 * Verbosity comes from `resolveLogLevel()`:
 *   - "silent": nothing is printed.
 *   - "trace":  for `replay_trace` events, every trace entry is printed,
 *               including its `checked` / `found` records.
 *   - otherwise: for `replay_trace` events, only entries whose verdict is
 *               "block" are printed.
 * All other event types print the same line at every non-silent level.
 * Unknown event types are ignored.
 *
 * This function is written to never throw: malformed events (missing
 * `removed`, missing `trace` / `entries`, null `checked` / `found`, records
 * that cannot be serialized) are tolerated so that a logging problem cannot
 * cut a trace dump short or surface as an error in the caller.
 *
 * @param {{ type: string, [key: string]: unknown }} event - Diagnostic event.
 * @returns {void}
 */
function defaultReplayDiagnosticsHandler(event) {
  const level = resolveLogLevel();
  if (level === "silent") return;

  // Render a `checked` / `found` record as " label={...}"; empty string when
  // the record is absent or has no keys. JSON.stringify throws on circular
  // references and BigInt values, so fall back to a placeholder instead of
  // aborting the rest of the trace output.
  const formatRecord = (label, record) => {
    if (record == null || Object.keys(record).length === 0) return "";
    try {
      return ` ${label}=${JSON.stringify(record)}`;
    } catch {
      return ` ${label}=[unserializable]`;
    }
  };

  switch (event?.type) {
    case "replay_inactive":
      console.warn(`[replayci] replay() inactive: ${event.reason}${event.error_message ? ` \u2014 ${event.error_message}` : ""}`);
      break;
    case "replay_compile_error":
      console.warn(`[replayci] compile error: ${event.details}`);
      break;
    case "replay_compile_warning":
      console.warn(`[replayci] compile warning: ${event.details}`);
      break;
    case "replay_bypass_detected":
      console.warn(`[replayci] bypass detected on session ${event.session_id}`);
      break;
    case "replay_kill":
      console.warn(`[replayci] session ${event.session_id} killed`);
      break;
    case "replay_block":
      console.warn(`[replayci] blocked ${event.tool_name}: ${event.reason}`);
      break;
    case "replay_narrow": {
      // One line per removed tool; a missing list is treated as empty.
      for (const r of event.removed ?? []) {
        console.warn(`[replayci] removed ${r.tool} \u2192 ${r.reason}${r.detail ? ` (${r.detail})` : ""}`);
      }
      break;
    }
    case "replay_trace": {
      const t = event.trace;
      if (t == null) break;
      const entries = Array.isArray(t.entries) ? t.entries : [];
      if (level === "trace") {
        for (const entry of entries) {
          const toolStr = entry.tool ? ` ${entry.tool}` : "";
          // Skip the reason when it would merely repeat the verdict.
          const detail = entry.reason !== entry.verdict ? ` \u2014 ${entry.reason}` : "";
          const checkedStr = formatRecord("checked", entry.checked);
          const foundStr = formatRecord("found", entry.found);
          console.warn(`[replayci] ${entry.stage}${toolStr}: ${entry.verdict}${detail}${checkedStr}${foundStr}`);
        }
        if (!t.complete) {
          console.warn(`[replayci] trace INCOMPLETE (fault in pipeline)`);
        }
      } else {
        for (const b of entries) {
          if (b.verdict !== "block") continue;
          // Entries without a tool are reported under the label "session".
          const toolStr = b.tool ?? "session";
          console.warn(`[replayci] blocked ${toolStr} at ${b.stage} \u2192 ${b.reason}`);
        }
        if (!t.complete) {
          console.warn(`[replayci] enforcement cycle incomplete (fault) \u2014 session.getLastTrace() for partial trace`);
        }
      }
      break;
    }
    case "replay_workflow_error":
      console.warn(`[replayci] workflow error: ${event.details}`);
      break;
    case "replay_state_sync_error":
      console.warn(`[replayci] state sync error: ${event.details}`);
      break;
    case "replay_receipt_error":
      console.warn(`[replayci] receipt error (${event.tool_name}): ${event.details}`);
      break;
    case "replay_capture_error":
      console.warn(`[replayci] capture error: ${event.details}`);
      break;
    default:
      break;
  }
}
|
|
6601
6993
|
function emitDiagnostic2(diagnostics, event) {
|
|
6602
6994
|
try {
|
|
6603
6995
|
diagnostics?.(event);
|