@replayci/replay 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +458 -66
- package/dist/index.d.cts +48 -1
- package/dist/index.d.ts +48 -1
- package/dist/index.js +458 -66
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -3224,6 +3224,34 @@ function redactCapture(input) {
|
|
|
3224
3224
|
pattern_fingerprint: PATTERN_FINGERPRINT
|
|
3225
3225
|
};
|
|
3226
3226
|
}
|
|
3227
|
+
/**
 * Prepare a trace for capture at the requested capture level.
 *
 * When the level is "full" the very same trace object is handed back.
 * For every other level a shallow copy of the trace is returned in which
 * `entries` is a new array built by running each entry through
 * redactTraceEntry; the incoming trace and its entries array are left as-is.
 *
 * @param {object} trace - Trace object exposing an `entries` array.
 * @param {string} captureLevel - Capture level; "full" bypasses redaction.
 * @returns {object} The original trace, or a shallow copy with mapped entries.
 */
function redactTrace(trace, captureLevel) {
  const needsRedaction = captureLevel !== "full";
  if (!needsRedaction) {
    return trace;
  }
  const redactOne = (entry) => redactTraceEntry(entry, captureLevel);
  const copy = { ...trace };
  // Overwrite in place so `entries` keeps the key position it had on `trace`.
  copy.entries = trace.entries.map(redactOne);
  return copy;
}
|
|
3234
|
+
/**
 * Redact a single trace entry for the given capture level.
 *
 * Only the "metadata" level alters the entry: a shallow copy is returned whose
 * `checked` and `found` records have each been passed through redactRecord.
 * For any other level the entry is returned untouched (same object).
 *
 * @param {object} entry - Trace entry carrying `checked` and `found` records.
 * @param {string} captureLevel - Capture level governing redaction.
 * @returns {object} The original entry, or a copy with redacted records.
 */
function redactTraceEntry(entry, captureLevel) {
  if (captureLevel !== "metadata") {
    return entry;
  }
  const redactedChecked = redactRecord(entry.checked);
  const redactedFound = redactRecord(entry.found);
  return { ...entry, checked: redactedChecked, found: redactedFound };
}
|
|
3244
|
+
/**
 * Build a redacted copy of a record.
 *
 * Every string value is passed through redactString, including strings nested
 * inside arrays and plain objects (previously only top-level strings were
 * redacted, so nested values were copied through verbatim). Numbers, booleans,
 * null and other non-string primitives are copied unchanged. Objects that are
 * not plain (prototype other than Object.prototype / null) are passed through
 * by reference, as before. The input is never mutated.
 *
 * @param {object|null|undefined} record - Key/value record to redact.
 * @returns {object} A new plain object with the same keys; `{}` when `record`
 *   is null, undefined, or not an object.
 */
function redactRecord(record) {
  // A missing record yields an empty result instead of a TypeError from Object.entries.
  if (record === null || typeof record !== "object") {
    return {};
  }

  // Tracks the containers on the current recursion path so a cyclic value
  // cannot recurse forever; shared (non-cyclic) references are still copied.
  const activePath = new WeakSet();

  const redactValue = (value) => {
    if (typeof value === "string") {
      return redactString(value);
    }
    if (value === null || typeof value !== "object") {
      return value;
    }
    if (activePath.has(value)) {
      return "[Circular]";
    }

    const isArray = Array.isArray(value);
    if (!isArray) {
      const proto = Object.getPrototypeOf(value);
      // Leave Dates, Maps, class instances, etc. untouched rather than flattening them.
      if (proto !== Object.prototype && proto !== null) {
        return value;
      }
    }

    activePath.add(value);
    let copy;
    if (isArray) {
      copy = value.map(redactValue);
    } else {
      copy = {};
      for (const [key, inner] of Object.entries(value)) {
        copy[key] = redactValue(inner);
      }
    }
    activePath.delete(value);
    return copy;
  };

  const result = {};
  for (const [key, value] of Object.entries(record)) {
    result[key] = redactValue(value);
  }
  return result;
}
|
|
3227
3255
|
|
|
3228
3256
|
// src/errors/replay.ts
|
|
3229
3257
|
var ReplayContractError = class extends Error {
|
|
@@ -3520,8 +3548,9 @@ function toRecord8(value) {
|
|
|
3520
3548
|
var import_node_crypto3 = __toESM(require("crypto"), 1);
|
|
3521
3549
|
|
|
3522
3550
|
// src/phases.ts
|
|
3523
|
-
function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
3551
|
+
function validatePhaseTransition(toolCalls, sessionState, compiledSession, ctx) {
|
|
3524
3552
|
if (!compiledSession.phases) {
|
|
3553
|
+
ctx?.trace.push({ stage: "phase", tool: null, verdict: "skip", reason: "no_phases_configured", checked: {}, found: {} });
|
|
3525
3554
|
return { legal: true, newPhase: sessionState.currentPhase };
|
|
3526
3555
|
}
|
|
3527
3556
|
const attemptedTransitions = [];
|
|
@@ -3532,6 +3561,14 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3532
3561
|
sessionState.currentPhase ?? ""
|
|
3533
3562
|
);
|
|
3534
3563
|
if (!allowedTransitions?.includes(contract.transitions.advances_to)) {
|
|
3564
|
+
ctx?.trace.push({
|
|
3565
|
+
stage: "phase",
|
|
3566
|
+
tool: toolCall.name,
|
|
3567
|
+
verdict: "block",
|
|
3568
|
+
reason: "illegal_phase_transition",
|
|
3569
|
+
checked: { advances_to: contract.transitions.advances_to, from: sessionState.currentPhase },
|
|
3570
|
+
found: { allowed_transitions: allowedTransitions ?? [] }
|
|
3571
|
+
});
|
|
3535
3572
|
return {
|
|
3536
3573
|
legal: false,
|
|
3537
3574
|
newPhase: sessionState.currentPhase,
|
|
@@ -3548,6 +3585,14 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3548
3585
|
if (attemptedTransitions.length > 1) {
|
|
3549
3586
|
const distinctTargets = new Set(attemptedTransitions.map((t) => t.target));
|
|
3550
3587
|
if (distinctTargets.size > 1) {
|
|
3588
|
+
ctx?.trace.push({
|
|
3589
|
+
stage: "phase",
|
|
3590
|
+
tool: attemptedTransitions.map((t) => t.tool).join(", "),
|
|
3591
|
+
verdict: "block",
|
|
3592
|
+
reason: "ambiguous_phase_transition",
|
|
3593
|
+
checked: { targets: Array.from(distinctTargets) },
|
|
3594
|
+
found: { from: sessionState.currentPhase }
|
|
3595
|
+
});
|
|
3551
3596
|
return {
|
|
3552
3597
|
legal: false,
|
|
3553
3598
|
newPhase: sessionState.currentPhase,
|
|
@@ -3558,7 +3603,17 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3558
3603
|
}
|
|
3559
3604
|
}
|
|
3560
3605
|
if (attemptedTransitions.length > 0) {
|
|
3561
|
-
|
|
3606
|
+
const target = attemptedTransitions[0].target;
|
|
3607
|
+
const allowedTransitions = compiledSession.transitions.get(sessionState.currentPhase ?? "") ?? [];
|
|
3608
|
+
ctx?.trace.push({
|
|
3609
|
+
stage: "phase",
|
|
3610
|
+
tool: attemptedTransitions[0].tool,
|
|
3611
|
+
verdict: "allow",
|
|
3612
|
+
reason: "phase_advanced",
|
|
3613
|
+
checked: { advances_to: target, from: sessionState.currentPhase },
|
|
3614
|
+
found: { allowed_transitions: allowedTransitions }
|
|
3615
|
+
});
|
|
3616
|
+
return { legal: true, newPhase: target };
|
|
3562
3617
|
}
|
|
3563
3618
|
return { legal: true, newPhase: sessionState.currentPhase };
|
|
3564
3619
|
}
|
|
@@ -3808,7 +3863,7 @@ function checkCircuitBreaker(state, config) {
|
|
|
3808
3863
|
}
|
|
3809
3864
|
|
|
3810
3865
|
// src/crossStep.ts
|
|
3811
|
-
function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
3866
|
+
function validateCrossStep(toolCalls, sessionState, contracts, ctx) {
|
|
3812
3867
|
const failures = [];
|
|
3813
3868
|
const contractByTool = new Map(contracts.map((c) => [c.tool, c]));
|
|
3814
3869
|
const workingForbidden = new Set(sessionState.forbiddenTools);
|
|
@@ -3836,8 +3891,17 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3836
3891
|
reason: "forbidden_tool",
|
|
3837
3892
|
detail: resourceValue !== void 0 ? `Tool "${tc.name}" is forbidden in this session for resource ${JSON.stringify(resourceValue)}` : `Tool "${tc.name}" is forbidden in this session`
|
|
3838
3893
|
});
|
|
3894
|
+
ctx?.trace.push({
|
|
3895
|
+
stage: "cross_step",
|
|
3896
|
+
tool: tc.name,
|
|
3897
|
+
verdict: "block",
|
|
3898
|
+
reason: "forbidden_tool",
|
|
3899
|
+
checked: { tool: tc.name },
|
|
3900
|
+
found: { is_resource_scoped: resourceValue !== void 0, resource_value: resourceValue ?? null }
|
|
3901
|
+
});
|
|
3839
3902
|
continue;
|
|
3840
3903
|
}
|
|
3904
|
+
let crossStepPassed = true;
|
|
3841
3905
|
if (contract?.preconditions && contract.preconditions.length > 0) {
|
|
3842
3906
|
const results = evaluatePreconditions(
|
|
3843
3907
|
contract.preconditions,
|
|
@@ -3846,6 +3910,7 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3846
3910
|
);
|
|
3847
3911
|
for (const result of results) {
|
|
3848
3912
|
if (!result.satisfied) {
|
|
3913
|
+
crossStepPassed = false;
|
|
3849
3914
|
failures.push({
|
|
3850
3915
|
toolName: tc.name,
|
|
3851
3916
|
reason: "precondition_not_met",
|
|
@@ -3854,6 +3919,25 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3854
3919
|
}
|
|
3855
3920
|
}
|
|
3856
3921
|
}
|
|
3922
|
+
if (crossStepPassed) {
|
|
3923
|
+
ctx?.trace.push({
|
|
3924
|
+
stage: "cross_step",
|
|
3925
|
+
tool: tc.name,
|
|
3926
|
+
verdict: "allow",
|
|
3927
|
+
reason: "preconditions_satisfied",
|
|
3928
|
+
checked: { requires_prior_tool: contract?.preconditions?.[0]?.requires_prior_tool ?? null },
|
|
3929
|
+
found: { resource_value: resourceValue ?? null }
|
|
3930
|
+
});
|
|
3931
|
+
} else {
|
|
3932
|
+
ctx?.trace.push({
|
|
3933
|
+
stage: "cross_step",
|
|
3934
|
+
tool: tc.name,
|
|
3935
|
+
verdict: "block",
|
|
3936
|
+
reason: "precondition_not_met",
|
|
3937
|
+
checked: { requires_prior_tool: contract?.preconditions?.[0]?.requires_prior_tool ?? null },
|
|
3938
|
+
found: { resource_value: resourceValue ?? null, matching_step_index: null }
|
|
3939
|
+
});
|
|
3940
|
+
}
|
|
3857
3941
|
if (contract?.forbids_after) {
|
|
3858
3942
|
for (const entry of contract.forbids_after) {
|
|
3859
3943
|
if (typeof entry === "string") {
|
|
@@ -4075,20 +4159,23 @@ function extractPath2(obj, path) {
|
|
|
4075
4159
|
}
|
|
4076
4160
|
|
|
4077
4161
|
// src/narrow.ts
|
|
4078
|
-
function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter) {
|
|
4162
|
+
function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter, ctx) {
|
|
4079
4163
|
const allowed = [];
|
|
4080
4164
|
const removed = [];
|
|
4081
4165
|
for (const tool of requestedTools) {
|
|
4082
4166
|
if (manualFilter && !manualFilter.includes(tool.name)) {
|
|
4083
4167
|
removed.push({ tool: tool.name, reason: "manual_filter" });
|
|
4168
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "manual_filter", checked: { filter: manualFilter }, found: {} });
|
|
4084
4169
|
continue;
|
|
4085
4170
|
}
|
|
4086
4171
|
const contract = compiledSession.perToolContracts.get(tool.name);
|
|
4087
4172
|
if (!contract) {
|
|
4088
4173
|
if (unmatchedPolicy === "allow") {
|
|
4089
4174
|
allowed.push(tool);
|
|
4175
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "allow", reason: "no_contract_passthrough", checked: { unmatched_policy: "allow" }, found: {} });
|
|
4090
4176
|
} else {
|
|
4091
4177
|
removed.push({ tool: tool.name, reason: "no_contract" });
|
|
4178
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "no_contract", checked: { unmatched_policy: "block" }, found: {} });
|
|
4092
4179
|
}
|
|
4093
4180
|
continue;
|
|
4094
4181
|
}
|
|
@@ -4101,6 +4188,7 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4101
4188
|
reason: "wrong_phase",
|
|
4102
4189
|
detail: `Tool valid in [${contract.transitions.valid_in_phases.join(", ")}], current phase: ${sessionState.currentPhase}`
|
|
4103
4190
|
});
|
|
4191
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "wrong_phase", checked: { valid_in_phases: contract.transitions.valid_in_phases }, found: { current_phase: sessionState.currentPhase } });
|
|
4104
4192
|
continue;
|
|
4105
4193
|
}
|
|
4106
4194
|
}
|
|
@@ -4111,6 +4199,18 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4111
4199
|
);
|
|
4112
4200
|
const unsatisfied = results.find((r) => !r.satisfied);
|
|
4113
4201
|
if (unsatisfied) {
|
|
4202
|
+
const firstPre = contract.preconditions[0];
|
|
4203
|
+
ctx?.trace.push({
|
|
4204
|
+
stage: "narrow",
|
|
4205
|
+
tool: tool.name,
|
|
4206
|
+
verdict: "remove",
|
|
4207
|
+
reason: "precondition_not_met",
|
|
4208
|
+
checked: {
|
|
4209
|
+
requires_prior_tool: firstPre.requires_prior_tool ?? null,
|
|
4210
|
+
with_output: firstPre.with_output ?? []
|
|
4211
|
+
},
|
|
4212
|
+
found: { satisfied_precondition_cache_hit: false }
|
|
4213
|
+
});
|
|
4114
4214
|
removed.push({
|
|
4115
4215
|
tool: tool.name,
|
|
4116
4216
|
reason: "precondition_not_met",
|
|
@@ -4124,6 +4224,7 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4124
4224
|
tool: tool.name,
|
|
4125
4225
|
reason: "forbidden_in_state"
|
|
4126
4226
|
});
|
|
4227
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "forbidden_in_state", checked: { tool: tool.name }, found: { is_resource_scoped: false } });
|
|
4127
4228
|
continue;
|
|
4128
4229
|
}
|
|
4129
4230
|
if (compiledSession.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
|
|
@@ -4140,9 +4241,24 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4140
4241
|
reason: "policy_denied",
|
|
4141
4242
|
detail: verdict.reason ?? "Policy deny rule matched"
|
|
4142
4243
|
});
|
|
4244
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "policy_denied", checked: { rule_type: "session_deny" }, found: { matched: true } });
|
|
4143
4245
|
continue;
|
|
4144
4246
|
}
|
|
4145
4247
|
}
|
|
4248
|
+
ctx?.trace.push({
|
|
4249
|
+
stage: "narrow",
|
|
4250
|
+
tool: tool.name,
|
|
4251
|
+
verdict: "allow",
|
|
4252
|
+
reason: "all_checks_passed",
|
|
4253
|
+
checked: {
|
|
4254
|
+
has_contract: true,
|
|
4255
|
+
phase_ok: true,
|
|
4256
|
+
preconditions_ok: true,
|
|
4257
|
+
not_forbidden: true,
|
|
4258
|
+
policy_ok: true
|
|
4259
|
+
},
|
|
4260
|
+
found: {}
|
|
4261
|
+
});
|
|
4146
4262
|
allowed.push(tool);
|
|
4147
4263
|
}
|
|
4148
4264
|
return { allowed, removed };
|
|
@@ -4636,7 +4752,7 @@ function replay(client, opts = {}) {
|
|
|
4636
4752
|
const unmatchedPolicy = opts.unmatchedPolicy ?? "block";
|
|
4637
4753
|
const maxRetries = Math.min(Math.max(0, opts.maxRetries ?? 0), MAX_RETRIES);
|
|
4638
4754
|
const compatEnforcement = opts.compatEnforcement ?? "protective";
|
|
4639
|
-
const diagnostics = opts.diagnostics;
|
|
4755
|
+
const diagnostics = opts.diagnostics ?? defaultReplayDiagnosticsHandler;
|
|
4640
4756
|
let provider;
|
|
4641
4757
|
try {
|
|
4642
4758
|
provider = detectProvider(client);
|
|
@@ -4669,6 +4785,12 @@ function replay(client, opts = {}) {
|
|
|
4669
4785
|
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4670
4786
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4671
4787
|
}
|
|
4788
|
+
if (opts.contractsDir && !discoveredSessionYaml && !opts.sessionYamlPath) {
|
|
4789
|
+
emitDiagnostic2(diagnostics, {
|
|
4790
|
+
type: "replay_compile_warning",
|
|
4791
|
+
details: "No session.yaml found in contractsDir \u2014 session-level features (phases, policy, session_limits) are inactive. Per-tool contracts still apply."
|
|
4792
|
+
});
|
|
4793
|
+
}
|
|
4672
4794
|
let sessionYaml = discoveredSessionYaml;
|
|
4673
4795
|
if (!sessionYaml && opts.providerConstraints) {
|
|
4674
4796
|
sessionYaml = { schema_version: "1.0", agent, provider_constraints: opts.providerConstraints };
|
|
@@ -4828,6 +4950,7 @@ function replay(client, opts = {}) {
|
|
|
4828
4950
|
let bypassDetected = false;
|
|
4829
4951
|
let lastShadowDeltaValue = null;
|
|
4830
4952
|
let lastNarrowResult = null;
|
|
4953
|
+
let lastTrace = null;
|
|
4831
4954
|
let shadowEvaluationCount = 0;
|
|
4832
4955
|
let manualFilter = null;
|
|
4833
4956
|
const deferredReceipts = /* @__PURE__ */ new Map();
|
|
@@ -4902,6 +5025,18 @@ function replay(client, opts = {}) {
|
|
|
4902
5025
|
} catch {
|
|
4903
5026
|
}
|
|
4904
5027
|
}
|
|
5028
|
+
/**
 * Create a fresh, initially incomplete trace for one step.
 *
 * The returned object records the enclosing `sessionId`, the supplied
 * `stepIndex`, a `complete` flag starting at false, and an `entries` array.
 * Its `push` method appends to that same `entries` array.
 *
 * @param {number} stepIndex - Step index stored on the trace.
 * @returns {object} Trace object with `sessionId`, `stepIndex`, `complete`,
 *   `entries`, and `push(entry)`.
 */
function createTrace(stepIndex) {
  const recorded = [];
  const appendEntry = (entry) => {
    recorded.push(entry);
  };
  const trace = {
    sessionId,
    stepIndex,
    complete: false,
    entries: recorded,
    push: appendEntry
  };
  return trace;
}
|
|
4905
5040
|
const enforcementCreate = async function replayEnforcementCreate(...args) {
|
|
4906
5041
|
if (killed) {
|
|
4907
5042
|
throw new ReplayKillError(sessionId, killedAt);
|
|
@@ -4944,8 +5079,19 @@ function replay(client, opts = {}) {
|
|
|
4944
5079
|
total_ms: 0,
|
|
4945
5080
|
enforcement_ms: 0
|
|
4946
5081
|
};
|
|
5082
|
+
const trace = createTrace(sessionState.totalStepCount);
|
|
5083
|
+
const traceCtx = { trace };
|
|
5084
|
+
let currentTraceStage = "narrow";
|
|
4947
5085
|
const request = toRecord10(args[0]);
|
|
4948
5086
|
const requestToolNames = extractRequestToolNames(request);
|
|
5087
|
+
const messages = Array.isArray(request.messages) ? request.messages : [];
|
|
5088
|
+
if (messages.length > 0) {
|
|
5089
|
+
const toolResults = extractToolResults(messages, provider);
|
|
5090
|
+
if (toolResults.length > 0) {
|
|
5091
|
+
const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
|
|
5092
|
+
sessionState = applyOutputExtracts(sessionState, outputUpdates);
|
|
5093
|
+
}
|
|
5094
|
+
}
|
|
4949
5095
|
let narrowResult = null;
|
|
4950
5096
|
let activeArgs = args;
|
|
4951
5097
|
if (compiledSession && Array.isArray(request.tools) && request.tools.length > 0) {
|
|
@@ -4956,7 +5102,8 @@ function replay(client, opts = {}) {
|
|
|
4956
5102
|
sessionState,
|
|
4957
5103
|
compiledSession,
|
|
4958
5104
|
unmatchedPolicy,
|
|
4959
|
-
manualFilter
|
|
5105
|
+
manualFilter,
|
|
5106
|
+
traceCtx
|
|
4960
5107
|
);
|
|
4961
5108
|
lastNarrowResult = narrowResult;
|
|
4962
5109
|
if (narrowResult.removed.length > 0) {
|
|
@@ -4994,55 +5141,96 @@ function replay(client, opts = {}) {
|
|
|
4994
5141
|
timing.narrow_ms = Date.now() - guardStart;
|
|
4995
5142
|
const preCheckStart = Date.now();
|
|
4996
5143
|
try {
|
|
5144
|
+
currentTraceStage = "pre_check";
|
|
4997
5145
|
if (mode === "enforce" && resolvedSessionLimits) {
|
|
4998
5146
|
const limitResult = checkSessionLimits(sessionState, resolvedSessionLimits);
|
|
4999
5147
|
if (limitResult.exceeded) {
|
|
5000
|
-
|
|
5001
|
-
|
|
5002
|
-
|
|
5003
|
-
|
|
5004
|
-
|
|
5005
|
-
|
|
5006
|
-
|
|
5007
|
-
|
|
5008
|
-
|
|
5009
|
-
|
|
5010
|
-
|
|
5011
|
-
|
|
5012
|
-
|
|
5013
|
-
|
|
5014
|
-
|
|
5015
|
-
|
|
5016
|
-
|
|
5017
|
-
|
|
5018
|
-
|
|
5019
|
-
|
|
5148
|
+
let narrowedPastLimit = false;
|
|
5149
|
+
if (limitResult.reason?.startsWith("max_tool_calls") && resolvedSessionLimits.max_tool_calls_mode === "narrow" && resolvedSessionLimits.max_calls_per_tool) {
|
|
5150
|
+
const costOk = !(typeof resolvedSessionLimits.max_cost_per_session === "number" && sessionState.actualCost >= resolvedSessionLimits.max_cost_per_session);
|
|
5151
|
+
if (costOk) {
|
|
5152
|
+
const currentRequest = toRecord10(activeArgs[0]);
|
|
5153
|
+
const currentTools = Array.isArray(currentRequest.tools) ? extractToolDefinitions(currentRequest.tools) : [];
|
|
5154
|
+
const budgetedTools = currentTools.filter((tool) => {
|
|
5155
|
+
const max = resolvedSessionLimits.max_calls_per_tool[tool.name];
|
|
5156
|
+
if (typeof max !== "number") return false;
|
|
5157
|
+
return (sessionState.toolCallCounts.get(tool.name) ?? 0) < max;
|
|
5158
|
+
});
|
|
5159
|
+
if (budgetedTools.length > 0) {
|
|
5160
|
+
const modifiedRequest = { ...currentRequest, tools: budgetedTools };
|
|
5161
|
+
activeArgs = [modifiedRequest, ...Array.prototype.slice.call(activeArgs, 1)];
|
|
5162
|
+
narrowedPastLimit = true;
|
|
5163
|
+
trace.push({
|
|
5164
|
+
stage: "pre_check",
|
|
5165
|
+
tool: null,
|
|
5166
|
+
verdict: "narrow",
|
|
5167
|
+
reason: "max_tool_calls_narrow_mode",
|
|
5168
|
+
checked: { max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null, budgeted_tools: budgetedTools.map((t) => t.name) },
|
|
5169
|
+
found: { total_tool_calls: sessionState.totalToolCalls }
|
|
5170
|
+
});
|
|
5171
|
+
}
|
|
5020
5172
|
}
|
|
5021
5173
|
}
|
|
5022
|
-
|
|
5023
|
-
|
|
5024
|
-
|
|
5025
|
-
|
|
5026
|
-
|
|
5027
|
-
|
|
5028
|
-
|
|
5029
|
-
|
|
5030
|
-
|
|
5031
|
-
|
|
5032
|
-
|
|
5033
|
-
|
|
5034
|
-
void 0,
|
|
5035
|
-
timing
|
|
5036
|
-
);
|
|
5037
|
-
if (isCompatAdvisory) {
|
|
5038
|
-
emitDiagnostic2(diagnostics, {
|
|
5039
|
-
type: "replay_compat_advisory",
|
|
5040
|
-
session_id: sessionId,
|
|
5041
|
-
would_block: decision.blocked,
|
|
5042
|
-
details: limitResult.reason ?? "session limit exceeded"
|
|
5174
|
+
if (!narrowedPastLimit) {
|
|
5175
|
+
trace.push({
|
|
5176
|
+
stage: "pre_check",
|
|
5177
|
+
tool: null,
|
|
5178
|
+
verdict: "block",
|
|
5179
|
+
reason: "session_limit_exceeded",
|
|
5180
|
+
checked: {
|
|
5181
|
+
max_steps: resolvedSessionLimits.max_steps ?? null,
|
|
5182
|
+
max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null,
|
|
5183
|
+
max_cost: resolvedSessionLimits.max_cost_per_session ?? null
|
|
5184
|
+
},
|
|
5185
|
+
found: { total_steps: sessionState.totalStepCount, total_tool_calls: sessionState.totalToolCalls, actual_cost: sessionState.actualCost }
|
|
5043
5186
|
});
|
|
5044
|
-
|
|
5045
|
-
|
|
5187
|
+
const decision = {
|
|
5188
|
+
action: "block",
|
|
5189
|
+
tool_calls: [],
|
|
5190
|
+
blocked: [{
|
|
5191
|
+
tool_name: "_session",
|
|
5192
|
+
arguments: "",
|
|
5193
|
+
reason: "session_limit_exceeded",
|
|
5194
|
+
contract_file: "",
|
|
5195
|
+
failures: [{ path: "$", operator: "session_limit", expected: "", found: "", message: limitResult.reason ?? "session limit exceeded" }]
|
|
5196
|
+
}],
|
|
5197
|
+
response_modification: gateMode
|
|
5198
|
+
};
|
|
5199
|
+
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
5200
|
+
if (resolvedSessionLimits.circuit_breaker) {
|
|
5201
|
+
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
5202
|
+
if (cbResult.triggered) {
|
|
5203
|
+
killed = true;
|
|
5204
|
+
killedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
5205
|
+
sessionState = killSession(sessionState);
|
|
5206
|
+
emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
|
|
5207
|
+
}
|
|
5208
|
+
}
|
|
5209
|
+
timing.pre_check_ms = Date.now() - preCheckStart;
|
|
5210
|
+
captureDecision(
|
|
5211
|
+
decision,
|
|
5212
|
+
null,
|
|
5213
|
+
request,
|
|
5214
|
+
guardStart,
|
|
5215
|
+
requestToolNames,
|
|
5216
|
+
null,
|
|
5217
|
+
narrowResult,
|
|
5218
|
+
null,
|
|
5219
|
+
null,
|
|
5220
|
+
null,
|
|
5221
|
+
void 0,
|
|
5222
|
+
timing
|
|
5223
|
+
);
|
|
5224
|
+
if (isCompatAdvisory) {
|
|
5225
|
+
emitDiagnostic2(diagnostics, {
|
|
5226
|
+
type: "replay_compat_advisory",
|
|
5227
|
+
session_id: sessionId,
|
|
5228
|
+
would_block: decision.blocked,
|
|
5229
|
+
details: limitResult.reason ?? "session limit exceeded"
|
|
5230
|
+
});
|
|
5231
|
+
} else {
|
|
5232
|
+
throw buildContractError2(decision);
|
|
5233
|
+
}
|
|
5046
5234
|
}
|
|
5047
5235
|
}
|
|
5048
5236
|
if (isAtHardStepCap(sessionState)) {
|
|
@@ -5075,8 +5263,23 @@ function replay(client, opts = {}) {
|
|
|
5075
5263
|
);
|
|
5076
5264
|
throw buildContractError2(decision);
|
|
5077
5265
|
}
|
|
5266
|
+
if (!checkSessionLimits(sessionState, resolvedSessionLimits).exceeded) {
|
|
5267
|
+
trace.push({
|
|
5268
|
+
stage: "pre_check",
|
|
5269
|
+
tool: null,
|
|
5270
|
+
verdict: "allow",
|
|
5271
|
+
reason: "session_limits_ok",
|
|
5272
|
+
checked: {
|
|
5273
|
+
max_steps: resolvedSessionLimits.max_steps ?? null,
|
|
5274
|
+
max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null,
|
|
5275
|
+
max_cost: resolvedSessionLimits.max_cost_per_session ?? null
|
|
5276
|
+
},
|
|
5277
|
+
found: { total_steps: sessionState.totalStepCount, total_tool_calls: sessionState.totalToolCalls, actual_cost: sessionState.actualCost }
|
|
5278
|
+
});
|
|
5279
|
+
}
|
|
5280
|
+
} else if (mode === "enforce") {
|
|
5281
|
+
trace.push({ stage: "pre_check", tool: null, verdict: "skip", reason: "no_session_limits", checked: {}, found: {} });
|
|
5078
5282
|
}
|
|
5079
|
-
const messages = Array.isArray(request.messages) ? request.messages : [];
|
|
5080
5283
|
if (messages.length > 0) {
|
|
5081
5284
|
const msgResult = validateToolResultMessages(messages, contracts, provider);
|
|
5082
5285
|
if (!msgResult.passed) {
|
|
@@ -5086,13 +5289,6 @@ function replay(client, opts = {}) {
|
|
|
5086
5289
|
});
|
|
5087
5290
|
}
|
|
5088
5291
|
}
|
|
5089
|
-
if (messages.length > 0) {
|
|
5090
|
-
const toolResults = extractToolResults(messages, provider);
|
|
5091
|
-
if (toolResults.length > 0) {
|
|
5092
|
-
const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
|
|
5093
|
-
sessionState = applyOutputExtracts(sessionState, outputUpdates);
|
|
5094
|
-
}
|
|
5095
|
-
}
|
|
5096
5292
|
const inputFailures = evaluateInputInvariants(request, contracts);
|
|
5097
5293
|
if (mode === "enforce" && inputFailures.length > 0) {
|
|
5098
5294
|
if (onError === "block") {
|
|
@@ -5167,6 +5363,10 @@ function replay(client, opts = {}) {
|
|
|
5167
5363
|
sessionState = updateActualCost(sessionState, costDelta);
|
|
5168
5364
|
}
|
|
5169
5365
|
if (mode === "log-only") {
|
|
5366
|
+
trace.push({ stage: "gate", tool: null, verdict: "allow", reason: "log_only_mode", checked: {}, found: {} });
|
|
5367
|
+
trace.complete = true;
|
|
5368
|
+
lastTrace = trace;
|
|
5369
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5170
5370
|
captureDecision(
|
|
5171
5371
|
{ action: "allow", tool_calls: extractToolCalls(response, provider) },
|
|
5172
5372
|
response,
|
|
@@ -5179,13 +5379,26 @@ function replay(client, opts = {}) {
|
|
|
5179
5379
|
null,
|
|
5180
5380
|
null,
|
|
5181
5381
|
void 0,
|
|
5182
|
-
timing
|
|
5382
|
+
timing,
|
|
5383
|
+
trace
|
|
5183
5384
|
);
|
|
5184
5385
|
return response;
|
|
5185
5386
|
}
|
|
5387
|
+
currentTraceStage = "validate";
|
|
5186
5388
|
const toolCalls = extractToolCalls(response, provider);
|
|
5187
5389
|
const validateStart = Date.now();
|
|
5188
5390
|
const validation = validateResponse2(response, toolCalls, contracts, requestToolNames, unmatchedPolicy, provider);
|
|
5391
|
+
for (const f of validation.failures) {
|
|
5392
|
+
const toolName = extractToolNameFromFailure(f, toolCalls);
|
|
5393
|
+
trace.push({
|
|
5394
|
+
stage: "validate",
|
|
5395
|
+
tool: toolName === "_response" ? null : toolName,
|
|
5396
|
+
verdict: "block",
|
|
5397
|
+
reason: f.operator === "response_format" ? "response_format_failed" : "output_invariant_failed",
|
|
5398
|
+
checked: { path: f.path, operator: f.operator, invariant_type: f.operator === "response_format" ? "response_format" : "output" },
|
|
5399
|
+
found: { value: f.found }
|
|
5400
|
+
});
|
|
5401
|
+
}
|
|
5189
5402
|
timing.validate_ms += Date.now() - validateStart;
|
|
5190
5403
|
if (isActiveGovern && !attemptDegraded && attemptPreparedRequestId) {
|
|
5191
5404
|
const rtProposalStart = Date.now();
|
|
@@ -5220,9 +5433,10 @@ function replay(client, opts = {}) {
|
|
|
5220
5433
|
}
|
|
5221
5434
|
timing.runtime_ms += Date.now() - rtProposalStart;
|
|
5222
5435
|
}
|
|
5436
|
+
currentTraceStage = "cross_step";
|
|
5223
5437
|
const crossStepStart = Date.now();
|
|
5224
5438
|
const crossStepContracts = compiledSession ? Array.from(compiledSession.perToolContracts.values()) : contracts;
|
|
5225
|
-
const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts);
|
|
5439
|
+
const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts, traceCtx);
|
|
5226
5440
|
if (!crossStepResult.passed) {
|
|
5227
5441
|
for (const f of crossStepResult.failures) {
|
|
5228
5442
|
validation.failures.push({
|
|
@@ -5236,10 +5450,11 @@ function replay(client, opts = {}) {
|
|
|
5236
5450
|
}
|
|
5237
5451
|
}
|
|
5238
5452
|
timing.cross_step_ms += Date.now() - crossStepStart;
|
|
5453
|
+
currentTraceStage = "phase";
|
|
5239
5454
|
let phaseResult = null;
|
|
5240
5455
|
const phaseStart = Date.now();
|
|
5241
5456
|
if (compiledSession) {
|
|
5242
|
-
phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession);
|
|
5457
|
+
phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession, traceCtx);
|
|
5243
5458
|
if (!phaseResult.legal) {
|
|
5244
5459
|
validation.failures.push({
|
|
5245
5460
|
path: `$.tool_calls.${phaseResult.blockedTool}`,
|
|
@@ -5269,7 +5484,7 @@ function replay(client, opts = {}) {
|
|
|
5269
5484
|
for (const f of avResult.failures) {
|
|
5270
5485
|
validation.failures.push({
|
|
5271
5486
|
path: f.path,
|
|
5272
|
-
operator:
|
|
5487
|
+
operator: "argument_value_mismatch",
|
|
5273
5488
|
expected: String(f.expected),
|
|
5274
5489
|
found: String(f.actual),
|
|
5275
5490
|
message: f.detail,
|
|
@@ -5280,10 +5495,12 @@ function replay(client, opts = {}) {
|
|
|
5280
5495
|
}
|
|
5281
5496
|
}
|
|
5282
5497
|
}
|
|
5498
|
+
currentTraceStage = "limit";
|
|
5283
5499
|
if (resolvedSessionLimits) {
|
|
5284
5500
|
const workingState = { ...sessionState, toolCallCounts: workingToolCallCounts };
|
|
5285
5501
|
const perToolResult = checkPerToolLimits(workingState, tc.name, resolvedSessionLimits);
|
|
5286
5502
|
if (perToolResult.exceeded) {
|
|
5503
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "block", reason: "per_tool_limit_exceeded", checked: { max_calls: resolvedSessionLimits.max_calls_per_tool?.[tc.name] ?? null }, found: { current_calls: workingToolCallCounts.get(tc.name) ?? 0 } });
|
|
5287
5504
|
validation.failures.push({
|
|
5288
5505
|
path: `$.tool_calls.${tc.name}`,
|
|
5289
5506
|
operator: "session_limit",
|
|
@@ -5292,6 +5509,8 @@ function replay(client, opts = {}) {
|
|
|
5292
5509
|
message: perToolResult.reason ?? "per-tool limit exceeded",
|
|
5293
5510
|
contract_file: ""
|
|
5294
5511
|
});
|
|
5512
|
+
} else {
|
|
5513
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "allow", reason: "per_tool_limit_ok", checked: { max_calls: resolvedSessionLimits.max_calls_per_tool?.[tc.name] ?? null }, found: { current_calls: workingToolCallCounts.get(tc.name) ?? 0 } });
|
|
5295
5514
|
}
|
|
5296
5515
|
}
|
|
5297
5516
|
workingToolCallCounts.set(tc.name, (workingToolCallCounts.get(tc.name) ?? 0) + 1);
|
|
@@ -5308,6 +5527,7 @@ function replay(client, opts = {}) {
|
|
|
5308
5527
|
).length;
|
|
5309
5528
|
const totalMatches = loopResult.matchCount + intraMatches;
|
|
5310
5529
|
if (totalMatches >= resolvedSessionLimits.loop_detection.threshold) {
|
|
5530
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "block", reason: "loop_detected", checked: { window: resolvedSessionLimits.loop_detection.window, threshold: resolvedSessionLimits.loop_detection.threshold }, found: { match_count: totalMatches, arguments_hash: argsHash } });
|
|
5311
5531
|
validation.failures.push({
|
|
5312
5532
|
path: `$.tool_calls.${tc.name}`,
|
|
5313
5533
|
operator: "loop_detected",
|
|
@@ -5321,6 +5541,7 @@ function replay(client, opts = {}) {
|
|
|
5321
5541
|
}
|
|
5322
5542
|
}
|
|
5323
5543
|
timing.argument_values_ms += Date.now() - argValuesStart;
|
|
5544
|
+
currentTraceStage = "policy";
|
|
5324
5545
|
let policyVerdicts = null;
|
|
5325
5546
|
const policyStart = Date.now();
|
|
5326
5547
|
if (compiledSession?.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
|
|
@@ -5341,6 +5562,14 @@ function replay(client, opts = {}) {
|
|
|
5341
5562
|
);
|
|
5342
5563
|
policyVerdicts.set(tc.name, verdict);
|
|
5343
5564
|
if (!verdict.allowed) {
|
|
5565
|
+
trace.push({
|
|
5566
|
+
stage: "policy",
|
|
5567
|
+
tool: tc.name,
|
|
5568
|
+
verdict: "block",
|
|
5569
|
+
reason: verdict.reason?.startsWith("Session deny") ? "session_deny_matched" : verdict.reason?.startsWith("default_deny") ? "default_deny_no_allow" : "policy_denied",
|
|
5570
|
+
checked: { has_policy: true, default_deny: compiledSession.policyProgram.defaultDeny },
|
|
5571
|
+
found: { matched: true }
|
|
5572
|
+
});
|
|
5344
5573
|
validation.failures.push({
|
|
5345
5574
|
path: `$.tool_calls.${tc.name}`,
|
|
5346
5575
|
operator: "policy_denied",
|
|
@@ -5349,10 +5578,22 @@ function replay(client, opts = {}) {
|
|
|
5349
5578
|
message: `Policy denied: ${tc.name} \u2014 ${verdict.reason}`,
|
|
5350
5579
|
contract_file: ""
|
|
5351
5580
|
});
|
|
5581
|
+
} else {
|
|
5582
|
+
trace.push({
|
|
5583
|
+
stage: "policy",
|
|
5584
|
+
tool: tc.name,
|
|
5585
|
+
verdict: "allow",
|
|
5586
|
+
reason: "policy_allowed",
|
|
5587
|
+
checked: { has_policy: true, default_deny: compiledSession.policyProgram.defaultDeny },
|
|
5588
|
+
found: { session_deny_matched: false, tool_deny_matched: false }
|
|
5589
|
+
});
|
|
5352
5590
|
}
|
|
5353
5591
|
}
|
|
5592
|
+
} else {
|
|
5593
|
+
trace.push({ stage: "policy", tool: null, verdict: "skip", reason: "no_policy_configured", checked: {}, found: {} });
|
|
5354
5594
|
}
|
|
5355
5595
|
timing.policy_ms += Date.now() - policyStart;
|
|
5596
|
+
currentTraceStage = "gate";
|
|
5356
5597
|
if (mode === "shadow") {
|
|
5357
5598
|
const shadowGateStart = Date.now();
|
|
5358
5599
|
const shadowDecision = validation.failures.length > 0 ? {
|
|
@@ -5361,6 +5602,15 @@ function replay(client, opts = {}) {
|
|
|
5361
5602
|
blocked: buildBlockedCalls(toolCalls, validation.failures, validation.unmatchedBlocked),
|
|
5362
5603
|
response_modification: gateMode
|
|
5363
5604
|
} : { action: "allow", tool_calls: toolCalls };
|
|
5605
|
+
const blockedTools = shadowDecision.action === "block" ? shadowDecision.blocked.map((b) => b.tool_name) : [];
|
|
5606
|
+
trace.push({
|
|
5607
|
+
stage: "gate",
|
|
5608
|
+
tool: null,
|
|
5609
|
+
verdict: blockedTools.length > 0 ? "info" : "allow",
|
|
5610
|
+
reason: blockedTools.length > 0 ? "violations_found" : "no_violations",
|
|
5611
|
+
checked: { gate_mode: gateMode },
|
|
5612
|
+
found: { blocked_count: blockedTools.length, action: shadowDecision.action, ...blockedTools.length > 0 ? { blocked_tools: blockedTools } : {} }
|
|
5613
|
+
});
|
|
5364
5614
|
const shadowDelta = {
|
|
5365
5615
|
would_have_blocked: shadowDecision.action === "block" ? shadowDecision.blocked : [],
|
|
5366
5616
|
would_have_narrowed: narrowResult?.removed ?? [],
|
|
@@ -5370,7 +5620,11 @@ function replay(client, opts = {}) {
|
|
|
5370
5620
|
lastShadowDeltaValue = shadowDelta;
|
|
5371
5621
|
shadowEvaluationCount++;
|
|
5372
5622
|
timing.gate_ms += Date.now() - shadowGateStart;
|
|
5373
|
-
|
|
5623
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: sessionState.currentPhase, tools_committed: [], tools_blocked: blockedTools, killed: false, step_index: sessionState.totalStepCount } });
|
|
5624
|
+
trace.complete = true;
|
|
5625
|
+
lastTrace = trace;
|
|
5626
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5627
|
+
captureDecision(shadowDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, shadowDelta, timing, trace);
|
|
5374
5628
|
return response;
|
|
5375
5629
|
}
|
|
5376
5630
|
if (isCompatAdvisory) {
|
|
@@ -5411,7 +5665,21 @@ function replay(client, opts = {}) {
|
|
|
5411
5665
|
sessionState = recordDecisionOutcome(sessionState, "allowed");
|
|
5412
5666
|
}
|
|
5413
5667
|
timing.finalize_ms += Date.now() - advisoryFinalizeStart;
|
|
5414
|
-
|
|
5668
|
+
const advisoryBlockedTools = advisoryDecision.action === "block" ? advisoryDecision.blocked.map((b) => b.tool_name) : [];
|
|
5669
|
+
trace.push({
|
|
5670
|
+
stage: "gate",
|
|
5671
|
+
tool: null,
|
|
5672
|
+
verdict: advisoryBlockedTools.length > 0 ? "info" : "allow",
|
|
5673
|
+
reason: advisoryBlockedTools.length > 0 ? "violations_found" : "no_violations",
|
|
5674
|
+
checked: { gate_mode: gateMode },
|
|
5675
|
+
found: { blocked_count: advisoryBlockedTools.length, action: advisoryDecision.action, ...advisoryBlockedTools.length > 0 ? { blocked_tools: advisoryBlockedTools } : {} }
|
|
5676
|
+
});
|
|
5677
|
+
const advisoryNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
|
|
5678
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: advisoryNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: advisoryBlockedTools, killed: false, step_index: sessionState.totalStepCount } });
|
|
5679
|
+
trace.complete = true;
|
|
5680
|
+
lastTrace = trace;
|
|
5681
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5682
|
+
captureDecision(advisoryDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5415
5683
|
return response;
|
|
5416
5684
|
}
|
|
5417
5685
|
const enforceGateStart = Date.now();
|
|
@@ -5449,7 +5717,20 @@ function replay(client, opts = {}) {
|
|
|
5449
5717
|
});
|
|
5450
5718
|
}
|
|
5451
5719
|
}
|
|
5452
|
-
|
|
5720
|
+
trace.push({
|
|
5721
|
+
stage: "gate",
|
|
5722
|
+
tool: null,
|
|
5723
|
+
verdict: "allow",
|
|
5724
|
+
reason: "no_violations",
|
|
5725
|
+
checked: { gate_mode: gateMode },
|
|
5726
|
+
found: { blocked_count: 0, action: "allow" }
|
|
5727
|
+
});
|
|
5728
|
+
const allowNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
|
|
5729
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: completedStep.phase, phase_after: allowNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: [], killed: false, step_index: sessionState.totalStepCount } });
|
|
5730
|
+
trace.complete = true;
|
|
5731
|
+
lastTrace = trace;
|
|
5732
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5733
|
+
captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5453
5734
|
return response;
|
|
5454
5735
|
}
|
|
5455
5736
|
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
@@ -5514,15 +5795,42 @@ function replay(client, opts = {}) {
|
|
|
5514
5795
|
);
|
|
5515
5796
|
continue;
|
|
5516
5797
|
}
|
|
5517
|
-
|
|
5798
|
+
const blockBlockedTools = decision.action === "block" ? decision.blocked.map((b) => b.tool_name) : [];
|
|
5799
|
+
trace.push({
|
|
5800
|
+
stage: "gate",
|
|
5801
|
+
tool: null,
|
|
5802
|
+
verdict: "block",
|
|
5803
|
+
reason: "violations_found",
|
|
5804
|
+
checked: { gate_mode: gateMode },
|
|
5805
|
+
found: { blocked_count: blockBlockedTools.length, action: "block", blocked_tools: blockBlockedTools }
|
|
5806
|
+
});
|
|
5807
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: sessionState.currentPhase, tools_committed: [], tools_blocked: blockBlockedTools, killed, step_index: sessionState.totalStepCount } });
|
|
5808
|
+
trace.complete = true;
|
|
5809
|
+
lastTrace = trace;
|
|
5810
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5811
|
+
captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5518
5812
|
return applyGateDecision(decision, response, provider, gateMode, opts.onBlock);
|
|
5519
5813
|
}
|
|
5520
5814
|
if (lastError) throw lastError;
|
|
5521
5815
|
throw new ReplayInternalError("Retry loop exhausted without result", { sessionId });
|
|
5522
5816
|
} catch (err) {
|
|
5523
5817
|
if (err instanceof ReplayContractError || err instanceof ReplayKillError) {
|
|
5818
|
+
if (!trace.complete) {
|
|
5819
|
+
lastTrace = trace;
|
|
5820
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5821
|
+
}
|
|
5524
5822
|
throw err;
|
|
5525
5823
|
}
|
|
5824
|
+
trace.push({
|
|
5825
|
+
stage: currentTraceStage,
|
|
5826
|
+
tool: null,
|
|
5827
|
+
verdict: "error",
|
|
5828
|
+
reason: "stage_threw",
|
|
5829
|
+
checked: {},
|
|
5830
|
+
found: { error: err instanceof Error ? err.message : String(err) }
|
|
5831
|
+
});
|
|
5832
|
+
lastTrace = trace;
|
|
5833
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5526
5834
|
sessionState = recordDecisionOutcome(sessionState, "error");
|
|
5527
5835
|
if (resolvedSessionLimits?.circuit_breaker) {
|
|
5528
5836
|
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
@@ -5648,6 +5956,9 @@ function replay(client, opts = {}) {
|
|
|
5648
5956
|
/**
 * Accessor for the closure variable `lastShadowDeltaValue`.
 * In the visible shadow-mode path that variable is assigned the shadow delta
 * computed for the evaluation (what would have been blocked / narrowed).
 * NOTE(review): the value before the first shadow evaluation is not visible here — confirm at the declaration.
 */
getLastShadowDelta() {
|
|
5649
5957
|
return lastShadowDeltaValue;
|
|
5650
5958
|
},
|
|
5959
|
+
/**
 * Accessor for the closure variable `lastTrace`.
 * In the visible code `lastTrace` is assigned the cycle's trace object both
 * when a cycle finishes (after `trace.complete = true`) and on the error
 * paths, so the returned trace may be a partial one whose `complete` flag
 * was never set.
 */
getLastTrace() {
|
|
5960
|
+
return lastTrace;
|
|
5961
|
+
},
|
|
5651
5962
|
/**
|
|
5652
5963
|
* v3: Manually restrict available tools within compiled legal space.
|
|
5653
5964
|
* @see specs/replay-v3.md § narrow() / widen()
|
|
@@ -5783,7 +6094,7 @@ function replay(client, opts = {}) {
|
|
|
5783
6094
|
}
|
|
5784
6095
|
return wrapped;
|
|
5785
6096
|
}
|
|
5786
|
-
function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam) {
|
|
6097
|
+
function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam, traceParam) {
|
|
5787
6098
|
if (!buffer && !store) return;
|
|
5788
6099
|
if (timingParam) {
|
|
5789
6100
|
timingParam.total_ms = Date.now() - guardStart;
|
|
@@ -5818,6 +6129,7 @@ function replay(client, opts = {}) {
|
|
|
5818
6129
|
phase: sessionState.currentPhase,
|
|
5819
6130
|
phase_transition: phaseTransitionStr,
|
|
5820
6131
|
shadow_delta: shadowDelta,
|
|
6132
|
+
trace: traceParam ? redactTrace(traceParam, opts.captureLevel ?? "full") : void 0,
|
|
5821
6133
|
receipt: null
|
|
5822
6134
|
};
|
|
5823
6135
|
const capturedCall = {
|
|
@@ -6375,6 +6687,7 @@ function resolveSessionLimits(contracts) {
|
|
|
6375
6687
|
const sl = c.session_limits;
|
|
6376
6688
|
if (sl.max_steps !== void 0 && merged.max_steps === void 0) merged.max_steps = sl.max_steps;
|
|
6377
6689
|
if (sl.max_tool_calls !== void 0 && merged.max_tool_calls === void 0) merged.max_tool_calls = sl.max_tool_calls;
|
|
6690
|
+
if (sl.max_tool_calls_mode !== void 0 && merged.max_tool_calls_mode === void 0) merged.max_tool_calls_mode = sl.max_tool_calls_mode;
|
|
6378
6691
|
if (sl.max_cost_per_session !== void 0 && merged.max_cost_per_session === void 0) merged.max_cost_per_session = sl.max_cost_per_session;
|
|
6379
6692
|
if (sl.loop_detection && !merged.loop_detection) merged.loop_detection = sl.loop_detection;
|
|
6380
6693
|
if (sl.circuit_breaker && !merged.circuit_breaker) merged.circuit_breaker = sl.circuit_breaker;
|
|
@@ -6482,6 +6795,7 @@ function createInactiveSession(client, sessionId, reason) {
|
|
|
6482
6795
|
getState: () => EMPTY_STATE_SNAPSHOT,
|
|
6483
6796
|
getLastNarrowing: () => null,
|
|
6484
6797
|
getLastShadowDelta: () => null,
|
|
6798
|
+
getLastTrace: () => null,
|
|
6485
6799
|
narrow() {
|
|
6486
6800
|
},
|
|
6487
6801
|
widen() {
|
|
@@ -6523,6 +6837,7 @@ function createBlockingInactiveSession(client, sessionId, detail, configError) {
|
|
|
6523
6837
|
getState: () => EMPTY_STATE_SNAPSHOT,
|
|
6524
6838
|
getLastNarrowing: () => null,
|
|
6525
6839
|
getLastShadowDelta: () => null,
|
|
6840
|
+
getLastTrace: () => null,
|
|
6526
6841
|
narrow() {
|
|
6527
6842
|
},
|
|
6528
6843
|
widen() {
|
|
@@ -6607,6 +6922,83 @@ function generateSessionId2() {
|
|
|
6607
6922
|
/**
 * Drop a leading "sha256:" algorithm tag from a hash string.
 *
 * @param {string} hash - Hash text, with or without the "sha256:" prefix.
 * @returns {string} The text following the prefix, or `hash` unchanged when
 *   it does not start with the prefix.
 */
function stripHashPrefix(hash) {
  const prefix = "sha256:";
  if (!hash.startsWith(prefix)) {
    return hash;
  }
  return hash.slice(prefix.length);
}
|
|
6925
|
+
/**
 * Resolve the verbosity of the default diagnostics logger from the
 * `REPLAYCI_LOG` environment variable.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. A stray
 * trailing space is easy to introduce (for example
 * `set REPLAYCI_LOG=trace && node app.js` in Windows cmd) and would otherwise
 * silently fall back to "warn".
 *
 * @returns {"trace" | "warn" | "silent"} "trace" for `trace`/`debug`,
 *   "silent" for `silent`/`off`/`none`, and "warn" when the variable is
 *   unset, empty, or not one of the recognized values.
 */
function resolveLogLevel() {
  // `process` can be missing entirely (non-Node runtime) or shimmed without
  // an `env` object; neither case should make the logger throw.
  const raw = typeof process !== "undefined" ? process.env?.REPLAYCI_LOG : void 0;
  if (typeof raw !== "string") return "warn";
  switch (raw.trim().toLowerCase()) {
    case "trace":
    case "debug":
      return "trace";
    case "silent":
    case "off":
    case "none":
      return "silent";
    default:
      return "warn";
  }
}
|
|
6933
|
+
/**
 * Default diagnostics sink: writes a human-readable line (or several) to
 * `console.warn` for each diagnostics event.
 *
 * The verbosity comes from `resolveLogLevel()`:
 *  - "silent": nothing is printed;
 *  - "trace": `replay_trace` events are printed entry by entry;
 *  - anything else: `replay_trace` events print only their "block" entries.
 * All other handled event types print the same line regardless of verbosity;
 * unrecognized event types are ignored.
 *
 * @param {object} event - Diagnostics event; `event.type` selects the format
 *   and the remaining fields read here depend on that type.
 * @returns {void}
 */
function defaultReplayDiagnosticsHandler(event) {
  const verbosity = resolveLogLevel();
  if (verbosity === "silent") {
    return;
  }

  // Verbose rendering: one line per trace entry, then an incompleteness marker.
  const printEveryEntry = (trace) => {
    for (const step of trace.entries) {
      let toolPart = "";
      if (step.tool) {
        toolPart = ` ${step.tool}`;
      }
      // The reason is only appended when it adds something beyond the verdict.
      let reasonPart = "";
      if (step.reason !== step.verdict) {
        reasonPart = ` \u2014 ${step.reason}`;
      }
      let checkedPart = "";
      if (Object.keys(step.checked).length > 0) {
        checkedPart = ` checked=${JSON.stringify(step.checked)}`;
      }
      let foundPart = "";
      if (Object.keys(step.found).length > 0) {
        foundPart = ` found=${JSON.stringify(step.found)}`;
      }
      console.warn(`[replayci] ${step.stage}${toolPart}: ${step.verdict}${reasonPart}${checkedPart}${foundPart}`);
    }
    if (!trace.complete) {
      console.warn(`[replayci] trace INCOMPLETE (fault in pipeline)`);
    }
  };

  // Summary rendering: only entries whose verdict is "block" are reported.
  const printBlocksOnly = (trace) => {
    trace.entries
      .filter((step) => step.verdict === "block")
      .forEach((blocked) => {
        const subject = blocked.tool ?? "session";
        console.warn(`[replayci] blocked ${subject} at ${blocked.stage} \u2192 ${blocked.reason}`);
      });
    if (!trace.complete) {
      console.warn(`[replayci] enforcement cycle incomplete (fault) \u2014 session.getLastTrace() for partial trace`);
    }
  };

  switch (event.type) {
    case "replay_trace": {
      const trace = event.trace;
      if (verbosity === "trace") {
        printEveryEntry(trace);
      } else {
        printBlocksOnly(trace);
      }
      return;
    }
    case "replay_narrow": {
      for (const removal of event.removed) {
        console.warn(`[replayci] removed ${removal.tool} \u2192 ${removal.reason}${removal.detail ? ` (${removal.detail})` : ""}`);
      }
      return;
    }
    case "replay_inactive":
      console.warn(`[replayci] replay() inactive: ${event.reason}${event.error_message ? ` \u2014 ${event.error_message}` : ""}`);
      return;
    case "replay_compile_error":
      console.warn(`[replayci] compile error: ${event.details}`);
      return;
    case "replay_compile_warning":
      console.warn(`[replayci] compile warning: ${event.details}`);
      return;
    case "replay_bypass_detected":
      console.warn(`[replayci] bypass detected on session ${event.session_id}`);
      return;
    case "replay_kill":
      console.warn(`[replayci] session ${event.session_id} killed`);
      return;
    case "replay_block":
      console.warn(`[replayci] blocked ${event.tool_name}: ${event.reason}`);
      return;
    case "replay_workflow_error":
      console.warn(`[replayci] workflow error: ${event.details}`);
      return;
    case "replay_state_sync_error":
      console.warn(`[replayci] state sync error: ${event.details}`);
      return;
    case "replay_receipt_error":
      console.warn(`[replayci] receipt error (${event.tool_name}): ${event.details}`);
      return;
    case "replay_capture_error":
      console.warn(`[replayci] capture error: ${event.details}`);
      return;
    default:
      // Unknown event types are deliberately ignored.
      return;
  }
}
|
|
6610
7002
|
function emitDiagnostic2(diagnostics, event) {
|
|
6611
7003
|
try {
|
|
6612
7004
|
diagnostics?.(event);
|