@replayci/replay 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +469 -66
- package/dist/index.d.cts +52 -1
- package/dist/index.d.ts +52 -1
- package/dist/index.js +469 -66
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -3224,6 +3224,34 @@ function redactCapture(input) {
|
|
|
3224
3224
|
pattern_fingerprint: PATTERN_FINGERPRINT
|
|
3225
3225
|
};
|
|
3226
3226
|
}
|
|
3227
|
+
/**
 * Produce the version of a decision trace that may be exposed at the given
 * capture level.
 *
 * @param {object|null|undefined} trace - Trace object carrying an `entries` array.
 * @param {string} captureLevel - Capture level; `"full"` disables redaction.
 * @returns {object|null|undefined} The very same `trace` when the level is
 *   `"full"` or when there is no trace at all; otherwise a shallow copy whose
 *   `entries` were each passed through `redactTraceEntry`.
 */
function redactTrace(trace, captureLevel) {
  // "full" capture exposes the trace verbatim. A missing trace is handed back
  // unchanged instead of throwing on `trace.entries`.
  if (captureLevel === "full" || trace == null) return trace;
  // Tolerate a trace without a usable `entries` list.
  const entries = Array.isArray(trace.entries) ? trace.entries : [];
  return {
    // NOTE(review): the spread also copies any function-valued members of the
    // trace (e.g. a `push` helper); such helpers keep pointing at the original
    // entries array, not at the redacted copy — confirm callers never push
    // onto a redacted trace.
    ...trace,
    entries: entries.map((entry) => redactTraceEntry(entry, captureLevel))
  };
}
|
|
3234
|
+
/**
 * Redact one trace entry for the requested capture level.
 *
 * Only the `"metadata"` level triggers redaction here; for every other level
 * the entry is returned as-is (same object reference).
 *
 * @param {object|null|undefined} entry - Trace entry with `checked` and
 *   `found` records.
 * @param {string} captureLevel - Capture level selected by the caller.
 * @returns {object|null|undefined} For `"metadata"`, a shallow copy of `entry`
 *   whose `checked` and `found` records went through `redactRecord`; otherwise
 *   the original `entry`.
 */
function redactTraceEntry(entry, captureLevel) {
  if (captureLevel !== "metadata" || entry == null) return entry;
  return {
    ...entry,
    // A missing record is treated as empty so a partially-filled entry does
    // not abort redaction of the whole trace.
    checked: redactRecord(entry.checked ?? {}),
    found: redactRecord(entry.found ?? {})
  };
}
|
|
3244
|
+
/**
 * Build a redacted copy of a trace record (`checked` / `found`).
 *
 * Every string value is passed through `redactString`. Strings nested inside
 * arrays or plain objects are redacted as well, so list-valued or structured
 * fields cannot carry raw strings past redaction. Numbers, booleans, `null`
 * and non-plain objects are copied through unchanged. The input is never
 * mutated.
 *
 * @param {object|null|undefined} record - Key/value record to redact.
 * @returns {object} A new plain object with the same keys; empty when
 *   `record` is `null` or `undefined`.
 */
function redactRecord(record) {
  const redactValue = (value) => {
    if (typeof value === "string") return redactString(value);
    if (Array.isArray(value)) return value.map(redactValue);
    if (value !== null && typeof value === "object") {
      const proto = Object.getPrototypeOf(value);
      // Only descend into plain objects; class instances (Date, Map, ...) are
      // left untouched because rebuilding them key-by-key would corrupt them.
      if (proto === Object.prototype || proto === null) return redactRecord(value);
    }
    return value;
  };
  const result = {};
  if (record == null) return result;
  for (const [key, value] of Object.entries(record)) {
    result[key] = redactValue(value);
  }
  return result;
}
|
|
3227
3255
|
|
|
3228
3256
|
// src/errors/replay.ts
|
|
3229
3257
|
var ReplayContractError = class extends Error {
|
|
@@ -3520,8 +3548,9 @@ function toRecord8(value) {
|
|
|
3520
3548
|
var import_node_crypto3 = __toESM(require("crypto"), 1);
|
|
3521
3549
|
|
|
3522
3550
|
// src/phases.ts
|
|
3523
|
-
function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
3551
|
+
function validatePhaseTransition(toolCalls, sessionState, compiledSession, ctx) {
|
|
3524
3552
|
if (!compiledSession.phases) {
|
|
3553
|
+
ctx?.trace.push({ stage: "phase", tool: null, verdict: "skip", reason: "no_phases_configured", checked: {}, found: {} });
|
|
3525
3554
|
return { legal: true, newPhase: sessionState.currentPhase };
|
|
3526
3555
|
}
|
|
3527
3556
|
const attemptedTransitions = [];
|
|
@@ -3532,6 +3561,14 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3532
3561
|
sessionState.currentPhase ?? ""
|
|
3533
3562
|
);
|
|
3534
3563
|
if (!allowedTransitions?.includes(contract.transitions.advances_to)) {
|
|
3564
|
+
ctx?.trace.push({
|
|
3565
|
+
stage: "phase",
|
|
3566
|
+
tool: toolCall.name,
|
|
3567
|
+
verdict: "block",
|
|
3568
|
+
reason: "illegal_phase_transition",
|
|
3569
|
+
checked: { advances_to: contract.transitions.advances_to, from: sessionState.currentPhase },
|
|
3570
|
+
found: { allowed_transitions: allowedTransitions ?? [] }
|
|
3571
|
+
});
|
|
3535
3572
|
return {
|
|
3536
3573
|
legal: false,
|
|
3537
3574
|
newPhase: sessionState.currentPhase,
|
|
@@ -3548,6 +3585,14 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3548
3585
|
if (attemptedTransitions.length > 1) {
|
|
3549
3586
|
const distinctTargets = new Set(attemptedTransitions.map((t) => t.target));
|
|
3550
3587
|
if (distinctTargets.size > 1) {
|
|
3588
|
+
ctx?.trace.push({
|
|
3589
|
+
stage: "phase",
|
|
3590
|
+
tool: attemptedTransitions.map((t) => t.tool).join(", "),
|
|
3591
|
+
verdict: "block",
|
|
3592
|
+
reason: "ambiguous_phase_transition",
|
|
3593
|
+
checked: { targets: Array.from(distinctTargets) },
|
|
3594
|
+
found: { from: sessionState.currentPhase }
|
|
3595
|
+
});
|
|
3551
3596
|
return {
|
|
3552
3597
|
legal: false,
|
|
3553
3598
|
newPhase: sessionState.currentPhase,
|
|
@@ -3558,7 +3603,17 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3558
3603
|
}
|
|
3559
3604
|
}
|
|
3560
3605
|
if (attemptedTransitions.length > 0) {
|
|
3561
|
-
|
|
3606
|
+
const target = attemptedTransitions[0].target;
|
|
3607
|
+
const allowedTransitions = compiledSession.transitions.get(sessionState.currentPhase ?? "") ?? [];
|
|
3608
|
+
ctx?.trace.push({
|
|
3609
|
+
stage: "phase",
|
|
3610
|
+
tool: attemptedTransitions[0].tool,
|
|
3611
|
+
verdict: "allow",
|
|
3612
|
+
reason: "phase_advanced",
|
|
3613
|
+
checked: { advances_to: target, from: sessionState.currentPhase },
|
|
3614
|
+
found: { allowed_transitions: allowedTransitions }
|
|
3615
|
+
});
|
|
3616
|
+
return { legal: true, newPhase: target };
|
|
3562
3617
|
}
|
|
3563
3618
|
return { legal: true, newPhase: sessionState.currentPhase };
|
|
3564
3619
|
}
|
|
@@ -3808,7 +3863,7 @@ function checkCircuitBreaker(state, config) {
|
|
|
3808
3863
|
}
|
|
3809
3864
|
|
|
3810
3865
|
// src/crossStep.ts
|
|
3811
|
-
function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
3866
|
+
function validateCrossStep(toolCalls, sessionState, contracts, ctx) {
|
|
3812
3867
|
const failures = [];
|
|
3813
3868
|
const contractByTool = new Map(contracts.map((c) => [c.tool, c]));
|
|
3814
3869
|
const workingForbidden = new Set(sessionState.forbiddenTools);
|
|
@@ -3836,8 +3891,17 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3836
3891
|
reason: "forbidden_tool",
|
|
3837
3892
|
detail: resourceValue !== void 0 ? `Tool "${tc.name}" is forbidden in this session for resource ${JSON.stringify(resourceValue)}` : `Tool "${tc.name}" is forbidden in this session`
|
|
3838
3893
|
});
|
|
3894
|
+
ctx?.trace.push({
|
|
3895
|
+
stage: "cross_step",
|
|
3896
|
+
tool: tc.name,
|
|
3897
|
+
verdict: "block",
|
|
3898
|
+
reason: "forbidden_tool",
|
|
3899
|
+
checked: { tool: tc.name },
|
|
3900
|
+
found: { is_resource_scoped: resourceValue !== void 0, resource_value: resourceValue ?? null }
|
|
3901
|
+
});
|
|
3839
3902
|
continue;
|
|
3840
3903
|
}
|
|
3904
|
+
let crossStepPassed = true;
|
|
3841
3905
|
if (contract?.preconditions && contract.preconditions.length > 0) {
|
|
3842
3906
|
const results = evaluatePreconditions(
|
|
3843
3907
|
contract.preconditions,
|
|
@@ -3846,6 +3910,7 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3846
3910
|
);
|
|
3847
3911
|
for (const result of results) {
|
|
3848
3912
|
if (!result.satisfied) {
|
|
3913
|
+
crossStepPassed = false;
|
|
3849
3914
|
failures.push({
|
|
3850
3915
|
toolName: tc.name,
|
|
3851
3916
|
reason: "precondition_not_met",
|
|
@@ -3854,6 +3919,25 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3854
3919
|
}
|
|
3855
3920
|
}
|
|
3856
3921
|
}
|
|
3922
|
+
if (crossStepPassed) {
|
|
3923
|
+
ctx?.trace.push({
|
|
3924
|
+
stage: "cross_step",
|
|
3925
|
+
tool: tc.name,
|
|
3926
|
+
verdict: "allow",
|
|
3927
|
+
reason: "preconditions_satisfied",
|
|
3928
|
+
checked: { requires_prior_tool: contract?.preconditions?.[0]?.requires_prior_tool ?? null },
|
|
3929
|
+
found: { resource_value: resourceValue ?? null }
|
|
3930
|
+
});
|
|
3931
|
+
} else {
|
|
3932
|
+
ctx?.trace.push({
|
|
3933
|
+
stage: "cross_step",
|
|
3934
|
+
tool: tc.name,
|
|
3935
|
+
verdict: "block",
|
|
3936
|
+
reason: "precondition_not_met",
|
|
3937
|
+
checked: { requires_prior_tool: contract?.preconditions?.[0]?.requires_prior_tool ?? null },
|
|
3938
|
+
found: { resource_value: resourceValue ?? null, matching_step_index: null }
|
|
3939
|
+
});
|
|
3940
|
+
}
|
|
3857
3941
|
if (contract?.forbids_after) {
|
|
3858
3942
|
for (const entry of contract.forbids_after) {
|
|
3859
3943
|
if (typeof entry === "string") {
|
|
@@ -4075,20 +4159,23 @@ function extractPath2(obj, path) {
|
|
|
4075
4159
|
}
|
|
4076
4160
|
|
|
4077
4161
|
// src/narrow.ts
|
|
4078
|
-
function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter) {
|
|
4162
|
+
function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter, ctx) {
|
|
4079
4163
|
const allowed = [];
|
|
4080
4164
|
const removed = [];
|
|
4081
4165
|
for (const tool of requestedTools) {
|
|
4082
4166
|
if (manualFilter && !manualFilter.includes(tool.name)) {
|
|
4083
4167
|
removed.push({ tool: tool.name, reason: "manual_filter" });
|
|
4168
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "manual_filter", checked: { filter: manualFilter }, found: {} });
|
|
4084
4169
|
continue;
|
|
4085
4170
|
}
|
|
4086
4171
|
const contract = compiledSession.perToolContracts.get(tool.name);
|
|
4087
4172
|
if (!contract) {
|
|
4088
4173
|
if (unmatchedPolicy === "allow") {
|
|
4089
4174
|
allowed.push(tool);
|
|
4175
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "allow", reason: "no_contract_passthrough", checked: { unmatched_policy: "allow" }, found: {} });
|
|
4090
4176
|
} else {
|
|
4091
4177
|
removed.push({ tool: tool.name, reason: "no_contract" });
|
|
4178
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "no_contract", checked: { unmatched_policy: "block" }, found: {} });
|
|
4092
4179
|
}
|
|
4093
4180
|
continue;
|
|
4094
4181
|
}
|
|
@@ -4101,6 +4188,7 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4101
4188
|
reason: "wrong_phase",
|
|
4102
4189
|
detail: `Tool valid in [${contract.transitions.valid_in_phases.join(", ")}], current phase: ${sessionState.currentPhase}`
|
|
4103
4190
|
});
|
|
4191
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "wrong_phase", checked: { valid_in_phases: contract.transitions.valid_in_phases }, found: { current_phase: sessionState.currentPhase } });
|
|
4104
4192
|
continue;
|
|
4105
4193
|
}
|
|
4106
4194
|
}
|
|
@@ -4111,6 +4199,18 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4111
4199
|
);
|
|
4112
4200
|
const unsatisfied = results.find((r) => !r.satisfied);
|
|
4113
4201
|
if (unsatisfied) {
|
|
4202
|
+
const firstPre = contract.preconditions[0];
|
|
4203
|
+
ctx?.trace.push({
|
|
4204
|
+
stage: "narrow",
|
|
4205
|
+
tool: tool.name,
|
|
4206
|
+
verdict: "remove",
|
|
4207
|
+
reason: "precondition_not_met",
|
|
4208
|
+
checked: {
|
|
4209
|
+
requires_prior_tool: firstPre.requires_prior_tool ?? null,
|
|
4210
|
+
with_output: firstPre.with_output ?? []
|
|
4211
|
+
},
|
|
4212
|
+
found: { satisfied_precondition_cache_hit: false }
|
|
4213
|
+
});
|
|
4114
4214
|
removed.push({
|
|
4115
4215
|
tool: tool.name,
|
|
4116
4216
|
reason: "precondition_not_met",
|
|
@@ -4124,6 +4224,7 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4124
4224
|
tool: tool.name,
|
|
4125
4225
|
reason: "forbidden_in_state"
|
|
4126
4226
|
});
|
|
4227
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "forbidden_in_state", checked: { tool: tool.name }, found: { is_resource_scoped: false } });
|
|
4127
4228
|
continue;
|
|
4128
4229
|
}
|
|
4129
4230
|
if (compiledSession.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
|
|
@@ -4140,9 +4241,24 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4140
4241
|
reason: "policy_denied",
|
|
4141
4242
|
detail: verdict.reason ?? "Policy deny rule matched"
|
|
4142
4243
|
});
|
|
4244
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "policy_denied", checked: { rule_type: "session_deny" }, found: { matched: true } });
|
|
4143
4245
|
continue;
|
|
4144
4246
|
}
|
|
4145
4247
|
}
|
|
4248
|
+
ctx?.trace.push({
|
|
4249
|
+
stage: "narrow",
|
|
4250
|
+
tool: tool.name,
|
|
4251
|
+
verdict: "allow",
|
|
4252
|
+
reason: "all_checks_passed",
|
|
4253
|
+
checked: {
|
|
4254
|
+
has_contract: true,
|
|
4255
|
+
phase_ok: true,
|
|
4256
|
+
preconditions_ok: true,
|
|
4257
|
+
not_forbidden: true,
|
|
4258
|
+
policy_ok: true
|
|
4259
|
+
},
|
|
4260
|
+
found: {}
|
|
4261
|
+
});
|
|
4146
4262
|
allowed.push(tool);
|
|
4147
4263
|
}
|
|
4148
4264
|
return { allowed, removed };
|
|
@@ -4636,7 +4752,7 @@ function replay(client, opts = {}) {
|
|
|
4636
4752
|
const unmatchedPolicy = opts.unmatchedPolicy ?? "block";
|
|
4637
4753
|
const maxRetries = Math.min(Math.max(0, opts.maxRetries ?? 0), MAX_RETRIES);
|
|
4638
4754
|
const compatEnforcement = opts.compatEnforcement ?? "protective";
|
|
4639
|
-
const diagnostics = opts.diagnostics;
|
|
4755
|
+
const diagnostics = opts.diagnostics ?? defaultReplayDiagnosticsHandler;
|
|
4640
4756
|
let provider;
|
|
4641
4757
|
try {
|
|
4642
4758
|
provider = detectProvider(client);
|
|
@@ -4669,6 +4785,12 @@ function replay(client, opts = {}) {
|
|
|
4669
4785
|
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4670
4786
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4671
4787
|
}
|
|
4788
|
+
if (opts.contractsDir && !discoveredSessionYaml && !opts.sessionYamlPath) {
|
|
4789
|
+
emitDiagnostic2(diagnostics, {
|
|
4790
|
+
type: "replay_compile_warning",
|
|
4791
|
+
details: "No session.yaml found in contractsDir \u2014 session-level features (phases, policy, session_limits) are inactive. Per-tool contracts still apply."
|
|
4792
|
+
});
|
|
4793
|
+
}
|
|
4672
4794
|
let sessionYaml = discoveredSessionYaml;
|
|
4673
4795
|
if (!sessionYaml && opts.providerConstraints) {
|
|
4674
4796
|
sessionYaml = { schema_version: "1.0", agent, provider_constraints: opts.providerConstraints };
|
|
@@ -4828,6 +4950,7 @@ function replay(client, opts = {}) {
|
|
|
4828
4950
|
let bypassDetected = false;
|
|
4829
4951
|
let lastShadowDeltaValue = null;
|
|
4830
4952
|
let lastNarrowResult = null;
|
|
4953
|
+
let lastTrace = null;
|
|
4831
4954
|
let shadowEvaluationCount = 0;
|
|
4832
4955
|
let manualFilter = null;
|
|
4833
4956
|
const deferredReceipts = /* @__PURE__ */ new Map();
|
|
@@ -4835,6 +4958,17 @@ function replay(client, opts = {}) {
|
|
|
4835
4958
|
const compiledLimits = compiledSession?.sessionLimits;
|
|
4836
4959
|
const mergedLimits = { ...contractLimits ?? {}, ...compiledLimits ?? {} };
|
|
4837
4960
|
const resolvedSessionLimits = Object.keys(mergedLimits).length > 0 ? mergedLimits : null;
|
|
4961
|
+
if (resolvedSessionLimits?.max_tool_calls_mode === "narrow" && resolvedSessionLimits.max_calls_per_tool) {
|
|
4962
|
+
const budgetedTools = new Set(Object.keys(resolvedSessionLimits.max_calls_per_tool));
|
|
4963
|
+
const unbudgeted = contracts.map((c) => c.tool).filter((t) => !budgetedTools.has(t));
|
|
4964
|
+
if (unbudgeted.length > 0) {
|
|
4965
|
+
emitDiagnostic2(diagnostics, {
|
|
4966
|
+
type: "replay_narrow_unbudgeted_tools",
|
|
4967
|
+
session_id: sessionId,
|
|
4968
|
+
tools: unbudgeted
|
|
4969
|
+
});
|
|
4970
|
+
}
|
|
4971
|
+
}
|
|
4838
4972
|
const store = opts.store ?? null;
|
|
4839
4973
|
let storeLoadPromise = null;
|
|
4840
4974
|
let storeLoadDone = false;
|
|
@@ -4902,6 +5036,18 @@ function replay(client, opts = {}) {
|
|
|
4902
5036
|
} catch {
|
|
4903
5037
|
}
|
|
4904
5038
|
}
|
|
5039
|
+
/**
 * Start a new, empty decision trace for one step.
 *
 * `sessionId` is read from the enclosing scope rather than passed in.
 *
 * @param {number} stepIndex - Index of the step this trace belongs to.
 * @returns {{sessionId: *, stepIndex: number, complete: boolean, entries: Array, push: Function}}
 *   A trace whose `entries` list starts empty and whose `push(entry)` appends
 *   to that same list; `complete` starts as `false`.
 */
function createTrace(stepIndex) {
  // Single backing array: exposed as `entries` and appended to by `push`.
  const recorded = [];
  const trace = {
    sessionId,
    stepIndex,
    complete: false,
    entries: recorded,
    push(entry) {
      recorded.push(entry);
    }
  };
  return trace;
}
|
|
4905
5051
|
const enforcementCreate = async function replayEnforcementCreate(...args) {
|
|
4906
5052
|
if (killed) {
|
|
4907
5053
|
throw new ReplayKillError(sessionId, killedAt);
|
|
@@ -4944,8 +5090,19 @@ function replay(client, opts = {}) {
|
|
|
4944
5090
|
total_ms: 0,
|
|
4945
5091
|
enforcement_ms: 0
|
|
4946
5092
|
};
|
|
5093
|
+
const trace = createTrace(sessionState.totalStepCount);
|
|
5094
|
+
const traceCtx = { trace };
|
|
5095
|
+
let currentTraceStage = "narrow";
|
|
4947
5096
|
const request = toRecord10(args[0]);
|
|
4948
5097
|
const requestToolNames = extractRequestToolNames(request);
|
|
5098
|
+
const messages = Array.isArray(request.messages) ? request.messages : [];
|
|
5099
|
+
if (messages.length > 0) {
|
|
5100
|
+
const toolResults = extractToolResults(messages, provider);
|
|
5101
|
+
if (toolResults.length > 0) {
|
|
5102
|
+
const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
|
|
5103
|
+
sessionState = applyOutputExtracts(sessionState, outputUpdates);
|
|
5104
|
+
}
|
|
5105
|
+
}
|
|
4949
5106
|
let narrowResult = null;
|
|
4950
5107
|
let activeArgs = args;
|
|
4951
5108
|
if (compiledSession && Array.isArray(request.tools) && request.tools.length > 0) {
|
|
@@ -4956,7 +5113,8 @@ function replay(client, opts = {}) {
|
|
|
4956
5113
|
sessionState,
|
|
4957
5114
|
compiledSession,
|
|
4958
5115
|
unmatchedPolicy,
|
|
4959
|
-
manualFilter
|
|
5116
|
+
manualFilter,
|
|
5117
|
+
traceCtx
|
|
4960
5118
|
);
|
|
4961
5119
|
lastNarrowResult = narrowResult;
|
|
4962
5120
|
if (narrowResult.removed.length > 0) {
|
|
@@ -4994,55 +5152,96 @@ function replay(client, opts = {}) {
|
|
|
4994
5152
|
timing.narrow_ms = Date.now() - guardStart;
|
|
4995
5153
|
const preCheckStart = Date.now();
|
|
4996
5154
|
try {
|
|
5155
|
+
currentTraceStage = "pre_check";
|
|
4997
5156
|
if (mode === "enforce" && resolvedSessionLimits) {
|
|
4998
5157
|
const limitResult = checkSessionLimits(sessionState, resolvedSessionLimits);
|
|
4999
5158
|
if (limitResult.exceeded) {
|
|
5000
|
-
|
|
5001
|
-
|
|
5002
|
-
|
|
5003
|
-
|
|
5004
|
-
|
|
5005
|
-
|
|
5006
|
-
|
|
5007
|
-
|
|
5008
|
-
|
|
5009
|
-
|
|
5010
|
-
|
|
5011
|
-
|
|
5012
|
-
|
|
5013
|
-
|
|
5014
|
-
|
|
5015
|
-
|
|
5016
|
-
|
|
5017
|
-
|
|
5018
|
-
|
|
5019
|
-
|
|
5159
|
+
let narrowedPastLimit = false;
|
|
5160
|
+
if (limitResult.reason?.startsWith("max_tool_calls") && resolvedSessionLimits.max_tool_calls_mode === "narrow" && resolvedSessionLimits.max_calls_per_tool) {
|
|
5161
|
+
const costOk = !(typeof resolvedSessionLimits.max_cost_per_session === "number" && sessionState.actualCost >= resolvedSessionLimits.max_cost_per_session);
|
|
5162
|
+
if (costOk) {
|
|
5163
|
+
const currentRequest = toRecord10(activeArgs[0]);
|
|
5164
|
+
const currentTools = Array.isArray(currentRequest.tools) ? extractToolDefinitions(currentRequest.tools) : [];
|
|
5165
|
+
const budgetedTools = currentTools.filter((tool) => {
|
|
5166
|
+
const max = resolvedSessionLimits.max_calls_per_tool[tool.name];
|
|
5167
|
+
if (typeof max !== "number") return false;
|
|
5168
|
+
return (sessionState.toolCallCounts.get(tool.name) ?? 0) < max;
|
|
5169
|
+
});
|
|
5170
|
+
if (budgetedTools.length > 0) {
|
|
5171
|
+
const modifiedRequest = { ...currentRequest, tools: budgetedTools };
|
|
5172
|
+
activeArgs = [modifiedRequest, ...Array.prototype.slice.call(activeArgs, 1)];
|
|
5173
|
+
narrowedPastLimit = true;
|
|
5174
|
+
trace.push({
|
|
5175
|
+
stage: "pre_check",
|
|
5176
|
+
tool: null,
|
|
5177
|
+
verdict: "narrow",
|
|
5178
|
+
reason: "max_tool_calls_narrow_mode",
|
|
5179
|
+
checked: { max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null, budgeted_tools: budgetedTools.map((t) => t.name) },
|
|
5180
|
+
found: { total_tool_calls: sessionState.totalToolCalls }
|
|
5181
|
+
});
|
|
5182
|
+
}
|
|
5020
5183
|
}
|
|
5021
5184
|
}
|
|
5022
|
-
|
|
5023
|
-
|
|
5024
|
-
|
|
5025
|
-
|
|
5026
|
-
|
|
5027
|
-
|
|
5028
|
-
|
|
5029
|
-
|
|
5030
|
-
|
|
5031
|
-
|
|
5032
|
-
|
|
5033
|
-
|
|
5034
|
-
void 0,
|
|
5035
|
-
timing
|
|
5036
|
-
);
|
|
5037
|
-
if (isCompatAdvisory) {
|
|
5038
|
-
emitDiagnostic2(diagnostics, {
|
|
5039
|
-
type: "replay_compat_advisory",
|
|
5040
|
-
session_id: sessionId,
|
|
5041
|
-
would_block: decision.blocked,
|
|
5042
|
-
details: limitResult.reason ?? "session limit exceeded"
|
|
5185
|
+
if (!narrowedPastLimit) {
|
|
5186
|
+
trace.push({
|
|
5187
|
+
stage: "pre_check",
|
|
5188
|
+
tool: null,
|
|
5189
|
+
verdict: "block",
|
|
5190
|
+
reason: "session_limit_exceeded",
|
|
5191
|
+
checked: {
|
|
5192
|
+
max_steps: resolvedSessionLimits.max_steps ?? null,
|
|
5193
|
+
max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null,
|
|
5194
|
+
max_cost: resolvedSessionLimits.max_cost_per_session ?? null
|
|
5195
|
+
},
|
|
5196
|
+
found: { total_steps: sessionState.totalStepCount, total_tool_calls: sessionState.totalToolCalls, actual_cost: sessionState.actualCost }
|
|
5043
5197
|
});
|
|
5044
|
-
|
|
5045
|
-
|
|
5198
|
+
const decision = {
|
|
5199
|
+
action: "block",
|
|
5200
|
+
tool_calls: [],
|
|
5201
|
+
blocked: [{
|
|
5202
|
+
tool_name: "_session",
|
|
5203
|
+
arguments: "",
|
|
5204
|
+
reason: "session_limit_exceeded",
|
|
5205
|
+
contract_file: "",
|
|
5206
|
+
failures: [{ path: "$", operator: "session_limit", expected: "", found: "", message: limitResult.reason ?? "session limit exceeded" }]
|
|
5207
|
+
}],
|
|
5208
|
+
response_modification: gateMode
|
|
5209
|
+
};
|
|
5210
|
+
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
5211
|
+
if (resolvedSessionLimits.circuit_breaker) {
|
|
5212
|
+
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
5213
|
+
if (cbResult.triggered) {
|
|
5214
|
+
killed = true;
|
|
5215
|
+
killedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
5216
|
+
sessionState = killSession(sessionState);
|
|
5217
|
+
emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
|
|
5218
|
+
}
|
|
5219
|
+
}
|
|
5220
|
+
timing.pre_check_ms = Date.now() - preCheckStart;
|
|
5221
|
+
captureDecision(
|
|
5222
|
+
decision,
|
|
5223
|
+
null,
|
|
5224
|
+
request,
|
|
5225
|
+
guardStart,
|
|
5226
|
+
requestToolNames,
|
|
5227
|
+
null,
|
|
5228
|
+
narrowResult,
|
|
5229
|
+
null,
|
|
5230
|
+
null,
|
|
5231
|
+
null,
|
|
5232
|
+
void 0,
|
|
5233
|
+
timing
|
|
5234
|
+
);
|
|
5235
|
+
if (isCompatAdvisory) {
|
|
5236
|
+
emitDiagnostic2(diagnostics, {
|
|
5237
|
+
type: "replay_compat_advisory",
|
|
5238
|
+
session_id: sessionId,
|
|
5239
|
+
would_block: decision.blocked,
|
|
5240
|
+
details: limitResult.reason ?? "session limit exceeded"
|
|
5241
|
+
});
|
|
5242
|
+
} else {
|
|
5243
|
+
throw buildContractError2(decision);
|
|
5244
|
+
}
|
|
5046
5245
|
}
|
|
5047
5246
|
}
|
|
5048
5247
|
if (isAtHardStepCap(sessionState)) {
|
|
@@ -5075,8 +5274,23 @@ function replay(client, opts = {}) {
|
|
|
5075
5274
|
);
|
|
5076
5275
|
throw buildContractError2(decision);
|
|
5077
5276
|
}
|
|
5277
|
+
if (!checkSessionLimits(sessionState, resolvedSessionLimits).exceeded) {
|
|
5278
|
+
trace.push({
|
|
5279
|
+
stage: "pre_check",
|
|
5280
|
+
tool: null,
|
|
5281
|
+
verdict: "allow",
|
|
5282
|
+
reason: "session_limits_ok",
|
|
5283
|
+
checked: {
|
|
5284
|
+
max_steps: resolvedSessionLimits.max_steps ?? null,
|
|
5285
|
+
max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null,
|
|
5286
|
+
max_cost: resolvedSessionLimits.max_cost_per_session ?? null
|
|
5287
|
+
},
|
|
5288
|
+
found: { total_steps: sessionState.totalStepCount, total_tool_calls: sessionState.totalToolCalls, actual_cost: sessionState.actualCost }
|
|
5289
|
+
});
|
|
5290
|
+
}
|
|
5291
|
+
} else if (mode === "enforce") {
|
|
5292
|
+
trace.push({ stage: "pre_check", tool: null, verdict: "skip", reason: "no_session_limits", checked: {}, found: {} });
|
|
5078
5293
|
}
|
|
5079
|
-
const messages = Array.isArray(request.messages) ? request.messages : [];
|
|
5080
5294
|
if (messages.length > 0) {
|
|
5081
5295
|
const msgResult = validateToolResultMessages(messages, contracts, provider);
|
|
5082
5296
|
if (!msgResult.passed) {
|
|
@@ -5086,13 +5300,6 @@ function replay(client, opts = {}) {
|
|
|
5086
5300
|
});
|
|
5087
5301
|
}
|
|
5088
5302
|
}
|
|
5089
|
-
if (messages.length > 0) {
|
|
5090
|
-
const toolResults = extractToolResults(messages, provider);
|
|
5091
|
-
if (toolResults.length > 0) {
|
|
5092
|
-
const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
|
|
5093
|
-
sessionState = applyOutputExtracts(sessionState, outputUpdates);
|
|
5094
|
-
}
|
|
5095
|
-
}
|
|
5096
5303
|
const inputFailures = evaluateInputInvariants(request, contracts);
|
|
5097
5304
|
if (mode === "enforce" && inputFailures.length > 0) {
|
|
5098
5305
|
if (onError === "block") {
|
|
@@ -5167,6 +5374,10 @@ function replay(client, opts = {}) {
|
|
|
5167
5374
|
sessionState = updateActualCost(sessionState, costDelta);
|
|
5168
5375
|
}
|
|
5169
5376
|
if (mode === "log-only") {
|
|
5377
|
+
trace.push({ stage: "gate", tool: null, verdict: "allow", reason: "log_only_mode", checked: {}, found: {} });
|
|
5378
|
+
trace.complete = true;
|
|
5379
|
+
lastTrace = trace;
|
|
5380
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5170
5381
|
captureDecision(
|
|
5171
5382
|
{ action: "allow", tool_calls: extractToolCalls(response, provider) },
|
|
5172
5383
|
response,
|
|
@@ -5179,13 +5390,26 @@ function replay(client, opts = {}) {
|
|
|
5179
5390
|
null,
|
|
5180
5391
|
null,
|
|
5181
5392
|
void 0,
|
|
5182
|
-
timing
|
|
5393
|
+
timing,
|
|
5394
|
+
trace
|
|
5183
5395
|
);
|
|
5184
5396
|
return response;
|
|
5185
5397
|
}
|
|
5398
|
+
currentTraceStage = "validate";
|
|
5186
5399
|
const toolCalls = extractToolCalls(response, provider);
|
|
5187
5400
|
const validateStart = Date.now();
|
|
5188
5401
|
const validation = validateResponse2(response, toolCalls, contracts, requestToolNames, unmatchedPolicy, provider);
|
|
5402
|
+
for (const f of validation.failures) {
|
|
5403
|
+
const toolName = extractToolNameFromFailure(f, toolCalls);
|
|
5404
|
+
trace.push({
|
|
5405
|
+
stage: "validate",
|
|
5406
|
+
tool: toolName === "_response" ? null : toolName,
|
|
5407
|
+
verdict: "block",
|
|
5408
|
+
reason: f.operator === "response_format" ? "response_format_failed" : "output_invariant_failed",
|
|
5409
|
+
checked: { path: f.path, operator: f.operator, invariant_type: f.operator === "response_format" ? "response_format" : "output" },
|
|
5410
|
+
found: { value: f.found }
|
|
5411
|
+
});
|
|
5412
|
+
}
|
|
5189
5413
|
timing.validate_ms += Date.now() - validateStart;
|
|
5190
5414
|
if (isActiveGovern && !attemptDegraded && attemptPreparedRequestId) {
|
|
5191
5415
|
const rtProposalStart = Date.now();
|
|
@@ -5220,9 +5444,10 @@ function replay(client, opts = {}) {
|
|
|
5220
5444
|
}
|
|
5221
5445
|
timing.runtime_ms += Date.now() - rtProposalStart;
|
|
5222
5446
|
}
|
|
5447
|
+
currentTraceStage = "cross_step";
|
|
5223
5448
|
const crossStepStart = Date.now();
|
|
5224
5449
|
const crossStepContracts = compiledSession ? Array.from(compiledSession.perToolContracts.values()) : contracts;
|
|
5225
|
-
const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts);
|
|
5450
|
+
const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts, traceCtx);
|
|
5226
5451
|
if (!crossStepResult.passed) {
|
|
5227
5452
|
for (const f of crossStepResult.failures) {
|
|
5228
5453
|
validation.failures.push({
|
|
@@ -5236,10 +5461,11 @@ function replay(client, opts = {}) {
|
|
|
5236
5461
|
}
|
|
5237
5462
|
}
|
|
5238
5463
|
timing.cross_step_ms += Date.now() - crossStepStart;
|
|
5464
|
+
currentTraceStage = "phase";
|
|
5239
5465
|
let phaseResult = null;
|
|
5240
5466
|
const phaseStart = Date.now();
|
|
5241
5467
|
if (compiledSession) {
|
|
5242
|
-
phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession);
|
|
5468
|
+
phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession, traceCtx);
|
|
5243
5469
|
if (!phaseResult.legal) {
|
|
5244
5470
|
validation.failures.push({
|
|
5245
5471
|
path: `$.tool_calls.${phaseResult.blockedTool}`,
|
|
@@ -5269,7 +5495,7 @@ function replay(client, opts = {}) {
|
|
|
5269
5495
|
for (const f of avResult.failures) {
|
|
5270
5496
|
validation.failures.push({
|
|
5271
5497
|
path: f.path,
|
|
5272
|
-
operator:
|
|
5498
|
+
operator: "argument_value_mismatch",
|
|
5273
5499
|
expected: String(f.expected),
|
|
5274
5500
|
found: String(f.actual),
|
|
5275
5501
|
message: f.detail,
|
|
@@ -5280,10 +5506,12 @@ function replay(client, opts = {}) {
|
|
|
5280
5506
|
}
|
|
5281
5507
|
}
|
|
5282
5508
|
}
|
|
5509
|
+
currentTraceStage = "limit";
|
|
5283
5510
|
if (resolvedSessionLimits) {
|
|
5284
5511
|
const workingState = { ...sessionState, toolCallCounts: workingToolCallCounts };
|
|
5285
5512
|
const perToolResult = checkPerToolLimits(workingState, tc.name, resolvedSessionLimits);
|
|
5286
5513
|
if (perToolResult.exceeded) {
|
|
5514
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "block", reason: "per_tool_limit_exceeded", checked: { max_calls: resolvedSessionLimits.max_calls_per_tool?.[tc.name] ?? null }, found: { current_calls: workingToolCallCounts.get(tc.name) ?? 0 } });
|
|
5287
5515
|
validation.failures.push({
|
|
5288
5516
|
path: `$.tool_calls.${tc.name}`,
|
|
5289
5517
|
operator: "session_limit",
|
|
@@ -5292,6 +5520,8 @@ function replay(client, opts = {}) {
|
|
|
5292
5520
|
message: perToolResult.reason ?? "per-tool limit exceeded",
|
|
5293
5521
|
contract_file: ""
|
|
5294
5522
|
});
|
|
5523
|
+
} else {
|
|
5524
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "allow", reason: "per_tool_limit_ok", checked: { max_calls: resolvedSessionLimits.max_calls_per_tool?.[tc.name] ?? null }, found: { current_calls: workingToolCallCounts.get(tc.name) ?? 0 } });
|
|
5295
5525
|
}
|
|
5296
5526
|
}
|
|
5297
5527
|
workingToolCallCounts.set(tc.name, (workingToolCallCounts.get(tc.name) ?? 0) + 1);
|
|
@@ -5308,6 +5538,7 @@ function replay(client, opts = {}) {
|
|
|
5308
5538
|
).length;
|
|
5309
5539
|
const totalMatches = loopResult.matchCount + intraMatches;
|
|
5310
5540
|
if (totalMatches >= resolvedSessionLimits.loop_detection.threshold) {
|
|
5541
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "block", reason: "loop_detected", checked: { window: resolvedSessionLimits.loop_detection.window, threshold: resolvedSessionLimits.loop_detection.threshold }, found: { match_count: totalMatches, arguments_hash: argsHash } });
|
|
5311
5542
|
validation.failures.push({
|
|
5312
5543
|
path: `$.tool_calls.${tc.name}`,
|
|
5313
5544
|
operator: "loop_detected",
|
|
@@ -5321,6 +5552,7 @@ function replay(client, opts = {}) {
|
|
|
5321
5552
|
}
|
|
5322
5553
|
}
|
|
5323
5554
|
timing.argument_values_ms += Date.now() - argValuesStart;
|
|
5555
|
+
currentTraceStage = "policy";
|
|
5324
5556
|
let policyVerdicts = null;
|
|
5325
5557
|
const policyStart = Date.now();
|
|
5326
5558
|
if (compiledSession?.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
|
|
@@ -5341,6 +5573,14 @@ function replay(client, opts = {}) {
|
|
|
5341
5573
|
);
|
|
5342
5574
|
policyVerdicts.set(tc.name, verdict);
|
|
5343
5575
|
if (!verdict.allowed) {
|
|
5576
|
+
trace.push({
|
|
5577
|
+
stage: "policy",
|
|
5578
|
+
tool: tc.name,
|
|
5579
|
+
verdict: "block",
|
|
5580
|
+
reason: verdict.reason?.startsWith("Session deny") ? "session_deny_matched" : verdict.reason?.startsWith("default_deny") ? "default_deny_no_allow" : "policy_denied",
|
|
5581
|
+
checked: { has_policy: true, default_deny: compiledSession.policyProgram.defaultDeny },
|
|
5582
|
+
found: { matched: true }
|
|
5583
|
+
});
|
|
5344
5584
|
validation.failures.push({
|
|
5345
5585
|
path: `$.tool_calls.${tc.name}`,
|
|
5346
5586
|
operator: "policy_denied",
|
|
@@ -5349,10 +5589,22 @@ function replay(client, opts = {}) {
|
|
|
5349
5589
|
message: `Policy denied: ${tc.name} \u2014 ${verdict.reason}`,
|
|
5350
5590
|
contract_file: ""
|
|
5351
5591
|
});
|
|
5592
|
+
} else {
|
|
5593
|
+
trace.push({
|
|
5594
|
+
stage: "policy",
|
|
5595
|
+
tool: tc.name,
|
|
5596
|
+
verdict: "allow",
|
|
5597
|
+
reason: "policy_allowed",
|
|
5598
|
+
checked: { has_policy: true, default_deny: compiledSession.policyProgram.defaultDeny },
|
|
5599
|
+
found: { session_deny_matched: false, tool_deny_matched: false }
|
|
5600
|
+
});
|
|
5352
5601
|
}
|
|
5353
5602
|
}
|
|
5603
|
+
} else {
|
|
5604
|
+
trace.push({ stage: "policy", tool: null, verdict: "skip", reason: "no_policy_configured", checked: {}, found: {} });
|
|
5354
5605
|
}
|
|
5355
5606
|
timing.policy_ms += Date.now() - policyStart;
|
|
5607
|
+
currentTraceStage = "gate";
|
|
5356
5608
|
if (mode === "shadow") {
|
|
5357
5609
|
const shadowGateStart = Date.now();
|
|
5358
5610
|
const shadowDecision = validation.failures.length > 0 ? {
|
|
@@ -5361,6 +5613,15 @@ function replay(client, opts = {}) {
|
|
|
5361
5613
|
blocked: buildBlockedCalls(toolCalls, validation.failures, validation.unmatchedBlocked),
|
|
5362
5614
|
response_modification: gateMode
|
|
5363
5615
|
} : { action: "allow", tool_calls: toolCalls };
|
|
5616
|
+
const blockedTools = shadowDecision.action === "block" ? shadowDecision.blocked.map((b) => b.tool_name) : [];
|
|
5617
|
+
trace.push({
|
|
5618
|
+
stage: "gate",
|
|
5619
|
+
tool: null,
|
|
5620
|
+
verdict: blockedTools.length > 0 ? "info" : "allow",
|
|
5621
|
+
reason: blockedTools.length > 0 ? "violations_found" : "no_violations",
|
|
5622
|
+
checked: { gate_mode: gateMode },
|
|
5623
|
+
found: { blocked_count: blockedTools.length, action: shadowDecision.action, ...blockedTools.length > 0 ? { blocked_tools: blockedTools } : {} }
|
|
5624
|
+
});
|
|
5364
5625
|
const shadowDelta = {
|
|
5365
5626
|
would_have_blocked: shadowDecision.action === "block" ? shadowDecision.blocked : [],
|
|
5366
5627
|
would_have_narrowed: narrowResult?.removed ?? [],
|
|
@@ -5370,7 +5631,11 @@ function replay(client, opts = {}) {
|
|
|
5370
5631
|
lastShadowDeltaValue = shadowDelta;
|
|
5371
5632
|
shadowEvaluationCount++;
|
|
5372
5633
|
timing.gate_ms += Date.now() - shadowGateStart;
|
|
5373
|
-
|
|
5634
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: sessionState.currentPhase, tools_committed: [], tools_blocked: blockedTools, killed: false, step_index: sessionState.totalStepCount } });
|
|
5635
|
+
trace.complete = true;
|
|
5636
|
+
lastTrace = trace;
|
|
5637
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5638
|
+
captureDecision(shadowDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, shadowDelta, timing, trace);
|
|
5374
5639
|
return response;
|
|
5375
5640
|
}
|
|
5376
5641
|
if (isCompatAdvisory) {
|
|
@@ -5411,7 +5676,21 @@ function replay(client, opts = {}) {
|
|
|
5411
5676
|
sessionState = recordDecisionOutcome(sessionState, "allowed");
|
|
5412
5677
|
}
|
|
5413
5678
|
timing.finalize_ms += Date.now() - advisoryFinalizeStart;
|
|
5414
|
-
|
|
5679
|
+
const advisoryBlockedTools = advisoryDecision.action === "block" ? advisoryDecision.blocked.map((b) => b.tool_name) : [];
|
|
5680
|
+
trace.push({
|
|
5681
|
+
stage: "gate",
|
|
5682
|
+
tool: null,
|
|
5683
|
+
verdict: advisoryBlockedTools.length > 0 ? "info" : "allow",
|
|
5684
|
+
reason: advisoryBlockedTools.length > 0 ? "violations_found" : "no_violations",
|
|
5685
|
+
checked: { gate_mode: gateMode },
|
|
5686
|
+
found: { blocked_count: advisoryBlockedTools.length, action: advisoryDecision.action, ...advisoryBlockedTools.length > 0 ? { blocked_tools: advisoryBlockedTools } : {} }
|
|
5687
|
+
});
|
|
5688
|
+
const advisoryNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
|
|
5689
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: advisoryNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: advisoryBlockedTools, killed: false, step_index: sessionState.totalStepCount } });
|
|
5690
|
+
trace.complete = true;
|
|
5691
|
+
lastTrace = trace;
|
|
5692
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5693
|
+
captureDecision(advisoryDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5415
5694
|
return response;
|
|
5416
5695
|
}
|
|
5417
5696
|
const enforceGateStart = Date.now();
|
|
@@ -5449,7 +5728,20 @@ function replay(client, opts = {}) {
|
|
|
5449
5728
|
});
|
|
5450
5729
|
}
|
|
5451
5730
|
}
|
|
5452
|
-
|
|
5731
|
+
trace.push({
|
|
5732
|
+
stage: "gate",
|
|
5733
|
+
tool: null,
|
|
5734
|
+
verdict: "allow",
|
|
5735
|
+
reason: "no_violations",
|
|
5736
|
+
checked: { gate_mode: gateMode },
|
|
5737
|
+
found: { blocked_count: 0, action: "allow" }
|
|
5738
|
+
});
|
|
5739
|
+
const allowNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
|
|
5740
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: completedStep.phase, phase_after: allowNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: [], killed: false, step_index: sessionState.totalStepCount } });
|
|
5741
|
+
trace.complete = true;
|
|
5742
|
+
lastTrace = trace;
|
|
5743
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5744
|
+
captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5453
5745
|
return response;
|
|
5454
5746
|
}
|
|
5455
5747
|
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
@@ -5514,15 +5806,42 @@ function replay(client, opts = {}) {
|
|
|
5514
5806
|
);
|
|
5515
5807
|
continue;
|
|
5516
5808
|
}
|
|
5517
|
-
|
|
5809
|
+
const blockBlockedTools = decision.action === "block" ? decision.blocked.map((b) => b.tool_name) : [];
|
|
5810
|
+
trace.push({
|
|
5811
|
+
stage: "gate",
|
|
5812
|
+
tool: null,
|
|
5813
|
+
verdict: "block",
|
|
5814
|
+
reason: "violations_found",
|
|
5815
|
+
checked: { gate_mode: gateMode },
|
|
5816
|
+
found: { blocked_count: blockBlockedTools.length, action: "block", blocked_tools: blockBlockedTools }
|
|
5817
|
+
});
|
|
5818
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: sessionState.currentPhase, tools_committed: [], tools_blocked: blockBlockedTools, killed, step_index: sessionState.totalStepCount } });
|
|
5819
|
+
trace.complete = true;
|
|
5820
|
+
lastTrace = trace;
|
|
5821
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5822
|
+
captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5518
5823
|
return applyGateDecision(decision, response, provider, gateMode, opts.onBlock);
|
|
5519
5824
|
}
|
|
5520
5825
|
if (lastError) throw lastError;
|
|
5521
5826
|
throw new ReplayInternalError("Retry loop exhausted without result", { sessionId });
|
|
5522
5827
|
} catch (err) {
|
|
5523
5828
|
if (err instanceof ReplayContractError || err instanceof ReplayKillError) {
|
|
5829
|
+
if (!trace.complete) {
|
|
5830
|
+
lastTrace = trace;
|
|
5831
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5832
|
+
}
|
|
5524
5833
|
throw err;
|
|
5525
5834
|
}
|
|
5835
|
+
trace.push({
|
|
5836
|
+
stage: currentTraceStage,
|
|
5837
|
+
tool: null,
|
|
5838
|
+
verdict: "error",
|
|
5839
|
+
reason: "stage_threw",
|
|
5840
|
+
checked: {},
|
|
5841
|
+
found: { error: err instanceof Error ? err.message : String(err) }
|
|
5842
|
+
});
|
|
5843
|
+
lastTrace = trace;
|
|
5844
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5526
5845
|
sessionState = recordDecisionOutcome(sessionState, "error");
|
|
5527
5846
|
if (resolvedSessionLimits?.circuit_breaker) {
|
|
5528
5847
|
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
@@ -5648,6 +5967,9 @@ function replay(client, opts = {}) {
|
|
|
5648
5967
|
getLastShadowDelta() {
|
|
5649
5968
|
return lastShadowDeltaValue;
|
|
5650
5969
|
},
|
|
5970
|
+
getLastTrace() {
|
|
5971
|
+
return lastTrace;
|
|
5972
|
+
},
|
|
5651
5973
|
/**
|
|
5652
5974
|
* v3: Manually restrict available tools within compiled legal space.
|
|
5653
5975
|
* @see specs/replay-v3.md § narrow() / widen()
|
|
@@ -5783,7 +6105,7 @@ function replay(client, opts = {}) {
|
|
|
5783
6105
|
}
|
|
5784
6106
|
return wrapped;
|
|
5785
6107
|
}
|
|
5786
|
-
function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam) {
|
|
6108
|
+
function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam, traceParam) {
|
|
5787
6109
|
if (!buffer && !store) return;
|
|
5788
6110
|
if (timingParam) {
|
|
5789
6111
|
timingParam.total_ms = Date.now() - guardStart;
|
|
@@ -5818,6 +6140,7 @@ function replay(client, opts = {}) {
|
|
|
5818
6140
|
phase: sessionState.currentPhase,
|
|
5819
6141
|
phase_transition: phaseTransitionStr,
|
|
5820
6142
|
shadow_delta: shadowDelta,
|
|
6143
|
+
trace: traceParam ? redactTrace(traceParam, opts.captureLevel ?? "full") : void 0,
|
|
5821
6144
|
receipt: null
|
|
5822
6145
|
};
|
|
5823
6146
|
const capturedCall = {
|
|
@@ -6375,6 +6698,7 @@ function resolveSessionLimits(contracts) {
|
|
|
6375
6698
|
const sl = c.session_limits;
|
|
6376
6699
|
if (sl.max_steps !== void 0 && merged.max_steps === void 0) merged.max_steps = sl.max_steps;
|
|
6377
6700
|
if (sl.max_tool_calls !== void 0 && merged.max_tool_calls === void 0) merged.max_tool_calls = sl.max_tool_calls;
|
|
6701
|
+
if (sl.max_tool_calls_mode !== void 0 && merged.max_tool_calls_mode === void 0) merged.max_tool_calls_mode = sl.max_tool_calls_mode;
|
|
6378
6702
|
if (sl.max_cost_per_session !== void 0 && merged.max_cost_per_session === void 0) merged.max_cost_per_session = sl.max_cost_per_session;
|
|
6379
6703
|
if (sl.loop_detection && !merged.loop_detection) merged.loop_detection = sl.loop_detection;
|
|
6380
6704
|
if (sl.circuit_breaker && !merged.circuit_breaker) merged.circuit_breaker = sl.circuit_breaker;
|
|
@@ -6482,6 +6806,7 @@ function createInactiveSession(client, sessionId, reason) {
|
|
|
6482
6806
|
getState: () => EMPTY_STATE_SNAPSHOT,
|
|
6483
6807
|
getLastNarrowing: () => null,
|
|
6484
6808
|
getLastShadowDelta: () => null,
|
|
6809
|
+
getLastTrace: () => null,
|
|
6485
6810
|
narrow() {
|
|
6486
6811
|
},
|
|
6487
6812
|
widen() {
|
|
@@ -6523,6 +6848,7 @@ function createBlockingInactiveSession(client, sessionId, detail, configError) {
|
|
|
6523
6848
|
getState: () => EMPTY_STATE_SNAPSHOT,
|
|
6524
6849
|
getLastNarrowing: () => null,
|
|
6525
6850
|
getLastShadowDelta: () => null,
|
|
6851
|
+
getLastTrace: () => null,
|
|
6526
6852
|
narrow() {
|
|
6527
6853
|
},
|
|
6528
6854
|
widen() {
|
|
@@ -6607,6 +6933,83 @@ function generateSessionId2() {
|
|
|
6607
6933
|
/**
 * Removes a leading "sha256:" algorithm tag from a hash string.
 *
 * @param {string} hash - Hash value, with or without the "sha256:" tag.
 * @returns {string} The text after the tag, or `hash` unchanged when the
 *   tag is not at the start.
 */
function stripHashPrefix(hash) {
  const tag = "sha256:";
  if (!hash.startsWith(tag)) {
    return hash;
  }
  // Slice by the tag's own length so the offset cannot drift from the tag.
  return hash.slice(tag.length);
}
|
|
6936
|
+
/**
 * Resolves the diagnostics verbosity from the REPLAYCI_LOG environment
 * variable.
 *
 * Matching is case-insensitive and ignores surrounding whitespace:
 *   - "trace" or "debug"          -> "trace"
 *   - "silent", "off" or "none"   -> "silent"
 *   - anything else, unset, empty -> "warn"
 *
 * @returns {"trace" | "warn" | "silent"} The effective log level.
 */
function resolveLogLevel() {
  // Guard both a missing `process` global and a `process` object without `env`.
  const raw = typeof process !== "undefined" ? process.env?.REPLAYCI_LOG : void 0;
  if (!raw) return "warn";
  // Trim so values such as " trace" or "silent\n" (easy to produce from
  // .env files or shell exports) are not silently downgraded to "warn".
  const normalized = String(raw).trim().toLowerCase();
  if (normalized === "trace" || normalized === "debug") return "trace";
  if (normalized === "silent" || normalized === "off" || normalized === "none") return "silent";
  return "warn";
}
|
|
6944
|
+
/**
 * Default diagnostics sink: writes a human-readable line to `console.warn`
 * for each recognised diagnostic event, prefixed with "[replayci]".
 *
 * The verbosity comes from `resolveLogLevel()`:
 *   - "silent": nothing is printed.
 *   - "trace":  for "replay_trace" events every trace entry is printed,
 *               including its `checked` / `found` payloads as JSON.
 *   - otherwise: for "replay_trace" events only entries whose verdict is
 *               "block" are printed.
 * Events with an unrecognised `type` are ignored.
 *
 * @param {object} event - Diagnostic event; `event.type` selects the message
 *   and the remaining fields read depend on that type.
 * @returns {void}
 */
function defaultReplayDiagnosticsHandler(event) {
  const verbosity = resolveLogLevel();
  if (verbosity === "silent") {
    return;
  }

  // Every message shares the same prefix; keep it in one place.
  const warn = (text) => console.warn(`[replayci] ${text}`);

  // Full per-entry dump used at "trace" verbosity.
  const logFullTrace = (trace) => {
    for (const step of trace.entries) {
      const toolPart = step.tool ? ` ${step.tool}` : "";
      // Skip the reason when it merely repeats the verdict.
      const reasonPart = step.reason !== step.verdict ? ` \u2014 ${step.reason}` : "";
      const checkedPart = Object.keys(step.checked).length > 0 ? ` checked=${JSON.stringify(step.checked)}` : "";
      const foundPart = Object.keys(step.found).length > 0 ? ` found=${JSON.stringify(step.found)}` : "";
      warn(`${step.stage}${toolPart}: ${step.verdict}${reasonPart}${checkedPart}${foundPart}`);
    }
    if (!trace.complete) {
      warn(`trace INCOMPLETE (fault in pipeline)`);
    }
  };

  // Condensed view used otherwise: only blocking entries are reported.
  const logBlockedOnly = (trace) => {
    const blocking = trace.entries.filter((step) => step.verdict === "block");
    for (const step of blocking) {
      const subject = step.tool ?? "session";
      warn(`blocked ${subject} at ${step.stage} \u2192 ${step.reason}`);
    }
    if (!trace.complete) {
      warn(`enforcement cycle incomplete (fault) \u2014 session.getLastTrace() for partial trace`);
    }
  };

  switch (event.type) {
    case "replay_inactive": {
      const suffix = event.error_message ? ` \u2014 ${event.error_message}` : "";
      warn(`replay() inactive: ${event.reason}${suffix}`);
      return;
    }
    case "replay_compile_error":
      warn(`compile error: ${event.details}`);
      return;
    case "replay_compile_warning":
      warn(`compile warning: ${event.details}`);
      return;
    case "replay_bypass_detected":
      warn(`bypass detected on session ${event.session_id}`);
      return;
    case "replay_kill":
      warn(`session ${event.session_id} killed`);
      return;
    case "replay_block":
      warn(`blocked ${event.tool_name}: ${event.reason}`);
      return;
    case "replay_narrow":
      // One line per removed tool.
      for (const removal of event.removed) {
        const extra = removal.detail ? ` (${removal.detail})` : "";
        warn(`removed ${removal.tool} \u2192 ${removal.reason}${extra}`);
      }
      return;
    case "replay_trace":
      if (verbosity === "trace") {
        logFullTrace(event.trace);
      } else {
        logBlockedOnly(event.trace);
      }
      return;
    case "replay_workflow_error":
      warn(`workflow error: ${event.details}`);
      return;
    case "replay_state_sync_error":
      warn(`state sync error: ${event.details}`);
      return;
    case "replay_receipt_error":
      warn(`receipt error (${event.tool_name}): ${event.details}`);
      return;
    case "replay_capture_error":
      warn(`capture error: ${event.details}`);
      return;
    default:
      // Unknown event types are intentionally ignored.
      return;
  }
}
|
|
6610
7013
|
function emitDiagnostic2(diagnostics, event) {
|
|
6611
7014
|
try {
|
|
6612
7015
|
diagnostics?.(event);
|