@replayci/replay 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +733 -257
- package/dist/index.d.cts +77 -1
- package/dist/index.d.ts +77 -1
- package/dist/index.js +733 -257
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1916,7 +1916,12 @@ function safelyCaptureResponse(input) {
|
|
|
1916
1916
|
});
|
|
1917
1917
|
input.persistHealthEvent();
|
|
1918
1918
|
}
|
|
1919
|
-
} catch {
|
|
1919
|
+
} catch (err) {
|
|
1920
|
+
emitDiagnostic(input.diagnostics, {
|
|
1921
|
+
type: "capture_error",
|
|
1922
|
+
session_id: input.sessionId,
|
|
1923
|
+
details: err instanceof Error ? err.message : "response capture failed"
|
|
1924
|
+
});
|
|
1920
1925
|
}
|
|
1921
1926
|
}
|
|
1922
1927
|
function safelyPushStreamCapture(input) {
|
|
@@ -1945,7 +1950,12 @@ function safelyPushStreamCapture(input) {
|
|
|
1945
1950
|
});
|
|
1946
1951
|
input.persistHealthEvent();
|
|
1947
1952
|
}
|
|
1948
|
-
} catch {
|
|
1953
|
+
} catch (err) {
|
|
1954
|
+
emitDiagnostic(input.diagnostics, {
|
|
1955
|
+
type: "capture_error",
|
|
1956
|
+
session_id: input.sessionId,
|
|
1957
|
+
details: err instanceof Error ? err.message : "stream capture failed"
|
|
1958
|
+
});
|
|
1949
1959
|
}
|
|
1950
1960
|
}
|
|
1951
1961
|
function buildCapturedCall(input) {
|
|
@@ -2514,6 +2524,173 @@ function formatErrorMessage(error) {
|
|
|
2514
2524
|
return error instanceof Error ? error.message : String(error);
|
|
2515
2525
|
}
|
|
2516
2526
|
|
|
2527
|
+
// src/preconditions.ts
|
|
2528
|
+
/**
 * Evaluates every precondition in order against the same session state and
 * the same candidate arguments.
 *
 * @param {Array<object>} preconditions - Preconditions to evaluate.
 * @param {object} sessionState - Session state handed to each evaluation.
 * @param {object} [currentArguments] - Arguments of the call being checked.
 * @returns {Array<object>} One result per precondition, in input order, as
 *   produced by `evaluatePrecondition`.
 */
function evaluatePreconditions(preconditions, sessionState, currentArguments) {
  // Named callback so only the precondition itself is forwarded; the index
  // and array arguments supplied by `map` are deliberately ignored.
  const evaluateOne = (precondition) => {
    return evaluatePrecondition(precondition, sessionState, currentArguments);
  };
  return preconditions.map(evaluateOne);
}
|
|
2533
|
+
/**
 * Evaluates one precondition against the session history.
 *
 * Two independent requirements may be present on a precondition:
 *  - `requires_step_count.gte`: `sessionState.totalStepCount` must not be
 *    lower than the given number.
 *  - `requires_prior_tool`: an earlier step must contain a tool call with that
 *    name whose `proposal_decision` is "allowed". When `resource` (a path
 *    string or an object with a `path`) is given, the value found at that path
 *    in `currentArguments` must also match the value the earlier call stored
 *    under the same path in `resourceValues`. Optional `with_output`
 *    assertions are then checked against the matching step's `outputExtract`,
 *    falling back to the entry in `sessionState.satisfiedPreconditions`.
 *
 * Resource values and `equals` assertions are compared structurally, so an
 * object or array matches an equal copy rather than only the same reference.
 *
 * @param {object} precondition - Precondition definition.
 * @param {object} sessionState - Reads `totalStepCount`, `steps` and
 *   `satisfiedPreconditions` (queried with `.get(key)`).
 * @param {object} [currentArguments] - Arguments of the call being checked.
 * @returns {{satisfied: boolean, detail: string}} `detail` is empty on success.
 */
function evaluatePrecondition(precondition, sessionState, currentArguments) {
  if (precondition.requires_step_count) {
    const required = precondition.requires_step_count.gte;
    if (sessionState.totalStepCount < required) {
      return {
        satisfied: false,
        detail: `Need ${required} prior steps, have ${sessionState.totalStepCount}`
      };
    }
  }
  if (precondition.requires_prior_tool) {
    const toolName = precondition.requires_prior_tool;
    const resourcePath = precondition.resource ? typeof precondition.resource === "string" ? precondition.resource : precondition.resource.path : void 0;
    const resourceValue = resourcePath ? extractPath(currentArguments ?? {}, resourcePath) : void 0;
    // Same key shape for scoped and unscoped lookups: "tool" or "tool:<json>".
    const cacheKey = resourceValue !== void 0 ? `${toolName}:${JSON.stringify(resourceValue)}` : toolName;
    const isQualifyingCall = (tc) => {
      if (tc.toolName !== toolName) return false;
      if (tc.proposal_decision !== "allowed") return false;
      if (resourceValue === void 0) return true;
      return preconditionValuesMatch(tc.resourceValues?.[resourcePath], resourceValue);
    };
    // Walk backwards so the most recent qualifying step wins.
    let priorStep;
    for (let i = sessionState.steps.length - 1; i >= 0; i--) {
      const step = sessionState.steps[i];
      if (step.toolCalls.some(isQualifyingCall)) {
        priorStep = step;
        break;
      }
    }
    const cachedExtract = sessionState.satisfiedPreconditions.get(cacheKey);
    if (!priorStep && cachedExtract === void 0) {
      const detail = resourceValue !== void 0 ? `Required prior tool ${toolName} not found for resource ${JSON.stringify(resourceValue)}` : `Required prior tool ${toolName} not found in session`;
      return { satisfied: false, detail };
    }
    if (precondition.with_output) {
      const extract = priorStep?.outputExtract ?? cachedExtract ?? {};
      for (const assertion of precondition.with_output) {
        const value = extractPath(extract, assertion.path);
        if (assertion.equals !== void 0 && !preconditionValuesMatch(value, assertion.equals)) {
          return {
            satisfied: false,
            detail: `Prior tool output assertion failed: ${assertion.path} \u2014 expected ${JSON.stringify(assertion.equals)}, got ${JSON.stringify(value)}`
          };
        }
      }
    }
  }
  return { satisfied: true, detail: "" };
}

/**
 * Compares two values for precondition purposes: primitives by strict
 * equality, objects and arrays by their JSON serialization (the same identity
 * the cache key above relies on). Key order is significant, and values that
 * cannot be serialized are treated as not matching.
 */
function preconditionValuesMatch(left, right) {
  if (left === right) return true;
  const bothObjects = left !== null && right !== null && typeof left === "object" && typeof right === "object";
  if (!bothObjects) return false;
  try {
    return JSON.stringify(left) === JSON.stringify(right);
  } catch {
    return false;
  }
}
|
|
2583
|
+
/**
 * Resolves a dotted path (optionally prefixed with "$.") inside a value.
 *
 * An empty path or a bare "$" returns the input unchanged. Each segment is
 * looked up as an own property of the current value, so array elements are
 * addressed by index ("items.0.id"). Only own properties are followed:
 * inherited members such as `constructor`, `__proto__` or `toString` resolve
 * to `undefined` instead of leaking prototype internals into validation.
 *
 * @param {*} obj - Root value to read from.
 * @param {string} path - Dotted path, e.g. "$.user.id" or "user.id".
 * @returns {*} The value at the path, or `undefined` when any segment is
 *   missing or the traversal reaches null, undefined or a non-object.
 */
function extractPath(obj, path) {
  const cleanPath = path.startsWith("$.") ? path.slice(2) : path;
  if (cleanPath === "" || cleanPath === "$") return obj;
  let current = obj;
  for (const segment of cleanPath.split(".")) {
    if (current === null || current === void 0) return void 0;
    if (typeof current !== "object") return void 0;
    // Refuse to walk the prototype chain.
    if (!Object.prototype.hasOwnProperty.call(current, segment)) return void 0;
    current = current[segment];
  }
  return current;
}
|
|
2595
|
+
|
|
2596
|
+
// src/argumentValues.ts
|
|
2597
|
+
/**
 * Checks tool-call arguments against a list of value invariants.
 *
 * Each invariant names a `path` inside `parsedArguments` and may carry any
 * combination of operators, all of which are evaluated independently:
 *  - `exact_match`: strings are compared as-is, other values by their JSON text.
 *  - `regex`: the pattern (compiled through `safeRegex`) must match the value's
 *    string form. A missing or null value never satisfies a pattern; it used
 *    to be coerced to the text "undefined"/"null" and could match by accident.
 *  - `one_of`: the value must equal a candidate of the same `typeof` (compared
 *    via JSON text).
 *  - `type`: `typeof` of the value, with arrays reported as "array" and null
 *    reported as "null" rather than "object".
 *  - `gte` / `lte`: the value must be a real number (not NaN) within the bound.
 *
 * @param {object} parsedArguments - Parsed arguments of the tool call.
 * @param {Array<object>} invariants - Invariant definitions.
 * @returns {{passed: boolean, failures: Array<{path: string, operator: string,
 *   expected: *, actual: *, detail: string}>}} `passed` is true only when no
 *   failure was recorded.
 */
function evaluateArgumentValueInvariants(parsedArguments, invariants) {
  const failures = [];
  for (const inv of invariants) {
    const value = extractPath(parsedArguments, inv.path);
    const isMissing = value === void 0 || value === null;
    const report = (operator, expected, actual, detail) => {
      failures.push({ path: inv.path, operator, expected, actual, detail });
    };
    if (inv.exact_match !== void 0) {
      const strValue = typeof value === "string" ? value : JSON.stringify(value);
      if (strValue !== inv.exact_match) {
        report("exact_match", inv.exact_match, value, `Expected exact match "${inv.exact_match}", got "${strValue}"`);
      }
    }
    if (inv.regex !== void 0) {
      const strValue = typeof value === "string" ? value : String(value);
      try {
        // Compile first so an invalid pattern is still reported as such.
        const re = safeRegex(inv.regex);
        if (isMissing || !re.test(strValue)) {
          report("regex", inv.regex, value, `Value "${strValue}" does not match regex "${inv.regex}"`);
        }
      } catch {
        report("regex", inv.regex, value, `Invalid regex pattern: "${inv.regex}"`);
      }
    }
    if (inv.one_of !== void 0) {
      const serialized = JSON.stringify(value);
      const match = inv.one_of.some(
        (candidate) => typeof candidate === typeof value && JSON.stringify(candidate) === serialized
      );
      if (!match) {
        report("one_of", inv.one_of, value, `Value ${serialized} not in ${JSON.stringify(inv.one_of)}`);
      }
    }
    if (inv.type !== void 0) {
      // `typeof null` is "object"; report it as its own type.
      const actualType = value === null ? "null" : Array.isArray(value) ? "array" : typeof value;
      if (actualType !== inv.type) {
        report("type", inv.type, actualType, `Expected type "${inv.type}", got "${actualType}"`);
      }
    }
    // NaN has typeof "number" but compares false against every bound.
    const isComparable = typeof value === "number" && !Number.isNaN(value);
    if (typeof inv.gte === "number") {
      if (!isComparable || value < inv.gte) {
        report("gte", inv.gte, value, `Expected >= ${inv.gte}, got ${JSON.stringify(value)}`);
      }
    }
    if (typeof inv.lte === "number") {
      if (!isComparable || value > inv.lte) {
        report("lte", inv.lte, value, `Expected <= ${inv.lte}, got ${JSON.stringify(value)}`);
      }
    }
  }
  return {
    passed: failures.length === 0,
    failures
  };
}
|
|
2693
|
+
|
|
2517
2694
|
// src/validate.ts
|
|
2518
2695
|
function prepareContracts(input) {
|
|
2519
2696
|
assertSupportedNodeRuntime();
|
|
@@ -2578,6 +2755,7 @@ function evaluateAllContracts(matchedContracts, extraction) {
|
|
|
2578
2755
|
failures.push(...evaluateExpectTools(contract, extraction.toolCalls));
|
|
2579
2756
|
failures.push(...evaluateOutputInvariants(contract, extraction.normalizedResponse));
|
|
2580
2757
|
failures.push(...evaluateExpectedToolCallMatchers(contract, extraction.toolCalls));
|
|
2758
|
+
failures.push(...evaluateArgumentInvariants(contract, extraction.toolCalls));
|
|
2581
2759
|
}
|
|
2582
2760
|
return {
|
|
2583
2761
|
pass: failures.length === 0,
|
|
@@ -2745,6 +2923,28 @@ function evaluateExpectedToolCallMatchers(contract, toolCalls) {
|
|
|
2745
2923
|
contract_file: contract.contract_file
|
|
2746
2924
|
}));
|
|
2747
2925
|
}
|
|
2926
|
+
/**
 * Applies a contract's `argument_value_invariants` to every tool call whose
 * name equals `contract.tool` and converts the resulting failures into the
 * contract-failure shape (`found` / `message` instead of `actual` / `detail`).
 *
 * @param {object} contract - Reads `argument_value_invariants`, `tool` and
 *   `contract_file`.
 * @param {Iterable<object>} toolCalls - Calls carrying `name` and
 *   `parsedArguments`.
 * @returns {Array<object>} Failures tagged with the contract file; empty when
 *   the contract declares no invariants.
 */
function evaluateArgumentInvariants(contract, toolCalls) {
  const invariants = contract.argument_value_invariants;
  if (!invariants || invariants.length === 0) {
    return [];
  }
  const collected = [];
  for (const call of toolCalls) {
    if (call.name === contract.tool) {
      const rawArguments = call.parsedArguments;
      // Anything that is not a non-null object is evaluated as an empty argument set.
      const usable = rawArguments != null && typeof rawArguments === "object";
      const { failures: violations } = evaluateArgumentValueInvariants(usable ? rawArguments : {}, invariants);
      const mapped = violations.map((violation) => ({
        path: violation.path,
        operator: violation.operator,
        expected: violation.expected,
        found: violation.actual,
        message: violation.detail,
        contract_file: contract.contract_file
      }));
      collected.push(...mapped);
    }
  }
  return collected;
}
|
|
2748
2948
|
function mapInvariantFailure(contract, failure, normalizedResponse) {
|
|
2749
2949
|
const invariant = findMatchingInvariant(contract.assertions.output_invariants, failure);
|
|
2750
2950
|
const lookup = (0, import_contracts_core3.getPathValue)(normalizedResponse, failure.path);
|
|
@@ -3024,6 +3224,34 @@ function redactCapture(input) {
|
|
|
3024
3224
|
pattern_fingerprint: PATTERN_FINGERPRINT
|
|
3025
3225
|
};
|
|
3026
3226
|
}
|
|
3227
|
+
/**
 * Produces the version of a trace to keep for the given capture level.
 *
 * At level "full" the trace object itself is returned. For any other level a
 * shallow copy is returned whose `entries` have each been passed through
 * `redactTraceEntry`; the input trace is not modified.
 *
 * @param {object} trace - Trace with an `entries` array.
 * @param {string} captureLevel - Capture level controlling redaction.
 * @returns {object} The original trace or a redacted shallow copy.
 */
function redactTrace(trace, captureLevel) {
  if (captureLevel === "full") {
    return trace;
  }
  const entries = trace.entries.map((entry) => {
    return redactTraceEntry(entry, captureLevel);
  });
  return { ...trace, entries };
}
|
|
3234
|
+
/**
 * Redacts a single trace entry when the capture level is "metadata".
 *
 * For "metadata" a copy of the entry is returned with its `checked` and
 * `found` records replaced by the output of `redactRecord`. For every other
 * level the entry is returned unchanged.
 *
 * @param {object} entry - Trace entry carrying `checked` and `found` records.
 * @param {string} captureLevel - Capture level controlling redaction.
 * @returns {object} The same entry, or a redacted copy.
 */
function redactTraceEntry(entry, captureLevel) {
  if (captureLevel !== "metadata") {
    return entry;
  }
  const checked = redactRecord(entry.checked);
  const found = redactRecord(entry.found);
  return { ...entry, checked, found };
}
|
|
3244
|
+
/**
 * Returns a copy of a record in which every string has been passed through
 * `redactString`.
 *
 * Strings nested inside arrays and plain objects are redacted as well; a
 * shallow pass would let them through untouched. Numbers, booleans, null and
 * non-plain objects are copied as they are. A null or undefined record yields
 * an empty object instead of throwing.
 *
 * Assumes records are acyclic, JSON-like data — TODO confirm against callers.
 *
 * @param {object|null|undefined} record - Key/value record to redact.
 * @returns {object} New record with the same keys and redacted strings.
 */
function redactRecord(record) {
  const isPlainObject = (value) => {
    if (value === null || typeof value !== "object") return false;
    const proto = Object.getPrototypeOf(value);
    return proto === Object.prototype || proto === null;
  };
  const redactValue = (value) => {
    if (typeof value === "string") return redactString(value);
    if (Array.isArray(value)) return value.map(redactValue);
    if (isPlainObject(value)) return redactEntries(value);
    return value;
  };
  const redactEntries = (source) => {
    const result = {};
    for (const [key, value] of Object.entries(source)) {
      result[key] = redactValue(value);
    }
    return result;
  };
  return redactEntries(record ?? {});
}
|
|
3027
3255
|
|
|
3028
3256
|
// src/errors/replay.ts
|
|
3029
3257
|
var ReplayContractError = class extends Error {
|
|
@@ -3320,8 +3548,9 @@ function toRecord8(value) {
|
|
|
3320
3548
|
var import_node_crypto3 = __toESM(require("crypto"), 1);
|
|
3321
3549
|
|
|
3322
3550
|
// src/phases.ts
|
|
3323
|
-
function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
3551
|
+
function validatePhaseTransition(toolCalls, sessionState, compiledSession, ctx) {
|
|
3324
3552
|
if (!compiledSession.phases) {
|
|
3553
|
+
ctx?.trace.push({ stage: "phase", tool: null, verdict: "skip", reason: "no_phases_configured", checked: {}, found: {} });
|
|
3325
3554
|
return { legal: true, newPhase: sessionState.currentPhase };
|
|
3326
3555
|
}
|
|
3327
3556
|
const attemptedTransitions = [];
|
|
@@ -3332,6 +3561,14 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3332
3561
|
sessionState.currentPhase ?? ""
|
|
3333
3562
|
);
|
|
3334
3563
|
if (!allowedTransitions?.includes(contract.transitions.advances_to)) {
|
|
3564
|
+
ctx?.trace.push({
|
|
3565
|
+
stage: "phase",
|
|
3566
|
+
tool: toolCall.name,
|
|
3567
|
+
verdict: "block",
|
|
3568
|
+
reason: "illegal_phase_transition",
|
|
3569
|
+
checked: { advances_to: contract.transitions.advances_to, from: sessionState.currentPhase },
|
|
3570
|
+
found: { allowed_transitions: allowedTransitions ?? [] }
|
|
3571
|
+
});
|
|
3335
3572
|
return {
|
|
3336
3573
|
legal: false,
|
|
3337
3574
|
newPhase: sessionState.currentPhase,
|
|
@@ -3348,6 +3585,14 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3348
3585
|
if (attemptedTransitions.length > 1) {
|
|
3349
3586
|
const distinctTargets = new Set(attemptedTransitions.map((t) => t.target));
|
|
3350
3587
|
if (distinctTargets.size > 1) {
|
|
3588
|
+
ctx?.trace.push({
|
|
3589
|
+
stage: "phase",
|
|
3590
|
+
tool: attemptedTransitions.map((t) => t.tool).join(", "),
|
|
3591
|
+
verdict: "block",
|
|
3592
|
+
reason: "ambiguous_phase_transition",
|
|
3593
|
+
checked: { targets: Array.from(distinctTargets) },
|
|
3594
|
+
found: { from: sessionState.currentPhase }
|
|
3595
|
+
});
|
|
3351
3596
|
return {
|
|
3352
3597
|
legal: false,
|
|
3353
3598
|
newPhase: sessionState.currentPhase,
|
|
@@ -3358,7 +3603,17 @@ function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
|
|
|
3358
3603
|
}
|
|
3359
3604
|
}
|
|
3360
3605
|
if (attemptedTransitions.length > 0) {
|
|
3361
|
-
|
|
3606
|
+
const target = attemptedTransitions[0].target;
|
|
3607
|
+
const allowedTransitions = compiledSession.transitions.get(sessionState.currentPhase ?? "") ?? [];
|
|
3608
|
+
ctx?.trace.push({
|
|
3609
|
+
stage: "phase",
|
|
3610
|
+
tool: attemptedTransitions[0].tool,
|
|
3611
|
+
verdict: "allow",
|
|
3612
|
+
reason: "phase_advanced",
|
|
3613
|
+
checked: { advances_to: target, from: sessionState.currentPhase },
|
|
3614
|
+
found: { allowed_transitions: allowedTransitions }
|
|
3615
|
+
});
|
|
3616
|
+
return { legal: true, newPhase: target };
|
|
3362
3617
|
}
|
|
3363
3618
|
return { legal: true, newPhase: sessionState.currentPhase };
|
|
3364
3619
|
}
|
|
@@ -3607,77 +3862,8 @@ function checkCircuitBreaker(state, config) {
|
|
|
3607
3862
|
return { triggered: false, reason: null };
|
|
3608
3863
|
}
|
|
3609
3864
|
|
|
3610
|
-
// src/preconditions.ts
|
|
3611
|
-
function evaluatePreconditions(preconditions, sessionState, currentArguments) {
|
|
3612
|
-
return preconditions.map(
|
|
3613
|
-
(p) => evaluatePrecondition(p, sessionState, currentArguments)
|
|
3614
|
-
);
|
|
3615
|
-
}
|
|
3616
|
-
function evaluatePrecondition(precondition, sessionState, currentArguments) {
|
|
3617
|
-
if (precondition.requires_step_count) {
|
|
3618
|
-
const required = precondition.requires_step_count.gte;
|
|
3619
|
-
if (sessionState.totalStepCount < required) {
|
|
3620
|
-
return {
|
|
3621
|
-
satisfied: false,
|
|
3622
|
-
detail: `Need ${required} prior steps, have ${sessionState.totalStepCount}`
|
|
3623
|
-
};
|
|
3624
|
-
}
|
|
3625
|
-
}
|
|
3626
|
-
if (precondition.requires_prior_tool) {
|
|
3627
|
-
const toolName = precondition.requires_prior_tool;
|
|
3628
|
-
const resourcePath = precondition.resource ? typeof precondition.resource === "string" ? precondition.resource : precondition.resource.path : void 0;
|
|
3629
|
-
const resourceValue = resourcePath ? extractPath(currentArguments ?? {}, resourcePath) : void 0;
|
|
3630
|
-
const cacheKey = resourceValue !== void 0 ? `${toolName}:${JSON.stringify(resourceValue)}` : toolName;
|
|
3631
|
-
let priorStep;
|
|
3632
|
-
for (let i = sessionState.steps.length - 1; i >= 0; i--) {
|
|
3633
|
-
const s = sessionState.steps[i];
|
|
3634
|
-
if (s.toolCalls.some((tc) => {
|
|
3635
|
-
if (tc.toolName !== toolName) return false;
|
|
3636
|
-
if (tc.proposal_decision !== "allowed") return false;
|
|
3637
|
-
if (resourceValue !== void 0 && tc.resourceValues?.[resourcePath] !== resourceValue) {
|
|
3638
|
-
return false;
|
|
3639
|
-
}
|
|
3640
|
-
return true;
|
|
3641
|
-
})) {
|
|
3642
|
-
priorStep = s;
|
|
3643
|
-
break;
|
|
3644
|
-
}
|
|
3645
|
-
}
|
|
3646
|
-
const cachedExtract = sessionState.satisfiedPreconditions.get(cacheKey);
|
|
3647
|
-
if (!priorStep && cachedExtract === void 0) {
|
|
3648
|
-
const detail = resourceValue !== void 0 ? `Required prior tool ${toolName} not found for resource ${JSON.stringify(resourceValue)}` : `Required prior tool ${toolName} not found in session`;
|
|
3649
|
-
return { satisfied: false, detail };
|
|
3650
|
-
}
|
|
3651
|
-
if (precondition.with_output) {
|
|
3652
|
-
const extract = priorStep?.outputExtract ?? cachedExtract ?? {};
|
|
3653
|
-
for (const assertion of precondition.with_output) {
|
|
3654
|
-
const value = extractPath(extract, assertion.path);
|
|
3655
|
-
if (assertion.equals !== void 0 && value !== assertion.equals) {
|
|
3656
|
-
return {
|
|
3657
|
-
satisfied: false,
|
|
3658
|
-
detail: `Prior tool output assertion failed: ${assertion.path} \u2014 expected ${JSON.stringify(assertion.equals)}, got ${JSON.stringify(value)}`
|
|
3659
|
-
};
|
|
3660
|
-
}
|
|
3661
|
-
}
|
|
3662
|
-
}
|
|
3663
|
-
}
|
|
3664
|
-
return { satisfied: true, detail: "" };
|
|
3665
|
-
}
|
|
3666
|
-
function extractPath(obj, path) {
|
|
3667
|
-
const cleanPath = path.startsWith("$.") ? path.slice(2) : path;
|
|
3668
|
-
if (cleanPath === "" || cleanPath === "$") return obj;
|
|
3669
|
-
const segments = cleanPath.split(".");
|
|
3670
|
-
let current = obj;
|
|
3671
|
-
for (const segment of segments) {
|
|
3672
|
-
if (current === null || current === void 0) return void 0;
|
|
3673
|
-
if (typeof current !== "object") return void 0;
|
|
3674
|
-
current = current[segment];
|
|
3675
|
-
}
|
|
3676
|
-
return current;
|
|
3677
|
-
}
|
|
3678
|
-
|
|
3679
3865
|
// src/crossStep.ts
|
|
3680
|
-
function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
3866
|
+
function validateCrossStep(toolCalls, sessionState, contracts, ctx) {
|
|
3681
3867
|
const failures = [];
|
|
3682
3868
|
const contractByTool = new Map(contracts.map((c) => [c.tool, c]));
|
|
3683
3869
|
const workingForbidden = new Set(sessionState.forbiddenTools);
|
|
@@ -3705,8 +3891,17 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3705
3891
|
reason: "forbidden_tool",
|
|
3706
3892
|
detail: resourceValue !== void 0 ? `Tool "${tc.name}" is forbidden in this session for resource ${JSON.stringify(resourceValue)}` : `Tool "${tc.name}" is forbidden in this session`
|
|
3707
3893
|
});
|
|
3894
|
+
ctx?.trace.push({
|
|
3895
|
+
stage: "cross_step",
|
|
3896
|
+
tool: tc.name,
|
|
3897
|
+
verdict: "block",
|
|
3898
|
+
reason: "forbidden_tool",
|
|
3899
|
+
checked: { tool: tc.name },
|
|
3900
|
+
found: { is_resource_scoped: resourceValue !== void 0, resource_value: resourceValue ?? null }
|
|
3901
|
+
});
|
|
3708
3902
|
continue;
|
|
3709
3903
|
}
|
|
3904
|
+
let crossStepPassed = true;
|
|
3710
3905
|
if (contract?.preconditions && contract.preconditions.length > 0) {
|
|
3711
3906
|
const results = evaluatePreconditions(
|
|
3712
3907
|
contract.preconditions,
|
|
@@ -3715,6 +3910,7 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3715
3910
|
);
|
|
3716
3911
|
for (const result of results) {
|
|
3717
3912
|
if (!result.satisfied) {
|
|
3913
|
+
crossStepPassed = false;
|
|
3718
3914
|
failures.push({
|
|
3719
3915
|
toolName: tc.name,
|
|
3720
3916
|
reason: "precondition_not_met",
|
|
@@ -3723,6 +3919,25 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3723
3919
|
}
|
|
3724
3920
|
}
|
|
3725
3921
|
}
|
|
3922
|
+
if (crossStepPassed) {
|
|
3923
|
+
ctx?.trace.push({
|
|
3924
|
+
stage: "cross_step",
|
|
3925
|
+
tool: tc.name,
|
|
3926
|
+
verdict: "allow",
|
|
3927
|
+
reason: "preconditions_satisfied",
|
|
3928
|
+
checked: { requires_prior_tool: contract?.preconditions?.[0]?.requires_prior_tool ?? null },
|
|
3929
|
+
found: { resource_value: resourceValue ?? null }
|
|
3930
|
+
});
|
|
3931
|
+
} else {
|
|
3932
|
+
ctx?.trace.push({
|
|
3933
|
+
stage: "cross_step",
|
|
3934
|
+
tool: tc.name,
|
|
3935
|
+
verdict: "block",
|
|
3936
|
+
reason: "precondition_not_met",
|
|
3937
|
+
checked: { requires_prior_tool: contract?.preconditions?.[0]?.requires_prior_tool ?? null },
|
|
3938
|
+
found: { resource_value: resourceValue ?? null, matching_step_index: null }
|
|
3939
|
+
});
|
|
3940
|
+
}
|
|
3726
3941
|
if (contract?.forbids_after) {
|
|
3727
3942
|
for (const entry of contract.forbids_after) {
|
|
3728
3943
|
if (typeof entry === "string") {
|
|
@@ -3749,104 +3964,6 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3749
3964
|
};
|
|
3750
3965
|
}
|
|
3751
3966
|
|
|
3752
|
-
// src/argumentValues.ts
|
|
3753
|
-
function evaluateArgumentValueInvariants(parsedArguments, invariants) {
|
|
3754
|
-
const failures = [];
|
|
3755
|
-
for (const inv of invariants) {
|
|
3756
|
-
const value = extractPath(parsedArguments, inv.path);
|
|
3757
|
-
if (inv.exact_match !== void 0) {
|
|
3758
|
-
const strValue = typeof value === "string" ? value : JSON.stringify(value);
|
|
3759
|
-
if (strValue !== inv.exact_match) {
|
|
3760
|
-
failures.push({
|
|
3761
|
-
path: inv.path,
|
|
3762
|
-
operator: "exact_match",
|
|
3763
|
-
expected: inv.exact_match,
|
|
3764
|
-
actual: value,
|
|
3765
|
-
detail: `Expected exact match "${inv.exact_match}", got "${strValue}"`
|
|
3766
|
-
});
|
|
3767
|
-
}
|
|
3768
|
-
}
|
|
3769
|
-
if (inv.regex !== void 0) {
|
|
3770
|
-
const strValue = typeof value === "string" ? value : String(value);
|
|
3771
|
-
try {
|
|
3772
|
-
const re = safeRegex(inv.regex);
|
|
3773
|
-
if (!re.test(strValue)) {
|
|
3774
|
-
failures.push({
|
|
3775
|
-
path: inv.path,
|
|
3776
|
-
operator: "regex",
|
|
3777
|
-
expected: inv.regex,
|
|
3778
|
-
actual: value,
|
|
3779
|
-
detail: `Value "${strValue}" does not match regex "${inv.regex}"`
|
|
3780
|
-
});
|
|
3781
|
-
}
|
|
3782
|
-
} catch {
|
|
3783
|
-
failures.push({
|
|
3784
|
-
path: inv.path,
|
|
3785
|
-
operator: "regex",
|
|
3786
|
-
expected: inv.regex,
|
|
3787
|
-
actual: value,
|
|
3788
|
-
detail: `Invalid regex pattern: "${inv.regex}"`
|
|
3789
|
-
});
|
|
3790
|
-
}
|
|
3791
|
-
}
|
|
3792
|
-
if (inv.one_of !== void 0) {
|
|
3793
|
-
const match = inv.one_of.some((candidate) => {
|
|
3794
|
-
if (typeof candidate === typeof value) {
|
|
3795
|
-
return JSON.stringify(candidate) === JSON.stringify(value);
|
|
3796
|
-
}
|
|
3797
|
-
return false;
|
|
3798
|
-
});
|
|
3799
|
-
if (!match) {
|
|
3800
|
-
failures.push({
|
|
3801
|
-
path: inv.path,
|
|
3802
|
-
operator: "one_of",
|
|
3803
|
-
expected: inv.one_of,
|
|
3804
|
-
actual: value,
|
|
3805
|
-
detail: `Value ${JSON.stringify(value)} not in ${JSON.stringify(inv.one_of)}`
|
|
3806
|
-
});
|
|
3807
|
-
}
|
|
3808
|
-
}
|
|
3809
|
-
if (inv.type !== void 0) {
|
|
3810
|
-
const actualType = Array.isArray(value) ? "array" : typeof value;
|
|
3811
|
-
if (actualType !== inv.type) {
|
|
3812
|
-
failures.push({
|
|
3813
|
-
path: inv.path,
|
|
3814
|
-
operator: "type",
|
|
3815
|
-
expected: inv.type,
|
|
3816
|
-
actual: actualType,
|
|
3817
|
-
detail: `Expected type "${inv.type}", got "${actualType}"`
|
|
3818
|
-
});
|
|
3819
|
-
}
|
|
3820
|
-
}
|
|
3821
|
-
if (typeof inv.gte === "number") {
|
|
3822
|
-
if (typeof value !== "number" || value < inv.gte) {
|
|
3823
|
-
failures.push({
|
|
3824
|
-
path: inv.path,
|
|
3825
|
-
operator: "gte",
|
|
3826
|
-
expected: inv.gte,
|
|
3827
|
-
actual: value,
|
|
3828
|
-
detail: `Expected >= ${inv.gte}, got ${JSON.stringify(value)}`
|
|
3829
|
-
});
|
|
3830
|
-
}
|
|
3831
|
-
}
|
|
3832
|
-
if (typeof inv.lte === "number") {
|
|
3833
|
-
if (typeof value !== "number" || value > inv.lte) {
|
|
3834
|
-
failures.push({
|
|
3835
|
-
path: inv.path,
|
|
3836
|
-
operator: "lte",
|
|
3837
|
-
expected: inv.lte,
|
|
3838
|
-
actual: value,
|
|
3839
|
-
detail: `Expected <= ${inv.lte}, got ${JSON.stringify(value)}`
|
|
3840
|
-
});
|
|
3841
|
-
}
|
|
3842
|
-
}
|
|
3843
|
-
}
|
|
3844
|
-
return {
|
|
3845
|
-
passed: failures.length === 0,
|
|
3846
|
-
failures
|
|
3847
|
-
};
|
|
3848
|
-
}
|
|
3849
|
-
|
|
3850
3967
|
// src/messageValidation.ts
|
|
3851
3968
|
var import_contracts_core4 = require("@replayci/contracts-core");
|
|
3852
3969
|
function validateToolResultMessages(messages, contracts, provider) {
|
|
@@ -4042,20 +4159,23 @@ function extractPath2(obj, path) {
|
|
|
4042
4159
|
}
|
|
4043
4160
|
|
|
4044
4161
|
// src/narrow.ts
|
|
4045
|
-
function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter) {
|
|
4162
|
+
function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter, ctx) {
|
|
4046
4163
|
const allowed = [];
|
|
4047
4164
|
const removed = [];
|
|
4048
4165
|
for (const tool of requestedTools) {
|
|
4049
4166
|
if (manualFilter && !manualFilter.includes(tool.name)) {
|
|
4050
4167
|
removed.push({ tool: tool.name, reason: "manual_filter" });
|
|
4168
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "manual_filter", checked: { filter: manualFilter }, found: {} });
|
|
4051
4169
|
continue;
|
|
4052
4170
|
}
|
|
4053
4171
|
const contract = compiledSession.perToolContracts.get(tool.name);
|
|
4054
4172
|
if (!contract) {
|
|
4055
4173
|
if (unmatchedPolicy === "allow") {
|
|
4056
4174
|
allowed.push(tool);
|
|
4175
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "allow", reason: "no_contract_passthrough", checked: { unmatched_policy: "allow" }, found: {} });
|
|
4057
4176
|
} else {
|
|
4058
4177
|
removed.push({ tool: tool.name, reason: "no_contract" });
|
|
4178
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "no_contract", checked: { unmatched_policy: "block" }, found: {} });
|
|
4059
4179
|
}
|
|
4060
4180
|
continue;
|
|
4061
4181
|
}
|
|
@@ -4068,6 +4188,7 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4068
4188
|
reason: "wrong_phase",
|
|
4069
4189
|
detail: `Tool valid in [${contract.transitions.valid_in_phases.join(", ")}], current phase: ${sessionState.currentPhase}`
|
|
4070
4190
|
});
|
|
4191
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "wrong_phase", checked: { valid_in_phases: contract.transitions.valid_in_phases }, found: { current_phase: sessionState.currentPhase } });
|
|
4071
4192
|
continue;
|
|
4072
4193
|
}
|
|
4073
4194
|
}
|
|
@@ -4078,6 +4199,18 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4078
4199
|
);
|
|
4079
4200
|
const unsatisfied = results.find((r) => !r.satisfied);
|
|
4080
4201
|
if (unsatisfied) {
|
|
4202
|
+
const firstPre = contract.preconditions[0];
|
|
4203
|
+
ctx?.trace.push({
|
|
4204
|
+
stage: "narrow",
|
|
4205
|
+
tool: tool.name,
|
|
4206
|
+
verdict: "remove",
|
|
4207
|
+
reason: "precondition_not_met",
|
|
4208
|
+
checked: {
|
|
4209
|
+
requires_prior_tool: firstPre.requires_prior_tool ?? null,
|
|
4210
|
+
with_output: firstPre.with_output ?? []
|
|
4211
|
+
},
|
|
4212
|
+
found: { satisfied_precondition_cache_hit: false }
|
|
4213
|
+
});
|
|
4081
4214
|
removed.push({
|
|
4082
4215
|
tool: tool.name,
|
|
4083
4216
|
reason: "precondition_not_met",
|
|
@@ -4091,6 +4224,7 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4091
4224
|
tool: tool.name,
|
|
4092
4225
|
reason: "forbidden_in_state"
|
|
4093
4226
|
});
|
|
4227
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "forbidden_in_state", checked: { tool: tool.name }, found: { is_resource_scoped: false } });
|
|
4094
4228
|
continue;
|
|
4095
4229
|
}
|
|
4096
4230
|
if (compiledSession.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
|
|
@@ -4107,9 +4241,24 @@ function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPol
|
|
|
4107
4241
|
reason: "policy_denied",
|
|
4108
4242
|
detail: verdict.reason ?? "Policy deny rule matched"
|
|
4109
4243
|
});
|
|
4244
|
+
ctx?.trace.push({ stage: "narrow", tool: tool.name, verdict: "remove", reason: "policy_denied", checked: { rule_type: "session_deny" }, found: { matched: true } });
|
|
4110
4245
|
continue;
|
|
4111
4246
|
}
|
|
4112
4247
|
}
|
|
4248
|
+
ctx?.trace.push({
|
|
4249
|
+
stage: "narrow",
|
|
4250
|
+
tool: tool.name,
|
|
4251
|
+
verdict: "allow",
|
|
4252
|
+
reason: "all_checks_passed",
|
|
4253
|
+
checked: {
|
|
4254
|
+
has_contract: true,
|
|
4255
|
+
phase_ok: true,
|
|
4256
|
+
preconditions_ok: true,
|
|
4257
|
+
not_forbidden: true,
|
|
4258
|
+
policy_ok: true
|
|
4259
|
+
},
|
|
4260
|
+
found: {}
|
|
4261
|
+
});
|
|
4113
4262
|
allowed.push(tool);
|
|
4114
4263
|
}
|
|
4115
4264
|
return { allowed, removed };
|
|
@@ -4603,7 +4752,7 @@ function replay(client, opts = {}) {
|
|
|
4603
4752
|
const unmatchedPolicy = opts.unmatchedPolicy ?? "block";
|
|
4604
4753
|
const maxRetries = Math.min(Math.max(0, opts.maxRetries ?? 0), MAX_RETRIES);
|
|
4605
4754
|
const compatEnforcement = opts.compatEnforcement ?? "protective";
|
|
4606
|
-
const diagnostics = opts.diagnostics;
|
|
4755
|
+
const diagnostics = opts.diagnostics ?? defaultReplayDiagnosticsHandler;
|
|
4607
4756
|
let provider;
|
|
4608
4757
|
try {
|
|
4609
4758
|
provider = detectProvider(client);
|
|
@@ -4620,12 +4769,12 @@ function replay(client, opts = {}) {
|
|
|
4620
4769
|
contracts = resolveContracts(opts);
|
|
4621
4770
|
} catch (err) {
|
|
4622
4771
|
const detail = err instanceof Error ? err.message : "Failed to load contracts";
|
|
4623
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4772
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4624
4773
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4625
4774
|
}
|
|
4626
4775
|
const configError = validateConfig(contracts, opts);
|
|
4627
4776
|
if (configError) {
|
|
4628
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4777
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: configError.message });
|
|
4629
4778
|
return createBlockingInactiveSession(client, sessionId, configError.message, configError);
|
|
4630
4779
|
}
|
|
4631
4780
|
let discoveredSessionYaml = null;
|
|
@@ -4633,9 +4782,15 @@ function replay(client, opts = {}) {
|
|
|
4633
4782
|
discoveredSessionYaml = discoverSessionYaml(opts);
|
|
4634
4783
|
} catch (err) {
|
|
4635
4784
|
const detail = `session.yaml: ${err instanceof Error ? err.message : String(err)}`;
|
|
4636
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4785
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4637
4786
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4638
4787
|
}
|
|
4788
|
+
if (opts.contractsDir && !discoveredSessionYaml && !opts.sessionYamlPath) {
|
|
4789
|
+
emitDiagnostic2(diagnostics, {
|
|
4790
|
+
type: "replay_compile_warning",
|
|
4791
|
+
details: "No session.yaml found in contractsDir \u2014 session-level features (phases, policy, session_limits) are inactive. Per-tool contracts still apply."
|
|
4792
|
+
});
|
|
4793
|
+
}
|
|
4639
4794
|
let sessionYaml = discoveredSessionYaml;
|
|
4640
4795
|
if (!sessionYaml && opts.providerConstraints) {
|
|
4641
4796
|
sessionYaml = { schema_version: "1.0", agent, provider_constraints: opts.providerConstraints };
|
|
@@ -4649,15 +4804,19 @@ function replay(client, opts = {}) {
|
|
|
4649
4804
|
tools: opts.tools ? new Map(Object.entries(opts.tools)) : void 0
|
|
4650
4805
|
});
|
|
4651
4806
|
} catch (err) {
|
|
4807
|
+
const detail = `Session compilation failed: ${err instanceof Error ? err.message : String(err)}`;
|
|
4652
4808
|
emitDiagnostic2(diagnostics, {
|
|
4653
|
-
type: "
|
|
4654
|
-
details:
|
|
4809
|
+
type: "replay_compile_error",
|
|
4810
|
+
details: detail
|
|
4655
4811
|
});
|
|
4812
|
+
if (discoveredSessionYaml) {
|
|
4813
|
+
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4814
|
+
}
|
|
4656
4815
|
}
|
|
4657
4816
|
if (compiledSession?.warnings && compiledSession.warnings.length > 0) {
|
|
4658
4817
|
for (const warning of compiledSession.warnings) {
|
|
4659
4818
|
emitDiagnostic2(diagnostics, {
|
|
4660
|
-
type: "
|
|
4819
|
+
type: "replay_compile_warning",
|
|
4661
4820
|
details: `Compile warning: ${warning}`
|
|
4662
4821
|
});
|
|
4663
4822
|
}
|
|
@@ -4669,7 +4828,7 @@ function replay(client, opts = {}) {
|
|
|
4669
4828
|
if (spec.block_incompatible && spec.block_incompatible.length > 0) {
|
|
4670
4829
|
const detail = `Provider '${provider}' is blocked by provider_constraints: ${spec.block_incompatible.join("; ")}`;
|
|
4671
4830
|
const err = new ReplayConfigError("provider_incompatible", detail);
|
|
4672
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4831
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4673
4832
|
return createBlockingInactiveSession(client, sessionId, detail, err);
|
|
4674
4833
|
}
|
|
4675
4834
|
if (spec.warn_incompatible && spec.warn_incompatible.length > 0) {
|
|
@@ -4703,10 +4862,10 @@ function replay(client, opts = {}) {
|
|
|
4703
4862
|
emitDiagnostic2(diagnostics, { type: "replay_inactive", reason: "unsupported_client" });
|
|
4704
4863
|
return createInactiveSession(client, sessionId, "Could not resolve terminal resource");
|
|
4705
4864
|
}
|
|
4706
|
-
const
|
|
4865
|
+
const apiKey = resolveApiKey2(opts);
|
|
4866
|
+
const protectionLevel = determineProtectionLevel(mode, opts.tools, contracts, apiKey);
|
|
4707
4867
|
const maxUnguardedCalls = opts.maxUnguardedCalls ?? DEFAULT_MAX_UNGUARDED_CALLS;
|
|
4708
4868
|
const narrowingFeedback = opts.narrowingFeedback ?? "silent";
|
|
4709
|
-
const apiKey = resolveApiKey2(opts);
|
|
4710
4869
|
let runtimeClient = null;
|
|
4711
4870
|
let runtimeSession = null;
|
|
4712
4871
|
let runtimeInitPromise = null;
|
|
@@ -4791,6 +4950,7 @@ function replay(client, opts = {}) {
|
|
|
4791
4950
|
let bypassDetected = false;
|
|
4792
4951
|
let lastShadowDeltaValue = null;
|
|
4793
4952
|
let lastNarrowResult = null;
|
|
4953
|
+
let lastTrace = null;
|
|
4794
4954
|
let shadowEvaluationCount = 0;
|
|
4795
4955
|
let manualFilter = null;
|
|
4796
4956
|
const deferredReceipts = /* @__PURE__ */ new Map();
|
|
@@ -4846,7 +5006,12 @@ function replay(client, opts = {}) {
|
|
|
4846
5006
|
void result.catch(() => {
|
|
4847
5007
|
});
|
|
4848
5008
|
}
|
|
4849
|
-
} catch {
|
|
5009
|
+
} catch (err) {
|
|
5010
|
+
emitDiagnostic2(diagnostics, {
|
|
5011
|
+
type: "replay_state_sync_error",
|
|
5012
|
+
session_id: sessionId,
|
|
5013
|
+
details: err instanceof Error ? err.message : "state sync failed"
|
|
5014
|
+
});
|
|
4850
5015
|
}
|
|
4851
5016
|
}
|
|
4852
5017
|
function appendCaptureToStore(capture) {
|
|
@@ -4860,6 +5025,18 @@ function replay(client, opts = {}) {
|
|
|
4860
5025
|
} catch {
|
|
4861
5026
|
}
|
|
4862
5027
|
}
|
|
5028
|
+
function createTrace(stepIndex) {
|
|
5029
|
+
const entries = [];
|
|
5030
|
+
return {
|
|
5031
|
+
sessionId,
|
|
5032
|
+
stepIndex,
|
|
5033
|
+
complete: false,
|
|
5034
|
+
entries,
|
|
5035
|
+
push(entry) {
|
|
5036
|
+
entries.push(entry);
|
|
5037
|
+
}
|
|
5038
|
+
};
|
|
5039
|
+
}
|
|
4863
5040
|
const enforcementCreate = async function replayEnforcementCreate(...args) {
|
|
4864
5041
|
if (killed) {
|
|
4865
5042
|
throw new ReplayKillError(sessionId, killedAt);
|
|
@@ -4902,8 +5079,19 @@ function replay(client, opts = {}) {
|
|
|
4902
5079
|
total_ms: 0,
|
|
4903
5080
|
enforcement_ms: 0
|
|
4904
5081
|
};
|
|
5082
|
+
const trace = createTrace(sessionState.totalStepCount);
|
|
5083
|
+
const traceCtx = { trace };
|
|
5084
|
+
let currentTraceStage = "narrow";
|
|
4905
5085
|
const request = toRecord10(args[0]);
|
|
4906
5086
|
const requestToolNames = extractRequestToolNames(request);
|
|
5087
|
+
const messages = Array.isArray(request.messages) ? request.messages : [];
|
|
5088
|
+
if (messages.length > 0) {
|
|
5089
|
+
const toolResults = extractToolResults(messages, provider);
|
|
5090
|
+
if (toolResults.length > 0) {
|
|
5091
|
+
const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
|
|
5092
|
+
sessionState = applyOutputExtracts(sessionState, outputUpdates);
|
|
5093
|
+
}
|
|
5094
|
+
}
|
|
4907
5095
|
let narrowResult = null;
|
|
4908
5096
|
let activeArgs = args;
|
|
4909
5097
|
if (compiledSession && Array.isArray(request.tools) && request.tools.length > 0) {
|
|
@@ -4914,7 +5102,8 @@ function replay(client, opts = {}) {
|
|
|
4914
5102
|
sessionState,
|
|
4915
5103
|
compiledSession,
|
|
4916
5104
|
unmatchedPolicy,
|
|
4917
|
-
manualFilter
|
|
5105
|
+
manualFilter,
|
|
5106
|
+
traceCtx
|
|
4918
5107
|
);
|
|
4919
5108
|
lastNarrowResult = narrowResult;
|
|
4920
5109
|
if (narrowResult.removed.length > 0) {
|
|
@@ -4952,55 +5141,96 @@ function replay(client, opts = {}) {
|
|
|
4952
5141
|
timing.narrow_ms = Date.now() - guardStart;
|
|
4953
5142
|
const preCheckStart = Date.now();
|
|
4954
5143
|
try {
|
|
5144
|
+
currentTraceStage = "pre_check";
|
|
4955
5145
|
if (mode === "enforce" && resolvedSessionLimits) {
|
|
4956
5146
|
const limitResult = checkSessionLimits(sessionState, resolvedSessionLimits);
|
|
4957
5147
|
if (limitResult.exceeded) {
|
|
4958
|
-
|
|
4959
|
-
|
|
4960
|
-
|
|
4961
|
-
|
|
4962
|
-
|
|
4963
|
-
|
|
4964
|
-
|
|
4965
|
-
|
|
4966
|
-
|
|
4967
|
-
|
|
4968
|
-
|
|
4969
|
-
|
|
4970
|
-
|
|
4971
|
-
|
|
4972
|
-
|
|
4973
|
-
|
|
4974
|
-
|
|
4975
|
-
|
|
4976
|
-
|
|
4977
|
-
|
|
5148
|
+
let narrowedPastLimit = false;
|
|
5149
|
+
if (limitResult.reason?.startsWith("max_tool_calls") && resolvedSessionLimits.max_tool_calls_mode === "narrow" && resolvedSessionLimits.max_calls_per_tool) {
|
|
5150
|
+
const costOk = !(typeof resolvedSessionLimits.max_cost_per_session === "number" && sessionState.actualCost >= resolvedSessionLimits.max_cost_per_session);
|
|
5151
|
+
if (costOk) {
|
|
5152
|
+
const currentRequest = toRecord10(activeArgs[0]);
|
|
5153
|
+
const currentTools = Array.isArray(currentRequest.tools) ? extractToolDefinitions(currentRequest.tools) : [];
|
|
5154
|
+
const budgetedTools = currentTools.filter((tool) => {
|
|
5155
|
+
const max = resolvedSessionLimits.max_calls_per_tool[tool.name];
|
|
5156
|
+
if (typeof max !== "number") return false;
|
|
5157
|
+
return (sessionState.toolCallCounts.get(tool.name) ?? 0) < max;
|
|
5158
|
+
});
|
|
5159
|
+
if (budgetedTools.length > 0) {
|
|
5160
|
+
const modifiedRequest = { ...currentRequest, tools: budgetedTools };
|
|
5161
|
+
activeArgs = [modifiedRequest, ...Array.prototype.slice.call(activeArgs, 1)];
|
|
5162
|
+
narrowedPastLimit = true;
|
|
5163
|
+
trace.push({
|
|
5164
|
+
stage: "pre_check",
|
|
5165
|
+
tool: null,
|
|
5166
|
+
verdict: "narrow",
|
|
5167
|
+
reason: "max_tool_calls_narrow_mode",
|
|
5168
|
+
checked: { max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null, budgeted_tools: budgetedTools.map((t) => t.name) },
|
|
5169
|
+
found: { total_tool_calls: sessionState.totalToolCalls }
|
|
5170
|
+
});
|
|
5171
|
+
}
|
|
4978
5172
|
}
|
|
4979
5173
|
}
|
|
4980
|
-
|
|
4981
|
-
|
|
4982
|
-
|
|
4983
|
-
|
|
4984
|
-
|
|
4985
|
-
|
|
4986
|
-
|
|
4987
|
-
|
|
4988
|
-
|
|
4989
|
-
|
|
4990
|
-
|
|
4991
|
-
|
|
4992
|
-
void 0,
|
|
4993
|
-
timing
|
|
4994
|
-
);
|
|
4995
|
-
if (isCompatAdvisory) {
|
|
4996
|
-
emitDiagnostic2(diagnostics, {
|
|
4997
|
-
type: "replay_compat_advisory",
|
|
4998
|
-
session_id: sessionId,
|
|
4999
|
-
would_block: decision.blocked,
|
|
5000
|
-
details: limitResult.reason ?? "session limit exceeded"
|
|
5174
|
+
if (!narrowedPastLimit) {
|
|
5175
|
+
trace.push({
|
|
5176
|
+
stage: "pre_check",
|
|
5177
|
+
tool: null,
|
|
5178
|
+
verdict: "block",
|
|
5179
|
+
reason: "session_limit_exceeded",
|
|
5180
|
+
checked: {
|
|
5181
|
+
max_steps: resolvedSessionLimits.max_steps ?? null,
|
|
5182
|
+
max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null,
|
|
5183
|
+
max_cost: resolvedSessionLimits.max_cost_per_session ?? null
|
|
5184
|
+
},
|
|
5185
|
+
found: { total_steps: sessionState.totalStepCount, total_tool_calls: sessionState.totalToolCalls, actual_cost: sessionState.actualCost }
|
|
5001
5186
|
});
|
|
5002
|
-
|
|
5003
|
-
|
|
5187
|
+
const decision = {
|
|
5188
|
+
action: "block",
|
|
5189
|
+
tool_calls: [],
|
|
5190
|
+
blocked: [{
|
|
5191
|
+
tool_name: "_session",
|
|
5192
|
+
arguments: "",
|
|
5193
|
+
reason: "session_limit_exceeded",
|
|
5194
|
+
contract_file: "",
|
|
5195
|
+
failures: [{ path: "$", operator: "session_limit", expected: "", found: "", message: limitResult.reason ?? "session limit exceeded" }]
|
|
5196
|
+
}],
|
|
5197
|
+
response_modification: gateMode
|
|
5198
|
+
};
|
|
5199
|
+
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
5200
|
+
if (resolvedSessionLimits.circuit_breaker) {
|
|
5201
|
+
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
5202
|
+
if (cbResult.triggered) {
|
|
5203
|
+
killed = true;
|
|
5204
|
+
killedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
5205
|
+
sessionState = killSession(sessionState);
|
|
5206
|
+
emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
|
|
5207
|
+
}
|
|
5208
|
+
}
|
|
5209
|
+
timing.pre_check_ms = Date.now() - preCheckStart;
|
|
5210
|
+
captureDecision(
|
|
5211
|
+
decision,
|
|
5212
|
+
null,
|
|
5213
|
+
request,
|
|
5214
|
+
guardStart,
|
|
5215
|
+
requestToolNames,
|
|
5216
|
+
null,
|
|
5217
|
+
narrowResult,
|
|
5218
|
+
null,
|
|
5219
|
+
null,
|
|
5220
|
+
null,
|
|
5221
|
+
void 0,
|
|
5222
|
+
timing
|
|
5223
|
+
);
|
|
5224
|
+
if (isCompatAdvisory) {
|
|
5225
|
+
emitDiagnostic2(diagnostics, {
|
|
5226
|
+
type: "replay_compat_advisory",
|
|
5227
|
+
session_id: sessionId,
|
|
5228
|
+
would_block: decision.blocked,
|
|
5229
|
+
details: limitResult.reason ?? "session limit exceeded"
|
|
5230
|
+
});
|
|
5231
|
+
} else {
|
|
5232
|
+
throw buildContractError2(decision);
|
|
5233
|
+
}
|
|
5004
5234
|
}
|
|
5005
5235
|
}
|
|
5006
5236
|
if (isAtHardStepCap(sessionState)) {
|
|
@@ -5033,24 +5263,32 @@ function replay(client, opts = {}) {
|
|
|
5033
5263
|
);
|
|
5034
5264
|
throw buildContractError2(decision);
|
|
5035
5265
|
}
|
|
5266
|
+
if (!checkSessionLimits(sessionState, resolvedSessionLimits).exceeded) {
|
|
5267
|
+
trace.push({
|
|
5268
|
+
stage: "pre_check",
|
|
5269
|
+
tool: null,
|
|
5270
|
+
verdict: "allow",
|
|
5271
|
+
reason: "session_limits_ok",
|
|
5272
|
+
checked: {
|
|
5273
|
+
max_steps: resolvedSessionLimits.max_steps ?? null,
|
|
5274
|
+
max_tool_calls: resolvedSessionLimits.max_tool_calls ?? null,
|
|
5275
|
+
max_cost: resolvedSessionLimits.max_cost_per_session ?? null
|
|
5276
|
+
},
|
|
5277
|
+
found: { total_steps: sessionState.totalStepCount, total_tool_calls: sessionState.totalToolCalls, actual_cost: sessionState.actualCost }
|
|
5278
|
+
});
|
|
5279
|
+
}
|
|
5280
|
+
} else if (mode === "enforce") {
|
|
5281
|
+
trace.push({ stage: "pre_check", tool: null, verdict: "skip", reason: "no_session_limits", checked: {}, found: {} });
|
|
5036
5282
|
}
|
|
5037
|
-
const messages = Array.isArray(request.messages) ? request.messages : [];
|
|
5038
5283
|
if (messages.length > 0) {
|
|
5039
5284
|
const msgResult = validateToolResultMessages(messages, contracts, provider);
|
|
5040
5285
|
if (!msgResult.passed) {
|
|
5041
5286
|
emitDiagnostic2(diagnostics, {
|
|
5042
|
-
type: "
|
|
5287
|
+
type: "replay_validation_warning",
|
|
5043
5288
|
details: `Message validation: ${msgResult.failures.map((f) => f.detail).join("; ")}`
|
|
5044
5289
|
});
|
|
5045
5290
|
}
|
|
5046
5291
|
}
|
|
5047
|
-
if (messages.length > 0) {
|
|
5048
|
-
const toolResults = extractToolResults(messages, provider);
|
|
5049
|
-
if (toolResults.length > 0) {
|
|
5050
|
-
const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
|
|
5051
|
-
sessionState = applyOutputExtracts(sessionState, outputUpdates);
|
|
5052
|
-
}
|
|
5053
|
-
}
|
|
5054
5292
|
const inputFailures = evaluateInputInvariants(request, contracts);
|
|
5055
5293
|
if (mode === "enforce" && inputFailures.length > 0) {
|
|
5056
5294
|
if (onError === "block") {
|
|
@@ -5125,6 +5363,10 @@ function replay(client, opts = {}) {
|
|
|
5125
5363
|
sessionState = updateActualCost(sessionState, costDelta);
|
|
5126
5364
|
}
|
|
5127
5365
|
if (mode === "log-only") {
|
|
5366
|
+
trace.push({ stage: "gate", tool: null, verdict: "allow", reason: "log_only_mode", checked: {}, found: {} });
|
|
5367
|
+
trace.complete = true;
|
|
5368
|
+
lastTrace = trace;
|
|
5369
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5128
5370
|
captureDecision(
|
|
5129
5371
|
{ action: "allow", tool_calls: extractToolCalls(response, provider) },
|
|
5130
5372
|
response,
|
|
@@ -5137,13 +5379,26 @@ function replay(client, opts = {}) {
|
|
|
5137
5379
|
null,
|
|
5138
5380
|
null,
|
|
5139
5381
|
void 0,
|
|
5140
|
-
timing
|
|
5382
|
+
timing,
|
|
5383
|
+
trace
|
|
5141
5384
|
);
|
|
5142
5385
|
return response;
|
|
5143
5386
|
}
|
|
5387
|
+
currentTraceStage = "validate";
|
|
5144
5388
|
const toolCalls = extractToolCalls(response, provider);
|
|
5145
5389
|
const validateStart = Date.now();
|
|
5146
5390
|
const validation = validateResponse2(response, toolCalls, contracts, requestToolNames, unmatchedPolicy, provider);
|
|
5391
|
+
for (const f of validation.failures) {
|
|
5392
|
+
const toolName = extractToolNameFromFailure(f, toolCalls);
|
|
5393
|
+
trace.push({
|
|
5394
|
+
stage: "validate",
|
|
5395
|
+
tool: toolName === "_response" ? null : toolName,
|
|
5396
|
+
verdict: "block",
|
|
5397
|
+
reason: f.operator === "response_format" ? "response_format_failed" : "output_invariant_failed",
|
|
5398
|
+
checked: { path: f.path, operator: f.operator, invariant_type: f.operator === "response_format" ? "response_format" : "output" },
|
|
5399
|
+
found: { value: f.found }
|
|
5400
|
+
});
|
|
5401
|
+
}
|
|
5147
5402
|
timing.validate_ms += Date.now() - validateStart;
|
|
5148
5403
|
if (isActiveGovern && !attemptDegraded && attemptPreparedRequestId) {
|
|
5149
5404
|
const rtProposalStart = Date.now();
|
|
@@ -5178,9 +5433,10 @@ function replay(client, opts = {}) {
|
|
|
5178
5433
|
}
|
|
5179
5434
|
timing.runtime_ms += Date.now() - rtProposalStart;
|
|
5180
5435
|
}
|
|
5436
|
+
currentTraceStage = "cross_step";
|
|
5181
5437
|
const crossStepStart = Date.now();
|
|
5182
5438
|
const crossStepContracts = compiledSession ? Array.from(compiledSession.perToolContracts.values()) : contracts;
|
|
5183
|
-
const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts);
|
|
5439
|
+
const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts, traceCtx);
|
|
5184
5440
|
if (!crossStepResult.passed) {
|
|
5185
5441
|
for (const f of crossStepResult.failures) {
|
|
5186
5442
|
validation.failures.push({
|
|
@@ -5194,10 +5450,11 @@ function replay(client, opts = {}) {
|
|
|
5194
5450
|
}
|
|
5195
5451
|
}
|
|
5196
5452
|
timing.cross_step_ms += Date.now() - crossStepStart;
|
|
5453
|
+
currentTraceStage = "phase";
|
|
5197
5454
|
let phaseResult = null;
|
|
5198
5455
|
const phaseStart = Date.now();
|
|
5199
5456
|
if (compiledSession) {
|
|
5200
|
-
phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession);
|
|
5457
|
+
phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession, traceCtx);
|
|
5201
5458
|
if (!phaseResult.legal) {
|
|
5202
5459
|
validation.failures.push({
|
|
5203
5460
|
path: `$.tool_calls.${phaseResult.blockedTool}`,
|
|
@@ -5211,6 +5468,8 @@ function replay(client, opts = {}) {
|
|
|
5211
5468
|
}
|
|
5212
5469
|
timing.phase_ms += Date.now() - phaseStart;
|
|
5213
5470
|
const argValuesStart = Date.now();
|
|
5471
|
+
const workingToolCallCounts = new Map(sessionState.toolCallCounts);
|
|
5472
|
+
const intraResponseTuples = [];
|
|
5214
5473
|
for (const tc of toolCalls) {
|
|
5215
5474
|
const contract = contracts.find((c) => c.tool === tc.name);
|
|
5216
5475
|
if (contract?.argument_value_invariants && contract.argument_value_invariants.length > 0) {
|
|
@@ -5225,7 +5484,7 @@ function replay(client, opts = {}) {
|
|
|
5225
5484
|
for (const f of avResult.failures) {
|
|
5226
5485
|
validation.failures.push({
|
|
5227
5486
|
path: f.path,
|
|
5228
|
-
operator:
|
|
5487
|
+
operator: "argument_value_mismatch",
|
|
5229
5488
|
expected: String(f.expected),
|
|
5230
5489
|
found: String(f.actual),
|
|
5231
5490
|
message: f.detail,
|
|
@@ -5236,9 +5495,12 @@ function replay(client, opts = {}) {
|
|
|
5236
5495
|
}
|
|
5237
5496
|
}
|
|
5238
5497
|
}
|
|
5498
|
+
currentTraceStage = "limit";
|
|
5239
5499
|
if (resolvedSessionLimits) {
|
|
5240
|
-
const
|
|
5500
|
+
const workingState = { ...sessionState, toolCallCounts: workingToolCallCounts };
|
|
5501
|
+
const perToolResult = checkPerToolLimits(workingState, tc.name, resolvedSessionLimits);
|
|
5241
5502
|
if (perToolResult.exceeded) {
|
|
5503
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "block", reason: "per_tool_limit_exceeded", checked: { max_calls: resolvedSessionLimits.max_calls_per_tool?.[tc.name] ?? null }, found: { current_calls: workingToolCallCounts.get(tc.name) ?? 0 } });
|
|
5242
5504
|
validation.failures.push({
|
|
5243
5505
|
path: `$.tool_calls.${tc.name}`,
|
|
5244
5506
|
operator: "session_limit",
|
|
@@ -5247,28 +5509,39 @@ function replay(client, opts = {}) {
|
|
|
5247
5509
|
message: perToolResult.reason ?? "per-tool limit exceeded",
|
|
5248
5510
|
contract_file: ""
|
|
5249
5511
|
});
|
|
5512
|
+
} else {
|
|
5513
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "allow", reason: "per_tool_limit_ok", checked: { max_calls: resolvedSessionLimits.max_calls_per_tool?.[tc.name] ?? null }, found: { current_calls: workingToolCallCounts.get(tc.name) ?? 0 } });
|
|
5250
5514
|
}
|
|
5251
5515
|
}
|
|
5516
|
+
workingToolCallCounts.set(tc.name, (workingToolCallCounts.get(tc.name) ?? 0) + 1);
|
|
5252
5517
|
if (resolvedSessionLimits?.loop_detection) {
|
|
5518
|
+
const argsHash = computeArgumentsHash(tc.arguments);
|
|
5253
5519
|
const loopResult = checkLoopDetection(
|
|
5254
5520
|
tc.name,
|
|
5255
5521
|
tc.arguments,
|
|
5256
5522
|
sessionState,
|
|
5257
5523
|
resolvedSessionLimits.loop_detection
|
|
5258
5524
|
);
|
|
5259
|
-
|
|
5525
|
+
const intraMatches = intraResponseTuples.filter(
|
|
5526
|
+
(t) => t.toolName === tc.name && t.argsHash === argsHash
|
|
5527
|
+
).length;
|
|
5528
|
+
const totalMatches = loopResult.matchCount + intraMatches;
|
|
5529
|
+
if (totalMatches >= resolvedSessionLimits.loop_detection.threshold) {
|
|
5530
|
+
trace.push({ stage: "limit", tool: tc.name, verdict: "block", reason: "loop_detected", checked: { window: resolvedSessionLimits.loop_detection.window, threshold: resolvedSessionLimits.loop_detection.threshold }, found: { match_count: totalMatches, arguments_hash: argsHash } });
|
|
5260
5531
|
validation.failures.push({
|
|
5261
5532
|
path: `$.tool_calls.${tc.name}`,
|
|
5262
5533
|
operator: "loop_detected",
|
|
5263
|
-
expected: `< ${
|
|
5264
|
-
found: String(
|
|
5265
|
-
message: `Loop detected: ${tc.name} repeated ${
|
|
5534
|
+
expected: `< ${resolvedSessionLimits.loop_detection.threshold} occurrences in window ${resolvedSessionLimits.loop_detection.window}`,
|
|
5535
|
+
found: String(totalMatches),
|
|
5536
|
+
message: `Loop detected: ${tc.name} repeated ${totalMatches} times in last ${resolvedSessionLimits.loop_detection.window} steps`,
|
|
5266
5537
|
contract_file: ""
|
|
5267
5538
|
});
|
|
5268
5539
|
}
|
|
5540
|
+
intraResponseTuples.push({ toolName: tc.name, argsHash });
|
|
5269
5541
|
}
|
|
5270
5542
|
}
|
|
5271
5543
|
timing.argument_values_ms += Date.now() - argValuesStart;
|
|
5544
|
+
currentTraceStage = "policy";
|
|
5272
5545
|
let policyVerdicts = null;
|
|
5273
5546
|
const policyStart = Date.now();
|
|
5274
5547
|
if (compiledSession?.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
|
|
@@ -5289,6 +5562,14 @@ function replay(client, opts = {}) {
|
|
|
5289
5562
|
);
|
|
5290
5563
|
policyVerdicts.set(tc.name, verdict);
|
|
5291
5564
|
if (!verdict.allowed) {
|
|
5565
|
+
trace.push({
|
|
5566
|
+
stage: "policy",
|
|
5567
|
+
tool: tc.name,
|
|
5568
|
+
verdict: "block",
|
|
5569
|
+
reason: verdict.reason?.startsWith("Session deny") ? "session_deny_matched" : verdict.reason?.startsWith("default_deny") ? "default_deny_no_allow" : "policy_denied",
|
|
5570
|
+
checked: { has_policy: true, default_deny: compiledSession.policyProgram.defaultDeny },
|
|
5571
|
+
found: { matched: true }
|
|
5572
|
+
});
|
|
5292
5573
|
validation.failures.push({
|
|
5293
5574
|
path: `$.tool_calls.${tc.name}`,
|
|
5294
5575
|
operator: "policy_denied",
|
|
@@ -5297,10 +5578,22 @@ function replay(client, opts = {}) {
|
|
|
5297
5578
|
message: `Policy denied: ${tc.name} \u2014 ${verdict.reason}`,
|
|
5298
5579
|
contract_file: ""
|
|
5299
5580
|
});
|
|
5581
|
+
} else {
|
|
5582
|
+
trace.push({
|
|
5583
|
+
stage: "policy",
|
|
5584
|
+
tool: tc.name,
|
|
5585
|
+
verdict: "allow",
|
|
5586
|
+
reason: "policy_allowed",
|
|
5587
|
+
checked: { has_policy: true, default_deny: compiledSession.policyProgram.defaultDeny },
|
|
5588
|
+
found: { session_deny_matched: false, tool_deny_matched: false }
|
|
5589
|
+
});
|
|
5300
5590
|
}
|
|
5301
5591
|
}
|
|
5592
|
+
} else {
|
|
5593
|
+
trace.push({ stage: "policy", tool: null, verdict: "skip", reason: "no_policy_configured", checked: {}, found: {} });
|
|
5302
5594
|
}
|
|
5303
5595
|
timing.policy_ms += Date.now() - policyStart;
|
|
5596
|
+
currentTraceStage = "gate";
|
|
5304
5597
|
if (mode === "shadow") {
|
|
5305
5598
|
const shadowGateStart = Date.now();
|
|
5306
5599
|
const shadowDecision = validation.failures.length > 0 ? {
|
|
@@ -5309,6 +5602,15 @@ function replay(client, opts = {}) {
|
|
|
5309
5602
|
blocked: buildBlockedCalls(toolCalls, validation.failures, validation.unmatchedBlocked),
|
|
5310
5603
|
response_modification: gateMode
|
|
5311
5604
|
} : { action: "allow", tool_calls: toolCalls };
|
|
5605
|
+
const blockedTools = shadowDecision.action === "block" ? shadowDecision.blocked.map((b) => b.tool_name) : [];
|
|
5606
|
+
trace.push({
|
|
5607
|
+
stage: "gate",
|
|
5608
|
+
tool: null,
|
|
5609
|
+
verdict: blockedTools.length > 0 ? "info" : "allow",
|
|
5610
|
+
reason: blockedTools.length > 0 ? "violations_found" : "no_violations",
|
|
5611
|
+
checked: { gate_mode: gateMode },
|
|
5612
|
+
found: { blocked_count: blockedTools.length, action: shadowDecision.action, ...blockedTools.length > 0 ? { blocked_tools: blockedTools } : {} }
|
|
5613
|
+
});
|
|
5312
5614
|
const shadowDelta = {
|
|
5313
5615
|
would_have_blocked: shadowDecision.action === "block" ? shadowDecision.blocked : [],
|
|
5314
5616
|
would_have_narrowed: narrowResult?.removed ?? [],
|
|
@@ -5318,7 +5620,11 @@ function replay(client, opts = {}) {
|
|
|
5318
5620
|
lastShadowDeltaValue = shadowDelta;
|
|
5319
5621
|
shadowEvaluationCount++;
|
|
5320
5622
|
timing.gate_ms += Date.now() - shadowGateStart;
|
|
5321
|
-
|
|
5623
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: sessionState.currentPhase, tools_committed: [], tools_blocked: blockedTools, killed: false, step_index: sessionState.totalStepCount } });
|
|
5624
|
+
trace.complete = true;
|
|
5625
|
+
lastTrace = trace;
|
|
5626
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5627
|
+
captureDecision(shadowDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, shadowDelta, timing, trace);
|
|
5322
5628
|
return response;
|
|
5323
5629
|
}
|
|
5324
5630
|
if (isCompatAdvisory) {
|
|
@@ -5359,7 +5665,21 @@ function replay(client, opts = {}) {
|
|
|
5359
5665
|
sessionState = recordDecisionOutcome(sessionState, "allowed");
|
|
5360
5666
|
}
|
|
5361
5667
|
timing.finalize_ms += Date.now() - advisoryFinalizeStart;
|
|
5362
|
-
|
|
5668
|
+
const advisoryBlockedTools = advisoryDecision.action === "block" ? advisoryDecision.blocked.map((b) => b.tool_name) : [];
|
|
5669
|
+
trace.push({
|
|
5670
|
+
stage: "gate",
|
|
5671
|
+
tool: null,
|
|
5672
|
+
verdict: advisoryBlockedTools.length > 0 ? "info" : "allow",
|
|
5673
|
+
reason: advisoryBlockedTools.length > 0 ? "violations_found" : "no_violations",
|
|
5674
|
+
checked: { gate_mode: gateMode },
|
|
5675
|
+
found: { blocked_count: advisoryBlockedTools.length, action: advisoryDecision.action, ...advisoryBlockedTools.length > 0 ? { blocked_tools: advisoryBlockedTools } : {} }
|
|
5676
|
+
});
|
|
5677
|
+
const advisoryNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
|
|
5678
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: advisoryNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: advisoryBlockedTools, killed: false, step_index: sessionState.totalStepCount } });
|
|
5679
|
+
trace.complete = true;
|
|
5680
|
+
lastTrace = trace;
|
|
5681
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5682
|
+
captureDecision(advisoryDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5363
5683
|
return response;
|
|
5364
5684
|
}
|
|
5365
5685
|
const enforceGateStart = Date.now();
|
|
@@ -5397,7 +5717,20 @@ function replay(client, opts = {}) {
|
|
|
5397
5717
|
});
|
|
5398
5718
|
}
|
|
5399
5719
|
}
|
|
5400
|
-
|
|
5720
|
+
trace.push({
|
|
5721
|
+
stage: "gate",
|
|
5722
|
+
tool: null,
|
|
5723
|
+
verdict: "allow",
|
|
5724
|
+
reason: "no_violations",
|
|
5725
|
+
checked: { gate_mode: gateMode },
|
|
5726
|
+
found: { blocked_count: 0, action: "allow" }
|
|
5727
|
+
});
|
|
5728
|
+
const allowNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
|
|
5729
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: completedStep.phase, phase_after: allowNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: [], killed: false, step_index: sessionState.totalStepCount } });
|
|
5730
|
+
trace.complete = true;
|
|
5731
|
+
lastTrace = trace;
|
|
5732
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5733
|
+
captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5401
5734
|
return response;
|
|
5402
5735
|
}
|
|
5403
5736
|
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
@@ -5462,15 +5795,42 @@ function replay(client, opts = {}) {
|
|
|
5462
5795
|
);
|
|
5463
5796
|
continue;
|
|
5464
5797
|
}
|
|
5465
|
-
|
|
5798
|
+
const blockBlockedTools = decision.action === "block" ? decision.blocked.map((b) => b.tool_name) : [];
|
|
5799
|
+
trace.push({
|
|
5800
|
+
stage: "gate",
|
|
5801
|
+
tool: null,
|
|
5802
|
+
verdict: "block",
|
|
5803
|
+
reason: "violations_found",
|
|
5804
|
+
checked: { gate_mode: gateMode },
|
|
5805
|
+
found: { blocked_count: blockBlockedTools.length, action: "block", blocked_tools: blockBlockedTools }
|
|
5806
|
+
});
|
|
5807
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: sessionState.currentPhase, phase_after: sessionState.currentPhase, tools_committed: [], tools_blocked: blockBlockedTools, killed, step_index: sessionState.totalStepCount } });
|
|
5808
|
+
trace.complete = true;
|
|
5809
|
+
lastTrace = trace;
|
|
5810
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5811
|
+
captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing, trace);
|
|
5466
5812
|
return applyGateDecision(decision, response, provider, gateMode, opts.onBlock);
|
|
5467
5813
|
}
|
|
5468
5814
|
if (lastError) throw lastError;
|
|
5469
5815
|
throw new ReplayInternalError("Retry loop exhausted without result", { sessionId });
|
|
5470
5816
|
} catch (err) {
|
|
5471
5817
|
if (err instanceof ReplayContractError || err instanceof ReplayKillError) {
|
|
5818
|
+
if (!trace.complete) {
|
|
5819
|
+
lastTrace = trace;
|
|
5820
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5821
|
+
}
|
|
5472
5822
|
throw err;
|
|
5473
5823
|
}
|
|
5824
|
+
trace.push({
|
|
5825
|
+
stage: currentTraceStage,
|
|
5826
|
+
tool: null,
|
|
5827
|
+
verdict: "error",
|
|
5828
|
+
reason: "stage_threw",
|
|
5829
|
+
checked: {},
|
|
5830
|
+
found: { error: err instanceof Error ? err.message : String(err) }
|
|
5831
|
+
});
|
|
5832
|
+
lastTrace = trace;
|
|
5833
|
+
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
5474
5834
|
sessionState = recordDecisionOutcome(sessionState, "error");
|
|
5475
5835
|
if (resolvedSessionLimits?.circuit_breaker) {
|
|
5476
5836
|
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
@@ -5555,7 +5915,7 @@ function replay(client, opts = {}) {
|
|
|
5555
5915
|
},
|
|
5556
5916
|
getHealth() {
|
|
5557
5917
|
const isAuthoritative = runtimeSession != null && !runtimeDegraded;
|
|
5558
|
-
const effectiveProtection =
|
|
5918
|
+
const effectiveProtection = protectionLevel === "govern" && !isAuthoritative ? "protect" : protectionLevel;
|
|
5559
5919
|
let durability;
|
|
5560
5920
|
if (isAuthoritative) {
|
|
5561
5921
|
durability = runtimeClient?.isCircuitOpen() ? "degraded-local" : "server";
|
|
@@ -5596,6 +5956,9 @@ function replay(client, opts = {}) {
|
|
|
5596
5956
|
getLastShadowDelta() {
|
|
5597
5957
|
return lastShadowDeltaValue;
|
|
5598
5958
|
},
|
|
5959
|
+
getLastTrace() {
|
|
5960
|
+
return lastTrace;
|
|
5961
|
+
},
|
|
5599
5962
|
/**
|
|
5600
5963
|
* v3: Manually restrict available tools within compiled legal space.
|
|
5601
5964
|
* @see specs/replay-v3.md § narrow() / widen()
|
|
@@ -5690,6 +6053,9 @@ function replay(client, opts = {}) {
|
|
|
5690
6053
|
const wrapped = {};
|
|
5691
6054
|
for (const [toolName, executor] of Object.entries(baseTools)) {
|
|
5692
6055
|
wrapped[toolName] = async (args) => {
|
|
6056
|
+
if (killed) {
|
|
6057
|
+
throw new ReplayKillError(sessionId, killedAt);
|
|
6058
|
+
}
|
|
5693
6059
|
const result = await executor(args);
|
|
5694
6060
|
if (runtimeClient && leaseFence && !runtimeDegraded) {
|
|
5695
6061
|
for (const [callId, deferred] of deferredReceipts) {
|
|
@@ -5711,7 +6077,13 @@ function replay(client, opts = {}) {
|
|
|
5711
6077
|
if (receiptResult.stateAdvanced) {
|
|
5712
6078
|
sessionState = { ...sessionState, stateVersion: receiptResult.stateVersion };
|
|
5713
6079
|
}
|
|
5714
|
-
} catch {
|
|
6080
|
+
} catch (err) {
|
|
6081
|
+
emitDiagnostic2(diagnostics, {
|
|
6082
|
+
type: "replay_receipt_error",
|
|
6083
|
+
session_id: sessionId,
|
|
6084
|
+
tool_name: deferred.toolName,
|
|
6085
|
+
details: err instanceof Error ? err.message : "receipt submission failed"
|
|
6086
|
+
});
|
|
5715
6087
|
}
|
|
5716
6088
|
break;
|
|
5717
6089
|
}
|
|
@@ -5722,7 +6094,7 @@ function replay(client, opts = {}) {
|
|
|
5722
6094
|
}
|
|
5723
6095
|
return wrapped;
|
|
5724
6096
|
}
|
|
5725
|
-
function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam) {
|
|
6097
|
+
function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam, traceParam) {
|
|
5726
6098
|
if (!buffer && !store) return;
|
|
5727
6099
|
if (timingParam) {
|
|
5728
6100
|
timingParam.total_ms = Date.now() - guardStart;
|
|
@@ -5757,6 +6129,7 @@ function replay(client, opts = {}) {
|
|
|
5757
6129
|
phase: sessionState.currentPhase,
|
|
5758
6130
|
phase_transition: phaseTransitionStr,
|
|
5759
6131
|
shadow_delta: shadowDelta,
|
|
6132
|
+
trace: traceParam ? redactTrace(traceParam, opts.captureLevel ?? "full") : void 0,
|
|
5760
6133
|
receipt: null
|
|
5761
6134
|
};
|
|
5762
6135
|
const capturedCall = {
|
|
@@ -5786,7 +6159,12 @@ function replay(client, opts = {}) {
|
|
|
5786
6159
|
const redactedCall = JSON.parse(redacted);
|
|
5787
6160
|
if (buffer) buffer.push(redactedCall);
|
|
5788
6161
|
appendCaptureToStore(redactedCall);
|
|
5789
|
-
} catch {
|
|
6162
|
+
} catch (err) {
|
|
6163
|
+
emitDiagnostic2(diagnostics, {
|
|
6164
|
+
type: "replay_capture_error",
|
|
6165
|
+
session_id: sessionId,
|
|
6166
|
+
details: `capture dropped (redaction failed): ${err instanceof Error ? err.message : "unknown"}`
|
|
6167
|
+
});
|
|
5790
6168
|
}
|
|
5791
6169
|
}
|
|
5792
6170
|
}
|
|
@@ -6301,10 +6679,28 @@ function applyOutputExtracts(state, updates) {
|
|
|
6301
6679
|
};
|
|
6302
6680
|
}
|
|
6303
6681
|
/**
 * Merge the `session_limits` of every contract into a single limits object.
 *
 * Precedence is "first contract wins": a field is taken from the first
 * contract (in iteration order) that defines it and later definitions are
 * ignored. `max_calls_per_tool` is merged per tool name with the same rule.
 *
 * @param {Iterable<{session_limits?: object}>} contracts Contracts to scan.
 * @returns {object|null} The merged limits, or `null` when no contract
 *   carries a truthy `session_limits`.
 */
function resolveSessionLimits(contracts) {
  // Scalar limits: `0` is a legitimate value, so presence is tested against
  // `undefined` rather than by truthiness.
  const scalarKeys = ["max_steps", "max_tool_calls", "max_tool_calls_mode", "max_cost_per_session"];
  // Object-valued limits: adopted as a whole from the first contract that has one.
  const objectKeys = ["loop_detection", "circuit_breaker"];
  const merged = {};
  let found = false;
  for (const c of contracts) {
    const sl = c.session_limits;
    if (!sl) continue;
    found = true;
    for (const key of scalarKeys) {
      if (sl[key] !== void 0 && merged[key] === void 0) merged[key] = sl[key];
    }
    for (const key of objectKeys) {
      if (sl[key] && !merged[key]) merged[key] = sl[key];
    }
    if (sl.max_calls_per_tool) {
      if (!merged.max_calls_per_tool) merged.max_calls_per_tool = {};
      const perTool = merged.max_calls_per_tool;
      for (const [tool, limit] of Object.entries(sl.max_calls_per_tool)) {
        // Look at OWN properties only: a plain `perTool[tool] === undefined`
        // test sees inherited members such as `toString` or `constructor`
        // and would silently drop limits for tools with those names.
        const alreadySet = Object.prototype.hasOwnProperty.call(perTool, tool) && perTool[tool] !== void 0;
        if (alreadySet) continue;
        // defineProperty (instead of assignment) keeps a tool literally named
        // "__proto__" from going through the prototype setter.
        Object.defineProperty(perTool, tool, {
          value: limit,
          writable: true,
          enumerable: true,
          configurable: true
        });
      }
    }
  }
  return found ? merged : null;
}
|
|
6309
6705
|
function buildStateSnapshot(state, lastNarrowing = null) {
|
|
6310
6706
|
const lastStep = state.lastStep ? {
|
|
@@ -6399,6 +6795,7 @@ function createInactiveSession(client, sessionId, reason) {
|
|
|
6399
6795
|
getState: () => EMPTY_STATE_SNAPSHOT,
|
|
6400
6796
|
getLastNarrowing: () => null,
|
|
6401
6797
|
getLastShadowDelta: () => null,
|
|
6798
|
+
getLastTrace: () => null,
|
|
6402
6799
|
narrow() {
|
|
6403
6800
|
},
|
|
6404
6801
|
widen() {
|
|
@@ -6440,6 +6837,7 @@ function createBlockingInactiveSession(client, sessionId, detail, configError) {
|
|
|
6440
6837
|
getState: () => EMPTY_STATE_SNAPSHOT,
|
|
6441
6838
|
getLastNarrowing: () => null,
|
|
6442
6839
|
getLastShadowDelta: () => null,
|
|
6840
|
+
getLastTrace: () => null,
|
|
6443
6841
|
narrow() {
|
|
6444
6842
|
},
|
|
6445
6843
|
widen() {
|
|
@@ -6524,6 +6922,83 @@ function generateSessionId2() {
|
|
|
6524
6922
|
function stripHashPrefix(hash) {
|
|
6525
6923
|
return hash.startsWith("sha256:") ? hash.slice(7) : hash;
|
|
6526
6924
|
}
|
|
6925
|
+
/**
 * Resolve the console log level from the `REPLAYCI_LOG` environment variable.
 *
 * Recognised values (case-insensitive, surrounding whitespace ignored):
 *   - "trace" / "debug"          -> "trace"
 *   - "silent" / "off" / "none"  -> "silent"
 * Anything else, including an unset or empty variable, yields "warn".
 *
 * @returns {"trace"|"warn"|"silent"} The effective log level.
 */
function resolveLogLevel() {
  // `process` may be absent entirely, or present as a shim without `env`;
  // neither case may throw from a logging helper.
  const env = typeof process !== "undefined" ? process.env : void 0;
  const raw = env?.REPLAYCI_LOG;
  if (typeof raw !== "string") return "warn";
  // Trim so a stray trailing space or newline (shell quoting, .env files)
  // does not silently downgrade "trace" to the default.
  const normalized = raw.trim().toLowerCase();
  if (normalized === "trace" || normalized === "debug") return "trace";
  if (normalized === "silent" || normalized === "off" || normalized === "none") return "silent";
  return "warn";
}
|
|
6933
|
+
/**
 * Default diagnostics sink: renders selected diagnostic events as
 * `[replayci] ...` lines via `console.warn`.
 *
 * The level comes from `resolveLogLevel()`:
 *   - "silent": nothing is printed.
 *   - "trace":  for `replay_trace` events every trace entry is printed.
 *   - otherwise: for `replay_trace` events only entries whose verdict is
 *     "block" are printed.
 * In both printing modes a trace with `complete` falsy gets an extra notice.
 * Event types without a case below are ignored.
 *
 * @param {{type: string}} event Diagnostic event; the other fields read
 *   depend on `event.type`.
 * @returns {void}
 */
function defaultReplayDiagnosticsHandler(event) {
  const level = resolveLogLevel();
  if (level === "silent") return;
  switch (event.type) {
    case "replay_inactive":
      console.warn(`[replayci] replay() inactive: ${event.reason}${event.error_message ? ` \u2014 ${event.error_message}` : ""}`);
      break;
    case "replay_compile_error":
      console.warn(`[replayci] compile error: ${event.details}`);
      break;
    case "replay_compile_warning":
      console.warn(`[replayci] compile warning: ${event.details}`);
      break;
    case "replay_bypass_detected":
      console.warn(`[replayci] bypass detected on session ${event.session_id}`);
      break;
    case "replay_kill":
      console.warn(`[replayci] session ${event.session_id} killed`);
      break;
    case "replay_block":
      console.warn(`[replayci] blocked ${event.tool_name}: ${event.reason}`);
      break;
    case "replay_narrow": {
      for (const r of event.removed) {
        console.warn(`[replayci] removed ${r.tool} \u2192 ${r.reason}${r.detail ? ` (${r.detail})` : ""}`);
      }
      break;
    }
    case "replay_trace": {
      const t = event.trace;
      if (level === "trace") {
        for (const entry of t.entries) {
          const toolStr = entry.tool ? ` ${entry.tool}` : "";
          // The reason is omitted when it merely repeats the verdict.
          const detail = entry.reason !== entry.verdict ? ` \u2014 ${entry.reason}` : "";
          const checkedStr = formatTraceField("checked", entry.checked);
          const foundStr = formatTraceField("found", entry.found);
          console.warn(`[replayci] ${entry.stage}${toolStr}: ${entry.verdict}${detail}${checkedStr}${foundStr}`);
        }
        if (!t.complete) {
          console.warn(`[replayci] trace INCOMPLETE (fault in pipeline)`);
        }
      } else {
        const blocks = t.entries.filter((e) => e.verdict === "block");
        for (const b of blocks) {
          const toolStr = b.tool ?? "session";
          console.warn(`[replayci] blocked ${toolStr} at ${b.stage} \u2192 ${b.reason}`);
        }
        if (!t.complete) {
          console.warn(`[replayci] enforcement cycle incomplete (fault) \u2014 session.getLastTrace() for partial trace`);
        }
      }
      break;
    }
    case "replay_workflow_error":
      console.warn(`[replayci] workflow error: ${event.details}`);
      break;
    case "replay_state_sync_error":
      console.warn(`[replayci] state sync error: ${event.details}`);
      break;
    case "replay_receipt_error":
      console.warn(`[replayci] receipt error (${event.tool_name}): ${event.details}`);
      break;
    case "replay_capture_error":
      console.warn(`[replayci] capture error: ${event.details}`);
      break;
    default:
      break;
  }
}

/**
 * Render one trace-entry field as ` <label>=<json>` for the trace log line.
 *
 * Returns "" when the field is null/undefined or has no enumerable keys.
 * A value that `JSON.stringify` rejects (BigInt, circular reference) is
 * rendered as ` <label>=[unserializable]` so one bad entry cannot abort the
 * rest of the trace output.
 *
 * @param {string} label Field name shown in the log line.
 * @param {*} fields Value of the field on the trace entry.
 * @returns {string} Formatted fragment, possibly empty.
 */
function formatTraceField(label, fields) {
  if (fields == null || Object.keys(fields).length === 0) return "";
  try {
    return ` ${label}=${JSON.stringify(fields)}`;
  } catch {
    return ` ${label}=[unserializable]`;
  }
}
|
|
6527
7002
|
function emitDiagnostic2(diagnostics, event) {
|
|
6528
7003
|
try {
|
|
6529
7004
|
diagnostics?.(event);
|
|
@@ -6533,9 +7008,10 @@ function emitDiagnostic2(diagnostics, event) {
|
|
|
6533
7008
|
function toRecord10(value) {
|
|
6534
7009
|
return value !== null && typeof value === "object" ? value : {};
|
|
6535
7010
|
}
|
|
6536
|
-
function determineProtectionLevel(mode, tools, contracts) {
|
|
7011
|
+
function determineProtectionLevel(mode, tools, contracts, apiKey) {
|
|
6537
7012
|
if (mode === "shadow" || mode === "log-only") return "monitor";
|
|
6538
7013
|
if (!tools || Object.keys(tools).length === 0) return "protect";
|
|
7014
|
+
if (!apiKey) return "protect";
|
|
6539
7015
|
const stateBearingTools = contracts.filter(isStateBearing);
|
|
6540
7016
|
if (stateBearingTools.length === 0) return "protect";
|
|
6541
7017
|
const wrappedTools = new Set(Object.keys(tools));
|