@replayci/replay 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +275 -191
- package/dist/index.d.cts +29 -0
- package/dist/index.d.ts +29 -0
- package/dist/index.js +275 -191
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1880,7 +1880,12 @@ function safelyCaptureResponse(input) {
|
|
|
1880
1880
|
});
|
|
1881
1881
|
input.persistHealthEvent();
|
|
1882
1882
|
}
|
|
1883
|
-
} catch {
|
|
1883
|
+
} catch (err) {
|
|
1884
|
+
emitDiagnostic(input.diagnostics, {
|
|
1885
|
+
type: "capture_error",
|
|
1886
|
+
session_id: input.sessionId,
|
|
1887
|
+
details: err instanceof Error ? err.message : "response capture failed"
|
|
1888
|
+
});
|
|
1884
1889
|
}
|
|
1885
1890
|
}
|
|
1886
1891
|
function safelyPushStreamCapture(input) {
|
|
@@ -1909,7 +1914,12 @@ function safelyPushStreamCapture(input) {
|
|
|
1909
1914
|
});
|
|
1910
1915
|
input.persistHealthEvent();
|
|
1911
1916
|
}
|
|
1912
|
-
} catch {
|
|
1917
|
+
} catch (err) {
|
|
1918
|
+
emitDiagnostic(input.diagnostics, {
|
|
1919
|
+
type: "capture_error",
|
|
1920
|
+
session_id: input.sessionId,
|
|
1921
|
+
details: err instanceof Error ? err.message : "stream capture failed"
|
|
1922
|
+
});
|
|
1913
1923
|
}
|
|
1914
1924
|
}
|
|
1915
1925
|
function buildCapturedCall(input) {
|
|
@@ -2494,6 +2504,173 @@ function formatErrorMessage(error) {
|
|
|
2494
2504
|
return error instanceof Error ? error.message : String(error);
|
|
2495
2505
|
}
|
|
2496
2506
|
|
|
2507
|
+
// src/preconditions.ts
|
|
2508
|
+
function evaluatePreconditions(preconditions, sessionState, currentArguments) {
|
|
2509
|
+
return preconditions.map(
|
|
2510
|
+
(p) => evaluatePrecondition(p, sessionState, currentArguments)
|
|
2511
|
+
);
|
|
2512
|
+
}
|
|
2513
|
+
function evaluatePrecondition(precondition, sessionState, currentArguments) {
|
|
2514
|
+
if (precondition.requires_step_count) {
|
|
2515
|
+
const required = precondition.requires_step_count.gte;
|
|
2516
|
+
if (sessionState.totalStepCount < required) {
|
|
2517
|
+
return {
|
|
2518
|
+
satisfied: false,
|
|
2519
|
+
detail: `Need ${required} prior steps, have ${sessionState.totalStepCount}`
|
|
2520
|
+
};
|
|
2521
|
+
}
|
|
2522
|
+
}
|
|
2523
|
+
if (precondition.requires_prior_tool) {
|
|
2524
|
+
const toolName = precondition.requires_prior_tool;
|
|
2525
|
+
const resourcePath = precondition.resource ? typeof precondition.resource === "string" ? precondition.resource : precondition.resource.path : void 0;
|
|
2526
|
+
const resourceValue = resourcePath ? extractPath(currentArguments ?? {}, resourcePath) : void 0;
|
|
2527
|
+
const cacheKey = resourceValue !== void 0 ? `${toolName}:${JSON.stringify(resourceValue)}` : toolName;
|
|
2528
|
+
let priorStep;
|
|
2529
|
+
for (let i = sessionState.steps.length - 1; i >= 0; i--) {
|
|
2530
|
+
const s = sessionState.steps[i];
|
|
2531
|
+
if (s.toolCalls.some((tc) => {
|
|
2532
|
+
if (tc.toolName !== toolName) return false;
|
|
2533
|
+
if (tc.proposal_decision !== "allowed") return false;
|
|
2534
|
+
if (resourceValue !== void 0 && tc.resourceValues?.[resourcePath] !== resourceValue) {
|
|
2535
|
+
return false;
|
|
2536
|
+
}
|
|
2537
|
+
return true;
|
|
2538
|
+
})) {
|
|
2539
|
+
priorStep = s;
|
|
2540
|
+
break;
|
|
2541
|
+
}
|
|
2542
|
+
}
|
|
2543
|
+
const cachedExtract = sessionState.satisfiedPreconditions.get(cacheKey);
|
|
2544
|
+
if (!priorStep && cachedExtract === void 0) {
|
|
2545
|
+
const detail = resourceValue !== void 0 ? `Required prior tool ${toolName} not found for resource ${JSON.stringify(resourceValue)}` : `Required prior tool ${toolName} not found in session`;
|
|
2546
|
+
return { satisfied: false, detail };
|
|
2547
|
+
}
|
|
2548
|
+
if (precondition.with_output) {
|
|
2549
|
+
const extract = priorStep?.outputExtract ?? cachedExtract ?? {};
|
|
2550
|
+
for (const assertion of precondition.with_output) {
|
|
2551
|
+
const value = extractPath(extract, assertion.path);
|
|
2552
|
+
if (assertion.equals !== void 0 && value !== assertion.equals) {
|
|
2553
|
+
return {
|
|
2554
|
+
satisfied: false,
|
|
2555
|
+
detail: `Prior tool output assertion failed: ${assertion.path} \u2014 expected ${JSON.stringify(assertion.equals)}, got ${JSON.stringify(value)}`
|
|
2556
|
+
};
|
|
2557
|
+
}
|
|
2558
|
+
}
|
|
2559
|
+
}
|
|
2560
|
+
}
|
|
2561
|
+
return { satisfied: true, detail: "" };
|
|
2562
|
+
}
|
|
2563
|
+
function extractPath(obj, path) {
|
|
2564
|
+
const cleanPath = path.startsWith("$.") ? path.slice(2) : path;
|
|
2565
|
+
if (cleanPath === "" || cleanPath === "$") return obj;
|
|
2566
|
+
const segments = cleanPath.split(".");
|
|
2567
|
+
let current = obj;
|
|
2568
|
+
for (const segment of segments) {
|
|
2569
|
+
if (current === null || current === void 0) return void 0;
|
|
2570
|
+
if (typeof current !== "object") return void 0;
|
|
2571
|
+
current = current[segment];
|
|
2572
|
+
}
|
|
2573
|
+
return current;
|
|
2574
|
+
}
|
|
2575
|
+
|
|
2576
|
+
// src/argumentValues.ts
|
|
2577
|
+
function evaluateArgumentValueInvariants(parsedArguments, invariants) {
|
|
2578
|
+
const failures = [];
|
|
2579
|
+
for (const inv of invariants) {
|
|
2580
|
+
const value = extractPath(parsedArguments, inv.path);
|
|
2581
|
+
if (inv.exact_match !== void 0) {
|
|
2582
|
+
const strValue = typeof value === "string" ? value : JSON.stringify(value);
|
|
2583
|
+
if (strValue !== inv.exact_match) {
|
|
2584
|
+
failures.push({
|
|
2585
|
+
path: inv.path,
|
|
2586
|
+
operator: "exact_match",
|
|
2587
|
+
expected: inv.exact_match,
|
|
2588
|
+
actual: value,
|
|
2589
|
+
detail: `Expected exact match "${inv.exact_match}", got "${strValue}"`
|
|
2590
|
+
});
|
|
2591
|
+
}
|
|
2592
|
+
}
|
|
2593
|
+
if (inv.regex !== void 0) {
|
|
2594
|
+
const strValue = typeof value === "string" ? value : String(value);
|
|
2595
|
+
try {
|
|
2596
|
+
const re = safeRegex(inv.regex);
|
|
2597
|
+
if (!re.test(strValue)) {
|
|
2598
|
+
failures.push({
|
|
2599
|
+
path: inv.path,
|
|
2600
|
+
operator: "regex",
|
|
2601
|
+
expected: inv.regex,
|
|
2602
|
+
actual: value,
|
|
2603
|
+
detail: `Value "${strValue}" does not match regex "${inv.regex}"`
|
|
2604
|
+
});
|
|
2605
|
+
}
|
|
2606
|
+
} catch {
|
|
2607
|
+
failures.push({
|
|
2608
|
+
path: inv.path,
|
|
2609
|
+
operator: "regex",
|
|
2610
|
+
expected: inv.regex,
|
|
2611
|
+
actual: value,
|
|
2612
|
+
detail: `Invalid regex pattern: "${inv.regex}"`
|
|
2613
|
+
});
|
|
2614
|
+
}
|
|
2615
|
+
}
|
|
2616
|
+
if (inv.one_of !== void 0) {
|
|
2617
|
+
const match = inv.one_of.some((candidate) => {
|
|
2618
|
+
if (typeof candidate === typeof value) {
|
|
2619
|
+
return JSON.stringify(candidate) === JSON.stringify(value);
|
|
2620
|
+
}
|
|
2621
|
+
return false;
|
|
2622
|
+
});
|
|
2623
|
+
if (!match) {
|
|
2624
|
+
failures.push({
|
|
2625
|
+
path: inv.path,
|
|
2626
|
+
operator: "one_of",
|
|
2627
|
+
expected: inv.one_of,
|
|
2628
|
+
actual: value,
|
|
2629
|
+
detail: `Value ${JSON.stringify(value)} not in ${JSON.stringify(inv.one_of)}`
|
|
2630
|
+
});
|
|
2631
|
+
}
|
|
2632
|
+
}
|
|
2633
|
+
if (inv.type !== void 0) {
|
|
2634
|
+
const actualType = Array.isArray(value) ? "array" : typeof value;
|
|
2635
|
+
if (actualType !== inv.type) {
|
|
2636
|
+
failures.push({
|
|
2637
|
+
path: inv.path,
|
|
2638
|
+
operator: "type",
|
|
2639
|
+
expected: inv.type,
|
|
2640
|
+
actual: actualType,
|
|
2641
|
+
detail: `Expected type "${inv.type}", got "${actualType}"`
|
|
2642
|
+
});
|
|
2643
|
+
}
|
|
2644
|
+
}
|
|
2645
|
+
if (typeof inv.gte === "number") {
|
|
2646
|
+
if (typeof value !== "number" || value < inv.gte) {
|
|
2647
|
+
failures.push({
|
|
2648
|
+
path: inv.path,
|
|
2649
|
+
operator: "gte",
|
|
2650
|
+
expected: inv.gte,
|
|
2651
|
+
actual: value,
|
|
2652
|
+
detail: `Expected >= ${inv.gte}, got ${JSON.stringify(value)}`
|
|
2653
|
+
});
|
|
2654
|
+
}
|
|
2655
|
+
}
|
|
2656
|
+
if (typeof inv.lte === "number") {
|
|
2657
|
+
if (typeof value !== "number" || value > inv.lte) {
|
|
2658
|
+
failures.push({
|
|
2659
|
+
path: inv.path,
|
|
2660
|
+
operator: "lte",
|
|
2661
|
+
expected: inv.lte,
|
|
2662
|
+
actual: value,
|
|
2663
|
+
detail: `Expected <= ${inv.lte}, got ${JSON.stringify(value)}`
|
|
2664
|
+
});
|
|
2665
|
+
}
|
|
2666
|
+
}
|
|
2667
|
+
}
|
|
2668
|
+
return {
|
|
2669
|
+
passed: failures.length === 0,
|
|
2670
|
+
failures
|
|
2671
|
+
};
|
|
2672
|
+
}
|
|
2673
|
+
|
|
2497
2674
|
// src/validate.ts
|
|
2498
2675
|
function prepareContracts(input) {
|
|
2499
2676
|
assertSupportedNodeRuntime();
|
|
@@ -2558,6 +2735,7 @@ function evaluateAllContracts(matchedContracts, extraction) {
|
|
|
2558
2735
|
failures.push(...evaluateExpectTools(contract, extraction.toolCalls));
|
|
2559
2736
|
failures.push(...evaluateOutputInvariants(contract, extraction.normalizedResponse));
|
|
2560
2737
|
failures.push(...evaluateExpectedToolCallMatchers(contract, extraction.toolCalls));
|
|
2738
|
+
failures.push(...evaluateArgumentInvariants(contract, extraction.toolCalls));
|
|
2561
2739
|
}
|
|
2562
2740
|
return {
|
|
2563
2741
|
pass: failures.length === 0,
|
|
@@ -2725,6 +2903,28 @@ function evaluateExpectedToolCallMatchers(contract, toolCalls) {
|
|
|
2725
2903
|
contract_file: contract.contract_file
|
|
2726
2904
|
}));
|
|
2727
2905
|
}
|
|
2906
|
+
function evaluateArgumentInvariants(contract, toolCalls) {
|
|
2907
|
+
if (!contract.argument_value_invariants || contract.argument_value_invariants.length === 0) {
|
|
2908
|
+
return [];
|
|
2909
|
+
}
|
|
2910
|
+
const failures = [];
|
|
2911
|
+
for (const toolCall of toolCalls) {
|
|
2912
|
+
if (toolCall.name !== contract.tool) continue;
|
|
2913
|
+
const parsedArgs = toolCall.parsedArguments != null && typeof toolCall.parsedArguments === "object" ? toolCall.parsedArguments : {};
|
|
2914
|
+
const result = evaluateArgumentValueInvariants(parsedArgs, contract.argument_value_invariants);
|
|
2915
|
+
for (const f of result.failures) {
|
|
2916
|
+
failures.push({
|
|
2917
|
+
path: f.path,
|
|
2918
|
+
operator: f.operator,
|
|
2919
|
+
expected: f.expected,
|
|
2920
|
+
found: f.actual,
|
|
2921
|
+
message: f.detail,
|
|
2922
|
+
contract_file: contract.contract_file
|
|
2923
|
+
});
|
|
2924
|
+
}
|
|
2925
|
+
}
|
|
2926
|
+
return failures;
|
|
2927
|
+
}
|
|
2728
2928
|
function mapInvariantFailure(contract, failure, normalizedResponse) {
|
|
2729
2929
|
const invariant = findMatchingInvariant(contract.assertions.output_invariants, failure);
|
|
2730
2930
|
const lookup = getPathValue(normalizedResponse, failure.path);
|
|
@@ -3596,75 +3796,6 @@ function checkCircuitBreaker(state, config) {
|
|
|
3596
3796
|
return { triggered: false, reason: null };
|
|
3597
3797
|
}
|
|
3598
3798
|
|
|
3599
|
-
// src/preconditions.ts
|
|
3600
|
-
function evaluatePreconditions(preconditions, sessionState, currentArguments) {
|
|
3601
|
-
return preconditions.map(
|
|
3602
|
-
(p) => evaluatePrecondition(p, sessionState, currentArguments)
|
|
3603
|
-
);
|
|
3604
|
-
}
|
|
3605
|
-
function evaluatePrecondition(precondition, sessionState, currentArguments) {
|
|
3606
|
-
if (precondition.requires_step_count) {
|
|
3607
|
-
const required = precondition.requires_step_count.gte;
|
|
3608
|
-
if (sessionState.totalStepCount < required) {
|
|
3609
|
-
return {
|
|
3610
|
-
satisfied: false,
|
|
3611
|
-
detail: `Need ${required} prior steps, have ${sessionState.totalStepCount}`
|
|
3612
|
-
};
|
|
3613
|
-
}
|
|
3614
|
-
}
|
|
3615
|
-
if (precondition.requires_prior_tool) {
|
|
3616
|
-
const toolName = precondition.requires_prior_tool;
|
|
3617
|
-
const resourcePath = precondition.resource ? typeof precondition.resource === "string" ? precondition.resource : precondition.resource.path : void 0;
|
|
3618
|
-
const resourceValue = resourcePath ? extractPath(currentArguments ?? {}, resourcePath) : void 0;
|
|
3619
|
-
const cacheKey = resourceValue !== void 0 ? `${toolName}:${JSON.stringify(resourceValue)}` : toolName;
|
|
3620
|
-
let priorStep;
|
|
3621
|
-
for (let i = sessionState.steps.length - 1; i >= 0; i--) {
|
|
3622
|
-
const s = sessionState.steps[i];
|
|
3623
|
-
if (s.toolCalls.some((tc) => {
|
|
3624
|
-
if (tc.toolName !== toolName) return false;
|
|
3625
|
-
if (tc.proposal_decision !== "allowed") return false;
|
|
3626
|
-
if (resourceValue !== void 0 && tc.resourceValues?.[resourcePath] !== resourceValue) {
|
|
3627
|
-
return false;
|
|
3628
|
-
}
|
|
3629
|
-
return true;
|
|
3630
|
-
})) {
|
|
3631
|
-
priorStep = s;
|
|
3632
|
-
break;
|
|
3633
|
-
}
|
|
3634
|
-
}
|
|
3635
|
-
const cachedExtract = sessionState.satisfiedPreconditions.get(cacheKey);
|
|
3636
|
-
if (!priorStep && cachedExtract === void 0) {
|
|
3637
|
-
const detail = resourceValue !== void 0 ? `Required prior tool ${toolName} not found for resource ${JSON.stringify(resourceValue)}` : `Required prior tool ${toolName} not found in session`;
|
|
3638
|
-
return { satisfied: false, detail };
|
|
3639
|
-
}
|
|
3640
|
-
if (precondition.with_output) {
|
|
3641
|
-
const extract = priorStep?.outputExtract ?? cachedExtract ?? {};
|
|
3642
|
-
for (const assertion of precondition.with_output) {
|
|
3643
|
-
const value = extractPath(extract, assertion.path);
|
|
3644
|
-
if (assertion.equals !== void 0 && value !== assertion.equals) {
|
|
3645
|
-
return {
|
|
3646
|
-
satisfied: false,
|
|
3647
|
-
detail: `Prior tool output assertion failed: ${assertion.path} \u2014 expected ${JSON.stringify(assertion.equals)}, got ${JSON.stringify(value)}`
|
|
3648
|
-
};
|
|
3649
|
-
}
|
|
3650
|
-
}
|
|
3651
|
-
}
|
|
3652
|
-
}
|
|
3653
|
-
return { satisfied: true, detail: "" };
|
|
3654
|
-
}
|
|
3655
|
-
function extractPath(obj, path) {
|
|
3656
|
-
const cleanPath = path.startsWith("$.") ? path.slice(2) : path;
|
|
3657
|
-
if (cleanPath === "" || cleanPath === "$") return obj;
|
|
3658
|
-
const segments = cleanPath.split(".");
|
|
3659
|
-
let current = obj;
|
|
3660
|
-
for (const segment of segments) {
|
|
3661
|
-
if (current === null || current === void 0) return void 0;
|
|
3662
|
-
if (typeof current !== "object") return void 0;
|
|
3663
|
-
current = current[segment];
|
|
3664
|
-
}
|
|
3665
|
-
return current;
|
|
3666
|
-
}
|
|
3667
|
-
|
|
3668
3799
|
// src/crossStep.ts
|
|
3669
3800
|
function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
3670
3801
|
const failures = [];
|
|
@@ -3738,104 +3869,6 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3738
3869
|
};
|
|
3739
3870
|
}
|
|
3740
3871
|
|
|
3741
|
-
// src/argumentValues.ts
|
|
3742
|
-
function evaluateArgumentValueInvariants(parsedArguments, invariants) {
|
|
3743
|
-
const failures = [];
|
|
3744
|
-
for (const inv of invariants) {
|
|
3745
|
-
const value = extractPath(parsedArguments, inv.path);
|
|
3746
|
-
if (inv.exact_match !== void 0) {
|
|
3747
|
-
const strValue = typeof value === "string" ? value : JSON.stringify(value);
|
|
3748
|
-
if (strValue !== inv.exact_match) {
|
|
3749
|
-
failures.push({
|
|
3750
|
-
path: inv.path,
|
|
3751
|
-
operator: "exact_match",
|
|
3752
|
-
expected: inv.exact_match,
|
|
3753
|
-
actual: value,
|
|
3754
|
-
detail: `Expected exact match "${inv.exact_match}", got "${strValue}"`
|
|
3755
|
-
});
|
|
3756
|
-
}
|
|
3757
|
-
}
|
|
3758
|
-
if (inv.regex !== void 0) {
|
|
3759
|
-
const strValue = typeof value === "string" ? value : String(value);
|
|
3760
|
-
try {
|
|
3761
|
-
const re = safeRegex(inv.regex);
|
|
3762
|
-
if (!re.test(strValue)) {
|
|
3763
|
-
failures.push({
|
|
3764
|
-
path: inv.path,
|
|
3765
|
-
operator: "regex",
|
|
3766
|
-
expected: inv.regex,
|
|
3767
|
-
actual: value,
|
|
3768
|
-
detail: `Value "${strValue}" does not match regex "${inv.regex}"`
|
|
3769
|
-
});
|
|
3770
|
-
}
|
|
3771
|
-
} catch {
|
|
3772
|
-
failures.push({
|
|
3773
|
-
path: inv.path,
|
|
3774
|
-
operator: "regex",
|
|
3775
|
-
expected: inv.regex,
|
|
3776
|
-
actual: value,
|
|
3777
|
-
detail: `Invalid regex pattern: "${inv.regex}"`
|
|
3778
|
-
});
|
|
3779
|
-
}
|
|
3780
|
-
}
|
|
3781
|
-
if (inv.one_of !== void 0) {
|
|
3782
|
-
const match = inv.one_of.some((candidate) => {
|
|
3783
|
-
if (typeof candidate === typeof value) {
|
|
3784
|
-
return JSON.stringify(candidate) === JSON.stringify(value);
|
|
3785
|
-
}
|
|
3786
|
-
return false;
|
|
3787
|
-
});
|
|
3788
|
-
if (!match) {
|
|
3789
|
-
failures.push({
|
|
3790
|
-
path: inv.path,
|
|
3791
|
-
operator: "one_of",
|
|
3792
|
-
expected: inv.one_of,
|
|
3793
|
-
actual: value,
|
|
3794
|
-
detail: `Value ${JSON.stringify(value)} not in ${JSON.stringify(inv.one_of)}`
|
|
3795
|
-
});
|
|
3796
|
-
}
|
|
3797
|
-
}
|
|
3798
|
-
if (inv.type !== void 0) {
|
|
3799
|
-
const actualType = Array.isArray(value) ? "array" : typeof value;
|
|
3800
|
-
if (actualType !== inv.type) {
|
|
3801
|
-
failures.push({
|
|
3802
|
-
path: inv.path,
|
|
3803
|
-
operator: "type",
|
|
3804
|
-
expected: inv.type,
|
|
3805
|
-
actual: actualType,
|
|
3806
|
-
detail: `Expected type "${inv.type}", got "${actualType}"`
|
|
3807
|
-
});
|
|
3808
|
-
}
|
|
3809
|
-
}
|
|
3810
|
-
if (typeof inv.gte === "number") {
|
|
3811
|
-
if (typeof value !== "number" || value < inv.gte) {
|
|
3812
|
-
failures.push({
|
|
3813
|
-
path: inv.path,
|
|
3814
|
-
operator: "gte",
|
|
3815
|
-
expected: inv.gte,
|
|
3816
|
-
actual: value,
|
|
3817
|
-
detail: `Expected >= ${inv.gte}, got ${JSON.stringify(value)}`
|
|
3818
|
-
});
|
|
3819
|
-
}
|
|
3820
|
-
}
|
|
3821
|
-
if (typeof inv.lte === "number") {
|
|
3822
|
-
if (typeof value !== "number" || value > inv.lte) {
|
|
3823
|
-
failures.push({
|
|
3824
|
-
path: inv.path,
|
|
3825
|
-
operator: "lte",
|
|
3826
|
-
expected: inv.lte,
|
|
3827
|
-
actual: value,
|
|
3828
|
-
detail: `Expected <= ${inv.lte}, got ${JSON.stringify(value)}`
|
|
3829
|
-
});
|
|
3830
|
-
}
|
|
3831
|
-
}
|
|
3832
|
-
}
|
|
3833
|
-
return {
|
|
3834
|
-
passed: failures.length === 0,
|
|
3835
|
-
failures
|
|
3836
|
-
};
|
|
3837
|
-
}
|
|
3838
|
-
|
|
3839
3872
|
// src/messageValidation.ts
|
|
3840
3873
|
import {
|
|
3841
3874
|
evaluateInvariants as evaluateInvariants2
|
|
@@ -4611,12 +4644,12 @@ function replay(client, opts = {}) {
|
|
|
4611
4644
|
contracts = resolveContracts(opts);
|
|
4612
4645
|
} catch (err) {
|
|
4613
4646
|
const detail = err instanceof Error ? err.message : "Failed to load contracts";
|
|
4614
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4647
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4615
4648
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4616
4649
|
}
|
|
4617
4650
|
const configError = validateConfig(contracts, opts);
|
|
4618
4651
|
if (configError) {
|
|
4619
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4652
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: configError.message });
|
|
4620
4653
|
return createBlockingInactiveSession(client, sessionId, configError.message, configError);
|
|
4621
4654
|
}
|
|
4622
4655
|
let discoveredSessionYaml = null;
|
|
@@ -4624,7 +4657,7 @@ function replay(client, opts = {}) {
|
|
|
4624
4657
|
discoveredSessionYaml = discoverSessionYaml(opts);
|
|
4625
4658
|
} catch (err) {
|
|
4626
4659
|
const detail = `session.yaml: ${err instanceof Error ? err.message : String(err)}`;
|
|
4627
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4660
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4628
4661
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4629
4662
|
}
|
|
4630
4663
|
let sessionYaml = discoveredSessionYaml;
|
|
@@ -4640,15 +4673,19 @@ function replay(client, opts = {}) {
|
|
|
4640
4673
|
tools: opts.tools ? new Map(Object.entries(opts.tools)) : void 0
|
|
4641
4674
|
});
|
|
4642
4675
|
} catch (err) {
|
|
4676
|
+
const detail = `Session compilation failed: ${err instanceof Error ? err.message : String(err)}`;
|
|
4643
4677
|
emitDiagnostic2(diagnostics, {
|
|
4644
|
-
type: "
|
|
4645
|
-
details:
|
|
4678
|
+
type: "replay_compile_error",
|
|
4679
|
+
details: detail
|
|
4646
4680
|
});
|
|
4681
|
+
if (discoveredSessionYaml) {
|
|
4682
|
+
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4683
|
+
}
|
|
4647
4684
|
}
|
|
4648
4685
|
if (compiledSession?.warnings && compiledSession.warnings.length > 0) {
|
|
4649
4686
|
for (const warning of compiledSession.warnings) {
|
|
4650
4687
|
emitDiagnostic2(diagnostics, {
|
|
4651
|
-
type: "
|
|
4688
|
+
type: "replay_compile_warning",
|
|
4652
4689
|
details: `Compile warning: ${warning}`
|
|
4653
4690
|
});
|
|
4654
4691
|
}
|
|
@@ -4660,7 +4697,7 @@ function replay(client, opts = {}) {
|
|
|
4660
4697
|
if (spec.block_incompatible && spec.block_incompatible.length > 0) {
|
|
4661
4698
|
const detail = `Provider '${provider}' is blocked by provider_constraints: ${spec.block_incompatible.join("; ")}`;
|
|
4662
4699
|
const err = new ReplayConfigError("provider_incompatible", detail);
|
|
4663
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4700
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4664
4701
|
return createBlockingInactiveSession(client, sessionId, detail, err);
|
|
4665
4702
|
}
|
|
4666
4703
|
if (spec.warn_incompatible && spec.warn_incompatible.length > 0) {
|
|
@@ -4694,10 +4731,10 @@ function replay(client, opts = {}) {
|
|
|
4694
4731
|
emitDiagnostic2(diagnostics, { type: "replay_inactive", reason: "unsupported_client" });
|
|
4695
4732
|
return createInactiveSession(client, sessionId, "Could not resolve terminal resource");
|
|
4696
4733
|
}
|
|
4697
|
-
const
|
|
4734
|
+
const apiKey = resolveApiKey2(opts);
|
|
4735
|
+
const protectionLevel = determineProtectionLevel(mode, opts.tools, contracts, apiKey);
|
|
4698
4736
|
const maxUnguardedCalls = opts.maxUnguardedCalls ?? DEFAULT_MAX_UNGUARDED_CALLS;
|
|
4699
4737
|
const narrowingFeedback = opts.narrowingFeedback ?? "silent";
|
|
4700
|
-
const apiKey = resolveApiKey2(opts);
|
|
4701
4738
|
let runtimeClient = null;
|
|
4702
4739
|
let runtimeSession = null;
|
|
4703
4740
|
let runtimeInitPromise = null;
|
|
@@ -4837,7 +4874,12 @@ function replay(client, opts = {}) {
|
|
|
4837
4874
|
void result.catch(() => {
|
|
4838
4875
|
});
|
|
4839
4876
|
}
|
|
4840
|
-
} catch {
|
|
4877
|
+
} catch (err) {
|
|
4878
|
+
emitDiagnostic2(diagnostics, {
|
|
4879
|
+
type: "replay_state_sync_error",
|
|
4880
|
+
session_id: sessionId,
|
|
4881
|
+
details: err instanceof Error ? err.message : "state sync failed"
|
|
4882
|
+
});
|
|
4841
4883
|
}
|
|
4842
4884
|
}
|
|
4843
4885
|
function appendCaptureToStore(capture) {
|
|
@@ -5030,7 +5072,7 @@ function replay(client, opts = {}) {
|
|
|
5030
5072
|
const msgResult = validateToolResultMessages(messages, contracts, provider);
|
|
5031
5073
|
if (!msgResult.passed) {
|
|
5032
5074
|
emitDiagnostic2(diagnostics, {
|
|
5033
|
-
type: "
|
|
5075
|
+
type: "replay_validation_warning",
|
|
5034
5076
|
details: `Message validation: ${msgResult.failures.map((f) => f.detail).join("; ")}`
|
|
5035
5077
|
});
|
|
5036
5078
|
}
|
|
@@ -5202,6 +5244,8 @@ function replay(client, opts = {}) {
|
|
|
5202
5244
|
}
|
|
5203
5245
|
timing.phase_ms += Date.now() - phaseStart;
|
|
5204
5246
|
const argValuesStart = Date.now();
|
|
5247
|
+
const workingToolCallCounts = new Map(sessionState.toolCallCounts);
|
|
5248
|
+
const intraResponseTuples = [];
|
|
5205
5249
|
for (const tc of toolCalls) {
|
|
5206
5250
|
const contract = contracts.find((c) => c.tool === tc.name);
|
|
5207
5251
|
if (contract?.argument_value_invariants && contract.argument_value_invariants.length > 0) {
|
|
@@ -5228,7 +5272,8 @@ function replay(client, opts = {}) {
|
|
|
5228
5272
|
}
|
|
5229
5273
|
}
|
|
5230
5274
|
if (resolvedSessionLimits) {
|
|
5231
|
-
const
|
|
5275
|
+
const workingState = { ...sessionState, toolCallCounts: workingToolCallCounts };
|
|
5276
|
+
const perToolResult = checkPerToolLimits(workingState, tc.name, resolvedSessionLimits);
|
|
5232
5277
|
if (perToolResult.exceeded) {
|
|
5233
5278
|
validation.failures.push({
|
|
5234
5279
|
path: `$.tool_calls.${tc.name}`,
|
|
@@ -5240,23 +5285,30 @@ function replay(client, opts = {}) {
|
|
|
5240
5285
|
});
|
|
5241
5286
|
}
|
|
5242
5287
|
}
|
|
5288
|
+
workingToolCallCounts.set(tc.name, (workingToolCallCounts.get(tc.name) ?? 0) + 1);
|
|
5243
5289
|
if (resolvedSessionLimits?.loop_detection) {
|
|
5290
|
+
const argsHash = computeArgumentsHash(tc.arguments);
|
|
5244
5291
|
const loopResult = checkLoopDetection(
|
|
5245
5292
|
tc.name,
|
|
5246
5293
|
tc.arguments,
|
|
5247
5294
|
sessionState,
|
|
5248
5295
|
resolvedSessionLimits.loop_detection
|
|
5249
5296
|
);
|
|
5250
|
-
|
|
5297
|
+
const intraMatches = intraResponseTuples.filter(
|
|
5298
|
+
(t) => t.toolName === tc.name && t.argsHash === argsHash
|
|
5299
|
+
).length;
|
|
5300
|
+
const totalMatches = loopResult.matchCount + intraMatches;
|
|
5301
|
+
if (totalMatches >= resolvedSessionLimits.loop_detection.threshold) {
|
|
5251
5302
|
validation.failures.push({
|
|
5252
5303
|
path: `$.tool_calls.${tc.name}`,
|
|
5253
5304
|
operator: "loop_detected",
|
|
5254
|
-
expected: `< ${
|
|
5255
|
-
found: String(
|
|
5256
|
-
message: `Loop detected: ${tc.name} repeated ${
|
|
5305
|
+
expected: `< ${resolvedSessionLimits.loop_detection.threshold} occurrences in window ${resolvedSessionLimits.loop_detection.window}`,
|
|
5306
|
+
found: String(totalMatches),
|
|
5307
|
+
message: `Loop detected: ${tc.name} repeated ${totalMatches} times in last ${resolvedSessionLimits.loop_detection.window} steps`,
|
|
5257
5308
|
contract_file: ""
|
|
5258
5309
|
});
|
|
5259
5310
|
}
|
|
5311
|
+
intraResponseTuples.push({ toolName: tc.name, argsHash });
|
|
5260
5312
|
}
|
|
5261
5313
|
}
|
|
5262
5314
|
timing.argument_values_ms += Date.now() - argValuesStart;
|
|
@@ -5546,7 +5598,7 @@ function replay(client, opts = {}) {
|
|
|
5546
5598
|
},
|
|
5547
5599
|
getHealth() {
|
|
5548
5600
|
const isAuthoritative = runtimeSession != null && !runtimeDegraded;
|
|
5549
|
-
const effectiveProtection =
|
|
5601
|
+
const effectiveProtection = protectionLevel === "govern" && !isAuthoritative ? "protect" : protectionLevel;
|
|
5550
5602
|
let durability;
|
|
5551
5603
|
if (isAuthoritative) {
|
|
5552
5604
|
durability = runtimeClient?.isCircuitOpen() ? "degraded-local" : "server";
|
|
@@ -5681,6 +5733,9 @@ function replay(client, opts = {}) {
|
|
|
5681
5733
|
const wrapped = {};
|
|
5682
5734
|
for (const [toolName, executor] of Object.entries(baseTools)) {
|
|
5683
5735
|
wrapped[toolName] = async (args) => {
|
|
5736
|
+
if (killed) {
|
|
5737
|
+
throw new ReplayKillError(sessionId, killedAt);
|
|
5738
|
+
}
|
|
5684
5739
|
const result = await executor(args);
|
|
5685
5740
|
if (runtimeClient && leaseFence && !runtimeDegraded) {
|
|
5686
5741
|
for (const [callId, deferred] of deferredReceipts) {
|
|
@@ -5702,7 +5757,13 @@ function replay(client, opts = {}) {
|
|
|
5702
5757
|
if (receiptResult.stateAdvanced) {
|
|
5703
5758
|
sessionState = { ...sessionState, stateVersion: receiptResult.stateVersion };
|
|
5704
5759
|
}
|
|
5705
|
-
} catch {
|
|
5760
|
+
} catch (err) {
|
|
5761
|
+
emitDiagnostic2(diagnostics, {
|
|
5762
|
+
type: "replay_receipt_error",
|
|
5763
|
+
session_id: sessionId,
|
|
5764
|
+
tool_name: deferred.toolName,
|
|
5765
|
+
details: err instanceof Error ? err.message : "receipt submission failed"
|
|
5766
|
+
});
|
|
5706
5767
|
}
|
|
5707
5768
|
break;
|
|
5708
5769
|
}
|
|
@@ -5777,7 +5838,12 @@ function replay(client, opts = {}) {
|
|
|
5777
5838
|
const redactedCall = JSON.parse(redacted);
|
|
5778
5839
|
if (buffer) buffer.push(redactedCall);
|
|
5779
5840
|
appendCaptureToStore(redactedCall);
|
|
5780
|
-
} catch {
|
|
5841
|
+
} catch (err) {
|
|
5842
|
+
emitDiagnostic2(diagnostics, {
|
|
5843
|
+
type: "replay_capture_error",
|
|
5844
|
+
session_id: sessionId,
|
|
5845
|
+
details: `capture dropped (redaction failed): ${err instanceof Error ? err.message : "unknown"}`
|
|
5846
|
+
});
|
|
5781
5847
|
}
|
|
5782
5848
|
}
|
|
5783
5849
|
}
|
|
@@ -6292,10 +6358,27 @@ function applyOutputExtracts(state, updates) {
|
|
|
6292
6358
|
};
|
|
6293
6359
|
}
|
|
6294
6360
|
function resolveSessionLimits(contracts) {
|
|
6361
|
+
const merged = {};
|
|
6362
|
+
let found = false;
|
|
6295
6363
|
for (const c of contracts) {
|
|
6296
|
-
if (c.session_limits)
|
|
6364
|
+
if (!c.session_limits) continue;
|
|
6365
|
+
found = true;
|
|
6366
|
+
const sl = c.session_limits;
|
|
6367
|
+
if (sl.max_steps !== void 0 && merged.max_steps === void 0) merged.max_steps = sl.max_steps;
|
|
6368
|
+
if (sl.max_tool_calls !== void 0 && merged.max_tool_calls === void 0) merged.max_tool_calls = sl.max_tool_calls;
|
|
6369
|
+
if (sl.max_cost_per_session !== void 0 && merged.max_cost_per_session === void 0) merged.max_cost_per_session = sl.max_cost_per_session;
|
|
6370
|
+
if (sl.loop_detection && !merged.loop_detection) merged.loop_detection = sl.loop_detection;
|
|
6371
|
+
if (sl.circuit_breaker && !merged.circuit_breaker) merged.circuit_breaker = sl.circuit_breaker;
|
|
6372
|
+
if (sl.max_calls_per_tool) {
|
|
6373
|
+
if (!merged.max_calls_per_tool) merged.max_calls_per_tool = {};
|
|
6374
|
+
for (const [tool, limit] of Object.entries(sl.max_calls_per_tool)) {
|
|
6375
|
+
if (merged.max_calls_per_tool[tool] === void 0) {
|
|
6376
|
+
merged.max_calls_per_tool[tool] = limit;
|
|
6377
|
+
}
|
|
6378
|
+
}
|
|
6379
|
+
}
|
|
6297
6380
|
}
|
|
6298
|
-
return null;
|
|
6381
|
+
return found ? merged : null;
|
|
6299
6382
|
}
|
|
6300
6383
|
function buildStateSnapshot(state, lastNarrowing = null) {
|
|
6301
6384
|
const lastStep = state.lastStep ? {
|
|
@@ -6524,9 +6607,10 @@ function emitDiagnostic2(diagnostics, event) {
|
|
|
6524
6607
|
function toRecord10(value) {
|
|
6525
6608
|
return value !== null && typeof value === "object" ? value : {};
|
|
6526
6609
|
}
|
|
6527
|
-
function determineProtectionLevel(mode, tools, contracts) {
|
|
6610
|
+
function determineProtectionLevel(mode, tools, contracts, apiKey) {
|
|
6528
6611
|
if (mode === "shadow" || mode === "log-only") return "monitor";
|
|
6529
6612
|
if (!tools || Object.keys(tools).length === 0) return "protect";
|
|
6613
|
+
if (!apiKey) return "protect";
|
|
6530
6614
|
const stateBearingTools = contracts.filter(isStateBearing);
|
|
6531
6615
|
if (stateBearingTools.length === 0) return "protect";
|
|
6532
6616
|
const wrappedTools = new Set(Object.keys(tools));
|