@replayci/replay 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +275 -191
- package/dist/index.d.cts +29 -0
- package/dist/index.d.ts +29 -0
- package/dist/index.js +275 -191
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1916,7 +1916,12 @@ function safelyCaptureResponse(input) {
|
|
|
1916
1916
|
});
|
|
1917
1917
|
input.persistHealthEvent();
|
|
1918
1918
|
}
|
|
1919
|
-
} catch {
|
|
1919
|
+
} catch (err) {
|
|
1920
|
+
emitDiagnostic(input.diagnostics, {
|
|
1921
|
+
type: "capture_error",
|
|
1922
|
+
session_id: input.sessionId,
|
|
1923
|
+
details: err instanceof Error ? err.message : "response capture failed"
|
|
1924
|
+
});
|
|
1920
1925
|
}
|
|
1921
1926
|
}
|
|
1922
1927
|
function safelyPushStreamCapture(input) {
|
|
@@ -1945,7 +1950,12 @@ function safelyPushStreamCapture(input) {
|
|
|
1945
1950
|
});
|
|
1946
1951
|
input.persistHealthEvent();
|
|
1947
1952
|
}
|
|
1948
|
-
} catch {
|
|
1953
|
+
} catch (err) {
|
|
1954
|
+
emitDiagnostic(input.diagnostics, {
|
|
1955
|
+
type: "capture_error",
|
|
1956
|
+
session_id: input.sessionId,
|
|
1957
|
+
details: err instanceof Error ? err.message : "stream capture failed"
|
|
1958
|
+
});
|
|
1949
1959
|
}
|
|
1950
1960
|
}
|
|
1951
1961
|
function buildCapturedCall(input) {
|
|
@@ -2514,6 +2524,173 @@ function formatErrorMessage(error) {
|
|
|
2514
2524
|
return error instanceof Error ? error.message : String(error);
|
|
2515
2525
|
}
|
|
2516
2526
|
|
|
2527
|
+
// src/preconditions.ts
|
|
2528
|
+
/**
 * Evaluate a list of preconditions against the current session.
 *
 * @param {Array<object>} preconditions - Precondition descriptors to check.
 * @param {object} sessionState - Session state handed to each evaluation.
 * @param {object} [currentArguments] - Arguments of the tool call being gated.
 * @returns {Array<{satisfied: boolean, detail: string}>} One result per
 *   precondition, in input order.
 */
function evaluatePreconditions(preconditions, sessionState, currentArguments) {
  // Bind the shared context once; each precondition is evaluated independently.
  const evaluateOne = (precondition) =>
    evaluatePrecondition(precondition, sessionState, currentArguments);
  return preconditions.map(evaluateOne);
}
|
|
2533
|
+
/**
 * Equality used for precondition comparisons.
 *
 * Primitives compare with `===`. Two non-null objects (including arrays)
 * compare by their JSON serialization, the same representation the
 * precondition cache key and failure messages already use.
 *
 * @param {unknown} left
 * @param {unknown} right
 * @returns {boolean}
 */
function preconditionValuesEqual(left, right) {
  if (left === right) return true;
  const bothStructured =
    left !== null &&
    right !== null &&
    typeof left === "object" &&
    typeof right === "object";
  return bothStructured && JSON.stringify(left) === JSON.stringify(right);
}

/**
 * Evaluate a single precondition against the session state.
 *
 * Supported clauses:
 * - `requires_step_count.gte`: `sessionState.totalStepCount` must be at least
 *   this value.
 * - `requires_prior_tool`: an earlier step must contain an allowed call to the
 *   named tool (optionally for the same `resource` value as the current call),
 *   or a cached extract for it must exist in `satisfiedPreconditions`.
 * - `with_output`: each `{ path, equals }` assertion is checked against the
 *   matching step's `outputExtract` (falling back to the cached extract).
 *
 * @param {object} precondition - Precondition descriptor.
 * @param {object} sessionState - Reads `totalStepCount`, `steps` and
 *   `satisfiedPreconditions` (a Map keyed by tool name or `tool:resourceJson`).
 * @param {object} [currentArguments] - Arguments of the call being gated; used
 *   to resolve the `resource` path.
 * @returns {{satisfied: boolean, detail: string}} `detail` is empty on success.
 */
function evaluatePrecondition(precondition, sessionState, currentArguments) {
  if (precondition.requires_step_count) {
    const required = precondition.requires_step_count.gte;
    if (sessionState.totalStepCount < required) {
      return {
        satisfied: false,
        detail: `Need ${required} prior steps, have ${sessionState.totalStepCount}`
      };
    }
  }
  if (precondition.requires_prior_tool) {
    const toolName = precondition.requires_prior_tool;
    // `resource` may be a bare path string or an object carrying `path`.
    const resourcePath = precondition.resource
      ? typeof precondition.resource === "string"
        ? precondition.resource
        : precondition.resource.path
      : void 0;
    const resourceValue = resourcePath
      ? extractPath(currentArguments ?? {}, resourcePath)
      : void 0;
    const cacheKey = resourceValue !== void 0
      ? `${toolName}:${JSON.stringify(resourceValue)}`
      : toolName;

    const isQualifyingCall = (tc) => {
      if (tc.toolName !== toolName) return false;
      if (tc.proposal_decision !== "allowed") return false;
      // Structural comparison: an object/array resource value never matches
      // under `!==`, although the cache key above treats such values as valid.
      if (
        resourceValue !== void 0 &&
        !preconditionValuesEqual(tc.resourceValues?.[resourcePath], resourceValue)
      ) {
        return false;
      }
      return true;
    };

    // Scan newest-to-oldest so the most recent qualifying step wins.
    let priorStep;
    for (let i = sessionState.steps.length - 1; i >= 0; i--) {
      const step = sessionState.steps[i];
      if (step.toolCalls.some(isQualifyingCall)) {
        priorStep = step;
        break;
      }
    }

    const cachedExtract = sessionState.satisfiedPreconditions.get(cacheKey);
    if (!priorStep && cachedExtract === void 0) {
      const detail = resourceValue !== void 0
        ? `Required prior tool ${toolName} not found for resource ${JSON.stringify(resourceValue)}`
        : `Required prior tool ${toolName} not found in session`;
      return { satisfied: false, detail };
    }

    if (precondition.with_output) {
      const extract = priorStep?.outputExtract ?? cachedExtract ?? {};
      for (const assertion of precondition.with_output) {
        const value = extractPath(extract, assertion.path);
        if (
          assertion.equals !== void 0 &&
          !preconditionValuesEqual(value, assertion.equals)
        ) {
          return {
            satisfied: false,
            detail: `Prior tool output assertion failed: ${assertion.path} \u2014 expected ${JSON.stringify(assertion.equals)}, got ${JSON.stringify(value)}`
          };
        }
      }
    }
  }
  return { satisfied: true, detail: "" };
}
|
|
2583
|
+
/**
 * Resolve a dotted path (optionally prefixed with `$.`) against a value.
 *
 * Only own properties are followed, so names inherited from
 * `Object.prototype` (`constructor`, `toString`, `__proto__`, ...) are
 * reported as absent rather than leaking built-ins into validation results.
 * Array indices and `length` are own properties and still resolve.
 *
 * @param {unknown} obj - Root value to walk.
 * @param {string} path - Path such as `$.a.b`, `a.b`, `$` or `` (root).
 * @returns {unknown} The value at the path, or `undefined` when any segment
 *   is missing or an intermediate value is not an object.
 */
function extractPath(obj, path) {
  const cleanPath = path.startsWith("$.") ? path.slice(2) : path;
  if (cleanPath === "" || cleanPath === "$") return obj;
  let current = obj;
  for (const segment of cleanPath.split(".")) {
    if (current === null || current === void 0) return void 0;
    if (typeof current !== "object") return void 0;
    if (!Object.prototype.hasOwnProperty.call(current, segment)) return void 0;
    current = current[segment];
  }
  return current;
}
|
|
2595
|
+
|
|
2596
|
+
// src/argumentValues.ts
|
|
2597
|
+
/**
 * Check parsed tool-call arguments against a list of value invariants.
 *
 * Each invariant names a `path` and any combination of operators:
 * `exact_match`, `regex`, `one_of`, `type`, `gte`, `lte`. Every operator that
 * is present is evaluated; each violation produces one failure record.
 *
 * An absent (`undefined`) or `null` value always fails a `regex` invariant.
 * Without that guard the value would be coerced to the text "undefined" /
 * "null" and could satisfy patterns such as `^[a-z]+$`, unlike every other
 * operator, which already rejects absent values.
 *
 * @param {object} parsedArguments - Parsed arguments of one tool call.
 * @param {Array<object>} invariants - Invariant descriptors.
 * @returns {{passed: boolean, failures: Array<{path: string, operator: string,
 *   expected: unknown, actual: unknown, detail: string}>}}
 */
function evaluateArgumentValueInvariants(parsedArguments, invariants) {
  const failures = [];
  for (const inv of invariants) {
    const value = extractPath(parsedArguments, inv.path);
    const report = (operator, expected, actual, detail) => {
      failures.push({ path: inv.path, operator, expected, actual, detail });
    };

    if (inv.exact_match !== void 0) {
      // Non-string values are compared through their JSON form.
      const strValue = typeof value === "string" ? value : JSON.stringify(value);
      if (strValue !== inv.exact_match) {
        report(
          "exact_match",
          inv.exact_match,
          value,
          `Expected exact match "${inv.exact_match}", got "${strValue}"`
        );
      }
    }

    if (inv.regex !== void 0) {
      const strValue = typeof value === "string" ? value : String(value);
      try {
        // Compile first so an invalid pattern is reported even for absent values.
        const re = safeRegex(inv.regex);
        const isPresent = value !== void 0 && value !== null;
        if (!isPresent || !re.test(strValue)) {
          report(
            "regex",
            inv.regex,
            value,
            `Value "${strValue}" does not match regex "${inv.regex}"`
          );
        }
      } catch {
        report("regex", inv.regex, value, `Invalid regex pattern: "${inv.regex}"`);
      }
    }

    if (inv.one_of !== void 0) {
      // Candidates must agree in `typeof` and in JSON form.
      const serialized = JSON.stringify(value);
      const match = inv.one_of.some(
        (candidate) =>
          typeof candidate === typeof value && JSON.stringify(candidate) === serialized
      );
      if (!match) {
        report(
          "one_of",
          inv.one_of,
          value,
          `Value ${JSON.stringify(value)} not in ${JSON.stringify(inv.one_of)}`
        );
      }
    }

    if (inv.type !== void 0) {
      // `typeof` vocabulary, with arrays reported as "array".
      const actualType = Array.isArray(value) ? "array" : typeof value;
      if (actualType !== inv.type) {
        report(
          "type",
          inv.type,
          actualType,
          `Expected type "${inv.type}", got "${actualType}"`
        );
      }
    }

    if (typeof inv.gte === "number") {
      if (typeof value !== "number" || value < inv.gte) {
        report("gte", inv.gte, value, `Expected >= ${inv.gte}, got ${JSON.stringify(value)}`);
      }
    }

    if (typeof inv.lte === "number") {
      if (typeof value !== "number" || value > inv.lte) {
        report("lte", inv.lte, value, `Expected <= ${inv.lte}, got ${JSON.stringify(value)}`);
      }
    }
  }
  return {
    passed: failures.length === 0,
    failures
  };
}
|
|
2693
|
+
|
|
2517
2694
|
// src/validate.ts
|
|
2518
2695
|
function prepareContracts(input) {
|
|
2519
2696
|
assertSupportedNodeRuntime();
|
|
@@ -2578,6 +2755,7 @@ function evaluateAllContracts(matchedContracts, extraction) {
|
|
|
2578
2755
|
failures.push(...evaluateExpectTools(contract, extraction.toolCalls));
|
|
2579
2756
|
failures.push(...evaluateOutputInvariants(contract, extraction.normalizedResponse));
|
|
2580
2757
|
failures.push(...evaluateExpectedToolCallMatchers(contract, extraction.toolCalls));
|
|
2758
|
+
failures.push(...evaluateArgumentInvariants(contract, extraction.toolCalls));
|
|
2581
2759
|
}
|
|
2582
2760
|
return {
|
|
2583
2761
|
pass: failures.length === 0,
|
|
@@ -2745,6 +2923,28 @@ function evaluateExpectedToolCallMatchers(contract, toolCalls) {
|
|
|
2745
2923
|
contract_file: contract.contract_file
|
|
2746
2924
|
}));
|
|
2747
2925
|
}
|
|
2926
|
+
/**
 * Apply a contract's `argument_value_invariants` to every tool call made to
 * the contract's tool and translate the results into contract failures.
 *
 * @param {object} contract - Reads `tool`, `argument_value_invariants` and
 *   `contract_file`.
 * @param {Iterable<object>} toolCalls - Calls carrying `name` and
 *   `parsedArguments`.
 * @returns {Array<{path: string, operator: string, expected: unknown,
 *   found: unknown, message: string, contract_file: unknown}>} Empty when the
 *   contract declares no invariants or none are violated.
 */
function evaluateArgumentInvariants(contract, toolCalls) {
  const invariants = contract.argument_value_invariants;
  if (!invariants || invariants.length === 0) {
    return [];
  }

  const collected = [];
  for (const call of toolCalls) {
    if (call.name === contract.tool) {
      // Anything that is not a non-null object is validated as an empty argument set.
      const rawArgs = call.parsedArguments;
      const argsObject = rawArgs != null && typeof rawArgs === "object" ? rawArgs : {};
      const outcome = evaluateArgumentValueInvariants(argsObject, invariants);
      for (const { path, operator, expected, actual, detail } of outcome.failures) {
        collected.push({
          path,
          operator,
          expected,
          found: actual,
          message: detail,
          contract_file: contract.contract_file
        });
      }
    }
  }
  return collected;
}
|
|
2748
2948
|
function mapInvariantFailure(contract, failure, normalizedResponse) {
|
|
2749
2949
|
const invariant = findMatchingInvariant(contract.assertions.output_invariants, failure);
|
|
2750
2950
|
const lookup = (0, import_contracts_core3.getPathValue)(normalizedResponse, failure.path);
|
|
@@ -3607,75 +3807,6 @@ function checkCircuitBreaker(state, config) {
|
|
|
3607
3807
|
return { triggered: false, reason: null };
|
|
3608
3808
|
}
|
|
3609
3809
|
|
|
3610
|
-
// src/preconditions.ts
|
|
3611
|
-
function evaluatePreconditions(preconditions, sessionState, currentArguments) {
|
|
3612
|
-
return preconditions.map(
|
|
3613
|
-
(p) => evaluatePrecondition(p, sessionState, currentArguments)
|
|
3614
|
-
);
|
|
3615
|
-
}
|
|
3616
|
-
function evaluatePrecondition(precondition, sessionState, currentArguments) {
|
|
3617
|
-
if (precondition.requires_step_count) {
|
|
3618
|
-
const required = precondition.requires_step_count.gte;
|
|
3619
|
-
if (sessionState.totalStepCount < required) {
|
|
3620
|
-
return {
|
|
3621
|
-
satisfied: false,
|
|
3622
|
-
detail: `Need ${required} prior steps, have ${sessionState.totalStepCount}`
|
|
3623
|
-
};
|
|
3624
|
-
}
|
|
3625
|
-
}
|
|
3626
|
-
if (precondition.requires_prior_tool) {
|
|
3627
|
-
const toolName = precondition.requires_prior_tool;
|
|
3628
|
-
const resourcePath = precondition.resource ? typeof precondition.resource === "string" ? precondition.resource : precondition.resource.path : void 0;
|
|
3629
|
-
const resourceValue = resourcePath ? extractPath(currentArguments ?? {}, resourcePath) : void 0;
|
|
3630
|
-
const cacheKey = resourceValue !== void 0 ? `${toolName}:${JSON.stringify(resourceValue)}` : toolName;
|
|
3631
|
-
let priorStep;
|
|
3632
|
-
for (let i = sessionState.steps.length - 1; i >= 0; i--) {
|
|
3633
|
-
const s = sessionState.steps[i];
|
|
3634
|
-
if (s.toolCalls.some((tc) => {
|
|
3635
|
-
if (tc.toolName !== toolName) return false;
|
|
3636
|
-
if (tc.proposal_decision !== "allowed") return false;
|
|
3637
|
-
if (resourceValue !== void 0 && tc.resourceValues?.[resourcePath] !== resourceValue) {
|
|
3638
|
-
return false;
|
|
3639
|
-
}
|
|
3640
|
-
return true;
|
|
3641
|
-
})) {
|
|
3642
|
-
priorStep = s;
|
|
3643
|
-
break;
|
|
3644
|
-
}
|
|
3645
|
-
}
|
|
3646
|
-
const cachedExtract = sessionState.satisfiedPreconditions.get(cacheKey);
|
|
3647
|
-
if (!priorStep && cachedExtract === void 0) {
|
|
3648
|
-
const detail = resourceValue !== void 0 ? `Required prior tool ${toolName} not found for resource ${JSON.stringify(resourceValue)}` : `Required prior tool ${toolName} not found in session`;
|
|
3649
|
-
return { satisfied: false, detail };
|
|
3650
|
-
}
|
|
3651
|
-
if (precondition.with_output) {
|
|
3652
|
-
const extract = priorStep?.outputExtract ?? cachedExtract ?? {};
|
|
3653
|
-
for (const assertion of precondition.with_output) {
|
|
3654
|
-
const value = extractPath(extract, assertion.path);
|
|
3655
|
-
if (assertion.equals !== void 0 && value !== assertion.equals) {
|
|
3656
|
-
return {
|
|
3657
|
-
satisfied: false,
|
|
3658
|
-
detail: `Prior tool output assertion failed: ${assertion.path} \u2014 expected ${JSON.stringify(assertion.equals)}, got ${JSON.stringify(value)}`
|
|
3659
|
-
};
|
|
3660
|
-
}
|
|
3661
|
-
}
|
|
3662
|
-
}
|
|
3663
|
-
}
|
|
3664
|
-
return { satisfied: true, detail: "" };
|
|
3665
|
-
}
|
|
3666
|
-
function extractPath(obj, path) {
|
|
3667
|
-
const cleanPath = path.startsWith("$.") ? path.slice(2) : path;
|
|
3668
|
-
if (cleanPath === "" || cleanPath === "$") return obj;
|
|
3669
|
-
const segments = cleanPath.split(".");
|
|
3670
|
-
let current = obj;
|
|
3671
|
-
for (const segment of segments) {
|
|
3672
|
-
if (current === null || current === void 0) return void 0;
|
|
3673
|
-
if (typeof current !== "object") return void 0;
|
|
3674
|
-
current = current[segment];
|
|
3675
|
-
}
|
|
3676
|
-
return current;
|
|
3677
|
-
}
|
|
3678
|
-
|
|
3679
3810
|
// src/crossStep.ts
|
|
3680
3811
|
function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
3681
3812
|
const failures = [];
|
|
@@ -3749,104 +3880,6 @@ function validateCrossStep(toolCalls, sessionState, contracts) {
|
|
|
3749
3880
|
};
|
|
3750
3881
|
}
|
|
3751
3882
|
|
|
3752
|
-
// src/argumentValues.ts
|
|
3753
|
-
function evaluateArgumentValueInvariants(parsedArguments, invariants) {
|
|
3754
|
-
const failures = [];
|
|
3755
|
-
for (const inv of invariants) {
|
|
3756
|
-
const value = extractPath(parsedArguments, inv.path);
|
|
3757
|
-
if (inv.exact_match !== void 0) {
|
|
3758
|
-
const strValue = typeof value === "string" ? value : JSON.stringify(value);
|
|
3759
|
-
if (strValue !== inv.exact_match) {
|
|
3760
|
-
failures.push({
|
|
3761
|
-
path: inv.path,
|
|
3762
|
-
operator: "exact_match",
|
|
3763
|
-
expected: inv.exact_match,
|
|
3764
|
-
actual: value,
|
|
3765
|
-
detail: `Expected exact match "${inv.exact_match}", got "${strValue}"`
|
|
3766
|
-
});
|
|
3767
|
-
}
|
|
3768
|
-
}
|
|
3769
|
-
if (inv.regex !== void 0) {
|
|
3770
|
-
const strValue = typeof value === "string" ? value : String(value);
|
|
3771
|
-
try {
|
|
3772
|
-
const re = safeRegex(inv.regex);
|
|
3773
|
-
if (!re.test(strValue)) {
|
|
3774
|
-
failures.push({
|
|
3775
|
-
path: inv.path,
|
|
3776
|
-
operator: "regex",
|
|
3777
|
-
expected: inv.regex,
|
|
3778
|
-
actual: value,
|
|
3779
|
-
detail: `Value "${strValue}" does not match regex "${inv.regex}"`
|
|
3780
|
-
});
|
|
3781
|
-
}
|
|
3782
|
-
} catch {
|
|
3783
|
-
failures.push({
|
|
3784
|
-
path: inv.path,
|
|
3785
|
-
operator: "regex",
|
|
3786
|
-
expected: inv.regex,
|
|
3787
|
-
actual: value,
|
|
3788
|
-
detail: `Invalid regex pattern: "${inv.regex}"`
|
|
3789
|
-
});
|
|
3790
|
-
}
|
|
3791
|
-
}
|
|
3792
|
-
if (inv.one_of !== void 0) {
|
|
3793
|
-
const match = inv.one_of.some((candidate) => {
|
|
3794
|
-
if (typeof candidate === typeof value) {
|
|
3795
|
-
return JSON.stringify(candidate) === JSON.stringify(value);
|
|
3796
|
-
}
|
|
3797
|
-
return false;
|
|
3798
|
-
});
|
|
3799
|
-
if (!match) {
|
|
3800
|
-
failures.push({
|
|
3801
|
-
path: inv.path,
|
|
3802
|
-
operator: "one_of",
|
|
3803
|
-
expected: inv.one_of,
|
|
3804
|
-
actual: value,
|
|
3805
|
-
detail: `Value ${JSON.stringify(value)} not in ${JSON.stringify(inv.one_of)}`
|
|
3806
|
-
});
|
|
3807
|
-
}
|
|
3808
|
-
}
|
|
3809
|
-
if (inv.type !== void 0) {
|
|
3810
|
-
const actualType = Array.isArray(value) ? "array" : typeof value;
|
|
3811
|
-
if (actualType !== inv.type) {
|
|
3812
|
-
failures.push({
|
|
3813
|
-
path: inv.path,
|
|
3814
|
-
operator: "type",
|
|
3815
|
-
expected: inv.type,
|
|
3816
|
-
actual: actualType,
|
|
3817
|
-
detail: `Expected type "${inv.type}", got "${actualType}"`
|
|
3818
|
-
});
|
|
3819
|
-
}
|
|
3820
|
-
}
|
|
3821
|
-
if (typeof inv.gte === "number") {
|
|
3822
|
-
if (typeof value !== "number" || value < inv.gte) {
|
|
3823
|
-
failures.push({
|
|
3824
|
-
path: inv.path,
|
|
3825
|
-
operator: "gte",
|
|
3826
|
-
expected: inv.gte,
|
|
3827
|
-
actual: value,
|
|
3828
|
-
detail: `Expected >= ${inv.gte}, got ${JSON.stringify(value)}`
|
|
3829
|
-
});
|
|
3830
|
-
}
|
|
3831
|
-
}
|
|
3832
|
-
if (typeof inv.lte === "number") {
|
|
3833
|
-
if (typeof value !== "number" || value > inv.lte) {
|
|
3834
|
-
failures.push({
|
|
3835
|
-
path: inv.path,
|
|
3836
|
-
operator: "lte",
|
|
3837
|
-
expected: inv.lte,
|
|
3838
|
-
actual: value,
|
|
3839
|
-
detail: `Expected <= ${inv.lte}, got ${JSON.stringify(value)}`
|
|
3840
|
-
});
|
|
3841
|
-
}
|
|
3842
|
-
}
|
|
3843
|
-
}
|
|
3844
|
-
return {
|
|
3845
|
-
passed: failures.length === 0,
|
|
3846
|
-
failures
|
|
3847
|
-
};
|
|
3848
|
-
}
|
|
3849
|
-
|
|
3850
3883
|
// src/messageValidation.ts
|
|
3851
3884
|
var import_contracts_core4 = require("@replayci/contracts-core");
|
|
3852
3885
|
function validateToolResultMessages(messages, contracts, provider) {
|
|
@@ -4620,12 +4653,12 @@ function replay(client, opts = {}) {
|
|
|
4620
4653
|
contracts = resolveContracts(opts);
|
|
4621
4654
|
} catch (err) {
|
|
4622
4655
|
const detail = err instanceof Error ? err.message : "Failed to load contracts";
|
|
4623
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4656
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4624
4657
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4625
4658
|
}
|
|
4626
4659
|
const configError = validateConfig(contracts, opts);
|
|
4627
4660
|
if (configError) {
|
|
4628
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4661
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: configError.message });
|
|
4629
4662
|
return createBlockingInactiveSession(client, sessionId, configError.message, configError);
|
|
4630
4663
|
}
|
|
4631
4664
|
let discoveredSessionYaml = null;
|
|
@@ -4633,7 +4666,7 @@ function replay(client, opts = {}) {
|
|
|
4633
4666
|
discoveredSessionYaml = discoverSessionYaml(opts);
|
|
4634
4667
|
} catch (err) {
|
|
4635
4668
|
const detail = `session.yaml: ${err instanceof Error ? err.message : String(err)}`;
|
|
4636
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4669
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4637
4670
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4638
4671
|
}
|
|
4639
4672
|
let sessionYaml = discoveredSessionYaml;
|
|
@@ -4649,15 +4682,19 @@ function replay(client, opts = {}) {
|
|
|
4649
4682
|
tools: opts.tools ? new Map(Object.entries(opts.tools)) : void 0
|
|
4650
4683
|
});
|
|
4651
4684
|
} catch (err) {
|
|
4685
|
+
const detail = `Session compilation failed: ${err instanceof Error ? err.message : String(err)}`;
|
|
4652
4686
|
emitDiagnostic2(diagnostics, {
|
|
4653
|
-
type: "
|
|
4654
|
-
details:
|
|
4687
|
+
type: "replay_compile_error",
|
|
4688
|
+
details: detail
|
|
4655
4689
|
});
|
|
4690
|
+
if (discoveredSessionYaml) {
|
|
4691
|
+
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4692
|
+
}
|
|
4656
4693
|
}
|
|
4657
4694
|
if (compiledSession?.warnings && compiledSession.warnings.length > 0) {
|
|
4658
4695
|
for (const warning of compiledSession.warnings) {
|
|
4659
4696
|
emitDiagnostic2(diagnostics, {
|
|
4660
|
-
type: "
|
|
4697
|
+
type: "replay_compile_warning",
|
|
4661
4698
|
details: `Compile warning: ${warning}`
|
|
4662
4699
|
});
|
|
4663
4700
|
}
|
|
@@ -4669,7 +4706,7 @@ function replay(client, opts = {}) {
|
|
|
4669
4706
|
if (spec.block_incompatible && spec.block_incompatible.length > 0) {
|
|
4670
4707
|
const detail = `Provider '${provider}' is blocked by provider_constraints: ${spec.block_incompatible.join("; ")}`;
|
|
4671
4708
|
const err = new ReplayConfigError("provider_incompatible", detail);
|
|
4672
|
-
emitDiagnostic2(diagnostics, { type: "
|
|
4709
|
+
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
4673
4710
|
return createBlockingInactiveSession(client, sessionId, detail, err);
|
|
4674
4711
|
}
|
|
4675
4712
|
if (spec.warn_incompatible && spec.warn_incompatible.length > 0) {
|
|
@@ -4703,10 +4740,10 @@ function replay(client, opts = {}) {
|
|
|
4703
4740
|
emitDiagnostic2(diagnostics, { type: "replay_inactive", reason: "unsupported_client" });
|
|
4704
4741
|
return createInactiveSession(client, sessionId, "Could not resolve terminal resource");
|
|
4705
4742
|
}
|
|
4706
|
-
const
|
|
4743
|
+
const apiKey = resolveApiKey2(opts);
|
|
4744
|
+
const protectionLevel = determineProtectionLevel(mode, opts.tools, contracts, apiKey);
|
|
4707
4745
|
const maxUnguardedCalls = opts.maxUnguardedCalls ?? DEFAULT_MAX_UNGUARDED_CALLS;
|
|
4708
4746
|
const narrowingFeedback = opts.narrowingFeedback ?? "silent";
|
|
4709
|
-
const apiKey = resolveApiKey2(opts);
|
|
4710
4747
|
let runtimeClient = null;
|
|
4711
4748
|
let runtimeSession = null;
|
|
4712
4749
|
let runtimeInitPromise = null;
|
|
@@ -4846,7 +4883,12 @@ function replay(client, opts = {}) {
|
|
|
4846
4883
|
void result.catch(() => {
|
|
4847
4884
|
});
|
|
4848
4885
|
}
|
|
4849
|
-
} catch {
|
|
4886
|
+
} catch (err) {
|
|
4887
|
+
emitDiagnostic2(diagnostics, {
|
|
4888
|
+
type: "replay_state_sync_error",
|
|
4889
|
+
session_id: sessionId,
|
|
4890
|
+
details: err instanceof Error ? err.message : "state sync failed"
|
|
4891
|
+
});
|
|
4850
4892
|
}
|
|
4851
4893
|
}
|
|
4852
4894
|
function appendCaptureToStore(capture) {
|
|
@@ -5039,7 +5081,7 @@ function replay(client, opts = {}) {
|
|
|
5039
5081
|
const msgResult = validateToolResultMessages(messages, contracts, provider);
|
|
5040
5082
|
if (!msgResult.passed) {
|
|
5041
5083
|
emitDiagnostic2(diagnostics, {
|
|
5042
|
-
type: "
|
|
5084
|
+
type: "replay_validation_warning",
|
|
5043
5085
|
details: `Message validation: ${msgResult.failures.map((f) => f.detail).join("; ")}`
|
|
5044
5086
|
});
|
|
5045
5087
|
}
|
|
@@ -5211,6 +5253,8 @@ function replay(client, opts = {}) {
|
|
|
5211
5253
|
}
|
|
5212
5254
|
timing.phase_ms += Date.now() - phaseStart;
|
|
5213
5255
|
const argValuesStart = Date.now();
|
|
5256
|
+
const workingToolCallCounts = new Map(sessionState.toolCallCounts);
|
|
5257
|
+
const intraResponseTuples = [];
|
|
5214
5258
|
for (const tc of toolCalls) {
|
|
5215
5259
|
const contract = contracts.find((c) => c.tool === tc.name);
|
|
5216
5260
|
if (contract?.argument_value_invariants && contract.argument_value_invariants.length > 0) {
|
|
@@ -5237,7 +5281,8 @@ function replay(client, opts = {}) {
|
|
|
5237
5281
|
}
|
|
5238
5282
|
}
|
|
5239
5283
|
if (resolvedSessionLimits) {
|
|
5240
|
-
const
|
|
5284
|
+
const workingState = { ...sessionState, toolCallCounts: workingToolCallCounts };
|
|
5285
|
+
const perToolResult = checkPerToolLimits(workingState, tc.name, resolvedSessionLimits);
|
|
5241
5286
|
if (perToolResult.exceeded) {
|
|
5242
5287
|
validation.failures.push({
|
|
5243
5288
|
path: `$.tool_calls.${tc.name}`,
|
|
@@ -5249,23 +5294,30 @@ function replay(client, opts = {}) {
|
|
|
5249
5294
|
});
|
|
5250
5295
|
}
|
|
5251
5296
|
}
|
|
5297
|
+
workingToolCallCounts.set(tc.name, (workingToolCallCounts.get(tc.name) ?? 0) + 1);
|
|
5252
5298
|
if (resolvedSessionLimits?.loop_detection) {
|
|
5299
|
+
const argsHash = computeArgumentsHash(tc.arguments);
|
|
5253
5300
|
const loopResult = checkLoopDetection(
|
|
5254
5301
|
tc.name,
|
|
5255
5302
|
tc.arguments,
|
|
5256
5303
|
sessionState,
|
|
5257
5304
|
resolvedSessionLimits.loop_detection
|
|
5258
5305
|
);
|
|
5259
|
-
|
|
5306
|
+
const intraMatches = intraResponseTuples.filter(
|
|
5307
|
+
(t) => t.toolName === tc.name && t.argsHash === argsHash
|
|
5308
|
+
).length;
|
|
5309
|
+
const totalMatches = loopResult.matchCount + intraMatches;
|
|
5310
|
+
if (totalMatches >= resolvedSessionLimits.loop_detection.threshold) {
|
|
5260
5311
|
validation.failures.push({
|
|
5261
5312
|
path: `$.tool_calls.${tc.name}`,
|
|
5262
5313
|
operator: "loop_detected",
|
|
5263
|
-
expected: `< ${
|
|
5264
|
-
found: String(
|
|
5265
|
-
message: `Loop detected: ${tc.name} repeated ${
|
|
5314
|
+
expected: `< ${resolvedSessionLimits.loop_detection.threshold} occurrences in window ${resolvedSessionLimits.loop_detection.window}`,
|
|
5315
|
+
found: String(totalMatches),
|
|
5316
|
+
message: `Loop detected: ${tc.name} repeated ${totalMatches} times in last ${resolvedSessionLimits.loop_detection.window} steps`,
|
|
5266
5317
|
contract_file: ""
|
|
5267
5318
|
});
|
|
5268
5319
|
}
|
|
5320
|
+
intraResponseTuples.push({ toolName: tc.name, argsHash });
|
|
5269
5321
|
}
|
|
5270
5322
|
}
|
|
5271
5323
|
timing.argument_values_ms += Date.now() - argValuesStart;
|
|
@@ -5555,7 +5607,7 @@ function replay(client, opts = {}) {
|
|
|
5555
5607
|
},
|
|
5556
5608
|
getHealth() {
|
|
5557
5609
|
const isAuthoritative = runtimeSession != null && !runtimeDegraded;
|
|
5558
|
-
const effectiveProtection =
|
|
5610
|
+
const effectiveProtection = protectionLevel === "govern" && !isAuthoritative ? "protect" : protectionLevel;
|
|
5559
5611
|
let durability;
|
|
5560
5612
|
if (isAuthoritative) {
|
|
5561
5613
|
durability = runtimeClient?.isCircuitOpen() ? "degraded-local" : "server";
|
|
@@ -5690,6 +5742,9 @@ function replay(client, opts = {}) {
|
|
|
5690
5742
|
const wrapped = {};
|
|
5691
5743
|
for (const [toolName, executor] of Object.entries(baseTools)) {
|
|
5692
5744
|
wrapped[toolName] = async (args) => {
|
|
5745
|
+
if (killed) {
|
|
5746
|
+
throw new ReplayKillError(sessionId, killedAt);
|
|
5747
|
+
}
|
|
5693
5748
|
const result = await executor(args);
|
|
5694
5749
|
if (runtimeClient && leaseFence && !runtimeDegraded) {
|
|
5695
5750
|
for (const [callId, deferred] of deferredReceipts) {
|
|
@@ -5711,7 +5766,13 @@ function replay(client, opts = {}) {
|
|
|
5711
5766
|
if (receiptResult.stateAdvanced) {
|
|
5712
5767
|
sessionState = { ...sessionState, stateVersion: receiptResult.stateVersion };
|
|
5713
5768
|
}
|
|
5714
|
-
} catch {
|
|
5769
|
+
} catch (err) {
|
|
5770
|
+
emitDiagnostic2(diagnostics, {
|
|
5771
|
+
type: "replay_receipt_error",
|
|
5772
|
+
session_id: sessionId,
|
|
5773
|
+
tool_name: deferred.toolName,
|
|
5774
|
+
details: err instanceof Error ? err.message : "receipt submission failed"
|
|
5775
|
+
});
|
|
5715
5776
|
}
|
|
5716
5777
|
break;
|
|
5717
5778
|
}
|
|
@@ -5786,7 +5847,12 @@ function replay(client, opts = {}) {
|
|
|
5786
5847
|
const redactedCall = JSON.parse(redacted);
|
|
5787
5848
|
if (buffer) buffer.push(redactedCall);
|
|
5788
5849
|
appendCaptureToStore(redactedCall);
|
|
5789
|
-
} catch {
|
|
5850
|
+
} catch (err) {
|
|
5851
|
+
emitDiagnostic2(diagnostics, {
|
|
5852
|
+
type: "replay_capture_error",
|
|
5853
|
+
session_id: sessionId,
|
|
5854
|
+
details: `capture dropped (redaction failed): ${err instanceof Error ? err.message : "unknown"}`
|
|
5855
|
+
});
|
|
5790
5856
|
}
|
|
5791
5857
|
}
|
|
5792
5858
|
}
|
|
@@ -6301,10 +6367,27 @@ function applyOutputExtracts(state, updates) {
|
|
|
6301
6367
|
};
|
|
6302
6368
|
}
|
|
6303
6369
|
/**
 * Merge the `session_limits` declared across contracts into one object.
 *
 * Precedence is "first contract wins", per field:
 * - `max_steps`, `max_tool_calls`, `max_cost_per_session`: first value that is
 *   not `undefined`.
 * - `loop_detection`, `circuit_breaker`: first truthy value (shared by
 *   reference, not copied).
 * - `max_calls_per_tool`: merged per tool name, first declaration wins.
 *
 * Per-tool limits are tracked with own-property checks so that a tool whose
 * name collides with an inherited `Object.prototype` member (`toString`,
 * `constructor`, `valueOf`, ...) keeps its limit instead of being dropped.
 *
 * @param {Array<object>} contracts - Contracts, each optionally carrying
 *   `session_limits`.
 * @returns {object|null} The merged limits, or `null` when no contract
 *   declares `session_limits`.
 */
function resolveSessionLimits(contracts) {
  const scalarKeys = ["max_steps", "max_tool_calls", "max_cost_per_session"];
  const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
  const merged = {};
  let found = false;

  for (const contract of contracts) {
    const limits = contract.session_limits;
    if (!limits) continue;
    found = true;

    for (const key of scalarKeys) {
      if (limits[key] !== void 0 && merged[key] === void 0) merged[key] = limits[key];
    }
    if (limits.loop_detection && !merged.loop_detection) merged.loop_detection = limits.loop_detection;
    if (limits.circuit_breaker && !merged.circuit_breaker) merged.circuit_breaker = limits.circuit_breaker;

    if (limits.max_calls_per_tool) {
      if (!merged.max_calls_per_tool) merged.max_calls_per_tool = {};
      const perTool = merged.max_calls_per_tool;
      for (const [tool, limit] of Object.entries(limits.max_calls_per_tool)) {
        if (!hasOwn(perTool, tool) || perTool[tool] === void 0) {
          // defineProperty creates a plain own data property even for names
          // such as "__proto__", which ordinary assignment would treat specially.
          Object.defineProperty(perTool, tool, {
            value: limit,
            writable: true,
            enumerable: true,
            configurable: true
          });
        }
      }
    }
  }
  return found ? merged : null;
}
|
|
6309
6392
|
function buildStateSnapshot(state, lastNarrowing = null) {
|
|
6310
6393
|
const lastStep = state.lastStep ? {
|
|
@@ -6533,9 +6616,10 @@ function emitDiagnostic2(diagnostics, event) {
|
|
|
6533
6616
|
function toRecord10(value) {
|
|
6534
6617
|
return value !== null && typeof value === "object" ? value : {};
|
|
6535
6618
|
}
|
|
6536
|
-
function determineProtectionLevel(mode, tools, contracts) {
|
|
6619
|
+
function determineProtectionLevel(mode, tools, contracts, apiKey) {
|
|
6537
6620
|
if (mode === "shadow" || mode === "log-only") return "monitor";
|
|
6538
6621
|
if (!tools || Object.keys(tools).length === 0) return "protect";
|
|
6622
|
+
if (!apiKey) return "protect";
|
|
6539
6623
|
const stateBearingTools = contracts.filter(isStateBearing);
|
|
6540
6624
|
if (stateBearingTools.length === 0) return "protect";
|
|
6541
6625
|
const wrappedTools = new Set(Object.keys(tools));
|