@replayci/replay 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +247 -38
- package/dist/index.d.cts +23 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.js +248 -38
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -2320,7 +2320,6 @@ function normalizeInlineContract(input) {
|
|
|
2320
2320
|
if (!tool) {
|
|
2321
2321
|
throw new ReplayConfigurationError("Inline contract is missing required field: tool");
|
|
2322
2322
|
}
|
|
2323
|
-
const assertions = toRecord5(source.assertions);
|
|
2324
2323
|
const expectTools = toStringArray(source.expect_tools);
|
|
2325
2324
|
const expectedToolCalls = toExpectedToolCalls(source.expected_tool_calls);
|
|
2326
2325
|
const contract = {
|
|
@@ -2328,28 +2327,38 @@ function normalizeInlineContract(input) {
|
|
|
2328
2327
|
...toString5(source.tool_schema_hash) ? { tool_schema_hash: toString5(source.tool_schema_hash) } : {},
|
|
2329
2328
|
...isSideEffect(source.side_effect) ? { side_effect: source.side_effect } : {},
|
|
2330
2329
|
...toString5(source.contract_file) ? { contract_file: toString5(source.contract_file) } : {},
|
|
2331
|
-
timeouts
|
|
2332
|
-
total_ms: toNonNegativeNumber(toRecord5(source.timeouts).total_ms, 0)
|
|
2333
|
-
},
|
|
2334
|
-
retries
|
|
2335
|
-
|
|
2336
|
-
|
|
2337
|
-
|
|
2338
|
-
rate_limits: {
|
|
2339
|
-
on_429: {
|
|
2340
|
-
respect_retry_after: toBoolean(toRecord5(toRecord5(source.rate_limits).on_429).respect_retry_after, false),
|
|
2341
|
-
max_sleep_seconds: toNonNegativeNumber(
|
|
2342
|
-
toRecord5(toRecord5(source.rate_limits).on_429).max_sleep_seconds,
|
|
2343
|
-
0
|
|
2344
|
-
)
|
|
2330
|
+
...source.timeouts != null ? {
|
|
2331
|
+
timeouts: { total_ms: toNonNegativeNumber(toRecord5(source.timeouts).total_ms, 0) }
|
|
2332
|
+
} : {},
|
|
2333
|
+
...source.retries != null ? {
|
|
2334
|
+
retries: {
|
|
2335
|
+
max_attempts: Math.max(1, toNonNegativeNumber(toRecord5(source.retries).max_attempts, 1)),
|
|
2336
|
+
retry_on: toStringArray(toRecord5(source.retries).retry_on)
|
|
2345
2337
|
}
|
|
2346
|
-
},
|
|
2347
|
-
|
|
2348
|
-
|
|
2349
|
-
|
|
2350
|
-
|
|
2351
|
-
|
|
2352
|
-
|
|
2338
|
+
} : {},
|
|
2339
|
+
...source.rate_limits != null ? {
|
|
2340
|
+
rate_limits: {
|
|
2341
|
+
on_429: {
|
|
2342
|
+
respect_retry_after: toBoolean(toRecord5(toRecord5(source.rate_limits).on_429).respect_retry_after, false),
|
|
2343
|
+
max_sleep_seconds: toNonNegativeNumber(
|
|
2344
|
+
toRecord5(toRecord5(source.rate_limits).on_429).max_sleep_seconds,
|
|
2345
|
+
0
|
|
2346
|
+
)
|
|
2347
|
+
}
|
|
2348
|
+
}
|
|
2349
|
+
} : {},
|
|
2350
|
+
...source.assertions != null ? {
|
|
2351
|
+
assertions: {
|
|
2352
|
+
input_invariants: toInvariantArray(toRecord5(source.assertions).input_invariants),
|
|
2353
|
+
output_invariants: toInvariantArray(toRecord5(source.assertions).output_invariants)
|
|
2354
|
+
}
|
|
2355
|
+
} : {},
|
|
2356
|
+
...source.golden_cases != null ? {
|
|
2357
|
+
golden_cases: Array.isArray(source.golden_cases) ? source.golden_cases : []
|
|
2358
|
+
} : {},
|
|
2359
|
+
...source.allowed_errors != null ? {
|
|
2360
|
+
allowed_errors: toStringArray(source.allowed_errors)
|
|
2361
|
+
} : {},
|
|
2353
2362
|
...expectTools.length > 0 ? { expect_tools: expectTools } : {},
|
|
2354
2363
|
...toToolOrder(source.tool_order, expectTools.length > 0) ? {
|
|
2355
2364
|
tool_order: toToolOrder(source.tool_order, expectTools.length > 0)
|
|
@@ -2375,8 +2384,9 @@ function normalizeInlineContract(input) {
|
|
|
2375
2384
|
...Array.isArray(source.schema_derived_exclude) ? { schema_derived_exclude: source.schema_derived_exclude } : {},
|
|
2376
2385
|
...Array.isArray(source.binds) ? { binds: source.binds } : {}
|
|
2377
2386
|
};
|
|
2378
|
-
|
|
2379
|
-
|
|
2387
|
+
const filled = { ...(0, import_contracts_core2.fillContractDefaults)(contract), ...contract.contract_file ? { contract_file: contract.contract_file } : {} };
|
|
2388
|
+
validateSafeRegexes(filled);
|
|
2389
|
+
return filled;
|
|
2380
2390
|
}
|
|
2381
2391
|
function validateContractSet(contracts) {
|
|
2382
2392
|
const seenKeys = /* @__PURE__ */ new Set();
|
|
@@ -2395,11 +2405,11 @@ function validateSafeRegexes(contract) {
|
|
|
2395
2405
|
const invariantGroups = [
|
|
2396
2406
|
{
|
|
2397
2407
|
label: "assertions.input_invariants",
|
|
2398
|
-
invariants: contract.assertions
|
|
2408
|
+
invariants: contract.assertions?.input_invariants ?? []
|
|
2399
2409
|
},
|
|
2400
2410
|
{
|
|
2401
2411
|
label: "assertions.output_invariants",
|
|
2402
|
-
invariants: contract.assertions
|
|
2412
|
+
invariants: contract.assertions?.output_invariants ?? []
|
|
2403
2413
|
}
|
|
2404
2414
|
];
|
|
2405
2415
|
for (const [index, expectedToolCall] of (contract.expected_tool_calls ?? []).entries()) {
|
|
@@ -3002,7 +3012,7 @@ function evaluateExpectTools(contract, toolCalls) {
|
|
|
3002
3012
|
function evaluateOutputInvariants(contract, normalizedResponse) {
|
|
3003
3013
|
const invariantFailures = (0, import_contracts_core3.evaluateInvariants)(
|
|
3004
3014
|
normalizedResponse,
|
|
3005
|
-
contract.assertions
|
|
3015
|
+
contract.assertions?.output_invariants ?? [],
|
|
3006
3016
|
process.env
|
|
3007
3017
|
);
|
|
3008
3018
|
return invariantFailures.map(
|
|
@@ -3052,7 +3062,7 @@ function evaluateArgumentInvariants(contract, toolCalls) {
|
|
|
3052
3062
|
return failures;
|
|
3053
3063
|
}
|
|
3054
3064
|
function mapInvariantFailure(contract, failure, normalizedResponse) {
|
|
3055
|
-
const invariant = findMatchingInvariant(contract.assertions
|
|
3065
|
+
const invariant = findMatchingInvariant(contract.assertions?.output_invariants ?? [], failure);
|
|
3056
3066
|
const lookup = (0, import_contracts_core3.getPathValue)(normalizedResponse, failure.path);
|
|
3057
3067
|
return {
|
|
3058
3068
|
path: failure.path,
|
|
@@ -3778,7 +3788,7 @@ function createInitialState(sessionId, options) {
|
|
|
3778
3788
|
checkpointCount: 0
|
|
3779
3789
|
};
|
|
3780
3790
|
}
|
|
3781
|
-
function finalizeExecutedStep(state, step, contracts, compiledSession) {
|
|
3791
|
+
function finalizeExecutedStep(state, step, contracts, compiledSession, options) {
|
|
3782
3792
|
const newSteps = [...state.steps, step];
|
|
3783
3793
|
const newToolCallCounts = updateToolCallCounts(state.toolCallCounts, step);
|
|
3784
3794
|
const resolvedContracts = compiledSession ? Array.from(compiledSession.perToolContracts.values()) : contracts;
|
|
@@ -3788,7 +3798,7 @@ function finalizeExecutedStep(state, step, contracts, compiledSession) {
|
|
|
3788
3798
|
step
|
|
3789
3799
|
);
|
|
3790
3800
|
const costDelta = computeStepCost(step);
|
|
3791
|
-
const newPhase = compiledSession ? recomputePhaseFromCommitted(step.toolCalls, state, compiledSession) : state.currentPhase;
|
|
3801
|
+
const newPhase = options?.deferPhase ? state.currentPhase : compiledSession ? recomputePhaseFromCommitted(step.toolCalls, state, compiledSession) : state.currentPhase;
|
|
3792
3802
|
return {
|
|
3793
3803
|
...state,
|
|
3794
3804
|
steps: newSteps,
|
|
@@ -4634,7 +4644,7 @@ function validateToolResultMessages(messages, contracts, provider) {
|
|
|
4634
4644
|
for (const result of toolResults) {
|
|
4635
4645
|
const contract = contractByTool.get(result.toolName);
|
|
4636
4646
|
if (!contract) continue;
|
|
4637
|
-
const outputInvariants = contract.assertions
|
|
4647
|
+
const outputInvariants = contract.assertions?.output_invariants ?? [];
|
|
4638
4648
|
if (outputInvariants.length === 0) continue;
|
|
4639
4649
|
let parsed;
|
|
4640
4650
|
try {
|
|
@@ -5284,6 +5294,32 @@ var RuntimeClient = class {
|
|
|
5284
5294
|
stateVersion: h.state_version
|
|
5285
5295
|
};
|
|
5286
5296
|
}
|
|
5297
|
+
/**
|
|
5298
|
+
* Fetch governance plan for an agent.
|
|
5299
|
+
* Returns null on 404 (no plan exists).
|
|
5300
|
+
* @see zero-config-governance.md § GET /api/v1/governance/plan
|
|
5301
|
+
*/
|
|
5302
|
+
async fetchGovernancePlan(agent, environment) {
|
|
5303
|
+
const env = environment ?? "development";
|
|
5304
|
+
try {
|
|
5305
|
+
const data = await this.get(
|
|
5306
|
+
`/api/v1/governance/plan?agent=${encodeURIComponent(agent)}&environment=${encodeURIComponent(env)}`
|
|
5307
|
+
);
|
|
5308
|
+
return {
|
|
5309
|
+
status: data.status,
|
|
5310
|
+
compiledSession: data.compiled_session,
|
|
5311
|
+
compiledHash: data.compiled_hash,
|
|
5312
|
+
observations: data.observations,
|
|
5313
|
+
confidence: data.confidence,
|
|
5314
|
+
version: data.version
|
|
5315
|
+
};
|
|
5316
|
+
} catch (err) {
|
|
5317
|
+
if (err instanceof RuntimeClientError && err.httpStatus === 404) {
|
|
5318
|
+
return null;
|
|
5319
|
+
}
|
|
5320
|
+
throw err;
|
|
5321
|
+
}
|
|
5322
|
+
}
|
|
5287
5323
|
getHealth() {
|
|
5288
5324
|
return {
|
|
5289
5325
|
circuitOpen: this.now() < this.circuitOpenUntil,
|
|
@@ -5450,9 +5486,14 @@ function replay(client, opts = {}) {
|
|
|
5450
5486
|
return createInactiveSession(client, sessionId, "Client already has an active observe() or replay() attachment");
|
|
5451
5487
|
}
|
|
5452
5488
|
let contracts;
|
|
5489
|
+
let zeroConfigMode = false;
|
|
5453
5490
|
try {
|
|
5454
5491
|
contracts = resolveContracts(opts);
|
|
5455
5492
|
} catch (err) {
|
|
5493
|
+
const apiKeyForGov = resolveApiKey2(opts);
|
|
5494
|
+
if (apiKeyForGov && !opts.contracts && !opts.contractsDir) {
|
|
5495
|
+
return createGovernanceSession(client, sessionId, agent, provider, apiKeyForGov, opts, diagnostics);
|
|
5496
|
+
}
|
|
5456
5497
|
const detail = err instanceof Error ? err.message : "Failed to load contracts";
|
|
5457
5498
|
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
5458
5499
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
@@ -5659,6 +5700,8 @@ function replay(client, opts = {}) {
|
|
|
5659
5700
|
let shadowEvaluationCount = 0;
|
|
5660
5701
|
let manualFilter = null;
|
|
5661
5702
|
const deferredReceipts = /* @__PURE__ */ new Map();
|
|
5703
|
+
let deferredPhase = null;
|
|
5704
|
+
const hasWrappedTools = opts.tools != null && Object.keys(opts.tools).length > 0;
|
|
5662
5705
|
const contractLimits = resolveSessionLimits(contracts);
|
|
5663
5706
|
const compiledLimits = compiledSession?.sessionLimits;
|
|
5664
5707
|
const mergedLimits = { ...contractLimits ?? {}, ...compiledLimits ?? {} };
|
|
@@ -5795,6 +5838,7 @@ function replay(client, opts = {}) {
|
|
|
5795
5838
|
total_ms: 0,
|
|
5796
5839
|
enforcement_ms: 0
|
|
5797
5840
|
};
|
|
5841
|
+
deferredPhase = null;
|
|
5798
5842
|
const trace = createTrace(sessionState.totalStepCount);
|
|
5799
5843
|
const traceCtx = { trace };
|
|
5800
5844
|
let currentTraceStage = "narrow";
|
|
@@ -6617,9 +6661,29 @@ function replay(client, opts = {}) {
|
|
|
6617
6661
|
}
|
|
6618
6662
|
}
|
|
6619
6663
|
}
|
|
6664
|
+
const compatHasPhaseTransition = !!(phaseResult?.legal && phaseResult.newPhase !== sessionState.currentPhase);
|
|
6665
|
+
const compatShouldDefer = hasWrappedTools && compatHasPhaseTransition;
|
|
6620
6666
|
const prevVersion = sessionState.stateVersion;
|
|
6621
|
-
sessionState = finalizeExecutedStep(
|
|
6667
|
+
sessionState = finalizeExecutedStep(
|
|
6668
|
+
sessionState,
|
|
6669
|
+
completedStep,
|
|
6670
|
+
contracts,
|
|
6671
|
+
compiledSession,
|
|
6672
|
+
compatShouldDefer ? { deferPhase: true } : void 0
|
|
6673
|
+
);
|
|
6622
6674
|
syncStateToStore(prevVersion, sessionState);
|
|
6675
|
+
if (compatShouldDefer && compiledSession && phaseResult) {
|
|
6676
|
+
const advancingTools = /* @__PURE__ */ new Set();
|
|
6677
|
+
for (const tc of toolCalls) {
|
|
6678
|
+
const contract = compiledSession.perToolContracts.get(tc.name);
|
|
6679
|
+
if (contract?.transitions?.advances_to === phaseResult.newPhase) {
|
|
6680
|
+
advancingTools.add(tc.name);
|
|
6681
|
+
}
|
|
6682
|
+
}
|
|
6683
|
+
if (advancingTools.size > 0 && phaseResult.newPhase != null) {
|
|
6684
|
+
deferredPhase = { newPhase: phaseResult.newPhase, toolNames: advancingTools };
|
|
6685
|
+
}
|
|
6686
|
+
}
|
|
6623
6687
|
}
|
|
6624
6688
|
if (advisoryDecision.action === "block") {
|
|
6625
6689
|
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
@@ -6709,11 +6773,31 @@ function replay(client, opts = {}) {
|
|
|
6709
6773
|
}
|
|
6710
6774
|
}
|
|
6711
6775
|
}
|
|
6776
|
+
const hasPhaseTransition = phaseResult?.legal && phaseResult.newPhase !== sessionState.currentPhase;
|
|
6777
|
+
const shouldDeferPhase = hasWrappedTools && !!hasPhaseTransition;
|
|
6712
6778
|
const prevVersionAllow = sessionState.stateVersion;
|
|
6713
|
-
sessionState = finalizeExecutedStep(
|
|
6779
|
+
sessionState = finalizeExecutedStep(
|
|
6780
|
+
sessionState,
|
|
6781
|
+
completedStep,
|
|
6782
|
+
contracts,
|
|
6783
|
+
compiledSession,
|
|
6784
|
+
shouldDeferPhase ? { deferPhase: true } : void 0
|
|
6785
|
+
);
|
|
6714
6786
|
sessionState = recordDecisionOutcome(sessionState, "allowed");
|
|
6715
6787
|
syncStateToStore(prevVersionAllow, sessionState);
|
|
6716
6788
|
timing.finalize_ms += Date.now() - enforceFinalizeStart;
|
|
6789
|
+
if (shouldDeferPhase && compiledSession) {
|
|
6790
|
+
const advancingTools = /* @__PURE__ */ new Set();
|
|
6791
|
+
for (const tc of toolCalls) {
|
|
6792
|
+
const contract = compiledSession.perToolContracts.get(tc.name);
|
|
6793
|
+
if (contract?.transitions?.advances_to === phaseResult.newPhase) {
|
|
6794
|
+
advancingTools.add(tc.name);
|
|
6795
|
+
}
|
|
6796
|
+
}
|
|
6797
|
+
if (advancingTools.size > 0 && phaseResult.newPhase != null) {
|
|
6798
|
+
deferredPhase = { newPhase: phaseResult.newPhase, toolNames: advancingTools };
|
|
6799
|
+
}
|
|
6800
|
+
}
|
|
6717
6801
|
if (isActiveGovern && !attemptDegraded && attemptPendingCalls && attemptPendingCalls.size > 0) {
|
|
6718
6802
|
for (const [toolCallId, pending] of attemptPendingCalls) {
|
|
6719
6803
|
deferredReceipts.set(toolCallId, {
|
|
@@ -6731,8 +6815,8 @@ function replay(client, opts = {}) {
|
|
|
6731
6815
|
checked: { gate_mode: gateMode },
|
|
6732
6816
|
found: { blocked_count: 0, action: "allow" }
|
|
6733
6817
|
});
|
|
6734
|
-
const allowNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
|
|
6735
|
-
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: completedStep.phase, phase_after: allowNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: [], killed: false, step_index: sessionState.totalStepCount } });
|
|
6818
|
+
const allowNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== (completedStep.phase ?? sessionState.currentPhase) ? phaseResult.newPhase : sessionState.currentPhase;
|
|
6819
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: completedStep.phase, phase_after: allowNewPhase, ...shouldDeferPhase ? { phase_deferred: true } : {}, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: [], killed: false, step_index: sessionState.totalStepCount } });
|
|
6736
6820
|
trace.complete = true;
|
|
6737
6821
|
lastTrace = trace;
|
|
6738
6822
|
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
@@ -7079,6 +7163,10 @@ function replay(client, opts = {}) {
|
|
|
7079
7163
|
throw new ReplayKillError(sessionId, killedAt);
|
|
7080
7164
|
}
|
|
7081
7165
|
const result = await executor(args);
|
|
7166
|
+
if (deferredPhase && deferredPhase.toolNames.has(toolName)) {
|
|
7167
|
+
sessionState = { ...sessionState, currentPhase: deferredPhase.newPhase };
|
|
7168
|
+
deferredPhase = null;
|
|
7169
|
+
}
|
|
7082
7170
|
if (runtimeClient && leaseFence && !runtimeDegraded) {
|
|
7083
7171
|
for (const [callId, deferred] of deferredReceipts) {
|
|
7084
7172
|
if (deferred.toolName === toolName) {
|
|
@@ -7329,7 +7417,7 @@ function validateResponse2(response, toolCalls, contracts, requestToolNames, unm
|
|
|
7329
7417
|
}
|
|
7330
7418
|
}
|
|
7331
7419
|
for (const contract of matched) {
|
|
7332
|
-
const outputInvariants = contract.assertions
|
|
7420
|
+
const outputInvariants = contract.assertions?.output_invariants ?? [];
|
|
7333
7421
|
if (outputInvariants.length > 0) {
|
|
7334
7422
|
const normalizedResponse = buildNormalizedResponse(response, toolCalls);
|
|
7335
7423
|
const result = (0, import_contracts_core7.evaluateInvariants)(normalizedResponse, outputInvariants, process.env);
|
|
@@ -7497,8 +7585,9 @@ function evaluateInputInvariants(request, contracts) {
|
|
|
7497
7585
|
const requestToolSet = new Set(requestToolNames);
|
|
7498
7586
|
for (const contract of contracts) {
|
|
7499
7587
|
if (!requestToolSet.has(contract.tool)) continue;
|
|
7500
|
-
|
|
7501
|
-
|
|
7588
|
+
const inputInvariants = contract.assertions?.input_invariants ?? [];
|
|
7589
|
+
if (inputInvariants.length === 0) continue;
|
|
7590
|
+
const result = (0, import_contracts_core7.evaluateInvariants)(request, inputInvariants, process.env);
|
|
7502
7591
|
for (const failure of result) {
|
|
7503
7592
|
failures.push({
|
|
7504
7593
|
path: failure.path,
|
|
@@ -7873,6 +7962,126 @@ function createBlockingInactiveSession(client, sessionId, detail, configError) {
|
|
|
7873
7962
|
handoff: () => Promise.resolve(null)
|
|
7874
7963
|
};
|
|
7875
7964
|
}
|
|
7965
|
+
function resolveGovernanceEnvironment(opts) {
|
|
7966
|
+
if (opts.environment) return opts.environment;
|
|
7967
|
+
const envVar = typeof process !== "undefined" ? process.env.REPLAYCI_ENVIRONMENT : void 0;
|
|
7968
|
+
if (envVar === "staging") return "staging";
|
|
7969
|
+
if (envVar === "production") return "production";
|
|
7970
|
+
if (envVar === "development") return "development";
|
|
7971
|
+
const nodeEnv = typeof process !== "undefined" ? process.env.NODE_ENV : void 0;
|
|
7972
|
+
if (nodeEnv === "production") return "production";
|
|
7973
|
+
return "development";
|
|
7974
|
+
}
|
|
7975
|
+
function governanceProtectionLevel(env) {
|
|
7976
|
+
switch (env) {
|
|
7977
|
+
case "production":
|
|
7978
|
+
return "govern";
|
|
7979
|
+
case "staging":
|
|
7980
|
+
return "protect";
|
|
7981
|
+
default:
|
|
7982
|
+
return "monitor";
|
|
7983
|
+
}
|
|
7984
|
+
}
|
|
7985
|
+
function createGovernanceSession(client, sessionId, agent, provider, apiKey, opts, diagnostics) {
|
|
7986
|
+
const environment = resolveGovernanceEnvironment(opts);
|
|
7987
|
+
const protLevel = governanceProtectionLevel(environment);
|
|
7988
|
+
const runtimeClient = new RuntimeClient({
|
|
7989
|
+
apiKey,
|
|
7990
|
+
apiUrl: opts.runtimeUrl
|
|
7991
|
+
});
|
|
7992
|
+
let governancePlan;
|
|
7993
|
+
let planFetchPromise = null;
|
|
7994
|
+
let planFetchDone = false;
|
|
7995
|
+
let planFetchError = null;
|
|
7996
|
+
planFetchPromise = runtimeClient.fetchGovernancePlan(agent, environment).then((result) => {
|
|
7997
|
+
governancePlan = result;
|
|
7998
|
+
planFetchDone = true;
|
|
7999
|
+
}).catch((err) => {
|
|
8000
|
+
planFetchDone = true;
|
|
8001
|
+
planFetchError = err instanceof Error ? err.message : String(err);
|
|
8002
|
+
governancePlan = null;
|
|
8003
|
+
});
|
|
8004
|
+
const captureBuffer = new CaptureBuffer({
|
|
8005
|
+
apiKey,
|
|
8006
|
+
endpoint: opts.runtimeUrl
|
|
8007
|
+
});
|
|
8008
|
+
registerBeforeExit(captureBuffer);
|
|
8009
|
+
const terminalInfo = resolveTerminal(client, provider);
|
|
8010
|
+
if (!terminalInfo) {
|
|
8011
|
+
emitDiagnostic2(diagnostics, { type: "replay_inactive", reason: "unsupported_client" });
|
|
8012
|
+
return createInactiveSession(client, sessionId, "Could not resolve terminal resource");
|
|
8013
|
+
}
|
|
8014
|
+
const { terminal, originalCreate } = terminalInfo;
|
|
8015
|
+
const patchedCreate = async function(...args) {
|
|
8016
|
+
if (!planFetchDone && planFetchPromise) {
|
|
8017
|
+
await planFetchPromise;
|
|
8018
|
+
}
|
|
8019
|
+
const hasApprovedPlan = governancePlan && (governancePlan.status === "approved" || governancePlan.status === "enforcing") && governancePlan.compiledSession;
|
|
8020
|
+
if (hasApprovedPlan) {
|
|
8021
|
+
}
|
|
8022
|
+
const result = await originalCreate.apply(this, args);
|
|
8023
|
+
try {
|
|
8024
|
+
const toolCalls = extractToolCalls(result, provider);
|
|
8025
|
+
const usage = extractUsage(result, provider);
|
|
8026
|
+
const requestArg = args[0] && typeof args[0] === "object" ? args[0] : {};
|
|
8027
|
+
captureBuffer.push({
|
|
8028
|
+
schema_version: CAPTURE_SCHEMA_VERSION_CURRENT,
|
|
8029
|
+
agent,
|
|
8030
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
8031
|
+
provider,
|
|
8032
|
+
model_id: requestArg.model ?? "unknown",
|
|
8033
|
+
primary_tool_name: toolCalls[0]?.name ?? null,
|
|
8034
|
+
tool_names: toolCalls.map((tc) => tc.name),
|
|
8035
|
+
request: requestArg,
|
|
8036
|
+
response: result,
|
|
8037
|
+
usage,
|
|
8038
|
+
latency_ms: 0,
|
|
8039
|
+
sdk_session_id: sessionId
|
|
8040
|
+
});
|
|
8041
|
+
} catch {
|
|
8042
|
+
}
|
|
8043
|
+
return result;
|
|
8044
|
+
};
|
|
8045
|
+
terminal[terminalInfo.methodName] = patchedCreate;
|
|
8046
|
+
setReplayAttached(client);
|
|
8047
|
+
return {
|
|
8048
|
+
client,
|
|
8049
|
+
flush: () => captureBuffer.flush(),
|
|
8050
|
+
restore() {
|
|
8051
|
+
terminal[terminalInfo.methodName] = originalCreate;
|
|
8052
|
+
},
|
|
8053
|
+
kill() {
|
|
8054
|
+
},
|
|
8055
|
+
getHealth: () => ({
|
|
8056
|
+
status: "healthy",
|
|
8057
|
+
authorityState: "active",
|
|
8058
|
+
protectionLevel: protLevel,
|
|
8059
|
+
durability: "inactive",
|
|
8060
|
+
tier: "compat",
|
|
8061
|
+
compatEnforcement: "protective",
|
|
8062
|
+
cluster_detected: false,
|
|
8063
|
+
bypass_detected: false,
|
|
8064
|
+
totalSteps: 0,
|
|
8065
|
+
totalBlocks: 0,
|
|
8066
|
+
totalErrors: 0,
|
|
8067
|
+
killed: false,
|
|
8068
|
+
shadowEvaluations: 0
|
|
8069
|
+
}),
|
|
8070
|
+
getState: () => EMPTY_STATE_SNAPSHOT,
|
|
8071
|
+
getLastNarrowing: () => null,
|
|
8072
|
+
getLastShadowDelta: () => null,
|
|
8073
|
+
getLastTrace: () => null,
|
|
8074
|
+
narrow() {
|
|
8075
|
+
},
|
|
8076
|
+
widen() {
|
|
8077
|
+
},
|
|
8078
|
+
addLabel() {
|
|
8079
|
+
},
|
|
8080
|
+
tools: {},
|
|
8081
|
+
getWorkflowState: () => Promise.resolve(null),
|
|
8082
|
+
handoff: () => Promise.resolve(null)
|
|
8083
|
+
};
|
|
8084
|
+
}
|
|
7876
8085
|
function toNarrowingSnapshot(result) {
|
|
7877
8086
|
if (!result || result.removed.length === 0) return null;
|
|
7878
8087
|
return {
|
package/dist/index.d.cts
CHANGED
|
@@ -587,6 +587,15 @@ type ReplayOptions = {
|
|
|
587
587
|
runtimeUrl?: string;
|
|
588
588
|
captureLevel?: CapturePrivacyTier;
|
|
589
589
|
diagnostics?: (event: ObserveDiagnosticEvent | ReplayDiagnosticEvent) => void;
|
|
590
|
+
/**
|
|
591
|
+
* Explicit environment for zero-config governance mode selection.
|
|
592
|
+
* - "development" → monitor mode (log, don't block)
|
|
593
|
+
* - "staging" → protect mode (warn, don't block)
|
|
594
|
+
* - "production" → govern mode (block violations)
|
|
595
|
+
* Falls back to NODE_ENV if not set.
|
|
596
|
+
* @see zero-config-governance.md § Environment promotion
|
|
597
|
+
*/
|
|
598
|
+
environment?: "development" | "staging" | "production";
|
|
590
599
|
};
|
|
591
600
|
/**
|
|
592
601
|
* Raw tool executor provided by the user in `replay()` options.
|
|
@@ -1305,6 +1314,14 @@ type HandoffOfferResult = {
|
|
|
1305
1314
|
eventSeq: number;
|
|
1306
1315
|
stateVersion: number;
|
|
1307
1316
|
};
|
|
1317
|
+
type GovernancePlanResult = {
|
|
1318
|
+
status: string;
|
|
1319
|
+
compiledSession?: unknown;
|
|
1320
|
+
compiledHash?: string;
|
|
1321
|
+
observations?: number;
|
|
1322
|
+
confidence?: string;
|
|
1323
|
+
version?: number;
|
|
1324
|
+
};
|
|
1308
1325
|
type RuntimeClientHealth = {
|
|
1309
1326
|
circuitOpen: boolean;
|
|
1310
1327
|
failureCount: number;
|
|
@@ -1336,6 +1353,12 @@ declare class RuntimeClient {
|
|
|
1336
1353
|
getWorkflowState(workflowId: string): Promise<WorkflowStateResult>;
|
|
1337
1354
|
/** v4: Offer a handoff from a session. */
|
|
1338
1355
|
offerHandoff(input: HandoffOfferInput): Promise<HandoffOfferResult>;
|
|
1356
|
+
/**
|
|
1357
|
+
* Fetch governance plan for an agent.
|
|
1358
|
+
* Returns null on 404 (no plan exists).
|
|
1359
|
+
* @see zero-config-governance.md § GET /api/v1/governance/plan
|
|
1360
|
+
*/
|
|
1361
|
+
fetchGovernancePlan(agent: string, environment?: string): Promise<GovernancePlanResult | null>;
|
|
1339
1362
|
getHealth(): RuntimeClientHealth;
|
|
1340
1363
|
isCircuitOpen(): boolean;
|
|
1341
1364
|
private get;
|
package/dist/index.d.ts
CHANGED
|
@@ -587,6 +587,15 @@ type ReplayOptions = {
|
|
|
587
587
|
runtimeUrl?: string;
|
|
588
588
|
captureLevel?: CapturePrivacyTier;
|
|
589
589
|
diagnostics?: (event: ObserveDiagnosticEvent | ReplayDiagnosticEvent) => void;
|
|
590
|
+
/**
|
|
591
|
+
* Explicit environment for zero-config governance mode selection.
|
|
592
|
+
* - "development" → monitor mode (log, don't block)
|
|
593
|
+
* - "staging" → protect mode (warn, don't block)
|
|
594
|
+
* - "production" → govern mode (block violations)
|
|
595
|
+
* Falls back to NODE_ENV if not set.
|
|
596
|
+
* @see zero-config-governance.md § Environment promotion
|
|
597
|
+
*/
|
|
598
|
+
environment?: "development" | "staging" | "production";
|
|
590
599
|
};
|
|
591
600
|
/**
|
|
592
601
|
* Raw tool executor provided by the user in `replay()` options.
|
|
@@ -1305,6 +1314,14 @@ type HandoffOfferResult = {
|
|
|
1305
1314
|
eventSeq: number;
|
|
1306
1315
|
stateVersion: number;
|
|
1307
1316
|
};
|
|
1317
|
+
type GovernancePlanResult = {
|
|
1318
|
+
status: string;
|
|
1319
|
+
compiledSession?: unknown;
|
|
1320
|
+
compiledHash?: string;
|
|
1321
|
+
observations?: number;
|
|
1322
|
+
confidence?: string;
|
|
1323
|
+
version?: number;
|
|
1324
|
+
};
|
|
1308
1325
|
type RuntimeClientHealth = {
|
|
1309
1326
|
circuitOpen: boolean;
|
|
1310
1327
|
failureCount: number;
|
|
@@ -1336,6 +1353,12 @@ declare class RuntimeClient {
|
|
|
1336
1353
|
getWorkflowState(workflowId: string): Promise<WorkflowStateResult>;
|
|
1337
1354
|
/** v4: Offer a handoff from a session. */
|
|
1338
1355
|
offerHandoff(input: HandoffOfferInput): Promise<HandoffOfferResult>;
|
|
1356
|
+
/**
|
|
1357
|
+
* Fetch governance plan for an agent.
|
|
1358
|
+
* Returns null on 404 (no plan exists).
|
|
1359
|
+
* @see zero-config-governance.md § GET /api/v1/governance/plan
|
|
1360
|
+
*/
|
|
1361
|
+
fetchGovernancePlan(agent: string, environment?: string): Promise<GovernancePlanResult | null>;
|
|
1339
1362
|
getHealth(): RuntimeClientHealth;
|
|
1340
1363
|
isCircuitOpen(): boolean;
|
|
1341
1364
|
private get;
|
package/dist/index.js
CHANGED
|
@@ -2173,6 +2173,7 @@ import {
|
|
|
2173
2173
|
|
|
2174
2174
|
// src/contracts.ts
|
|
2175
2175
|
import {
|
|
2176
|
+
fillContractDefaults,
|
|
2176
2177
|
hashToolSchema,
|
|
2177
2178
|
loadContractSync,
|
|
2178
2179
|
normalizeToolArray as normalizeToolArray2
|
|
@@ -2300,7 +2301,6 @@ function normalizeInlineContract(input) {
|
|
|
2300
2301
|
if (!tool) {
|
|
2301
2302
|
throw new ReplayConfigurationError("Inline contract is missing required field: tool");
|
|
2302
2303
|
}
|
|
2303
|
-
const assertions = toRecord5(source.assertions);
|
|
2304
2304
|
const expectTools = toStringArray(source.expect_tools);
|
|
2305
2305
|
const expectedToolCalls = toExpectedToolCalls(source.expected_tool_calls);
|
|
2306
2306
|
const contract = {
|
|
@@ -2308,28 +2308,38 @@ function normalizeInlineContract(input) {
|
|
|
2308
2308
|
...toString5(source.tool_schema_hash) ? { tool_schema_hash: toString5(source.tool_schema_hash) } : {},
|
|
2309
2309
|
...isSideEffect(source.side_effect) ? { side_effect: source.side_effect } : {},
|
|
2310
2310
|
...toString5(source.contract_file) ? { contract_file: toString5(source.contract_file) } : {},
|
|
2311
|
-
timeouts
|
|
2312
|
-
total_ms: toNonNegativeNumber(toRecord5(source.timeouts).total_ms, 0)
|
|
2313
|
-
},
|
|
2314
|
-
retries
|
|
2315
|
-
|
|
2316
|
-
|
|
2317
|
-
|
|
2318
|
-
rate_limits: {
|
|
2319
|
-
on_429: {
|
|
2320
|
-
respect_retry_after: toBoolean(toRecord5(toRecord5(source.rate_limits).on_429).respect_retry_after, false),
|
|
2321
|
-
max_sleep_seconds: toNonNegativeNumber(
|
|
2322
|
-
toRecord5(toRecord5(source.rate_limits).on_429).max_sleep_seconds,
|
|
2323
|
-
0
|
|
2324
|
-
)
|
|
2311
|
+
...source.timeouts != null ? {
|
|
2312
|
+
timeouts: { total_ms: toNonNegativeNumber(toRecord5(source.timeouts).total_ms, 0) }
|
|
2313
|
+
} : {},
|
|
2314
|
+
...source.retries != null ? {
|
|
2315
|
+
retries: {
|
|
2316
|
+
max_attempts: Math.max(1, toNonNegativeNumber(toRecord5(source.retries).max_attempts, 1)),
|
|
2317
|
+
retry_on: toStringArray(toRecord5(source.retries).retry_on)
|
|
2325
2318
|
}
|
|
2326
|
-
},
|
|
2327
|
-
|
|
2328
|
-
|
|
2329
|
-
|
|
2330
|
-
|
|
2331
|
-
|
|
2332
|
-
|
|
2319
|
+
} : {},
|
|
2320
|
+
...source.rate_limits != null ? {
|
|
2321
|
+
rate_limits: {
|
|
2322
|
+
on_429: {
|
|
2323
|
+
respect_retry_after: toBoolean(toRecord5(toRecord5(source.rate_limits).on_429).respect_retry_after, false),
|
|
2324
|
+
max_sleep_seconds: toNonNegativeNumber(
|
|
2325
|
+
toRecord5(toRecord5(source.rate_limits).on_429).max_sleep_seconds,
|
|
2326
|
+
0
|
|
2327
|
+
)
|
|
2328
|
+
}
|
|
2329
|
+
}
|
|
2330
|
+
} : {},
|
|
2331
|
+
...source.assertions != null ? {
|
|
2332
|
+
assertions: {
|
|
2333
|
+
input_invariants: toInvariantArray(toRecord5(source.assertions).input_invariants),
|
|
2334
|
+
output_invariants: toInvariantArray(toRecord5(source.assertions).output_invariants)
|
|
2335
|
+
}
|
|
2336
|
+
} : {},
|
|
2337
|
+
...source.golden_cases != null ? {
|
|
2338
|
+
golden_cases: Array.isArray(source.golden_cases) ? source.golden_cases : []
|
|
2339
|
+
} : {},
|
|
2340
|
+
...source.allowed_errors != null ? {
|
|
2341
|
+
allowed_errors: toStringArray(source.allowed_errors)
|
|
2342
|
+
} : {},
|
|
2333
2343
|
...expectTools.length > 0 ? { expect_tools: expectTools } : {},
|
|
2334
2344
|
...toToolOrder(source.tool_order, expectTools.length > 0) ? {
|
|
2335
2345
|
tool_order: toToolOrder(source.tool_order, expectTools.length > 0)
|
|
@@ -2355,8 +2365,9 @@ function normalizeInlineContract(input) {
|
|
|
2355
2365
|
...Array.isArray(source.schema_derived_exclude) ? { schema_derived_exclude: source.schema_derived_exclude } : {},
|
|
2356
2366
|
...Array.isArray(source.binds) ? { binds: source.binds } : {}
|
|
2357
2367
|
};
|
|
2358
|
-
|
|
2359
|
-
|
|
2368
|
+
const filled = { ...fillContractDefaults(contract), ...contract.contract_file ? { contract_file: contract.contract_file } : {} };
|
|
2369
|
+
validateSafeRegexes(filled);
|
|
2370
|
+
return filled;
|
|
2360
2371
|
}
|
|
2361
2372
|
function validateContractSet(contracts) {
|
|
2362
2373
|
const seenKeys = /* @__PURE__ */ new Set();
|
|
@@ -2375,11 +2386,11 @@ function validateSafeRegexes(contract) {
|
|
|
2375
2386
|
const invariantGroups = [
|
|
2376
2387
|
{
|
|
2377
2388
|
label: "assertions.input_invariants",
|
|
2378
|
-
invariants: contract.assertions
|
|
2389
|
+
invariants: contract.assertions?.input_invariants ?? []
|
|
2379
2390
|
},
|
|
2380
2391
|
{
|
|
2381
2392
|
label: "assertions.output_invariants",
|
|
2382
|
-
invariants: contract.assertions
|
|
2393
|
+
invariants: contract.assertions?.output_invariants ?? []
|
|
2383
2394
|
}
|
|
2384
2395
|
];
|
|
2385
2396
|
for (const [index, expectedToolCall] of (contract.expected_tool_calls ?? []).entries()) {
|
|
@@ -2982,7 +2993,7 @@ function evaluateExpectTools(contract, toolCalls) {
|
|
|
2982
2993
|
function evaluateOutputInvariants(contract, normalizedResponse) {
|
|
2983
2994
|
const invariantFailures = evaluateInvariants(
|
|
2984
2995
|
normalizedResponse,
|
|
2985
|
-
contract.assertions
|
|
2996
|
+
contract.assertions?.output_invariants ?? [],
|
|
2986
2997
|
process.env
|
|
2987
2998
|
);
|
|
2988
2999
|
return invariantFailures.map(
|
|
@@ -3032,7 +3043,7 @@ function evaluateArgumentInvariants(contract, toolCalls) {
|
|
|
3032
3043
|
return failures;
|
|
3033
3044
|
}
|
|
3034
3045
|
function mapInvariantFailure(contract, failure, normalizedResponse) {
|
|
3035
|
-
const invariant = findMatchingInvariant(contract.assertions
|
|
3046
|
+
const invariant = findMatchingInvariant(contract.assertions?.output_invariants ?? [], failure);
|
|
3036
3047
|
const lookup = getPathValue(normalizedResponse, failure.path);
|
|
3037
3048
|
return {
|
|
3038
3049
|
path: failure.path,
|
|
@@ -3767,7 +3778,7 @@ function createInitialState(sessionId, options) {
|
|
|
3767
3778
|
checkpointCount: 0
|
|
3768
3779
|
};
|
|
3769
3780
|
}
|
|
3770
|
-
function finalizeExecutedStep(state, step, contracts, compiledSession) {
|
|
3781
|
+
function finalizeExecutedStep(state, step, contracts, compiledSession, options) {
|
|
3771
3782
|
const newSteps = [...state.steps, step];
|
|
3772
3783
|
const newToolCallCounts = updateToolCallCounts(state.toolCallCounts, step);
|
|
3773
3784
|
const resolvedContracts = compiledSession ? Array.from(compiledSession.perToolContracts.values()) : contracts;
|
|
@@ -3777,7 +3788,7 @@ function finalizeExecutedStep(state, step, contracts, compiledSession) {
|
|
|
3777
3788
|
step
|
|
3778
3789
|
);
|
|
3779
3790
|
const costDelta = computeStepCost(step);
|
|
3780
|
-
const newPhase = compiledSession ? recomputePhaseFromCommitted(step.toolCalls, state, compiledSession) : state.currentPhase;
|
|
3791
|
+
const newPhase = options?.deferPhase ? state.currentPhase : compiledSession ? recomputePhaseFromCommitted(step.toolCalls, state, compiledSession) : state.currentPhase;
|
|
3781
3792
|
return {
|
|
3782
3793
|
...state,
|
|
3783
3794
|
steps: newSteps,
|
|
@@ -4625,7 +4636,7 @@ function validateToolResultMessages(messages, contracts, provider) {
|
|
|
4625
4636
|
for (const result of toolResults) {
|
|
4626
4637
|
const contract = contractByTool.get(result.toolName);
|
|
4627
4638
|
if (!contract) continue;
|
|
4628
|
-
const outputInvariants = contract.assertions
|
|
4639
|
+
const outputInvariants = contract.assertions?.output_invariants ?? [];
|
|
4629
4640
|
if (outputInvariants.length === 0) continue;
|
|
4630
4641
|
let parsed;
|
|
4631
4642
|
try {
|
|
@@ -5275,6 +5286,32 @@ var RuntimeClient = class {
|
|
|
5275
5286
|
stateVersion: h.state_version
|
|
5276
5287
|
};
|
|
5277
5288
|
}
|
|
5289
|
+
/**
|
|
5290
|
+
* Fetch governance plan for an agent.
|
|
5291
|
+
* Returns null on 404 (no plan exists).
|
|
5292
|
+
* @see zero-config-governance.md § GET /api/v1/governance/plan
|
|
5293
|
+
*/
|
|
5294
|
+
async fetchGovernancePlan(agent, environment) {
|
|
5295
|
+
const env = environment ?? "development";
|
|
5296
|
+
try {
|
|
5297
|
+
const data = await this.get(
|
|
5298
|
+
`/api/v1/governance/plan?agent=${encodeURIComponent(agent)}&environment=${encodeURIComponent(env)}`
|
|
5299
|
+
);
|
|
5300
|
+
return {
|
|
5301
|
+
status: data.status,
|
|
5302
|
+
compiledSession: data.compiled_session,
|
|
5303
|
+
compiledHash: data.compiled_hash,
|
|
5304
|
+
observations: data.observations,
|
|
5305
|
+
confidence: data.confidence,
|
|
5306
|
+
version: data.version
|
|
5307
|
+
};
|
|
5308
|
+
} catch (err) {
|
|
5309
|
+
if (err instanceof RuntimeClientError && err.httpStatus === 404) {
|
|
5310
|
+
return null;
|
|
5311
|
+
}
|
|
5312
|
+
throw err;
|
|
5313
|
+
}
|
|
5314
|
+
}
|
|
5278
5315
|
getHealth() {
|
|
5279
5316
|
return {
|
|
5280
5317
|
circuitOpen: this.now() < this.circuitOpenUntil,
|
|
@@ -5441,9 +5478,14 @@ function replay(client, opts = {}) {
|
|
|
5441
5478
|
return createInactiveSession(client, sessionId, "Client already has an active observe() or replay() attachment");
|
|
5442
5479
|
}
|
|
5443
5480
|
let contracts;
|
|
5481
|
+
let zeroConfigMode = false;
|
|
5444
5482
|
try {
|
|
5445
5483
|
contracts = resolveContracts(opts);
|
|
5446
5484
|
} catch (err) {
|
|
5485
|
+
const apiKeyForGov = resolveApiKey2(opts);
|
|
5486
|
+
if (apiKeyForGov && !opts.contracts && !opts.contractsDir) {
|
|
5487
|
+
return createGovernanceSession(client, sessionId, agent, provider, apiKeyForGov, opts, diagnostics);
|
|
5488
|
+
}
|
|
5447
5489
|
const detail = err instanceof Error ? err.message : "Failed to load contracts";
|
|
5448
5490
|
emitDiagnostic2(diagnostics, { type: "replay_compile_error", details: detail });
|
|
5449
5491
|
return createBlockingInactiveSession(client, sessionId, detail);
|
|
@@ -5650,6 +5692,8 @@ function replay(client, opts = {}) {
|
|
|
5650
5692
|
let shadowEvaluationCount = 0;
|
|
5651
5693
|
let manualFilter = null;
|
|
5652
5694
|
const deferredReceipts = /* @__PURE__ */ new Map();
|
|
5695
|
+
let deferredPhase = null;
|
|
5696
|
+
const hasWrappedTools = opts.tools != null && Object.keys(opts.tools).length > 0;
|
|
5653
5697
|
const contractLimits = resolveSessionLimits(contracts);
|
|
5654
5698
|
const compiledLimits = compiledSession?.sessionLimits;
|
|
5655
5699
|
const mergedLimits = { ...contractLimits ?? {}, ...compiledLimits ?? {} };
|
|
@@ -5786,6 +5830,7 @@ function replay(client, opts = {}) {
|
|
|
5786
5830
|
total_ms: 0,
|
|
5787
5831
|
enforcement_ms: 0
|
|
5788
5832
|
};
|
|
5833
|
+
deferredPhase = null;
|
|
5789
5834
|
const trace = createTrace(sessionState.totalStepCount);
|
|
5790
5835
|
const traceCtx = { trace };
|
|
5791
5836
|
let currentTraceStage = "narrow";
|
|
@@ -6608,9 +6653,29 @@ function replay(client, opts = {}) {
|
|
|
6608
6653
|
}
|
|
6609
6654
|
}
|
|
6610
6655
|
}
|
|
6656
|
+
const compatHasPhaseTransition = !!(phaseResult?.legal && phaseResult.newPhase !== sessionState.currentPhase);
|
|
6657
|
+
const compatShouldDefer = hasWrappedTools && compatHasPhaseTransition;
|
|
6611
6658
|
const prevVersion = sessionState.stateVersion;
|
|
6612
|
-
sessionState = finalizeExecutedStep(
|
|
6659
|
+
sessionState = finalizeExecutedStep(
|
|
6660
|
+
sessionState,
|
|
6661
|
+
completedStep,
|
|
6662
|
+
contracts,
|
|
6663
|
+
compiledSession,
|
|
6664
|
+
compatShouldDefer ? { deferPhase: true } : void 0
|
|
6665
|
+
);
|
|
6613
6666
|
syncStateToStore(prevVersion, sessionState);
|
|
6667
|
+
if (compatShouldDefer && compiledSession && phaseResult) {
|
|
6668
|
+
const advancingTools = /* @__PURE__ */ new Set();
|
|
6669
|
+
for (const tc of toolCalls) {
|
|
6670
|
+
const contract = compiledSession.perToolContracts.get(tc.name);
|
|
6671
|
+
if (contract?.transitions?.advances_to === phaseResult.newPhase) {
|
|
6672
|
+
advancingTools.add(tc.name);
|
|
6673
|
+
}
|
|
6674
|
+
}
|
|
6675
|
+
if (advancingTools.size > 0 && phaseResult.newPhase != null) {
|
|
6676
|
+
deferredPhase = { newPhase: phaseResult.newPhase, toolNames: advancingTools };
|
|
6677
|
+
}
|
|
6678
|
+
}
|
|
6614
6679
|
}
|
|
6615
6680
|
if (advisoryDecision.action === "block") {
|
|
6616
6681
|
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
@@ -6700,11 +6765,31 @@ function replay(client, opts = {}) {
|
|
|
6700
6765
|
}
|
|
6701
6766
|
}
|
|
6702
6767
|
}
|
|
6768
|
+
const hasPhaseTransition = phaseResult?.legal && phaseResult.newPhase !== sessionState.currentPhase;
|
|
6769
|
+
const shouldDeferPhase = hasWrappedTools && !!hasPhaseTransition;
|
|
6703
6770
|
const prevVersionAllow = sessionState.stateVersion;
|
|
6704
|
-
sessionState = finalizeExecutedStep(
|
|
6771
|
+
sessionState = finalizeExecutedStep(
|
|
6772
|
+
sessionState,
|
|
6773
|
+
completedStep,
|
|
6774
|
+
contracts,
|
|
6775
|
+
compiledSession,
|
|
6776
|
+
shouldDeferPhase ? { deferPhase: true } : void 0
|
|
6777
|
+
);
|
|
6705
6778
|
sessionState = recordDecisionOutcome(sessionState, "allowed");
|
|
6706
6779
|
syncStateToStore(prevVersionAllow, sessionState);
|
|
6707
6780
|
timing.finalize_ms += Date.now() - enforceFinalizeStart;
|
|
6781
|
+
if (shouldDeferPhase && compiledSession) {
|
|
6782
|
+
const advancingTools = /* @__PURE__ */ new Set();
|
|
6783
|
+
for (const tc of toolCalls) {
|
|
6784
|
+
const contract = compiledSession.perToolContracts.get(tc.name);
|
|
6785
|
+
if (contract?.transitions?.advances_to === phaseResult.newPhase) {
|
|
6786
|
+
advancingTools.add(tc.name);
|
|
6787
|
+
}
|
|
6788
|
+
}
|
|
6789
|
+
if (advancingTools.size > 0 && phaseResult.newPhase != null) {
|
|
6790
|
+
deferredPhase = { newPhase: phaseResult.newPhase, toolNames: advancingTools };
|
|
6791
|
+
}
|
|
6792
|
+
}
|
|
6708
6793
|
if (isActiveGovern && !attemptDegraded && attemptPendingCalls && attemptPendingCalls.size > 0) {
|
|
6709
6794
|
for (const [toolCallId, pending] of attemptPendingCalls) {
|
|
6710
6795
|
deferredReceipts.set(toolCallId, {
|
|
@@ -6722,8 +6807,8 @@ function replay(client, opts = {}) {
|
|
|
6722
6807
|
checked: { gate_mode: gateMode },
|
|
6723
6808
|
found: { blocked_count: 0, action: "allow" }
|
|
6724
6809
|
});
|
|
6725
|
-
const allowNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase ? phaseResult.newPhase : sessionState.currentPhase;
|
|
6726
|
-
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: completedStep.phase, phase_after: allowNewPhase, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: [], killed: false, step_index: sessionState.totalStepCount } });
|
|
6810
|
+
const allowNewPhase = phaseResult && phaseResult.legal && phaseResult.newPhase !== (completedStep.phase ?? sessionState.currentPhase) ? phaseResult.newPhase : sessionState.currentPhase;
|
|
6811
|
+
trace.push({ stage: "finalize", tool: null, verdict: "info", reason: "cycle_complete", checked: {}, found: { state_version: sessionState.stateVersion, phase_before: completedStep.phase, phase_after: allowNewPhase, ...shouldDeferPhase ? { phase_deferred: true } : {}, tools_committed: toolCalls.map((tc) => tc.name), tools_blocked: [], killed: false, step_index: sessionState.totalStepCount } });
|
|
6727
6812
|
trace.complete = true;
|
|
6728
6813
|
lastTrace = trace;
|
|
6729
6814
|
emitDiagnostic2(diagnostics, { type: "replay_trace", session_id: sessionId, trace });
|
|
@@ -7070,6 +7155,10 @@ function replay(client, opts = {}) {
|
|
|
7070
7155
|
throw new ReplayKillError(sessionId, killedAt);
|
|
7071
7156
|
}
|
|
7072
7157
|
const result = await executor(args);
|
|
7158
|
+
if (deferredPhase && deferredPhase.toolNames.has(toolName)) {
|
|
7159
|
+
sessionState = { ...sessionState, currentPhase: deferredPhase.newPhase };
|
|
7160
|
+
deferredPhase = null;
|
|
7161
|
+
}
|
|
7073
7162
|
if (runtimeClient && leaseFence && !runtimeDegraded) {
|
|
7074
7163
|
for (const [callId, deferred] of deferredReceipts) {
|
|
7075
7164
|
if (deferred.toolName === toolName) {
|
|
@@ -7320,7 +7409,7 @@ function validateResponse2(response, toolCalls, contracts, requestToolNames, unm
|
|
|
7320
7409
|
}
|
|
7321
7410
|
}
|
|
7322
7411
|
for (const contract of matched) {
|
|
7323
|
-
const outputInvariants = contract.assertions
|
|
7412
|
+
const outputInvariants = contract.assertions?.output_invariants ?? [];
|
|
7324
7413
|
if (outputInvariants.length > 0) {
|
|
7325
7414
|
const normalizedResponse = buildNormalizedResponse(response, toolCalls);
|
|
7326
7415
|
const result = evaluateInvariants4(normalizedResponse, outputInvariants, process.env);
|
|
@@ -7488,8 +7577,9 @@ function evaluateInputInvariants(request, contracts) {
|
|
|
7488
7577
|
const requestToolSet = new Set(requestToolNames);
|
|
7489
7578
|
for (const contract of contracts) {
|
|
7490
7579
|
if (!requestToolSet.has(contract.tool)) continue;
|
|
7491
|
-
|
|
7492
|
-
|
|
7580
|
+
const inputInvariants = contract.assertions?.input_invariants ?? [];
|
|
7581
|
+
if (inputInvariants.length === 0) continue;
|
|
7582
|
+
const result = evaluateInvariants4(request, inputInvariants, process.env);
|
|
7493
7583
|
for (const failure of result) {
|
|
7494
7584
|
failures.push({
|
|
7495
7585
|
path: failure.path,
|
|
@@ -7864,6 +7954,126 @@ function createBlockingInactiveSession(client, sessionId, detail, configError) {
|
|
|
7864
7954
|
handoff: () => Promise.resolve(null)
|
|
7865
7955
|
};
|
|
7866
7956
|
}
|
|
7957
|
+
/**
 * Decide which governance environment applies to a session.
 *
 * Resolution order (first match wins):
 *   1. `opts.environment`, returned unchanged when truthy.
 *   2. `REPLAYCI_ENVIRONMENT`, when it is exactly "staging", "production" or "development".
 *   3. `NODE_ENV`, when it is exactly "production".
 *   4. "development".
 *
 * Environment variables are read defensively: when `process` is missing, or is present
 * without an `env` object, resolution falls through to "development" instead of throwing.
 *
 * @param {{ environment?: string }} [opts] - Session options; a nullish value is treated as empty.
 * @returns {string} The resolved environment name.
 */
function resolveGovernanceEnvironment(opts) {
  if (opts?.environment) return opts.environment;
  // `process` may be absent or only partially shimmed outside Node, so never assume `.env` exists.
  const env = typeof process !== "undefined" ? process?.env ?? {} : {};
  const explicit = env.REPLAYCI_ENVIRONMENT;
  if (explicit === "staging" || explicit === "production" || explicit === "development") {
    return explicit;
  }
  return env.NODE_ENV === "production" ? "production" : "development";
}
|
|
7967
|
+
/**
 * Translate a governance environment name into its protection level.
 *
 * @param {string} env - Environment name.
 * @returns {"govern"|"protect"|"monitor"} "govern" for "production", "protect" for
 *   "staging", and "monitor" for every other value.
 */
function governanceProtectionLevel(env) {
  if (env === "production") {
    return "govern";
  }
  if (env === "staging") {
    return "protect";
  }
  return "monitor";
}
|
|
7977
|
+
/**
 * Create a session for a client that has an API key but no local contracts.
 *
 * The session patches the client's terminal method so that every call:
 *   1. waits (once) for the governance-plan request started at session creation,
 *   2. forwards to the original method with the caller's `this` and arguments, and
 *   3. pushes a capture record describing the request/response into a CaptureBuffer.
 *
 * The fetched plan is stored, but nothing in this function acts on it yet: calls are
 * never blocked or modified here, only captured.
 *
 * @param {object} client - Provider client to patch; returned unchanged on the session.
 * @param {string} sessionId - Identifier stamped on capture records as `sdk_session_id`.
 * @param {string} agent - Agent name used for the plan lookup and on capture records.
 * @param {string} provider - Provider name passed to the terminal/response extractors.
 * @param {string} apiKey - API key handed to RuntimeClient and CaptureBuffer.
 * @param {object} opts - Session options; `runtimeUrl` and `environment` are read.
 * @param {*} diagnostics - Diagnostics sink forwarded to emitDiagnostic2.
 * @returns {object} The session object, or the result of createInactiveSession when the
 *   terminal method cannot be resolved for this client.
 */
function createGovernanceSession(client, sessionId, agent, provider, apiKey, opts, diagnostics) {
  // Resolve the patch target first: an unsupported client must bail out before a plan
  // request is started or a capture buffer is registered for flushing at exit.
  const terminalInfo = resolveTerminal(client, provider);
  if (!terminalInfo) {
    emitDiagnostic2(diagnostics, { type: "replay_inactive", reason: "unsupported_client" });
    return createInactiveSession(client, sessionId, "Could not resolve terminal resource");
  }
  const { terminal, originalCreate, methodName } = terminalInfo;

  const environment = resolveGovernanceEnvironment(opts);
  const protLevel = governanceProtectionLevel(environment);
  const runtimeClient = new RuntimeClient({
    apiKey,
    apiUrl: opts.runtimeUrl
  });

  // Outcome of the plan request. Recorded for completeness; not consumed in this function.
  let governancePlan = null;
  let planFetchError = null;
  let planFetchDone = false;
  // The rejection handler guarantees this promise never rejects, so awaiting it is safe.
  const planFetchPromise = runtimeClient.fetchGovernancePlan(agent, environment).then((result) => {
    governancePlan = result;
    planFetchDone = true;
  }).catch((err) => {
    planFetchDone = true;
    planFetchError = err instanceof Error ? err.message : String(err);
    governancePlan = null;
  });

  const captureBuffer = new CaptureBuffer({
    apiKey,
    endpoint: opts.runtimeUrl
  });
  registerBeforeExit(captureBuffer);

  const patchedCreate = async function(...args) {
    if (!planFetchDone) {
      await planFetchPromise;
    }
    // Time only the provider call itself, not the wait for the plan request.
    const startedAt = Date.now();
    const result = await originalCreate.apply(this, args);
    const latencyMs = Date.now() - startedAt;
    try {
      const toolCalls = extractToolCalls(result, provider);
      const usage = extractUsage(result, provider);
      const requestArg = args[0] && typeof args[0] === "object" ? args[0] : {};
      captureBuffer.push({
        schema_version: CAPTURE_SCHEMA_VERSION_CURRENT,
        agent,
        timestamp: (/* @__PURE__ */ new Date()).toISOString(),
        provider,
        model_id: requestArg.model ?? "unknown",
        primary_tool_name: toolCalls[0]?.name ?? null,
        tool_names: toolCalls.map((tc) => tc.name),
        request: requestArg,
        response: result,
        usage,
        latency_ms: latencyMs,
        sdk_session_id: sessionId
      });
    } catch {
      // Capture is best-effort: a failure to record must never break the caller's request.
    }
    return result;
  };
  terminal[methodName] = patchedCreate;
  setReplayAttached(client);

  return {
    client,
    flush: () => captureBuffer.flush(),
    // NOTE(review): restore() puts the original method back but does not undo
    // setReplayAttached(client); confirm whether the attachment marker should be cleared.
    restore() {
      terminal[methodName] = originalCreate;
    },
    kill() {
    },
    // Health is a fixed snapshot; only protectionLevel varies (with the environment).
    getHealth: () => ({
      status: "healthy",
      authorityState: "active",
      protectionLevel: protLevel,
      durability: "inactive",
      tier: "compat",
      compatEnforcement: "protective",
      cluster_detected: false,
      bypass_detected: false,
      totalSteps: 0,
      totalBlocks: 0,
      totalErrors: 0,
      killed: false,
      shadowEvaluations: 0
    }),
    getState: () => EMPTY_STATE_SNAPSHOT,
    getLastNarrowing: () => null,
    getLastShadowDelta: () => null,
    getLastTrace: () => null,
    narrow() {
    },
    widen() {
    },
    addLabel() {
    },
    tools: {},
    getWorkflowState: () => Promise.resolve(null),
    handoff: () => Promise.resolve(null)
  };
}
|
|
7867
8077
|
function toNarrowingSnapshot(result) {
|
|
7868
8078
|
if (!result || result.removed.length === 0) return null;
|
|
7869
8079
|
return {
|