@replayci/replay 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3816 -36
- package/dist/index.d.cts +1022 -3
- package/dist/index.d.ts +1022 -3
- package/dist/index.js +3809 -36
- package/package.json +2 -4
package/dist/index.js
CHANGED
|
@@ -33,6 +33,7 @@ var CaptureBuffer = class {
|
|
|
33
33
|
remoteDisabled = false;
|
|
34
34
|
closed = false;
|
|
35
35
|
droppedOverflowTotal = 0;
|
|
36
|
+
hasSucceededOnce = false;
|
|
36
37
|
lastFlushAttemptMs = 0;
|
|
37
38
|
lastFlushSuccessMs = 0;
|
|
38
39
|
lastFlushErrorMs = 0;
|
|
@@ -99,7 +100,7 @@ var CaptureBuffer = class {
|
|
|
99
100
|
}
|
|
100
101
|
flush() {
|
|
101
102
|
if (this.closed || this.remoteDisabled) {
|
|
102
|
-
return Promise.resolve();
|
|
103
|
+
return Promise.resolve({ captured: 0, sent: 0, active: true, errors: [] });
|
|
103
104
|
}
|
|
104
105
|
if (this.flushPromise) {
|
|
105
106
|
return this.flushPromise;
|
|
@@ -109,6 +110,7 @@ var CaptureBuffer = class {
|
|
|
109
110
|
type: "flush_error",
|
|
110
111
|
error: err instanceof Error ? err.message : String(err)
|
|
111
112
|
});
|
|
113
|
+
return { captured: 0, sent: 0, active: true, errors: [err instanceof Error ? err.message : String(err)] };
|
|
112
114
|
}).finally(() => {
|
|
113
115
|
if (this.flushPromise === flushPromise) {
|
|
114
116
|
this.flushPromise = void 0;
|
|
@@ -137,11 +139,11 @@ var CaptureBuffer = class {
|
|
|
137
139
|
}
|
|
138
140
|
async flushOnce() {
|
|
139
141
|
if (this.closed || this.remoteDisabled || this.queue.length === 0) {
|
|
140
|
-
return;
|
|
142
|
+
return { captured: 0, sent: 0, active: true, errors: [] };
|
|
141
143
|
}
|
|
142
144
|
const now = this.now();
|
|
143
145
|
if (this.circuitOpenUntil > now) {
|
|
144
|
-
return;
|
|
146
|
+
return { captured: 0, sent: 0, active: true, errors: [] };
|
|
145
147
|
}
|
|
146
148
|
if (this.circuitOpenUntil !== 0) {
|
|
147
149
|
this.circuitOpenUntil = 0;
|
|
@@ -149,8 +151,9 @@ var CaptureBuffer = class {
|
|
|
149
151
|
}
|
|
150
152
|
const batch = this.queue.splice(0, Math.min(this.queue.length, MAX_BATCH_SIZE));
|
|
151
153
|
if (batch.length === 0) {
|
|
152
|
-
return;
|
|
154
|
+
return { captured: 0, sent: 0, active: true, errors: [] };
|
|
153
155
|
}
|
|
156
|
+
const captured = batch.length;
|
|
154
157
|
this.lastFlushAttemptMs = this.now();
|
|
155
158
|
emitStateChange(this.onStateChange, { type: "flush_attempt" });
|
|
156
159
|
let payload = "";
|
|
@@ -158,7 +161,7 @@ var CaptureBuffer = class {
|
|
|
158
161
|
payload = JSON.stringify({ captures: batch });
|
|
159
162
|
} catch {
|
|
160
163
|
this.handleFailure("JSON serialization failed");
|
|
161
|
-
return;
|
|
164
|
+
return { captured, sent: 0, active: true, errors: ["JSON serialization failed"] };
|
|
162
165
|
}
|
|
163
166
|
const controller = new AbortController();
|
|
164
167
|
const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
|
|
@@ -181,19 +184,24 @@ var CaptureBuffer = class {
|
|
|
181
184
|
this.circuitOpenUntil = Number.MAX_SAFE_INTEGER;
|
|
182
185
|
emitDiagnostics(this.diagnostics, { type: "remote_disabled" });
|
|
183
186
|
emitStateChange(this.onStateChange, { type: "remote_disabled" });
|
|
184
|
-
return;
|
|
187
|
+
return { captured, sent: 0, active: true, errors: ["remote_disabled"] };
|
|
185
188
|
}
|
|
186
189
|
if (!response.ok) {
|
|
187
|
-
|
|
188
|
-
|
|
190
|
+
const msg = `HTTP ${response.status}`;
|
|
191
|
+
this.handleFailure(msg);
|
|
192
|
+
return { captured, sent: 0, active: true, errors: [msg] };
|
|
189
193
|
}
|
|
190
194
|
this.failureCount = 0;
|
|
191
195
|
this.circuitOpenUntil = 0;
|
|
196
|
+
this.hasSucceededOnce = true;
|
|
192
197
|
this.lastFlushSuccessMs = this.now();
|
|
193
198
|
this.lastFlushErrorMsg = null;
|
|
194
199
|
emitStateChange(this.onStateChange, { type: "flush_success", batch_size: batch.length });
|
|
200
|
+
return { captured, sent: captured, active: true, errors: [] };
|
|
195
201
|
} catch (err) {
|
|
196
|
-
|
|
202
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
203
|
+
this.handleFailure(msg);
|
|
204
|
+
return { captured, sent: 0, active: true, errors: [msg] };
|
|
197
205
|
} finally {
|
|
198
206
|
clearTimeout(timeout);
|
|
199
207
|
}
|
|
@@ -203,10 +211,12 @@ var CaptureBuffer = class {
|
|
|
203
211
|
const errorStr = errorMsg ?? "unknown error";
|
|
204
212
|
this.lastFlushErrorMs = this.now();
|
|
205
213
|
this.lastFlushErrorMsg = errorStr;
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
214
|
+
if (this.hasSucceededOnce) {
|
|
215
|
+
emitDiagnostics(this.diagnostics, {
|
|
216
|
+
type: "flush_error",
|
|
217
|
+
error: errorStr
|
|
218
|
+
});
|
|
219
|
+
}
|
|
210
220
|
emitStateChange(this.onStateChange, { type: "flush_error", error: errorStr });
|
|
211
221
|
if (this.failureCount >= CIRCUIT_BREAKER_FAILURE_LIMIT) {
|
|
212
222
|
this.circuitOpenUntil = this.now() + CIRCUIT_BREAKER_MS;
|
|
@@ -518,10 +528,12 @@ var ReplayConfigurationError = class extends Error {
|
|
|
518
528
|
};
|
|
519
529
|
/**
 * Error subclass named "ReplayInternalError".
 *
 * Besides the message, it copies two optional values from `options` onto the
 * instance: `cause` and `sessionId`. Both are `undefined` when `options` is
 * omitted or does not provide them.
 */
var ReplayInternalError = class extends Error {
  cause;
  sessionId;
  /**
   * @param {string} message - Human-readable error message.
   * @param {{ cause?: unknown, sessionId?: unknown }} [options] - Optional extras.
   */
  constructor(message, options) {
    super(message);
    const { cause, sessionId } = options ?? {};
    this.name = "ReplayInternalError";
    this.cause = cause;
    this.sessionId = sessionId;
  }
};
|
|
527
539
|
|
|
@@ -1416,8 +1428,35 @@ function ensureDir(dir) {
|
|
|
1416
1428
|
|
|
1417
1429
|
// src/observe.ts
|
|
1418
1430
|
var REPLAY_WRAPPED = /* @__PURE__ */ Symbol.for("replayci.wrapped");
|
|
1431
|
+
var REPLAY_ATTACHED = /* @__PURE__ */ Symbol.for("replayci.replay_attached");
|
|
1419
1432
|
var DEFAULT_AGENT = "default";
|
|
1420
1433
|
var IDLE_HEARTBEAT_MS = 3e4;
|
|
1434
|
+
/**
 * Fallback diagnostics sink: prints selected diagnostic events via `console.warn`.
 *
 * Events that produce output:
 *  - "observe_inactive" with `reason_code === "missing_api_key"`
 *  - "activation_warning" (prints `event.message`)
 *  - "flush_error" (prints `event.error`)
 *  - "flush_empty"
 * Every other event type (including "circuit_open") is ignored.
 *
 * @param {{ type: string, reason_code?: string, message?: string, error?: string }} event
 * @returns {void}
 */
function defaultDiagnosticsHandler(event) {
  const { type } = event;

  if (type === "observe_inactive") {
    // Only the missing-key reason is surfaced; other inactive reasons stay silent.
    if (event.reason_code === "missing_api_key") {
      console.warn(
        "[replayci] No API key provided. observe() is inactive \u2014 calls will not be captured. Set REPLAYCI_API_KEY or pass apiKey to observe()."
      );
    }
    return;
  }

  if (type === "activation_warning") {
    console.warn(`[replayci] ${event.message}`);
    return;
  }

  if (type === "flush_error") {
    console.warn(`[replayci] flush failed: ${event.error}`);
    return;
  }

  if (type === "flush_empty") {
    console.warn(
      "[replayci] flush(): No calls were captured. Ensure your LLM calls use the client returned by observe()."
    );
  }
  // "circuit_open" and any unrecognised type: intentionally no output.
}
|
|
1421
1460
|
function observe(client, opts = {}) {
|
|
1422
1461
|
assertSupportedNodeRuntime();
|
|
1423
1462
|
const sessionId = generateSessionId();
|
|
@@ -1427,38 +1466,45 @@ function observe(client, opts = {}) {
|
|
|
1427
1466
|
if (isDisabled(opts)) {
|
|
1428
1467
|
return createInactiveHandle(client, sessionId, agent, "disabled", void 0, now, opts.diagnostics, opts.stateDir);
|
|
1429
1468
|
}
|
|
1469
|
+
const diagnosticsHandler = opts.diagnostics ?? defaultDiagnosticsHandler;
|
|
1430
1470
|
const apiKey = resolveApiKey(opts);
|
|
1471
|
+
if (apiKey && !/^rci_(live|test)_/.test(apiKey)) {
|
|
1472
|
+
emitDiagnostic(diagnosticsHandler, {
|
|
1473
|
+
type: "activation_warning",
|
|
1474
|
+
message: "API key format looks wrong (expected 'rci_live_...' or 'rci_test_...'). Verify your key at app.replayci.com/settings."
|
|
1475
|
+
});
|
|
1476
|
+
}
|
|
1431
1477
|
if (!apiKey) {
|
|
1432
|
-
return createInactiveHandle(client, sessionId, agent, "missing_api_key", void 0, now,
|
|
1478
|
+
return createInactiveHandle(client, sessionId, agent, "missing_api_key", void 0, now, diagnosticsHandler, opts.stateDir);
|
|
1433
1479
|
}
|
|
1434
|
-
const provider = detectProviderSafely(client,
|
|
1480
|
+
const provider = detectProviderSafely(client, diagnosticsHandler);
|
|
1435
1481
|
if (!provider) {
|
|
1436
|
-
return createInactiveHandle(client, sessionId, agent, "unsupported_client", "Could not detect provider.", now,
|
|
1482
|
+
return createInactiveHandle(client, sessionId, agent, "unsupported_client", "Could not detect provider.", now, diagnosticsHandler, opts.stateDir);
|
|
1437
1483
|
}
|
|
1438
1484
|
const patchTarget = resolvePatchTarget(client, provider);
|
|
1439
1485
|
if (!patchTarget) {
|
|
1440
|
-
emitDiagnostic(
|
|
1486
|
+
emitDiagnostic(diagnosticsHandler, {
|
|
1441
1487
|
type: "unsupported_client",
|
|
1442
1488
|
mode: "observe",
|
|
1443
1489
|
detail: `Unsupported ${provider} client shape.`
|
|
1444
1490
|
});
|
|
1445
|
-
return createInactiveHandle(client, sessionId, agent, "unsupported_client", `Unsupported ${provider} client shape.`, now,
|
|
1491
|
+
return createInactiveHandle(client, sessionId, agent, "unsupported_client", `Unsupported ${provider} client shape.`, now, diagnosticsHandler, opts.stateDir);
|
|
1446
1492
|
}
|
|
1447
|
-
if (isWrapped(client, patchTarget.target)) {
|
|
1448
|
-
emitDiagnostic(
|
|
1493
|
+
if (isWrapped(client, patchTarget.target) || isReplayAttached(client)) {
|
|
1494
|
+
emitDiagnostic(diagnosticsHandler, {
|
|
1449
1495
|
type: "double_wrap",
|
|
1450
1496
|
mode: "observe"
|
|
1451
1497
|
});
|
|
1452
|
-
return createInactiveHandle(client, sessionId, agent, "double_wrap", void 0, now,
|
|
1498
|
+
return createInactiveHandle(client, sessionId, agent, "double_wrap", void 0, now, diagnosticsHandler, opts.stateDir);
|
|
1453
1499
|
}
|
|
1454
1500
|
const patchabilityError = getPatchabilityError(patchTarget.target, patchTarget.methodName);
|
|
1455
1501
|
if (patchabilityError) {
|
|
1456
|
-
emitDiagnostic(
|
|
1502
|
+
emitDiagnostic(diagnosticsHandler, {
|
|
1457
1503
|
type: "unsupported_client",
|
|
1458
1504
|
mode: "observe",
|
|
1459
1505
|
detail: patchabilityError
|
|
1460
1506
|
});
|
|
1461
|
-
return createInactiveHandle(client, sessionId, agent, "patch_target_unwritable", patchabilityError, now,
|
|
1507
|
+
return createInactiveHandle(client, sessionId, agent, "patch_target_unwritable", patchabilityError, now, diagnosticsHandler, opts.stateDir);
|
|
1462
1508
|
}
|
|
1463
1509
|
const captureLevel = normalizeCaptureLevel(opts.captureLevel);
|
|
1464
1510
|
const patchTargetName = `${provider}.${provider === "openai" ? "chat.completions.create" : "messages.create"}`;
|
|
@@ -1516,7 +1562,7 @@ function observe(client, opts = {}) {
|
|
|
1516
1562
|
const detail = err instanceof Error ? err.message : "Failed to write health snapshot";
|
|
1517
1563
|
lastHealthStoreErrorAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1518
1564
|
lastHealthStoreError = detail;
|
|
1519
|
-
emitDiagnostic(
|
|
1565
|
+
emitDiagnostic(diagnosticsHandler, {
|
|
1520
1566
|
type: "health_store_error",
|
|
1521
1567
|
detail
|
|
1522
1568
|
});
|
|
@@ -1556,7 +1602,7 @@ function observe(client, opts = {}) {
|
|
|
1556
1602
|
runtimeState.consecutive_failures = 0;
|
|
1557
1603
|
runtimeState.circuit_open_until = null;
|
|
1558
1604
|
runtimeState.queue_size = buffer.size;
|
|
1559
|
-
emitDiagnostic(
|
|
1605
|
+
emitDiagnostic(diagnosticsHandler, {
|
|
1560
1606
|
type: "flush_succeeded",
|
|
1561
1607
|
batch_size: event.batch_size
|
|
1562
1608
|
});
|
|
@@ -1592,7 +1638,7 @@ function observe(client, opts = {}) {
|
|
|
1592
1638
|
maxBuffer: opts.maxBuffer,
|
|
1593
1639
|
flushMs: opts.flushMs,
|
|
1594
1640
|
timeoutMs: opts.timeoutMs,
|
|
1595
|
-
diagnostics:
|
|
1641
|
+
diagnostics: diagnosticsHandler,
|
|
1596
1642
|
onStateChange: onBufferStateChange
|
|
1597
1643
|
});
|
|
1598
1644
|
registerBeforeExit(buffer);
|
|
@@ -1616,7 +1662,7 @@ function observe(client, opts = {}) {
|
|
|
1616
1662
|
persistHealthEvent();
|
|
1617
1663
|
safeRunJanitor(sessionsDir, sessionId);
|
|
1618
1664
|
startHeartbeat();
|
|
1619
|
-
emitDiagnostic(
|
|
1665
|
+
emitDiagnostic(diagnosticsHandler, {
|
|
1620
1666
|
type: "observe_activated",
|
|
1621
1667
|
session_id: sessionId,
|
|
1622
1668
|
provider,
|
|
@@ -1639,7 +1685,7 @@ function observe(client, opts = {}) {
|
|
|
1639
1685
|
sessionId,
|
|
1640
1686
|
runtimeState,
|
|
1641
1687
|
persistHealthEvent,
|
|
1642
|
-
diagnostics:
|
|
1688
|
+
diagnostics: diagnosticsHandler
|
|
1643
1689
|
});
|
|
1644
1690
|
return response;
|
|
1645
1691
|
});
|
|
@@ -1649,8 +1695,12 @@ function observe(client, opts = {}) {
|
|
|
1649
1695
|
let restored = false;
|
|
1650
1696
|
return {
|
|
1651
1697
|
client,
|
|
1652
|
-
flush() {
|
|
1653
|
-
|
|
1698
|
+
async flush() {
|
|
1699
|
+
const result = await buffer.flush();
|
|
1700
|
+
if (result.captured === 0 && result.errors.length === 0) {
|
|
1701
|
+
emitDiagnostic(diagnosticsHandler, { type: "flush_empty" });
|
|
1702
|
+
}
|
|
1703
|
+
return result;
|
|
1654
1704
|
},
|
|
1655
1705
|
restore() {
|
|
1656
1706
|
if (restored) {
|
|
@@ -1689,7 +1739,7 @@ function observe(client, opts = {}) {
|
|
|
1689
1739
|
};
|
|
1690
1740
|
} catch (err) {
|
|
1691
1741
|
const detail = err instanceof Error ? err.message : "Unknown internal error";
|
|
1692
|
-
return createInactiveHandle(client, sessionId, agent, "internal_error", detail, now, opts.diagnostics, opts.stateDir);
|
|
1742
|
+
return createInactiveHandle(client, sessionId, agent, "internal_error", detail, now, opts.diagnostics ?? defaultDiagnosticsHandler, opts.stateDir);
|
|
1693
1743
|
}
|
|
1694
1744
|
}
|
|
1695
1745
|
function createInactiveHandle(client, sessionId, agent, reasonCode, detail, activatedAt, diagnostics, stateDir) {
|
|
@@ -1716,7 +1766,7 @@ function createInactiveHandle(client, sessionId, agent, reasonCode, detail, acti
|
|
|
1716
1766
|
return {
|
|
1717
1767
|
client,
|
|
1718
1768
|
flush() {
|
|
1719
|
-
return Promise.resolve();
|
|
1769
|
+
return Promise.resolve({ captured: 0, sent: 0, active: false, errors: [] });
|
|
1720
1770
|
},
|
|
1721
1771
|
restore() {
|
|
1722
1772
|
},
|
|
@@ -2043,6 +2093,9 @@ function isWrapped(client, target) {
|
|
|
2043
2093
|
client[REPLAY_WRAPPED] || target[REPLAY_WRAPPED]
|
|
2044
2094
|
);
|
|
2045
2095
|
}
|
|
2096
|
+
/**
 * Reports whether `client` carries a truthy value under the
 * `REPLAY_ATTACHED` symbol key.
 *
 * @param {object} client - Object to inspect.
 * @returns {boolean} `true` when the marker property is truthy.
 */
function isReplayAttached(client) {
  const marker = client[REPLAY_ATTACHED];
  return !!marker;
}
|
|
2046
2099
|
function setWrapped(client, target) {
|
|
2047
2100
|
try {
|
|
2048
2101
|
client[REPLAY_WRAPPED] = true;
|
|
@@ -2124,7 +2177,16 @@ import {
|
|
|
2124
2177
|
relative,
|
|
2125
2178
|
resolve
|
|
2126
2179
|
} from "path";
|
|
2180
|
+
|
|
2181
|
+
// src/safeRegex.ts
|
|
2182
|
+
import RE2 from "re2";
|
|
2183
|
+
/**
 * Compiles `pattern` with the RE2 engine imported by this module.
 * Any error raised by the RE2 constructor propagates to the caller.
 *
 * @param {string} pattern - Regular-expression source.
 * @returns {RE2} The compiled RE2 instance.
 */
function safeRegex(pattern) {
  const compiled = new RE2(pattern);
  return compiled;
}
|
|
2186
|
+
|
|
2187
|
+
// src/contracts.ts
|
|
2127
2188
|
var CONTRACT_EXTENSIONS = /* @__PURE__ */ new Set([".yaml", ".yml"]);
|
|
2189
|
+
var SESSION_YAML_NAMES = /* @__PURE__ */ new Set(["session.yaml", "session.yml"]);
|
|
2128
2190
|
var MAX_REGEX_BYTES = 1024;
|
|
2129
2191
|
var NESTED_QUANTIFIER_RE = /\((?:[^()\\]|\\.)*[+*{](?:[^()\\]|\\.)*\)(?:[+*]|\{\d+(?:,\d*)?\})/;
|
|
2130
2192
|
function loadContracts(input) {
|
|
@@ -2216,7 +2278,7 @@ function collectContractFiles(inputPath) {
|
|
|
2216
2278
|
if (entry.isDirectory()) {
|
|
2217
2279
|
return collectContractFiles(fullPath);
|
|
2218
2280
|
}
|
|
2219
|
-
if (entry.isFile() && CONTRACT_EXTENSIONS.has(extname(entry.name).toLowerCase())) {
|
|
2281
|
+
if (entry.isFile() && CONTRACT_EXTENSIONS.has(extname(entry.name).toLowerCase()) && !SESSION_YAML_NAMES.has(entry.name.toLowerCase())) {
|
|
2220
2282
|
return [fullPath];
|
|
2221
2283
|
}
|
|
2222
2284
|
return [];
|
|
@@ -2266,7 +2328,19 @@ function normalizeInlineContract(input) {
|
|
|
2266
2328
|
...expectedToolCalls.length > 0 ? { expected_tool_calls: expectedToolCalls } : {},
|
|
2267
2329
|
...isMatchMode(source.tool_call_match_mode) ? {
|
|
2268
2330
|
tool_call_match_mode: source.tool_call_match_mode
|
|
2269
|
-
} : {}
|
|
2331
|
+
} : {},
|
|
2332
|
+
// replay() fields — pass through when present on the Contract type
|
|
2333
|
+
...source.response_format_invariants != null ? { response_format_invariants: source.response_format_invariants } : {},
|
|
2334
|
+
...source.policy != null ? { policy: source.policy } : {},
|
|
2335
|
+
...source.execution_constraints != null ? { execution_constraints: source.execution_constraints } : {},
|
|
2336
|
+
...Array.isArray(source.preconditions) ? { preconditions: source.preconditions } : {},
|
|
2337
|
+
...Array.isArray(source.forbids_after) ? { forbids_after: source.forbids_after } : {},
|
|
2338
|
+
...source.session_limits != null ? { session_limits: source.session_limits } : {},
|
|
2339
|
+
...Array.isArray(source.argument_value_invariants) ? { argument_value_invariants: source.argument_value_invariants } : {},
|
|
2340
|
+
...source.transitions != null ? { transitions: source.transitions } : {},
|
|
2341
|
+
...source.gate != null ? { gate: source.gate } : {},
|
|
2342
|
+
...source.evidence_class != null ? { evidence_class: source.evidence_class } : {},
|
|
2343
|
+
...source.commit_requirement != null ? { commit_requirement: source.commit_requirement } : {}
|
|
2270
2344
|
};
|
|
2271
2345
|
validateSafeRegexes(contract);
|
|
2272
2346
|
return contract;
|
|
@@ -2301,6 +2375,12 @@ function validateSafeRegexes(contract) {
|
|
|
2301
2375
|
invariants: expectedToolCall.argument_invariants ?? []
|
|
2302
2376
|
});
|
|
2303
2377
|
}
|
|
2378
|
+
if (contract.argument_value_invariants) {
|
|
2379
|
+
invariantGroups.push({
|
|
2380
|
+
label: "argument_value_invariants",
|
|
2381
|
+
invariants: contract.argument_value_invariants
|
|
2382
|
+
});
|
|
2383
|
+
}
|
|
2304
2384
|
for (const group of invariantGroups) {
|
|
2305
2385
|
for (const invariant of group.invariants) {
|
|
2306
2386
|
if (typeof invariant.regex !== "string") {
|
|
@@ -2317,7 +2397,7 @@ function validateSafeRegexes(contract) {
|
|
|
2317
2397
|
);
|
|
2318
2398
|
}
|
|
2319
2399
|
try {
|
|
2320
|
-
void
|
|
2400
|
+
void safeRegex(invariant.regex);
|
|
2321
2401
|
} catch (error) {
|
|
2322
2402
|
throw new ReplayConfigurationError(
|
|
2323
2403
|
`Invalid regex in ${contractLabel} (${group.label}, ${invariant.path}): ${formatErrorMessage(error)}`
|
|
@@ -2408,7 +2488,7 @@ function toToolOrder(value, hasExpectedTools) {
|
|
|
2408
2488
|
return hasExpectedTools ? "any" : void 0;
|
|
2409
2489
|
}
|
|
2410
2490
|
/**
 * Checks whether `value` is one of the recognised side-effect labels:
 * "read", "write", "destructive", "admin" or "financial".
 *
 * @param {unknown} value - Candidate label.
 * @returns {boolean} `true` only for an exact (case-sensitive) match.
 */
function isSideEffect(value) {
  switch (value) {
    case "read":
    case "write":
    case "destructive":
    case "admin":
    case "financial":
      return true;
    default:
      return false;
  }
}
|
|
2413
2493
|
function formatErrorMessage(error) {
|
|
2414
2494
|
return error instanceof Error ? error.message : String(error);
|
|
@@ -2811,8 +2891,3701 @@ function toRecord6(value) {
|
|
|
2811
2891
|
/**
 * Returns `value` when it is a non-empty string; otherwise `undefined`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string | undefined}
 */
function toString6(value) {
  if (typeof value !== "string") {
    return void 0;
  }
  return value.length === 0 ? void 0 : value;
}
|
|
2894
|
+
|
|
2895
|
+
// src/replay.ts
|
|
2896
|
+
import crypto4 from "crypto";
|
|
2897
|
+
import { existsSync as existsSync2, readFileSync as readFileSync2 } from "fs";
|
|
2898
|
+
import { join as join3, resolve as pathResolve } from "path";
|
|
2899
|
+
import {
|
|
2900
|
+
compileSession,
|
|
2901
|
+
compileWorkflow,
|
|
2902
|
+
evaluateExpectedToolCalls as evaluateExpectedToolCalls2,
|
|
2903
|
+
evaluateInvariants as evaluateInvariants4,
|
|
2904
|
+
parseSessionYaml,
|
|
2905
|
+
parseWorkflowYaml,
|
|
2906
|
+
serializeCompiledSession,
|
|
2907
|
+
serializeCompiledWorkflow
|
|
2908
|
+
} from "@replayci/contracts-core";
|
|
2909
|
+
|
|
2910
|
+
// src/redaction.ts
|
|
2911
|
+
import crypto from "crypto";
|
|
2912
|
+
|
|
2913
|
+
// ../../artifacts/schema/redaction-patterns.json
|
|
2914
|
+
/**
 * Redaction pattern table.
 *
 * Each entry holds a `detect` regex source, a `redact` regex source (with
 * their flag strings) and the `replacement` text used when redacting.
 * Entry keys are always emitted in the order
 * name, detect, detect_flags, redact, redact_flags, replacement
 * so that `JSON.stringify` of the table stays stable.
 */
var redaction_patterns_default = (() => {
  // Builds one entry; detect and redact share the same flag string in every row.
  const entry = (name, detect, redact, replacement, flags = "g") => ({
    name,
    detect,
    detect_flags: flags,
    redact,
    redact_flags: flags,
    replacement
  });
  const SLACK = "xox[baprs]-[A-Za-z0-9-]+";
  const BEARER = "Bearer\\s+[A-Za-z0-9._-]{10,}";
  const OPENAI = "sk-(?:proj-)?[A-Za-z0-9]{10,}";
  const ANTHROPIC = "sk-ant-[A-Za-z0-9_-]{20,}";
  return {
    schema_version: "1.0",
    fingerprint_algorithm: "sha256",
    patterns: [
      entry("slack_token", SLACK, SLACK, "[REDACTED]"),
      entry("bearer_token", BEARER, BEARER, "Bearer [REDACTED]"),
      entry(
        "connection_string",
        "(?:postgresql|mysql|mongodb(?:\\+srv)?)://[^\\s]+@[^\\s]+",
        "((?:postgresql|mysql|mongodb(?:\\+srv)?)://)[^\\s]+@([^\\s]+)",
        "$1[REDACTED]@$2"
      ),
      entry("openai_api_key", OPENAI, OPENAI, "[REDACTED]"),
      entry("anthropic_api_key", ANTHROPIC, ANTHROPIC, "[REDACTED]"),
      entry(
        "api_key_header",
        "(?:api[_-]key|x-api-key)\\s*[:=]\\s*[A-Za-z0-9._-]{10,}",
        "((?:api[_-]key|x-api-key)\\s*[:=]\\s*)[A-Za-z0-9._-]{10,}",
        "$1[REDACTED]",
        "gi"
      ),
      entry(
        "private_key",
        "-----BEGIN\\s[\\w\\s]*PRIVATE\\sKEY-----",
        "-----BEGIN\\s[\\w\\s]*PRIVATE\\sKEY-----[\\s\\S]*?-----END\\s[\\w\\s]*PRIVATE\\sKEY-----",
        "[REDACTED_PRIVATE_KEY]"
      )
    ]
  };
})();
|
|
2976
|
+
|
|
2977
|
+
// src/redaction.ts
|
|
2978
|
+
/**
 * Computes the SHA-256 digest of `s` and returns it as a hex string.
 *
 * @param {string} s - Input passed to the hash's `update`.
 * @returns {string} Hex-encoded digest.
 */
function sha256Hex(s) {
  const hasher = crypto.createHash("sha256");
  hasher.update(s);
  return hasher.digest("hex");
}
|
|
2981
|
+
// Pre-compiles every entry of the pattern table once at module load:
// `detectRe` is built from detect/detect_flags, `redactRe` from redact/redact_flags.
var compiledPatterns = redaction_patterns_default.patterns.map(
  ({ name, detect, detect_flags, redact, redact_flags, replacement }) => {
    const detectRe = new RegExp(detect, detect_flags);
    const redactRe = new RegExp(redact, redact_flags);
    return { name, detectRe, redactRe, replacement };
  }
);

// SHA-256 (hex) of the JSON-serialised pattern list.
var PATTERN_FINGERPRINT = sha256Hex(JSON.stringify(redaction_patterns_default.patterns));
|
|
2990
|
+
/**
 * Scans `s` with every compiled detect regex and reports each match.
 *
 * Findings are ordered by pattern (table order), then by match position.
 * The matched text is never returned, only its `sha256Hex` hash.
 *
 * @param {string} s - Text to scan.
 * @returns {{ kind: string, sample_hash: string }[]} One entry per match.
 */
function detectFindings(s) {
  return compiledPatterns.flatMap(({ name, detectRe }) =>
    Array.from(s.matchAll(detectRe), (match) => ({
      kind: name,
      sample_hash: sha256Hex(match[0])
    }))
  );
}
|
|
2999
|
+
/**
 * Applies every compiled redact regex to `s`, in table order, substituting
 * each pattern's `replacement`. Each pattern sees the output of the previous one.
 *
 * @param {string} s - Text to redact.
 * @returns {string} The redacted text.
 */
function redactString(s) {
  return compiledPatterns.reduce(
    (text, { redactRe, replacement }) => text.replace(redactRe, replacement),
    s
  );
}
|
|
3006
|
+
/**
 * Runs detection and redaction over `input` and bundles the results.
 *
 * @param {string} input - Text to process.
 * @returns {{ redacted: string, findings: object[], redacted_any: boolean, pattern_fingerprint: string }}
 *   `redacted_any` is true when the redacted text differs from `input`;
 *   `pattern_fingerprint` is the module-level `PATTERN_FINGERPRINT`.
 */
function redactCapture(input) {
  // Detection runs on the untouched input, before any replacement happens.
  const hits = detectFindings(input);
  const scrubbed = redactString(input);
  return {
    redacted: scrubbed,
    findings: hits,
    redacted_any: input !== scrubbed,
    pattern_fingerprint: PATTERN_FINGERPRINT
  };
}
|
|
3016
|
+
|
|
3017
|
+
// src/errors/replay.ts
|
|
3018
|
+
/**
 * Error subclass named "ReplayContractError" that records the `decision`,
 * `contractFile` and `failures` values passed to its constructor.
 */
var ReplayContractError = class extends Error {
  decision;
  contractFile;
  failures;
  /**
   * @param {string} message - Error message.
   * @param {object} decision - Stored as `this.decision`.
   * @param {string} contractFile - Stored as `this.contractFile`.
   * @param {unknown[]} failures - Stored as `this.failures`.
   */
  constructor(message, decision, contractFile, failures) {
    super(message);
    Object.assign(this, {
      name: "ReplayContractError",
      decision,
      contractFile,
      failures
    });
  }
};
|
|
3030
|
+
/**
 * Error subclass named "ReplayKillError". Its message is built from the
 * session id and kill timestamp, both of which are also kept on the instance.
 */
var ReplayKillError = class extends Error {
  sessionId;
  killedAt;
  /**
   * @param {string} sessionId - Stored as `this.sessionId`.
   * @param {string} killedAt - Stored as `this.killedAt`.
   */
  constructor(sessionId, killedAt) {
    const description = `Session ${sessionId} was killed at ${killedAt}`;
    super(description);
    Object.assign(this, { name: "ReplayKillError", sessionId, killedAt });
  }
};
|
|
3040
|
+
/**
 * Subclass of `ReplayConfigurationError` named "ReplayConfigError".
 * The message has the form "ReplayConfigError: <condition> — <details>";
 * both parts are also kept on the instance.
 */
var ReplayConfigError = class extends ReplayConfigurationError {
  condition;
  details;
  /**
   * @param {string} condition - Stored as `this.condition`.
   * @param {string} details - Stored as `this.details`.
   */
  constructor(condition, details) {
    const description = `ReplayConfigError: ${condition} \u2014 ${details}`;
    super(description);
    Object.assign(this, { name: "ReplayConfigError", condition, details });
  }
};
|
|
3050
|
+
|
|
3051
|
+
// src/gate.ts
|
|
3052
|
+
/**
 * Applies a gate decision to a provider response.
 *
 * - `decision.action === "allow"`: the response is returned untouched.
 * - Otherwise `onBlock` (if given) is invoked with the decision, then:
 *   - `gateMode === "reject_all"`: throws the error from `buildContractError`.
 *   - some calls remain allowed: returns the response with blocked calls stripped.
 *   - no calls remain and `gateMode === "strip_partial"`: throws.
 *   - no calls remain, any other mode: returns the normalized stripped response.
 *
 * @param {object} decision - Gate decision (`action`, `tool_calls`, `blocked`).
 * @param {object} response - Provider response object.
 * @param {string} provider - Provider name forwarded to the strip helpers.
 * @param {string} gateMode - Gate mode selector.
 * @param {(decision: object) => void} [onBlock] - Optional notification hook.
 * @returns {object} The original or rewritten response.
 * @throws The value returned by `buildContractError(decision)`.
 */
function applyGateDecision(decision, response, provider, gateMode, onBlock) {
  if (decision.action === "allow") {
    return response;
  }

  try {
    onBlock?.(decision);
  } catch {
    // Errors thrown by the hook are deliberately ignored so they cannot
    // change the gate outcome.
  }

  if (gateMode === "reject_all") {
    throw buildContractError(decision);
  }

  const permitted = getAllowedCalls(decision.tool_calls, decision.blocked);
  if (permitted.length !== 0) {
    return stripBlockedCalls(response, permitted, provider);
  }

  // Nothing survived the gate.
  if (gateMode === "strip_partial") {
    throw buildContractError(decision);
  }
  return normalizeStrippedResponse(response, provider);
}
|
|
3075
|
+
/**
 * Dispatches to the provider-specific normalizer: the OpenAI variant when
 * `provider === "openai"`, the Anthropic variant for every other value.
 *
 * @param {object} response - Provider response.
 * @param {string} provider - Provider name.
 * @returns {object} Result of the selected normalizer.
 */
function normalizeStrippedResponse(response, provider) {
  const normalize = provider === "openai" ? normalizeOpenAIStripped : normalizeAnthropicStripped;
  return normalize(response);
}
|
|
3081
|
+
/**
 * Filters `allCalls` down to the calls not covered by a `blocked` entry.
 *
 * Each blocked entry cancels at most one call. A call is matched first by
 * name plus identical (`===`) arguments; failing that, by name against a
 * blocked entry whose arguments are the empty string.
 *
 * @param {{ name: string, arguments: unknown }[]} allCalls - Candidate calls.
 * @param {{ tool_name: string, arguments: unknown }[]} blocked - Blocked entries.
 * @returns {object[]} The calls left after removing blocked ones, in original order.
 */
function getAllowedCalls(allCalls, blocked) {
  const remaining = blocked.map((entry) => ({ name: entry.tool_name, args: entry.arguments }));

  // Removes the first remaining entry satisfying `matches`; reports whether one was found.
  const consume = (matches) => {
    const position = remaining.findIndex(matches);
    if (position === -1) {
      return false;
    }
    remaining.splice(position, 1);
    return true;
  };

  return allCalls.filter((call) => {
    const blockedExactly = consume((entry) => entry.name === call.name && entry.args === call.arguments);
    if (blockedExactly) {
      return false;
    }
    const blockedByName = consume((entry) => entry.name === call.name && entry.args === "");
    return !blockedByName;
  });
}
|
|
3101
|
+
/**
 * Collects the ids of `allowedCalls` into a Set and hands the response to the
 * provider-specific stripper: OpenAI when `provider === "openai"`, Anthropic otherwise.
 *
 * @param {object} response - Provider response.
 * @param {{ id: string }[]} allowedCalls - Calls that may stay.
 * @param {string} provider - Provider name.
 * @returns {object} Result of the selected stripper.
 */
function stripBlockedCalls(response, allowedCalls, provider) {
  const permittedIds = new Set(allowedCalls.map(({ id }) => id));
  const strip = provider === "openai" ? stripOpenAICalls : stripAnthropicCalls;
  return strip(response, permittedIds);
}
|
|
3108
|
+
/**
 * Returns a copy of an OpenAI-style response whose first choice keeps only
 * the tool calls with a string `id` contained in `allowedIds`.
 *
 * The result is marked `replay_modified: true` and its `choices` array holds
 * only the rewritten first choice. When there are no choices, the original
 * `response` value is returned as-is.
 *
 * @param {object} response - OpenAI-style response.
 * @param {Set<string>} allowedIds - Ids of tool calls to keep.
 * @returns {object} Rewritten response, or `response` itself when it has no choices.
 */
function stripOpenAICalls(response, allowedIds) {
  const body = toRecord7(response);
  const choiceList = Array.isArray(body.choices) ? body.choices : [];
  if (choiceList.length === 0) {
    return response;
  }

  const head = toRecord7(choiceList[0]);
  const msg = toRecord7(head.message);
  const isPermitted = (toolCall) => {
    const { id } = toRecord7(toolCall);
    return typeof id === "string" && allowedIds.has(id);
  };
  const keptCalls = (Array.isArray(msg.tool_calls) ? msg.tool_calls : []).filter(isPermitted);

  const rewrittenMessage = { ...msg, tool_calls: keptCalls };
  const rewrittenChoice = { ...head, message: rewrittenMessage };
  return { ...body, replay_modified: true, choices: [rewrittenChoice] };
}
|
|
3131
|
+
/**
 * Returns a copy of an Anthropic-style response whose `content` drops every
 * "tool_use" block lacking a string `id` in `allowedIds`. Blocks of any other
 * type are kept. The result is always marked `replay_modified: true`.
 *
 * @param {object} response - Anthropic-style response.
 * @param {Set<string>} allowedIds - Ids of tool_use blocks to keep.
 * @returns {object} Rewritten response.
 */
function stripAnthropicCalls(response, allowedIds) {
  const body = toRecord7(response);
  const blocks = Array.isArray(body.content) ? body.content : [];

  const shouldKeep = (block) => {
    const { type, id } = toRecord7(block);
    const isToolUse = type === "tool_use";
    return !isToolUse || (typeof id === "string" && allowedIds.has(id));
  };

  return { ...body, replay_modified: true, content: blocks.filter(shouldKeep) };
}
|
|
3146
|
+
/**
 * Rewrites an OpenAI-style response for the case where no tool call is kept.
 *
 * For the first choice: `tool_calls` is set to `undefined`, a finish reason of
 * "tool_calls" becomes "stop", and missing/empty string content is replaced by
 * the placeholder "[replay: all tool calls blocked]". The result holds only
 * that first choice and is marked `replay_modified: true`.
 *
 * @param {object} response - OpenAI-style response.
 * @returns {object} Rewritten response.
 */
function normalizeOpenAIStripped(response) {
  const body = toRecord7(response);
  const choiceList = Array.isArray(body.choices) ? body.choices : [];
  if (choiceList.length === 0) {
    return { ...body, replay_modified: true };
  }

  const head = toRecord7(choiceList[0]);
  const msg = toRecord7(head.message);

  let content = "[replay: all tool calls blocked]";
  if (typeof msg.content === "string" && msg.content.length > 0) {
    content = msg.content;
  }

  let finish_reason = head.finish_reason;
  if (finish_reason === "tool_calls") {
    finish_reason = "stop";
  }

  const rewrittenMessage = { ...msg, content, tool_calls: void 0 };
  const rewrittenChoice = { ...head, finish_reason, message: rewrittenMessage };
  return { ...body, replay_modified: true, choices: [rewrittenChoice] };
}
|
|
3168
|
+
/**
 * Rewrites an Anthropic-style response for the case where no tool call is kept.
 *
 * Only "text" content blocks survive; when none exist a single placeholder
 * text block "[replay: all tool calls blocked]" is used. A stop reason of
 * "tool_use" becomes "end_turn". The result is marked `replay_modified: true`.
 *
 * @param {object} response - Anthropic-style response.
 * @returns {object} Rewritten response.
 */
function normalizeAnthropicStripped(response) {
  const body = toRecord7(response);
  const blocks = Array.isArray(body.content) ? body.content : [];
  const textOnly = blocks.filter((block) => toRecord7(block).type === "text");

  let stop_reason = body.stop_reason;
  if (stop_reason === "tool_use") {
    stop_reason = "end_turn";
  }

  const content = textOnly.length === 0
    ? [{ type: "text", text: "[replay: all tool calls blocked]" }]
    : textOnly;

  return { ...body, replay_modified: true, stop_reason, content };
}
|
|
3181
|
+
/**
 * Builds (does not throw) a `ReplayContractError` describing the first
 * blocked call of a "block" decision.
 *
 * Missing fields on the first blocked entry fall back to "unknown" (tool name,
 * reason), "" (contract file) and [] (failures).
 *
 * @param {object} decision - Gate decision; must have `action === "block"`.
 * @returns {ReplayContractError} The constructed error.
 * @throws {Error} When `decision.action` is not "block".
 */
function buildContractError(decision) {
  if (decision.action !== "block") {
    throw new Error("Cannot build contract error from allow decision");
  }

  const primary = decision.blocked[0];
  const toolName = primary?.tool_name ?? "unknown";
  const reason = primary?.reason ?? "unknown";
  const contractFile = primary?.contract_file ?? "";
  const failures = primary?.failures ?? [];

  return new ReplayContractError(
    `Tool call blocked: ${toolName} \u2014 ${reason}`,
    decision,
    contractFile,
    failures
  );
}
|
|
3193
|
+
/**
 * Returns `value` unchanged when it is a non-null object (arrays included);
 * otherwise returns a fresh empty object.
 *
 * @param {unknown} value - Any value.
 * @returns {object}
 */
function toRecord7(value) {
  if (value === null || typeof value !== "object") {
    return {};
  }
  return value;
}
|
|
3196
|
+
|
|
3197
|
+
// src/responseFormat.ts
|
|
3198
|
+
/**
 * Dispatches to the provider-specific metadata extractor: the OpenAI variant
 * when `provider === "openai"`, the Anthropic variant for every other value.
 *
 * @param {object} response - Provider response.
 * @param {string} provider - Provider name.
 * @returns {object} Result of the selected extractor.
 */
function extractResponseMetadata(response, provider) {
  const extract = provider === "openai" ? extractOpenAIMetadata : extractAnthropicMetadata;
  return extract(response);
}
|
|
3204
|
+
/**
 * Summarises the first choice of an OpenAI-style response.
 *
 * @param {object} response - OpenAI-style response.
 * @returns {{ finish_reason: string | null, content: string | null, tool_calls_present: boolean, has_content: boolean }}
 *   `finish_reason` and `content` are `null` unless they are strings;
 *   `tool_calls_present` is true for a non-empty `tool_calls` array;
 *   `has_content` is true for a non-empty string content.
 *   With no choices, all fields are null/false.
 */
function extractOpenAIMetadata(response) {
  const { choices } = toRecord8(response);
  if (!Array.isArray(choices) || choices.length === 0) {
    return { finish_reason: null, content: null, tool_calls_present: false, has_content: false };
  }

  const head = toRecord8(choices[0]);
  const { content: rawContent, tool_calls: rawCalls } = toRecord8(head.message);
  const text = typeof rawContent === "string" ? rawContent : null;

  return {
    finish_reason: typeof head.finish_reason === "string" ? head.finish_reason : null,
    content: text,
    tool_calls_present: Array.isArray(rawCalls) && rawCalls.length > 0,
    has_content: text !== null && text.length > 0
  };
}
|
|
3222
|
+
/**
 * Reads finish reason, text content and tool-use presence from an
 * Anthropic-style message response.
 *
 * Text blocks are joined with "\n" (non-string `text` contributes ""). The
 * stop reason is mapped onto the vocabulary used for `finish_reason`:
 * "end_turn" -> "stop", "tool_use" -> "tool_calls", any other string is kept
 * as-is, and a non-string stop reason yields null.
 *
 * @param {unknown} response
 * @returns {{finish_reason: (string|null), content: (string|null),
 *   tool_calls_present: boolean, has_content: boolean}}
 */
function extractAnthropicMetadata(response) {
  const record = toRecord8(response);
  const blocks = Array.isArray(record.content) ? record.content : [];
  const texts = [];
  let toolUseSeen = false;
  for (const block of blocks) {
    const entry = toRecord8(block);
    if (entry.type === "text") {
      texts.push(typeof entry.text === "string" ? entry.text : "");
    } else if (entry.type === "tool_use") {
      toolUseSeen = true;
    }
  }
  const joined = texts.join("\n");
  const stopReason = typeof record.stop_reason === "string" ? record.stop_reason : null;
  let finishReason;
  switch (stopReason) {
    case "end_turn":
      finishReason = "stop";
      break;
    case "tool_use":
      finishReason = "tool_calls";
      break;
    default:
      // Covers both null and any unmapped string.
      finishReason = stopReason;
  }
  const hasText = joined.length > 0;
  return {
    finish_reason: finishReason,
    content: hasText ? joined : null,
    tool_calls_present: toolUseSeen,
    has_content: hasText
  };
}
|
|
3251
|
+
/**
 * Runs the response-format checks of every applicable contract against a
 * model response.
 *
 * A contract applies when it declares `response_format_invariants` and its
 * `tool` is among the tools offered in the request. Failures are labelled
 * with the contract's `contract_file`, falling back to its tool name.
 *
 * @param {unknown} response - Raw provider response.
 * @param {Iterable<object>} contracts - Contracts to consider.
 * @param {Iterable<string>} requestToolNames - Tool names in the request.
 * @param {string} provider - Passed to extractResponseMetadata.
 * @returns {{failures: object[]}}
 */
function evaluateResponseFormatInvariants(response, contracts, requestToolNames, provider) {
  const requested = new Set(requestToolNames);
  const metadata = extractResponseMetadata(response, provider);
  const checks = [checkFinishReason, checkContentWhenToolCalls, checkToolCallsPresent];
  const failures = [];
  for (const contract of contracts) {
    const rfi = contract.response_format_invariants;
    if (!rfi || !requested.has(contract.tool)) {
      continue;
    }
    const label = contract.contract_file ?? contract.tool;
    for (const check of checks) {
      failures.push(...check(rfi, metadata, label));
    }
  }
  return { failures };
}
|
|
3266
|
+
/**
 * Checks the response's finish reason against `rfi.finish_reason`.
 *
 * @param {object} rfi - Response-format invariants; no check is made when
 *   `finish_reason` is undefined.
 * @param {{finish_reason: (string|null)}} metadata - Extracted metadata.
 * @param {string} contractFile - Label attached to a failure.
 * @returns {object[]} Empty on success, otherwise one failure record.
 */
function checkFinishReason(rfi, metadata, contractFile) {
  const expected = rfi.finish_reason;
  const found = metadata.finish_reason;
  if (expected === void 0 || found === expected) {
    return [];
  }
  const failure = {
    path: "$.finish_reason",
    operator: "response_format",
    expected,
    found,
    message: `Expected finish_reason "${expected}", got "${found}"`,
    contract_file: contractFile
  };
  return [failure];
}
|
|
3278
|
+
/**
 * Enforces `content_when_tool_calls: "empty"`: a response that carries tool
 * calls must not also carry text content. Any other setting is not checked.
 *
 * @param {object} rfi - Response-format invariants.
 * @param {{tool_calls_present: boolean, has_content: boolean}} metadata
 * @param {string} contractFile - Label attached to a failure.
 * @returns {object[]} Empty on success, otherwise one failure record.
 */
function checkContentWhenToolCalls(rfi, metadata, contractFile) {
  const violated = rfi.content_when_tool_calls === "empty" && metadata.tool_calls_present && metadata.has_content;
  if (!violated) {
    return [];
  }
  const failure = {
    path: "$.content",
    operator: "response_format",
    expected: "empty when tool_calls present",
    found: "content present",
    message: 'content_when_tool_calls is "empty" but response has both content and tool_calls',
    contract_file: contractFile
  };
  return [failure];
}
|
|
3292
|
+
/**
 * Checks that tool calls are present or absent as `rfi.tool_calls_present`
 * requires.
 *
 * @param {object} rfi - Response-format invariants; no check is made when
 *   `tool_calls_present` is undefined.
 * @param {{tool_calls_present: boolean}} metadata - Extracted metadata.
 * @param {string} contractFile - Label attached to a failure.
 * @returns {object[]} Empty on success, otherwise one failure record.
 */
function checkToolCallsPresent(rfi, metadata, contractFile) {
  const expected = rfi.tool_calls_present;
  const found = metadata.tool_calls_present;
  if (expected === void 0 || expected === found) {
    return [];
  }
  let message;
  if (expected) {
    message = "tool_calls_present: true but no tool_calls in response";
  } else {
    message = "tool_calls_present: false but tool_calls found in response";
  }
  return [{
    path: "$.tool_calls",
    operator: "response_format",
    expected,
    found,
    message,
    contract_file: contractFile
  }];
}
|
|
3304
|
+
/**
 * Coerces an arbitrary value into something whose properties can be read:
 * non-null objects (arrays included) pass through, everything else becomes
 * a new empty object.
 *
 * @param {unknown} value
 * @returns {object}
 */
function toRecord8(value) {
  const isObject = typeof value === "object" && value !== null;
  if (isObject) {
    return value;
  }
  return {};
}
|
|
3307
|
+
|
|
3308
|
+
// src/sessionState.ts
|
|
3309
|
+
import crypto2 from "crypto";
|
|
3310
|
+
|
|
3311
|
+
// src/phases.ts
|
|
3312
|
+
/**
 * Decides whether a batch of proposed tool calls may move the session to a
 * new phase.
 *
 * Tool calls whose contract declares `transitions.advances_to` are treated
 * as phase-advancing. The batch is rejected as soon as one of them targets a
 * phase not listed for the current phase in `compiledSession.transitions`
 * ("illegal_phase_transition"), or when several advancing calls target
 * different phases ("ambiguous_phase_transition"). Sessions without phases
 * always pass with the phase unchanged.
 *
 * @param {Iterable<{name: string}>} toolCalls - Proposed tool calls.
 * @param {{currentPhase: (string|null)}} sessionState
 * @param {{phases: *, perToolContracts: Map, transitions: Map}} compiledSession
 * @returns {{legal: boolean, newPhase: (string|null), blockedTool?: string,
 *   attemptedTransition?: string, reason?: string}}
 */
function validatePhaseTransition(toolCalls, sessionState, compiledSession) {
  const current = sessionState.currentPhase;
  if (!compiledSession.phases) {
    return { legal: true, newPhase: current };
  }
  const attempts = [];
  for (const toolCall of toolCalls) {
    const toolName = toolCall.name;
    const target = compiledSession.perToolContracts.get(toolName)?.transitions?.advances_to;
    if (!target) {
      continue;
    }
    // A null/undefined current phase is looked up under the "" key.
    const reachable = compiledSession.transitions.get(current ?? "");
    if (reachable?.includes(target)) {
      attempts.push({ tool: toolName, target });
      continue;
    }
    return {
      legal: false,
      newPhase: current,
      blockedTool: toolName,
      attemptedTransition: `${current} \u2192 ${target}`,
      reason: "illegal_phase_transition"
    };
  }
  if (attempts.length === 0) {
    return { legal: true, newPhase: current };
  }
  const distinctTargets = new Set(attempts.map((attempt) => attempt.target));
  if (distinctTargets.size > 1) {
    return {
      legal: false,
      newPhase: current,
      blockedTool: attempts.map((attempt) => attempt.tool).join(", "),
      attemptedTransition: attempts.map((attempt) => `${attempt.tool} \u2192 ${attempt.target}`).join("; "),
      reason: "ambiguous_phase_transition"
    };
  }
  return { legal: true, newPhase: attempts[0].target };
}
|
|
3354
|
+
/**
 * Derives the session phase from the tool calls that were actually
 * committed in a step.
 *
 * The phase advances only when every phase-advancing committed call agrees
 * on a single target; with no advancing call, with conflicting targets, or
 * when the session defines no phases, the current phase is kept.
 *
 * @param {Iterable<{toolName: string}>} committedCalls
 * @param {{currentPhase: (string|null)}} sessionState
 * @param {{phases: *, perToolContracts: Map}} compiledSession
 * @returns {string|null} The phase to record after the step.
 */
function recomputePhaseFromCommitted(committedCalls, sessionState, compiledSession) {
  if (!compiledSession.phases) {
    return sessionState.currentPhase;
  }
  const targets = [];
  for (const call of committedCalls) {
    const target = compiledSession.perToolContracts.get(call.toolName)?.transitions?.advances_to;
    if (target) {
      targets.push(target);
    }
  }
  const uniqueTargets = new Set(targets);
  return uniqueTargets.size === 1 ? targets[0] : sessionState.currentPhase;
}
|
|
3369
|
+
/**
 * Lists the phases reachable from the session's current phase.
 *
 * @param {{currentPhase: (string|null)}} sessionState
 * @param {{phases: *, transitions: Map}} compiledSession
 * @returns {string[]} The entry stored in `compiledSession.transitions` for
 *   the current phase ("" when the phase is null/undefined), or an empty
 *   array when there is no entry or the session has no phases.
 */
function getLegalNextPhases(sessionState, compiledSession) {
  if (!compiledSession.phases) {
    return [];
  }
  const phaseKey = sessionState.currentPhase ?? "";
  const reachable = compiledSession.transitions.get(phaseKey);
  return reachable ?? [];
}
|
|
3373
|
+
|
|
3374
|
+
// src/sessionState.ts
|
|
3375
|
+
// Absolute ceiling on the number of steps recorded in one session; compared
// against `totalStepCount` by isAtHardStepCap.
var MAX_STEPS_HARD_CAP = 10000;
|
|
3376
|
+
/**
 * Creates the state object for a brand-new session: all counters at zero,
 * empty collections, no phase, not killed, `startedAt` set to the current
 * time.
 *
 * @param {string} sessionId - Identifier stored on the state.
 * @param {{agent?: *, principal?: *, tier?: string}} [options] - Optional
 *   identity fields; `agent` and `principal` default to null, `tier` to
 *   "compat".
 * @returns {object} Fresh session state with its own Map/Set/array instances.
 */
function createInitialState(sessionId, options) {
  const { agent, principal, tier } = options ?? {};
  const startedAt = new Date();
  return {
    sessionId,
    agent: agent ?? null,
    principal: principal ?? null,
    startedAt,
    tier: tier ?? "compat",
    stateVersion: 0,
    controlRevision: 0,
    currentPhase: null,
    totalStepCount: 0,
    totalToolCalls: 0,
    actualCost: 0,
    totalCost: 0,
    toolCallCounts: new Map(),
    forbiddenTools: new Set(),
    satisfiedPreconditions: new Map(),
    steps: [],
    pendingEntries: [],
    lastStep: null,
    consecutiveBlockCount: 0,
    consecutiveErrorCount: 0,
    totalBlockCount: 0,
    totalUnguardedCalls: 0,
    killed: false,
    contractHash: null
  };
}
|
|
3404
|
+
/**
 * Folds an executed step into the session state and returns the next state
 * (the input state is not mutated).
 *
 * Appends the step, bumps step/tool-call counters and `stateVersion`, adds
 * the step's estimated cost to `totalCost`, and refreshes the per-tool call
 * counts, the forbidden-tool set, the precondition cache and the phase.
 * `actualCost` is not touched here.
 *
 * @param {object} state - Current session state.
 * @param {object} step - Executed step; `step.toolCalls` lists committed calls.
 * @param {object[]} contracts - Used only when `compiledSession` is absent.
 * @param {object} [compiledSession] - When given, its `perToolContracts`
 *   supply the contracts and its phase rules drive `currentPhase`.
 * @returns {object} New session state.
 */
function finalizeExecutedStep(state, step, contracts, compiledSession) {
  const steps = Array.from(state.steps);
  steps.push(step);
  const toolCallCounts = updateToolCallCounts(state.toolCallCounts, step);
  const effectiveContracts = compiledSession ? [...compiledSession.perToolContracts.values()] : contracts;
  const forbiddenTools = updateForbidden(state.forbiddenTools, step, effectiveContracts);
  const satisfiedPreconditions = updatePreconditionCache(state.satisfiedPreconditions, step);
  const stepCost = computeStepCost(step);
  let currentPhase = state.currentPhase;
  if (compiledSession) {
    currentPhase = recomputePhaseFromCommitted(step.toolCalls, state, compiledSession);
  }
  return {
    ...state,
    steps,
    currentPhase,
    totalStepCount: state.totalStepCount + 1,
    totalToolCalls: state.totalToolCalls + step.toolCalls.length,
    totalCost: state.totalCost + stepCost,
    toolCallCounts,
    forbiddenTools,
    satisfiedPreconditions,
    lastStep: step,
    stateVersion: state.stateVersion + 1
  };
}
|
|
3429
|
+
/**
 * Returns a copy of the state with `costDelta` added to `actualCost`.
 *
 * @param {object} state - Current session state (not mutated).
 * @param {number} costDelta - Amount to add.
 * @returns {object} New session state.
 */
function updateActualCost(state, costDelta) {
  const actualCost = state.actualCost + costDelta;
  return { ...state, actualCost };
}
|
|
3435
|
+
/**
 * Updates the consecutive/total block and error counters after a decision.
 *
 * - "allowed": both consecutive counters reset to 0.
 * - "blocked": consecutive and total block counts increase by 1; the
 *   consecutive error count resets.
 * - "error": the consecutive error count increases by 1; the consecutive
 *   block count resets.
 *
 * Any other outcome leaves the state untouched and returns it as-is.
 * Without this fallback the function would return `undefined`, and a caller
 * that assigns the result would lose its session state.
 *
 * @param {object} state - Current session state (not mutated).
 * @param {string} outcome - "allowed", "blocked" or "error".
 * @returns {object} New session state, or `state` itself for an unknown outcome.
 */
function recordDecisionOutcome(state, outcome) {
  switch (outcome) {
    case "allowed":
      return {
        ...state,
        consecutiveBlockCount: 0,
        consecutiveErrorCount: 0
      };
    case "blocked":
      return {
        ...state,
        consecutiveBlockCount: state.consecutiveBlockCount + 1,
        consecutiveErrorCount: 0,
        totalBlockCount: state.totalBlockCount + 1
      };
    case "error":
      return {
        ...state,
        consecutiveErrorCount: state.consecutiveErrorCount + 1,
        consecutiveBlockCount: 0
      };
    default:
      return state;
  }
}
|
|
3458
|
+
/**
 * Returns a copy of the state with `killed` set to true.
 *
 * @param {object} state - Current session state (not mutated).
 * @returns {object} New session state.
 */
function killSession(state) {
  const killed = true;
  return { ...state, killed };
}
|
|
3464
|
+
/**
 * Reports whether the session has recorded at least MAX_STEPS_HARD_CAP steps.
 *
 * @param {{totalStepCount: number}} state
 * @returns {boolean}
 */
function isAtHardStepCap(state) {
  const { totalStepCount } = state;
  return totalStepCount >= MAX_STEPS_HARD_CAP;
}
|
|
3467
|
+
/**
 * Fingerprints a tool call's serialized arguments.
 *
 * @param {string} args - Serialized arguments (fed to the hash as-is).
 * @returns {string} First 16 hex characters of the SHA-256 digest.
 */
function computeArgumentsHash(args) {
  const hasher = crypto2.createHash("sha256");
  hasher.update(args);
  const hex = hasher.digest("hex");
  return hex.substring(0, 16);
}
|
|
3470
|
+
// NUL separates the tool name from the serialized resource value inside a
// resource-scoped forbidden key.
var RESOURCE_SEPARATOR = "\0";
/**
 * Builds the key under which a forbidden tool is stored.
 *
 * @param {string} toolName
 * @param {*} [resourceValue] - When given, the key is scoped to this value
 *   (serialized with JSON.stringify); when undefined the key is the bare
 *   tool name.
 * @returns {string}
 */
function makeForbiddenKey(toolName, resourceValue) {
  if (resourceValue !== void 0) {
    const encoded = JSON.stringify(resourceValue);
    return `${toolName}${RESOURCE_SEPARATOR}${encoded}`;
  }
  return toolName;
}
|
|
3475
|
+
/**
 * Tests whether a tool call is forbidden, either outright (bare tool name in
 * the set) or for the specific resource it targets (resource-scoped key).
 *
 * @param {Set<string>} forbiddenTools - Keys produced by makeForbiddenKey.
 * @param {string} toolName
 * @param {*} [resourceValue] - Resource the call targets, if any.
 * @returns {boolean}
 */
function isForbidden(forbiddenTools, toolName, resourceValue) {
  if (forbiddenTools.has(toolName)) {
    return true;
  }
  if (resourceValue === void 0) {
    return false;
  }
  const scopedKey = makeForbiddenKey(toolName, resourceValue);
  return forbiddenTools.has(scopedKey);
}
|
|
3482
|
+
/**
 * Returns a new per-tool counter map with one increment for every tool call
 * in the step. The input map is left untouched.
 *
 * @param {Map<string, number>} counts - Existing counts.
 * @param {{toolCalls: Array<{toolName: string}>}} step
 * @returns {Map<string, number>}
 */
function updateToolCallCounts(counts, step) {
  return step.toolCalls.reduce(
    (tally, { toolName }) => tally.set(toolName, (tally.get(toolName) ?? 0) + 1),
    new Map(counts)
  );
}
|
|
3489
|
+
/**
 * Returns a new forbidden-tool set extended with everything the step's tool
 * calls forbid through their contracts' `forbids_after` lists.
 *
 * A string entry forbids that tool outright. An object entry forbids
 * `entry.tool`; when it names a `resource` and the call recorded a value for
 * it in `resourceValues`, only that resource is forbidden, otherwise the
 * whole tool is.
 *
 * @param {Set<string>} forbidden - Existing keys (not mutated).
 * @param {{toolCalls: object[]}} step - Executed step.
 * @param {object[]} contracts - Contracts, matched to calls by `tool`.
 * @returns {Set<string>}
 */
function updateForbidden(forbidden, step, contracts) {
  const next = new Set(forbidden);
  const byTool = new Map(contracts.map((contract) => [contract.tool, contract]));
  for (const call of step.toolCalls) {
    const rules = byTool.get(call.toolName)?.forbids_after;
    if (!rules) {
      continue;
    }
    for (const rule of rules) {
      if (typeof rule === "string") {
        next.add(rule);
        continue;
      }
      const resourcePath = rule.resource;
      const scopedValue = resourcePath && call.resourceValues ? call.resourceValues[resourcePath] : void 0;
      // Without a recorded resource value the rule applies to the whole tool.
      next.add(scopedValue !== void 0 ? makeForbiddenKey(rule.tool, scopedValue) : rule.tool);
    }
  }
  return next;
}
|
|
3516
|
+
/**
 * Returns a new precondition cache that records the step's `outputExtract`
 * for each tool called in it.
 *
 * Each call is stored under its tool name and, for every value in its
 * `resourceValues`, under `<toolName>:<JSON of value>`. Later entries
 * overwrite earlier ones with the same key. The input map is not mutated.
 *
 * @param {Map<string, *>} cache - Existing cache.
 * @param {{toolCalls: object[], outputExtract: *}} step
 * @returns {Map<string, *>}
 */
function updatePreconditionCache(cache, step) {
  const next = new Map(cache);
  const extract = step.outputExtract;
  for (const { toolName, resourceValues } of step.toolCalls) {
    next.set(toolName, extract);
    if (!resourceValues) {
      continue;
    }
    for (const value of Object.values(resourceValues)) {
      next.set(`${toolName}:${JSON.stringify(value)}`, extract);
    }
  }
  return next;
}
|
|
3529
|
+
/**
 * Estimates the cost of a step from its token usage at a flat rate of 1e-5
 * per token (prompt and completion tokens are priced the same).
 *
 * A token count that is missing or not a finite number is treated as 0.
 * Previously it turned the result into NaN, which then propagated into the
 * session's accumulated `totalCost`.
 *
 * @param {{usage?: {prompt_tokens?: number, completion_tokens?: number}}} step
 * @returns {number} Estimated cost; 0 when the step has no usage record.
 */
function computeStepCost(step) {
  const usage = step.usage;
  if (!usage) return 0;
  const promptTokens = Number.isFinite(usage.prompt_tokens) ? usage.prompt_tokens : 0;
  const completionTokens = Number.isFinite(usage.completion_tokens) ? usage.completion_tokens : 0;
  return (promptTokens + completionTokens) * 1e-5;
}
|
|
3533
|
+
|
|
3534
|
+
// src/sessionLimits.ts
|
|
3535
|
+
/**
 * Checks the session-wide limits in a fixed order and reports the first one
 * that has been reached: `max_steps` (against totalStepCount),
 * `max_tool_calls` (against totalToolCalls), then `max_cost_per_session`
 * (against actualCost). A limit is ignored unless it is a number, and it
 * counts as exceeded once the usage is greater than or equal to it.
 *
 * @param {{totalStepCount: number, totalToolCalls: number, actualCost: number}} state
 * @param {object} limits
 * @returns {{exceeded: boolean, reason: (string|null)}}
 */
function checkSessionLimits(state, limits) {
  const usageByLimit = [
    ["max_steps", state.totalStepCount],
    ["max_tool_calls", state.totalToolCalls],
    ["max_cost_per_session", state.actualCost]
  ];
  for (const [limitName, used] of usageByLimit) {
    const cap = limits[limitName];
    if (typeof cap === "number" && used >= cap) {
      return { exceeded: true, reason: `${limitName} exceeded: ${used} >= ${cap}` };
    }
  }
  return { exceeded: false, reason: null };
}
|
|
3556
|
+
/**
 * Checks whether a tool has already been called as many times as
 * `limits.max_calls_per_tool[toolName]` allows. Tools without a numeric
 * entry are unlimited.
 *
 * @param {{toolCallCounts: Map<string, number>}} state
 * @param {string} toolName
 * @param {{max_calls_per_tool?: Object<string, number>}} limits
 * @returns {{exceeded: boolean, reason: (string|null)}}
 */
function checkPerToolLimits(state, toolName, limits) {
  const perTool = limits.max_calls_per_tool;
  const cap = perTool ? perTool[toolName] : void 0;
  if (typeof cap === "number") {
    const used = state.toolCallCounts.get(toolName) ?? 0;
    if (used >= cap) {
      return {
        exceeded: true,
        reason: `max_calls_per_tool.${toolName} exceeded: ${used} >= ${cap}`
      };
    }
  }
  return { exceeded: false, reason: null };
}
|
|
3569
|
+
/**
 * Detects a tool being called repeatedly with identical arguments.
 *
 * Counts, within the last `config.window` recorded steps, the tool calls
 * whose name and `arguments_hash` match the proposed call, and triggers when
 * the count reaches `config.threshold`.
 *
 * A numeric window below 1 inspects no steps. Previously `slice(-0)`
 * behaved as `slice(0)` and scanned the entire history, and a negative
 * window skipped leading steps instead of limiting the tail. A non-numeric
 * window keeps the original behaviour of `slice(-window)`.
 *
 * @param {string} toolName - Tool being proposed.
 * @param {string} argsString - Serialized arguments of the proposed call.
 * @param {{steps: Array<{toolCalls: object[]}>}} state
 * @param {{window: number, threshold: number}} config
 * @returns {{triggered: boolean, matchCount: number, threshold: number, window: number}}
 */
function checkLoopDetection(toolName, argsString, state, config) {
  const { window, threshold } = config;
  const windowSteps = Number.isFinite(window) && window < 1 ? [] : state.steps.slice(-window);
  const targetHash = computeArgumentsHash(argsString);
  const targetTuple = `${toolName}:${targetHash}`;
  let matchCount = 0;
  for (const step of windowSteps) {
    for (const tc of step.toolCalls) {
      if (`${tc.toolName}:${tc.arguments_hash}` === targetTuple) {
        matchCount++;
      }
    }
  }
  return {
    triggered: matchCount >= threshold,
    matchCount,
    threshold,
    window
  };
}
|
|
3589
|
+
/**
 * Trips the circuit breaker when the session has accumulated too many
 * consecutive blocks or consecutive errors. Blocks are checked first, so
 * they win when both thresholds are reached.
 *
 * @param {{consecutiveBlockCount: number, consecutiveErrorCount: number}} state
 * @param {{consecutive_blocks: number, consecutive_errors: number}} config
 * @returns {{triggered: boolean, reason: (string|null)}}
 */
function checkCircuitBreaker(state, config) {
  let reason = null;
  if (state.consecutiveBlockCount >= config.consecutive_blocks) {
    reason = "consecutive_blocks";
  } else if (state.consecutiveErrorCount >= config.consecutive_errors) {
    reason = "consecutive_errors";
  }
  return { triggered: reason !== null, reason };
}
|
|
3598
|
+
|
|
3599
|
+
// src/preconditions.ts
|
|
3600
|
+
/**
 * Evaluates each precondition independently against the session.
 *
 * @param {object[]} preconditions - Preconditions of one tool contract.
 * @param {object} sessionState - Current session state.
 * @param {*} currentArguments - Parsed arguments of the proposed call.
 * @returns {Array<{satisfied: boolean, detail: string}>} One result per
 *   precondition, in the same order.
 */
function evaluatePreconditions(preconditions, sessionState, currentArguments) {
  return preconditions.map(function evaluateOne(precondition) {
    return evaluatePrecondition(precondition, sessionState, currentArguments);
  });
}
|
|
3605
|
+
/**
 * Evaluates a single precondition of a tool contract.
 *
 * Two independent requirements may be declared:
 * - `requires_step_count.gte`: the session must already contain at least
 *   that many steps.
 * - `requires_prior_tool`: an earlier step must contain an allowed call to
 *   that tool (or the precondition cache must hold an entry for it). With
 *   `resource`, the earlier call must have targeted the same resource value
 *   as the current arguments. With `with_output`, every `equals` assertion
 *   must hold on the prior step's `outputExtract` (or the cached extract).
 *
 * Resource values are compared structurally through their JSON
 * serialization, which is how the cache key below identifies them. The
 * previous `!==` identity check could never match object or array resource
 * values against a recorded step.
 *
 * @param {object} precondition
 * @param {object} sessionState - Reads totalStepCount, steps and
 *   satisfiedPreconditions.
 * @param {*} currentArguments - Parsed arguments of the proposed call.
 * @returns {{satisfied: boolean, detail: string}} `detail` is "" on success.
 */
function evaluatePrecondition(precondition, sessionState, currentArguments) {
  if (precondition.requires_step_count) {
    const required = precondition.requires_step_count.gte;
    if (sessionState.totalStepCount < required) {
      return {
        satisfied: false,
        detail: `Need ${required} prior steps, have ${sessionState.totalStepCount}`
      };
    }
  }
  if (!precondition.requires_prior_tool) {
    return { satisfied: true, detail: "" };
  }
  const toolName = precondition.requires_prior_tool;
  const resource = precondition.resource;
  const resourcePath = resource ? typeof resource === "string" ? resource : resource.path : void 0;
  const resourceValue = resourcePath ? extractPath(currentArguments ?? {}, resourcePath) : void 0;
  const resourceJson = resourceValue !== void 0 ? JSON.stringify(resourceValue) : void 0;
  const cacheKey = resourceValue !== void 0 ? `${toolName}:${resourceJson}` : toolName;
  const matchesCall = (tc) => {
    if (tc.toolName !== toolName) return false;
    if (tc.proposal_decision !== "allowed") return false;
    if (resourceValue === void 0) return true;
    const recorded = tc.resourceValues?.[resourcePath];
    return recorded === resourceValue || JSON.stringify(recorded) === resourceJson;
  };
  // Scan newest-first so the most recent matching step supplies the output.
  let priorStep;
  for (let i = sessionState.steps.length - 1; i >= 0; i--) {
    const candidate = sessionState.steps[i];
    if (candidate.toolCalls.some(matchesCall)) {
      priorStep = candidate;
      break;
    }
  }
  const cachedExtract = sessionState.satisfiedPreconditions.get(cacheKey);
  if (!priorStep && cachedExtract === void 0) {
    const detail = resourceValue !== void 0 ? `Required prior tool ${toolName} not found for resource ${JSON.stringify(resourceValue)}` : `Required prior tool ${toolName} not found in session`;
    return { satisfied: false, detail };
  }
  if (precondition.with_output) {
    const extract = priorStep?.outputExtract ?? cachedExtract ?? {};
    for (const assertion of precondition.with_output) {
      const value = extractPath(extract, assertion.path);
      if (assertion.equals !== void 0 && value !== assertion.equals) {
        return {
          satisfied: false,
          detail: `Prior tool output assertion failed: ${assertion.path} \u2014 expected ${JSON.stringify(assertion.equals)}, got ${JSON.stringify(value)}`
        };
      }
    }
  }
  return { satisfied: true, detail: "" };
}
|
|
3655
|
+
/**
 * Resolves a dotted path (optionally prefixed with "$.") inside a value.
 *
 * "" and "$" return the value itself. Each segment is read as an own
 * property of the current object or array; array indices and `length` work
 * because they are own properties. Inherited members such as `constructor`
 * or `toString` are not followed, so a missing key can no longer resolve to
 * a prototype function.
 *
 * @param {*} obj - Root value.
 * @param {string} path - For example "$.user.id", "items.0.name" or "$".
 * @returns {*} The value at the path, or undefined when any segment is
 *   missing or passes through a non-object.
 */
function extractPath(obj, path) {
  const cleanPath = path.startsWith("$.") ? path.slice(2) : path;
  if (cleanPath === "" || cleanPath === "$") return obj;
  let current = obj;
  for (const segment of cleanPath.split(".")) {
    if (current === null || typeof current !== "object") return void 0;
    if (!Object.prototype.hasOwnProperty.call(current, segment)) return void 0;
    current = current[segment];
  }
  return current;
}
|
|
3667
|
+
|
|
3668
|
+
// src/crossStep.ts
|
|
3669
|
+
/**
 * Validates a batch of proposed tool calls against cross-step rules:
 * forbidden tools and preconditions.
 *
 * Calls are processed in order against a working copy of the session's
 * forbidden set, so a call's `forbids_after` entries also affect later
 * calls in the same batch. A forbidden call is reported and skipped: its
 * preconditions are not evaluated and its own `forbids_after` is not
 * applied. Arguments that fail to parse as JSON are treated as absent.
 *
 * @param {Iterable<{name: string, arguments: string}>} toolCalls
 * @param {object} sessionState - Reads forbiddenTools, plus whatever
 *   evaluatePreconditions reads.
 * @param {object[]} contracts - Contracts, matched to calls by `tool`.
 * @returns {{passed: boolean, failures: Array<{toolName: string,
 *   reason: string, detail: string}>}}
 */
function validateCrossStep(toolCalls, sessionState, contracts) {
  const failures = [];
  const byTool = new Map(contracts.map((contract) => [contract.tool, contract]));
  const pendingForbidden = new Set(sessionState.forbiddenTools);
  for (const call of toolCalls) {
    const contract = byTool.get(call.name);
    const preconditions = contract?.preconditions;
    let args;
    try {
      args = JSON.parse(call.arguments);
    } catch {
      args = void 0;
    }
    // The resource scope comes from the first precondition naming a resource.
    let resourceValue;
    if (args && preconditions) {
      const scoped = preconditions.find((pre) => pre.resource);
      if (scoped) {
        const path = typeof scoped.resource === "string" ? scoped.resource : scoped.resource.path;
        resourceValue = extractPath(args, path);
      }
    }
    if (isForbidden(pendingForbidden, call.name, resourceValue)) {
      failures.push({
        toolName: call.name,
        reason: "forbidden_tool",
        detail: resourceValue !== void 0 ? `Tool "${call.name}" is forbidden in this session for resource ${JSON.stringify(resourceValue)}` : `Tool "${call.name}" is forbidden in this session`
      });
      continue;
    }
    if (preconditions && preconditions.length > 0) {
      for (const outcome of evaluatePreconditions(preconditions, sessionState, args)) {
        if (outcome.satisfied) {
          continue;
        }
        failures.push({
          toolName: call.name,
          reason: "precondition_not_met",
          detail: outcome.detail
        });
      }
    }
    if (contract?.forbids_after) {
      for (const rule of contract.forbids_after) {
        if (typeof rule === "string") {
          pendingForbidden.add(rule);
          continue;
        }
        const resourcePath = rule.resource;
        const scopedValue = resourcePath && args ? extractPath(args, resourcePath) : void 0;
        // Without a resolvable resource value the rule covers the whole tool.
        pendingForbidden.add(scopedValue !== void 0 ? makeForbiddenKey(rule.tool, scopedValue) : rule.tool);
      }
    }
  }
  return {
    passed: failures.length === 0,
    failures
  };
}
|
|
3740
|
+
|
|
3741
|
+
// src/argumentValues.ts
|
|
3742
|
+
/**
 * Checks parsed tool-call arguments against a list of value invariants.
 *
 * Each invariant addresses one value through `path` and may combine any of
 * these operators; every violated operator adds one failure record:
 * - `exact_match`: the value (strings as-is, everything else as JSON) must
 *   equal the given string.
 * - `regex`: the value converted to a string must match the pattern. A
 *   missing value never matches; previously it was tested as the literal
 *   text "undefined". A pattern rejected by safeRegex is reported as
 *   invalid (assumes safeRegex throws for such patterns; confirm against
 *   its definition).
 * - `one_of`: the value must equal one candidate (same typeof, same JSON).
 * - `type`: "array" for arrays, "null" for null, otherwise `typeof`. Null
 *   used to be reported as "object" and so satisfied `type: "object"`.
 * - `gte` / `lte`: the value must be a number within the bound.
 *
 * @param {*} parsedArguments - Parsed arguments of the tool call.
 * @param {Iterable<object>} invariants
 * @returns {{passed: boolean, failures: Array<{path: string, operator: string,
 *   expected: *, actual: *, detail: string}>}}
 */
function evaluateArgumentValueInvariants(parsedArguments, invariants) {
  const failures = [];
  for (const inv of invariants) {
    const value = extractPath(parsedArguments, inv.path);
    const fail = (operator, expected, actual, detail) => {
      failures.push({ path: inv.path, operator, expected, actual, detail });
    };
    if (inv.exact_match !== void 0) {
      const strValue = typeof value === "string" ? value : JSON.stringify(value);
      if (strValue !== inv.exact_match) {
        fail("exact_match", inv.exact_match, value, `Expected exact match "${inv.exact_match}", got "${strValue}"`);
      }
    }
    if (inv.regex !== void 0) {
      const strValue = typeof value === "string" ? value : String(value);
      try {
        // Build the regex first so a bad pattern is reported even when the
        // value is missing.
        const re = safeRegex(inv.regex);
        if (value === void 0 || !re.test(strValue)) {
          fail("regex", inv.regex, value, `Value "${strValue}" does not match regex "${inv.regex}"`);
        }
      } catch {
        fail("regex", inv.regex, value, `Invalid regex pattern: "${inv.regex}"`);
      }
    }
    if (inv.one_of !== void 0) {
      const valueJson = JSON.stringify(value);
      const match = inv.one_of.some(
        (candidate) => typeof candidate === typeof value && JSON.stringify(candidate) === valueJson
      );
      if (!match) {
        fail("one_of", inv.one_of, value, `Value ${JSON.stringify(value)} not in ${JSON.stringify(inv.one_of)}`);
      }
    }
    if (inv.type !== void 0) {
      let actualType;
      if (value === null) {
        actualType = "null";
      } else if (Array.isArray(value)) {
        actualType = "array";
      } else {
        actualType = typeof value;
      }
      if (actualType !== inv.type) {
        fail("type", inv.type, actualType, `Expected type "${inv.type}", got "${actualType}"`);
      }
    }
    if (typeof inv.gte === "number") {
      if (typeof value !== "number" || value < inv.gte) {
        fail("gte", inv.gte, value, `Expected >= ${inv.gte}, got ${JSON.stringify(value)}`);
      }
    }
    if (typeof inv.lte === "number") {
      if (typeof value !== "number" || value > inv.lte) {
        fail("lte", inv.lte, value, `Expected <= ${inv.lte}, got ${JSON.stringify(value)}`);
      }
    }
  }
  return {
    passed: failures.length === 0,
    failures
  };
}
|
|
3838
|
+
|
|
3839
|
+
// src/messageValidation.ts
|
|
3840
|
+
import {
|
|
3841
|
+
evaluateInvariants as evaluateInvariants2
|
|
3842
|
+
} from "@replayci/contracts-core";
|
|
3843
|
+
/**
 * Validates the tool results found in a conversation against the
 * `output_invariants` of the matching tool contracts.
 *
 * Results are skipped (not failed) when no contract exists for their tool,
 * when the contract declares no output invariants, or when string content
 * is not valid JSON. Non-string content is evaluated as-is.
 *
 * A contract without an `assertions` block, or with a non-array
 * `output_invariants`, is treated as having no invariants; previously that
 * shape raised a TypeError.
 *
 * @param {Iterable<*>} messages - Conversation messages in provider format.
 * @param {object[]} contracts - Contracts, matched to results by `tool`.
 * @param {string} provider - Passed to extractToolResults.
 * @returns {{passed: boolean, failures: Array<{toolName: string, detail: string}>}}
 */
function validateToolResultMessages(messages, contracts, provider) {
  const failures = [];
  const contractByTool = new Map(contracts.map((c) => [c.tool, c]));
  const toolResults = extractToolResults(messages, provider);
  for (const result of toolResults) {
    const contract = contractByTool.get(result.toolName);
    if (!contract) continue;
    const outputInvariants = contract.assertions?.output_invariants;
    if (!Array.isArray(outputInvariants) || outputInvariants.length === 0) continue;
    let parsed;
    try {
      parsed = typeof result.content === "string" ? JSON.parse(result.content) : result.content;
    } catch {
      // Best effort: content that is not JSON cannot be checked, so skip it.
      continue;
    }
    const invariantResult = evaluateInvariants2(parsed, outputInvariants, process.env);
    for (const failure of invariantResult) {
      failures.push({
        toolName: result.toolName,
        detail: `Tool result validation failed for "${result.toolName}": ${failure.detail}`
      });
    }
  }
  return {
    passed: failures.length === 0,
    failures
  };
}
|
|
3871
|
+
/**
 * Collects tool results from a conversation using the provider's message
 * format.
 *
 * @param {Iterable<*>} messages
 * @param {string} provider - "openai" selects the OpenAI format; every other
 *   value is handled as Anthropic format.
 * @returns {Array<{toolName: string, toolCallId: (string|null), content: *}>}
 *   A new array of results.
 */
function extractToolResults(messages, provider) {
  const extracted = provider === "openai"
    ? extractOpenAIToolResults(messages)
    : extractAnthropicToolResults(messages);
  return [...extracted];
}
|
|
3880
|
+
/**
 * Collects tool results from OpenAI-style chat messages.
 *
 * A first pass maps tool-call ids to tool names from the `tool_calls` of
 * assistant messages (name taken from `function.name`, else `name`). A
 * second pass turns every `role: "tool"` message into a result, resolving
 * its tool name through `tool_call_id`; unresolved results are named
 * "unknown".
 *
 * @param {Iterable<*>} messages
 * @returns {Array<{toolName: string, toolCallId: (string|null), content: *}>}
 */
function extractOpenAIToolResults(messages) {
  const nameById = new Map();
  for (const message of messages) {
    const { role, tool_calls: calls } = toRecord9(message);
    if (role !== "assistant" || !Array.isArray(calls)) {
      continue;
    }
    for (const call of calls) {
      const callRec = toRecord9(call);
      const id = typeof callRec.id === "string" ? callRec.id : null;
      const fnName = toRecord9(callRec.function).name;
      let name = null;
      if (typeof fnName === "string") {
        name = fnName;
      } else if (typeof callRec.name === "string") {
        name = callRec.name;
      }
      if (id && name) {
        nameById.set(id, name);
      }
    }
  }
  const results = [];
  for (const message of messages) {
    const rec = toRecord9(message);
    if (rec.role !== "tool") {
      continue;
    }
    const toolCallId = typeof rec.tool_call_id === "string" ? rec.tool_call_id : null;
    const resolved = toolCallId ? nameById.get(toolCallId) : void 0;
    results.push({
      toolName: resolved ?? "unknown",
      toolCallId,
      content: rec.content
    });
  }
  return results;
}
|
|
3911
|
+
/**
 * Collects tool results from Anthropic-style messages.
 *
 * A first pass maps tool-use ids to tool names from the `tool_use` blocks of
 * assistant messages. A second pass turns every `tool_result` block inside
 * user messages into a result, resolving its tool name through
 * `tool_use_id`; unresolved results are named "unknown". Messages whose
 * `content` is not an array are ignored.
 *
 * @param {Iterable<*>} messages
 * @returns {Array<{toolName: string, toolCallId: (string|null), content: *}>}
 */
function extractAnthropicToolResults(messages) {
  const nameById = new Map();
  for (const message of messages) {
    const { role, content: blocks } = toRecord9(message);
    if (role !== "assistant" || !Array.isArray(blocks)) {
      continue;
    }
    for (const block of blocks) {
      const blockRec = toRecord9(block);
      if (blockRec.type !== "tool_use") {
        continue;
      }
      const id = typeof blockRec.id === "string" ? blockRec.id : null;
      const name = typeof blockRec.name === "string" ? blockRec.name : null;
      if (id && name) {
        nameById.set(id, name);
      }
    }
  }
  const results = [];
  for (const message of messages) {
    const { role, content: blocks } = toRecord9(message);
    if (role !== "user" || !Array.isArray(blocks)) {
      continue;
    }
    for (const block of blocks) {
      const blockRec = toRecord9(block);
      if (blockRec.type !== "tool_result") {
        continue;
      }
      const toolUseId = typeof blockRec.tool_use_id === "string" ? blockRec.tool_use_id : null;
      const resolved = toolUseId ? nameById.get(toolUseId) : void 0;
      results.push({
        toolName: resolved ?? "unknown",
        toolCallId: toolUseId,
        content: blockRec.content
      });
    }
  }
  return results;
}
|
|
3950
|
+
/**
 * Coerce an arbitrary value into something safe for property access.
 *
 * @param {unknown} value - Any value.
 * @returns {object} `value` itself when it is a non-null object (arrays
 *   included), otherwise a fresh empty object.
 */
function toRecord9(value) {
  if (value === null || typeof value !== "object") {
    return {};
  }
  return value;
}
|
|
3953
|
+
|
|
3954
|
+
// src/policy.ts
|
|
3955
|
+
/**
 * Decide whether `principal` may use `toolName` under `policyProgram`.
 *
 * Evaluation order:
 *   1. session rules that carry `deny` and no `allow` — first match denies;
 *   2. per-tool deny rules — first match denies;
 *   3. when `defaultDeny` is set, at least one session allow rule (optionally
 *      restricted to a `tools` list) or per-tool allow rule must match.
 *
 * @param {string} toolName - Tool being evaluated.
 * @param {unknown} principal - Principal the predicates are evaluated against.
 * @param {unknown} _arguments - Unused.
 * @param {unknown} _sessionState - Unused.
 * @param {{sessionRules: Array<object>, perToolRules: Map<string, object>, defaultDeny?: boolean}} policyProgram
 * @returns {{allowed: boolean, reason: string | null}}
 */
function evaluatePolicy(toolName, principal, _arguments, _sessionState, policyProgram) {
  const deny = (reason) => ({ allowed: false, reason });
  const matches = (clause) => evaluatePredicate(clause.principal, principal);

  for (const { deny: denyClause, allow: allowClause } of policyProgram.sessionRules) {
    // Rules that also carry an allow clause are not treated as session deny rules.
    if (!denyClause || allowClause) continue;
    const predicate = denyClause.principal;
    if (evaluatePredicate(predicate, principal)) {
      return deny(`Session deny rule matched: ${predicate.path}`);
    }
  }

  const toolPolicy = policyProgram.perToolRules.get(toolName);
  for (const { principal: predicate } of toolPolicy?.deny || []) {
    if (evaluatePredicate(predicate, principal)) {
      return deny(`Per-tool deny rule matched: ${predicate.path}`);
    }
  }

  if (!policyProgram.defaultDeny) {
    return { allowed: true, reason: null };
  }

  const sessionAllow = policyProgram.sessionRules.some(
    ({ allow }) => allow && (!allow.tools || allow.tools.includes(toolName)) && matches(allow)
  );
  const toolAllow = toolPolicy?.allow?.some((rule) => matches(rule)) ?? false;
  return sessionAllow || toolAllow
    ? { allowed: true, reason: null }
    : deny("default_deny: no matching allow rule");
}
|
|
3996
|
+
/**
 * Evaluate a single principal predicate.
 *
 * The value at `predicate.path` is resolved from `principal`, then the first
 * operator present on the predicate decides the outcome, checked in this
 * order: `equals`, `one_of`, `regex`, `contains`.
 *
 * - `equals` / `one_of` compare JSON serialisations.
 * - `regex` / `contains` operate on the value as a string; non-string values
 *   are converted with `String(value)`.
 * - A regex that fails to build or run yields `false`.
 *
 * NOTE(review): because of the `String(value)` conversion a missing attribute
 * is matched as the text "undefined" (or "null"); confirm that is intended.
 *
 * @param {{path: string, equals?: unknown, one_of?: unknown[], regex?: string, contains?: string}} predicate
 * @param {unknown} principal
 * @returns {boolean} `false` when the predicate carries no known operator.
 */
function evaluatePredicate(predicate, principal) {
  const actual = extractPath2(principal, predicate.path);
  const sameJson = (left, right) => JSON.stringify(left) === JSON.stringify(right);
  const asText = () => (typeof actual === "string" ? actual : String(actual));

  if (predicate.equals !== void 0) {
    return sameJson(actual, predicate.equals);
  }
  if (predicate.one_of !== void 0) {
    return predicate.one_of.some((candidate) => sameJson(candidate, actual));
  }
  if (predicate.regex !== void 0) {
    const text = asText();
    try {
      return safeRegex(predicate.regex).test(text);
    } catch {
      return false;
    }
  }
  if (predicate.contains !== void 0) {
    return asText().includes(predicate.contains);
  }
  return false;
}
|
|
4021
|
+
/**
 * Resolve a dotted path (optionally prefixed with "$.") against an object.
 *
 * @param {unknown} obj - Root value.
 * @param {string} path - e.g. "$.org.role" or "org.role"; "" and "$" select
 *   the root itself.
 * @returns {unknown} The value found, or `undefined` as soon as a segment has
 *   to be read from something that is not a non-null object.
 */
function extractPath2(obj, path) {
  const trimmed = path.startsWith("$.") ? path.slice(2) : path;
  if (trimmed === "" || trimmed === "$") {
    return obj;
  }
  return trimmed
    .split(".")
    .reduce((node, key) => (node !== null && typeof node === "object" ? node[key] : void 0), obj);
}
|
|
4034
|
+
|
|
4035
|
+
// src/narrow.ts
|
|
4036
|
+
/**
 * Split the requested tools into those that may be offered and those that
 * must be withheld, recording a reason for each removal.
 *
 * Checks run in this order and the first failing one wins:
 *   manual filter -> contract presence -> phase -> preconditions ->
 *   forbidden-in-state -> policy.
 *
 * @param {Iterable<{name: string}>} requestedTools
 * @param {{currentPhase?: string, forbiddenTools: Set<string>}} sessionState
 * @param {{perToolContracts: Map<string, object>, policyProgram?: object, principal?: unknown}} compiledSession
 * @param {string} unmatchedPolicy - "allow" keeps tools that have no contract;
 *   any other value removes them with reason "no_contract".
 * @param {string[] | undefined} manualFilter - When provided, only the listed
 *   tool names survive.
 * @returns {{allowed: object[], removed: Array<{tool: string, reason: string, detail?: string}>}}
 */
function narrowTools(requestedTools, sessionState, compiledSession, unmatchedPolicy, manualFilter) {
  // Returns the removal record for a tool, or null when the tool may stay.
  const findRemoval = (tool) => {
    const name = tool.name;
    if (manualFilter && !manualFilter.includes(name)) {
      return { tool: name, reason: "manual_filter" };
    }

    const contract = compiledSession.perToolContracts.get(name);
    if (!contract) {
      return unmatchedPolicy === "allow" ? null : { tool: name, reason: "no_contract" };
    }

    const phase = sessionState.currentPhase;
    const validPhases = contract.transitions?.valid_in_phases;
    if (phase && validPhases && !validPhases.includes(phase)) {
      return {
        tool: name,
        reason: "wrong_phase",
        detail: `Tool valid in [${validPhases.join(", ")}], current phase: ${phase}`
      };
    }

    if (contract.preconditions?.length > 0) {
      const firstUnmet = evaluatePreconditions(contract.preconditions, sessionState).find(
        (outcome) => !outcome.satisfied
      );
      if (firstUnmet) {
        return { tool: name, reason: "precondition_not_met", detail: firstUnmet.detail };
      }
    }

    if (sessionState.forbiddenTools.has(name)) {
      return { tool: name, reason: "forbidden_in_state" };
    }

    // Policy is only consulted when both a program and a principal exist.
    if (compiledSession.policyProgram && compiledSession.principal != null) {
      const verdict = evaluatePolicy(
        name,
        compiledSession.principal,
        {},
        sessionState,
        compiledSession.policyProgram
      );
      if (!verdict.allowed) {
        return {
          tool: name,
          reason: "policy_denied",
          detail: verdict.reason ?? "Policy deny rule matched"
        };
      }
    }
    return null;
  };

  const allowed = [];
  const removed = [];
  for (const tool of requestedTools) {
    const removal = findRemoval(tool);
    if (removal) {
      removed.push(removal);
    } else {
      allowed.push(tool);
    }
  }
  return { allowed, removed };
}
|
|
4108
|
+
/**
 * Normalise a list of provider tool definitions so each carries a top-level
 * `name`.
 *
 * @param {Iterable<unknown>} tools - Raw tool entries; non-object entries and
 *   entries for which `getToolName` yields no name are dropped.
 * @returns {object[]} Shallow copies of the usable entries with `name` set.
 */
function extractToolDefinitions(tools) {
  const definitions = [];
  for (const candidate of tools) {
    if (!candidate || typeof candidate !== "object") continue;
    const name = getToolName(candidate);
    if (!name) continue;
    definitions.push({ ...candidate, name });
  }
  return definitions;
}
|
|
4120
|
+
/**
 * Resolve the name of a tool definition.
 *
 * Looks at the top-level `name` first and falls back to the nested
 * `function.name`. An empty string is treated as "no name" in both places, so
 * the function never returns "" (previously an empty nested name leaked
 * through while an empty top-level name was rejected).
 *
 * @param {{name?: unknown, function?: unknown}} tool - Tool definition object.
 * @returns {string | undefined} The non-empty tool name, or `undefined`.
 */
function getToolName(tool) {
  const isNonEmptyString = (value) => typeof value === "string" && value.length > 0;
  if (isNonEmptyString(tool.name)) {
    return tool.name;
  }
  const fn = tool.function;
  if (fn && typeof fn === "object" && isNonEmptyString(fn.name)) {
    return fn.name;
  }
  return void 0;
}
|
|
4128
|
+
|
|
4129
|
+
// src/executionConstraints.ts
|
|
4130
|
+
import { evaluateInvariants as evaluateInvariants3 } from "@replayci/contracts-core";
|
|
4131
|
+
/**
 * Check tool arguments against the execution constraints of a tool.
 *
 * @param {string} toolName - Not used by the check itself.
 * @param {unknown} args - Arguments about to be passed to the tool.
 * @param {Array<object>} constraints - Invariants handed to
 *   `evaluateInvariants3` together with `process.env`; an empty list passes
 *   without evaluating anything.
 * @returns {{passed: boolean, failures: Array<{path: unknown, operator: unknown, expected: unknown, actual: unknown}>}}
 */
function enforceExecutionConstraints(toolName, args, constraints) {
  const failures = constraints.length === 0
    ? []
    : evaluateInvariants3(args, constraints, process.env).map(({ path, rule, detail }) => ({
        path,
        operator: rule,
        // NOTE(review): both fields carry the same `detail` value — confirm
        // whether the invariant failure exposes separate expected/actual data.
        expected: detail,
        actual: detail
      }));
  return { passed: failures.length === 0, failures };
}
|
|
4147
|
+
/**
 * Wrap a tool executor so its arguments are checked against the tool's
 * execution constraints before it runs.
 *
 * The constraint list is looked up once, when the wrapper is created.
 *
 * @param {string} toolName - Key into `compiledSession.executionConstraints`.
 * @param {(args: unknown) => unknown} executor - The real tool implementation.
 * @param {{executionConstraints: Map<string, object[]>}} compiledSession
 * @returns {(args: unknown) => Promise<{result: unknown, constraint_verdict: {passed: boolean, failures: object[]}}>}
 *   When a constraint fails the executor is not invoked and `result` is
 *   `undefined`.
 */
function createWrappedToolExecutor(toolName, executor, compiledSession) {
  const constraints = compiledSession.executionConstraints.get(toolName);
  return async (args) => {
    let verdict = { passed: true, failures: [] };
    if (constraints?.length > 0) {
      verdict = enforceExecutionConstraints(toolName, args, constraints);
      if (!verdict.passed) {
        return { result: void 0, constraint_verdict: verdict };
      }
    }
    const result = await executor(args);
    return { result, constraint_verdict: verdict };
  };
}
|
|
4165
|
+
/**
 * Build a name -> wrapped-executor map for the caller's tool implementations.
 *
 * @param {Record<string, (args: unknown) => unknown> | null | undefined} tools
 * @param {object | null | undefined} compiledSession - When present, each
 *   executor is wrapped with `createWrappedToolExecutor`; otherwise a
 *   pass-through wrapper reports an always-passing verdict.
 * @returns {Record<string, (args: unknown) => Promise<{result: unknown, constraint_verdict: object}>>}
 *   An empty object when `tools` is falsy.
 */
function buildWrappedToolsMap(tools, compiledSession) {
  if (!tools) {
    return {};
  }
  const wrap = compiledSession
    ? (name, executor) => createWrappedToolExecutor(name, executor, compiledSession)
    : (_name, executor) => async (args) => {
        const result = await executor(args);
        return { result, constraint_verdict: { passed: true, failures: [] } };
      };
  const wrappedEntries = Object.entries(tools).map(([name, executor]) => [name, wrap(name, executor)]);
  return Object.fromEntries(wrappedEntries);
}
|
|
4185
|
+
|
|
4186
|
+
// src/runtimeClient.ts
|
|
4187
|
+
import crypto3 from "crypto";
|
|
4188
|
+
// Consecutive recorded failures after which the runtime client opens its circuit.
var CIRCUIT_BREAKER_FAILURE_LIMIT2 = 5;
// How long the circuit stays open once tripped: ten minutes, in milliseconds.
var CIRCUIT_BREAKER_MS2 = 10 * 60 * 1e3;
// Per-request timeout used when the caller supplies none: thirty seconds.
var DEFAULT_TIMEOUT_MS2 = 30 * 1e3;
// Base URL used when the caller supplies no `apiUrl`.
var DEFAULT_RUNTIME_URL = "https://app.replayci.com";
|
|
4192
|
+
/**
 * Error raised by the runtime client for transport and API failures.
 *
 * `code` is a machine-readable identifier (the values thrown in this file
 * include "CIRCUIT_OPEN", "TIMEOUT", "NETWORK_ERROR" and server-provided
 * codes); `httpStatus` is the associated HTTP status.
 */
var RuntimeClientError = class extends Error {
  code;
  httpStatus;
  /**
   * @param {string} code - Machine-readable error code.
   * @param {string} message - Human-readable description.
   * @param {number} httpStatus - HTTP status associated with the failure.
   */
  constructor(code, message, httpStatus) {
    super(message);
    Object.assign(this, { name: "RuntimeClientError", code, httpStatus });
  }
};
|
|
4202
|
+
/**
 * Factory for `RuntimeClient`.
 *
 * @param {object} opts - Passed unchanged to the `RuntimeClient` constructor.
 * @returns {RuntimeClient} A new client instance.
 */
function createRuntimeClient(opts) {
  const client = new RuntimeClient(opts);
  return client;
}
|
|
4205
|
+
/**
 * HTTP client for the replay runtime API.
 *
 * Every public method maps a camelCase input object onto the snake_case wire
 * format, issues one request, and maps the relevant part of the response back
 * to camelCase. Transport failures are reported as `RuntimeClientError`.
 *
 * A simple circuit breaker protects the runtime: each network error, timeout,
 * HTTP 5xx or HTTP 429 counts as a failure; once the failure count reaches
 * `CIRCUIT_BREAKER_FAILURE_LIMIT2` requests are rejected locally with
 * "CIRCUIT_OPEN" for `CIRCUIT_BREAKER_MS2` milliseconds. Any successful HTTP
 * response resets the failure count.
 */
var RuntimeClient = class {
  apiKey;
  baseUrl;
  timeoutMs;
  fetchImpl;
  now;
  failureCount = 0;
  circuitOpenUntil = 0;
  /**
   * @param {object} opts
   * @param {string} opts.apiKey - Sent as a Bearer token on every request.
   * @param {string} [opts.apiUrl] - Base URL; defaults to `DEFAULT_RUNTIME_URL`.
   * @param {number} [opts.timeoutMs] - Per-request timeout; defaults to `DEFAULT_TIMEOUT_MS2`.
   * @param {Function} [opts.fetchImpl] - fetch-compatible function; defaults to the global `fetch`.
   * @param {() => number} [opts.now] - Clock in milliseconds; defaults to `Date.now`.
   */
  constructor(opts) {
    this.apiKey = opts.apiKey;
    this.baseUrl = normalizeUrl(opts.apiUrl ?? DEFAULT_RUNTIME_URL);
    this.timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS2;
    // The default fetch is invoked as `this.fetchImpl(...)`; bind it so it does
    // not receive the client as `this`, which some runtimes reject.
    const defaultFetch = globalThis.fetch;
    this.fetchImpl = opts.fetchImpl ?? (typeof defaultFetch === "function" ? defaultFetch.bind(globalThis) : defaultFetch);
    this.now = opts.now ?? (() => Date.now());
  }
  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------
  /**
   * Create a runtime session.
   * Optional inputs are only sent when provided; `modelId`, `principal` and
   * `sessionContractHash` are sent whenever they are not `undefined` (so an
   * explicit `null` is transmitted).
   * @returns {Promise<object>} Session summary, plus `workflow` when the
   *   response carries one.
   */
  async createSession(input) {
    const body = {
      agent: input.agent,
      requested_mode: input.requestedMode,
      requested_tier: input.requestedTier,
      adapter_capability: input.adapterCapability,
      contract_hash: input.contractHash
    };
    if (input.sessionId) body.session_id = input.sessionId;
    if (input.allowAdvisoryDowngrade) body.allow_advisory_downgrade = true;
    if (input.provider) body.provider = input.provider;
    if (input.modelId !== void 0) body.model_id = input.modelId;
    if (input.principal !== void 0) body.principal = input.principal;
    if (input.compiledSession) body.compiled_session = input.compiledSession;
    if (input.sessionContractHash !== void 0) body.session_contract_hash = input.sessionContractHash;
    if (input.workflow) {
      const wf = {
        workflow_id: input.workflow.workflowId,
        role: input.workflow.role
      };
      if (input.workflow.compiledWorkflow) {
        wf.compiled_workflow = {
          hash: input.workflow.compiledWorkflow.hash,
          body: input.workflow.compiledWorkflow.body
        };
      }
      if (input.workflow.parentSessionId) wf.parent_session_id = input.workflow.parentSessionId;
      if (input.workflow.handoffId) wf.handoff_id = input.workflow.handoffId;
      body.workflow = wf;
    }
    const data = await this.post("/api/v1/replay/sessions", body);
    const s = data.session;
    const result = {
      sessionId: s.session_id,
      mode: s.mode,
      tier: s.tier,
      status: s.status,
      stateVersion: s.state_version,
      controlRevision: s.control_revision,
      leaseFence: s.lease_fence ?? null
    };
    const wfResp = data.workflow;
    if (wfResp) {
      result.workflow = {
        workflowId: wfResp.workflow_id,
        role: wfResp.role,
        stateVersion: wfResp.state_version,
        controlRevision: wfResp.control_revision,
        linkId: wfResp.link_id,
        generation: wfResp.generation,
        depth: wfResp.depth,
        status: wfResp.status
      };
    }
    return result;
  }
  /** Submit a request envelope for preflight and return the prepared request. */
  async preflight(input) {
    const body = {
      lease_fence: input.leaseFence,
      provider: input.provider,
      model_id: input.modelId,
      request_envelope: input.requestEnvelope
    };
    const data = await this.post(
      `/api/v1/replay/sessions/${encodeURIComponent(input.sessionId)}/preflight`,
      body
    );
    const pr = data.prepared_request;
    return {
      preparedRequestId: pr.prepared_request_id,
      stateVersion: pr.state_version,
      controlRevision: pr.control_revision,
      leaseFence: pr.lease_fence,
      requestEnvelope: pr.request_envelope,
      removedTools: pr.removed_tools ?? []
    };
  }
  /** Submit a model response for a prepared request and return the decision. */
  async submitProposal(input) {
    const body = {
      lease_fence: input.leaseFence,
      prepared_request_id: input.preparedRequestId,
      response_envelope: input.responseEnvelope
    };
    const data = await this.post(
      `/api/v1/replay/sessions/${encodeURIComponent(input.sessionId)}/proposals`,
      body
    );
    const d = data.decision;
    const pending = d.pending_calls ?? [];
    const blocked = d.blocked_calls ?? [];
    return {
      decision: d.result,
      advisory: d.mode === "advisory",
      stateVersion: d.state_version,
      pendingCalls: pending.map((pc) => ({
        pendingCallId: pc.pending_call_id,
        toolCallId: pc.tool_call_id,
        toolName: pc.tool_name,
        argumentsHash: pc.arguments_hash
      })),
      blockedCalls: blocked.map((bc) => ({
        toolName: bc.tool_name,
        reason: bc.reason
      }))
    };
  }
  /** Report the outcome of an executed pending call. */
  async submitReceipt(input) {
    const body = {
      lease_fence: input.leaseFence,
      pending_call_id: input.pendingCallId,
      executor_kind: input.executorKind,
      tool_name: input.toolName,
      arguments_hash: input.argumentsHash,
      status: input.status,
      started_at: input.startedAt,
      completed_at: input.completedAt
    };
    if (input.executorId) body.executor_id = input.executorId;
    if (input.outputHash) body.output_hash = input.outputHash;
    if (input.outputExtract) body.output_extract = input.outputExtract;
    if (input.resourceValues) body.resource_values = input.resourceValues;
    if (input.evidenceArtifactHash) body.evidence_artifact_hash = input.evidenceArtifactHash;
    const data = await this.post(
      `/api/v1/replay/sessions/${encodeURIComponent(input.sessionId)}/receipts`,
      body
    );
    const r = data.resolution;
    return {
      accepted: r.accepted,
      commitState: r.commit_state,
      stateAdvanced: r.state_advanced,
      stateVersion: r.state_version
    };
  }
  /** Replace the session's allowed-tools filter. */
  async setToolFilter(input) {
    const body = {
      lease_fence: input.leaseFence,
      allowed_tools: input.allowedTools
    };
    const data = await this.post(
      `/api/v1/replay/sessions/${encodeURIComponent(input.sessionId)}/tool-filter`,
      body
    );
    const s = data.session;
    return {
      controlRevision: s.control_revision
    };
  }
  /** Report a bypass for the session; `detail` is sent only when truthy. */
  async reportBypass(input) {
    const body = {
      source: input.source
    };
    if (input.detail) body.detail = input.detail;
    const data = await this.post(
      `/api/v1/replay/sessions/${encodeURIComponent(input.sessionId)}/report-bypass`,
      body
    );
    const s = data.session;
    return {
      status: s.status
    };
  }
  /** Kill the session with the given reason. */
  async killSession(input) {
    const body = {
      lease_fence: input.leaseFence,
      reason: input.reason
    };
    const data = await this.post(
      `/api/v1/replay/sessions/${encodeURIComponent(input.sessionId)}/kill`,
      body
    );
    const s = data.session;
    return {
      status: s.status
    };
  }
  /** v4: Get workflow state from the runtime. */
  async getWorkflowState(workflowId) {
    const data = await this.get(
      `/api/v1/replay/workflows/${encodeURIComponent(workflowId)}`
    );
    const w = data.workflow;
    return {
      workflowId: w.workflow_id,
      rootSessionId: w.root_session_id,
      status: w.status,
      stateVersion: w.state_version,
      controlRevision: w.control_revision,
      totalSessionCount: w.total_session_count,
      activeSessionCount: w.active_session_count,
      totalStepCount: w.total_step_count,
      totalCost: w.total_cost,
      totalHandoffCount: w.total_handoff_count,
      unresolvedHandoffCount: w.unresolved_handoff_count,
      lastEventSeq: w.last_event_seq,
      killScope: w.kill_scope,
      createdAt: w.created_at,
      updatedAt: w.updated_at
    };
  }
  /** v4: Offer a handoff from a session. */
  async offerHandoff(input) {
    const body = {
      workflow_id: input.workflowId,
      from_role: input.fromRole,
      to_role: input.toRole,
      handoff_id: input.handoffId
    };
    if (input.artifactRefs !== void 0) body.artifact_refs = input.artifactRefs;
    if (input.summary !== void 0) body.summary = input.summary;
    const data = await this.post(
      `/api/v1/replay/sessions/${encodeURIComponent(input.sessionId)}/handoffs`,
      body
    );
    const h = data.handoff;
    return {
      handoffId: h.handoff_id,
      eventSeq: h.event_seq,
      stateVersion: h.state_version
    };
  }
  /** Snapshot of the circuit-breaker state. */
  getHealth() {
    return {
      circuitOpen: this.isCircuitOpen(),
      failureCount: this.failureCount,
      circuitOpenUntil: this.circuitOpenUntil
    };
  }
  /** @returns {boolean} True while requests are being rejected locally. */
  isCircuitOpen() {
    return this.now() < this.circuitOpenUntil;
  }
  // -------------------------------------------------------------------------
  // Internal
  // -------------------------------------------------------------------------
  /** Issue an authenticated GET and return the parsed response body. */
  async get(path) {
    return this.request("GET", path);
  }
  /**
   * Issue an authenticated JSON POST and return the parsed response body.
   * A fresh Idempotency-Key is generated for every call.
   */
  async post(path, body) {
    return this.request("POST", path, body);
  }
  /**
   * Shared transport for `get` and `post`.
   *
   * @param {"GET" | "POST"} method
   * @param {string} path - Path appended to the base URL.
   * @param {unknown} [body] - JSON-serialised and sent for POST only.
   * @returns {Promise<object>} The parsed body of a successful response.
   * @throws {RuntimeClientError} "CIRCUIT_OPEN" (503) when the breaker is
   *   open; the server's error code for non-2xx responses and for 2xx
   *   responses whose body has a falsy `ok`; "TIMEOUT" (408) on abort;
   *   "NETWORK_ERROR" (0) for any other failure.
   */
  async request(method, path, body) {
    if (this.isCircuitOpen()) {
      throw new RuntimeClientError(
        "CIRCUIT_OPEN",
        "Runtime client circuit breaker is open",
        503
      );
    }
    const url = `${this.baseUrl}${path}`;
    const isPost = method === "POST";
    const headers = isPost ? {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${this.apiKey}`,
      "Idempotency-Key": generateIdempotencyKey()
    } : {
      "Authorization": `Bearer ${this.apiKey}`
    };
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeoutMs);
    try {
      // Serialisation happens inside the try so an unserialisable body is
      // reported as a RuntimeClientError like every other transport failure.
      const init = isPost ? { method, headers, body: JSON.stringify(body), signal: controller.signal } : { method, headers, signal: controller.signal };
      const response = await this.fetchImpl(url, init);
      if (!response.ok) {
        const errorBody = await response.json().catch(() => ({}));
        const errorCode = errorBody?.error ?? "UNKNOWN";
        const errorMessage = errorBody?.message ?? `HTTP ${response.status}`;
        // Only server-side trouble and rate limiting count towards the breaker.
        if (response.status >= 500 || response.status === 429) {
          this.recordFailure();
        }
        throw new RuntimeClientError(errorCode, errorMessage, response.status);
      }
      this.failureCount = 0;
      const data = await response.json();
      if (!data.ok) {
        throw new RuntimeClientError(
          data.error ?? "UNKNOWN",
          data.message ?? "Request failed",
          400
        );
      }
      return data;
    } catch (err) {
      if (err instanceof RuntimeClientError) throw err;
      this.recordFailure();
      if (err instanceof Error && err.name === "AbortError") {
        throw new RuntimeClientError("TIMEOUT", "Runtime request timed out", 408);
      }
      throw new RuntimeClientError(
        "NETWORK_ERROR",
        err instanceof Error ? err.message : "Network error",
        0
      );
    } finally {
      // Keep the timer armed until the body has been read, so a response that
      // stalls after its headers arrive is still aborted.
      clearTimeout(timeoutId);
    }
  }
  /** Count a failure and open the circuit once the limit is reached. */
  recordFailure() {
    this.failureCount++;
    if (this.failureCount >= CIRCUIT_BREAKER_FAILURE_LIMIT2) {
      this.circuitOpenUntil = this.now() + CIRCUIT_BREAKER_MS2;
    }
  }
};
|
|
4574
|
+
/**
 * Strip every trailing "/" from a base URL so that paths beginning with "/"
 * can be appended without producing "//".
 *
 * Previously only a single trailing slash was removed, so a base URL such as
 * "https://host//" still yielded "https://host//api/...".
 *
 * @param {string} url - Base URL.
 * @returns {string} `url` without trailing slashes.
 */
function normalizeUrl(url) {
  let end = url.length;
  // 47 is the char code of "/"; a manual scan avoids regex backtracking on long inputs.
  while (end > 0 && url.charCodeAt(end - 1) === 47) {
    end--;
  }
  return end === url.length ? url : url.slice(0, end);
}
|
|
4577
|
+
/**
 * Produce a fresh idempotency key for a POST request.
 *
 * @returns {string} "sdk_" followed by a random UUID with its dashes removed.
 */
function generateIdempotencyKey() {
  const compactUuid = crypto3.randomUUID().split("-").join("");
  return `sdk_${compactUuid}`;
}
|
|
4580
|
+
|
|
4581
|
+
// src/replay.ts
|
|
4582
|
+
// Registry-wide symbols (Symbol.for) used as attachment markers for replay() and observe().
var REPLAY_ATTACHED2 = /* @__PURE__ */ Symbol.for("replayci.replay_attached");
var OBSERVE_WRAPPED = /* @__PURE__ */ Symbol.for("replayci.wrapped");
// Upper bound applied to the caller-supplied `maxRetries` option.
var MAX_RETRIES = 5;
// Agent name used when the caller passes none (or an empty one).
var DEFAULT_AGENT2 = "default";
// Default for the `maxUnguardedCalls` option.
var DEFAULT_MAX_UNGUARDED_CALLS = 3;
|
|
4587
|
+
function replay(client, opts = {}) {
|
|
4588
|
+
assertSupportedNodeRuntime();
|
|
4589
|
+
const sessionId = opts.sessionId ?? generateSessionId2();
|
|
4590
|
+
const agent = typeof opts.agent === "string" && opts.agent.length > 0 ? opts.agent : DEFAULT_AGENT2;
|
|
4591
|
+
const mode = opts.mode ?? "enforce";
|
|
4592
|
+
const gateMode = opts.gate ?? "reject_all";
|
|
4593
|
+
const onError = opts.onError ?? "block";
|
|
4594
|
+
const unmatchedPolicy = opts.unmatchedPolicy ?? "block";
|
|
4595
|
+
const maxRetries = Math.min(Math.max(0, opts.maxRetries ?? 0), MAX_RETRIES);
|
|
4596
|
+
const compatEnforcement = opts.compatEnforcement ?? "protective";
|
|
4597
|
+
const diagnostics = opts.diagnostics;
|
|
4598
|
+
let provider;
|
|
4599
|
+
try {
|
|
4600
|
+
provider = detectProvider(client);
|
|
4601
|
+
} catch {
|
|
4602
|
+
emitDiagnostic2(diagnostics, { type: "replay_inactive", reason: "unsupported_client" });
|
|
4603
|
+
return createInactiveSession(client, sessionId, "Unsupported client");
|
|
4604
|
+
}
|
|
4605
|
+
if (isObserveWrapped(client) || isReplayAttached2(client)) {
|
|
4606
|
+
emitDiagnostic2(diagnostics, { type: "replay_inactive", reason: "already_attached" });
|
|
4607
|
+
return createInactiveSession(client, sessionId, "Client already has an active observe() or replay() attachment");
|
|
4608
|
+
}
|
|
4609
|
+
let contracts;
|
|
4610
|
+
try {
|
|
4611
|
+
contracts = resolveContracts(opts);
|
|
4612
|
+
} catch (err) {
|
|
4613
|
+
const detail = err instanceof Error ? err.message : "Failed to load contracts";
|
|
4614
|
+
emitDiagnostic2(diagnostics, { type: "replay_contract_error", details: detail });
|
|
4615
|
+
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4616
|
+
}
|
|
4617
|
+
const configError = validateConfig(contracts, opts);
|
|
4618
|
+
if (configError) {
|
|
4619
|
+
emitDiagnostic2(diagnostics, { type: "replay_contract_error", details: configError.message });
|
|
4620
|
+
return createBlockingInactiveSession(client, sessionId, configError.message, configError);
|
|
4621
|
+
}
|
|
4622
|
+
let discoveredSessionYaml = null;
|
|
4623
|
+
try {
|
|
4624
|
+
discoveredSessionYaml = discoverSessionYaml(opts);
|
|
4625
|
+
} catch (err) {
|
|
4626
|
+
const detail = `session.yaml: ${err instanceof Error ? err.message : String(err)}`;
|
|
4627
|
+
emitDiagnostic2(diagnostics, { type: "replay_contract_error", details: detail });
|
|
4628
|
+
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4629
|
+
}
|
|
4630
|
+
let sessionYaml = discoveredSessionYaml;
|
|
4631
|
+
if (!sessionYaml && opts.providerConstraints) {
|
|
4632
|
+
sessionYaml = { schema_version: "1.0", agent, provider_constraints: opts.providerConstraints };
|
|
4633
|
+
} else if (sessionYaml && opts.providerConstraints && !sessionYaml.provider_constraints) {
|
|
4634
|
+
sessionYaml = { ...sessionYaml, provider_constraints: opts.providerConstraints };
|
|
4635
|
+
}
|
|
4636
|
+
let compiledSession = null;
|
|
4637
|
+
try {
|
|
4638
|
+
compiledSession = compileSession(contracts, sessionYaml, {
|
|
4639
|
+
principal: opts.principal,
|
|
4640
|
+
tools: opts.tools ? new Map(Object.entries(opts.tools)) : void 0
|
|
4641
|
+
});
|
|
4642
|
+
} catch (err) {
|
|
4643
|
+
emitDiagnostic2(diagnostics, {
|
|
4644
|
+
type: "replay_contract_error",
|
|
4645
|
+
details: `Session compilation: ${err instanceof Error ? err.message : String(err)}`
|
|
4646
|
+
});
|
|
4647
|
+
}
|
|
4648
|
+
if (compiledSession?.warnings && compiledSession.warnings.length > 0) {
|
|
4649
|
+
for (const warning of compiledSession.warnings) {
|
|
4650
|
+
emitDiagnostic2(diagnostics, {
|
|
4651
|
+
type: "replay_contract_error",
|
|
4652
|
+
details: `Compile warning: ${warning}`
|
|
4653
|
+
});
|
|
4654
|
+
}
|
|
4655
|
+
}
|
|
4656
|
+
const providerConstraints = compiledSession?.providerConstraints ?? opts.providerConstraints ?? null;
|
|
4657
|
+
if (providerConstraints) {
|
|
4658
|
+
const spec = providerConstraints[provider];
|
|
4659
|
+
if (spec) {
|
|
4660
|
+
if (spec.block_incompatible && spec.block_incompatible.length > 0) {
|
|
4661
|
+
const detail = `Provider '${provider}' is blocked by provider_constraints: ${spec.block_incompatible.join("; ")}`;
|
|
4662
|
+
const err = new ReplayConfigError("provider_incompatible", detail);
|
|
4663
|
+
emitDiagnostic2(diagnostics, { type: "replay_contract_error", details: detail });
|
|
4664
|
+
return createBlockingInactiveSession(client, sessionId, detail, err);
|
|
4665
|
+
}
|
|
4666
|
+
if (spec.warn_incompatible && spec.warn_incompatible.length > 0) {
|
|
4667
|
+
emitDiagnostic2(diagnostics, {
|
|
4668
|
+
type: "replay_provider_warning",
|
|
4669
|
+
provider,
|
|
4670
|
+
warnings: spec.warn_incompatible
|
|
4671
|
+
});
|
|
4672
|
+
}
|
|
4673
|
+
}
|
|
4674
|
+
}
|
|
4675
|
+
let compiledWorkflow = null;
|
|
4676
|
+
const workflowOpts = opts.workflow;
|
|
4677
|
+
let workflowId = null;
|
|
4678
|
+
if (workflowOpts) {
|
|
4679
|
+
if (workflowOpts.type === "root") {
|
|
4680
|
+
workflowId = workflowOpts.workflowId ?? generateWorkflowId();
|
|
4681
|
+
try {
|
|
4682
|
+
compiledWorkflow = discoverWorkflowYaml(opts, workflowOpts);
|
|
4683
|
+
} catch (err) {
|
|
4684
|
+
const detail = `workflow.yaml: ${err instanceof Error ? err.message : String(err)}`;
|
|
4685
|
+
emitDiagnostic2(diagnostics, { type: "replay_workflow_error", session_id: sessionId, details: detail });
|
|
4686
|
+
return createBlockingInactiveSession(client, sessionId, detail);
|
|
4687
|
+
}
|
|
4688
|
+
} else {
|
|
4689
|
+
workflowId = workflowOpts.workflowId;
|
|
4690
|
+
}
|
|
4691
|
+
}
|
|
4692
|
+
const terminalInfo = resolveTerminal(client, provider);
|
|
4693
|
+
if (!terminalInfo) {
|
|
4694
|
+
emitDiagnostic2(diagnostics, { type: "replay_inactive", reason: "unsupported_client" });
|
|
4695
|
+
return createInactiveSession(client, sessionId, "Could not resolve terminal resource");
|
|
4696
|
+
}
|
|
4697
|
+
const protectionLevel = determineProtectionLevel(mode, opts.tools, contracts);
|
|
4698
|
+
const maxUnguardedCalls = opts.maxUnguardedCalls ?? DEFAULT_MAX_UNGUARDED_CALLS;
|
|
4699
|
+
const narrowingFeedback = opts.narrowingFeedback ?? "silent";
|
|
4700
|
+
const apiKey = resolveApiKey2(opts);
|
|
4701
|
+
let runtimeClient = null;
|
|
4702
|
+
let runtimeSession = null;
|
|
4703
|
+
let runtimeInitPromise = null;
|
|
4704
|
+
let leaseFence = null;
|
|
4705
|
+
let runtimeDegraded = false;
|
|
4706
|
+
let runtimeInitDone = false;
|
|
4707
|
+
if (protectionLevel === "govern" && apiKey) {
|
|
4708
|
+
const runtimeUrl = opts.runtimeUrl;
|
|
4709
|
+
runtimeClient = new RuntimeClient({
|
|
4710
|
+
apiKey,
|
|
4711
|
+
apiUrl: runtimeUrl
|
|
4712
|
+
});
|
|
4713
|
+
const runtimeRequest = deriveRuntimeRequest(protectionLevel, mode);
|
|
4714
|
+
const sessionInitPayload = {
|
|
4715
|
+
agent,
|
|
4716
|
+
sessionId,
|
|
4717
|
+
requestedMode: runtimeRequest.requestedMode,
|
|
4718
|
+
requestedTier: runtimeRequest.requestedTier,
|
|
4719
|
+
adapterCapability: "full",
|
|
4720
|
+
contractHash: compiledSession?.compiledHash ?? "",
|
|
4721
|
+
compiledSession: compiledSession ? {
|
|
4722
|
+
schemaVersion: "1",
|
|
4723
|
+
hash: compiledSession.compiledHash,
|
|
4724
|
+
body: serializeCompiledSession(compiledSession)
|
|
4725
|
+
} : void 0,
|
|
4726
|
+
principal: opts.principal ?? null
|
|
4727
|
+
};
|
|
4728
|
+
if (workflowOpts && workflowId) {
|
|
4729
|
+
if (workflowOpts.type === "root" && compiledWorkflow) {
|
|
4730
|
+
const serialized = serializeCompiledWorkflow(compiledWorkflow);
|
|
4731
|
+
sessionInitPayload.workflow = {
|
|
4732
|
+
workflowId,
|
|
4733
|
+
role: workflowOpts.role,
|
|
4734
|
+
compiledWorkflow: {
|
|
4735
|
+
hash: compiledWorkflow.compiledHash,
|
|
4736
|
+
body: serialized
|
|
4737
|
+
}
|
|
4738
|
+
};
|
|
4739
|
+
} else if (workflowOpts.type === "child") {
|
|
4740
|
+
sessionInitPayload.workflow = {
|
|
4741
|
+
workflowId,
|
|
4742
|
+
role: workflowOpts.role,
|
|
4743
|
+
parentSessionId: workflowOpts.parentSessionId,
|
|
4744
|
+
handoffId: workflowOpts.handoffId
|
|
4745
|
+
};
|
|
4746
|
+
}
|
|
4747
|
+
}
|
|
4748
|
+
runtimeInitPromise = runtimeClient.createSession(sessionInitPayload).then((result) => {
|
|
4749
|
+
runtimeSession = result;
|
|
4750
|
+
leaseFence = result.leaseFence;
|
|
4751
|
+
runtimeInitDone = true;
|
|
4752
|
+
if (result.workflow && workflowOpts) {
|
|
4753
|
+
emitDiagnostic2(diagnostics, {
|
|
4754
|
+
type: "replay_workflow_attached",
|
|
4755
|
+
session_id: sessionId,
|
|
4756
|
+
workflow_id: result.workflow.workflowId,
|
|
4757
|
+
role: result.workflow.role,
|
|
4758
|
+
attach_type: workflowOpts.type
|
|
4759
|
+
});
|
|
4760
|
+
}
|
|
4761
|
+
}).catch(() => {
|
|
4762
|
+
runtimeInitDone = true;
|
|
4763
|
+
runtimeDegraded = true;
|
|
4764
|
+
emitDiagnostic2(diagnostics, { type: "replay_inactive", reason: "runtime_degraded_to_protect" });
|
|
4765
|
+
});
|
|
4766
|
+
} else {
|
|
4767
|
+
runtimeInitDone = true;
|
|
4768
|
+
}
|
|
4769
|
+
const initialTier = protectionLevel === "govern" && apiKey ? "strong" : "compat";
|
|
4770
|
+
const principalValue = opts.principal != null && typeof opts.principal === "object" && !Array.isArray(opts.principal) ? opts.principal : null;
|
|
4771
|
+
let sessionState = createInitialState(sessionId, { tier: initialTier, agent, principal: principalValue });
|
|
4772
|
+
if (compiledSession?.phases) {
|
|
4773
|
+
const initial = compiledSession.phases.find((p) => p.initial);
|
|
4774
|
+
if (initial) {
|
|
4775
|
+
sessionState = { ...sessionState, currentPhase: initial.name };
|
|
4776
|
+
}
|
|
4777
|
+
}
|
|
4778
|
+
sessionState = { ...sessionState, contractHash: compiledSession?.compiledHash ?? null };
|
|
4779
|
+
let killed = false;
|
|
4780
|
+
let killedAt = null;
|
|
4781
|
+
let restored = false;
|
|
4782
|
+
let bypassDetected = false;
|
|
4783
|
+
let lastShadowDeltaValue = null;
|
|
4784
|
+
let lastNarrowResult = null;
|
|
4785
|
+
let shadowEvaluationCount = 0;
|
|
4786
|
+
let manualFilter = null;
|
|
4787
|
+
const deferredReceipts = /* @__PURE__ */ new Map();
|
|
4788
|
+
const contractLimits = resolveSessionLimits(contracts);
|
|
4789
|
+
const compiledLimits = compiledSession?.sessionLimits;
|
|
4790
|
+
const mergedLimits = { ...contractLimits ?? {}, ...compiledLimits ?? {} };
|
|
4791
|
+
const resolvedSessionLimits = Object.keys(mergedLimits).length > 0 ? mergedLimits : null;
|
|
4792
|
+
const store = opts.store ?? null;
|
|
4793
|
+
let storeLoadPromise = null;
|
|
4794
|
+
let storeLoadDone = false;
|
|
4795
|
+
if (store) {
|
|
4796
|
+
storeLoadPromise = Promise.resolve().then(() => store.load()).then((loaded) => {
|
|
4797
|
+
if (loaded && loaded.sessionId === sessionId) {
|
|
4798
|
+
const contractDrift = loaded.contractHash !== null && loaded.contractHash !== (compiledSession?.compiledHash ?? null);
|
|
4799
|
+
sessionState = loaded;
|
|
4800
|
+
if (loaded.killed) {
|
|
4801
|
+
killed = true;
|
|
4802
|
+
killedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
4803
|
+
}
|
|
4804
|
+
emitDiagnostic2(diagnostics, {
|
|
4805
|
+
type: "replay_resumed",
|
|
4806
|
+
session_id: sessionId,
|
|
4807
|
+
state_version: loaded.stateVersion,
|
|
4808
|
+
contract_drift: contractDrift
|
|
4809
|
+
});
|
|
4810
|
+
} else {
|
|
4811
|
+
void Promise.resolve(store.compareAndSet(loaded?.stateVersion ?? 0, sessionState));
|
|
4812
|
+
}
|
|
4813
|
+
storeLoadDone = true;
|
|
4814
|
+
}).catch(() => {
|
|
4815
|
+
try {
|
|
4816
|
+
void Promise.resolve(store.compareAndSet(0, sessionState));
|
|
4817
|
+
} catch {
|
|
4818
|
+
}
|
|
4819
|
+
storeLoadDone = true;
|
|
4820
|
+
});
|
|
4821
|
+
} else {
|
|
4822
|
+
storeLoadDone = true;
|
|
4823
|
+
}
|
|
4824
|
+
const buffer = apiKey ? new CaptureBuffer({
|
|
4825
|
+
apiKey,
|
|
4826
|
+
endpoint: void 0,
|
|
4827
|
+
diagnostics
|
|
4828
|
+
}) : null;
|
|
4829
|
+
if (buffer) {
|
|
4830
|
+
registerBeforeExit(buffer);
|
|
4831
|
+
}
|
|
4832
|
+
/**
 * Best-effort write of a new session state to the optional external store.
 *
 * Does nothing when no store was configured. Any failure, whether thrown
 * synchronously by `store.compareAndSet` or delivered as an asynchronous
 * rejection, is deliberately ignored so that persistence problems never
 * interrupt the caller.
 *
 * NOTE(review): presumably `compareAndSet` is an optimistic-concurrency write
 * keyed on the previous state version; confirm against the store contract.
 *
 * @param {*} prevVersion - Version passed as the first argument to `store.compareAndSet`.
 * @param {*} newState - State passed as the second argument to `store.compareAndSet`.
 * @returns {void}
 */
function syncStateToStore(prevVersion, newState) {
  if (!store) {
    return;
  }
  try {
    const result = store.compareAndSet(prevVersion, newState);
    if (result && typeof result.then === "function") {
      // The guard only proves that `then` exists, so the rejection handler is
      // attached through `then` rather than `catch`. A thenable that does not
      // implement `catch` would otherwise raise a TypeError here and its
      // rejection would never receive a handler.
      void result.then(void 0, () => {
      });
    }
  } catch {
    // Intentionally ignored: store persistence is best-effort.
  }
}
|
|
4843
|
+
/**
 * Best-effort append of a capture record to the optional external store.
 *
 * Does nothing when no store was configured. Any failure, whether thrown
 * synchronously by `store.appendCapture` or delivered as an asynchronous
 * rejection, is deliberately ignored so that persistence problems never
 * interrupt the caller.
 *
 * @param {*} capture - Value passed unchanged to `store.appendCapture`.
 * @returns {void}
 */
function appendCaptureToStore(capture) {
  if (!store) {
    return;
  }
  try {
    const result = store.appendCapture(capture);
    if (result && typeof result.then === "function") {
      // The guard only proves that `then` exists, so the rejection handler is
      // attached through `then` rather than `catch`. A thenable that does not
      // implement `catch` would otherwise raise a TypeError here and its
      // rejection would never receive a handler.
      void result.then(void 0, () => {
      });
    }
  } catch {
    // Intentionally ignored: store persistence is best-effort.
  }
}
|
|
4854
|
+
const enforcementCreate = async function replayEnforcementCreate(...args) {
|
|
4855
|
+
if (killed) {
|
|
4856
|
+
throw new ReplayKillError(sessionId, killedAt);
|
|
4857
|
+
}
|
|
4858
|
+
if (restored) {
|
|
4859
|
+
throw new ReplayContractError(
|
|
4860
|
+
"Session has been restored \u2014 wrapper is inert",
|
|
4861
|
+
{ action: "block", tool_calls: [], blocked: [], response_modification: "reject_all" },
|
|
4862
|
+
"",
|
|
4863
|
+
[]
|
|
4864
|
+
);
|
|
4865
|
+
}
|
|
4866
|
+
if (runtimeInitPromise && !runtimeInitDone) {
|
|
4867
|
+
await runtimeInitPromise;
|
|
4868
|
+
}
|
|
4869
|
+
if (storeLoadPromise && !storeLoadDone) {
|
|
4870
|
+
await storeLoadPromise;
|
|
4871
|
+
}
|
|
4872
|
+
if (protectionLevel === "govern" && runtimeDegraded && sessionState.tier === "strong") {
|
|
4873
|
+
sessionState = { ...sessionState, tier: "compat" };
|
|
4874
|
+
}
|
|
4875
|
+
const effectiveTier = sessionState.tier;
|
|
4876
|
+
const isCompatAdvisory = effectiveTier === "compat" && compatEnforcement === "advisory";
|
|
4877
|
+
if (protectionLevel === "govern" && runtimeDegraded && onError === "block" && !isCompatAdvisory) {
|
|
4878
|
+
throw new ReplayInternalError("Govern mode requires runtime \u2014 runtime unavailable", { sessionId });
|
|
4879
|
+
}
|
|
4880
|
+
const guardStart = Date.now();
|
|
4881
|
+
const timing = {
|
|
4882
|
+
narrow_ms: 0,
|
|
4883
|
+
pre_check_ms: 0,
|
|
4884
|
+
llm_call_ms: 0,
|
|
4885
|
+
validate_ms: 0,
|
|
4886
|
+
cross_step_ms: 0,
|
|
4887
|
+
phase_ms: 0,
|
|
4888
|
+
argument_values_ms: 0,
|
|
4889
|
+
policy_ms: 0,
|
|
4890
|
+
gate_ms: 0,
|
|
4891
|
+
finalize_ms: 0,
|
|
4892
|
+
runtime_ms: 0,
|
|
4893
|
+
total_ms: 0,
|
|
4894
|
+
enforcement_ms: 0
|
|
4895
|
+
};
|
|
4896
|
+
const request = toRecord10(args[0]);
|
|
4897
|
+
const requestToolNames = extractRequestToolNames(request);
|
|
4898
|
+
let narrowResult = null;
|
|
4899
|
+
let activeArgs = args;
|
|
4900
|
+
if (compiledSession && Array.isArray(request.tools) && request.tools.length > 0) {
|
|
4901
|
+
const toolDefs = extractToolDefinitions(request.tools);
|
|
4902
|
+
if (toolDefs.length > 0) {
|
|
4903
|
+
narrowResult = narrowTools(
|
|
4904
|
+
toolDefs,
|
|
4905
|
+
sessionState,
|
|
4906
|
+
compiledSession,
|
|
4907
|
+
unmatchedPolicy,
|
|
4908
|
+
manualFilter
|
|
4909
|
+
);
|
|
4910
|
+
lastNarrowResult = narrowResult;
|
|
4911
|
+
if (narrowResult.removed.length > 0) {
|
|
4912
|
+
if (mode === "enforce") {
|
|
4913
|
+
const modifiedRequest = { ...request };
|
|
4914
|
+
if (narrowResult.allowed.length === 0) {
|
|
4915
|
+
modifiedRequest.tools = [];
|
|
4916
|
+
delete modifiedRequest.tool_choice;
|
|
4917
|
+
} else {
|
|
4918
|
+
modifiedRequest.tools = narrowResult.allowed;
|
|
4919
|
+
}
|
|
4920
|
+
if (narrowingFeedback === "inject") {
|
|
4921
|
+
const injectionMsg = buildNarrowingInjectionMessage(narrowResult);
|
|
4922
|
+
injectNarrowingSystemMessage(modifiedRequest, injectionMsg, provider);
|
|
4923
|
+
emitDiagnostic2(diagnostics, {
|
|
4924
|
+
type: "replay_narrow_injected",
|
|
4925
|
+
session_id: sessionId,
|
|
4926
|
+
message: injectionMsg
|
|
4927
|
+
});
|
|
4928
|
+
}
|
|
4929
|
+
activeArgs = [modifiedRequest, ...Array.prototype.slice.call(args, 1)];
|
|
4930
|
+
}
|
|
4931
|
+
emitDiagnostic2(diagnostics, {
|
|
4932
|
+
type: "replay_narrow",
|
|
4933
|
+
session_id: sessionId,
|
|
4934
|
+
removed: narrowResult.removed
|
|
4935
|
+
});
|
|
4936
|
+
try {
|
|
4937
|
+
opts.onNarrow?.(narrowResult);
|
|
4938
|
+
} catch {
|
|
4939
|
+
}
|
|
4940
|
+
}
|
|
4941
|
+
}
|
|
4942
|
+
}
|
|
4943
|
+
timing.narrow_ms = Date.now() - guardStart;
|
|
4944
|
+
const preCheckStart = Date.now();
|
|
4945
|
+
try {
|
|
4946
|
+
if (mode === "enforce" && resolvedSessionLimits) {
|
|
4947
|
+
const limitResult = checkSessionLimits(sessionState, resolvedSessionLimits);
|
|
4948
|
+
if (limitResult.exceeded) {
|
|
4949
|
+
const decision = {
|
|
4950
|
+
action: "block",
|
|
4951
|
+
tool_calls: [],
|
|
4952
|
+
blocked: [{
|
|
4953
|
+
tool_name: "_session",
|
|
4954
|
+
arguments: "",
|
|
4955
|
+
reason: "session_limit_exceeded",
|
|
4956
|
+
contract_file: "",
|
|
4957
|
+
failures: [{ path: "$", operator: "session_limit", expected: "", found: "", message: limitResult.reason ?? "session limit exceeded" }]
|
|
4958
|
+
}],
|
|
4959
|
+
response_modification: gateMode
|
|
4960
|
+
};
|
|
4961
|
+
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
4962
|
+
if (resolvedSessionLimits.circuit_breaker) {
|
|
4963
|
+
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
4964
|
+
if (cbResult.triggered) {
|
|
4965
|
+
killed = true;
|
|
4966
|
+
killedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
4967
|
+
sessionState = killSession(sessionState);
|
|
4968
|
+
emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
|
|
4969
|
+
}
|
|
4970
|
+
}
|
|
4971
|
+
timing.pre_check_ms = Date.now() - preCheckStart;
|
|
4972
|
+
captureDecision(
|
|
4973
|
+
decision,
|
|
4974
|
+
null,
|
|
4975
|
+
request,
|
|
4976
|
+
guardStart,
|
|
4977
|
+
requestToolNames,
|
|
4978
|
+
null,
|
|
4979
|
+
narrowResult,
|
|
4980
|
+
null,
|
|
4981
|
+
null,
|
|
4982
|
+
null,
|
|
4983
|
+
void 0,
|
|
4984
|
+
timing
|
|
4985
|
+
);
|
|
4986
|
+
if (isCompatAdvisory) {
|
|
4987
|
+
emitDiagnostic2(diagnostics, {
|
|
4988
|
+
type: "replay_compat_advisory",
|
|
4989
|
+
session_id: sessionId,
|
|
4990
|
+
would_block: decision.blocked,
|
|
4991
|
+
details: limitResult.reason ?? "session limit exceeded"
|
|
4992
|
+
});
|
|
4993
|
+
} else {
|
|
4994
|
+
throw buildContractError2(decision);
|
|
4995
|
+
}
|
|
4996
|
+
}
|
|
4997
|
+
if (isAtHardStepCap(sessionState)) {
|
|
4998
|
+
const decision = {
|
|
4999
|
+
action: "block",
|
|
5000
|
+
tool_calls: [],
|
|
5001
|
+
blocked: [{
|
|
5002
|
+
tool_name: "_session",
|
|
5003
|
+
arguments: "",
|
|
5004
|
+
reason: "session_limit_exceeded",
|
|
5005
|
+
contract_file: "",
|
|
5006
|
+
failures: [{ path: "$", operator: "session_limit", expected: "", found: "", message: "hard step cap (10,000) reached" }]
|
|
5007
|
+
}],
|
|
5008
|
+
response_modification: gateMode
|
|
5009
|
+
};
|
|
5010
|
+
timing.pre_check_ms = Date.now() - preCheckStart;
|
|
5011
|
+
captureDecision(
|
|
5012
|
+
decision,
|
|
5013
|
+
null,
|
|
5014
|
+
request,
|
|
5015
|
+
guardStart,
|
|
5016
|
+
requestToolNames,
|
|
5017
|
+
null,
|
|
5018
|
+
narrowResult,
|
|
5019
|
+
null,
|
|
5020
|
+
null,
|
|
5021
|
+
null,
|
|
5022
|
+
void 0,
|
|
5023
|
+
timing
|
|
5024
|
+
);
|
|
5025
|
+
throw buildContractError2(decision);
|
|
5026
|
+
}
|
|
5027
|
+
}
|
|
5028
|
+
const messages = Array.isArray(request.messages) ? request.messages : [];
|
|
5029
|
+
if (messages.length > 0) {
|
|
5030
|
+
const msgResult = validateToolResultMessages(messages, contracts, provider);
|
|
5031
|
+
if (!msgResult.passed) {
|
|
5032
|
+
emitDiagnostic2(diagnostics, {
|
|
5033
|
+
type: "replay_contract_error",
|
|
5034
|
+
details: `Message validation: ${msgResult.failures.map((f) => f.detail).join("; ")}`
|
|
5035
|
+
});
|
|
5036
|
+
}
|
|
5037
|
+
}
|
|
5038
|
+
if (messages.length > 0) {
|
|
5039
|
+
const toolResults = extractToolResults(messages, provider);
|
|
5040
|
+
if (toolResults.length > 0) {
|
|
5041
|
+
const outputUpdates = extractOutputFromToolResults(toolResults, sessionState, contracts);
|
|
5042
|
+
sessionState = applyOutputExtracts(sessionState, outputUpdates);
|
|
5043
|
+
}
|
|
5044
|
+
}
|
|
5045
|
+
const inputFailures = evaluateInputInvariants(request, contracts);
|
|
5046
|
+
if (mode === "enforce" && inputFailures.length > 0) {
|
|
5047
|
+
if (onError === "block") {
|
|
5048
|
+
const decision = {
|
|
5049
|
+
action: "block",
|
|
5050
|
+
tool_calls: [],
|
|
5051
|
+
blocked: [{
|
|
5052
|
+
tool_name: "_request",
|
|
5053
|
+
arguments: "",
|
|
5054
|
+
reason: "input_invariant_failed",
|
|
5055
|
+
contract_file: inputFailures[0]?.contract_file ?? "",
|
|
5056
|
+
failures: inputFailures
|
|
5057
|
+
}],
|
|
5058
|
+
response_modification: gateMode
|
|
5059
|
+
};
|
|
5060
|
+
timing.pre_check_ms = Date.now() - preCheckStart;
|
|
5061
|
+
captureDecision(
|
|
5062
|
+
decision,
|
|
5063
|
+
null,
|
|
5064
|
+
request,
|
|
5065
|
+
guardStart,
|
|
5066
|
+
requestToolNames,
|
|
5067
|
+
null,
|
|
5068
|
+
narrowResult,
|
|
5069
|
+
null,
|
|
5070
|
+
null,
|
|
5071
|
+
null,
|
|
5072
|
+
void 0,
|
|
5073
|
+
timing
|
|
5074
|
+
);
|
|
5075
|
+
throw buildContractError2(decision);
|
|
5076
|
+
}
|
|
5077
|
+
}
|
|
5078
|
+
timing.pre_check_ms = Date.now() - preCheckStart;
|
|
5079
|
+
let lastError = null;
|
|
5080
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
5081
|
+
if (killed) throw new ReplayKillError(sessionId, killedAt);
|
|
5082
|
+
let attemptPreparedRequestId = null;
|
|
5083
|
+
let attemptDegraded = false;
|
|
5084
|
+
let attemptPendingCalls = null;
|
|
5085
|
+
const isActiveGovern = protectionLevel === "govern" && !runtimeDegraded && runtimeClient != null && runtimeSession != null && leaseFence != null;
|
|
5086
|
+
if (isActiveGovern) {
|
|
5087
|
+
const rtPreflightStart = Date.now();
|
|
5088
|
+
try {
|
|
5089
|
+
const pf = await runtimeClient.preflight({
|
|
5090
|
+
sessionId,
|
|
5091
|
+
leaseFence,
|
|
5092
|
+
requestEnvelope: activeArgs[0],
|
|
5093
|
+
provider,
|
|
5094
|
+
modelId: typeof request.model === "string" ? request.model : null
|
|
5095
|
+
});
|
|
5096
|
+
attemptPreparedRequestId = pf.preparedRequestId;
|
|
5097
|
+
leaseFence = pf.leaseFence;
|
|
5098
|
+
} catch (err) {
|
|
5099
|
+
attemptDegraded = true;
|
|
5100
|
+
if (runtimeClient.isCircuitOpen()) runtimeDegraded = true;
|
|
5101
|
+
emitDiagnostic2(diagnostics, {
|
|
5102
|
+
type: "replay_inactive",
|
|
5103
|
+
reason: "runtime_preflight_failed",
|
|
5104
|
+
error_message: err instanceof Error ? err.message : String(err)
|
|
5105
|
+
});
|
|
5106
|
+
}
|
|
5107
|
+
timing.runtime_ms += Date.now() - rtPreflightStart;
|
|
5108
|
+
}
|
|
5109
|
+
const llmCallStart = Date.now();
|
|
5110
|
+
const response = await terminalInfo.originalCreate.apply(this, activeArgs);
|
|
5111
|
+
timing.llm_call_ms += Date.now() - llmCallStart;
|
|
5112
|
+
if (killed) throw new ReplayKillError(sessionId, killedAt);
|
|
5113
|
+
const responseUsage = extractUsage(response, provider);
|
|
5114
|
+
if (responseUsage) {
|
|
5115
|
+
const costDelta = (responseUsage.prompt_tokens + responseUsage.completion_tokens) * 1e-5;
|
|
5116
|
+
sessionState = updateActualCost(sessionState, costDelta);
|
|
5117
|
+
}
|
|
5118
|
+
if (mode === "log-only") {
|
|
5119
|
+
captureDecision(
|
|
5120
|
+
{ action: "allow", tool_calls: extractToolCalls(response, provider) },
|
|
5121
|
+
response,
|
|
5122
|
+
request,
|
|
5123
|
+
guardStart,
|
|
5124
|
+
requestToolNames,
|
|
5125
|
+
null,
|
|
5126
|
+
narrowResult,
|
|
5127
|
+
null,
|
|
5128
|
+
null,
|
|
5129
|
+
null,
|
|
5130
|
+
void 0,
|
|
5131
|
+
timing
|
|
5132
|
+
);
|
|
5133
|
+
return response;
|
|
5134
|
+
}
|
|
5135
|
+
const toolCalls = extractToolCalls(response, provider);
|
|
5136
|
+
const validateStart = Date.now();
|
|
5137
|
+
const validation = validateResponse2(response, toolCalls, contracts, requestToolNames, unmatchedPolicy, provider);
|
|
5138
|
+
timing.validate_ms += Date.now() - validateStart;
|
|
5139
|
+
if (isActiveGovern && !attemptDegraded && attemptPreparedRequestId) {
|
|
5140
|
+
const rtProposalStart = Date.now();
|
|
5141
|
+
try {
|
|
5142
|
+
const pr = await runtimeClient.submitProposal({
|
|
5143
|
+
sessionId,
|
|
5144
|
+
leaseFence,
|
|
5145
|
+
preparedRequestId: attemptPreparedRequestId,
|
|
5146
|
+
responseEnvelope: response
|
|
5147
|
+
});
|
|
5148
|
+
attemptPendingCalls = new Map(
|
|
5149
|
+
pr.pendingCalls.map((pc) => [pc.toolCallId, pc])
|
|
5150
|
+
);
|
|
5151
|
+
for (const bc of pr.blockedCalls) {
|
|
5152
|
+
validation.failures.push({
|
|
5153
|
+
path: `$.tool_calls.${bc.toolName}`,
|
|
5154
|
+
operator: "server_blocked",
|
|
5155
|
+
expected: "allowed",
|
|
5156
|
+
found: bc.reason,
|
|
5157
|
+
message: `Server blocked: ${bc.toolName} \u2014 ${bc.reason}`,
|
|
5158
|
+
contract_file: ""
|
|
5159
|
+
});
|
|
5160
|
+
}
|
|
5161
|
+
} catch (err) {
|
|
5162
|
+
attemptDegraded = true;
|
|
5163
|
+
if (runtimeClient.isCircuitOpen()) runtimeDegraded = true;
|
|
5164
|
+
emitDiagnostic2(diagnostics, {
|
|
5165
|
+
type: "replay_inactive",
|
|
5166
|
+
reason: "runtime_proposal_failed",
|
|
5167
|
+
error_message: err instanceof Error ? err.message : String(err)
|
|
5168
|
+
});
|
|
5169
|
+
}
|
|
5170
|
+
timing.runtime_ms += Date.now() - rtProposalStart;
|
|
5171
|
+
}
|
|
5172
|
+
const crossStepStart = Date.now();
|
|
5173
|
+
const crossStepContracts = compiledSession ? Array.from(compiledSession.perToolContracts.values()) : contracts;
|
|
5174
|
+
const crossStepResult = validateCrossStep(toolCalls, sessionState, crossStepContracts);
|
|
5175
|
+
if (!crossStepResult.passed) {
|
|
5176
|
+
for (const f of crossStepResult.failures) {
|
|
5177
|
+
validation.failures.push({
|
|
5178
|
+
path: `$.tool_calls.${f.toolName}`,
|
|
5179
|
+
operator: f.reason,
|
|
5180
|
+
expected: "",
|
|
5181
|
+
found: "",
|
|
5182
|
+
message: f.detail,
|
|
5183
|
+
contract_file: ""
|
|
5184
|
+
});
|
|
5185
|
+
}
|
|
5186
|
+
}
|
|
5187
|
+
timing.cross_step_ms += Date.now() - crossStepStart;
|
|
5188
|
+
let phaseResult = null;
|
|
5189
|
+
const phaseStart = Date.now();
|
|
5190
|
+
if (compiledSession) {
|
|
5191
|
+
phaseResult = validatePhaseTransition(toolCalls, sessionState, compiledSession);
|
|
5192
|
+
if (!phaseResult.legal) {
|
|
5193
|
+
validation.failures.push({
|
|
5194
|
+
path: `$.tool_calls.${phaseResult.blockedTool}`,
|
|
5195
|
+
operator: phaseResult.reason,
|
|
5196
|
+
expected: "",
|
|
5197
|
+
found: phaseResult.attemptedTransition,
|
|
5198
|
+
message: `Phase transition blocked: ${phaseResult.attemptedTransition} (${phaseResult.reason})`,
|
|
5199
|
+
contract_file: ""
|
|
5200
|
+
});
|
|
5201
|
+
}
|
|
5202
|
+
}
|
|
5203
|
+
timing.phase_ms += Date.now() - phaseStart;
|
|
5204
|
+
const argValuesStart = Date.now();
|
|
5205
|
+
for (const tc of toolCalls) {
|
|
5206
|
+
const contract = contracts.find((c) => c.tool === tc.name);
|
|
5207
|
+
if (contract?.argument_value_invariants && contract.argument_value_invariants.length > 0) {
|
|
5208
|
+
let parsedArgs;
|
|
5209
|
+
try {
|
|
5210
|
+
parsedArgs = JSON.parse(tc.arguments);
|
|
5211
|
+
} catch {
|
|
5212
|
+
parsedArgs = {};
|
|
5213
|
+
}
|
|
5214
|
+
const avResult = evaluateArgumentValueInvariants(parsedArgs, contract.argument_value_invariants);
|
|
5215
|
+
if (!avResult.passed) {
|
|
5216
|
+
for (const f of avResult.failures) {
|
|
5217
|
+
validation.failures.push({
|
|
5218
|
+
path: f.path,
|
|
5219
|
+
operator: f.operator,
|
|
5220
|
+
expected: String(f.expected),
|
|
5221
|
+
found: String(f.actual),
|
|
5222
|
+
message: f.detail,
|
|
5223
|
+
contract_file: contract.contract_file ?? contract.tool,
|
|
5224
|
+
_tool_call_id: tc.id,
|
|
5225
|
+
_tool_call_arguments: tc.arguments
|
|
5226
|
+
});
|
|
5227
|
+
}
|
|
5228
|
+
}
|
|
5229
|
+
}
|
|
5230
|
+
if (resolvedSessionLimits) {
|
|
5231
|
+
const perToolResult = checkPerToolLimits(sessionState, tc.name, resolvedSessionLimits);
|
|
5232
|
+
if (perToolResult.exceeded) {
|
|
5233
|
+
validation.failures.push({
|
|
5234
|
+
path: `$.tool_calls.${tc.name}`,
|
|
5235
|
+
operator: "session_limit",
|
|
5236
|
+
expected: "",
|
|
5237
|
+
found: "",
|
|
5238
|
+
message: perToolResult.reason ?? "per-tool limit exceeded",
|
|
5239
|
+
contract_file: ""
|
|
5240
|
+
});
|
|
5241
|
+
}
|
|
5242
|
+
}
|
|
5243
|
+
if (resolvedSessionLimits?.loop_detection) {
|
|
5244
|
+
const loopResult = checkLoopDetection(
|
|
5245
|
+
tc.name,
|
|
5246
|
+
tc.arguments,
|
|
5247
|
+
sessionState,
|
|
5248
|
+
resolvedSessionLimits.loop_detection
|
|
5249
|
+
);
|
|
5250
|
+
if (loopResult.triggered) {
|
|
5251
|
+
validation.failures.push({
|
|
5252
|
+
path: `$.tool_calls.${tc.name}`,
|
|
5253
|
+
operator: "loop_detected",
|
|
5254
|
+
expected: `< ${loopResult.threshold} occurrences in window ${loopResult.window}`,
|
|
5255
|
+
found: String(loopResult.matchCount),
|
|
5256
|
+
message: `Loop detected: ${tc.name} repeated ${loopResult.matchCount} times in last ${loopResult.window} steps`,
|
|
5257
|
+
contract_file: ""
|
|
5258
|
+
});
|
|
5259
|
+
}
|
|
5260
|
+
}
|
|
5261
|
+
}
|
|
5262
|
+
timing.argument_values_ms += Date.now() - argValuesStart;
|
|
5263
|
+
let policyVerdicts = null;
|
|
5264
|
+
const policyStart = Date.now();
|
|
5265
|
+
if (compiledSession?.policyProgram && compiledSession.principal !== null && compiledSession.principal !== void 0) {
|
|
5266
|
+
policyVerdicts = /* @__PURE__ */ new Map();
|
|
5267
|
+
for (const tc of toolCalls) {
|
|
5268
|
+
const verdict = evaluatePolicy(
|
|
5269
|
+
tc.name,
|
|
5270
|
+
compiledSession.principal,
|
|
5271
|
+
(() => {
|
|
5272
|
+
try {
|
|
5273
|
+
return JSON.parse(tc.arguments);
|
|
5274
|
+
} catch {
|
|
5275
|
+
return {};
|
|
5276
|
+
}
|
|
5277
|
+
})(),
|
|
5278
|
+
sessionState,
|
|
5279
|
+
compiledSession.policyProgram
|
|
5280
|
+
);
|
|
5281
|
+
policyVerdicts.set(tc.name, verdict);
|
|
5282
|
+
if (!verdict.allowed) {
|
|
5283
|
+
validation.failures.push({
|
|
5284
|
+
path: `$.tool_calls.${tc.name}`,
|
|
5285
|
+
operator: "policy_denied",
|
|
5286
|
+
expected: "allowed",
|
|
5287
|
+
found: verdict.reason ?? "denied",
|
|
5288
|
+
message: `Policy denied: ${tc.name} \u2014 ${verdict.reason}`,
|
|
5289
|
+
contract_file: ""
|
|
5290
|
+
});
|
|
5291
|
+
}
|
|
5292
|
+
}
|
|
5293
|
+
}
|
|
5294
|
+
timing.policy_ms += Date.now() - policyStart;
|
|
5295
|
+
if (mode === "shadow") {
|
|
5296
|
+
const shadowGateStart = Date.now();
|
|
5297
|
+
const shadowDecision = validation.failures.length > 0 ? {
|
|
5298
|
+
action: "block",
|
|
5299
|
+
tool_calls: toolCalls,
|
|
5300
|
+
blocked: buildBlockedCalls(toolCalls, validation.failures, validation.unmatchedBlocked),
|
|
5301
|
+
response_modification: gateMode
|
|
5302
|
+
} : { action: "allow", tool_calls: toolCalls };
|
|
5303
|
+
const shadowDelta = {
|
|
5304
|
+
would_have_blocked: shadowDecision.action === "block" ? shadowDecision.blocked : [],
|
|
5305
|
+
would_have_narrowed: narrowResult?.removed ?? [],
|
|
5306
|
+
current_phase: sessionState.currentPhase,
|
|
5307
|
+
legal_next_phases: compiledSession ? getLegalNextPhases(sessionState, compiledSession) : []
|
|
5308
|
+
};
|
|
5309
|
+
lastShadowDeltaValue = shadowDelta;
|
|
5310
|
+
shadowEvaluationCount++;
|
|
5311
|
+
timing.gate_ms += Date.now() - shadowGateStart;
|
|
5312
|
+
captureDecision(shadowDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, shadowDelta, timing);
|
|
5313
|
+
return response;
|
|
5314
|
+
}
|
|
5315
|
+
if (isCompatAdvisory) {
|
|
5316
|
+
const advisoryGateStart = Date.now();
|
|
5317
|
+
const advisoryDecision = buildDecision(toolCalls, validation, gateMode, compiledSession);
|
|
5318
|
+
timing.gate_ms += Date.now() - advisoryGateStart;
|
|
5319
|
+
const advisoryFinalizeStart = Date.now();
|
|
5320
|
+
if (advisoryDecision.action === "allow" || advisoryDecision.action === "block") {
|
|
5321
|
+
const completedStep = buildCompletedStep(
|
|
5322
|
+
sessionState.totalStepCount,
|
|
5323
|
+
sessionId,
|
|
5324
|
+
toolCalls,
|
|
5325
|
+
contracts,
|
|
5326
|
+
response,
|
|
5327
|
+
provider,
|
|
5328
|
+
effectiveTier,
|
|
5329
|
+
compiledSession
|
|
5330
|
+
);
|
|
5331
|
+
if (phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase) {
|
|
5332
|
+
completedStep.phaseTransition = `${sessionState.currentPhase} \u2192 ${phaseResult.newPhase}`;
|
|
5333
|
+
completedStep.phase = phaseResult.newPhase;
|
|
5334
|
+
} else {
|
|
5335
|
+
completedStep.phase = sessionState.currentPhase;
|
|
5336
|
+
}
|
|
5337
|
+
const prevVersion = sessionState.stateVersion;
|
|
5338
|
+
sessionState = finalizeExecutedStep(sessionState, completedStep, contracts, compiledSession);
|
|
5339
|
+
syncStateToStore(prevVersion, sessionState);
|
|
5340
|
+
}
|
|
5341
|
+
if (advisoryDecision.action === "block") {
|
|
5342
|
+
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
5343
|
+
emitDiagnostic2(diagnostics, {
|
|
5344
|
+
type: "replay_compat_advisory",
|
|
5345
|
+
session_id: sessionId,
|
|
5346
|
+
would_block: advisoryDecision.blocked,
|
|
5347
|
+
details: advisoryDecision.blocked.map((b) => `${b.tool_name}: ${b.reason}`).join("; ")
|
|
5348
|
+
});
|
|
5349
|
+
} else {
|
|
5350
|
+
sessionState = recordDecisionOutcome(sessionState, "allowed");
|
|
5351
|
+
}
|
|
5352
|
+
timing.finalize_ms += Date.now() - advisoryFinalizeStart;
|
|
5353
|
+
captureDecision(advisoryDecision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing);
|
|
5354
|
+
return response;
|
|
5355
|
+
}
|
|
5356
|
+
const enforceGateStart = Date.now();
|
|
5357
|
+
const decision = buildDecision(toolCalls, validation, gateMode, compiledSession);
|
|
5358
|
+
timing.gate_ms += Date.now() - enforceGateStart;
|
|
5359
|
+
if (decision.action === "allow") {
|
|
5360
|
+
const enforceFinalizeStart = Date.now();
|
|
5361
|
+
const completedStep = buildCompletedStep(
|
|
5362
|
+
sessionState.totalStepCount,
|
|
5363
|
+
sessionId,
|
|
5364
|
+
toolCalls,
|
|
5365
|
+
contracts,
|
|
5366
|
+
response,
|
|
5367
|
+
provider,
|
|
5368
|
+
effectiveTier,
|
|
5369
|
+
compiledSession
|
|
5370
|
+
);
|
|
5371
|
+
if (phaseResult && phaseResult.legal && phaseResult.newPhase !== sessionState.currentPhase) {
|
|
5372
|
+
completedStep.phaseTransition = `${sessionState.currentPhase} \u2192 ${phaseResult.newPhase}`;
|
|
5373
|
+
completedStep.phase = phaseResult.newPhase;
|
|
5374
|
+
} else {
|
|
5375
|
+
completedStep.phase = sessionState.currentPhase;
|
|
5376
|
+
}
|
|
5377
|
+
const prevVersionAllow = sessionState.stateVersion;
|
|
5378
|
+
sessionState = finalizeExecutedStep(sessionState, completedStep, contracts, compiledSession);
|
|
5379
|
+
sessionState = recordDecisionOutcome(sessionState, "allowed");
|
|
5380
|
+
syncStateToStore(prevVersionAllow, sessionState);
|
|
5381
|
+
timing.finalize_ms += Date.now() - enforceFinalizeStart;
|
|
5382
|
+
if (isActiveGovern && !attemptDegraded && attemptPendingCalls && attemptPendingCalls.size > 0) {
|
|
5383
|
+
for (const [toolCallId, pending] of attemptPendingCalls) {
|
|
5384
|
+
deferredReceipts.set(toolCallId, {
|
|
5385
|
+
pendingCallId: pending.pendingCallId,
|
|
5386
|
+
toolName: pending.toolName,
|
|
5387
|
+
argumentsHash: stripHashPrefix(pending.argumentsHash)
|
|
5388
|
+
});
|
|
5389
|
+
}
|
|
5390
|
+
}
|
|
5391
|
+
captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing);
|
|
5392
|
+
return response;
|
|
5393
|
+
}
|
|
5394
|
+
sessionState = recordDecisionOutcome(sessionState, "blocked");
|
|
5395
|
+
if (isActiveGovern && !attemptDegraded && attemptPendingCalls && attemptPendingCalls.size > 0) {
|
|
5396
|
+
const rtBlockReceiptStart = Date.now();
|
|
5397
|
+
const blockedToolCallIds = new Set(
|
|
5398
|
+
decision.action === "block" ? decision.blocked.map((b) => {
|
|
5399
|
+
const tc = toolCalls.find((c) => c.name === b.tool_name && c.arguments === b.arguments);
|
|
5400
|
+
return tc?.id;
|
|
5401
|
+
}).filter((id) => id != null) : []
|
|
5402
|
+
);
|
|
5403
|
+
const receiptNow = (/* @__PURE__ */ new Date()).toISOString();
|
|
5404
|
+
for (const [toolCallId, pending] of attemptPendingCalls) {
|
|
5405
|
+
if (blockedToolCallIds.has(toolCallId) || gateMode === "reject_all") {
|
|
5406
|
+
try {
|
|
5407
|
+
await runtimeClient.submitReceipt({
|
|
5408
|
+
sessionId,
|
|
5409
|
+
leaseFence,
|
|
5410
|
+
pendingCallId: pending.pendingCallId,
|
|
5411
|
+
executorKind: "WRAPPED_EXECUTOR",
|
|
5412
|
+
toolName: pending.toolName,
|
|
5413
|
+
argumentsHash: stripHashPrefix(pending.argumentsHash),
|
|
5414
|
+
status: "DISCARDED",
|
|
5415
|
+
startedAt: receiptNow,
|
|
5416
|
+
completedAt: receiptNow
|
|
5417
|
+
});
|
|
5418
|
+
} catch (err) {
|
|
5419
|
+
attemptDegraded = true;
|
|
5420
|
+
if (runtimeClient.isCircuitOpen()) runtimeDegraded = true;
|
|
5421
|
+
emitDiagnostic2(diagnostics, {
|
|
5422
|
+
type: "replay_inactive",
|
|
5423
|
+
reason: "runtime_receipt_failed",
|
|
5424
|
+
error_message: err instanceof Error ? err.message : String(err)
|
|
5425
|
+
});
|
|
5426
|
+
break;
|
|
5427
|
+
}
|
|
5428
|
+
} else {
|
|
5429
|
+
deferredReceipts.set(toolCallId, {
|
|
5430
|
+
pendingCallId: pending.pendingCallId,
|
|
5431
|
+
toolName: pending.toolName,
|
|
5432
|
+
argumentsHash: stripHashPrefix(pending.argumentsHash)
|
|
5433
|
+
});
|
|
5434
|
+
}
|
|
5435
|
+
}
|
|
5436
|
+
timing.runtime_ms += Date.now() - rtBlockReceiptStart;
|
|
5437
|
+
}
|
|
5438
|
+
if (resolvedSessionLimits?.circuit_breaker) {
|
|
5439
|
+
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
5440
|
+
if (cbResult.triggered) {
|
|
5441
|
+
killed = true;
|
|
5442
|
+
killedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
5443
|
+
sessionState = killSession(sessionState);
|
|
5444
|
+
emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
|
|
5445
|
+
}
|
|
5446
|
+
}
|
|
5447
|
+
if (attempt < maxRetries) {
|
|
5448
|
+
lastError = new ReplayContractError(
|
|
5449
|
+
`Blocked on attempt ${attempt + 1}`,
|
|
5450
|
+
decision,
|
|
5451
|
+
decision.blocked[0]?.contract_file ?? "",
|
|
5452
|
+
decision.blocked[0]?.failures ?? []
|
|
5453
|
+
);
|
|
5454
|
+
continue;
|
|
5455
|
+
}
|
|
5456
|
+
captureDecision(decision, response, request, guardStart, requestToolNames, crossStepResult, narrowResult, phaseResult, policyVerdicts, null, void 0, timing);
|
|
5457
|
+
return applyGateDecision(decision, response, provider, gateMode, opts.onBlock);
|
|
5458
|
+
}
|
|
5459
|
+
if (lastError) throw lastError;
|
|
5460
|
+
throw new ReplayInternalError("Retry loop exhausted without result", { sessionId });
|
|
5461
|
+
} catch (err) {
|
|
5462
|
+
if (err instanceof ReplayContractError || err instanceof ReplayKillError) {
|
|
5463
|
+
throw err;
|
|
5464
|
+
}
|
|
5465
|
+
sessionState = recordDecisionOutcome(sessionState, "error");
|
|
5466
|
+
if (resolvedSessionLimits?.circuit_breaker) {
|
|
5467
|
+
const cbResult = checkCircuitBreaker(sessionState, resolvedSessionLimits.circuit_breaker);
|
|
5468
|
+
if (cbResult.triggered) {
|
|
5469
|
+
killed = true;
|
|
5470
|
+
killedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
5471
|
+
sessionState = killSession(sessionState);
|
|
5472
|
+
emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
|
|
5473
|
+
}
|
|
5474
|
+
}
|
|
5475
|
+
if (onError === "block") {
|
|
5476
|
+
throw new ReplayInternalError("Enforcement pipeline internal error", { cause: err, sessionId });
|
|
5477
|
+
}
|
|
5478
|
+
sessionState = { ...sessionState, totalUnguardedCalls: sessionState.totalUnguardedCalls + 1 };
|
|
5479
|
+
if (sessionState.totalUnguardedCalls >= maxUnguardedCalls) {
|
|
5480
|
+
killed = true;
|
|
5481
|
+
killedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
5482
|
+
sessionState = killSession(sessionState);
|
|
5483
|
+
emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
|
|
5484
|
+
}
|
|
5485
|
+
return terminalInfo.originalCreate.apply(this, args);
|
|
5486
|
+
}
|
|
5487
|
+
};
|
|
5488
|
+
const wrapperClient = createWrapperClient(client, provider, enforcementCreate);
|
|
5489
|
+
const bypassCreate = function replayBypassProxy(...args) {
|
|
5490
|
+
bypassDetected = true;
|
|
5491
|
+
emitDiagnostic2(diagnostics, { type: "replay_bypass_detected", session_id: sessionId });
|
|
5492
|
+
if (runtimeClient && runtimeSession) {
|
|
5493
|
+
runtimeClient.reportBypass({
|
|
5494
|
+
sessionId,
|
|
5495
|
+
source: "bypass_proxy",
|
|
5496
|
+
detail: "Direct call on original client detected"
|
|
5497
|
+
}).catch(() => {
|
|
5498
|
+
});
|
|
5499
|
+
}
|
|
5500
|
+
return terminalInfo.originalCreate.apply(this, args);
|
|
5501
|
+
};
|
|
5502
|
+
terminalInfo.terminal[terminalInfo.methodName] = bypassCreate;
|
|
5503
|
+
setReplayAttached(client);
|
|
5504
|
+
emitDiagnostic2(diagnostics, {
|
|
5505
|
+
type: "replay_activated",
|
|
5506
|
+
session_id: sessionId,
|
|
5507
|
+
provider,
|
|
5508
|
+
agent,
|
|
5509
|
+
mode
|
|
5510
|
+
});
|
|
5511
|
+
const session = {
|
|
5512
|
+
client: wrapperClient,
|
|
5513
|
+
async flush() {
|
|
5514
|
+
if (!buffer) {
|
|
5515
|
+
return { captured: 0, sent: 0, active: false, errors: [] };
|
|
5516
|
+
}
|
|
5517
|
+
return buffer.flush();
|
|
5518
|
+
},
|
|
5519
|
+
restore() {
|
|
5520
|
+
if (restored) return;
|
|
5521
|
+
restored = true;
|
|
5522
|
+
if (terminalInfo.terminal[terminalInfo.methodName] === bypassCreate) {
|
|
5523
|
+
terminalInfo.terminal[terminalInfo.methodName] = terminalInfo.originalCreate;
|
|
5524
|
+
}
|
|
5525
|
+
clearReplayAttached(client);
|
|
5526
|
+
buffer?.close();
|
|
5527
|
+
},
|
|
5528
|
+
kill() {
|
|
5529
|
+
if (killed) return;
|
|
5530
|
+
killed = true;
|
|
5531
|
+
killedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
5532
|
+
const prevVersionKill = sessionState.stateVersion;
|
|
5533
|
+
sessionState = killSession(sessionState);
|
|
5534
|
+
syncStateToStore(prevVersionKill, sessionState);
|
|
5535
|
+
emitDiagnostic2(diagnostics, { type: "replay_kill", session_id: sessionId });
|
|
5536
|
+
if (runtimeClient && runtimeSession && leaseFence) {
|
|
5537
|
+
runtimeClient.killSession({
|
|
5538
|
+
sessionId,
|
|
5539
|
+
leaseFence,
|
|
5540
|
+
reason: "sdk_kill"
|
|
5541
|
+
}).catch(() => {
|
|
5542
|
+
});
|
|
5543
|
+
}
|
|
5544
|
+
buffer?.flush().catch(() => {
|
|
5545
|
+
});
|
|
5546
|
+
},
|
|
5547
|
+
getHealth() {
|
|
5548
|
+
const isAuthoritative = runtimeSession != null && !runtimeDegraded;
|
|
5549
|
+
const effectiveProtection = runtimeDegraded ? "protect" : protectionLevel;
|
|
5550
|
+
let durability;
|
|
5551
|
+
if (isAuthoritative) {
|
|
5552
|
+
durability = runtimeClient?.isCircuitOpen() ? "degraded-local" : "server";
|
|
5553
|
+
} else {
|
|
5554
|
+
durability = runtimeDegraded ? "degraded-local" : "inactive";
|
|
5555
|
+
}
|
|
5556
|
+
let authorityState;
|
|
5557
|
+
if (killed) authorityState = "killed";
|
|
5558
|
+
else if (bypassDetected) authorityState = "compromised";
|
|
5559
|
+
else if (isAuthoritative) authorityState = "active";
|
|
5560
|
+
else authorityState = "advisory";
|
|
5561
|
+
return {
|
|
5562
|
+
status: killed ? "inactive" : sessionState.consecutiveErrorCount > 0 ? "degraded" : "healthy",
|
|
5563
|
+
authorityState,
|
|
5564
|
+
protectionLevel: effectiveProtection,
|
|
5565
|
+
durability,
|
|
5566
|
+
tier: sessionState.tier,
|
|
5567
|
+
compatEnforcement,
|
|
5568
|
+
cluster_detected: false,
|
|
5569
|
+
bypass_detected: bypassDetected,
|
|
5570
|
+
totalSteps: sessionState.totalStepCount,
|
|
5571
|
+
totalBlocks: sessionState.totalBlockCount,
|
|
5572
|
+
totalErrors: sessionState.consecutiveErrorCount,
|
|
5573
|
+
killed,
|
|
5574
|
+
shadowEvaluations: shadowEvaluationCount
|
|
5575
|
+
};
|
|
5576
|
+
},
|
|
5577
|
+
/**
|
|
5578
|
+
* v2: Return redacted session state snapshot.
|
|
5579
|
+
* @see specs/replay-v2.md § getState() contract
|
|
5580
|
+
*/
|
|
5581
|
+
getState() {
|
|
5582
|
+
return buildStateSnapshot(sessionState, toNarrowingSnapshot(lastNarrowResult));
|
|
5583
|
+
},
|
|
5584
|
+
getLastNarrowing() {
|
|
5585
|
+
return toNarrowingSnapshot(lastNarrowResult);
|
|
5586
|
+
},
|
|
5587
|
+
getLastShadowDelta() {
|
|
5588
|
+
return lastShadowDeltaValue;
|
|
5589
|
+
},
|
|
5590
|
+
/**
|
|
5591
|
+
* v3: Manually restrict available tools within compiled legal space.
|
|
5592
|
+
* @see specs/replay-v3.md § narrow() / widen()
|
|
5593
|
+
*/
|
|
5594
|
+
narrow(toolFilter) {
|
|
5595
|
+
if (killed || restored) return;
|
|
5596
|
+
manualFilter = toolFilter.length > 0 ? toolFilter : null;
|
|
5597
|
+
sessionState = { ...sessionState, controlRevision: sessionState.controlRevision + 1 };
|
|
5598
|
+
if (runtimeClient && runtimeSession && leaseFence) {
|
|
5599
|
+
runtimeClient.setToolFilter({
|
|
5600
|
+
sessionId,
|
|
5601
|
+
leaseFence,
|
|
5602
|
+
allowedTools: manualFilter
|
|
5603
|
+
}).catch(() => {
|
|
5604
|
+
});
|
|
5605
|
+
}
|
|
5606
|
+
},
|
|
5607
|
+
/**
|
|
5608
|
+
* v3: Remove manual restriction, return to contract-driven narrowing.
|
|
5609
|
+
* @see specs/replay-v3.md § narrow() / widen()
|
|
5610
|
+
*/
|
|
5611
|
+
widen() {
|
|
5612
|
+
if (killed || restored) return;
|
|
5613
|
+
if (manualFilter === null) return;
|
|
5614
|
+
manualFilter = null;
|
|
5615
|
+
sessionState = { ...sessionState, controlRevision: sessionState.controlRevision + 1 };
|
|
5616
|
+
if (runtimeClient && runtimeSession && leaseFence) {
|
|
5617
|
+
runtimeClient.setToolFilter({
|
|
5618
|
+
sessionId,
|
|
5619
|
+
leaseFence,
|
|
5620
|
+
allowedTools: null
|
|
5621
|
+
}).catch(() => {
|
|
5622
|
+
});
|
|
5623
|
+
}
|
|
5624
|
+
},
|
|
5625
|
+
tools: wrapToolsWithDeferredReceipts(
|
|
5626
|
+
buildWrappedToolsMap(opts.tools, compiledSession)
|
|
5627
|
+
),
|
|
5628
|
+
async getWorkflowState() {
|
|
5629
|
+
if (!workflowId || !runtimeClient || runtimeDegraded) return null;
|
|
5630
|
+
if (runtimeInitPromise && !runtimeInitDone) await runtimeInitPromise;
|
|
5631
|
+
if (!runtimeSession?.workflow) return null;
|
|
5632
|
+
try {
|
|
5633
|
+
const state = await runtimeClient.getWorkflowState(workflowId);
|
|
5634
|
+
return {
|
|
5635
|
+
workflowId: state.workflowId,
|
|
5636
|
+
rootSessionId: state.rootSessionId,
|
|
5637
|
+
status: state.status,
|
|
5638
|
+
stateVersion: state.stateVersion,
|
|
5639
|
+
controlRevision: state.controlRevision,
|
|
5640
|
+
totalSessionCount: state.totalSessionCount,
|
|
5641
|
+
activeSessionCount: state.activeSessionCount,
|
|
5642
|
+
totalStepCount: state.totalStepCount,
|
|
5643
|
+
totalCost: state.totalCost,
|
|
5644
|
+
totalHandoffCount: state.totalHandoffCount,
|
|
5645
|
+
unresolvedHandoffCount: state.unresolvedHandoffCount,
|
|
5646
|
+
lastEventSeq: state.lastEventSeq,
|
|
5647
|
+
killScope: state.killScope,
|
|
5648
|
+
createdAt: state.createdAt,
|
|
5649
|
+
updatedAt: state.updatedAt
|
|
5650
|
+
};
|
|
5651
|
+
} catch {
|
|
5652
|
+
return null;
|
|
5653
|
+
}
|
|
5654
|
+
},
|
|
5655
|
+
async handoff(offer) {
|
|
5656
|
+
if (!workflowId || !runtimeClient || runtimeDegraded) return null;
|
|
5657
|
+
if (runtimeInitPromise && !runtimeInitDone) await runtimeInitPromise;
|
|
5658
|
+
if (!runtimeSession?.workflow) return null;
|
|
5659
|
+
try {
|
|
5660
|
+
const result = await runtimeClient.offerHandoff({
|
|
5661
|
+
sessionId,
|
|
5662
|
+
workflowId,
|
|
5663
|
+
fromRole: runtimeSession.workflow.role,
|
|
5664
|
+
toRole: offer.toRole,
|
|
5665
|
+
handoffId: offer.handoffId,
|
|
5666
|
+
artifactRefs: offer.artifactRefs,
|
|
5667
|
+
summary: offer.summary
|
|
5668
|
+
});
|
|
5669
|
+
return {
|
|
5670
|
+
handoffId: result.handoffId,
|
|
5671
|
+
eventSeq: result.eventSeq,
|
|
5672
|
+
stateVersion: result.stateVersion
|
|
5673
|
+
};
|
|
5674
|
+
} catch {
|
|
5675
|
+
return null;
|
|
5676
|
+
}
|
|
5677
|
+
}
|
|
5678
|
+
};
|
|
5679
|
+
return session;
|
|
5680
|
+
/**
 * Wrap every executor in `baseTools` so that, after it resolves, the first
 * deferred receipt registered for that tool name is submitted to the runtime.
 *
 * The wrapper returns the executor's own result unchanged. Receipt submission
 * is best-effort: it is skipped when there is no runtime client, no lease
 * fence, or the runtime is marked degraded, and a failed submission never
 * surfaces to the caller of the tool.
 *
 * @param {Record<string, Function>} baseTools - tool name -> executor.
 * @returns {Record<string, Function>} tool name -> async wrapped executor.
 */
function wrapToolsWithDeferredReceipts(baseTools) {
  const wrapped = {};
  for (const [toolName, executor] of Object.entries(baseTools)) {
    wrapped[toolName] = async (args) => {
      // Stamp the start BEFORE running the tool so the receipt carries the
      // real execution window instead of two identical post-hoc timestamps.
      const startedAt = (/* @__PURE__ */ new Date()).toISOString();
      const result = await executor(args);
      if (runtimeClient && leaseFence && !runtimeDegraded) {
        for (const [callId, deferred] of deferredReceipts) {
          if (deferred.toolName !== toolName) continue;
          // Remove the entry first so it is never submitted twice, even if
          // the submission below fails.
          deferredReceipts.delete(callId);
          const completedAt = (/* @__PURE__ */ new Date()).toISOString();
          try {
            const receiptResult = await runtimeClient.submitReceipt({
              sessionId,
              leaseFence,
              pendingCallId: deferred.pendingCallId,
              executorKind: "WRAPPED_EXECUTOR",
              toolName: deferred.toolName,
              argumentsHash: deferred.argumentsHash,
              status: "SUCCEEDED",
              startedAt,
              completedAt
            });
            if (receiptResult.stateAdvanced) {
              sessionState = { ...sessionState, stateVersion: receiptResult.stateVersion };
            }
          } catch {
            // Deliberately ignored: a receipt failure must not turn a
            // successful tool execution into an error for the caller.
          }
          // Only one deferred receipt is settled per execution.
          break;
        }
      }
      return result;
    };
  }
  return wrapped;
}
|
|
5716
|
+
/**
 * Assemble a capture record for one gate decision and hand it to the sinks.
 *
 * Does nothing when neither `buffer` nor `store` is set. When `timingParam`
 * is supplied, its `total_ms` and `enforcement_ms` fields are filled in here
 * (mutating the caller's object). The record is serialized, passed through
 * `redactCapture`, parsed back, pushed onto `buffer` when present and given
 * to `appendCaptureToStore`. Any error during that hand-off is swallowed.
 *
 * @param {object} decision - gate decision; `tool_calls` is always read and
 *   `blocked` is read when `action === "block"`.
 * @param {unknown} response - not read by this function.
 * @param {object} request - only `request.model` is read.
 * @param {number} guardStart - epoch ms at which guarding started.
 * @param {string[]} requestToolNames - tool names offered in the request.
 * @param {object|null} crossStep - cross-step result, or a falsy value.
 * @param {object|null} [narrowing] - narrowing result; `removed` is read.
 * @param {object|null} [phaseResult] - phase evaluation result.
 * @param {Map|null} [policyVerdictMap] - tool name -> policy verdict.
 * @param {unknown} [constraintVerdictVal] - execution constraint verdict.
 * @param {unknown} [shadowDelta] - shadow delta, recorded as-is.
 * @param {object} [timingParam] - timing accumulator, updated in place.
 * @returns {void}
 */
function captureDecision(decision, response, request, guardStart, requestToolNames, crossStep, narrowing = null, phaseResult = null, policyVerdictMap = null, constraintVerdictVal = null, shadowDelta = void 0, timingParam) {
  // No sink configured: nothing to record.
  if (!buffer && !store) return;

  let guardOverheadMs;
  if (timingParam) {
    timingParam.total_ms = Date.now() - guardStart;
    timingParam.enforcement_ms = timingParam.total_ms - timingParam.llm_call_ms;
    guardOverheadMs = timingParam.enforcement_ms;
  } else {
    guardOverheadMs = Date.now() - guardStart;
  }

  // An illegal transition reports what was attempted; a legal one is only
  // reported when it actually changes the phase.
  let phaseTransition = null;
  if (phaseResult) {
    if (!phaseResult.legal) {
      phaseTransition = phaseResult.attemptedTransition;
    } else if (phaseResult.newPhase !== sessionState.currentPhase) {
      phaseTransition = `${sessionState.currentPhase} \u2192 ${phaseResult.newPhase}`;
    }
  }

  const proposedCalls = decision.tool_calls;
  const primaryToolName = proposedCalls[0]?.name;

  let policyVerdict = null;
  if (primaryToolName && policyVerdictMap) {
    policyVerdict = policyVerdictMap.get(primaryToolName) ?? null;
  }

  let crossStepSummary = null;
  if (crossStep) {
    crossStepSummary = {
      passed: crossStep.passed,
      failures: crossStep.failures.map(({ toolName, reason, detail }) => ({ toolName, reason, detail }))
    };
  }

  const replay = {
    session_id: sessionId,
    step_index: sessionState.totalStepCount,
    mode,
    decision,
    contract_hashes: contracts.map((c) => c.tool_schema_hash ?? c.tool),
    guard_overhead_ms: guardOverheadMs,
    timing: timingParam,
    commit_tier: sessionState.tier,
    principal: opts.principal ?? null,
    policy_verdict: policyVerdict,
    execution_constraint_verdict: constraintVerdictVal,
    counterfactual: {
      tools_removed: narrowing?.removed ?? [],
      calls_blocked: decision.action === "block" ? decision.blocked : []
    },
    // v2 additions
    cross_step: crossStepSummary,
    session_state_hash: null,
    state_version: sessionState.stateVersion,
    // v3 additions
    narrowing,
    phase: sessionState.currentPhase,
    phase_transition: phaseTransition,
    shadow_delta: shadowDelta,
    receipt: null
  };

  const record = {
    schema_version: CAPTURE_SCHEMA_VERSION_CURRENT,
    agent,
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    provider,
    model_id: typeof request.model === "string" ? request.model : "",
    primary_tool_name: primaryToolName ?? null,
    tool_names: proposedCalls.map((tc) => tc.name),
    request: {
      tools: requestToolNames.map((name) => ({ name }))
    },
    response: {
      tool_calls: proposedCalls.map((tc) => ({ name: tc.name, arguments: tc.arguments })),
      content: null
    },
    latency_ms: Date.now() - guardStart,
    sdk_session_id: sessionId,
    replay
  };

  try {
    // Redaction operates on the serialized text, hence the JSON round-trip.
    const { redacted } = redactCapture(JSON.stringify(record));
    const redactedCall = JSON.parse(redacted);
    if (buffer) buffer.push(redactedCall);
    appendCaptureToStore(redactedCall);
  } catch {
    // Capture is best-effort; failures here are intentionally ignored.
  }
}
|
|
5783
|
+
}
|
|
5784
|
+
/**
 * Load the contracts named by the options.
 *
 * `opts.contracts` takes precedence over `opts.contractsDir`; whichever is
 * set is passed to `loadContracts`.
 *
 * @param {object} opts - replay options.
 * @returns {*} whatever `loadContracts` returns for the chosen source.
 * @throws {ReplayConfigError} "compilation_failed" when neither option is set.
 */
function resolveContracts(opts) {
  const source = opts.contracts || opts.contractsDir;
  if (!source) {
    throw new ReplayConfigError("compilation_failed", "No contracts or contractsDir provided");
  }
  return loadContracts(source);
}
|
|
5793
|
+
// File names probed, in order, inside the contracts directory.
var SESSION_YAML_NAMES2 = ["session.yaml", "session.yml"];
/**
 * Locate and parse the session YAML for the given options.
 *
 * An explicit `opts.sessionYamlPath` is always used when set. Otherwise the
 * contracts directory is probed for the known file names and the first one
 * that exists is parsed.
 *
 * @param {object} opts - replay options (`sessionYamlPath`, `contractsDir`).
 * @returns {*} result of `parseSessionYaml`, or `null` when no file applies.
 */
function discoverSessionYaml(opts) {
  const parseFile = (file) => parseSessionYaml(readFileSync2(file, "utf8"));
  if (opts.sessionYamlPath) {
    return parseFile(pathResolve(opts.sessionYamlPath));
  }
  if (!opts.contractsDir) {
    return null;
  }
  const dir = pathResolve(opts.contractsDir);
  const found = SESSION_YAML_NAMES2
    .map((name) => join3(dir, name))
    .find((candidate) => existsSync2(candidate));
  return found === void 0 ? null : parseFile(found);
}
|
|
5812
|
+
// File names probed, in order, inside the contracts directory.
var WORKFLOW_YAML_NAMES = ["workflow.yaml", "workflow.yml"];
/**
 * Locate, parse and compile the workflow YAML.
 *
 * The explicit `workflowOpts.workflowYamlPath` is read first. If that yields
 * no text (not set, or an empty file) the contracts directory is probed for
 * the known file names and the first existing one is read.
 *
 * @param {object} opts - replay options; only `contractsDir` is read.
 * @param {object} workflowOpts - workflow options; only `workflowYamlPath` is read.
 * @returns {*} result of `compileWorkflow(parseWorkflowYaml(text))`, or
 *   `null` when no non-empty workflow text was found.
 */
function discoverWorkflowYaml(opts, workflowOpts) {
  let text = workflowOpts.workflowYamlPath
    ? readFileSync2(pathResolve(workflowOpts.workflowYamlPath), "utf8")
    : null;
  if (!text && opts.contractsDir) {
    const dir = pathResolve(opts.contractsDir);
    const found = WORKFLOW_YAML_NAMES
      .map((name) => join3(dir, name))
      .find((candidate) => existsSync2(candidate));
    if (found !== void 0) {
      text = readFileSync2(found, "utf8");
    }
  }
  return text ? compileWorkflow(parseWorkflowYaml(text)) : null;
}
|
|
5833
|
+
/**
 * Create a fresh workflow identifier.
 *
 * @returns {string} "rw_" followed by the first 24 hex characters of a
 *   random UUID with its dashes removed.
 */
function generateWorkflowId() {
  const hex = crypto4.randomUUID().split("-").join("");
  return "rw_" + hex.substring(0, 24);
}
|
|
5836
|
+
/**
 * Check replay options against the loaded contracts.
 *
 * Two rules are enforced, in this order:
 *  1. If any contract has a `policy` block, `opts.principal` must not be
 *     `undefined` (an explicit `null` is accepted).
 *  2. Every contract with `execution_constraints` must have an entry of its
 *     own in `opts.tools`.
 *
 * @param {object[]} contracts - loaded contracts.
 * @param {object} opts - replay options (`principal`, `tools`).
 * @returns {ReplayConfigError|null} the first violation found (returned, not
 *   thrown), or `null` when the configuration is acceptable.
 */
function validateConfig(contracts, opts) {
  const policyTools = contracts.filter((c) => c.policy != null).map((c) => c.tool);
  if (policyTools.length > 0 && opts.principal === void 0) {
    return new ReplayConfigError(
      "policy_without_principal",
      `Contracts with policy blocks (${policyTools.join(", ")}) require a principal in replay() options`
    );
  }
  const toolsMap = opts.tools ?? {};
  // Own-property check on purpose: the `in` operator also sees inherited
  // members, so a tool named "toString" or "constructor" would wrongly count
  // as having an executor.
  const hasExecutor = (toolName) => Object.prototype.hasOwnProperty.call(toolsMap, toolName);
  const unwrappedTools = contracts
    .filter((c) => c.execution_constraints != null && !hasExecutor(c.tool))
    .map((c) => c.tool);
  if (unwrappedTools.length > 0) {
    return new ReplayConfigError(
      "constraints_without_wrapper",
      `Contracts with execution_constraints (${unwrappedTools.join(", ")}) require matching entries in the tools map`
    );
  }
  return null;
}
|
|
5856
|
+
/**
 * Find the object and method on `client` that issues completion requests.
 *
 * For provider "openai" this is `client.chat.completions.create`; for any
 * other provider it is `client.messages.create`.
 *
 * @param {object} client - provider SDK client.
 * @param {string} provider - provider identifier.
 * @returns {{terminal: object, methodName: string, originalCreate: Function}|null}
 *   the owning object, the method name and the current method, or `null`
 *   when the expected path is missing or `create` is not a function.
 */
function resolveTerminal(client, provider) {
  try {
    const terminal = provider === "openai" ? client.chat.completions : client.messages;
    const originalCreate = terminal.create;
    if (typeof originalCreate !== "function") {
      return null;
    }
    return { terminal, methodName: "create", originalCreate };
  } catch {
    // A missing intermediate object (e.g. no `chat`) lands here.
    return null;
  }
}
|
|
5881
|
+
/**
 * Build a client that routes `create` through `enforcementCreate` without
 * modifying `originalClient`.
 *
 * Each level on the path to `create` is replaced by a new object whose
 * prototype is the original object at that level, so every other member is
 * still reached through the prototype chain. For provider "openai" the path
 * is `chat.completions.create`; otherwise it is `messages.create`.
 *
 * @param {object} originalClient - provider SDK client.
 * @param {string} provider - provider identifier.
 * @param {Function} enforcementCreate - replacement `create` implementation.
 * @returns {object} the wrapping client.
 */
function createWrapperClient(originalClient, provider, enforcementCreate) {
  // New object inheriting from `base` with a single member overridden.
  const overlay = (base, key, value) => {
    const layer = Object.create(base);
    layer[key] = value;
    return layer;
  };
  if (provider !== "openai") {
    const messages = overlay(originalClient.messages, "create", enforcementCreate);
    return overlay(originalClient, "messages", messages);
  }
  const chat = originalClient.chat;
  const completions = overlay(chat.completions, "create", enforcementCreate);
  return overlay(originalClient, "chat", overlay(chat, "completions", completions));
}
|
|
5900
|
+
/**
 * Validate a provider response against the contracts that match its tool
 * calls.
 *
 * Collects, in order: output-invariant failures and expected-tool-call
 * failures for each matched contract, then response-format failures.
 * When `unmatchedPolicy` is "block", every tool call without a matching
 * contract is also reported in `unmatchedBlocked`.
 *
 * @param {unknown} response - raw provider response.
 * @param {object[]} toolCalls - tool calls extracted from the response.
 * @param {object[]} contracts - all loaded contracts.
 * @param {string[]} requestToolNames - tool names offered in the request.
 * @param {string} unmatchedPolicy - "block" to block uncontracted tools.
 * @param {string} provider - provider identifier.
 * @returns {{failures: object[], unmatchedBlocked: object[], matchedContracts: object[]}}
 */
function validateResponse2(response, toolCalls, contracts, requestToolNames, unmatchedPolicy, provider) {
  const failures = [];
  const unmatchedBlocked = [];
  const matched = matchContracts(contracts, toolCalls, void 0);
  const unmatched = findUnmatchedTools(toolCalls, matched);
  if (unmatchedPolicy === "block") {
    for (const tc of unmatched) {
      unmatchedBlocked.push({
        tool_name: tc.name,
        arguments: tc.arguments,
        reason: "unmatched_tool_blocked",
        contract_file: "",
        failures: [{
          path: "$.tool_calls",
          operator: "contract_match",
          expected: "known tool",
          found: tc.name,
          message: `No contract for tool "${tc.name}"`
        }]
      });
    }
  }
  // Shared shape for evaluator failures attributed to a contract.
  const recordFailure = (failure, contract) => {
    failures.push({
      path: failure.path,
      operator: failure.rule,
      expected: failure.detail,
      found: failure.detail,
      message: failure.detail,
      contract_file: contract.contract_file ?? contract.tool
    });
  };
  // The normalized response depends only on `response` and `toolCalls`, so
  // it is built at most once (and only if some contract needs it) instead of
  // re-parsing every tool call's arguments for each matched contract.
  let normalizedResponse;
  for (const contract of matched) {
    const outputInvariants = contract.assertions.output_invariants;
    if (outputInvariants.length > 0) {
      if (normalizedResponse === void 0) {
        normalizedResponse = buildNormalizedResponse(response, toolCalls);
      }
      for (const failure of evaluateInvariants4(normalizedResponse, outputInvariants, process.env)) {
        recordFailure(failure, contract);
      }
    }
    if (contract.expected_tool_calls && contract.expected_tool_calls.length > 0) {
      const result = evaluateExpectedToolCalls2(
        toolCalls,
        contract.expected_tool_calls,
        contract.pass_threshold ?? 1,
        contract.tool_call_match_mode ?? "any",
        process.env
      );
      for (const failure of result.failures) {
        recordFailure(failure, contract);
      }
    }
  }
  const formatResult = evaluateResponseFormatInvariants(response, contracts, requestToolNames, provider);
  failures.push(...formatResult.failures);
  return { failures, unmatchedBlocked, matchedContracts: matched };
}
|
|
5962
|
+
/**
 * Produce the `{ tool_calls: [...] }` structure that invariants are
 * evaluated against.
 *
 * Each tool call's `arguments` string is JSON-parsed; text that is not valid
 * JSON becomes `null`.
 *
 * @param {unknown} _response - unused.
 * @param {{id: *, name: string, arguments: string}[]} toolCalls
 * @returns {{tool_calls: {id: *, name: string, function: {name: string}, arguments: *}[]}}
 */
function buildNormalizedResponse(_response, toolCalls) {
  const parseOrNull = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      return null;
    }
  };
  const tool_calls = toolCalls.map((call) => ({
    id: call.id,
    name: call.name,
    function: { name: call.name },
    arguments: parseOrNull(call.arguments)
  }));
  return { tool_calls };
}
|
|
5980
|
+
/**
 * Turn validation results into an allow/block decision.
 *
 * Blocked entries come from three sources, in this order: tools blocked for
 * having no contract, calls blocked by validation failures (via
 * `buildBlockedCalls`), and - when a compiled session is given - calls whose
 * tool name appears as the last path segment of a failure and whose compiled
 * contract has `effectiveGate === "block"`, unless that tool is already
 * blocked.
 *
 * @param {object[]} toolCalls - tool calls proposed by the model.
 * @param {{unmatchedBlocked: object[], failures: object[]}} validation
 * @param {string} gateMode - reported as `response_modification` on a block.
 * @param {object} [compiled] - compiled session exposing `perToolContracts`.
 * @returns {object} `{action: "allow", tool_calls}` when nothing is blocked,
 *   otherwise `{action: "block", tool_calls, blocked, response_modification}`.
 */
function buildDecision(toolCalls, validation, gateMode, compiled) {
  const blocked = [
    ...validation.unmatchedBlocked,
    ...buildBlockedCalls(toolCalls, validation.failures, [])
  ];
  if (compiled) {
    // `path` is optional on failures (extractToolNameFromFailure treats it
    // that way too), so a failure without one must not throw here.
    const failedTools = new Set(
      validation.failures.map((f) => (typeof f.path === "string" ? f.path.split(".").pop() ?? "" : ""))
    );
    const alreadyBlocked = new Set(blocked.map((b) => b.tool_name));
    for (const tc of toolCalls) {
      if (alreadyBlocked.has(tc.name)) continue;
      if (!failedTools.has(tc.name)) continue;
      const contract = compiled.perToolContracts.get(tc.name);
      if (contract?.effectiveGate === "block") {
        blocked.push({
          tool_name: tc.name,
          arguments: tc.arguments,
          reason: "risk_gate_blocked",
          contract_file: "",
          failures: [{
            path: `$.tool_calls.${tc.name}`,
            operator: "effective_gate",
            expected: "allow",
            found: "block",
            message: `Tool '${tc.name}' blocked by risk_defaults (side_effect: ${contract.side_effect ?? "unknown"}, effectiveGate: block)`,
            contract_file: ""
          }]
        });
      }
    }
  }
  if (blocked.length === 0) {
    return { action: "allow", tool_calls: toolCalls };
  }
  return {
    action: "block",
    tool_calls: toolCalls,
    blocked,
    response_modification: gateMode
  };
}
|
|
6020
|
+
/**
 * Group validation failures per tool call and turn each group into a
 * blocked-call entry.
 *
 * Failures carrying `_tool_call_id` are grouped by that id; all others are
 * grouped by the tool name resolved with `extractToolNameFromFailure`.
 * Groups keep first-seen order and are appended after `additionalBlocked`.
 *
 * @param {object[]} toolCalls - tool calls proposed by the model.
 * @param {object[]} failures - validation failures.
 * @param {object[]} additionalBlocked - entries to place first.
 * @returns {object[]} `additionalBlocked` itself when there are no failures,
 *   otherwise a new array.
 */
function buildBlockedCalls(toolCalls, failures, additionalBlocked) {
  if (failures.length === 0) return additionalBlocked;

  const groups = /* @__PURE__ */ new Map();
  for (const failure of failures) {
    const callId = failure._tool_call_id;
    let groupKey;
    let toolName;
    let rawArgs;
    if (callId) {
      const call = toolCalls.find((c) => c.id === callId);
      groupKey = callId;
      toolName = call?.name ?? extractToolNameFromFailure(failure, toolCalls);
      // Arguments recorded on the failure win over those on the call.
      rawArgs = failure._tool_call_arguments ?? call?.arguments ?? "";
    } else {
      toolName = extractToolNameFromFailure(failure, toolCalls);
      groupKey = `name:${toolName}`;
      rawArgs = toolCalls.find((c) => c.name === toolName)?.arguments ?? "";
    }
    const group = groups.get(groupKey);
    if (group) {
      group.failures.push(failure);
    } else {
      groups.set(groupKey, { toolName, arguments: rawArgs, failures: [failure] });
    }
  }

  const fromFailures = Array.from(groups.values(), (group) => ({
    tool_name: group.toolName,
    arguments: group.arguments,
    reason: determineBlockReason(group.failures),
    contract_file: group.failures[0]?.contract_file ?? "",
    failures: group.failures
  }));
  return [...additionalBlocked, ...fromFailures];
}
|
|
6059
|
+
/**
 * Work out which proposed tool call a validation failure belongs to.
 *
 * Resolution order:
 *  1. the segment following `$.tool_calls.` in `failure.path`, if it names
 *     one of the proposed tool calls;
 *  2. the first tool call whose name occurs in `failure.contract_file`;
 *  3. the first tool call's name, or "_response" when there are none.
 *
 * @param {{path?: string, contract_file?: string}} failure
 * @param {{name: string}[]} toolCalls - tool calls proposed by the model.
 * @returns {string} a tool name, or "_response".
 */
function extractToolNameFromFailure(failure, toolCalls) {
  const wasCalled = (name) => toolCalls.some((tc) => tc.name === name);
  const path = typeof failure.path === "string" ? failure.path : "";
  // Tool names may contain "-", which `\w` does not match: with only the
  // narrow pattern "get-weather" was cut down to "get" and the failure was
  // attributed to the wrong tool. The hyphen-aware pattern is tried first;
  // the narrow one is kept as a fallback so every path that resolved before
  // still resolves to the same name.
  const segmentPatterns = [/\$\.tool_calls\.([\w-]+)/, /\$\.tool_calls\.(\w+)/];
  for (const pattern of segmentPatterns) {
    const candidate = pattern.exec(path)?.[1];
    if (candidate !== void 0 && wasCalled(candidate)) return candidate;
  }
  if (failure.contract_file) {
    const owner = toolCalls.find((tc) => failure.contract_file.includes(tc.name));
    if (owner) return owner.name;
  }
  return toolCalls[0]?.name ?? "_response";
}
|
|
6071
|
+
/**
 * Derive the block reason for a group of failures.
 *
 * The first failure (in array order) whose `operator` has a dedicated reason
 * decides the result; if none does, the group counts as an output-invariant
 * failure.
 *
 * @param {{operator: string}[]} failures
 * @returns {string} the block reason code.
 */
function determineBlockReason(failures) {
  const reasonByOperator = /* @__PURE__ */ new Map([
    ["response_format", "response_format_invalid"],
    ["contract_match", "unmatched_tool_blocked"],
    ["precondition_not_met", "precondition_not_met"],
    ["forbidden_tool", "forbidden_tool"],
    ["session_limit", "session_limit_exceeded"],
    ["loop_detected", "loop_detected"],
    ["policy_denied", "policy_denied"],
    ["execution_constraint_violated", "execution_constraint_violated"],
    ["exact_match", "argument_value_mismatch"],
    ["argument_value_mismatch", "argument_value_mismatch"]
  ]);
  const decisive = failures.find((f) => reasonByOperator.has(f.operator));
  return decisive ? reasonByOperator.get(decisive.operator) : "output_invariant_failed";
}
|
|
6085
|
+
/**
 * Evaluate input invariants of every contract whose tool is offered in the
 * request.
 *
 * Contracts for tools not present in the request, or without input
 * invariants, are skipped. Results keep contract order.
 *
 * @param {object} request - provider request.
 * @param {object[]} contracts - loaded contracts.
 * @returns {object[]} one failure record per violated invariant.
 */
function evaluateInputInvariants(request, contracts) {
  const offeredTools = new Set(extractRequestToolNames(request));
  return contracts
    .filter((contract) => offeredTools.has(contract.tool) && contract.assertions.input_invariants.length !== 0)
    .flatMap((contract) => {
      const violations = evaluateInvariants4(request, contract.assertions.input_invariants, process.env);
      return Array.from(violations, ({ path, rule, detail }) => ({
        path,
        operator: rule,
        expected: detail,
        found: detail,
        message: detail,
        contract_file: contract.contract_file ?? contract.tool
      }));
    });
}
|
|
6106
|
+
/**
 * List the names of the tools declared in a provider request.
 *
 * A tool's name is taken from its own `name` field when that is a string,
 * otherwise from `function.name`. Entries without a non-empty string name
 * are dropped.
 *
 * @param {object} request - provider request; `request.tools` is read.
 * @returns {string[]} tool names in declaration order; empty when
 *   `request.tools` is not an array.
 */
function extractRequestToolNames(request) {
  const declared = request.tools;
  if (!Array.isArray(declared)) return [];
  const names = [];
  for (const tool of declared) {
    const record = toRecord10(tool);
    let name = record.name;
    if (typeof name !== "string") {
      // Fall back to the nested function descriptor.
      name = toRecord10(record.function).name;
    }
    if (typeof name === "string" && name.length > 0) {
      names.push(name);
    }
  }
  return names;
}
|
|
6115
|
+
/**
 * Create the error describing a block decision.
 *
 * The message, contract file and failure list are taken from the first
 * blocked entry; missing pieces fall back to "unknown", "" and [].
 *
 * @param {object} decision - gate decision; must have `action === "block"`.
 * @returns {ReplayContractError} the error (returned, not thrown).
 * @throws {Error} when `decision.action` is not "block".
 */
function buildContractError2(decision) {
  if (decision.action !== "block") {
    throw new Error("Cannot build contract error from allow decision");
  }
  const primary = decision.blocked[0];
  const toolName = primary?.tool_name ?? "unknown";
  const reason = primary?.reason ?? "unknown";
  const contractFile = primary?.contract_file ?? "";
  const failures = primary?.failures ?? [];
  return new ReplayContractError(
    `Tool call blocked: ${toolName} \u2014 ${reason}`,
    decision,
    contractFile,
    failures
  );
}
|
|
6127
|
+
/**
 * Build the record for a completed step in which every tool call is marked
 * as allowed and committed.
 *
 * For each tool call the matching contract (looked up by tool name) and the
 * optional compiled per-tool contract are consulted; compiled values take
 * precedence for preconditions and commit requirement. Resource values are
 * pulled out of the parsed call arguments for every precondition that names
 * a resource path.
 *
 * @param {number} stepIndex - Index of the step; also used in `stepId`.
 * @param {string} sessionId - Session identifier, prefix of `stepId`.
 * @param {Array<{name: string, arguments: string}>} toolCalls - Calls made in this step.
 * @param {Array<object>} contracts - Contracts, keyed by their `tool` field.
 * @param {unknown} response - Provider response; `model`, `usage` and
 *   `choices[0].finish_reason` are read when present with the expected types.
 * @param {unknown} provider - Not read by this function.
 * @param {string} [tier="strong"] - Commit tier stamped on the step and its calls.
 * @param {object} [compiled] - Optional object exposing a `perToolContracts` map.
 * @returns {object} The completed step record.
 */
function buildCompletedStep(stepIndex, sessionId, toolCalls, contracts, response, provider, tier = "strong", compiled) {
  const contractsByTool = new Map(contracts.map((entry) => [entry.tool, entry]));

  const describeCall = (call) => {
    const sourceContract = contractsByTool.get(call.name);
    const compiledEntry = compiled?.perToolContracts.get(call.name);

    // Arguments that fail to parse are tolerated: the call is still recorded,
    // it simply contributes no resource values.
    let args;
    try {
      args = JSON.parse(call.arguments);
    } catch {
      args = void 0;
    }

    const conditions = compiledEntry?.preconditions ?? sourceContract?.preconditions;
    // Stays null unless at least one resource path resolves to a value.
    let resourceValues = null;
    if (conditions && args) {
      for (const condition of conditions) {
        if (!condition.resource) continue;
        const resourcePath = typeof condition.resource === "string" ? condition.resource : condition.resource.path;
        const found = extractResourceValue(args, resourcePath);
        if (found === void 0) continue;
        if (!resourceValues) resourceValues = {};
        resourceValues[resourcePath] = found;
      }
    }

    const commitRequirement = compiledEntry?.commit_requirement ?? sourceContract?.commit_requirement ?? "acknowledged";
    return {
      toolName: call.name,
      arguments_hash: computeArgumentsHash(call.arguments),
      proposal_decision: "allowed",
      execution_state: "outcome_recorded",
      evidence_level: "acknowledged",
      commit_requirement: commitRequirement,
      commit_state: "committed",
      commit_tier: tier,
      executor_attested: false,
      contractFile: sourceContract?.contract_file ?? null,
      resourceValues,
      policyVerdict: null,
      constraintVerdict: null
    };
  };

  const completedToolCalls = toolCalls.map((call) => describeCall(call));

  // Response metadata: each field falls back to null unless it has the expected type.
  const responseRecord = toRecord10(response);
  const choiceList = Array.isArray(responseRecord.choices) ? responseRecord.choices : [];
  const leadChoice = toRecord10(choiceList[0]);
  const finish_reason = typeof leadChoice.finish_reason === "string" ? leadChoice.finish_reason : null;
  const model = typeof responseRecord.model === "string" ? responseRecord.model : null;
  const usageRecord = toRecord10(responseRecord.usage);
  const hasTokenCounts = typeof usageRecord.prompt_tokens === "number" && typeof usageRecord.completion_tokens === "number";
  const usage = hasTokenCounts ? { prompt_tokens: usageRecord.prompt_tokens, completion_tokens: usageRecord.completion_tokens } : null;

  return {
    stepIndex,
    stepId: `${sessionId}_step_${stepIndex}`,
    toolCalls: completedToolCalls,
    proposal_decision: "allowed",
    commit_state: "committed",
    commit_tier: tier,
    max_evidence_level: "acknowledged",
    invariantFailures: [],
    phase: null,
    phaseTransition: null,
    completedAt: new Date().toISOString(),
    outputExtract: {},
    finish_reason,
    model,
    usage
  };
}
|
|
6194
|
+
/**
 * Resolve a dot-separated path (optionally prefixed with "$.") against a value.
 *
 * An empty path or a bare "$" returns `args` itself. Each segment is looked
 * up as an OWN property only, so segments that happen to match inherited
 * members (e.g. "constructor", "toString", "__proto__") resolve to
 * `undefined` instead of leaking prototype members into the result.
 *
 * @param {unknown} args - Root value to walk (typically a parsed JSON object).
 * @param {string} path - Path such as "$.order.id" or "order.id".
 * @returns {unknown} The value at the path, or `undefined` when any segment
 *   is missing or an intermediate value is not an object.
 */
function extractResourceValue(args, path) {
  const cleanPath = path.startsWith("$.") ? path.slice(2) : path;
  if (cleanPath === "" || cleanPath === "$") return args;
  let current = args;
  for (const seg of cleanPath.split(".")) {
    if (current === null || current === void 0 || typeof current !== "object") return void 0;
    // Own-property check: never follow the prototype chain for data lookups.
    if (!Object.prototype.hasOwnProperty.call(current, seg)) return void 0;
    current = current[seg];
  }
  return current;
}
|
|
6205
|
+
/**
 * Write `value` into `obj` at a dot-separated path (optionally prefixed with
 * "$."), creating intermediate plain objects as needed.
 *
 * An empty path or a bare "$" is a no-op. Intermediate values that are
 * missing, null or not objects are replaced with a fresh `{}`.
 *
 * Paths containing a "__proto__" segment are ignored entirely: following that
 * segment would reach (and mutate) the shared prototype of `obj`, i.e.
 * prototype pollution. Descent also only follows own properties, so an
 * object-valued inherited member is never mutated.
 *
 * @param {object} obj - Target object; mutated in place.
 * @param {string} path - Path such as "$.order.id" or "order.id".
 * @param {unknown} value - Value to store at the final segment.
 * @returns {void}
 */
function setAtPath(obj, path, value) {
  const cleanPath = path.startsWith("$.") ? path.slice(2) : path;
  if (cleanPath === "" || cleanPath === "$") return;
  const segments = cleanPath.split(".");
  // Refuse to touch prototypes, whether as an intermediate or the final key.
  if (segments.includes("__proto__")) return;
  let current = obj;
  for (let i = 0; i < segments.length - 1; i++) {
    const key = segments[i];
    const existing = Object.prototype.hasOwnProperty.call(current, key) ? current[key] : void 0;
    if (existing === void 0 || existing === null || typeof existing !== "object") {
      current[key] = {};
    }
    current = current[key];
  }
  current[segments[segments.length - 1]] = value;
}
|
|
6218
|
+
/**
 * Gather the distinct output paths that other contracts assert on for a tool.
 *
 * Scans every precondition of every contract and, for those that require
 * `toolName` as a prior tool and carry `with_output` assertions, collects
 * each assertion's `path`.
 *
 * @param {string} toolName - Tool whose output is being referenced.
 * @param {Array<object>} contracts - Contracts to scan.
 * @returns {string[]} Unique paths in first-seen order.
 */
function collectWithOutputPaths(toolName, contracts) {
  const seen = new Set();
  for (const { preconditions } of contracts) {
    for (const pre of preconditions || []) {
      const relevant = pre.requires_prior_tool === toolName && pre.with_output;
      if (!relevant) continue;
      for (const { path } of pre.with_output) {
        seen.add(path);
      }
    }
  }
  return [...seen];
}
|
|
6232
|
+
/**
 * Derive per-step output extracts from a batch of tool results.
 *
 * For every result whose tool is referenced by a `with_output` assertion,
 * the result content (parsed from JSON when it is a string) is sampled at the
 * asserted paths. The sampled values are attributed to the most recent step
 * in `state.steps` that called the same tool.
 *
 * Results are skipped when the tool name is "unknown", when no paths are
 * asserted for the tool, when the content is not valid JSON or not an
 * object, or when none of the paths resolve to a value.
 *
 * @param {Array<{toolName: string, content: unknown}>} toolResults - Results to inspect.
 * @param {{steps: Array<object>}} state - Session state; only `steps` is read.
 * @param {Array<object>} contracts - Contracts used to find asserted paths.
 * @returns {Map<number, object>} Extracts keyed by step index; later results
 *   for the same step are shallow-merged over earlier ones.
 */
function extractOutputFromToolResults(toolResults, state, contracts) {
  const pending = new Map();
  for (const toolResult of toolResults) {
    if (toolResult.toolName === "unknown") continue;

    const wantedPaths = collectWithOutputPaths(toolResult.toolName, contracts);
    if (wantedPaths.length === 0) continue;

    let payload;
    try {
      payload = typeof toolResult.content === "string" ? JSON.parse(toolResult.content) : toolResult.content;
    } catch {
      // Non-JSON output cannot be sampled; ignore this result.
      continue;
    }
    if (payload === null || typeof payload !== "object") continue;

    const sampled = {};
    for (const wanted of wantedPaths) {
      const found = extractResourceValue(payload, wanted);
      if (found !== void 0) {
        setAtPath(sampled, wanted, found);
      }
    }
    if (Object.keys(sampled).length === 0) continue;

    // Walk backwards so the latest step that used this tool wins.
    let owner;
    let cursor = state.steps.length;
    while (owner === void 0 && cursor-- > 0) {
      const candidate = state.steps[cursor];
      if (candidate.toolCalls.some((call) => call.toolName === toolResult.toolName)) {
        owner = candidate;
      }
    }
    if (owner) {
      const earlier = pending.get(owner.stepIndex) ?? {};
      pending.set(owner.stepIndex, { ...earlier, ...sampled });
    }
  }
  return pending;
}
|
|
6267
|
+
/**
 * Produce a new state with output extracts merged into the affected steps.
 *
 * Nothing is mutated: affected steps are copied with the extract
 * shallow-merged over their existing `outputExtract`, and the
 * satisfied-preconditions cache is copied before being refreshed. For each
 * tool call of an updated step the cache gets an entry under the tool name,
 * plus one entry per recorded resource value under
 * `<toolName>:<JSON of value>`.
 *
 * @param {object} state - Current session state.
 * @param {Map<number, object>} updates - Extracts keyed by step index.
 * @returns {object} `state` itself when `updates` is empty, otherwise a copy
 *   with refreshed `steps`, `satisfiedPreconditions` and `lastStep`.
 */
function applyOutputExtracts(state, updates) {
  if (updates.size === 0) return state;

  const mergedSteps = state.steps.map((step) => {
    const addition = updates.get(step.stepIndex);
    return addition ? { ...step, outputExtract: { ...step.outputExtract, ...addition } } : step;
  });

  const cache = new Map(state.satisfiedPreconditions);
  for (const step of mergedSteps) {
    if (!updates.has(step.stepIndex)) continue;
    for (const call of step.toolCalls) {
      cache.set(call.toolName, step.outputExtract);
      if (!call.resourceValues) continue;
      for (const resourceValue of Object.values(call.resourceValues)) {
        cache.set(`${call.toolName}:${JSON.stringify(resourceValue)}`, step.outputExtract);
      }
    }
  }

  // Keep lastStep pointing at the refreshed copy when that step was updated.
  let lastStep = state.lastStep;
  if (state.lastStep && updates.has(state.lastStep.stepIndex)) {
    lastStep = mergedSteps.find((candidate) => candidate.stepIndex === state.lastStep.stepIndex) ?? state.lastStep;
  }

  return {
    ...state,
    steps: mergedSteps,
    satisfiedPreconditions: cache,
    lastStep
  };
}
|
|
6294
|
+
/**
 * Return the session limits declared by the first contract that has any.
 *
 * @param {Array<object>} contracts - Contracts to search, in order.
 * @returns {object|null} The first truthy `session_limits`, or null when no
 *   contract declares one.
 */
function resolveSessionLimits(contracts) {
  const owner = contracts.find((contract) => contract.session_limits);
  return owner ? owner.session_limits : null;
}
|
|
6300
|
+
/**
 * Build a frozen, redacted snapshot of session state.
 *
 * Maps and Sets are converted to plain objects/arrays. The following are
 * deliberately blanked rather than copied:
 *  - `principal` is always null (may contain user-scoped identities);
 *  - `lastStep.outputExtract` is an empty object (FIX-14: redacted per
 *    specs/replay-v2.md § getState());
 *  - every value in `satisfiedPreconditions` is an empty object (FIX-14:
 *    the real values contain raw outputExtract data); only keys survive.
 * Tool calls in `lastStep` are reduced to name, arguments hash and contract file.
 *
 * Only the top-level object is frozen.
 *
 * @param {object} state - Live session state.
 * @param {object|null} [lastNarrowing=null] - Narrowing summary to embed.
 * @returns {Readonly<object>} The snapshot.
 */
function buildStateSnapshot(state, lastNarrowing = null) {
  const source = state.lastStep;
  let lastStep = null;
  if (source) {
    lastStep = {
      stepIndex: source.stepIndex,
      stepId: source.stepId,
      toolCalls: source.toolCalls.map(({ toolName, arguments_hash, contractFile }) => ({
        toolName,
        arguments_hash,
        contractFile
      })),
      invariantFailures: source.invariantFailures,
      phase: source.phase,
      phaseTransition: source.phaseTransition,
      completedAt: source.completedAt,
      // FIX-14: Redacted per specs/replay-v2.md § getState()
      outputExtract: {},
      finish_reason: source.finish_reason,
      model: source.model
    };
  }

  // FIX-14: Values redacted (contain raw outputExtract); keep the keys only.
  const redactedPreconditions = Object.fromEntries(
    [...state.satisfiedPreconditions.keys()].map((key) => [key, {}])
  );

  const snapshot = {
    sessionId: state.sessionId,
    agent: state.agent,
    // Redacted — may contain user-scoped identities
    principal: null,
    startedAt: state.startedAt,
    stateVersion: state.stateVersion,
    controlRevision: state.controlRevision,
    currentPhase: state.currentPhase,
    totalStepCount: state.totalStepCount,
    totalToolCalls: state.totalToolCalls,
    totalCost: state.totalCost,
    actualCost: state.actualCost,
    toolCallCounts: Object.fromEntries(state.toolCallCounts),
    forbiddenTools: Array.from(state.forbiddenTools),
    satisfiedPreconditions: redactedPreconditions,
    lastStep,
    lastNarrowing,
    killed: state.killed,
    totalUnguardedCalls: state.totalUnguardedCalls,
    consecutiveBlockCount: state.consecutiveBlockCount,
    totalBlockCount: state.totalBlockCount
  };
  return Object.freeze(snapshot);
}
|
|
6345
|
+
/**
 * Shared, immutable snapshot with zeroed counters and empty collections.
 *
 * This object is a module-level singleton, so its nested containers are
 * frozen as well as the top-level object: a shallow freeze alone would let
 * one holder mutate `toolCallCounts`, `forbiddenTools` or
 * `satisfiedPreconditions` and have the change show up for every other
 * holder of the same snapshot.
 *
 * NOTE: `startedAt` is a Date (epoch 0); Date instances cannot be made
 * immutable by freezing, so treat it as read-only by convention.
 */
var EMPTY_STATE_SNAPSHOT = Object.freeze({
  sessionId: "",
  agent: null,
  principal: null,
  startedAt: /* @__PURE__ */ new Date(0),
  stateVersion: 0,
  controlRevision: 0,
  currentPhase: null,
  totalStepCount: 0,
  totalToolCalls: 0,
  totalCost: 0,
  actualCost: 0,
  toolCallCounts: Object.freeze({}),
  forbiddenTools: Object.freeze([]),
  satisfiedPreconditions: Object.freeze({}),
  lastStep: null,
  lastNarrowing: null,
  killed: false,
  totalUnguardedCalls: 0,
  consecutiveBlockCount: 0,
  totalBlockCount: 0
});
|
|
6367
|
+
/**
 * Create an inert session object that exposes the session surface but does
 * nothing: the client is returned untouched, mutators are no-ops, queries
 * return empty/null values and health always reports "inactive".
 *
 * @param {object} client - Client handed back unchanged as `session.client`.
 * @param {string} sessionId - Not read by this function.
 * @param {string} reason - Not read by this function.
 * @returns {object} The inert session.
 */
function createInactiveSession(client, sessionId, reason) {
  // A fresh object per call, so callers never share a health record.
  const reportInactiveHealth = () => ({
    status: "inactive",
    authorityState: "inactive",
    protectionLevel: "monitor",
    durability: "inactive",
    tier: "compat",
    compatEnforcement: "protective",
    cluster_detected: false,
    bypass_detected: false,
    totalSteps: 0,
    totalBlocks: 0,
    totalErrors: 0,
    killed: false,
    shadowEvaluations: 0
  });
  const flushNothing = () => Promise.resolve({ captured: 0, sent: 0, active: false, errors: [] });
  const resolveNull = () => Promise.resolve(null);

  return {
    client,
    flush: flushNothing,
    restore() {
      // no-op: nothing was attached
    },
    kill() {
      // no-op: nothing to stop
    },
    getHealth: reportInactiveHealth,
    getState: () => EMPTY_STATE_SNAPSHOT,
    getLastNarrowing: () => null,
    getLastShadowDelta: () => null,
    narrow() {
      // no-op
    },
    widen() {
      // no-op
    },
    tools: {},
    getWorkflowState: resolveNull,
    handoff: () => Promise.resolve(null)
  };
}
|
|
6402
|
+
/**
 * Create an inert session whose client refuses to make calls.
 *
 * The session surface is the same inert one produced by
 * `createInactiveSession`; only `client` differs. When a provider can be
 * detected for `client`, it is replaced by a wrapper whose create function
 * throws the configuration error. When no provider is detected the original
 * client is returned as-is.
 *
 * @param {object} client - Client to wrap.
 * @param {string} sessionId - Forwarded to `createInactiveSession`.
 * @param {string} detail - Detail for the default "compilation_failed" error.
 * @param {Error} [configError] - Error to throw instead of the default one.
 * @returns {object} The blocking inert session.
 */
function createBlockingInactiveSession(client, sessionId, detail, configError) {
  const failure = configError ?? new ReplayConfigError("compilation_failed", detail);
  const provider = detectProviderSafe(client);
  const rejectCreate = () => {
    throw failure;
  };
  const guardedClient = provider ? createWrapperClient(client, provider, rejectCreate) : client;
  return {
    ...createInactiveSession(client, sessionId, detail),
    client: guardedClient
  };
}
|
|
6443
|
+
/**
 * Summarise a narrowing result for inclusion in a state snapshot.
 *
 * @param {{removed: Array<object>, allowed: Array<unknown>}|null|undefined} result
 *   Narrowing result.
 * @returns {object|null} null when there is no result or nothing was removed;
 *   otherwise the removed entries (tool, reason and, when not null/undefined,
 *   detail) together with removed/allowed counts.
 */
function toNarrowingSnapshot(result) {
  if (!result) return null;
  const { removed, allowed } = result;
  if (removed.length === 0) return null;

  const removedEntries = removed.map(({ tool, reason, detail }) => {
    const entry = { tool, reason };
    if (detail != null) entry.detail = detail;
    return entry;
  });
  return {
    removed: removedEntries,
    removedCount: removed.length,
    allowedCount: allowed.length
  };
}
|
|
6455
|
+
/**
 * Render the bracketed system notice that lists tools removed by narrowing.
 *
 * Each removed tool becomes one "- tool: reason[: detail]" line. Entries
 * removed for "policy_denied" are shown as "restricted" and never include
 * their detail.
 *
 * @param {{removed: Array<{tool: string, reason: string, detail?: string}>}} narrowResult
 *   Narrowing result.
 * @returns {string} Multi-line notice text.
 */
function buildNarrowingInjectionMessage(narrowResult) {
  const bullets = [];
  for (const entry of narrowResult.removed) {
    if (entry.reason === "policy_denied") {
      // Policy details are withheld from the model.
      bullets.push(`- ${entry.tool}: restricted`);
      continue;
    }
    const suffix = entry.detail ? `: ${entry.detail}` : "";
    bullets.push(`- ${entry.tool}: ${entry.reason}${suffix}`);
  }
  return [
    "[System: The following tools are not available for this request:",
    bullets.join("\n"),
    "Please work with the available tools.]"
  ].join("\n");
}
|
|
6465
|
+
/**
 * Prepend a system notice to a request, in place.
 *
 * For provider "openai" the notice becomes a new leading system message in
 * `request.messages`. For any other provider it is placed ahead of
 * `request.system`: joined with a blank line when that is a string, added as
 * a leading text block when it is an array, or set as the whole value
 * otherwise.
 *
 * @param {object} request - Request object; mutated.
 * @param {string} message - Notice text.
 * @param {string} provider - Provider name.
 * @returns {void}
 */
function injectNarrowingSystemMessage(request, message, provider) {
  if (provider === "openai") {
    const prior = Array.isArray(request.messages) ? request.messages : [];
    request.messages = [{ role: "system", content: message }, ...prior];
    return;
  }

  const current = request.system;
  if (Array.isArray(current)) {
    request.system = [{ type: "text", text: message }, ...current];
    return;
  }
  request.system = typeof current === "string" ? `${message}\n\n${current}` : message;
}
|
|
6480
|
+
/**
 * Report whether the client carries a truthy OBSERVE_WRAPPED marker.
 *
 * @param {object} client - Client to inspect.
 * @returns {boolean} true when the marker property is truthy.
 */
function isObserveWrapped(client) {
  const marker = client[OBSERVE_WRAPPED];
  return marker ? true : false;
}
|
|
6483
|
+
/**
 * Report whether the client carries a truthy REPLAY_ATTACHED2 marker.
 *
 * @param {object} client - Client to inspect.
 * @returns {boolean} true when the marker property is truthy.
 */
function isReplayAttached2(client) {
  const marker = client[REPLAY_ATTACHED2];
  return marker ? true : false;
}
|
|
6486
|
+
/**
 * Mark a client with the REPLAY_ATTACHED2 marker, best effort.
 *
 * Any failure to write the property (for example a frozen client) is
 * swallowed on purpose; the function never throws.
 *
 * @param {object} client - Client to mark.
 * @returns {void}
 */
function setReplayAttached(client) {
  try {
    Reflect.set(client, REPLAY_ATTACHED2, true);
  } catch {
    // Best effort: a client that cannot be marked is simply left unmarked.
  }
}
|
|
6492
|
+
/**
 * Remove the REPLAY_ATTACHED2 marker from a client, best effort.
 *
 * Any failure to delete the property is swallowed on purpose; the function
 * never throws.
 *
 * @param {object} client - Client to unmark.
 * @returns {void}
 */
function clearReplayAttached(client) {
  try {
    Reflect.deleteProperty(client, REPLAY_ATTACHED2);
  } catch {
    // Best effort: leave the client as it is when the marker cannot be removed.
  }
}
|
|
6498
|
+
/**
 * Non-throwing wrapper around `detectProvider`.
 *
 * @param {object} client - Client to inspect.
 * @returns {*} Whatever `detectProvider` returns, or null if it throws.
 */
function detectProviderSafe(client) {
  let provider = null;
  try {
    provider = detectProvider(client);
  } catch {
    // Detection failure is reported as "no provider".
    provider = null;
  }
  return provider;
}
|
|
6505
|
+
/**
 * Pick the API key to use.
 *
 * A non-empty string `opts.apiKey` wins; otherwise the REPLAYCI_API_KEY
 * environment variable is used when a `process` global exists and the
 * variable is a non-empty string.
 *
 * @param {{apiKey?: unknown}} opts - Options that may carry an explicit key.
 * @returns {string|undefined} The key, or undefined when none is available.
 */
function resolveApiKey2(opts) {
  const isUsable = (candidate) => typeof candidate === "string" && candidate.length > 0;
  if (isUsable(opts.apiKey)) {
    return opts.apiKey;
  }
  // Guard for runtimes that have no `process` global.
  if (typeof process === "undefined") {
    return void 0;
  }
  const fromEnv = process.env.REPLAYCI_API_KEY;
  return isUsable(fromEnv) ? fromEnv : void 0;
}
|
|
6512
|
+
/**
 * Generate a session id: "rs_" followed by the first 24 characters of a
 * random UUID with its dashes removed.
 *
 * @returns {string} The new session id.
 */
function generateSessionId2() {
  const compact = crypto4.randomUUID().replace(/-/g, "");
  return `rs_${compact.slice(0, 24)}`;
}
|
|
6515
|
+
/**
 * Drop a leading "sha256:" from a hash string, if present.
 *
 * @param {string} hash - Hash with or without the prefix.
 * @returns {string} The hash without the prefix.
 */
function stripHashPrefix(hash) {
  const prefix = "sha256:";
  if (!hash.startsWith(prefix)) {
    return hash;
  }
  return hash.slice(prefix.length);
}
|
|
6518
|
+
/**
 * Deliver a diagnostic event to an optional callback without ever throwing.
 *
 * A null/undefined callback is skipped. Anything thrown while invoking the
 * callback is swallowed on purpose so diagnostics cannot break the caller.
 *
 * @param {((event: object) => void)|null|undefined} diagnostics - Callback.
 * @param {object} event - Event to deliver.
 * @returns {void}
 */
function emitDiagnostic2(diagnostics, event) {
  if (diagnostics === null || diagnostics === void 0) {
    return;
  }
  try {
    diagnostics(event);
  } catch {
    // Diagnostics are best effort; a failing handler is ignored.
  }
}
|
|
6524
|
+
/**
 * Coerce a value to something safe for property access.
 *
 * @param {unknown} value - Any value.
 * @returns {object} `value` itself when it is a non-null object (arrays
 *   included); otherwise a new empty object.
 */
function toRecord10(value) {
  if (value === null || typeof value !== "object") {
    return {};
  }
  return value;
}
|
|
6527
|
+
/**
 * Decide the protection level for a session.
 *
 * - "monitor" when the mode is "shadow" or "log-only";
 * - "govern" when tools are supplied and every state-bearing contract's tool
 *   is among them (and at least one state-bearing contract exists);
 * - "protect" in every other case.
 *
 * @param {string} mode - Session mode.
 * @param {object|null|undefined} tools - Wrapped tools keyed by tool name.
 * @param {Array<object>} contracts - Contracts; filtered with `isStateBearing`.
 * @returns {"monitor"|"protect"|"govern"} The protection level.
 */
function determineProtectionLevel(mode, tools, contracts) {
  const monitorOnly = mode === "shadow" || mode === "log-only";
  if (monitorOnly) return "monitor";

  const wrappedNames = tools ? Object.keys(tools) : [];
  if (wrappedNames.length === 0) return "protect";

  const stateful = contracts.filter(isStateBearing);
  if (stateful.length === 0) return "protect";

  const wrapped = new Set(wrappedNames);
  for (const contract of stateful) {
    // A single unwrapped state-bearing tool rules out "govern".
    if (!wrapped.has(contract.tool)) return "protect";
  }
  return "govern";
}
|
|
6536
|
+
/**
 * Tell whether a contract is state-bearing.
 *
 * A contract qualifies when any of these hold: it has a commit requirement
 * other than "none", it defines `transitions`, it defines
 * `execution_constraints`, or it has a non-empty `forbids_after` list.
 *
 * @param {object} contract - Contract to classify.
 * @returns {boolean} true when the contract is state-bearing.
 */
function isStateBearing(contract) {
  const { commit_requirement, transitions, execution_constraints, forbids_after } = contract;
  const requiresCommit = commit_requirement != null && commit_requirement !== "none";
  const forbidsSomething = forbids_after != null && forbids_after.length > 0;
  return requiresCommit || transitions != null || execution_constraints != null || forbidsSomething;
}
|
|
6543
|
+
/**
 * Map a protection level to the runtime mode and tier to request.
 *
 * "govern" requests authoritative/strong; every other level requests
 * advisory/compat. The result does not currently depend on `mode`: the
 * previous dedicated "protect" + "enforce" branch returned exactly the same
 * value as the fall-through, so it has been folded into it. The parameter is
 * kept so existing callers are unaffected.
 *
 * @param {string} protectionLevel - "monitor", "protect" or "govern".
 * @param {string} mode - Session mode; accepted for compatibility, not read.
 * @returns {{requestedMode: string, requestedTier: string}} Requested runtime settings.
 */
function deriveRuntimeRequest(protectionLevel, mode) {
  if (protectionLevel === "govern") {
    return { requestedMode: "authoritative", requestedTier: "strong" };
  }
  return { requestedMode: "advisory", requestedTier: "compat" };
}
|
|
6552
|
+
|
|
6553
|
+
// src/memoryStore.ts
|
|
6554
|
+
/**
 * In-memory store holding a single state object and a list of captures.
 */
var MemoryStore = class {
  // Last stored state; null until the first successful compareAndSet.
  state = null;
  // Captures in the order they were appended.
  captures = [];

  /**
   * Replace the stored state if the caller's expected version matches.
   *
   * When nothing is stored yet the write always succeeds and
   * `currentVersion` is not checked.
   *
   * @param {number} currentVersion - Version the caller believes is stored.
   * @param {{stateVersion: number}} newState - State to store.
   * @returns {{success: boolean}} Whether the state was replaced.
   */
  compareAndSet(currentVersion, newState) {
    const stored = this.state;
    const versionConflict = stored !== null && stored.stateVersion !== currentVersion;
    if (versionConflict) {
      return { success: false };
    }
    this.state = newState;
    return { success: true };
  }

  /**
   * @returns {object|null} The stored state, or null when nothing is stored.
   */
  load() {
    return this.state;
  }

  /**
   * Append a capture to the in-memory list.
   *
   * @param {object} capture - Capture to record.
   * @returns {void}
   */
  appendCapture(capture) {
    this.captures.push(capture);
  }

  /** @internal Test-only: get all captured calls. */
  getCapturedCalls() {
    return this.captures;
  }
};
|
|
2814
6579
|
export {
|
|
6580
|
+
MemoryStore,
|
|
6581
|
+
ReplayConfigError,
|
|
6582
|
+
ReplayContractError,
|
|
6583
|
+
ReplayKillError,
|
|
6584
|
+
RuntimeClient,
|
|
6585
|
+
RuntimeClientError,
|
|
6586
|
+
createRuntimeClient,
|
|
2815
6587
|
observe,
|
|
2816
6588
|
prepareContracts,
|
|
6589
|
+
replay,
|
|
2817
6590
|
validate
|
|
2818
6591
|
};
|