@axiastudio/aioc 0.1.0-beta.2 → 0.1.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +65 -26
- package/dist/canonical-json.d.ts +8 -0
- package/dist/canonical-json.d.ts.map +1 -0
- package/dist/canonical-json.js +76 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/run-record-utils.d.ts +84 -0
- package/dist/run-record-utils.d.ts.map +1 -0
- package/dist/run-record-utils.js +447 -0
- package/dist/run-recorder-runtime.d.ts.map +1 -1
- package/dist/run-recorder-runtime.js +6 -70
- package/dist/run.d.ts +7 -0
- package/dist/run.d.ts.map +1 -1
- package/package.json +6 -4
package/README.md
CHANGED
|
@@ -64,6 +64,10 @@ Canonical examples reference:
|
|
|
64
64
|
- run record hook via `run(..., { record })` for external persistence/audit adapters
|
|
65
65
|
- run record prompt snapshots per turn (`turn`, `agentName`, `promptVersion`, `promptHash`, optional `promptText`)
|
|
66
66
|
- run record request fingerprints per turn (`requestHash`, segment hashes, `runtimeVersion`, `fingerprintSchemaVersion`)
|
|
67
|
+
- run record utilities:
|
|
68
|
+
- `extractToolCalls(...)`
|
|
69
|
+
- `compareRunRecords(...)`
|
|
70
|
+
- `replayFromRunRecord(...)` with modes `live | strict | hybrid`
|
|
67
71
|
- JSON helper `toJsonValue(...)` to map runtime artifacts (for example `RunRecord.items`) into JSON-safe values for storage adapters
|
|
68
72
|
- message helpers `user(...)`, `assistant(...)`, `system(...)`
|
|
69
73
|
- `setDefaultProvider(...)`
|
|
@@ -92,6 +96,63 @@ Privacy baseline highlights:
|
|
|
92
96
|
- `record.contextRedactor` should be considered mandatory for production run-record persistence.
|
|
93
97
|
- sink adapters should implement encryption, access controls, retention, and deletion policies.
|
|
94
98
|
|
|
99
|
+
## RunRecord Utilities
|
|
100
|
+
|
|
101
|
+
### `extractToolCalls(...)`
|
|
102
|
+
|
|
103
|
+
```ts
|
|
104
|
+
import { extractToolCalls } from "@axiastudio/aioc";
|
|
105
|
+
|
|
106
|
+
const calls = extractToolCalls(runRecord);
|
|
107
|
+
// [{ callId, name, arguments, output?, hasOutput, turn?, argsCanonicalJson, argsHash }]
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### `compareRunRecords(...)`
|
|
111
|
+
|
|
112
|
+
```ts
|
|
113
|
+
import { compareRunRecords } from "@axiastudio/aioc";
|
|
114
|
+
|
|
115
|
+
const comparison = compareRunRecords(runRecordA, runRecordB, {
|
|
116
|
+
responseMatchMode: "exact",
|
|
117
|
+
includeSections: ["response", "toolCalls", "policy", "guardrails", "metadata"],
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
if (!comparison.equal) {
|
|
121
|
+
console.log(comparison.summary);
|
|
122
|
+
console.log(comparison.metrics);
|
|
123
|
+
console.log(comparison.differences);
|
|
124
|
+
}
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### `replayFromRunRecord(...)` (strict)
|
|
128
|
+
|
|
129
|
+
```ts
|
|
130
|
+
import { allow, replayFromRunRecord } from "@axiastudio/aioc";
|
|
131
|
+
|
|
132
|
+
const replay = await replayFromRunRecord({
|
|
133
|
+
sourceRunRecord,
|
|
134
|
+
agent,
|
|
135
|
+
mode: "strict",
|
|
136
|
+
runOptions: {
|
|
137
|
+
policies: {
|
|
138
|
+
toolPolicy: () => allow("allow_replay"),
|
|
139
|
+
},
|
|
140
|
+
},
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
console.log(replay.result.finalOutput);
|
|
144
|
+
console.log(replay.replayStats);
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
`replayFromRunRecord(...)` does not bypass policy enforcement: in `strict` and `hybrid` modes, provide `runOptions.policies` when tool/handoff execution must be authorized.
|
|
148
|
+
|
|
149
|
+
### Minimal examples (step-by-step)
|
|
150
|
+
|
|
151
|
+
- `npm run example:rru:01-extract` (`extractToolCalls(...)` from a static `RunRecord`)
|
|
152
|
+
- `npm run example:rru:02-compare` (`compareRunRecords(...)` with structured diff output)
|
|
153
|
+
- `npm run example:rru:03-replay-strict` (`replayFromRunRecord(...)` in `strict` mode)
|
|
154
|
+
- `npm run example:rru:04-replay-hybrid` (`replayFromRunRecord(...)` in `hybrid` mode)
|
|
155
|
+
|
|
95
156
|
## Test Commands
|
|
96
157
|
|
|
97
158
|
- `npm run test:unit`
|
|
@@ -104,34 +165,12 @@ Privacy baseline highlights:
|
|
|
104
165
|
- `npm run example:hello` (minimal single-agent run)
|
|
105
166
|
- `npm run example:tool-policy` (tool calls with deterministic policy gate)
|
|
106
167
|
- `npm run example:run-record` (run record persistence with redaction + audit)
|
|
168
|
+
- `npm run example:rru:01-extract` (minimal extract tool-calls example)
|
|
169
|
+
- `npm run example:rru:02-compare` (minimal run-record comparison example)
|
|
170
|
+
- `npm run example:rru:03-replay-strict` (minimal strict replay example)
|
|
171
|
+
- `npm run example:rru:04-replay-hybrid` (minimal hybrid replay example)
|
|
107
172
|
- `npm run example:non-regression` (advanced v1/v2 run-record diff)
|
|
108
173
|
|
|
109
|
-
## Python Alpha Port
|
|
110
|
-
|
|
111
|
-
Python runtime is available under `py/` (Python 3.11+), with governance-first parity against core TS semantics:
|
|
112
|
-
|
|
113
|
-
- `Agent`, `RunContext`, `run(...)` (stream/non-stream), tool registration, handoff
|
|
114
|
-
- deterministic default-deny policy gates for tool/handoff
|
|
115
|
-
- unified tool/handoff output envelope `{ status, code, publicReason, data }`
|
|
116
|
-
- typed runtime errors for deny/guardrail/max-turns
|
|
117
|
-
- run record sink adapter + context redaction + policy/guardrail decision capture
|
|
118
|
-
- provider setup helpers: `setup_mistral()`, `setup_openai()`, `setup_provider()`
|
|
119
|
-
- JSON-safe helper `to_json_value(...)`
|
|
120
|
-
|
|
121
|
-
Python test command:
|
|
122
|
-
|
|
123
|
-
- `cd py && python3 -m unittest discover -s tests -p 'test_*.py'`
|
|
124
|
-
|
|
125
|
-
Python examples:
|
|
126
|
-
|
|
127
|
-
- `cd py && python3 examples/basic/hello_world.py`
|
|
128
|
-
- `cd py && python3 examples/basic/tool_policy_allow_deny.py`
|
|
129
|
-
- `cd py && python3 examples/basic/run_record_sink.py`
|
|
130
|
-
|
|
131
|
-
Migration mapping reference:
|
|
132
|
-
|
|
133
|
-
- `docs/TS-PY-MIGRATION.md`
|
|
134
|
-
|
|
135
174
|
## License
|
|
136
175
|
|
|
137
176
|
- Project license: `MIT` (`LICENSE`)
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export type CanonicalJsonValue = null | string | number | boolean | CanonicalJsonValue[] | {
|
|
2
|
+
[key: string]: CanonicalJsonValue;
|
|
3
|
+
};
|
|
4
|
+
export declare function normalizeCanonicalJsonValue(value: unknown, seen?: WeakSet<object>): CanonicalJsonValue;
|
|
5
|
+
export declare function toCanonicalJson(value: unknown): string;
|
|
6
|
+
export declare function hashCanonicalJson(canonicalJson: string): string;
|
|
7
|
+
export declare function hashCanonicalJsonValue(value: unknown): string;
|
|
8
|
+
//# sourceMappingURL=canonical-json.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"canonical-json.d.ts","sourceRoot":"","sources":["../src/canonical-json.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,kBAAkB,GAC1B,IAAI,GACJ,MAAM,GACN,MAAM,GACN,OAAO,GACP,kBAAkB,EAAE,GACpB;IAAE,CAAC,GAAG,EAAE,MAAM,GAAG,kBAAkB,CAAA;CAAE,CAAC;AAE1C,wBAAgB,2BAA2B,CACzC,KAAK,EAAE,OAAO,EACd,IAAI,GAAE,OAAO,CAAC,MAAM,CAAiB,GACpC,kBAAkB,CAiFpB;AAED,wBAAgB,eAAe,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAEtD;AAED,wBAAgB,iBAAiB,CAAC,aAAa,EAAE,MAAM,GAAG,MAAM,CAE/D;AAED,wBAAgB,sBAAsB,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAE7D"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.normalizeCanonicalJsonValue = normalizeCanonicalJsonValue;
|
|
4
|
+
exports.toCanonicalJson = toCanonicalJson;
|
|
5
|
+
exports.hashCanonicalJson = hashCanonicalJson;
|
|
6
|
+
exports.hashCanonicalJsonValue = hashCanonicalJsonValue;
|
|
7
|
+
const node_crypto_1 = require("node:crypto");
|
|
8
|
+
/**
 * Recursively converts an arbitrary value into a JSON-safe structure whose
 * layout is deterministic, so that its serialized text can be compared or
 * hashed.
 *
 * Mapping:
 * - `null`, strings, numbers and booleans are returned unchanged;
 * - `undefined` -> "[undefined]", bigint -> "[bigint:<value>]",
 *   symbol -> "[symbol:<String(value)>]", function -> "[function]";
 * - `Date` -> `toISOString()`, `RegExp` -> `toString()`;
 * - arrays keep their order; `Set` entries are sorted by their JSON text;
 *   a `Map` becomes an array of `[key, value]` pairs sorted by the JSON text
 *   of the key;
 * - any other object becomes a plain object built from its own enumerable
 *   string keys in sorted order;
 * - a container that is met again while it is still being traversed is
 *   replaced by "[circular]".
 *
 * @param {unknown} value - Value to normalize.
 * @param {WeakSet<object>} [seen] - Containers currently on the traversal
 *   path; callers normally omit it.
 * @returns {null|string|number|boolean|Array|Object} JSON-safe representation.
 * @throws {RangeError} If a `Date` is invalid (`toISOString()` throws).
 */
function normalizeCanonicalJsonValue(value, seen = new WeakSet()) {
    if (value === null) {
        return null;
    }
    const valueType = typeof value;
    if (valueType === "string" || valueType === "number" || valueType === "boolean") {
        return value;
    }
    if (valueType === "undefined") {
        return "[undefined]";
    }
    if (valueType === "bigint") {
        return `[bigint:${String(value)}]`;
    }
    if (valueType === "symbol") {
        return `[symbol:${String(value)}]`;
    }
    if (valueType === "function") {
        return "[function]";
    }
    if (value instanceof Date) {
        return value.toISOString();
    }
    if (value instanceof RegExp) {
        return value.toString();
    }
    if (valueType !== "object") {
        return String(value);
    }
    // Cycle detection applies to every container kind (arrays, Sets and Maps
    // included); otherwise a self-referencing array recurses until the stack
    // overflows.
    if (seen.has(value)) {
        return "[circular]";
    }
    // Plain code-unit ordering: unlike localeCompare it does not depend on the
    // host's default locale, so the canonical form is the same everywhere.
    const byJsonText = (leftText, rightText) => {
        if (leftText < rightText) {
            return -1;
        }
        return leftText > rightText ? 1 : 0;
    };
    seen.add(value);
    try {
        if (Array.isArray(value)) {
            return value.map((entry) => normalizeCanonicalJsonValue(entry, seen));
        }
        if (value instanceof Set) {
            const entries = [...value].map((entry) => normalizeCanonicalJsonValue(entry, seen));
            entries.sort((left, right) => byJsonText(JSON.stringify(left), JSON.stringify(right)));
            return entries;
        }
        if (value instanceof Map) {
            const entries = [...value.entries()].map(([key, entry]) => [
                normalizeCanonicalJsonValue(key, seen),
                normalizeCanonicalJsonValue(entry, seen),
            ]);
            entries.sort((left, right) => byJsonText(JSON.stringify(left[0]), JSON.stringify(right[0])));
            return entries;
        }
        const normalized = {};
        for (const key of Object.keys(value).sort()) {
            // defineProperty instead of assignment: assigning to "__proto__"
            // would replace the prototype and drop the key from the output.
            Object.defineProperty(normalized, key, {
                value: normalizeCanonicalJsonValue(value[key], seen),
                enumerable: true,
                writable: true,
                configurable: true,
            });
        }
        return normalized;
    }
    finally {
        // Only ancestors count as circular; a container referenced twice in
        // sibling positions must still be expanded both times.
        seen.delete(value);
    }
}
|
|
68
|
+
/**
 * Serializes a value as canonical JSON text.
 *
 * The value is first passed through `normalizeCanonicalJsonValue` and the
 * normalized result is then handed to `JSON.stringify`.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} Canonical JSON text.
 */
function toCanonicalJson(value) {
    const normalized = normalizeCanonicalJsonValue(value);
    return JSON.stringify(normalized);
}
|
|
71
|
+
/**
 * Computes the SHA-256 digest of a piece of canonical JSON text.
 *
 * @param {string} canonicalJson - Text to hash.
 * @returns {string} Digest encoded as a hexadecimal string.
 */
function hashCanonicalJson(canonicalJson) {
    const hasher = (0, node_crypto_1.createHash)("sha256");
    hasher.update(canonicalJson);
    return hasher.digest("hex");
}
|
|
74
|
+
/**
 * Hashes an arbitrary value by way of its canonical JSON text.
 *
 * Equivalent to `hashCanonicalJson(toCanonicalJson(value))`.
 *
 * @param {unknown} value - Value to hash.
 * @returns {string} Whatever `hashCanonicalJson` returns for the canonical text.
 */
function hashCanonicalJsonValue(value) {
    const canonicalText = toCanonicalJson(value);
    return hashCanonicalJson(canonicalText);
}
|
package/dist/index.d.ts
CHANGED
|
@@ -13,6 +13,7 @@ export * from "./providers/mistral";
|
|
|
13
13
|
export * from "./providers/openai";
|
|
14
14
|
export * from "./run";
|
|
15
15
|
export * from "./run-record";
|
|
16
|
+
export * from "./run-record-utils";
|
|
16
17
|
export * from "./run-context";
|
|
17
18
|
export * from "./tool";
|
|
18
19
|
export * from "./types";
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,SAAS,CAAC;AACxB,cAAc,UAAU,CAAC;AACzB,cAAc,UAAU,CAAC;AACzB,cAAc,cAAc,CAAC;AAC7B,cAAc,QAAQ,CAAC;AACvB,cAAc,UAAU,CAAC;AACzB,cAAc,YAAY,CAAC;AAC3B,cAAc,UAAU,CAAC;AACzB,cAAc,kBAAkB,CAAC;AACjC,cAAc,kBAAkB,CAAC;AACjC,cAAc,8BAA8B,CAAC;AAC7C,cAAc,qBAAqB,CAAC;AACpC,cAAc,oBAAoB,CAAC;AACnC,cAAc,OAAO,CAAC;AACtB,cAAc,cAAc,CAAC;AAC7B,cAAc,eAAe,CAAC;AAC9B,cAAc,QAAQ,CAAC;AACvB,cAAc,SAAS,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,SAAS,CAAC;AACxB,cAAc,UAAU,CAAC;AACzB,cAAc,UAAU,CAAC;AACzB,cAAc,cAAc,CAAC;AAC7B,cAAc,QAAQ,CAAC;AACvB,cAAc,UAAU,CAAC;AACzB,cAAc,YAAY,CAAC;AAC3B,cAAc,UAAU,CAAC;AACzB,cAAc,kBAAkB,CAAC;AACjC,cAAc,kBAAkB,CAAC;AACjC,cAAc,8BAA8B,CAAC;AAC7C,cAAc,qBAAqB,CAAC;AACpC,cAAc,oBAAoB,CAAC;AACnC,cAAc,OAAO,CAAC;AACtB,cAAc,cAAc,CAAC;AAC7B,cAAc,oBAAoB,CAAC;AACnC,cAAc,eAAe,CAAC;AAC9B,cAAc,QAAQ,CAAC;AACvB,cAAc,SAAS,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -29,6 +29,7 @@ __exportStar(require("./providers/mistral"), exports);
|
|
|
29
29
|
__exportStar(require("./providers/openai"), exports);
|
|
30
30
|
__exportStar(require("./run"), exports);
|
|
31
31
|
__exportStar(require("./run-record"), exports);
|
|
32
|
+
__exportStar(require("./run-record-utils"), exports);
|
|
32
33
|
__exportStar(require("./run-context"), exports);
|
|
33
34
|
__exportStar(require("./tool"), exports);
|
|
34
35
|
__exportStar(require("./types"), exports);
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { Agent } from "./agent";
|
|
2
|
+
import type { RunRecord } from "./run-record";
|
|
3
|
+
import type { AgentInputItem, NonStreamRunOptions, RunResult } from "./types";
|
|
4
|
+
export interface ExtractedToolCall {
|
|
5
|
+
callId: string;
|
|
6
|
+
name: string;
|
|
7
|
+
arguments: unknown;
|
|
8
|
+
output?: unknown;
|
|
9
|
+
hasOutput: boolean;
|
|
10
|
+
turn?: number;
|
|
11
|
+
argsCanonicalJson: string;
|
|
12
|
+
argsHash: string;
|
|
13
|
+
}
|
|
14
|
+
export declare function extractToolCalls<TContext>(input: RunRecord<TContext>): ExtractedToolCall[];
|
|
15
|
+
export declare function extractToolCalls(input: AgentInputItem[]): ExtractedToolCall[];
|
|
16
|
+
export type RunRecordComparisonSection = "response" | "toolCalls" | "policy" | "guardrails" | "metadata";
|
|
17
|
+
export interface CompareRunRecordsOptions {
|
|
18
|
+
includeSections?: RunRecordComparisonSection[];
|
|
19
|
+
excludeSections?: RunRecordComparisonSection[];
|
|
20
|
+
responseMatchMode?: "exact";
|
|
21
|
+
}
|
|
22
|
+
export interface RunRecordDifference {
|
|
23
|
+
path: string;
|
|
24
|
+
kind: "mismatch" | "missing_left" | "missing_right";
|
|
25
|
+
left?: unknown;
|
|
26
|
+
right?: unknown;
|
|
27
|
+
}
|
|
28
|
+
export interface RunRecordComparisonSummary {
|
|
29
|
+
sameFinalResponse: boolean;
|
|
30
|
+
sameToolCallShape: boolean;
|
|
31
|
+
samePolicyDecisions: boolean;
|
|
32
|
+
sameGuardrailDecisions: boolean;
|
|
33
|
+
}
|
|
34
|
+
export interface RunRecordComparisonMetrics {
|
|
35
|
+
responseLengthA: number;
|
|
36
|
+
responseLengthB: number;
|
|
37
|
+
toolCallsA: number;
|
|
38
|
+
toolCallsB: number;
|
|
39
|
+
matchedToolCalls: number;
|
|
40
|
+
missingToolCalls: number;
|
|
41
|
+
extraToolCalls: number;
|
|
42
|
+
}
|
|
43
|
+
export interface RunRecordComparison {
|
|
44
|
+
equal: boolean;
|
|
45
|
+
summary: RunRecordComparisonSummary;
|
|
46
|
+
metrics: RunRecordComparisonMetrics;
|
|
47
|
+
differences: RunRecordDifference[];
|
|
48
|
+
}
|
|
49
|
+
export declare function compareRunRecords<TContextA, TContextB>(left: RunRecord<TContextA>, right: RunRecord<TContextB>, options?: CompareRunRecordsOptions): RunRecordComparison;
|
|
50
|
+
export type ReplayMode = "live" | "strict" | "hybrid";
|
|
51
|
+
export interface MissingToolCallResolution {
|
|
52
|
+
action: "throw" | "use_live" | "use_output";
|
|
53
|
+
output?: unknown;
|
|
54
|
+
}
|
|
55
|
+
export interface ReplayMissingToolCallInput {
|
|
56
|
+
mode: ReplayMode;
|
|
57
|
+
toolName: string;
|
|
58
|
+
arguments: unknown;
|
|
59
|
+
argsCanonicalJson: string;
|
|
60
|
+
argsHash: string;
|
|
61
|
+
}
|
|
62
|
+
export type ReplayMissingToolCallHandler = (input: ReplayMissingToolCallInput) => MissingToolCallResolution | Promise<MissingToolCallResolution>;
|
|
63
|
+
export interface ReplayStats {
|
|
64
|
+
recordedToolCalls: number;
|
|
65
|
+
replayedFromRecord: number;
|
|
66
|
+
missingToolCalls: number;
|
|
67
|
+
liveFallbackCalls: number;
|
|
68
|
+
}
|
|
69
|
+
export interface ReplayFromRunRecordInput<TContext = unknown> {
|
|
70
|
+
sourceRunRecord: RunRecord<TContext>;
|
|
71
|
+
agent?: Agent<TContext>;
|
|
72
|
+
agentFactory?: () => Agent<TContext> | Promise<Agent<TContext>>;
|
|
73
|
+
mode: ReplayMode;
|
|
74
|
+
runOptions?: Omit<NonStreamRunOptions<TContext>, "stream">;
|
|
75
|
+
metadataOverrides?: Record<string, unknown>;
|
|
76
|
+
onMissingToolCall?: ReplayMissingToolCallHandler;
|
|
77
|
+
}
|
|
78
|
+
export interface ReplayFromRunRecordResult<TContext = unknown> {
|
|
79
|
+
result: RunResult<TContext>;
|
|
80
|
+
replayRunRecord?: RunRecord<TContext>;
|
|
81
|
+
replayStats: ReplayStats;
|
|
82
|
+
}
|
|
83
|
+
export declare function replayFromRunRecord<TContext = unknown>(input: ReplayFromRunRecordInput<TContext>): Promise<ReplayFromRunRecordResult<TContext>>;
|
|
84
|
+
//# sourceMappingURL=run-record-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-record-utils.d.ts","sourceRoot":"","sources":["../src/run-record-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,SAAS,CAAC;AAGhC,OAAO,KAAK,EAAE,SAAS,EAAmC,MAAM,cAAc,CAAC;AAE/E,OAAO,KAAK,EAAE,cAAc,EAAE,mBAAmB,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAE9E,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,SAAS,EAAE,OAAO,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,EAAE,MAAM,CAAC;IAC1B,QAAQ,EAAE,MAAM,CAAC;CAClB;AAsBD,wBAAgB,gBAAgB,CAAC,QAAQ,EACvC,KAAK,EAAE,SAAS,CAAC,QAAQ,CAAC,GACzB,iBAAiB,EAAE,CAAC;AAEvB,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,cAAc,EAAE,GAAG,iBAAiB,EAAE,CAAC;AA+C/E,MAAM,MAAM,0BAA0B,GAClC,UAAU,GACV,WAAW,GACX,QAAQ,GACR,YAAY,GACZ,UAAU,CAAC;AAEf,MAAM,WAAW,wBAAwB;IACvC,eAAe,CAAC,EAAE,0BAA0B,EAAE,CAAC;IAC/C,eAAe,CAAC,EAAE,0BAA0B,EAAE,CAAC;IAC/C,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC7B;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,UAAU,GAAG,cAAc,GAAG,eAAe,CAAC;IACpD,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,MAAM,WAAW,0BAA0B;IACzC,iBAAiB,EAAE,OAAO,CAAC;IAC3B,iBAAiB,EAAE,OAAO,CAAC;IAC3B,mBAAmB,EAAE,OAAO,CAAC;IAC7B,sBAAsB,EAAE,OAAO,CAAC;CACjC;AAED,MAAM,WAAW,0BAA0B;IACzC,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,CAAC;IACzB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,OAAO,CAAC;IACf,OAAO,EAAE,0BAA0B,CAAC;IACpC,OAAO,EAAE,0BAA0B,CAAC;IACpC,WAAW,EAAE,mBAAmB,EAAE,CAAC;CACpC;AA4FD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,SAAS,EACpD,IAAI,EAAE,SAAS,CAAC,SAAS,CAAC,EAC1B,KAAK,EAAE,SAAS,CAAC,SAAS,CAAC,EAC3B,OAAO,CAAC,EAAE,wBAAwB,GACjC,mBAAmB,CAwKrB;AAED,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,QAAQ,CAAC;AAEtD,MAAM,WAAW,yBAAyB;IACxC,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,YAAY,CAAC;IAC5C,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,0BAA0B;IACzC,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,OAAO,CAAC;IACnB,iBAAiB,EAAE,MAAM,CA
AC;IAC1B,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,MAAM,4BAA4B,GAAG,CACzC,KAAK,EAAE,0BAA0B,KAC9B,yBAAyB,GAAG,OAAO,CAAC,yBAAyB,CAAC,CAAC;AAEpE,MAAM,WAAW,WAAW;IAC1B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,kBAAkB,EAAE,MAAM,CAAC;IAC3B,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,wBAAwB,CAAC,QAAQ,GAAG,OAAO;IAC1D,eAAe,EAAE,SAAS,CAAC,QAAQ,CAAC,CAAC;IACrC,KAAK,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IACxB,YAAY,CAAC,EAAE,MAAM,KAAK,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC;IAChE,IAAI,EAAE,UAAU,CAAC;IACjB,UAAU,CAAC,EAAE,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,EAAE,QAAQ,CAAC,CAAC;IAC3D,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC5C,iBAAiB,CAAC,EAAE,4BAA4B,CAAC;CAClD;AAED,MAAM,WAAW,yBAAyB,CAAC,QAAQ,GAAG,OAAO;IAC3D,MAAM,EAAE,SAAS,CAAC,QAAQ,CAAC,CAAC;IAC5B,eAAe,CAAC,EAAE,SAAS,CAAC,QAAQ,CAAC,CAAC;IACtC,WAAW,EAAE,WAAW,CAAC;CAC1B;AAiND,wBAAsB,mBAAmB,CAAC,QAAQ,GAAG,OAAO,EAC1D,KAAK,EAAE,wBAAwB,CAAC,QAAQ,CAAC,GACxC,OAAO,CAAC,yBAAyB,CAAC,QAAQ,CAAC,CAAC,CA8D9C"}
|
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.extractToolCalls = extractToolCalls;
|
|
4
|
+
exports.compareRunRecords = compareRunRecords;
|
|
5
|
+
exports.replayFromRunRecord = replayFromRunRecord;
|
|
6
|
+
const agent_1 = require("./agent");
|
|
7
|
+
const canonical_json_1 = require("./canonical-json");
|
|
8
|
+
const run_1 = require("./run");
|
|
9
|
+
/**
 * Builds the record describing one tool call, before any output is attached.
 *
 * Undefined arguments are replaced by an empty object, and that replacement
 * is what gets stored, serialized and hashed.
 *
 * @param {string} callId - Identifier of the call.
 * @param {string} name - Tool name.
 * @param {unknown} rawArguments - Arguments as found on the source item.
 * @param {number|undefined} turn - Counter value supplied by the caller.
 * @returns {object} Record with `hasOutput` set to `false` and no `output`.
 */
function buildExtractedToolCall(callId, name, rawArguments, turn) {
    const { toCanonicalJson, hashCanonicalJson } = canonical_json_1;
    const effectiveArguments = rawArguments === undefined ? {} : rawArguments;
    const canonicalText = toCanonicalJson(effectiveArguments);
    const digest = hashCanonicalJson(canonicalText);
    return {
        callId,
        name,
        arguments: effectiveArguments,
        hasOutput: false,
        turn,
        argsCanonicalJson: canonicalText,
        argsHash: digest,
    };
}
|
|
22
|
+
/**
 * Collects the tool calls found in a run record (or in a bare item array)
 * and pairs each call with its output when one is present.
 *
 * - Each `tool_call_item` produces one entry, in order of appearance; its
 *   `turn` is a 1-based counter that advances once per `tool_call_item`.
 * - Each `tool_call_output_item` attaches its output to the entry most
 *   recently registered under the same `callId`.
 * - An output with no registered call produces an "orphan" entry with an
 *   empty name, `{}` arguments and an undefined `turn`.
 *
 * @param {object|Array} input - Either an array of items or an object whose
 *   `items` property holds that array.
 * @returns {Array<object>} Shallow copies of the collected entries.
 */
function extractToolCalls(input) {
    const sourceItems = Array.isArray(input) ? input : input.items;
    const collected = [];
    const byCallId = new Map();
    let callCounter = 0;
    const register = (entry) => {
        byCallId.set(entry.callId, entry);
        collected.push(entry);
        return entry;
    };
    for (const item of sourceItems) {
        switch (item.type) {
            case "tool_call_item": {
                callCounter += 1;
                register(buildExtractedToolCall(item.callId, item.name, item.arguments, callCounter));
                break;
            }
            case "tool_call_output_item": {
                // Outputs without a preceding call are kept as orphan entries.
                const target = byCallId.get(item.callId) ||
                    register(buildExtractedToolCall(item.callId, "", {}, undefined));
                target.output = item.output;
                target.hasOutput = true;
                break;
            }
            default:
                break;
        }
    }
    return collected.map((entry) => Object.assign({}, entry));
}
|
|
51
|
+
/**
 * Compares two arrays position by position using canonical JSON text and
 * appends one difference entry per discrepancy.
 *
 * Reported discrepancies: a length mismatch (`<path>.length`), an undefined
 * slot on the left (`missing_left`), an undefined slot on the right
 * (`missing_right`), or differing canonical text (`mismatch`).
 *
 * @param {string} path - Prefix used for the `path` of reported differences.
 * @param {Array} left - First array.
 * @param {Array} right - Second array.
 * @param {Array<object>} differences - Accumulator that receives the entries.
 * @returns {boolean} `true` when nothing was reported.
 */
function compareArraysByCanonical(path, left, right, differences) {
    let identical = true;
    const report = (entry) => {
        identical = false;
        differences.push(entry);
    };
    if (left.length !== right.length) {
        report({
            path: `${path}.length`,
            kind: "mismatch",
            left: left.length,
            right: right.length,
        });
    }
    const span = Math.max(left.length, right.length);
    for (let index = 0; index < span; index += 1) {
        const fromLeft = left[index];
        const fromRight = right[index];
        const slotPath = `${path}[${index}]`;
        if (fromLeft === undefined) {
            report({ path: slotPath, kind: "missing_left", right: fromRight });
        }
        else if (fromRight === undefined) {
            report({ path: slotPath, kind: "missing_right", left: fromLeft });
        }
        else {
            const leftText = (0, canonical_json_1.toCanonicalJson)(fromLeft);
            const rightText = (0, canonical_json_1.toCanonicalJson)(fromRight);
            if (leftText !== rightText) {
                report({ path: slotPath, kind: "mismatch", left: fromLeft, right: fromRight });
            }
        }
    }
    return identical;
}
|
|
97
|
+
/**
 * Works out which comparison sections are active.
 *
 * Starts from `options.includeSections` when it is truthy, otherwise from
 * all five sections, and then removes everything in
 * `options.excludeSections`.
 *
 * @param {object} [options] - Comparison options; may be omitted.
 * @returns {Set<string>} Names of the active sections.
 */
function resolveSections(options) {
    const everySection = ["response", "toolCalls", "policy", "guardrails", "metadata"];
    const requested = options?.includeSections;
    const active = new Set(requested ? requested : everySection);
    const excluded = options?.excludeSections ?? [];
    for (const name of excluded) {
        active.delete(name);
    }
    return active;
}
|
|
113
|
+
/**
 * Builds the lookup key for a tool call: its name and argument hash joined
 * by the U+001F (unit separator) character.
 *
 * @param {{name: string, argsHash: string}} call - Tool call record.
 * @returns {string} Composite key.
 */
function toToolCallKey(call) {
    const { name, argsHash } = call;
    return `${name}\u001f${argsHash}`;
}
|
|
116
|
+
/**
 * Counts how many calls share each key produced by `toToolCallKey`.
 *
 * @param {Iterable<object>} calls - Tool call records.
 * @returns {Map<string, number>} Number of occurrences per key.
 */
function countByKey(calls) {
    const tally = new Map();
    for (const call of calls) {
        const key = toToolCallKey(call);
        const previous = tally.get(key) ?? 0;
        tally.set(key, previous + 1);
    }
    return tally;
}
|
|
124
|
+
/**
 * Compares two run records and reports where they differ.
 *
 * Metrics are always computed: response lengths, tool-call counts, and an
 * order-independent tally of calls matched by name + argument hash.
 *
 * Differences are collected only for the sections selected through
 * `options` (see `resolveSections`), in this order:
 * 1. `response`   - strict inequality of the two `response` values when the
 *                   match mode is "exact" (the default);
 * 2. `toolCalls`  - positional comparison of name, argument hash, presence
 *                   of an output and the output's canonical JSON text; only
 *                   length, missing-entry, name and argument-hash
 *                   discrepancies clear `sameToolCallShape`;
 * 3. `policy`     - `policyDecisions`, via `compareArraysByCanonical`;
 * 4. `guardrails` - `guardrailDecisions` (missing treated as empty);
 * 5. `metadata`   - canonical JSON text of `metadata` (missing treated as `{}`).
 *
 * @param {object} left - First run record.
 * @param {object} right - Second run record.
 * @param {object} [options] - Section selection and response match mode.
 * @returns {{equal: boolean, summary: object, metrics: object, differences: Array<object>}}
 *   `equal` is `true` exactly when no difference was recorded.
 */
function compareRunRecords(left, right, options) {
    const sections = resolveSections(options);
    const differences = [];
    const callsA = extractToolCalls(left);
    const callsB = extractToolCalls(right);
    const metrics = {
        responseLengthA: left.response.length,
        responseLengthB: right.response.length,
        toolCallsA: callsA.length,
        toolCallsB: callsB.length,
        matchedToolCalls: 0,
        missingToolCalls: 0,
        extraToolCalls: 0,
    };
    // Order-independent tally: calls are matched by name + argument hash.
    const tallyA = countByKey(callsA);
    const tallyB = countByKey(callsB);
    for (const key of new Set([...tallyA.keys(), ...tallyB.keys()])) {
        const inA = tallyA.get(key) ?? 0;
        const inB = tallyB.get(key) ?? 0;
        metrics.matchedToolCalls += Math.min(inA, inB);
        metrics.missingToolCalls += Math.max(inA - inB, 0);
        metrics.extraToolCalls += Math.max(inB - inA, 0);
    }
    const mismatch = (path, leftValue, rightValue) => {
        differences.push({ path, kind: "mismatch", left: leftValue, right: rightValue });
    };

    let sameFinalResponse = true;
    if (sections.has("response")) {
        const matchMode = options?.responseMatchMode ?? "exact";
        if (matchMode === "exact" && left.response !== right.response) {
            sameFinalResponse = false;
            mismatch("response", left.response, right.response);
        }
    }

    let sameToolCallShape = true;
    if (sections.has("toolCalls")) {
        if (callsA.length !== callsB.length) {
            sameToolCallShape = false;
            mismatch("toolCalls.length", callsA.length, callsB.length);
        }
        const span = Math.max(callsA.length, callsB.length);
        for (let index = 0; index < span; index += 1) {
            const callA = callsA[index];
            const callB = callsB[index];
            if (!callA) {
                sameToolCallShape = false;
                differences.push({ path: `toolCalls[${index}]`, kind: "missing_left", right: callB });
                continue;
            }
            if (!callB) {
                sameToolCallShape = false;
                differences.push({ path: `toolCalls[${index}]`, kind: "missing_right", left: callA });
                continue;
            }
            if (callA.name !== callB.name) {
                sameToolCallShape = false;
                mismatch(`toolCalls[${index}].name`, callA.name, callB.name);
            }
            if (callA.argsHash !== callB.argsHash) {
                sameToolCallShape = false;
                mismatch(`toolCalls[${index}].argsHash`, callA.argsHash, callB.argsHash);
            }
            // Output discrepancies are reported but do not affect the "shape" flag.
            if (callA.hasOutput !== callB.hasOutput) {
                mismatch(`toolCalls[${index}].hasOutput`, callA.hasOutput, callB.hasOutput);
            }
            if (callA.hasOutput && callB.hasOutput) {
                const outputTextA = (0, canonical_json_1.toCanonicalJson)(callA.output);
                const outputTextB = (0, canonical_json_1.toCanonicalJson)(callB.output);
                if (outputTextA !== outputTextB) {
                    mismatch(`toolCalls[${index}].output`, callA.output, callB.output);
                }
            }
        }
    }

    let samePolicyDecisions = true;
    if (sections.has("policy")) {
        samePolicyDecisions = compareArraysByCanonical("policyDecisions", left.policyDecisions, right.policyDecisions, differences);
    }

    let sameGuardrailDecisions = true;
    if (sections.has("guardrails")) {
        sameGuardrailDecisions = compareArraysByCanonical("guardrailDecisions", left.guardrailDecisions ?? [], right.guardrailDecisions ?? [], differences);
    }

    if (sections.has("metadata")) {
        const metadataTextA = (0, canonical_json_1.toCanonicalJson)(left.metadata ?? {});
        const metadataTextB = (0, canonical_json_1.toCanonicalJson)(right.metadata ?? {});
        if (metadataTextA !== metadataTextB) {
            mismatch("metadata", left.metadata, right.metadata);
        }
    }

    return {
        equal: differences.length === 0,
        summary: {
            sameFinalResponse,
            sameToolCallShape,
            samePolicyDecisions,
            sameGuardrailDecisions,
        },
        metrics,
        differences,
    };
}
|
|
270
|
+
/**
 * Creates the error describing a tool call for which no recorded output was
 * found.
 *
 * @param {{toolName: string, argsHash: string, argsCanonicalJson: string}} input
 *   Description of the unmatched call.
 * @returns {Error} Error whose message names the tool, the argument hash and
 *   the canonical argument text.
 */
function createMissingToolCallError(input) {
    const { toolName, argsHash, argsCanonicalJson } = input;
    const headline = `Missing recorded tool output for "${toolName}".`;
    const hashPart = `argsHash=${argsHash}.`;
    const argsPart = `argsCanonicalJson=${argsCanonicalJson}`;
    return new Error(`${headline} ${hashPart} ${argsPart}`);
}
|
|
277
|
+
/**
 * Groups the recorded tool calls of a run record into FIFO queues keyed by
 * `toToolCallKey`.
 *
 * Only calls that have an output and a non-empty name are queued; within a
 * queue, calls keep the order in which `extractToolCalls` returned them.
 *
 * @param {object|Array} source - Run record (or item array) to read from.
 * @returns {Map<string, Array<object>>} Queues of recorded calls per key.
 */
function buildRecordedToolQueues(source) {
    const replayable = extractToolCalls(source).filter((call) => call.hasOutput && call.name.length > 0);
    return replayable.reduce((queues, call) => {
        const key = toToolCallKey(call);
        const pending = queues.get(key);
        if (pending) {
            pending.push(call);
        }
        else {
            queues.set(key, [call]);
        }
        return queues;
    }, new Map());
}
|
|
291
|
+
/**
 * Looks up (and consumes) the next recorded output for a tool invocation.
 *
 * The lookup key is the tool name plus the hash of the canonical JSON text
 * of the arguments. Undefined arguments are hashed as `{}`, mirroring how
 * `buildExtractedToolCall` hashes recorded calls; without this, an
 * invocation with undefined input could never match its recording.
 *
 * @param {Map<string, Array<object>>} queues - Queues of recorded calls; the
 *   matched entry is removed from the front of its queue.
 * @param {string} toolName - Name of the tool being invoked.
 * @param {unknown} args - Arguments of the current invocation.
 * @param {string} mode - Replay mode, copied into the "missing" description.
 * @returns {{status: "recorded", output: unknown} |
 *   {status: "missing", input: object}} The recorded output, or a description
 *   of the unmatched call. `input.arguments` carries `args` exactly as
 *   received, while `argsCanonicalJson` and `argsHash` describe the hashed form.
 */
function findRecordedToolOutput(queues, toolName, args, mode) {
    // Same normalization as on the recording side, so both hashes agree.
    const hashedArguments = typeof args === "undefined" ? {} : args;
    const argsCanonicalJson = (0, canonical_json_1.toCanonicalJson)(hashedArguments);
    const argsHash = (0, canonical_json_1.hashCanonicalJson)(argsCanonicalJson);
    const key = `${toolName}\u001f${argsHash}`;
    const queue = queues.get(key);
    if (queue && queue.length > 0) {
        const matched = queue.shift();
        if (matched) {
            return {
                status: "recorded",
                output: matched.output,
            };
        }
    }
    return {
        status: "missing",
        input: {
            mode,
            toolName,
            arguments: args,
            argsCanonicalJson,
            argsHash,
        },
    };
}
|
|
316
|
+
/**
 * Decides what to do about a tool call that has no recorded output.
 *
 * With a handler, the decision is delegated to it and its return value
 * (possibly a promise) is passed through untouched. Without one, "hybrid"
 * mode resolves to `use_live` and every other mode to `throw`.
 *
 * @param {string} mode - Replay mode.
 * @param {object} input - Description of the unmatched call.
 * @param {Function} [handler] - Optional user-supplied decision callback.
 * @returns {object|Promise<object>} Resolution carrying an `action` field.
 */
function resolveMissingToolCall(mode, input, handler) {
    if (handler) {
        return handler(input);
    }
    const action = mode === "hybrid" ? "use_live" : "throw";
    return { action };
}
|
|
325
|
+
function isRunRecordSink(sink) {
|
|
326
|
+
return typeof sink === "object" && sink !== null && "write" in sink;
|
|
327
|
+
}
|
|
328
|
+
async function writeRunRecordToSink(sink, record) {
|
|
329
|
+
if (!sink) {
|
|
330
|
+
return;
|
|
331
|
+
}
|
|
332
|
+
if (typeof sink === "function") {
|
|
333
|
+
await sink(record);
|
|
334
|
+
return;
|
|
335
|
+
}
|
|
336
|
+
if (isRunRecordSink(sink)) {
|
|
337
|
+
await sink.write(record);
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
function cloneAgentWithReplayedTools(sourceAgent, mode, queues, replayStats, onMissingToolCall, cache = new Map()) {
|
|
341
|
+
const existing = cache.get(sourceAgent);
|
|
342
|
+
if (existing) {
|
|
343
|
+
return existing;
|
|
344
|
+
}
|
|
345
|
+
const cloned = new agent_1.Agent({
|
|
346
|
+
name: sourceAgent.name,
|
|
347
|
+
handoffDescription: sourceAgent.handoffDescription,
|
|
348
|
+
instructions: sourceAgent.instructions,
|
|
349
|
+
promptVersion: sourceAgent.promptVersion,
|
|
350
|
+
model: sourceAgent.model,
|
|
351
|
+
modelSettings: sourceAgent.modelSettings,
|
|
352
|
+
tools: [],
|
|
353
|
+
handoffs: [],
|
|
354
|
+
outputGuardrails: sourceAgent.outputGuardrails,
|
|
355
|
+
});
|
|
356
|
+
cache.set(sourceAgent, cloned);
|
|
357
|
+
const wrappedTools = sourceAgent.tools.map((toolDefinition) => {
|
|
358
|
+
return {
|
|
359
|
+
name: toolDefinition.name,
|
|
360
|
+
description: toolDefinition.description,
|
|
361
|
+
parameters: toolDefinition.parameters,
|
|
362
|
+
execute: async (input, runContext) => {
|
|
363
|
+
const lookup = findRecordedToolOutput(queues, toolDefinition.name, input, mode);
|
|
364
|
+
if (lookup.status === "recorded") {
|
|
365
|
+
replayStats.replayedFromRecord += 1;
|
|
366
|
+
return lookup.output;
|
|
367
|
+
}
|
|
368
|
+
replayStats.missingToolCalls += 1;
|
|
369
|
+
const resolution = await resolveMissingToolCall(mode, lookup.input, onMissingToolCall);
|
|
370
|
+
if (resolution.action === "use_output") {
|
|
371
|
+
replayStats.replayedFromRecord += 1;
|
|
372
|
+
return resolution.output;
|
|
373
|
+
}
|
|
374
|
+
if (resolution.action === "use_live") {
|
|
375
|
+
replayStats.liveFallbackCalls += 1;
|
|
376
|
+
return toolDefinition.execute(input, runContext);
|
|
377
|
+
}
|
|
378
|
+
throw createMissingToolCallError(lookup.input);
|
|
379
|
+
},
|
|
380
|
+
};
|
|
381
|
+
});
|
|
382
|
+
const wrappedHandoffs = sourceAgent.handoffs.map((handoffAgent) => cloneAgentWithReplayedTools(handoffAgent, mode, queues, replayStats, onMissingToolCall, cache));
|
|
383
|
+
cloned.tools = wrappedTools;
|
|
384
|
+
cloned.handoffs = wrappedHandoffs;
|
|
385
|
+
return cloned;
|
|
386
|
+
}
|
|
387
|
+
async function resolveReplayAgent(input) {
|
|
388
|
+
if (input.agent && input.agentFactory) {
|
|
389
|
+
throw new Error("Replay configuration is ambiguous: provide either agent or agentFactory, not both.");
|
|
390
|
+
}
|
|
391
|
+
if (input.agentFactory) {
|
|
392
|
+
return input.agentFactory();
|
|
393
|
+
}
|
|
394
|
+
if (input.agent) {
|
|
395
|
+
return input.agent;
|
|
396
|
+
}
|
|
397
|
+
throw new Error("Replay configuration is missing an agent or agentFactory.");
|
|
398
|
+
}
|
|
399
|
+
async function replayFromRunRecord(input) {
|
|
400
|
+
const baseAgent = await resolveReplayAgent(input);
|
|
401
|
+
const extractedSourceCalls = extractToolCalls(input.sourceRunRecord);
|
|
402
|
+
const replayStats = {
|
|
403
|
+
recordedToolCalls: extractedSourceCalls.filter((call) => call.hasOutput)
|
|
404
|
+
.length,
|
|
405
|
+
replayedFromRecord: 0,
|
|
406
|
+
missingToolCalls: 0,
|
|
407
|
+
liveFallbackCalls: 0,
|
|
408
|
+
};
|
|
409
|
+
const mode = input.mode;
|
|
410
|
+
const replayAgent = mode === "live"
|
|
411
|
+
? baseAgent
|
|
412
|
+
: cloneAgentWithReplayedTools(baseAgent, mode, buildRecordedToolQueues(input.sourceRunRecord), replayStats, input.onMissingToolCall);
|
|
413
|
+
const runOptions = input.runOptions ?? {};
|
|
414
|
+
const replayContext = typeof runOptions.context === "undefined"
|
|
415
|
+
? input.sourceRunRecord.contextSnapshot
|
|
416
|
+
: runOptions.context;
|
|
417
|
+
const replayPolicies = runOptions.policies;
|
|
418
|
+
let replayRunRecord;
|
|
419
|
+
const shouldCaptureReplayRecord = Boolean(runOptions.record) || Boolean(input.metadataOverrides);
|
|
420
|
+
let replayRecordOptions = runOptions.record;
|
|
421
|
+
if (shouldCaptureReplayRecord) {
|
|
422
|
+
const sink = runOptions.record?.sink;
|
|
423
|
+
replayRecordOptions = {
|
|
424
|
+
...(runOptions.record ?? {}),
|
|
425
|
+
metadata: {
|
|
426
|
+
...(runOptions.record?.metadata ?? {}),
|
|
427
|
+
...(input.metadataOverrides ?? {}),
|
|
428
|
+
},
|
|
429
|
+
sink: async (record) => {
|
|
430
|
+
replayRunRecord = record;
|
|
431
|
+
await writeRunRecordToSink(sink, record);
|
|
432
|
+
},
|
|
433
|
+
};
|
|
434
|
+
}
|
|
435
|
+
const result = await (0, run_1.run)(replayAgent, input.sourceRunRecord.question, {
|
|
436
|
+
...runOptions,
|
|
437
|
+
context: replayContext,
|
|
438
|
+
policies: replayPolicies,
|
|
439
|
+
stream: false,
|
|
440
|
+
record: replayRecordOptions,
|
|
441
|
+
});
|
|
442
|
+
return {
|
|
443
|
+
result,
|
|
444
|
+
replayRunRecord,
|
|
445
|
+
replayStats,
|
|
446
|
+
};
|
|
447
|
+
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run-recorder-runtime.d.ts","sourceRoot":"","sources":["../src/run-recorder-runtime.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"run-recorder-runtime.d.ts","sourceRoot":"","sources":["../src/run-recorder-runtime.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,uBAAuB,EACvB,oBAAoB,EACpB,oBAAoB,EACpB,wBAAwB,EAGxB,gBAAgB,EAEjB,MAAM,cAAc,CAAC;AACtB,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAE9C,MAAM,MAAM,2BAA2B,GAAG,IAAI,CAC5C,oBAAoB,EACpB,WAAW,CACZ,CAAC;AACF,MAAM,MAAM,8BAA8B,GAAG,IAAI,CAC/C,uBAAuB,EACvB,WAAW,CACZ,CAAC;AACF,MAAM,MAAM,2BAA2B,GAAG,IAAI,CAC5C,oBAAoB,EACpB,WAAW,GAAG,YAAY,GAAG,YAAY,CAC1C,GAAG;IACF,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,CAAC;AACF,MAAM,MAAM,+BAA+B,GAAG,IAAI,CAChD,wBAAwB,EACtB,WAAW,GACX,gBAAgB,GAChB,0BAA0B,GAC1B,aAAa,GACb,kBAAkB,GAClB,cAAc,GACd,WAAW,GACX,mBAAmB,GACnB,cAAc,GACd,WAAW,CACd,GAAG;IACF,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,KAAK,EAAE,KAAK,CAAC;QACX,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,OAAO,CAAC;KACrB,CAAC,CAAC;IACH,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACzC,CAAC;AA+BF,UAAU,wBAAwB,CAAC,QAAQ;IACzC,KAAK,EAAE,MAAM,GAAG,cAAc,EAAE,CAAC;IACjC,OAAO,EAAE,QAAQ,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,gBAAgB,CAAC,QAAQ,CAAC,CAAC;CAC5C;AAED,UAAU,0BAA0B;IAClC,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,cAAc,EAAE,CAAC;CACzB;AAyFD,qBAAa,WAAW,CAAC,QAAQ,GAAG,OAAO;IACzC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAmC;IACnE,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;IACtC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA0B;IACpD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAA4C;IAC5E,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAU;IAE5C,OAAO,CAAC,QAAQ,CAAC,eAAe,CAA8B;IAC9D,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAiC;IACpE,OAAO,CAAC,QAAQ,CAAC,eAAe,CAA8B;IAC9D,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAkC;IACtE,OAAO,CAAC,mBAAmB,CAAM;IACjC,OAAO,CAAC,gBAAgB,CAAS;IAEjC,OAAO;WAoBM,MAAM,CAAC,QAAQ,GAAG,OAAO,EACpC,OAAO,EAAE,wBAAwB,CAAC,QAAQ,CAAC,GAC1C,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;IAsBjC,gBAAgB,GAAI,UAAU,2BAA2B,KAAG,IAAI,CAK9D;IAEF,mBAAmB,GAAI,UAAU,8BAA8B,KAAG,IAAI,CAKpE;IAEF,gB
AAgB,GAAI,UAAU,2BAA2B,KAAG,IAAI,CAW9D;IAEF,oBAAoB,GAClB,aAAa,+BAA+B,KAC3C,IAAI,CAgCL;IAEF,eAAe,GAAI,SAAS,MAAM,KAAG,IAAI,CAEvC;IAEI,aAAa,CAAC,OAAO,EAAE,0BAA0B,GAAG,OAAO,CAAC,IAAI,CAAC;IAIjE,UAAU,CACd,OAAO,EAAE,0BAA0B,EACnC,KAAK,EAAE,OAAO,GACb,OAAO,CAAC,IAAI,CAAC;YAIF,IAAI;CA0CnB"}
|
|
@@ -4,6 +4,7 @@ exports.RunRecorder = void 0;
|
|
|
4
4
|
const node_crypto_1 = require("node:crypto");
|
|
5
5
|
const node_fs_1 = require("node:fs");
|
|
6
6
|
const node_path_1 = require("node:path");
|
|
7
|
+
const canonical_json_1 = require("./canonical-json");
|
|
7
8
|
const REQUEST_FINGERPRINT_SCHEMA_VERSION = "request-fingerprint.v1";
|
|
8
9
|
function resolveRuntimeVersion() {
|
|
9
10
|
const envVersion = process.env.AIOC_RUNTIME_VERSION?.trim() ??
|
|
@@ -26,71 +27,6 @@ function resolveRuntimeVersion() {
|
|
|
26
27
|
return "unknown";
|
|
27
28
|
}
|
|
28
29
|
const RUNTIME_VERSION = resolveRuntimeVersion();
|
|
29
|
-
function normalizeForHash(value, seen = new WeakSet()) {
|
|
30
|
-
if (value === null) {
|
|
31
|
-
return null;
|
|
32
|
-
}
|
|
33
|
-
const valueType = typeof value;
|
|
34
|
-
if (valueType === "string" ||
|
|
35
|
-
valueType === "number" ||
|
|
36
|
-
valueType === "boolean") {
|
|
37
|
-
return value;
|
|
38
|
-
}
|
|
39
|
-
if (valueType === "undefined") {
|
|
40
|
-
return "[undefined]";
|
|
41
|
-
}
|
|
42
|
-
if (valueType === "bigint") {
|
|
43
|
-
return `[bigint:${String(value)}]`;
|
|
44
|
-
}
|
|
45
|
-
if (valueType === "symbol") {
|
|
46
|
-
return `[symbol:${String(value)}]`;
|
|
47
|
-
}
|
|
48
|
-
if (valueType === "function") {
|
|
49
|
-
return "[function]";
|
|
50
|
-
}
|
|
51
|
-
if (value instanceof Date) {
|
|
52
|
-
return value.toISOString();
|
|
53
|
-
}
|
|
54
|
-
if (value instanceof RegExp) {
|
|
55
|
-
return value.toString();
|
|
56
|
-
}
|
|
57
|
-
if (Array.isArray(value)) {
|
|
58
|
-
return value.map((item) => normalizeForHash(item, seen));
|
|
59
|
-
}
|
|
60
|
-
if (value instanceof Set) {
|
|
61
|
-
const entries = [...value].map((entry) => normalizeForHash(entry, seen));
|
|
62
|
-
entries.sort((left, right) => JSON.stringify(left).localeCompare(JSON.stringify(right)));
|
|
63
|
-
return entries;
|
|
64
|
-
}
|
|
65
|
-
if (value instanceof Map) {
|
|
66
|
-
const entries = [...value.entries()].map(([key, entry]) => [
|
|
67
|
-
normalizeForHash(key, seen),
|
|
68
|
-
normalizeForHash(entry, seen),
|
|
69
|
-
]);
|
|
70
|
-
entries.sort((left, right) => JSON.stringify(left[0]).localeCompare(JSON.stringify(right[0])));
|
|
71
|
-
return entries;
|
|
72
|
-
}
|
|
73
|
-
if (value && typeof value === "object") {
|
|
74
|
-
const objectValue = value;
|
|
75
|
-
if (seen.has(objectValue)) {
|
|
76
|
-
return "[circular]";
|
|
77
|
-
}
|
|
78
|
-
seen.add(objectValue);
|
|
79
|
-
const normalized = {};
|
|
80
|
-
const keys = Object.keys(objectValue).sort();
|
|
81
|
-
for (const key of keys) {
|
|
82
|
-
normalized[key] = normalizeForHash(objectValue[key], seen);
|
|
83
|
-
}
|
|
84
|
-
seen.delete(objectValue);
|
|
85
|
-
return normalized;
|
|
86
|
-
}
|
|
87
|
-
return String(value);
|
|
88
|
-
}
|
|
89
|
-
function hashForFingerprint(value) {
|
|
90
|
-
return (0, node_crypto_1.createHash)("sha256")
|
|
91
|
-
.update(JSON.stringify(normalizeForHash(value)))
|
|
92
|
-
.digest("hex");
|
|
93
|
-
}
|
|
94
30
|
function extractQuestion(input) {
|
|
95
31
|
if (typeof input === "string") {
|
|
96
32
|
return input;
|
|
@@ -233,11 +169,11 @@ class RunRecorder {
|
|
|
233
169
|
model: fingerprint.model,
|
|
234
170
|
runtimeVersion: RUNTIME_VERSION,
|
|
235
171
|
fingerprintSchemaVersion: REQUEST_FINGERPRINT_SCHEMA_VERSION,
|
|
236
|
-
requestHash:
|
|
237
|
-
systemPromptHash:
|
|
238
|
-
messagesHash:
|
|
239
|
-
toolsHash:
|
|
240
|
-
modelSettingsHash:
|
|
172
|
+
requestHash: (0, canonical_json_1.hashCanonicalJsonValue)(requestPayload),
|
|
173
|
+
systemPromptHash: (0, canonical_json_1.hashCanonicalJsonValue)(normalizedSystemPrompt),
|
|
174
|
+
messagesHash: (0, canonical_json_1.hashCanonicalJsonValue)(fingerprint.messages),
|
|
175
|
+
toolsHash: (0, canonical_json_1.hashCanonicalJsonValue)(normalizedTools),
|
|
176
|
+
modelSettingsHash: (0, canonical_json_1.hashCanonicalJsonValue)(normalizedModelSettings),
|
|
241
177
|
messageCount: fingerprint.messages.length,
|
|
242
178
|
toolCount: normalizedTools.length,
|
|
243
179
|
});
|
package/dist/run.d.ts
CHANGED
|
@@ -4,6 +4,13 @@ type MutableRunState<TContext> = {
|
|
|
4
4
|
history: AgentInputItem[];
|
|
5
5
|
lastAgent: Agent<TContext>;
|
|
6
6
|
};
|
|
7
|
+
export type ToolResultEnvelopeStatus = "ok" | "denied";
|
|
8
|
+
export interface ToolResultEnvelope {
|
|
9
|
+
status: ToolResultEnvelopeStatus;
|
|
10
|
+
code: string | null;
|
|
11
|
+
publicReason: string | null;
|
|
12
|
+
data: unknown | null;
|
|
13
|
+
}
|
|
7
14
|
export declare class StreamedRunResult<TContext = unknown> {
|
|
8
15
|
private consumed;
|
|
9
16
|
private stream;
|
package/dist/run.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../src/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,SAAS,CAAC;AAuBhC,OAAO,EACL,cAAc,EAEd,mBAAmB,EAGnB,SAAS,EACT,cAAc,EACd,gBAAgB,EACjB,MAAM,SAAS,CAAC;AAUjB,KAAK,eAAe,CAAC,QAAQ,IAAI;IAC/B,OAAO,EAAE,cAAc,EAAE,CAAC;IAC1B,SAAS,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CAC5B,CAAC;
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../src/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,SAAS,CAAC;AAuBhC,OAAO,EACL,cAAc,EAEd,mBAAmB,EAGnB,SAAS,EACT,cAAc,EACd,gBAAgB,EACjB,MAAM,SAAS,CAAC;AAUjB,KAAK,eAAe,CAAC,QAAQ,IAAI;IAC/B,OAAO,EAAE,cAAc,EAAE,CAAC;IAC1B,SAAS,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;CAC5B,CAAC;AAGF,MAAM,MAAM,wBAAwB,GAAG,IAAI,GAAG,QAAQ,CAAC;AAEvD,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,wBAAwB,CAAC;IACjC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,IAAI,EAAE,OAAO,GAAG,IAAI,CAAC;CACtB;AA8oBD,qBAAa,iBAAiB,CAAC,QAAQ,GAAG,OAAO;IAC/C,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,MAAM,CAA0C;IACxD,OAAO,CAAC,KAAK,CAA4B;gBAGvC,MAAM,EAAE,aAAa,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC,EAC/C,KAAK,EAAE,eAAe,CAAC,QAAQ,CAAC;IAMlC,QAAQ,IAAI,aAAa,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAQnD,IAAI,OAAO,IAAI,cAAc,EAAE,CAE9B;IAED,IAAI,SAAS,IAAI,KAAK,CAAC,QAAQ,CAAC,CAE/B;CACF;AAED,wBAAsB,GAAG,CAAC,QAAQ,GAAG,OAAO,EAC1C,aAAa,EAAE,KAAK,CAAC,QAAQ,CAAC,EAC9B,KAAK,EAAE,MAAM,GAAG,cAAc,EAAE,EAChC,OAAO,EAAE,gBAAgB,CAAC,QAAQ,CAAC,GAClC,OAAO,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC;AAExC,wBAAsB,GAAG,CAAC,QAAQ,GAAG,OAAO,EAC1C,aAAa,EAAE,KAAK,CAAC,QAAQ,CAAC,EAC9B,KAAK,EAAE,MAAM,GAAG,cAAc,EAAE,EAChC,OAAO,CAAC,EAAE,mBAAmB,CAAC,QAAQ,CAAC,GACtC,OAAO,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@axiastudio/aioc",
|
|
3
|
-
"version": "0.1.0-beta.
|
|
3
|
+
"version": "0.1.0-beta.3",
|
|
4
4
|
"main": "dist/index.js",
|
|
5
5
|
"types": "dist/index.d.ts",
|
|
6
6
|
"files": [
|
|
@@ -20,10 +20,12 @@
|
|
|
20
20
|
"example:hello": "tsx src/examples/basic/hello-world.ts",
|
|
21
21
|
"example:tool-policy": "tsx src/examples/basic/tools.ts",
|
|
22
22
|
"example:run-record": "tsx src/examples/basic/run-record-sink.ts",
|
|
23
|
+
"example:rru:01-extract": "tsx src/examples/run-record-utils-minimal/01-extract-tool-calls.ts",
|
|
24
|
+
"example:rru:02-compare": "tsx src/examples/run-record-utils-minimal/02-compare-run-records.ts",
|
|
25
|
+
"example:rru:03-replay-strict": "tsx src/examples/run-record-utils-minimal/03-replay-strict.ts",
|
|
26
|
+
"example:rru:04-replay-hybrid": "tsx src/examples/run-record-utils-minimal/04-replay-hybrid.ts",
|
|
23
27
|
"example:non-regression": "tsx src/examples/non-regression/v1-v2-runrecord-diff.ts",
|
|
24
28
|
"test:mistral": "tsx src/examples/mistral-smoke.ts",
|
|
25
|
-
"test:guardrail": "tsx src/examples/guardrail-smoke.ts",
|
|
26
|
-
"test:policy": "tsx src/examples/policy-smoke.ts",
|
|
27
29
|
"prepack": "npm run build:package"
|
|
28
30
|
},
|
|
29
31
|
"keywords": [
|
|
@@ -55,7 +57,7 @@
|
|
|
55
57
|
"homepage": "https://github.com/axiastudio/aioc",
|
|
56
58
|
"publishConfig": {
|
|
57
59
|
"access": "public",
|
|
58
|
-
"tag": "
|
|
60
|
+
"tag": "latest"
|
|
59
61
|
},
|
|
60
62
|
"dependencies": {
|
|
61
63
|
"dotenv": "^17.2.3",
|