@ariaflowagents/eval 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,61 @@
1
+ # AriaFlow Eval
2
+
3
+ `@ariaflowagents/eval` provides deterministic replay and assertions for AriaFlow transcript events.
4
+
5
+ This package is intentionally separate from `@ariaflowagents/core` to avoid runtime bloat.
6
+
7
+ ## What It Solves
8
+
9
+ - Validate event contracts without depending on exact LLM wording.
10
+ - Replay stored `.jsonl` transcripts in CI.
11
+ - Catch regressions in tool-call integrity and flow behavior.
12
+
13
+ ## Install
14
+
15
+ ```bash
16
+ bun add @ariaflowagents/eval
17
+ ```
18
+
19
+ ## Example
20
+
21
+ ```ts
22
+ import { TranscriptReplay } from '@ariaflowagents/eval';
23
+
24
+ const replay = await TranscriptReplay.fromFile('./transcripts/run.jsonl');
25
+
26
+ replay
27
+ .expectEventOrder(['input', 'tool-call', 'tool-result', 'done'])
28
+ .expectToolCalled('start_order')
29
+ .expectNoToolMismatches()
30
+ .expectNoErrors()
31
+ .expectDone();
32
+ ```
33
+
34
+ ## Replay Tests from Stored Transcripts
35
+
36
+ This package is the intended home for replay tests built from stored transcripts.
37
+
38
+ Typical workflow:
39
+
40
+ 1. Run production-like examples and store transcript files.
41
+ 2. Commit selected golden transcripts.
42
+ 3. In CI, load those files with `TranscriptReplay`.
43
+ 4. Assert structural contracts (event order, tool integrity, flow end behavior).
44
+
45
+ This makes tests stable even when model wording changes.
46
+
47
+ ## Golden Fixtures
48
+
49
+ Golden fixtures are committed in `fixtures/golden/*.jsonl`, with expectations in `fixtures/golden.manifest.json`.
50
+
51
+ Run the suite:
52
+
53
+ ```bash
54
+ bun run --filter '@ariaflowagents/eval' test:golden
55
+ ```
56
+
57
+ Workspace shortcut from repo root:
58
+
59
+ ```bash
60
+ bun run test:golden
61
+ ```
@@ -0,0 +1,25 @@
1
/**
 * A tool that a golden transcript is required to have called.
 */
export interface GoldenToolExpectation {
    /** Tool name, compared against the `toolName` of `tool-call` events. */
    name: string;
    /** Minimum number of calls required; the manifest loader fills in 1 when omitted. */
    minCount?: number;
}
5
/**
 * One entry of the golden manifest: a transcript file plus the assertions to
 * run against it.
 */
export interface GoldenCase {
    /** Label used to identify the case in failure reports. */
    name: string;
    /** Transcript path, resolved against the directory containing the manifest. */
    file: string;
    /** Event types that must occur in this order; other events may sit in between. */
    expectEventOrder?: string[];
    /** Fail on any `error` event. Treated as enabled unless explicitly `false`. */
    expectNoErrors?: boolean;
    /** Require at least one `done` event. Treated as enabled unless explicitly `false`. */
    expectDone?: boolean;
    /** Check tool call/result pairing. Treated as enabled unless explicitly `false`. */
    expectNoToolMismatches?: boolean;
    /** Tools that must have been called, each with an optional minimum count. */
    requireTools?: GoldenToolExpectation[];
}
14
/**
 * Outcome of running every case listed in a golden manifest.
 */
export interface GoldenSuiteResult {
    /** Number of cases in the manifest. */
    total: number;
    /** Number of cases whose assertions all held. */
    passed: number;
    /** Number of cases that threw; equals `failures.length`. */
    failed: number;
    /** One record per failed case: the case name and the error message. */
    failures: {
        name: string;
        error: string;
    }[];
}
23
/**
 * Reads and validates a golden manifest.
 *
 * @param manifestPath - Path to a JSON file whose top-level value is an array of cases.
 * @returns The manifest entries normalised to `GoldenCase` objects.
 * @throws Error when the file is not valid JSON, is not an array, or an entry
 *   lacks a non-empty `name` or `file`.
 */
export declare function loadGoldenManifest(manifestPath: string): Promise<GoldenCase[]>;
24
/**
 * Runs every case in a golden manifest and collects the failures.
 *
 * @param manifestPath - Manifest to run. When omitted, the bundled
 *   `../fixtures/golden.manifest.json` (relative to this module) is used.
 * @returns Totals plus one failure record per case that threw.
 */
export declare function runGoldenSuite(manifestPath?: string): Promise<GoldenSuiteResult>;
25
+ //# sourceMappingURL=golden.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"golden.d.ts","sourceRoot":"","sources":["../src/golden.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,sBAAsB,CAAC,EAAE,OAAO,CAAC;IACjC,YAAY,CAAC,EAAE,qBAAqB,EAAE,CAAC;CACxC;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CAClD;AA2CD,wBAAsB,kBAAkB,CAAC,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,EAAE,CAAC,CAapF;AAuBD,wBAAsB,cAAc,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,iBAAiB,CAAC,CA6BtF"}
package/dist/golden.js ADDED
@@ -0,0 +1,121 @@
1
+ import { readFile } from 'fs/promises';
2
+ import { dirname, resolve } from 'path';
3
+ import { fileURLToPath } from 'url';
4
+ import { ReplayAssertionError, TranscriptReplay } from './replay.js';
5
/**
 * Reports whether `value` is a non-null, non-array object.
 *
 * Arrays are rejected so that a manifest entry such as `[]` is reported as
 * "expected object" rather than failing a later, less specific field check.
 *
 * @param value - Any parsed JSON value.
 * @returns `true` only for objects that are neither `null` nor arrays.
 */
function isRecord(value) {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
}
8
/**
 * Validates one raw manifest entry and normalises it into a golden case.
 *
 * @param value - The parsed manifest item.
 * @param index - Position of the item in the manifest, used in error messages.
 * @returns A case with every boolean expectation resolved (enabled unless the
 *   manifest says `false`) and every required tool given a `minCount`.
 * @throws Error when the item is not an object, lacks a non-empty `name` or
 *   `file`, or contains a `requireTools` entry without a non-empty `name`.
 */
function toGoldenCase(value, index) {
    if (!isRecord(value)) {
        throw new Error(`Invalid manifest item at index ${index}: expected object`);
    }
    const { name, file } = value;
    if (!(typeof name === 'string' && name.length > 0)) {
        throw new Error(`Invalid manifest item at index ${index}: missing name`);
    }
    if (!(typeof file === 'string' && file.length > 0)) {
        throw new Error(`Invalid manifest item at index ${index}: missing file`);
    }
    // A missing or non-array `requireTools` simply means "no required tools".
    const rawTools = Array.isArray(value.requireTools) ? value.requireTools : [];
    const requireTools = Array.from(rawTools, (tool) => {
        const hasName = isRecord(tool) && typeof tool.name === 'string' && tool.name.length > 0;
        if (!hasName) {
            throw new Error(`Invalid requireTools entry for case "${value.name}"`);
        }
        const minCount = typeof tool.minCount === 'number' ? tool.minCount : 1;
        return { name: tool.name, minCount };
    });
    // Non-string entries in the expected order are dropped, not rejected.
    let expectEventOrder;
    if (Array.isArray(value.expectEventOrder)) {
        expectEventOrder = value.expectEventOrder.filter((v) => typeof v === 'string');
    }
    const enabledUnlessFalse = (flag) => flag !== false;
    return {
        name,
        file,
        expectEventOrder,
        expectNoErrors: enabledUnlessFalse(value.expectNoErrors),
        expectDone: enabledUnlessFalse(value.expectDone),
        expectNoToolMismatches: enabledUnlessFalse(value.expectNoToolMismatches),
        requireTools,
    };
}
42
/**
 * Loads a golden manifest from disk and validates every entry.
 *
 * @param manifestPath - Path to the manifest JSON file.
 * @returns The normalised golden cases, in manifest order.
 * @throws Error when the file content is not valid JSON or its top-level
 *   value is not an array; entry-level validation errors propagate as well.
 */
export async function loadGoldenManifest(manifestPath) {
    const absolutePath = resolve(manifestPath);
    const contents = await readFile(absolutePath, 'utf8');
    // Parsing is isolated so the JSON error can be re-thrown with the file path.
    const parseManifest = () => {
        try {
            return JSON.parse(contents);
        }
        catch (error) {
            throw new Error(`Invalid manifest JSON at ${absolutePath}: ${error.message}`);
        }
    };
    const items = parseManifest();
    if (Array.isArray(items)) {
        return items.map((item, index) => toGoldenCase(item, index));
    }
    throw new Error(`Invalid manifest format at ${absolutePath}: expected array`);
}
57
/**
 * Replays one golden transcript and applies the case's expectations.
 * The first failing assertion throws and ends the case.
 *
 * @param baseDir - Directory the case's `file` is resolved against.
 * @param goldenCase - The case to run.
 */
async function runGoldenCase(baseDir, goldenCase) {
    const transcriptPath = resolve(baseDir, goldenCase.file);
    const replay = await TranscriptReplay.fromFile(transcriptPath);
    const order = goldenCase.expectEventOrder;
    if (order && order.length > 0) {
        replay.expectEventOrder(order);
    }
    // Each boolean expectation runs unless the case switches it off with `false`.
    const toggles = [
        [goldenCase.expectNoErrors, () => replay.expectNoErrors()],
        [goldenCase.expectDone, () => replay.expectDone()],
        [goldenCase.expectNoToolMismatches, () => replay.expectNoToolMismatches()],
    ];
    for (const [flag, check] of toggles) {
        if (flag !== false) {
            check();
        }
    }
    for (const required of goldenCase.requireTools ?? []) {
        replay.expectToolCalled(required.name, required.minCount ?? 1);
    }
}
76
/**
 * Runs every case of a golden manifest, one after another, and tallies the
 * outcome. A failing case is recorded and does not stop the remaining cases.
 *
 * @param manifestPath - Manifest to run; defaults to
 *   `../fixtures/golden.manifest.json` relative to this module.
 * @returns Totals plus the name and error message of each failed case.
 */
export async function runGoldenSuite(manifestPath) {
    const currentDir = dirname(fileURLToPath(import.meta.url));
    let effectiveManifestPath = resolve(currentDir, '../fixtures/golden.manifest.json');
    if (manifestPath) {
        effectiveManifestPath = resolve(manifestPath);
    }
    const goldenCases = await loadGoldenManifest(effectiveManifestPath);
    // Transcript paths in the manifest are relative to the manifest itself.
    const manifestDir = dirname(effectiveManifestPath);
    const failures = [];
    for (const goldenCase of goldenCases) {
        try {
            await runGoldenCase(manifestDir, goldenCase);
        }
        catch (error) {
            // ReplayAssertionError derives from Error, so one check covers both.
            failures.push({
                name: goldenCase.name,
                error: error instanceof Error ? error.message : String(error),
            });
        }
    }
    return {
        total: goldenCases.length,
        passed: goldenCases.length - failures.length,
        failed: failures.length,
        failures,
    };
}
104
/**
 * Command-line entry point: runs the golden suite for the manifest given as
 * the first CLI argument (or the default manifest) and prints a summary.
 * Sets `process.exitCode` to 1 when at least one case failed.
 */
async function main() {
    const result = await runGoldenSuite(process.argv[2]);
    if (result.failed !== 0) {
        console.error(`[ariaflow-eval] Golden suite failed (${result.failed}/${result.total}).`);
        result.failures.forEach((failure) => {
            console.error(`- ${failure.name}: ${failure.error}`);
        });
        process.exitCode = 1;
        return;
    }
    console.log(`[ariaflow-eval] Golden suite passed (${result.passed}/${result.total}).`);
}
117
// Run the CLI only when this module is the script the process was started
// with, i.e. when argv[1] resolves to this module's own file path.
const invokedPath = process.argv[1] ? resolve(process.argv[1]) : '';
if (invokedPath && invokedPath === fileURLToPath(import.meta.url)) {
    // main() rejects when the manifest is missing or invalid. Report that as a
    // failed run instead of leaving the rejection unhandled.
    main().catch((error) => {
        const message = error instanceof Error ? error.message : String(error);
        console.error(`[ariaflow-eval] Golden suite crashed: ${message}`);
        process.exitCode = 1;
    });
}
121
+ //# sourceMappingURL=golden.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"golden.js","sourceRoot":"","sources":["../src/golden.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACxC,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAwBrE,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,CAAC;AACrD,CAAC;AAED,SAAS,YAAY,CAAC,KAAc,EAAE,KAAa;IACjD,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACrB,MAAM,IAAI,KAAK,CAAC,kCAAkC,KAAK,mBAAmB,CAAC,CAAC;IAC9E,CAAC;IACD,IAAI,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9D,MAAM,IAAI,KAAK,CAAC,kCAAkC,KAAK,gBAAgB,CAAC,CAAC;IAC3E,CAAC;IACD,IAAI,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9D,MAAM,IAAI,KAAK,CAAC,kCAAkC,KAAK,gBAAgB,CAAC,CAAC;IAC3E,CAAC;IAED,MAAM,KAAK,GAA4B,EAAE,CAAC;IAC1C,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC;QACtC,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,YAAY,EAAE,CAAC;YACtC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC/E,MAAM,IAAI,KAAK,CAAC,wCAAwC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC;YACzE,CAAC;YACD,KAAK,CAAC,IAAI,CAAC;gBACT,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,QAAQ,EAAE,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;aAChE,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO;QACL,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,gBAAgB,EAAE,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC;YACrD,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC;YAC1E,CAAC,CAAC,SAAS;QACb,cAAc,EAAE,KAAK,CAAC,cAAc,KAAK,KAAK;QAC9C,UAAU,EAAE,KAAK,CAAC,UAAU,KAAK,KAAK;QACtC,sBAAsB,EAAE,KAAK,CAAC,sBAAsB,KAAK,KAAK;QAC9D,YAAY,EAAE,KAAK;KACpB,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,YAAoB;IAC3D,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IAC3C,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IAClD,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC
,KAAK,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,YAAY,KAAM,KAAe,CAAC,OAAO,EAAE,CAAC,CAAC;IAC3F,CAAC;IACD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,8BAA8B,YAAY,kBAAkB,CAAC,CAAC;IAChF,CAAC;IACD,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;AAChE,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,OAAe,EAAE,UAAsB;IAClE,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,EAAE,UAAU,CAAC,IAAI,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAErD,IAAI,UAAU,CAAC,gBAAgB,IAAI,UAAU,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1E,MAAM,CAAC,gBAAgB,CAAC,UAAU,CAAC,gBAAgB,CAAC,CAAC;IACvD,CAAC;IACD,IAAI,UAAU,CAAC,cAAc,KAAK,KAAK,EAAE,CAAC;QACxC,MAAM,CAAC,cAAc,EAAE,CAAC;IAC1B,CAAC;IACD,IAAI,UAAU,CAAC,UAAU,KAAK,KAAK,EAAE,CAAC;QACpC,MAAM,CAAC,UAAU,EAAE,CAAC;IACtB,CAAC;IACD,IAAI,UAAU,CAAC,sBAAsB,KAAK,KAAK,EAAE,CAAC;QAChD,MAAM,CAAC,sBAAsB,EAAE,CAAC;IAClC,CAAC;IACD,KAAK,MAAM,IAAI,IAAI,UAAU,CAAC,YAAY,IAAI,EAAE,EAAE,CAAC;QACjD,MAAM,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;IACzD,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,YAAqB;IACxD,MAAM,UAAU,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3D,MAAM,qBAAqB,GAAG,YAAY;QACxC,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC;QACvB,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,kCAAkC,CAAC,CAAC;IAC5D,MAAM,WAAW,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;IACnD,MAAM,WAAW,GAAG,MAAM,kBAAkB,CAAC,qBAAqB,CAAC,CAAC;IAEpE,MAAM,QAAQ,GAA2C,EAAE,CAAC;IAC5D,IAAI,MAAM,GAAG,CAAC,CAAC;IAEf,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,IAAI,CAAC;YACH,MAAM,aAAa,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,CAAC;QACd,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,OAAO,GAAG,KAAK,YAAY,oBAAoB,IAAI,KAAK,YAAY,KAAK;gBAC7E,CAAC,CAAC,KAAK,CAAC,OAAO;gBACf,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAClB,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;IAED,OAAO;QACL,KAAK,EAAE,WAAW,CAAC,MAAM;QACzB,MAAM;QACN,MAAM,EAA
E,QAAQ,CAAC,MAAM;QACvB,QAAQ;KACT,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpC,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,WAAW,CAAC,CAAC;IAEjD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,wCAAwC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,IAAI,CAAC,CAAC;QACvF,OAAO;IACT,CAAC;IAED,OAAO,CAAC,KAAK,CAAC,wCAAwC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,IAAI,CAAC,CAAC;IACzF,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACtC,OAAO,CAAC,KAAK,CAAC,KAAK,OAAO,CAAC,IAAI,KAAK,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;AACvB,CAAC;AAED,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AACpE,IAAI,WAAW,IAAI,WAAW,KAAK,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;IAClE,KAAK,IAAI,EAAE,CAAC;AACd,CAAC"}
@@ -0,0 +1,6 @@
1
+ export type { TranscriptEvent, ReplayPart, ReplayStats } from './types.js';
2
+ export { readTranscriptFile, readTranscriptDirectory, listTranscriptFiles, } from './io.js';
3
+ export { TranscriptReplay, ReplayAssertionError, } from './replay.js';
4
+ export type { GoldenCase, GoldenSuiteResult, GoldenToolExpectation, } from './golden.js';
5
+ export { loadGoldenManifest, runGoldenSuite, } from './golden.js';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EAAE,eAAe,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAC3E,OAAO,EACL,kBAAkB,EAClB,uBAAuB,EACvB,mBAAmB,GACpB,MAAM,SAAS,CAAC;AACjB,OAAO,EACL,gBAAgB,EAChB,oBAAoB,GACrB,MAAM,aAAa,CAAC;AACrB,YAAY,EACV,UAAU,EACV,iBAAiB,EACjB,qBAAqB,GACtB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,kBAAkB,EAClB,cAAc,GACf,MAAM,aAAa,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,4 @@
1
+ export { readTranscriptFile, readTranscriptDirectory, listTranscriptFiles, } from './io.js';
2
+ export { TranscriptReplay, ReplayAssertionError, } from './replay.js';
3
+ export { loadGoldenManifest, runGoldenSuite, } from './golden.js';
4
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,kBAAkB,EAClB,uBAAuB,EACvB,mBAAmB,GACpB,MAAM,SAAS,CAAC;AACjB,OAAO,EACL,gBAAgB,EAChB,oBAAoB,GACrB,MAAM,aAAa,CAAC;AAMrB,OAAO,EACL,kBAAkB,EAClB,cAAc,GACf,MAAM,aAAa,CAAC"}
package/dist/io.d.ts ADDED
@@ -0,0 +1,5 @@
1
+ import type { TranscriptEvent } from './types.js';
2
/**
 * Reads a JSONL transcript: one JSON object per non-blank line.
 *
 * @param path - Transcript file to read.
 * @returns The events in file order.
 * @throws Error naming the file and line number when a line is not valid JSON
 *   or lacks a required event field.
 */
export declare function readTranscriptFile(path: string): Promise<TranscriptEvent[]>;
3
/**
 * Lists the `.jsonl` files located directly inside `directory`.
 *
 * @param directory - Directory to scan (not recursive).
 * @returns Absolute file paths, sorted with `localeCompare`.
 */
export declare function listTranscriptFiles(directory: string): Promise<string[]>;
4
/**
 * Reads every `.jsonl` transcript directly inside `directory` and merges the
 * events into one list ordered by timestamp.
 *
 * @param directory - Directory containing transcript files.
 * @returns All events from all files, sorted by `timestamp`.
 */
export declare function readTranscriptDirectory(directory: string): Promise<TranscriptEvent[]>;
5
+ //# sourceMappingURL=io.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"io.d.ts","sourceRoot":"","sources":["../src/io.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAmClD,wBAAsB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAqBjF;AAED,wBAAsB,mBAAmB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAO9E;AAED,wBAAsB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAS3F"}
package/dist/io.js ADDED
@@ -0,0 +1,71 @@
1
+ import { readdir, readFile } from 'fs/promises';
2
+ import { resolve } from 'path';
3
/**
 * Reports whether `value` is a non-null, non-array object.
 *
 * Arrays are rejected so that a transcript line such as `[]` is reported as
 * "expected object" rather than as a missing field.
 *
 * @param value - Any parsed JSON value.
 * @returns `true` only for objects that are neither `null` nor arrays.
 */
function isRecord(value) {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
}
6
/**
 * Validates one parsed transcript line and narrows it to an event object.
 *
 * @param value - The parsed JSON value of the line.
 * @param source - File the line came from, used in error messages.
 * @param line - 1-based line number, used in error messages.
 * @returns An event holding `sessionId`, `agentId`, `timestamp`, `part` and,
 *   when it is a string, `fullText`. Other top-level fields are not copied.
 * @throws Error when the value is not an object or a required field is missing.
 */
function toTranscriptEvent(value, source, line) {
    if (!isRecord(value)) {
        throw new Error(`Invalid transcript payload at ${source}:${line} (expected object)`);
    }
    const { sessionId, agentId, timestamp, part, fullText } = value;
    if (typeof sessionId !== 'string') {
        throw new Error(`Invalid transcript payload at ${source}:${line} (missing sessionId)`);
    }
    if (typeof agentId !== 'string') {
        throw new Error(`Invalid transcript payload at ${source}:${line} (missing agentId)`);
    }
    if (typeof timestamp !== 'string') {
        throw new Error(`Invalid transcript payload at ${source}:${line} (missing timestamp)`);
    }
    const partIsTyped = isRecord(part) && typeof part.type === 'string';
    if (!partIsTyped) {
        throw new Error(`Invalid transcript payload at ${source}:${line} (missing part.type)`);
    }
    const required = { sessionId, agentId, timestamp, part };
    // `fullText` is optional and only carried over when it is a string.
    return typeof fullText === 'string' ? { ...required, fullText } : required;
}
33
/**
 * Reads a JSONL transcript file into a list of validated events.
 * Lines that are empty after trimming are skipped.
 *
 * @param path - Transcript file to read.
 * @returns The events in file order.
 * @throws Error with the absolute path and 1-based line number when a line is
 *   not valid JSON; event validation errors propagate as well.
 */
export async function readTranscriptFile(path) {
    const absolutePath = resolve(path);
    const contents = await readFile(absolutePath, 'utf8');
    const events = [];
    for (const [i, rawLine] of contents.split(/\r?\n/).entries()) {
        const line = rawLine.trim();
        if (line.length === 0) {
            continue;
        }
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch (error) {
            throw new Error(`Invalid JSON at ${absolutePath}:${i + 1}: ${error.message}`);
        }
        events.push(toTranscriptEvent(parsed, absolutePath, i + 1));
    }
    return events;
}
53
/**
 * Lists the `.jsonl` files located directly inside a directory.
 *
 * @param directory - Directory to scan; sub-directories are not entered.
 * @returns Absolute paths of the matching regular files, sorted with
 *   `localeCompare`.
 */
export async function listTranscriptFiles(directory) {
    const absoluteDirectory = resolve(directory);
    const dirents = await readdir(absoluteDirectory, { withFileTypes: true });
    const transcriptPaths = [];
    for (const dirent of dirents) {
        if (dirent.isFile() && dirent.name.endsWith('.jsonl')) {
            transcriptPaths.push(resolve(absoluteDirectory, dirent.name));
        }
    }
    transcriptPaths.sort((left, right) => left.localeCompare(right));
    return transcriptPaths;
}
61
/**
 * Reads every transcript file in a directory and merges the events into one
 * chronologically ordered list.
 *
 * Timestamps that `Date.parse` understands are ordered by instant, so mixed
 * precision (`...:01Z` vs `...:01.200Z`) or differing UTC offsets no longer
 * sort incorrectly. Unparseable timestamps are placed after parseable ones
 * and compared with `localeCompare` among themselves. Events with equal
 * timestamps keep their relative read order because the sort is stable.
 *
 * @param directory - Directory containing `.jsonl` transcripts.
 * @returns All events from all files, ordered by timestamp.
 */
export async function readTranscriptDirectory(directory) {
    const files = await listTranscriptFiles(directory);
    const allEvents = [];
    for (const file of files) {
        const events = await readTranscriptFile(file);
        // Append one by one: `push(...events)` passes every element as a call
        // argument and can exceed the engine's argument limit on large files.
        for (const event of events) {
            allEvents.push(event);
        }
    }
    const byTimestamp = (a, b) => {
        const aTime = Date.parse(a.timestamp);
        const bTime = Date.parse(b.timestamp);
        const aValid = !Number.isNaN(aTime);
        const bValid = !Number.isNaN(bTime);
        if (aValid && bValid) {
            return aTime - bTime;
        }
        if (aValid !== bValid) {
            return aValid ? -1 : 1;
        }
        return a.timestamp.localeCompare(b.timestamp);
    };
    allEvents.sort(byTimestamp);
    return allEvents;
}
71
+ //# sourceMappingURL=io.js.map
package/dist/io.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"io.js","sourceRoot":"","sources":["../src/io.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AAG/B,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,CAAC;AACrD,CAAC;AAED,SAAS,iBAAiB,CAAC,KAAc,EAAE,MAAc,EAAE,IAAY;IACrE,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACrB,MAAM,IAAI,KAAK,CAAC,iCAAiC,MAAM,IAAI,IAAI,oBAAoB,CAAC,CAAC;IACvF,CAAC;IACD,IAAI,OAAO,KAAK,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CAAC,iCAAiC,MAAM,IAAI,IAAI,sBAAsB,CAAC,CAAC;IACzF,CAAC;IACD,IAAI,OAAO,KAAK,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;QACtC,MAAM,IAAI,KAAK,CAAC,iCAAiC,MAAM,IAAI,IAAI,oBAAoB,CAAC,CAAC;IACvF,CAAC;IACD,IAAI,OAAO,KAAK,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CAAC,iCAAiC,MAAM,IAAI,IAAI,sBAAsB,CAAC,CAAC;IACzF,CAAC;IACD,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QACjE,MAAM,IAAI,KAAK,CAAC,iCAAiC,MAAM,IAAI,IAAI,sBAAsB,CAAC,CAAC;IACzF,CAAC;IAED,MAAM,KAAK,GAAoB;QAC7B,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,IAAI,EAAE,KAAK,CAAC,IAA+B;KAC5C,CAAC;IACF,IAAI,OAAO,KAAK,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QACvC,KAAK,CAAC,QAAQ,GAAG,KAAK,CAAC,QAAQ,CAAC;IAClC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,IAAY;IACnD,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAClC,MAAM,MAAM,GAAsB,EAAE,CAAC;IAErC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CACb,mBAAmB,YAAY,IAAI,CAAC,GAAG,CAAC,KAAM,KAAe,CAAC,OAAO,EAAE,CACxE,CAAC;QACJ,CAAC;QACD,MAAM,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,EAAE,YAAY,EAAE,CAAC,
GAAG,CAAC,CAAC,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,SAAiB;IACzD,MAAM,iBAAiB,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;IAC7C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,iBAAiB,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1E,OAAO,OAAO;SACX,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;SAChE,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,OAAO,CAAC,iBAAiB,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;SACpD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC;AACxC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,SAAiB;IAC7D,MAAM,KAAK,GAAG,MAAM,mBAAmB,CAAC,SAAS,CAAC,CAAC;IACnD,MAAM,SAAS,GAAsB,EAAE,CAAC;IACxC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;QAC9C,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;IAC5B,CAAC;IACD,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;IACjE,OAAO,SAAS,CAAC;AACnB,CAAC"}
@@ -0,0 +1,19 @@
1
+ import type { ReplayStats, TranscriptEvent } from './types.js';
2
/**
 * Error thrown by the `expect*` assertions of `TranscriptReplay` when an
 * expectation does not hold. Its `name` is `'ReplayAssertionError'`.
 */
export declare class ReplayAssertionError extends Error {
    /** @param message - Description of the failed expectation. */
    constructor(message: string);
}
5
/**
 * Wraps a list of transcript events and exposes chainable assertions over it.
 * Every `expect*` method returns `this` on success and throws
 * `ReplayAssertionError` on failure.
 */
export declare class TranscriptReplay {
    private readonly events;
    /** Stores a timestamp-sorted copy of `events`; the input array is not modified. */
    constructor(events: TranscriptEvent[]);
    /** Builds a replay from a single transcript file. */
    static fromFile(path: string): Promise<TranscriptReplay>;
    /** Builds a replay from every `.jsonl` file directly inside `path`. */
    static fromDirectory(path: string): Promise<TranscriptReplay>;
    /** Returns a shallow copy of the events in replay order. */
    allEvents(): TranscriptEvent[];
    /** Returns the `part.type` of each event, in replay order. */
    eventTypes(): string[];
    /** Summarises event counts by type plus the sorted, distinct session and agent ids. */
    stats(): ReplayStats;
    /** Asserts that the given types occur, in order, as a subsequence of the event types. */
    expectEventOrder(expectedSubsequence: string[]): this;
    /** Asserts that no event has type `error`. */
    expectNoErrors(): this;
    /** Asserts that at least one event has type `done`. */
    expectDone(): this;
    /** Asserts that `toolName` has at least `minCount` `tool-call` events (default 1). */
    expectToolCalled(toolName: string, minCount?: number): this;
    /** Asserts that tool calls and their results/errors pair up by `toolCallId` with matching tool names. */
    expectNoToolMismatches(): this;
}
19
+ //# sourceMappingURL=replay.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"replay.d.ts","sourceRoot":"","sources":["../src/replay.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAE/D,qBAAa,oBAAqB,SAAQ,KAAK;gBACjC,OAAO,EAAE,MAAM;CAI5B;AAoBD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAoB;gBAE/B,MAAM,EAAE,eAAe,EAAE;WAIxB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;WAKjD,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAKnE,SAAS,IAAI,eAAe,EAAE;IAI9B,UAAU,IAAI,MAAM,EAAE;IAItB,KAAK,IAAI,WAAW;IAoBpB,gBAAgB,CAAC,mBAAmB,EAAE,MAAM,EAAE,GAAG,IAAI;IAkBrD,cAAc,IAAI,IAAI;IAUtB,UAAU,IAAI,IAAI;IAQlB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,GAAE,MAAU,GAAG,IAAI;IAY9D,sBAAsB,IAAI,IAAI;CAqC/B"}
package/dist/replay.js ADDED
@@ -0,0 +1,128 @@
1
+ import { readTranscriptDirectory, readTranscriptFile } from './io.js';
2
/**
 * Error type raised when a replay expectation does not hold, so callers can
 * tell assertion failures apart from other errors via `instanceof` or `name`.
 */
export class ReplayAssertionError extends Error {
    name = 'ReplayAssertionError';
    /** @param message - Description of the failed expectation. */
    constructor(message) {
        super(message);
    }
}
8
/**
 * Returns the type tag stored on an event's `part`.
 * @param event - A transcript event.
 */
function getEventType(event) {
    const { part } = event;
    return part.type;
}
11
/**
 * Reports whether an event type belongs to the tool lifecycle:
 * `tool-call`, `tool-result` or `tool-error`.
 * @param type - Event type to classify.
 */
function isToolEventType(type) {
    switch (type) {
        case 'tool-call':
        case 'tool-result':
        case 'tool-error':
            return true;
        default:
            return false;
    }
}
14
/**
 * Extracts the tool call id from an event.
 * @param event - A transcript event.
 * @returns `part.toolCallId` when it is a non-empty string, otherwise `null`.
 */
function getToolCallId(event) {
    const { toolCallId } = event.part;
    if (typeof toolCallId !== 'string' || toolCallId.length === 0) {
        return null;
    }
    return toolCallId;
}
18
/**
 * Extracts the tool name from an event.
 * @param event - A transcript event.
 * @returns `part.toolName` when it is a non-empty string, otherwise `null`.
 */
function getToolName(event) {
    const { toolName } = event.part;
    if (typeof toolName !== 'string' || toolName.length === 0) {
        return null;
    }
    return toolName;
}
22
/**
 * Orders two timestamp strings chronologically.
 *
 * Values that `Date.parse` understands are compared by instant, so mixed
 * precision (`...:01Z` vs `...:01.200Z`) or differing UTC offsets sort
 * correctly. Unparseable values are placed after parseable ones and compared
 * with `localeCompare` among themselves.
 */
function compareEventTimestamps(a, b) {
    const aTime = Date.parse(a);
    const bTime = Date.parse(b);
    const aValid = !Number.isNaN(aTime);
    const bValid = !Number.isNaN(bTime);
    if (aValid && bValid) {
        return aTime - bTime;
    }
    if (aValid !== bValid) {
        return aValid ? -1 : 1;
    }
    return a.localeCompare(b);
}
/**
 * Wraps a list of transcript events and exposes chainable assertions over it.
 * Every `expect*` method returns `this` on success and throws
 * `ReplayAssertionError` on failure.
 */
export class TranscriptReplay {
    /** Events in chronological order; assigned once in the constructor. */
    events;
    /**
     * @param events - Events to replay. The array is copied before sorting, so
     *   the caller's array is left untouched. Events with equal timestamps keep
     *   their input order because the sort is stable.
     */
    constructor(events) {
        this.events = [...events].sort((a, b) => compareEventTimestamps(a.timestamp, b.timestamp));
    }
    /** Builds a replay from a single transcript file. */
    static async fromFile(path) {
        const events = await readTranscriptFile(path);
        return new TranscriptReplay(events);
    }
    /** Builds a replay from the transcript files of a directory. */
    static async fromDirectory(path) {
        const events = await readTranscriptDirectory(path);
        return new TranscriptReplay(events);
    }
    /** Returns a shallow copy of the events in replay order. */
    allEvents() {
        return [...this.events];
    }
    /** Returns the type of each event, in replay order. */
    eventTypes() {
        return this.events.map(getEventType);
    }
    /**
     * Summarises the replay: total event count, count per event type, and the
     * sorted distinct session and agent ids.
     */
    stats() {
        const byType = {};
        const sessions = new Set();
        const agents = new Set();
        for (const event of this.events) {
            const type = getEventType(event);
            byType[type] = (byType[type] ?? 0) + 1;
            sessions.add(event.sessionId);
            agents.add(event.agentId);
        }
        return {
            totalEvents: this.events.length,
            byType,
            sessions: [...sessions].sort(),
            agents: [...agents].sort(),
        };
    }
    /**
     * Asserts that `expectedSubsequence` occurs, in order, within the event
     * types. Other events may appear in between; an empty list always passes.
     * @throws ReplayAssertionError naming the first expected type not found.
     */
    expectEventOrder(expectedSubsequence) {
        if (expectedSubsequence.length === 0)
            return this;
        const actual = this.eventTypes();
        // `cursor` only moves forward, which is what enforces the ordering.
        let cursor = 0;
        for (const next of expectedSubsequence) {
            while (cursor < actual.length && actual[cursor] !== next) {
                cursor += 1;
            }
            if (cursor >= actual.length) {
                throw new ReplayAssertionError(`Expected event sequence not found. Missing "${next}" after [${expectedSubsequence.join(', ')}]`);
            }
            cursor += 1;
        }
        return this;
    }
    /**
     * Asserts that no event has type `error`.
     * @throws ReplayAssertionError reporting how many were found.
     */
    expectNoErrors() {
        const errors = this.events.filter(event => getEventType(event) === 'error');
        if (errors.length > 0) {
            throw new ReplayAssertionError(`Expected no "error" events, but found ${errors.length}`);
        }
        return this;
    }
    /**
     * Asserts that at least one event has type `done`.
     * @throws ReplayAssertionError when there is none.
     */
    expectDone() {
        const hasDone = this.events.some(event => getEventType(event) === 'done');
        if (!hasDone) {
            throw new ReplayAssertionError('Expected at least one "done" event');
        }
        return this;
    }
    /**
     * Asserts that `toolName` appears in at least `minCount` `tool-call` events.
     * @param toolName - Tool name to look for.
     * @param minCount - Required number of calls; defaults to 1.
     * @throws ReplayAssertionError when fewer calls were found.
     */
    expectToolCalled(toolName, minCount = 1) {
        const count = this.events.filter(event => getEventType(event) === 'tool-call' && getToolName(event) === toolName).length;
        if (count < minCount) {
            throw new ReplayAssertionError(`Expected tool "${toolName}" to be called at least ${minCount} time(s), found ${count}`);
        }
        return this;
    }
    /**
     * Asserts that tool calls and their results/errors pair up by `toolCallId`:
     * every call has a later result, every result has an earlier call, and when
     * both sides carry a tool name the names agree. Tool events without a
     * `toolCallId` are ignored.
     * @throws ReplayAssertionError with the count of each kind of mismatch.
     */
    expectNoToolMismatches() {
        // Pending calls: toolCallId -> tool name ('' when the call carried none).
        const callsById = new Map();
        let unmatchedResults = 0;
        let mismatchedToolNames = 0;
        for (const event of this.events) {
            const type = getEventType(event);
            if (!isToolEventType(type))
                continue;
            const id = getToolCallId(event);
            if (!id)
                continue;
            if (type === 'tool-call') {
                callsById.set(id, getToolName(event) ?? '');
                continue;
            }
            // Test for presence, not truthiness: a call without a tool name is
            // stored as '' and must still pair with its result.
            if (!callsById.has(id)) {
                unmatchedResults += 1;
                continue;
            }
            const callToolName = callsById.get(id);
            const resultToolName = getToolName(event) ?? '';
            // Names are only compared when both sides actually provide one.
            if (resultToolName && callToolName && resultToolName !== callToolName) {
                mismatchedToolNames += 1;
            }
            callsById.delete(id);
        }
        if (callsById.size > 0 || unmatchedResults > 0 || mismatchedToolNames > 0) {
            throw new ReplayAssertionError(`Tool mismatch detected: unmatchedCalls=${callsById.size}, unmatchedResults=${unmatchedResults}, nameMismatches=${mismatchedToolNames}`);
        }
        return this;
    }
}
128
+ //# sourceMappingURL=replay.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"replay.js","sourceRoot":"","sources":["../src/replay.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,kBAAkB,EAAE,MAAM,SAAS,CAAC;AAGtE,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAC7C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED,SAAS,YAAY,CAAC,KAAsB;IAC1C,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB,CAAC;AAED,SAAS,eAAe,CAAC,IAAY;IACnC,OAAO,IAAI,KAAK,WAAW,IAAI,IAAI,KAAK,aAAa,IAAI,IAAI,KAAK,YAAY,CAAC;AACjF,CAAC;AAED,SAAS,aAAa,CAAC,KAAsB;IAC3C,MAAM,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC;IACjC,OAAO,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7D,CAAC;AAED,SAAS,WAAW,CAAC,KAAsB;IACzC,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC;IACrC,OAAO,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC;AAC/E,CAAC;AAED,MAAM,OAAO,gBAAgB;IACV,MAAM,CAAoB;IAE3C,YAAY,MAAyB;QACnC,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;IACnF,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAY;QAChC,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,IAAI,CAAC,CAAC;QAC9C,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC,CAAC;IACtC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,IAAY;QACrC,MAAM,MAAM,GAAG,MAAM,uBAAuB,CAAC,IAAI,CAAC,CAAC;QACnD,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC,CAAC;IACtC,CAAC;IAED,SAAS;QACP,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;IAC1B,CAAC;IAED,UAAU;QACR,OAAO,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IACvC,CAAC;IAED,KAAK;QACH,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;QACnC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;QAEjC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,IAAI,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACjC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACvC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YAC9B,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC5B,CAAC;QAED,OAAO;YACL,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;YAC/B,MAA
M;YACN,QAAQ,EAAE,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,EAAE;YAC9B,MAAM,EAAE,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,EAAE;SAC3B,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,mBAA6B;QAC5C,IAAI,mBAAmB,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC;QAClD,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QACjC,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,KAAK,MAAM,IAAI,IAAI,mBAAmB,EAAE,CAAC;YACvC,OAAO,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;gBACzD,MAAM,IAAI,CAAC,CAAC;YACd,CAAC;YACD,IAAI,MAAM,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;gBAC5B,MAAM,IAAI,oBAAoB,CAC5B,+CAA+C,IAAI,YAAY,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CACjG,CAAC;YACJ,CAAC;YACD,MAAM,IAAI,CAAC,CAAC;QACd,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,cAAc;QACZ,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,KAAK,OAAO,CAAC,CAAC;QAC5E,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,MAAM,IAAI,oBAAoB,CAC5B,yCAAyC,MAAM,CAAC,MAAM,EAAE,CACzD,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,UAAU;QACR,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,KAAK,MAAM,CAAC,CAAC;QAC1E,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,oBAAoB,CAAC,oCAAoC,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,gBAAgB,CAAC,QAAgB,EAAE,WAAmB,CAAC;QACrD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAC9B,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,KAAK,CAAC,KAAK,WAAW,IAAI,WAAW,CAAC,KAAK,CAAC,KAAK,QAAQ,CAChF,CAAC,MAAM,CAAC;QACT,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;YACrB,MAAM,IAAI,oBAAoB,CAC5B,kBAAkB,QAAQ,2BAA2B,QAAQ,mBAAmB,KAAK,EAAE,CACxF,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,sBAAsB;QACpB,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC5C,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,IAAI,mBAAmB,GAAG,CAAC,CAAC;QAE5B,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,IAAI,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACjC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC;gBAAE,SAAS;YACrC,MAAM,EAAE,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;YAChC,IAAI,CAAC,EAAE;gBAAE,SAAS;YAElB,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;gBACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;gBAC1C,SAAS,CAAC,GAAG,CAAC,EAAE,EAAE,QAAQ,CAA
C,CAAC;gBAC5B,SAAS;YACX,CAAC;YAED,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACvC,IAAI,CAAC,YAAY,EAAE,CAAC;gBAClB,gBAAgB,IAAI,CAAC,CAAC;gBACtB,SAAS;YACX,CAAC;YAED,MAAM,cAAc,GAAG,WAAW,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YAChD,IAAI,cAAc,IAAI,YAAY,IAAI,cAAc,KAAK,YAAY,EAAE,CAAC;gBACtE,mBAAmB,IAAI,CAAC,CAAC;YAC3B,CAAC;YACD,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACvB,CAAC;QAED,IAAI,SAAS,CAAC,IAAI,GAAG,CAAC,IAAI,gBAAgB,GAAG,CAAC,IAAI,mBAAmB,GAAG,CAAC,EAAE,CAAC;YAC1E,MAAM,IAAI,oBAAoB,CAC5B,0CAA0C,SAAS,CAAC,IAAI,sBAAsB,gBAAgB,oBAAoB,mBAAmB,EAAE,CACxI,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;CACF"}
@@ -0,0 +1,18 @@
1
/**
 * Payload of a transcript event: a `type` tag plus any further fields.
 */
export interface ReplayPart {
    /** Event type tag, e.g. `input`, `tool-call`, `tool-result`, `done`. */
    type: string;
    /** Additional, type-specific fields; their shape is not constrained here. */
    [key: string]: unknown;
}
5
/**
 * One line of a transcript file after validation.
 */
export interface TranscriptEvent {
    /** Identifier of the session the event belongs to. */
    sessionId: string;
    /** Identifier of the agent that produced the event. */
    agentId: string;
    /** Timestamp string used to order events within a replay. */
    timestamp: string;
    /** Typed payload of the event. */
    part: ReplayPart;
    /** Optional text attached to the event. */
    fullText?: string;
}
12
/**
 * Aggregate figures describing a replay.
 */
export interface ReplayStats {
    /** Number of events in the replay. */
    totalEvents: number;
    /** Number of events per event type. */
    byType: Record<string, number>;
    /** Distinct session ids, sorted. */
    sessions: string[];
    /** Distinct agent ids, sorted. */
    agents: string[];
}
18
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,WAAW;IAC1B,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB"}
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,4 @@
1
+ {"sessionId":"golden-quickstart-1","agentId":"quickstart-hello-world","timestamp":"2026-02-15T00:00:00.000Z","part":{"type":"input","text":"My favorite color is blue"}}
2
+ {"sessionId":"golden-quickstart-1","agentId":"quickstart-hello-world","timestamp":"2026-02-15T00:00:00.800Z","part":{"type":"tool-call","toolCallId":"tc-quick-1","toolName":"record_favorite_color_func","args":{"color":"blue"}}}
3
+ {"sessionId":"golden-quickstart-1","agentId":"quickstart-hello-world","timestamp":"2026-02-15T00:00:01.200Z","part":{"type":"tool-result","toolCallId":"tc-quick-1","toolName":"record_favorite_color_func","result":{"__flow_transition":true,"targetNode":"end","data":{"color":"blue"}}}}
4
+ {"sessionId":"golden-quickstart-1","agentId":"quickstart-hello-world","timestamp":"2026-02-15T00:00:01.900Z","part":{"type":"done","sessionId":"golden-quickstart-1"},"fullText":"Thanks for telling me your favorite color."}
@@ -0,0 +1,6 @@
1
+ {"sessionId":"golden-warm-transfer-1","agentId":"warm-transfer-flow","timestamp":"2026-02-15T00:10:00.000Z","part":{"type":"input","text":"I want to place an order"}}
2
+ {"sessionId":"golden-warm-transfer-1","agentId":"warm-transfer-flow","timestamp":"2026-02-15T00:10:00.700Z","part":{"type":"tool-call","toolCallId":"tc-warm-1","toolName":"start_order","args":{}}}
3
+ {"sessionId":"golden-warm-transfer-1","agentId":"warm-transfer-flow","timestamp":"2026-02-15T00:10:01.100Z","part":{"type":"tool-result","toolCallId":"tc-warm-1","toolName":"start_order","result":{"__flow_transition":true,"targetNode":"transferring_to_human_agent","data":{"status":"error","error":"Order backend unavailable"}}}}
4
+ {"sessionId":"golden-warm-transfer-1","agentId":"warm-transfer-flow","timestamp":"2026-02-15T00:10:01.900Z","part":{"type":"tool-call","toolCallId":"tc-warm-2","toolName":"start_human_agent_interaction","args":{}}}
5
+ {"sessionId":"golden-warm-transfer-1","agentId":"warm-transfer-flow","timestamp":"2026-02-15T00:10:02.400Z","part":{"type":"tool-result","toolCallId":"tc-warm-2","toolName":"start_human_agent_interaction","result":{"__flow_transition":true,"targetNode":"human_agent_interaction","data":{}}}}
6
+ {"sessionId":"golden-warm-transfer-1","agentId":"warm-transfer-flow","timestamp":"2026-02-15T00:10:03.000Z","part":{"type":"done","sessionId":"golden-warm-transfer-1"},"fullText":"Transferring you to a human agent now."}
@@ -0,0 +1,32 @@
1
+ [
2
+ {
3
+ "name": "quickstart_favorite_color_path",
4
+ "file": "golden/quickstart_favorite_color_path.jsonl",
5
+ "expectEventOrder": ["input", "tool-call", "tool-result", "done"],
6
+ "expectNoErrors": true,
7
+ "expectDone": true,
8
+ "expectNoToolMismatches": true,
9
+ "requireTools": [
10
+ { "name": "record_favorite_color_func", "minCount": 1 }
11
+ ]
12
+ },
13
+ {
14
+ "name": "warm_transfer_handoff_path",
15
+ "file": "golden/warm_transfer_handoff_path.jsonl",
16
+ "expectEventOrder": [
17
+ "input",
18
+ "tool-call",
19
+ "tool-result",
20
+ "tool-call",
21
+ "tool-result",
22
+ "done"
23
+ ],
24
+ "expectNoErrors": true,
25
+ "expectDone": true,
26
+ "expectNoToolMismatches": true,
27
+ "requireTools": [
28
+ { "name": "start_order", "minCount": 1 },
29
+ { "name": "start_human_agent_interaction", "minCount": 1 }
30
+ ]
31
+ }
32
+ ]
package/package.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "name": "@ariaflowagents/eval",
3
+ "version": "0.7.0",
4
+ "description": "Deterministic conversation replay and assertions for AriaFlow transcripts",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "default": "./dist/index.js"
12
+ }
13
+ },
14
+ "scripts": {
15
+ "build": "tsc -p tsconfig.json",
16
+ "clean": "rm -rf dist",
17
+ "test": "tsc -p tsconfig.json && node --test test/*.test.js",
18
+ "test:golden": "tsc -p tsconfig.json && node dist/golden.js",
19
+ "prepublishOnly": "tsc -p tsconfig.json"
20
+ },
21
+ "files": [
22
+ "dist",
23
+ "fixtures",
24
+ "README.md"
25
+ ],
26
+ "devDependencies": {
27
+ "@types/node": "^20.11.0",
28
+ "typescript": "^5.3.0"
29
+ },
30
+ "publishConfig": {
31
+ "access": "public"
32
+ }
33
+ }