@evalgate/sdk 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +638 -0
- package/README.md +398 -0
- package/dist/assertions.d.ts +189 -0
- package/dist/assertions.js +662 -0
- package/dist/batch.d.ts +68 -0
- package/dist/batch.js +179 -0
- package/dist/cache.d.ts +65 -0
- package/dist/cache.js +131 -0
- package/dist/cli/api.d.ts +108 -0
- package/dist/cli/api.js +132 -0
- package/dist/cli/baseline.d.ts +10 -0
- package/dist/cli/baseline.js +172 -0
- package/dist/cli/check.d.ts +73 -0
- package/dist/cli/check.js +355 -0
- package/dist/cli/ci-context.d.ts +6 -0
- package/dist/cli/ci-context.js +112 -0
- package/dist/cli/ci.d.ts +45 -0
- package/dist/cli/ci.js +192 -0
- package/dist/cli/config.d.ts +30 -0
- package/dist/cli/config.js +230 -0
- package/dist/cli/constants.d.ts +15 -0
- package/dist/cli/constants.js +18 -0
- package/dist/cli/diff.d.ts +173 -0
- package/dist/cli/diff.js +685 -0
- package/dist/cli/discover.d.ts +84 -0
- package/dist/cli/discover.js +419 -0
- package/dist/cli/doctor.d.ts +88 -0
- package/dist/cli/doctor.js +675 -0
- package/dist/cli/env.d.ts +21 -0
- package/dist/cli/env.js +42 -0
- package/dist/cli/explain.d.ts +58 -0
- package/dist/cli/explain.js +561 -0
- package/dist/cli/formatters/github.d.ts +8 -0
- package/dist/cli/formatters/github.js +135 -0
- package/dist/cli/formatters/human.d.ts +6 -0
- package/dist/cli/formatters/human.js +110 -0
- package/dist/cli/formatters/json.d.ts +6 -0
- package/dist/cli/formatters/json.js +10 -0
- package/dist/cli/formatters/pr-comment.d.ts +12 -0
- package/dist/cli/formatters/pr-comment.js +103 -0
- package/dist/cli/formatters/types.d.ts +103 -0
- package/dist/cli/formatters/types.js +8 -0
- package/dist/cli/gate.d.ts +21 -0
- package/dist/cli/gate.js +179 -0
- package/dist/cli/impact-analysis.d.ts +63 -0
- package/dist/cli/impact-analysis.js +252 -0
- package/dist/cli/index.d.ts +9 -0
- package/dist/cli/index.js +332 -0
- package/dist/cli/init.d.ts +16 -0
- package/dist/cli/init.js +292 -0
- package/dist/cli/manifest.d.ts +103 -0
- package/dist/cli/manifest.js +282 -0
- package/dist/cli/migrate.d.ts +41 -0
- package/dist/cli/migrate.js +349 -0
- package/dist/cli/policy-packs.d.ts +23 -0
- package/dist/cli/policy-packs.js +89 -0
- package/dist/cli/print-config.d.ts +29 -0
- package/dist/cli/print-config.js +270 -0
- package/dist/cli/profiles.d.ts +28 -0
- package/dist/cli/profiles.js +30 -0
- package/dist/cli/reason-codes.d.ts +17 -0
- package/dist/cli/reason-codes.js +19 -0
- package/dist/cli/regression-gate.d.ts +15 -0
- package/dist/cli/regression-gate.js +341 -0
- package/dist/cli/render/snippet.d.ts +5 -0
- package/dist/cli/render/snippet.js +15 -0
- package/dist/cli/render/sort.d.ts +10 -0
- package/dist/cli/render/sort.js +24 -0
- package/dist/cli/report/build-check-report.d.ts +19 -0
- package/dist/cli/report/build-check-report.js +132 -0
- package/dist/cli/run.d.ts +101 -0
- package/dist/cli/run.js +395 -0
- package/dist/cli/share.d.ts +17 -0
- package/dist/cli/share.js +91 -0
- package/dist/cli/upgrade.d.ts +15 -0
- package/dist/cli/upgrade.js +492 -0
- package/dist/cli/workspace.d.ts +31 -0
- package/dist/cli/workspace.js +68 -0
- package/dist/client.d.ts +368 -0
- package/dist/client.js +893 -0
- package/dist/client.request.test.d.ts +1 -0
- package/dist/client.request.test.js +232 -0
- package/dist/context.d.ts +134 -0
- package/dist/context.js +215 -0
- package/dist/errors.d.ts +82 -0
- package/dist/errors.js +298 -0
- package/dist/export.d.ts +195 -0
- package/dist/export.js +344 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.js +153 -0
- package/dist/integrations/anthropic.d.ts +91 -0
- package/dist/integrations/anthropic.js +163 -0
- package/dist/integrations/openai-eval.d.ts +57 -0
- package/dist/integrations/openai-eval.js +232 -0
- package/dist/integrations/openai.d.ts +92 -0
- package/dist/integrations/openai.js +160 -0
- package/dist/local.d.ts +39 -0
- package/dist/local.js +148 -0
- package/dist/logger.d.ts +128 -0
- package/dist/logger.js +227 -0
- package/dist/matchers/index.d.ts +1 -0
- package/dist/matchers/index.js +6 -0
- package/dist/matchers/to-pass-gate.d.ts +29 -0
- package/dist/matchers/to-pass-gate.js +35 -0
- package/dist/pagination.d.ts +74 -0
- package/dist/pagination.js +139 -0
- package/dist/regression.d.ts +100 -0
- package/dist/regression.js +44 -0
- package/dist/runtime/adapters/config-to-dsl.d.ts +33 -0
- package/dist/runtime/adapters/config-to-dsl.js +400 -0
- package/dist/runtime/adapters/testsuite-to-dsl.d.ts +63 -0
- package/dist/runtime/adapters/testsuite-to-dsl.js +276 -0
- package/dist/runtime/context.d.ts +26 -0
- package/dist/runtime/context.js +74 -0
- package/dist/runtime/eval.d.ts +46 -0
- package/dist/runtime/eval.js +244 -0
- package/dist/runtime/execution-mode.d.ts +80 -0
- package/dist/runtime/execution-mode.js +357 -0
- package/dist/runtime/executor.d.ts +16 -0
- package/dist/runtime/executor.js +152 -0
- package/dist/runtime/registry.d.ts +78 -0
- package/dist/runtime/registry.js +403 -0
- package/dist/runtime/run-report.d.ts +200 -0
- package/dist/runtime/run-report.js +222 -0
- package/dist/runtime/types.d.ts +356 -0
- package/dist/runtime/types.js +76 -0
- package/dist/snapshot.d.ts +176 -0
- package/dist/snapshot.js +322 -0
- package/dist/streaming.d.ts +173 -0
- package/dist/streaming.js +268 -0
- package/dist/testing.d.ts +273 -0
- package/dist/testing.js +317 -0
- package/dist/types.d.ts +754 -0
- package/dist/types.js +54 -0
- package/dist/utils/input-hash.d.ts +8 -0
- package/dist/utils/input-hash.js +41 -0
- package/dist/version.d.ts +7 -0
- package/dist/version.js +10 -0
- package/dist/workflows.d.ts +389 -0
- package/dist/workflows.js +671 -0
- package/package.json +117 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Snapshot Testing System
|
|
3
|
+
* Tier 4.16: Visual regression detection for LLM outputs
|
|
4
|
+
*
|
|
5
|
+
* ⚠️ NOTE: This module requires Node.js and will not work in browsers.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* import { snapshot, loadSnapshot } from '@evalgate/sdk';
|
|
10
|
+
*
|
|
11
|
+
* const output = await generateText('Write a haiku about coding');
|
|
12
|
+
* await snapshot(output, 'haiku-test');
|
|
13
|
+
*
|
|
14
|
+
* // Later, compare with snapshot
|
|
15
|
+
* const saved = await loadSnapshot('haiku-test');
|
|
16
|
+
* const { matches } = await compareWithSnapshot('haiku-test', output);
|
|
17
|
+
* ```
|
|
18
|
+
*/
|
|
19
|
+
export interface SnapshotMetadata {
|
|
20
|
+
/** Snapshot name/ID */
|
|
21
|
+
name: string;
|
|
22
|
+
/** When snapshot was created */
|
|
23
|
+
createdAt: string;
|
|
24
|
+
/** Content hash for change detection */
|
|
25
|
+
hash: string;
|
|
26
|
+
/** Optional tags for organization */
|
|
27
|
+
tags?: string[];
|
|
28
|
+
/** Additional metadata */
|
|
29
|
+
metadata?: Record<string, unknown>;
|
|
30
|
+
}
|
|
31
|
+
export interface SnapshotData {
|
|
32
|
+
/** The actual output that was snapshotted */
|
|
33
|
+
output: string;
|
|
34
|
+
/** Metadata about the snapshot */
|
|
35
|
+
metadata: SnapshotMetadata;
|
|
36
|
+
}
|
|
37
|
+
export interface SnapshotComparison {
|
|
38
|
+
/** Whether snapshots match */
|
|
39
|
+
matches: boolean;
|
|
40
|
+
/** Similarity score 0-1 */
|
|
41
|
+
similarity: number;
|
|
42
|
+
/** Differences found */
|
|
43
|
+
differences: string[];
|
|
44
|
+
/** Original snapshot */
|
|
45
|
+
original: string;
|
|
46
|
+
/** New output */
|
|
47
|
+
current: string;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Snapshot manager
|
|
51
|
+
*/
|
|
52
|
+
export declare class SnapshotManager {
|
|
53
|
+
private snapshotDir;
|
|
54
|
+
constructor(snapshotDir?: string);
|
|
55
|
+
/**
|
|
56
|
+
* Ensure snapshot directory exists
|
|
57
|
+
*/
|
|
58
|
+
private ensureSnapshotDir;
|
|
59
|
+
/**
|
|
60
|
+
* Get snapshot file path with security checks
|
|
61
|
+
*/
|
|
62
|
+
private getSnapshotPath;
|
|
63
|
+
/**
|
|
64
|
+
* Generate content hash
|
|
65
|
+
*/
|
|
66
|
+
private generateHash;
|
|
67
|
+
/**
|
|
68
|
+
* Save a snapshot
|
|
69
|
+
*
|
|
70
|
+
* @example
|
|
71
|
+
* ```typescript
|
|
72
|
+
* const manager = new SnapshotManager();
|
|
73
|
+
* await manager.save('haiku-test', output, { tags: ['poetry'] });
|
|
74
|
+
* ```
|
|
75
|
+
*/
|
|
76
|
+
save(name: string, output: string, options?: {
|
|
77
|
+
tags?: string[];
|
|
78
|
+
metadata?: Record<string, unknown>;
|
|
79
|
+
overwrite?: boolean;
|
|
80
|
+
}): Promise<SnapshotData>;
|
|
81
|
+
/**
|
|
82
|
+
* Load a snapshot
|
|
83
|
+
*
|
|
84
|
+
* @example
|
|
85
|
+
* ```typescript
|
|
86
|
+
* const snapshot = await manager.load('haiku-test');
|
|
87
|
+
* console.log(snapshot.output);
|
|
88
|
+
* ```
|
|
89
|
+
*/
|
|
90
|
+
load(name: string): Promise<SnapshotData>;
|
|
91
|
+
/**
|
|
92
|
+
* Compare current output with saved snapshot
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* ```typescript
|
|
96
|
+
* const comparison = await manager.compare('haiku-test', currentOutput);
|
|
97
|
+
* if (!comparison.matches) {
|
|
98
|
+
* console.log('Differences:', comparison.differences);
|
|
99
|
+
* }
|
|
100
|
+
* ```
|
|
101
|
+
*/
|
|
102
|
+
compare(name: string, currentOutput: string): Promise<SnapshotComparison>;
|
|
103
|
+
/**
|
|
104
|
+
* List all snapshots
|
|
105
|
+
*
|
|
106
|
+
* @example
|
|
107
|
+
* ```typescript
|
|
108
|
+
* const snapshots = await manager.list();
|
|
109
|
+
* snapshots.forEach(s => console.log(s.metadata.name));
|
|
110
|
+
* ```
|
|
111
|
+
*/
|
|
112
|
+
list(): Promise<SnapshotData[]>;
|
|
113
|
+
/**
|
|
114
|
+
* Delete a snapshot
|
|
115
|
+
*
|
|
116
|
+
* @example
|
|
117
|
+
* ```typescript
|
|
118
|
+
* await manager.delete('old-test');
|
|
119
|
+
* ```
|
|
120
|
+
*/
|
|
121
|
+
delete(name: string): Promise<void>;
|
|
122
|
+
/**
|
|
123
|
+
* Update a snapshot with new output
|
|
124
|
+
*
|
|
125
|
+
* @example
|
|
126
|
+
* ```typescript
|
|
127
|
+
* await manager.update('haiku-test', newOutput);
|
|
128
|
+
* ```
|
|
129
|
+
*/
|
|
130
|
+
update(name: string, output: string): Promise<SnapshotData>;
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Save a snapshot (convenience function)
|
|
134
|
+
*
|
|
135
|
+
* @example
|
|
136
|
+
* ```typescript
|
|
137
|
+
* const output = await generateText('Write a haiku');
|
|
138
|
+
* await snapshot(output, 'haiku-test');
|
|
139
|
+
* ```
|
|
140
|
+
*/
|
|
141
|
+
export declare function snapshot(output: string, name: string, options?: {
|
|
142
|
+
tags?: string[];
|
|
143
|
+
metadata?: Record<string, unknown>;
|
|
144
|
+
overwrite?: boolean;
|
|
145
|
+
dir?: string;
|
|
146
|
+
}): Promise<SnapshotData>;
|
|
147
|
+
/**
|
|
148
|
+
* Load a snapshot (convenience function)
|
|
149
|
+
*
|
|
150
|
+
* @example
|
|
151
|
+
* ```typescript
|
|
152
|
+
* const saved = await loadSnapshot('haiku-test');
|
|
153
|
+
* console.log(saved.output);
|
|
154
|
+
* ```
|
|
155
|
+
*/
|
|
156
|
+
export declare function loadSnapshot(name: string, dir?: string): Promise<SnapshotData>;
|
|
157
|
+
/**
|
|
158
|
+
* Compare with snapshot (convenience function)
|
|
159
|
+
*
|
|
160
|
+
* @example
|
|
161
|
+
* ```typescript
|
|
162
|
+
* const comparison = await compareWithSnapshot('haiku-test', currentOutput);
|
|
163
|
+
* if (!comparison.matches) {
|
|
164
|
+
* console.log('Output changed!');
|
|
165
|
+
* }
|
|
166
|
+
* ```
|
|
167
|
+
*/
|
|
168
|
+
export declare function compareWithSnapshot(name: string, currentOutput: string, dir?: string): Promise<SnapshotComparison>;
|
|
169
|
+
/**
|
|
170
|
+
* Delete a snapshot (convenience function)
|
|
171
|
+
*/
|
|
172
|
+
export declare function deleteSnapshot(name: string, dir?: string): Promise<void>;
|
|
173
|
+
/**
|
|
174
|
+
* List all snapshots (convenience function)
|
|
175
|
+
*/
|
|
176
|
+
export declare function listSnapshots(dir?: string): Promise<SnapshotData[]>;
|
package/dist/snapshot.js
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Snapshot Testing System
|
|
4
|
+
* Tier 4.16: Visual regression detection for LLM outputs
|
|
5
|
+
*
|
|
6
|
+
* ⚠️ NOTE: This module requires Node.js and will not work in browsers.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```typescript
|
|
10
|
+
* import { snapshot, loadSnapshot } from '@evalgate/sdk';
|
|
11
|
+
*
|
|
12
|
+
* const output = await generateText('Write a haiku about coding');
|
|
13
|
+
* await snapshot(output, 'haiku-test');
|
|
14
|
+
*
|
|
15
|
+
* // Later, compare with snapshot
|
|
16
|
+
* const saved = await loadSnapshot('haiku-test');
|
|
17
|
+
* const { matches } = await compareWithSnapshot('haiku-test', output);
|
|
18
|
+
* ```
|
|
19
|
+
*/
|
|
20
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
21
|
+
if (k2 === undefined) k2 = k;
|
|
22
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
23
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
24
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
25
|
+
}
|
|
26
|
+
Object.defineProperty(o, k2, desc);
|
|
27
|
+
}) : (function(o, m, k, k2) {
|
|
28
|
+
if (k2 === undefined) k2 = k;
|
|
29
|
+
o[k2] = m[k];
|
|
30
|
+
}));
|
|
31
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
32
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
33
|
+
}) : function(o, v) {
|
|
34
|
+
o["default"] = v;
|
|
35
|
+
});
|
|
36
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
37
|
+
var ownKeys = function(o) {
|
|
38
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
39
|
+
var ar = [];
|
|
40
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
41
|
+
return ar;
|
|
42
|
+
};
|
|
43
|
+
return ownKeys(o);
|
|
44
|
+
};
|
|
45
|
+
return function (mod) {
|
|
46
|
+
if (mod && mod.__esModule) return mod;
|
|
47
|
+
var result = {};
|
|
48
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
49
|
+
__setModuleDefault(result, mod);
|
|
50
|
+
return result;
|
|
51
|
+
};
|
|
52
|
+
})();
|
|
53
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
54
|
+
exports.SnapshotManager = void 0;
|
|
55
|
+
exports.snapshot = snapshot;
|
|
56
|
+
exports.loadSnapshot = loadSnapshot;
|
|
57
|
+
exports.compareWithSnapshot = compareWithSnapshot;
|
|
58
|
+
exports.deleteSnapshot = deleteSnapshot;
|
|
59
|
+
exports.listSnapshots = listSnapshots;
|
|
60
|
+
// Environment check
|
|
61
|
+
// Truthy (the Node.js version string) only when running under Node.js;
// `false`/`undefined` in browsers and other runtimes without `process.versions.node`.
const isNode = typeof process === "undefined" ? false : process.versions?.node;
if (!isNode) {
    // Fail at import time: everything below relies on the filesystem.
    const reason = [
        "Snapshot testing requires Node.js and cannot run in browsers. ",
        "This feature uses the filesystem for storing snapshots.",
    ].join("");
    throw new Error(reason);
}
|
|
66
|
+
const crypto = __importStar(require("node:crypto"));
|
|
67
|
+
const fs = __importStar(require("node:fs"));
|
|
68
|
+
const path = __importStar(require("node:path"));
|
|
69
|
+
/**
|
|
70
|
+
* Snapshot manager
|
|
71
|
+
*/
|
|
72
|
+
class SnapshotManager {
    /** Directory in which snapshot JSON files are stored. */
    snapshotDir;
    /**
     * Create a manager bound to a snapshot directory.
     * The directory is created (recursively) if it does not exist yet.
     *
     * @param snapshotDir Directory for snapshot files (default: "./.snapshots").
     */
    constructor(snapshotDir = "./.snapshots") {
        this.snapshotDir = snapshotDir;
        this.ensureSnapshotDir();
    }
    /**
     * Ensure snapshot directory exists
     */
    ensureSnapshotDir() {
        if (!fs.existsSync(this.snapshotDir)) {
            fs.mkdirSync(this.snapshotDir, { recursive: true });
        }
    }
    /**
     * Get snapshot file path with security checks.
     *
     * The name is rejected when empty/blank or when it contains "..", "/" or "\".
     * Every other character outside [A-Za-z0-9_-] is replaced with "-", so two
     * distinct names (e.g. "a b" and "a.b") can map to the same file.
     *
     * @param name Snapshot name supplied by the caller.
     * @returns Path of the snapshot file: `<snapshotDir>/<sanitized>.json`.
     * @throws Error when the name is empty, contains path separators or "..",
     *         or when the resulting path would fall outside the snapshot directory.
     */
    getSnapshotPath(name) {
        // Security: prevent empty names
        if (!name || name.trim().length === 0) {
            throw new Error("Snapshot name cannot be empty");
        }
        // Security: prevent path traversal
        if (name.includes("..") || name.includes("/") || name.includes("\\")) {
            throw new Error('Snapshot name cannot contain path separators or ".."');
        }
        // Sanitize to alphanumeric, hyphens, and underscores
        const sanitized = name.replace(/[^A-Za-z0-9_-]/g, "-");
        // Security: ensure sanitized name is not empty
        if (sanitized.length === 0) {
            throw new Error("Snapshot name must contain at least one alphanumeric character");
        }
        const filePath = path.join(this.snapshotDir, `${sanitized}.json`);
        // Defense in depth: the resolved file must live inside the resolved directory.
        // A plain string-prefix test would also accept sibling directories such as
        // "<dir>-evil", so compare via the relative path instead.
        const resolvedDir = path.resolve(this.snapshotDir);
        const relative = path.relative(resolvedDir, path.resolve(filePath));
        const escapesDir = relative === "" ||
            relative === ".." ||
            relative.startsWith(`..${path.sep}`) ||
            path.isAbsolute(relative);
        if (escapesDir) {
            throw new Error("Invalid snapshot path: path traversal detected");
        }
        return filePath;
    }
    /**
     * Generate content hash (hex-encoded SHA-256 of the content).
     */
    generateHash(content) {
        return crypto.createHash("sha256").update(content).digest("hex");
    }
    /**
     * Save a snapshot
     *
     * Writes `{ output, metadata }` as pretty-printed JSON. `metadata.createdAt`
     * is always set to the current time, including when overwriting.
     *
     * @param name Snapshot name (validated by getSnapshotPath).
     * @param output Text to store.
     * @param options Optional `tags`, `metadata`, and `overwrite` flag.
     * @returns The snapshot data that was written.
     * @throws Error when the snapshot already exists and `overwrite` is not set.
     *
     * @example
     * ```typescript
     * const manager = new SnapshotManager();
     * await manager.save('haiku-test', output, { tags: ['poetry'] });
     * ```
     */
    async save(name, output, options) {
        const filePath = this.getSnapshotPath(name);
        // Check if snapshot exists
        if (!options?.overwrite && fs.existsSync(filePath)) {
            throw new Error(`Snapshot '${name}' already exists. Use overwrite: true to update.`);
        }
        const snapshotData = {
            output,
            metadata: {
                name,
                createdAt: new Date().toISOString(),
                hash: this.generateHash(output),
                tags: options?.tags,
                metadata: options?.metadata,
            },
        };
        fs.writeFileSync(filePath, JSON.stringify(snapshotData, null, 2));
        return snapshotData;
    }
    /**
     * Load a snapshot
     *
     * @param name Snapshot name.
     * @returns The parsed snapshot file contents (not schema-validated).
     * @throws Error when no snapshot file exists for the name; JSON parse errors
     *         from a corrupt file propagate unchanged.
     *
     * @example
     * ```typescript
     * const snapshot = await manager.load('haiku-test');
     * console.log(snapshot.output);
     * ```
     */
    async load(name) {
        const filePath = this.getSnapshotPath(name);
        if (!fs.existsSync(filePath)) {
            throw new Error(`Snapshot '${name}' not found`);
        }
        const content = fs.readFileSync(filePath, "utf-8");
        return JSON.parse(content);
    }
    /**
     * Compare current output with saved snapshot
     *
     * `matches` is an exact string comparison. `similarity` is the fraction of
     * line positions (split on "\n") whose lines are identical, over the longer
     * of the two line counts. A line that exists on only one side never counts
     * as matching and is reported as `<missing>` in `differences`, so
     * `matches === false` always comes with at least one difference.
     *
     * @param name Snapshot name to compare against.
     * @param currentOutput New output to compare.
     * @returns Comparison result with `matches`, `similarity`, `differences`,
     *          `original`, and `current`.
     *
     * @example
     * ```typescript
     * const comparison = await manager.compare('haiku-test', currentOutput);
     * if (!comparison.matches) {
     *   console.log('Differences:', comparison.differences);
     * }
     * ```
     */
    async compare(name, currentOutput) {
        const { output: original } = await this.load(name);
        const originalLines = original.split("\n");
        const currentLines = currentOutput.split("\n");
        const maxLines = Math.max(originalLines.length, currentLines.length);
        // Quote real lines; mark positions past the end of one side explicitly so an
        // absent line is distinguishable from an empty one.
        const render = (line) => (line === undefined ? "<missing>" : `"${line}"`);
        const differences = [];
        let matchingLines = 0;
        for (let i = 0; i < maxLines; i++) {
            const origLine = originalLines[i];
            const currLine = currentLines[i];
            if (origLine === currLine) {
                matchingLines++;
            }
            else {
                differences.push(`Line ${i + 1}: ${render(origLine)} → ${render(currLine)}`);
            }
        }
        const similarity = maxLines > 0 ? matchingLines / maxLines : 1;
        return {
            matches: original === currentOutput,
            similarity,
            differences,
            original,
            current: currentOutput,
        };
    }
    /**
     * List all snapshots
     *
     * Reads and parses every `*.json` entry in the snapshot directory; a file
     * that is not valid JSON makes the whole call reject.
     *
     * @example
     * ```typescript
     * const snapshots = await manager.list();
     * snapshots.forEach(s => console.log(s.metadata.name));
     * ```
     */
    async list() {
        const files = fs.readdirSync(this.snapshotDir);
        const snapshots = [];
        for (const file of files) {
            if (file.endsWith(".json")) {
                const content = fs.readFileSync(path.join(this.snapshotDir, file), "utf-8");
                snapshots.push(JSON.parse(content));
            }
        }
        return snapshots;
    }
    /**
     * Delete a snapshot
     *
     * @throws Error when no snapshot file exists for the name.
     *
     * @example
     * ```typescript
     * await manager.delete('old-test');
     * ```
     */
    async delete(name) {
        const filePath = this.getSnapshotPath(name);
        if (!fs.existsSync(filePath)) {
            throw new Error(`Snapshot '${name}' not found`);
        }
        fs.unlinkSync(filePath);
    }
    /**
     * Update a snapshot with new output
     *
     * Keeps the existing tags and metadata, replaces the output and hash, and
     * resets `createdAt` (it goes through save()).
     *
     * @throws Error when the snapshot does not exist yet.
     *
     * @example
     * ```typescript
     * await manager.update('haiku-test', newOutput);
     * ```
     */
    async update(name, output) {
        const existing = await this.load(name);
        return this.save(name, output, {
            tags: existing.metadata.tags,
            metadata: existing.metadata.metadata,
            overwrite: true,
        });
    }
}
|
|
256
|
+
exports.SnapshotManager = SnapshotManager;
|
|
257
|
+
// Global snapshot manager instance
|
|
258
|
+
let globalManager;
/**
 * Return the module-wide SnapshotManager.
 *
 * A manager is constructed when none exists yet, and also whenever a truthy
 * `dir` is passed. In the latter case the new manager REPLACES the shared one,
 * so subsequent calls without `dir` keep using the most recently supplied
 * directory rather than the default.
 */
function getSnapshotManager(dir) {
    const canReuse = Boolean(globalManager) && !dir;
    if (canReuse) {
        return globalManager;
    }
    globalManager = new SnapshotManager(dir);
    return globalManager;
}
|
|
268
|
+
/**
|
|
269
|
+
* Save a snapshot (convenience function)
|
|
270
|
+
*
|
|
271
|
+
* @example
|
|
272
|
+
* ```typescript
|
|
273
|
+
* const output = await generateText('Write a haiku');
|
|
274
|
+
* await snapshot(output, 'haiku-test');
|
|
275
|
+
* ```
|
|
276
|
+
*/
|
|
277
|
+
async function snapshot(output, name, options) {
    // `options` (including `dir`) is forwarded to save() unchanged.
    return getSnapshotManager(options?.dir).save(name, output, options);
}
|
|
281
|
+
/**
|
|
282
|
+
* Load a snapshot (convenience function)
|
|
283
|
+
*
|
|
284
|
+
* @example
|
|
285
|
+
* ```typescript
|
|
286
|
+
* const saved = await loadSnapshot('haiku-test');
|
|
287
|
+
* console.log(saved.output);
|
|
288
|
+
* ```
|
|
289
|
+
*/
|
|
290
|
+
async function loadSnapshot(name, dir) {
    // Delegates to the shared manager (re-created when `dir` is given).
    return getSnapshotManager(dir).load(name);
}
|
|
294
|
+
/**
|
|
295
|
+
* Compare with snapshot (convenience function)
|
|
296
|
+
*
|
|
297
|
+
* @example
|
|
298
|
+
* ```typescript
|
|
299
|
+
* const comparison = await compareWithSnapshot('haiku-test', currentOutput);
|
|
300
|
+
* if (!comparison.matches) {
|
|
301
|
+
* console.log('Output changed!');
|
|
302
|
+
* }
|
|
303
|
+
* ```
|
|
304
|
+
*/
|
|
305
|
+
async function compareWithSnapshot(name, currentOutput, dir) {
    // Delegates to the shared manager (re-created when `dir` is given).
    return getSnapshotManager(dir).compare(name, currentOutput);
}
|
|
309
|
+
/**
|
|
310
|
+
* Delete a snapshot (convenience function)
|
|
311
|
+
*/
|
|
312
|
+
async function deleteSnapshot(name, dir) {
    // Delegates to the shared manager (re-created when `dir` is given).
    return getSnapshotManager(dir).delete(name);
}
|
|
316
|
+
/**
|
|
317
|
+
* List all snapshots (convenience function)
|
|
318
|
+
*/
|
|
319
|
+
async function listSnapshots(dir) {
    // Delegates to the shared manager (re-created when `dir` is given).
    return getSnapshotManager(dir).list();
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Streaming & Batch Operations
|
|
3
|
+
* Tier 2.8: Handle large datasets efficiently
|
|
4
|
+
*
|
|
5
|
+
* @example
|
|
6
|
+
* ```typescript
|
|
7
|
+
* import { streamEvaluation, batchProcess } from '@evalgate/sdk';
|
|
8
|
+
*
|
|
9
|
+
* // Stream large evaluation results
|
|
10
|
+
* for await (const result of streamEvaluation(config)) {
|
|
11
|
+
* console.log(`Progress: ${result.completed}/${result.total}`);
|
|
12
|
+
* }
|
|
13
|
+
*
|
|
14
|
+
* // Batch create traces
|
|
15
|
+
* await batchProcess((item) => client.traces.create(item), traces, { batchSize: 100 });
|
|
16
|
+
* ```
|
|
17
|
+
*/
|
|
18
|
+
export interface BatchOptions {
|
|
19
|
+
/** Batch size (default: 100) */
|
|
20
|
+
batchSize?: number;
|
|
21
|
+
/** Run batches in parallel (default: true) */
|
|
22
|
+
parallel?: boolean;
|
|
23
|
+
/** Delay between batches in ms (default: 0) */
|
|
24
|
+
delayMs?: number;
|
|
25
|
+
/** Callback for progress updates */
|
|
26
|
+
onProgress?: (progress: BatchProgress) => void;
|
|
27
|
+
/** Callback for batch errors */
|
|
28
|
+
onError?: (error: BatchError) => void;
|
|
29
|
+
/** Continue on error (default: true) */
|
|
30
|
+
continueOnError?: boolean;
|
|
31
|
+
}
|
|
32
|
+
export interface BatchProgress {
|
|
33
|
+
/** Total items */
|
|
34
|
+
total: number;
|
|
35
|
+
/** Completed items */
|
|
36
|
+
completed: number;
|
|
37
|
+
/** Failed items */
|
|
38
|
+
failed: number;
|
|
39
|
+
/** Current batch number */
|
|
40
|
+
batch: number;
|
|
41
|
+
/** Total batches */
|
|
42
|
+
totalBatches: number;
|
|
43
|
+
}
|
|
44
|
+
export interface BatchError {
|
|
45
|
+
/** Batch number where error occurred */
|
|
46
|
+
batch: number;
|
|
47
|
+
/** Item index in batch */
|
|
48
|
+
index: number;
|
|
49
|
+
/** The error */
|
|
50
|
+
error: Error;
|
|
51
|
+
/** The item that failed */
|
|
52
|
+
item: unknown;
|
|
53
|
+
}
|
|
54
|
+
export interface BatchResult<T> {
|
|
55
|
+
/** Successfully processed items */
|
|
56
|
+
successful: T[];
|
|
57
|
+
/** Failed items */
|
|
58
|
+
failed: Array<{
|
|
59
|
+
item: unknown;
|
|
60
|
+
error: Error;
|
|
61
|
+
}>;
|
|
62
|
+
/** Summary */
|
|
63
|
+
summary: {
|
|
64
|
+
total: number;
|
|
65
|
+
successful: number;
|
|
66
|
+
failed: number;
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Batch create items
|
|
71
|
+
*
|
|
72
|
+
* @example
|
|
73
|
+
* ```typescript
|
|
74
|
+
* const traces = [
|
|
75
|
+
* { name: 'trace-1', traceId: 'id-1' },
|
|
76
|
+
* { name: 'trace-2', traceId: 'id-2' },
|
|
77
|
+
* // ... 1000 more
|
|
78
|
+
* ];
|
|
79
|
+
*
|
|
80
|
+
* const result = await batchProcess(
|
|
81
|
+
* (item) => client.traces.create(item),
|
|
82
|
+
* traces,
|
|
83
|
+
* {
|
|
84
|
+
* batchSize: 100,
|
|
85
|
+
* onProgress: (p) => console.log(`${p.completed}/${p.total}`)
|
|
86
|
+
* }
|
|
87
|
+
* );
|
|
88
|
+
* ```
|
|
89
|
+
*/
|
|
90
|
+
export declare function batchProcess<TInput, TOutput>(processor: (item: TInput) => Promise<TOutput>, items: TInput[], options?: BatchOptions): Promise<BatchResult<TOutput>>;
|
|
91
|
+
/**
|
|
92
|
+
* Stream evaluation results
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* ```typescript
|
|
96
|
+
* const config = {
|
|
97
|
+
* cases: [...],
|
|
98
|
+
* executor: async (input) => callLLM(input)
|
|
99
|
+
* };
|
|
100
|
+
*
|
|
101
|
+
* for await (const result of streamEvaluation(config)) {
|
|
102
|
+
* console.log(`Case ${result.caseId}: ${result.passed ? 'PASS' : 'FAIL'}`);
|
|
103
|
+
* console.log(`Progress: ${result.completed}/${result.total}`);
|
|
104
|
+
* }
|
|
105
|
+
* ```
|
|
106
|
+
*/
|
|
107
|
+
export declare function streamEvaluation<T>(config: {
|
|
108
|
+
cases: T[];
|
|
109
|
+
executor: (testCase: T) => Promise<unknown>;
|
|
110
|
+
onProgress?: (progress: BatchProgress) => void;
|
|
111
|
+
}): AsyncGenerator<{
|
|
112
|
+
caseId: string;
|
|
113
|
+
case: T;
|
|
114
|
+
result: unknown;
|
|
115
|
+
passed: boolean;
|
|
116
|
+
completed: number;
|
|
117
|
+
total: number;
|
|
118
|
+
}>;
|
|
119
|
+
/**
|
|
120
|
+
* Batch read with pagination
|
|
121
|
+
*
|
|
122
|
+
* @example
|
|
123
|
+
* ```typescript
|
|
124
|
+
* const allTraces = await batchRead(
|
|
125
|
+
* (params) => client.traces.list(params),
|
|
126
|
+
* { pageSize: 100 }
|
|
127
|
+
* );
|
|
128
|
+
* ```
|
|
129
|
+
*/
|
|
130
|
+
export declare function batchRead<T>(fetcher: (params: {
|
|
131
|
+
limit: number;
|
|
132
|
+
offset: number;
|
|
133
|
+
}) => Promise<T[]>, options?: {
|
|
134
|
+
pageSize?: number;
|
|
135
|
+
maxPages?: number;
|
|
136
|
+
onProgress?: (page: number, items: number) => void;
|
|
137
|
+
}): Promise<T[]>;
|
|
138
|
+
/**
|
|
139
|
+
* Rate-limited batch processor
|
|
140
|
+
*
|
|
141
|
+
* @example
|
|
142
|
+
* ```typescript
|
|
143
|
+
* const limiter = new RateLimiter({ requestsPerSecond: 10 });
|
|
144
|
+
*
|
|
145
|
+
* for (const item of items) {
|
|
146
|
+
* await limiter.throttle(() => client.traces.create(item));
|
|
147
|
+
* }
|
|
148
|
+
* ```
|
|
149
|
+
*/
|
|
150
|
+
export declare class RateLimiter {
|
|
151
|
+
private queue;
|
|
152
|
+
private processing;
|
|
153
|
+
private requestsPerSecond;
|
|
154
|
+
private interval;
|
|
155
|
+
constructor(options: {
|
|
156
|
+
requestsPerSecond: number;
|
|
157
|
+
});
|
|
158
|
+
/**
|
|
159
|
+
* Throttle a function call
|
|
160
|
+
*/
|
|
161
|
+
throttle<T>(fn: () => Promise<T>): Promise<T>;
|
|
162
|
+
private process;
|
|
163
|
+
}
|
|
164
|
+
/**
|
|
165
|
+
* Chunk array into smaller arrays
|
|
166
|
+
*
|
|
167
|
+
* @example
|
|
168
|
+
* ```typescript
|
|
169
|
+
* const chunks = chunk([1, 2, 3, 4, 5], 2);
|
|
170
|
+
* // [[1, 2], [3, 4], [5]]
|
|
171
|
+
* ```
|
|
172
|
+
*/
|
|
173
|
+
export declare function chunk<T>(array: T[], size: number): T[][];
|