@elsium-ai/testing 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/determinism.d.ts +28 -0
- package/dist/determinism.d.ts.map +1 -0
- package/dist/eval.d.ts +85 -0
- package/dist/eval.d.ts.map +1 -0
- package/dist/fixtures.d.ts +31 -0
- package/dist/fixtures.d.ts.map +1 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1263 -0
- package/dist/mock-provider.d.ts +26 -0
- package/dist/mock-provider.d.ts.map +1 -0
- package/dist/pinning.d.ts +30 -0
- package/dist/pinning.d.ts.map +1 -0
- package/dist/prompts.d.ts +33 -0
- package/dist/prompts.d.ts.map +1 -0
- package/dist/regression.d.ts +37 -0
- package/dist/regression.d.ts.map +1 -0
- package/dist/replay.d.ts +19 -0
- package/dist/replay.d.ts.map +1 -0
- package/dist/snapshot.d.ts +31 -0
- package/dist/snapshot.d.ts.map +1 -0
- package/package.json +38 -0
|
package/dist/determinism.d.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
export interface DeterminismResult {
|
|
2
|
+
deterministic: boolean;
|
|
3
|
+
runs: number;
|
|
4
|
+
uniqueOutputs: number;
|
|
5
|
+
outputs: string[];
|
|
6
|
+
variance: number;
|
|
7
|
+
}
|
|
8
|
+
export interface StabilityResult {
|
|
9
|
+
stable: boolean;
|
|
10
|
+
runs: number;
|
|
11
|
+
uniqueOutputs: number;
|
|
12
|
+
outputs: Array<{
|
|
13
|
+
output: string;
|
|
14
|
+
timestamp: number;
|
|
15
|
+
}>;
|
|
16
|
+
variance: number;
|
|
17
|
+
}
|
|
18
|
+
export declare function assertDeterministic(fn: (seed?: number) => Promise<string>, options?: {
|
|
19
|
+
runs?: number;
|
|
20
|
+
seed?: number;
|
|
21
|
+
tolerance?: number;
|
|
22
|
+
}): Promise<DeterminismResult>;
|
|
23
|
+
export declare function assertStable(fn: (seed?: number) => Promise<string>, options?: {
|
|
24
|
+
intervalMs?: number;
|
|
25
|
+
runs?: number;
|
|
26
|
+
seed?: number;
|
|
27
|
+
}): Promise<StabilityResult>;
|
|
28
|
+
//# sourceMappingURL=determinism.d.ts.map
|
package/dist/determinism.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"determinism.d.ts","sourceRoot":"","sources":["../src/determinism.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,iBAAiB;IACjC,aAAa,EAAE,OAAO,CAAA;IACtB,IAAI,EAAE,MAAM,CAAA;IACZ,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,EAAE,MAAM,EAAE,CAAA;IACjB,QAAQ,EAAE,MAAM,CAAA;CAChB;AAED,MAAM,WAAW,eAAe;IAC/B,MAAM,EAAE,OAAO,CAAA;IACf,IAAI,EAAE,MAAM,CAAA;IACZ,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,EAAE,KAAK,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IACrD,QAAQ,EAAE,MAAM,CAAA;CAChB;AAED,wBAAsB,mBAAmB,CACxC,EAAE,EAAE,CAAC,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EACtC,OAAO,CAAC,EAAE;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5D,OAAO,CAAC,iBAAiB,CAAC,CA8B5B;AAED,wBAAsB,YAAY,CACjC,EAAE,EAAE,CAAC,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EACtC,OAAO,CAAC,EAAE;IAAE,UAAU,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAA;CAAE,GAC7D,OAAO,CAAC,eAAe,CAAC,CA0B1B"}
|
package/dist/eval.d.ts
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
export interface EvalCase {
|
|
2
|
+
name: string;
|
|
3
|
+
input: string;
|
|
4
|
+
expected?: string;
|
|
5
|
+
criteria?: EvalCriterion[];
|
|
6
|
+
tags?: string[];
|
|
7
|
+
}
|
|
8
|
+
export type LLMJudge = (prompt: string) => Promise<{
|
|
9
|
+
score: number;
|
|
10
|
+
reasoning: string;
|
|
11
|
+
}>;
|
|
12
|
+
export type EvalCriterion = {
|
|
13
|
+
type: 'contains';
|
|
14
|
+
value: string;
|
|
15
|
+
caseSensitive?: boolean;
|
|
16
|
+
} | {
|
|
17
|
+
type: 'not_contains';
|
|
18
|
+
value: string;
|
|
19
|
+
caseSensitive?: boolean;
|
|
20
|
+
} | {
|
|
21
|
+
type: 'matches';
|
|
22
|
+
pattern: string;
|
|
23
|
+
flags?: string;
|
|
24
|
+
} | {
|
|
25
|
+
type: 'length_min';
|
|
26
|
+
value: number;
|
|
27
|
+
} | {
|
|
28
|
+
type: 'length_max';
|
|
29
|
+
value: number;
|
|
30
|
+
} | {
|
|
31
|
+
type: 'json_valid';
|
|
32
|
+
} | {
|
|
33
|
+
type: 'json_matches';
|
|
34
|
+
schema: Record<string, unknown>;
|
|
35
|
+
} | {
|
|
36
|
+
type: 'custom';
|
|
37
|
+
name: string;
|
|
38
|
+
fn: (output: string) => boolean;
|
|
39
|
+
} | {
|
|
40
|
+
type: 'llm_judge';
|
|
41
|
+
prompt: string;
|
|
42
|
+
judge: LLMJudge;
|
|
43
|
+
threshold?: number;
|
|
44
|
+
} | {
|
|
45
|
+
type: 'semantic_similarity';
|
|
46
|
+
reference: string;
|
|
47
|
+
threshold?: number;
|
|
48
|
+
} | {
|
|
49
|
+
type: 'factual_accuracy';
|
|
50
|
+
facts: string[];
|
|
51
|
+
threshold?: number;
|
|
52
|
+
};
|
|
53
|
+
export interface EvalResult {
|
|
54
|
+
name: string;
|
|
55
|
+
passed: boolean;
|
|
56
|
+
score: number;
|
|
57
|
+
criteria: CriterionResult[];
|
|
58
|
+
input: string;
|
|
59
|
+
output: string;
|
|
60
|
+
durationMs: number;
|
|
61
|
+
tags: string[];
|
|
62
|
+
}
|
|
63
|
+
export interface CriterionResult {
|
|
64
|
+
type: string;
|
|
65
|
+
passed: boolean;
|
|
66
|
+
message: string;
|
|
67
|
+
}
|
|
68
|
+
export interface EvalSuiteConfig {
|
|
69
|
+
name: string;
|
|
70
|
+
cases: EvalCase[];
|
|
71
|
+
runner: (input: string) => Promise<string>;
|
|
72
|
+
concurrency?: number;
|
|
73
|
+
}
|
|
74
|
+
export interface EvalSuiteResult {
|
|
75
|
+
name: string;
|
|
76
|
+
total: number;
|
|
77
|
+
passed: number;
|
|
78
|
+
failed: number;
|
|
79
|
+
score: number;
|
|
80
|
+
results: EvalResult[];
|
|
81
|
+
durationMs: number;
|
|
82
|
+
}
|
|
83
|
+
export declare function runEvalSuite(config: EvalSuiteConfig): Promise<EvalSuiteResult>;
|
|
84
|
+
export declare function formatEvalReport(result: EvalSuiteResult): string;
|
|
85
|
+
//# sourceMappingURL=eval.d.ts.map
|
package/dist/eval.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../src/eval.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,QAAQ;IACxB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,QAAQ,CAAC,EAAE,aAAa,EAAE,CAAA;IAC1B,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;CACf;AAED,MAAM,MAAM,QAAQ,GAAG,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,CAAC,CAAA;AAExF,MAAM,MAAM,aAAa,GACtB;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,OAAO,CAAA;CAAE,GAC5D;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,OAAO,CAAA;CAAE,GAChE;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GACpD;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GACrC;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GACrC;IAAE,IAAI,EAAE,YAAY,CAAA;CAAE,GACtB;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,GACzD;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAA;CAAE,GACjE;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,QAAQ,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAC1E;IAAE,IAAI,EAAE,qBAAqB,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GACtE;IAAE,IAAI,EAAE,kBAAkB,CAAC;IAAC,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,CAAA;AAEpE,MAAM,WAAW,UAAU;IAC1B,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,OAAO,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,eAAe,EAAE,CAAA;IAC3B,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,EAAE,MAAM,CAAA;IAClB,IAAI,EAAE,MAAM,EAAE,CAAA;CACd;AAED,MAAM,WAAW,eAAe;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,OAAO,CAAA;IACf,OAAO,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,eAAe;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,QAAQ,EAAE,CAAA;IACjB,MAAM,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAA;IAC1C,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,eAAe;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,EAAE,MAAM,CAAA;IACb,OAAO,EAAE,UAAU,EAAE,CAAA;IACrB,UAAU,EAAE,MAAM,CAAA;CAClB;AAkUD,wBAAsB,YAAY,CAAC,MAAM,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC,CA+BpF;AAID,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,eAAe,GAAG,MAAM,CA0BhE"}
|
package/dist/fixtures.d.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { type MockProvider, type MockResponseConfig } from './mock-provider';
|
|
2
|
+
export interface FixtureEntry {
|
|
3
|
+
request: {
|
|
4
|
+
messages: Array<{
|
|
5
|
+
role: string;
|
|
6
|
+
content: string;
|
|
7
|
+
}>;
|
|
8
|
+
model?: string;
|
|
9
|
+
system?: string;
|
|
10
|
+
};
|
|
11
|
+
response: MockResponseConfig;
|
|
12
|
+
timestamp?: string;
|
|
13
|
+
}
|
|
14
|
+
export interface Fixture {
|
|
15
|
+
readonly name: string;
|
|
16
|
+
readonly entries: FixtureEntry[];
|
|
17
|
+
toProvider(options?: {
|
|
18
|
+
matching?: 'sequential' | 'request-hash';
|
|
19
|
+
}): MockProvider;
|
|
20
|
+
toJSON(): string;
|
|
21
|
+
}
|
|
22
|
+
export declare function createFixture(name: string, entries: FixtureEntry[]): Fixture;
|
|
23
|
+
export declare function loadFixture(json: string): Fixture;
|
|
24
|
+
export interface FixtureRecorder {
|
|
25
|
+
wrap(provider: MockProvider): MockProvider;
|
|
26
|
+
getEntries(): FixtureEntry[];
|
|
27
|
+
toFixture(name: string): Fixture;
|
|
28
|
+
clear(): void;
|
|
29
|
+
}
|
|
30
|
+
export declare function createRecorder(): FixtureRecorder;
|
|
31
|
+
//# sourceMappingURL=fixtures.d.ts.map
|
package/dist/fixtures.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fixtures.d.ts","sourceRoot":"","sources":["../src/fixtures.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,YAAY,EAAE,KAAK,kBAAkB,EAAgB,MAAM,iBAAiB,CAAA;AAE1F,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE;QACR,QAAQ,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAA;SAAE,CAAC,CAAA;QAClD,KAAK,CAAC,EAAE,MAAM,CAAA;QACd,MAAM,CAAC,EAAE,MAAM,CAAA;KACf,CAAA;IACD,QAAQ,EAAE,kBAAkB,CAAA;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,OAAO;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,OAAO,EAAE,YAAY,EAAE,CAAA;IAChC,UAAU,CAAC,OAAO,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,YAAY,GAAG,cAAc,CAAA;KAAE,GAAG,YAAY,CAAA;IAChF,MAAM,IAAI,MAAM,CAAA;CAChB;AAOD,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAyD5E;AAED,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAGjD;AAID,MAAM,WAAW,eAAe;IAC/B,IAAI,CAAC,QAAQ,EAAE,YAAY,GAAG,YAAY,CAAA;IAC1C,UAAU,IAAI,YAAY,EAAE,CAAA;IAC5B,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAA;IAChC,KAAK,IAAI,IAAI,CAAA;CACb;AAED,wBAAgB,cAAc,IAAI,eAAe,CAgDhD"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export { mockProvider } from './mock-provider';
|
|
2
|
+
export type { MockProvider, MockProviderOptions, MockResponseConfig } from './mock-provider';
|
|
3
|
+
export { createFixture, loadFixture, createRecorder } from './fixtures';
|
|
4
|
+
export type { Fixture, FixtureEntry, FixtureRecorder } from './fixtures';
|
|
5
|
+
export { runEvalSuite, formatEvalReport } from './eval';
|
|
6
|
+
export type { EvalCase, EvalCriterion, EvalResult, CriterionResult, EvalSuiteConfig, EvalSuiteResult, LLMJudge, } from './eval';
|
|
7
|
+
export { createSnapshotStore, hashOutput, testSnapshot } from './snapshot';
|
|
8
|
+
export type { PromptSnapshot, SnapshotStore, SnapshotTestResult } from './snapshot';
|
|
9
|
+
export { createPromptRegistry, definePrompt } from './prompts';
|
|
10
|
+
export type { PromptDefinition, PromptDiff, DiffLine, PromptRegistry } from './prompts';
|
|
11
|
+
export { createRegressionSuite } from './regression';
|
|
12
|
+
export type { RegressionBaseline, RegressionResult, RegressionDetail, RegressionSuite, } from './regression';
|
|
13
|
+
export { createReplayRecorder, createReplayPlayer } from './replay';
|
|
14
|
+
export type { ReplayEntry, ReplayRecorder, ReplayPlayer } from './replay';
|
|
15
|
+
export { createPinStore, pinOutput } from './pinning';
|
|
16
|
+
export type { Pin, PinStore, PinResult } from './pinning';
|
|
17
|
+
export { assertDeterministic, assertStable } from './determinism';
|
|
18
|
+
export type { DeterminismResult, StabilityResult } from './determinism';
|
|
19
|
+
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAC9C,YAAY,EAAE,YAAY,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAA;AAG5F,OAAO,EAAE,aAAa,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,YAAY,CAAA;AACvE,YAAY,EAAE,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,YAAY,CAAA;AAGxE,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,QAAQ,CAAA;AACvD,YAAY,EACX,QAAQ,EACR,aAAa,EACb,UAAU,EACV,eAAe,EACf,eAAe,EACf,eAAe,EACf,QAAQ,GACR,MAAM,QAAQ,CAAA;AAGf,OAAO,EAAE,mBAAmB,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAC1E,YAAY,EAAE,cAAc,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAA;AAGnF,OAAO,EAAE,oBAAoB,EAAE,YAAY,EAAE,MAAM,WAAW,CAAA;AAC9D,YAAY,EAAE,gBAAgB,EAAE,UAAU,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,WAAW,CAAA;AAGvF,OAAO,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAA;AACpD,YAAY,EACX,kBAAkB,EAClB,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,GACf,MAAM,cAAc,CAAA;AAGrB,OAAO,EAAE,oBAAoB,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA;AACnE,YAAY,EAAE,WAAW,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAGzE,OAAO,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,WAAW,CAAA;AACrD,YAAY,EAAE,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,WAAW,CAAA;AAGzD,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,eAAe,CAAA;AACjE,YAAY,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA"}
|