@elsium-ai/testing 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/determinism.d.ts ADDED
@@ -0,0 +1,28 @@
1
+ export interface DeterminismResult {
2
+ deterministic: boolean;
3
+ runs: number;
4
+ uniqueOutputs: number;
5
+ outputs: string[];
6
+ variance: number;
7
+ }
8
+ export interface StabilityResult {
9
+ stable: boolean;
10
+ runs: number;
11
+ uniqueOutputs: number;
12
+ outputs: Array<{
13
+ output: string;
14
+ timestamp: number;
15
+ }>;
16
+ variance: number;
17
+ }
18
+ export declare function assertDeterministic(fn: (seed?: number) => Promise<string>, options?: {
19
+ runs?: number;
20
+ seed?: number;
21
+ tolerance?: number;
22
+ }): Promise<DeterminismResult>;
23
+ export declare function assertStable(fn: (seed?: number) => Promise<string>, options?: {
24
+ intervalMs?: number;
25
+ runs?: number;
26
+ seed?: number;
27
+ }): Promise<StabilityResult>;
28
+ //# sourceMappingURL=determinism.d.ts.map
package/dist/determinism.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"determinism.d.ts","sourceRoot":"","sources":["../src/determinism.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,iBAAiB;IACjC,aAAa,EAAE,OAAO,CAAA;IACtB,IAAI,EAAE,MAAM,CAAA;IACZ,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,EAAE,MAAM,EAAE,CAAA;IACjB,QAAQ,EAAE,MAAM,CAAA;CAChB;AAED,MAAM,WAAW,eAAe;IAC/B,MAAM,EAAE,OAAO,CAAA;IACf,IAAI,EAAE,MAAM,CAAA;IACZ,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,EAAE,KAAK,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IACrD,QAAQ,EAAE,MAAM,CAAA;CAChB;AAED,wBAAsB,mBAAmB,CACxC,EAAE,EAAE,CAAC,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EACtC,OAAO,CAAC,EAAE;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5D,OAAO,CAAC,iBAAiB,CAAC,CA8B5B;AAED,wBAAsB,YAAY,CACjC,EAAE,EAAE,CAAC,IAAI,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EACtC,OAAO,CAAC,EAAE;IAAE,UAAU,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAA;CAAE,GAC7D,OAAO,CAAC,eAAe,CAAC,CA0B1B"}
package/dist/eval.d.ts ADDED
@@ -0,0 +1,85 @@
1
+ export interface EvalCase {
2
+ name: string;
3
+ input: string;
4
+ expected?: string;
5
+ criteria?: EvalCriterion[];
6
+ tags?: string[];
7
+ }
8
+ export type LLMJudge = (prompt: string) => Promise<{
9
+ score: number;
10
+ reasoning: string;
11
+ }>;
12
+ export type EvalCriterion = {
13
+ type: 'contains';
14
+ value: string;
15
+ caseSensitive?: boolean;
16
+ } | {
17
+ type: 'not_contains';
18
+ value: string;
19
+ caseSensitive?: boolean;
20
+ } | {
21
+ type: 'matches';
22
+ pattern: string;
23
+ flags?: string;
24
+ } | {
25
+ type: 'length_min';
26
+ value: number;
27
+ } | {
28
+ type: 'length_max';
29
+ value: number;
30
+ } | {
31
+ type: 'json_valid';
32
+ } | {
33
+ type: 'json_matches';
34
+ schema: Record<string, unknown>;
35
+ } | {
36
+ type: 'custom';
37
+ name: string;
38
+ fn: (output: string) => boolean;
39
+ } | {
40
+ type: 'llm_judge';
41
+ prompt: string;
42
+ judge: LLMJudge;
43
+ threshold?: number;
44
+ } | {
45
+ type: 'semantic_similarity';
46
+ reference: string;
47
+ threshold?: number;
48
+ } | {
49
+ type: 'factual_accuracy';
50
+ facts: string[];
51
+ threshold?: number;
52
+ };
53
+ export interface EvalResult {
54
+ name: string;
55
+ passed: boolean;
56
+ score: number;
57
+ criteria: CriterionResult[];
58
+ input: string;
59
+ output: string;
60
+ durationMs: number;
61
+ tags: string[];
62
+ }
63
+ export interface CriterionResult {
64
+ type: string;
65
+ passed: boolean;
66
+ message: string;
67
+ }
68
+ export interface EvalSuiteConfig {
69
+ name: string;
70
+ cases: EvalCase[];
71
+ runner: (input: string) => Promise<string>;
72
+ concurrency?: number;
73
+ }
74
+ export interface EvalSuiteResult {
75
+ name: string;
76
+ total: number;
77
+ passed: number;
78
+ failed: number;
79
+ score: number;
80
+ results: EvalResult[];
81
+ durationMs: number;
82
+ }
83
+ export declare function runEvalSuite(config: EvalSuiteConfig): Promise<EvalSuiteResult>;
84
+ export declare function formatEvalReport(result: EvalSuiteResult): string;
85
+ //# sourceMappingURL=eval.d.ts.map
package/dist/eval.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../src/eval.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,QAAQ;IACxB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,QAAQ,CAAC,EAAE,aAAa,EAAE,CAAA;IAC1B,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;CACf;AAED,MAAM,MAAM,QAAQ,GAAG,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,CAAC,CAAA;AAExF,MAAM,MAAM,aAAa,GACtB;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,OAAO,CAAA;CAAE,GAC5D;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,OAAO,CAAA;CAAE,GAChE;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GACpD;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GACrC;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GACrC;IAAE,IAAI,EAAE,YAAY,CAAA;CAAE,GACtB;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAAE,GACzD;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAA;CAAE,GACjE;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,QAAQ,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GAC1E;IAAE,IAAI,EAAE,qBAAqB,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,GACtE;IAAE,IAAI,EAAE,kBAAkB,CAAC;IAAC,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,CAAA;AAEpE,MAAM,WAAW,UAAU;IAC1B,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,OAAO,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,EAAE,eAAe,EAAE,CAAA;IAC3B,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,EAAE,MAAM,CAAA;IAClB,IAAI,EAAE,MAAM,EAAE,CAAA;CACd;AAED,MAAM,WAAW,eAAe;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,OAAO,CAAA;IACf,OAAO,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,eAAe;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,QAAQ,EAAE,CAAA;IACjB,MAAM,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAA;IAC1C,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,eAAe;IAC/B,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM
,CAAA;IACd,KAAK,EAAE,MAAM,CAAA;IACb,OAAO,EAAE,UAAU,EAAE,CAAA;IACrB,UAAU,EAAE,MAAM,CAAA;CAClB;AAkUD,wBAAsB,YAAY,CAAC,MAAM,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC,CA+BpF;AAID,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,eAAe,GAAG,MAAM,CA0BhE"}
package/dist/fixtures.d.ts ADDED
@@ -0,0 +1,31 @@
1
+ import { type MockProvider, type MockResponseConfig } from './mock-provider';
2
+ export interface FixtureEntry {
3
+ request: {
4
+ messages: Array<{
5
+ role: string;
6
+ content: string;
7
+ }>;
8
+ model?: string;
9
+ system?: string;
10
+ };
11
+ response: MockResponseConfig;
12
+ timestamp?: string;
13
+ }
14
+ export interface Fixture {
15
+ readonly name: string;
16
+ readonly entries: FixtureEntry[];
17
+ toProvider(options?: {
18
+ matching?: 'sequential' | 'request-hash';
19
+ }): MockProvider;
20
+ toJSON(): string;
21
+ }
22
+ export declare function createFixture(name: string, entries: FixtureEntry[]): Fixture;
23
+ export declare function loadFixture(json: string): Fixture;
24
+ export interface FixtureRecorder {
25
+ wrap(provider: MockProvider): MockProvider;
26
+ getEntries(): FixtureEntry[];
27
+ toFixture(name: string): Fixture;
28
+ clear(): void;
29
+ }
30
+ export declare function createRecorder(): FixtureRecorder;
31
+ //# sourceMappingURL=fixtures.d.ts.map
package/dist/fixtures.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fixtures.d.ts","sourceRoot":"","sources":["../src/fixtures.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,YAAY,EAAE,KAAK,kBAAkB,EAAgB,MAAM,iBAAiB,CAAA;AAE1F,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE;QACR,QAAQ,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAA;SAAE,CAAC,CAAA;QAClD,KAAK,CAAC,EAAE,MAAM,CAAA;QACd,MAAM,CAAC,EAAE,MAAM,CAAA;KACf,CAAA;IACD,QAAQ,EAAE,kBAAkB,CAAA;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,OAAO;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,OAAO,EAAE,YAAY,EAAE,CAAA;IAChC,UAAU,CAAC,OAAO,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,YAAY,GAAG,cAAc,CAAA;KAAE,GAAG,YAAY,CAAA;IAChF,MAAM,IAAI,MAAM,CAAA;CAChB;AAOD,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAyD5E;AAED,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAGjD;AAID,MAAM,WAAW,eAAe;IAC/B,IAAI,CAAC,QAAQ,EAAE,YAAY,GAAG,YAAY,CAAA;IAC1C,UAAU,IAAI,YAAY,EAAE,CAAA;IAC5B,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAA;IAChC,KAAK,IAAI,IAAI,CAAA;CACb;AAED,wBAAgB,cAAc,IAAI,eAAe,CAgDhD"}
package/dist/index.d.ts ADDED
@@ -0,0 +1,19 @@
1
+ export { mockProvider } from './mock-provider';
2
+ export type { MockProvider, MockProviderOptions, MockResponseConfig } from './mock-provider';
3
+ export { createFixture, loadFixture, createRecorder } from './fixtures';
4
+ export type { Fixture, FixtureEntry, FixtureRecorder } from './fixtures';
5
+ export { runEvalSuite, formatEvalReport } from './eval';
6
+ export type { EvalCase, EvalCriterion, EvalResult, CriterionResult, EvalSuiteConfig, EvalSuiteResult, LLMJudge, } from './eval';
7
+ export { createSnapshotStore, hashOutput, testSnapshot } from './snapshot';
8
+ export type { PromptSnapshot, SnapshotStore, SnapshotTestResult } from './snapshot';
9
+ export { createPromptRegistry, definePrompt } from './prompts';
10
+ export type { PromptDefinition, PromptDiff, DiffLine, PromptRegistry } from './prompts';
11
+ export { createRegressionSuite } from './regression';
12
+ export type { RegressionBaseline, RegressionResult, RegressionDetail, RegressionSuite, } from './regression';
13
+ export { createReplayRecorder, createReplayPlayer } from './replay';
14
+ export type { ReplayEntry, ReplayRecorder, ReplayPlayer } from './replay';
15
+ export { createPinStore, pinOutput } from './pinning';
16
+ export type { Pin, PinStore, PinResult } from './pinning';
17
+ export { assertDeterministic, assertStable } from './determinism';
18
+ export type { DeterminismResult, StabilityResult } from './determinism';
19
+ //# sourceMappingURL=index.d.ts.map
package/dist/index.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAC9C,YAAY,EAAE,YAAY,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAA;AAG5F,OAAO,EAAE,aAAa,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,YAAY,CAAA;AACvE,YAAY,EAAE,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,YAAY,CAAA;AAGxE,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,QAAQ,CAAA;AACvD,YAAY,EACX,QAAQ,EACR,aAAa,EACb,UAAU,EACV,eAAe,EACf,eAAe,EACf,eAAe,EACf,QAAQ,GACR,MAAM,QAAQ,CAAA;AAGf,OAAO,EAAE,mBAAmB,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAC1E,YAAY,EAAE,cAAc,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAA;AAGnF,OAAO,EAAE,oBAAoB,EAAE,YAAY,EAAE,MAAM,WAAW,CAAA;AAC9D,YAAY,EAAE,gBAAgB,EAAE,UAAU,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,WAAW,CAAA;AAGvF,OAAO,EAAE,qBAAqB,EAAE,MAAM,cAAc,CAAA;AACpD,YAAY,EACX,kBAAkB,EAClB,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,GACf,MAAM,cAAc,CAAA;AAGrB,OAAO,EAAE,oBAAoB,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA;AACnE,YAAY,EAAE,WAAW,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAGzE,OAAO,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,WAAW,CAAA;AACrD,YAAY,EAAE,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,WAAW,CAAA;AAGzD,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,eAAe,CAAA;AACjE,YAAY,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA"}