@docshield/didactic 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +332 -183
- package/dist/index.cjs +1090 -550
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +134 -65
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +134 -65
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +1085 -552
- package/dist/index.mjs.map +1 -1
- package/package.json +20 -3
package/dist/index.d.cts
CHANGED
|
@@ -1,10 +1,70 @@
|
|
|
1
|
+
//#region src/optimizer/types.d.ts
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Inline optimization config for didactic.eval().
|
|
5
|
+
*/
|
|
6
|
+
type OptimizeConfig = {
|
|
7
|
+
systemPrompt: string;
|
|
8
|
+
patchSystemPrompt?: string;
|
|
9
|
+
mergeSystemPrompt?: string;
|
|
10
|
+
targetSuccessRate: number;
|
|
11
|
+
maxIterations?: number;
|
|
12
|
+
maxCost?: number;
|
|
13
|
+
apiKey: string;
|
|
14
|
+
storeLogs?: boolean | string;
|
|
15
|
+
provider: LLMProviders;
|
|
16
|
+
thinking?: boolean;
|
|
17
|
+
};
|
|
18
|
+
/**
|
|
19
|
+
* Result for a single optimization iteration.
|
|
20
|
+
*/
|
|
21
|
+
interface IterationResult<TInput = unknown, TOutput = unknown> {
|
|
22
|
+
iteration: number;
|
|
23
|
+
systemPrompt: string;
|
|
24
|
+
passed: number;
|
|
25
|
+
total: number;
|
|
26
|
+
testCases: TestCaseResult<TInput, TOutput>[];
|
|
27
|
+
cost: number;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Final result from optimization.
|
|
31
|
+
*/
|
|
32
|
+
interface OptimizeResult<TInput = unknown, TOutput = unknown> {
|
|
33
|
+
success: boolean;
|
|
34
|
+
finalPrompt: string;
|
|
35
|
+
iterations: IterationResult<TInput, TOutput>[];
|
|
36
|
+
totalCost: number;
|
|
37
|
+
logFolder?: string;
|
|
38
|
+
}
|
|
39
|
+
//#endregion
|
|
1
40
|
//#region src/types.d.ts
|
|
41
|
+
/**
|
|
42
|
+
* Supported LLM providers.
|
|
43
|
+
* Used by both optimizer and LLM-based comparators.
|
|
44
|
+
*/
|
|
45
|
+
declare enum LLMProviders {
|
|
46
|
+
anthropic_claude_opus = "anthropic_claude_opus",
|
|
47
|
+
anthropic_claude_sonnet = "anthropic_claude_sonnet",
|
|
48
|
+
anthropic_claude_haiku = "anthropic_claude_haiku",
|
|
49
|
+
openai_gpt5 = "openai_gpt5",
|
|
50
|
+
openai_gpt5_mini = "openai_gpt5_mini",
|
|
51
|
+
}
|
|
2
52
|
/**
|
|
3
53
|
* Result returned by a comparator function.
|
|
4
54
|
*/
|
|
5
55
|
interface ComparatorResult {
|
|
6
56
|
passed: boolean;
|
|
7
57
|
similarity?: number;
|
|
58
|
+
rationale?: string;
|
|
59
|
+
cost?: number;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* LLM configuration for use by LLM-based comparators.
|
|
63
|
+
* Can be specified at the top level of eval config to avoid repeating apiKey.
|
|
64
|
+
*/
|
|
65
|
+
interface LLMConfig {
|
|
66
|
+
apiKey: string;
|
|
67
|
+
provider?: LLMProviders;
|
|
8
68
|
}
|
|
9
69
|
/**
|
|
10
70
|
* Context passed to comparators for cross-field access.
|
|
@@ -12,15 +72,29 @@ interface ComparatorResult {
|
|
|
12
72
|
interface ComparatorContext {
|
|
13
73
|
expectedParent: unknown;
|
|
14
74
|
actualParent: unknown;
|
|
75
|
+
llmConfig?: LLMConfig;
|
|
15
76
|
}
|
|
16
77
|
/**
|
|
17
78
|
* A comparator function that compares expected vs actual.
|
|
79
|
+
* Can be synchronous or asynchronous (for LLM-based comparators).
|
|
18
80
|
*/
|
|
19
|
-
type Comparator<T = unknown> = (expected: T, actual: T, context?: ComparatorContext) => ComparatorResult
|
|
81
|
+
type Comparator<T = unknown> = (expected: T, actual: T, context?: ComparatorContext) => ComparatorResult | Promise<ComparatorResult>;
|
|
20
82
|
/**
|
|
21
|
-
*
|
|
83
|
+
* Marker interface for comparators with ordering metadata.
|
|
84
|
+
* Created by the unordered() wrapper function.
|
|
22
85
|
*/
|
|
23
|
-
|
|
86
|
+
interface ComparatorWithOrdering<T = unknown> {
|
|
87
|
+
(expected: T, actual: T, context?: ComparatorContext): ComparatorResult | Promise<ComparatorResult>;
|
|
88
|
+
_unordered: true;
|
|
89
|
+
_nestedComparators?: NestedComparatorConfig;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Recursive comparator configuration that matches data shape.
|
|
93
|
+
* Can be a comparator function, a comparator with ordering, or a nested object.
|
|
94
|
+
*/
|
|
95
|
+
type NestedComparatorConfig = {
|
|
96
|
+
[key: string]: Comparator<any> | ComparatorWithOrdering<any> | NestedComparatorConfig;
|
|
97
|
+
};
|
|
24
98
|
/**
|
|
25
99
|
* The result returned by an executor.
|
|
26
100
|
*/
|
|
@@ -40,19 +114,6 @@ interface TestCase<TInput = unknown, TOutput = unknown> {
|
|
|
40
114
|
input: TInput;
|
|
41
115
|
expected: TOutput;
|
|
42
116
|
}
|
|
43
|
-
/**
|
|
44
|
-
* Inline optimization config for didactic.eval().
|
|
45
|
-
*/
|
|
46
|
-
type OptimizeConfig = {
|
|
47
|
-
systemPrompt: string;
|
|
48
|
-
targetSuccessRate: number;
|
|
49
|
-
maxIterations?: number;
|
|
50
|
-
maxCost?: number;
|
|
51
|
-
apiKey: string;
|
|
52
|
-
storeLogs?: boolean | string;
|
|
53
|
-
provider: LLMProviders;
|
|
54
|
-
thinking?: boolean;
|
|
55
|
-
};
|
|
56
117
|
/**
|
|
57
118
|
* Base eval configuration shared by both modes.
|
|
58
119
|
*/
|
|
@@ -61,23 +122,27 @@ interface BaseEvalConfig<TInput = unknown, TOutput = unknown> {
|
|
|
61
122
|
executor: Executor<TInput, TOutput>;
|
|
62
123
|
testCases: TestCase<TInput, TOutput>[];
|
|
63
124
|
perTestThreshold?: number;
|
|
64
|
-
unorderedList?: boolean;
|
|
65
125
|
optimize?: OptimizeConfig;
|
|
66
126
|
rateLimitBatch?: number;
|
|
67
127
|
rateLimitPause?: number;
|
|
128
|
+
llmConfig?: LLMConfig;
|
|
129
|
+
storeLogs?: boolean | string;
|
|
68
130
|
}
|
|
69
131
|
/**
|
|
70
|
-
*
|
|
71
|
-
*
|
|
72
|
-
*
|
|
132
|
+
* Top-level comparators configuration.
|
|
133
|
+
* Can be:
|
|
134
|
+
* - A single comparator function for root-level primitives/arrays (e.g., `exact`, `numeric`)
|
|
135
|
+
* - A ComparatorWithOrdering for unordered root-level arrays (e.g., `unordered(exact)`)
|
|
136
|
+
* - A nested object structure matching your data shape
|
|
73
137
|
*/
|
|
74
|
-
type ComparatorsConfig =
|
|
138
|
+
type ComparatorsConfig = NestedComparatorConfig | ComparatorWithOrdering<unknown> | Comparator<unknown>;
|
|
75
139
|
/**
|
|
76
140
|
* Main eval configuration.
|
|
77
|
-
*
|
|
141
|
+
* - `comparators` (optional): field mapping or single comparator. Defaults to `exact` for entire shape.
|
|
142
|
+
* - `comparatorOverride`: whole-object comparison (bypasses field-level comparison).
|
|
78
143
|
*/
|
|
79
144
|
type EvalConfig<TInput = unknown, TOutput = unknown> = (BaseEvalConfig<TInput, TOutput> & {
|
|
80
|
-
comparators
|
|
145
|
+
comparators?: ComparatorsConfig;
|
|
81
146
|
comparatorOverride?: undefined;
|
|
82
147
|
}) | (BaseEvalConfig<TInput, TOutput> & {
|
|
83
148
|
comparatorOverride: Comparator<TOutput>;
|
|
@@ -90,6 +155,8 @@ interface FieldResult {
|
|
|
90
155
|
passed: boolean;
|
|
91
156
|
expected: unknown;
|
|
92
157
|
actual: unknown;
|
|
158
|
+
rationale?: string;
|
|
159
|
+
cost?: number;
|
|
93
160
|
}
|
|
94
161
|
/**
|
|
95
162
|
* Result for a single test case.
|
|
@@ -100,6 +167,7 @@ interface TestCaseResult<TInput = unknown, TOutput = unknown> {
|
|
|
100
167
|
actual?: TOutput;
|
|
101
168
|
additionalContext?: unknown;
|
|
102
169
|
cost?: number;
|
|
170
|
+
comparatorCost?: number;
|
|
103
171
|
passed: boolean;
|
|
104
172
|
fields: Record<string, FieldResult>;
|
|
105
173
|
error?: string;
|
|
@@ -120,47 +188,11 @@ interface EvalResult<TInput = unknown, TOutput = unknown> {
|
|
|
120
188
|
totalFields: number;
|
|
121
189
|
accuracy: number;
|
|
122
190
|
cost: number;
|
|
123
|
-
|
|
124
|
-
/**
|
|
125
|
-
* Chat message for LLM calls.
|
|
126
|
-
*/
|
|
127
|
-
interface Message {
|
|
128
|
-
role: 'system' | 'user' | 'assistant';
|
|
129
|
-
content: string;
|
|
130
|
-
}
|
|
131
|
-
/**
|
|
132
|
-
* Supported LLM providers.
|
|
133
|
-
*/
|
|
134
|
-
declare enum LLMProviders {
|
|
135
|
-
anthropic_claude_opus = "anthropic_claude_opus",
|
|
136
|
-
anthropic_claude_sonnet = "anthropic_claude_sonnet",
|
|
137
|
-
anthropic_claude_haiku = "anthropic_claude_haiku",
|
|
138
|
-
openai_gpt5 = "openai_gpt5",
|
|
139
|
-
openai_gpt5_mini = "openai_gpt5_mini",
|
|
140
|
-
}
|
|
141
|
-
/**
|
|
142
|
-
* Result for a single optimization iteration.
|
|
143
|
-
*/
|
|
144
|
-
interface IterationResult<TInput = unknown, TOutput = unknown> {
|
|
145
|
-
iteration: number;
|
|
146
|
-
systemPrompt: string;
|
|
147
|
-
passed: number;
|
|
148
|
-
total: number;
|
|
149
|
-
testCases: TestCaseResult<TInput, TOutput>[];
|
|
150
|
-
cost: number;
|
|
151
|
-
}
|
|
152
|
-
/**
|
|
153
|
-
* Final result from optimization.
|
|
154
|
-
*/
|
|
155
|
-
interface OptimizeResult<TInput = unknown, TOutput = unknown> {
|
|
156
|
-
success: boolean;
|
|
157
|
-
finalPrompt: string;
|
|
158
|
-
iterations: IterationResult<TInput, TOutput>[];
|
|
159
|
-
totalCost: number;
|
|
191
|
+
comparatorCost: number;
|
|
160
192
|
logFolder?: string;
|
|
161
193
|
}
|
|
162
194
|
//#endregion
|
|
163
|
-
//#region src/executors.d.ts
|
|
195
|
+
//#region src/eval/executors.d.ts
|
|
164
196
|
/**
|
|
165
197
|
* Configuration for endpoint executor.
|
|
166
198
|
*/
|
|
@@ -243,7 +275,7 @@ declare function fn<TInput, TOutput extends object, TRaw = TOutput>(config: FnCo
|
|
|
243
275
|
*/
|
|
244
276
|
declare function mock<TInput, TOutput extends object>(outputsOrFn: TOutput[] | ((input: TInput, systemPrompt?: string) => TOutput)): Executor<TInput, TOutput>;
|
|
245
277
|
//#endregion
|
|
246
|
-
//#region src/comparators.d.ts
|
|
278
|
+
//#region src/eval/comparators/comparators.d.ts
|
|
247
279
|
/** Checks if actual string contains a substring. */
|
|
248
280
|
declare function contains(substring: string): Comparator<string>;
|
|
249
281
|
/** Creates a comparator with custom logic. */
|
|
@@ -269,14 +301,51 @@ declare function within(config: {
|
|
|
269
301
|
tolerance: number;
|
|
270
302
|
mode?: 'percentage' | 'absolute';
|
|
271
303
|
}): Comparator<number>;
|
|
304
|
+
/** Configuration for LLM-based comparison. */
|
|
305
|
+
interface LLMCompareConfig {
|
|
306
|
+
provider?: LLMProviders;
|
|
307
|
+
/** API key for LLM provider. If not provided, uses llmConfig.apiKey from eval config. */
|
|
308
|
+
apiKey?: string;
|
|
309
|
+
systemPrompt?: string;
|
|
310
|
+
}
|
|
311
|
+
/**
|
|
312
|
+
* Uses an LLM to compare expected vs actual values.
|
|
313
|
+
* Returns a comparison result with rationale and cost tracking.
|
|
314
|
+
* Default provider: anthropic_claude_haiku (fastest, cheapest).
|
|
315
|
+
*/
|
|
316
|
+
declare function llmCompare(config: LLMCompareConfig): Comparator;
|
|
317
|
+
/**
|
|
318
|
+
* Marks a comparator or comparator config as unordered.
|
|
319
|
+
* When applied to an array field, items will be matched by similarity
|
|
320
|
+
* rather than index position (using Hungarian algorithm).
|
|
321
|
+
*
|
|
322
|
+
* @example
|
|
323
|
+
* // Unordered array of objects
|
|
324
|
+
* lineItems: unordered({
|
|
325
|
+
* description: name,
|
|
326
|
+
* price: within({ tolerance: 5 })
|
|
327
|
+
* })
|
|
328
|
+
*
|
|
329
|
+
* @example
|
|
330
|
+
* // Unordered array of primitives
|
|
331
|
+
* tags: unordered(exact)
|
|
332
|
+
*
|
|
333
|
+
* @example
|
|
334
|
+
* // When entire output is an array
|
|
335
|
+
* comparators: unordered({
|
|
336
|
+
* carrier: exact,
|
|
337
|
+
* premium: within({ tolerance: 0.05 })
|
|
338
|
+
* })
|
|
339
|
+
*/
|
|
340
|
+
declare function unordered<T>(comparator: Comparator<T> | NestedComparatorConfig): ComparatorWithOrdering<T>;
|
|
272
341
|
//#endregion
|
|
273
|
-
//#region src/eval.d.ts
|
|
342
|
+
//#region src/eval/eval.d.ts
|
|
274
343
|
/**
|
|
275
344
|
* Run all test cases and return results.
|
|
276
345
|
*/
|
|
277
346
|
declare function evaluate<TInput, TOutput>(config: EvalConfig<TInput, TOutput>): Promise<EvalResult<TInput, TOutput>>;
|
|
278
347
|
//#endregion
|
|
279
|
-
//#region src/optimizer.d.ts
|
|
348
|
+
//#region src/optimizer/optimizer.d.ts
|
|
280
349
|
declare function optimize<TInput, TOutput>(evalConfig: EvalConfig<TInput, TOutput>, config: OptimizeConfig): Promise<OptimizeResult<TInput, TOutput>>;
|
|
281
350
|
//#endregion
|
|
282
351
|
//#region src/index.d.ts
|
|
@@ -335,5 +404,5 @@ declare const didactic: {
|
|
|
335
404
|
fn<TInput, TOutput extends object>(config: FnConfig<TInput, TOutput>): Executor<TInput, TOutput>;
|
|
336
405
|
};
|
|
337
406
|
//#endregion
|
|
338
|
-
export { type Comparator, type ComparatorContext, type
|
|
407
|
+
export { type Comparator, type ComparatorContext, type ComparatorResult, type EndpointConfig, type EvalConfig, type EvalResult, type Executor, type ExecutorResult, type FnConfig, type LLMCompareConfig, type LLMConfig, LLMProviders, type OptimizeConfig, type OptimizeResult, type TestCase, type TestCaseResult, contains, custom, date, didactic as default, didactic as didact, didactic, endpoint, evaluate, exact, fn, llmCompare, mock, name, numeric, oneOf, optimize, presence, unordered, within };
|
|
339
408
|
//# sourceMappingURL=index.d.cts.map
|
package/dist/index.d.cts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.cts","names":[],"sources":["../src/types.ts","../src/executors.ts","../src/comparators.ts","../src/eval.ts","../src/optimizer.ts","../src/index.ts"],"sourcesContent":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"index.d.cts","names":[],"sources":["../src/optimizer/types.ts","../src/types.ts","../src/eval/executors.ts","../src/eval/comparators/comparators.ts","../src/eval/eval.ts","../src/optimizer/optimizer.ts","../src/index.ts"],"sourcesContent":[],"mappings":";;;;;KAmBY,cAAA;;ECTA,iBAAY,CAAA,EAAA,MAAA;EA4BP,iBAAA,CAAA,EAAgB,MAAA;EAWhB,iBAAS,EAAA,MAEb;EAMI,aAAA,CAAA,EAAA,MAAiB;EAUtB,OAAA,CAAA,EAAA,MAAU;EACV,MAAA,EAAA,MAAA;EACF,SAAA,CAAA,EAAA,OAAA,GAAA,MAAA;EACE,QAAA,ED1CA,YC0CA;EACP,QAAA,CAAA,EAAA,OAAA;CAA2B;;;AAMhC;AAEc,UD5CG,eC4CH,CAAA,SAAA,OAAA,EAAA,UAAA,OAAA,CAAA,CAAA;EACF,SAAA,EAAA,MAAA;EACE,YAAA,EAAA,MAAA;EACT,MAAA,EAAA,MAAA;EAA2B,KAAA,EAAA,MAAA;EAAR,SAAA,ED1CX,cC0CW,CD1CI,MC0CJ,ED1CY,OC0CZ,CAAA,EAAA;EAED,IAAA,EAAA,MAAA;;AAOvB;;;AAKM,UDjDW,cCiDX,CAAA,SAAA,OAAA,EAAA,UAAA,OAAA,CAAA,CAAA;EAAsB,OAAA,EAAA,OAAA;EAUX,WAAA,EAAA,MAAc;EASnB,UAAA,EDjEE,eCiEM,CDjEU,MCiEV,EDjEkB,OCiElB,CAAA,EAAA;EACX,SAAA,EAAA,MAAA;EAEmB,SAAA,CAAA,EAAA,MAAA;;;;ADnG5B;AAgBA;;;AAKa,aC9BD,YAAA;ED8Be,qBAAA,GAAA,uBAAA;EAOV,uBAAc,GAAA,yBAAA;EAGD,sBAAA,GAAA,wBAAA;EAAQ,WAAA,GAAA,aAAA;EAAxB,gBAAA,GAAA,kBAAA;;;;ACxCd;AA4BiB,UAAA,gBAAA,CAAgB;EAWhB,MAAA,EAAA,OAAS;EAQT,UAAA,CAAA,EAAA,MAAA;EAUL,SAAA,CAAA,EAAA,MAAU;EACV,IAAA,CAAA,EAAA,MAAA;;;;;;AAGmB,UAtBd,SAAA,CAsBc;EAMd,MAAA,EAAA,MAAA;EAEH,QAAA,CAAA,EA5BD,YA4BC;;;;;AAGU,UAzBP,iBAAA,CAyBO;EAED,cAAA,EAAA,OAAA;EAAsB,YAAA,EAAA,OAAA;EAOjC,SAAA,CAAA,EA/BE,SA+BF;;;;;AAeZ;AASY,KAhDA,UAgDQ,CAAA,IAAA,OAAA,CAAA,GAAA,CAAA,QAAA,EA/CR,CA+CQ,EAAA,MAAA,EA9CV,CA8CU,EAAA,OAAA,CAAA,EA7CR,iBA6CQ,EAAA,GA5Cf,gBA4Ce,GA5CI,OA4CJ,CA5CY,gBA4CZ,CAAA;;;;;AAGR,UAzCK,sBAyCL,CAAA,IAAA,OAAA,CAAA,CAAA;EASK,CAAA,QAAA,EAhDH,CAgDW,EAAA,MAAA,EA/Cb,CA+Ca,EAAA,OAEb,CAFa,EA9CX,iBAgDF,CAAA,EA/CP,gBA+Cc,GA/CK,OA+CL,CA/Ca,gBA+Cb,CAAA;EAMT,UAAA,EAAA,IAAA;EAEW,kBAAA,CAAA,EArDE,sBAqDF;;;;;;AAGR,KAjDD,sBAAA,GAiDC;EAGC,CAAA,GAAA,EAAA,MAAA,CAAA,EAlDR,UAkDQ,CAAA,GAAA,CAAA,GAhDR,sBAgDQ,CAAA,GAAA,CAAA,GA/CR,sBA+CQ;CAAS;AAWvB;;;AAGI,UAnDa,cAmDb,CAAA,UAAA,OAAA,CAAA,CAAA;EAAU,MAAA,EAlDJ,OAkDI;EAOF,iBAAU,CAAA,EAAA,OAAA;EACF,IAAA,CAAA,EAAA,MAAA;;;;;AAIQ,KAtDhB,QAsDgB,CAAA,SAAA,OAAA,EAAA,UAAA,OAAA,CAAA,GAAA,CAAA,KAAA,EArDnB,MAqDmB,EAAA,YAAA,CAAA,EAAA,MAAA,EAAA,GAnDvB,OAmDuB,CAnDf,cAmDe,CAnDA,OAmDA,CAAA,CAAA;;;;AACQ,UA3CnB,QA2CmB,CAAA,SAAA,OAAA,EAAA,UAAA,OAAA,CAAA,CAAA;EAOnB,KAAA,EAjDR,MAiDQ;EAWA,QAAA,EA3DL,OA2DmB;;;;;UArDrB,cA6DA,CAAA,SAAA,OAAA,EAAA,UAAA,OAAA,CAAA,CAAA;EAAM,YAAA,CAAA,EAAA,MAAA;EAUC,QAAA,EArEL,QAqEe,CArEN,MAqEM,EArEE,OAqEF,CAAA;EAEC,SAAA,EAtEf,QAsEe,CAtEN,MAsEM,EAtEE,OAsEF,CAAA,EAAA;EAAQ,gBAAA,CAAA,EAAA,MAAA;EAAvB,QAAA,CAAA,EApEA,cAoEA;EAAc,cAAA,CAAA,EAAA,MAAA;;cAjEb;;ACzId;AAeA;;;;;;;AAGkC,KDkItB,iBAAA,GACR,sBCnI8B,GDoI9B,sBCpI8B,CAAA,OAAA,CAAA,GDqI9B,UCrI8B,CAAA,OAAA,CAAA;;;AAclC;;;AAGY,KD2HA,UC3HA,CAAA,SAAA,OAAA,EAAA,UAAA,OAAA,CAAA,GAAA,CD4HP,cC5HO,CD4HQ,MC5HR,ED4HgB,OC5HhB,CAAA,GAAA;EAAQ,WAAA,CAAA,ED6HA,iBC7HA;EAAjB,kBAAA,CAAA,EAAA,SAAA;CAAQ,CAAA,GAAA,CDgIN,cChIM,CDgIS,MChIT,EDgIiB,OChIjB,CAAA,GAAA;EAmFK,kBAAE,ED8CQ,UC9CR,CD8CmB,OC9CnB,CAAA;EAAwC,WAAA,CAAA,EAAA,SAAA;CACvC,CAAA;;;;AACP,UDmDK,WAAA,CCnDL;EAAQ,MAAA,EAAA,OAAA;EAAjB,QAAA,EAAA,OAAA;EAAQ,MAAA,EAAA,OAAA;EAqCK,SAAI,CAAA,EAAA,MAAA;EACL,IAAA,CAAA,EAAA,MAAA;;;;;AACZ,UDuBc,cCvBd,CAAA,SAAA,OAAA,EAAA,UAAA,OAAA,CAAA,CAAA;EAAQ,KAAA,EDwBF,MCxBE;YDyBC;WACD;;EEjKK,IAAA,CAAA,EAAA,MAAQ;EAQR,cAAM,CAAA,EAAA,MAAA;EACA,MAAA,EAAA,OAAA;EAAW,MAAA,EF6JvB,ME7JuB,CAAA,MAAA,EF6JR,WE7JQ,CAAA;EAAa,KAAA,CAAA,EAAA,MAAA;EAC/B,YAAA,EAAA,MAAA;EAAX,WAAA,EAAA,MAAA;EAAU,QAAA,EAAA,MAAA;AAQd;AAqBA;AASA;AAwCA;AAkCgB,UFsDC,UEtDI,CAAA,SAAA,OAAA,EAAA,UAAA,OAAA,CAAA,CAAA;EACK,YAAA,CAAA,EAAA,MAAA;EACZ,SAAA,EFsDD,cEtDC,CFsDc,MEtDd,EFsDsB,OEtDtB,CAAA,EAAA;EAAX,MAAA,EAAA,MAAA;EAAU,KAAA,EAAA,MAAA;EAeG,WAAQ,EAAA,MAAA;EAQR,aAAM,EAAA,MAGlB;EAsBa,WAAA,EAAA,MAAgB;EAgDjB,QAAA,EAAA,MAAU;EA2EV,IAAA,EAAA,MAAS;EACA,cAAA,EAAA,MAAA;EAAX,SAAA,CAAA,EAAA,MAAA;;;;AHnTd;AAgBA;;AAKoC,UElCnB,cFkCmB,CAAA,UAAA,OAAA,CAAA,CAAA;EAAvB,MAAA,CAAA,EAAA,MAAA,GAAA,KAAA;EAAc,OAAA,CAAA,EEhCf,MFgCe,CAAA,MAAA,EAAA,MAAA,CAAA;EAOV,WAAA,CAAA,EAAA,CAAA,QAAc,EAAA,GAAA,EAAA,GEtCI,OFsCJ;EAGD,oBAAA,CAAA,EAAA,CAAA,QAAA,EAAA,GAAA,EAAA,GAAA,OAAA;EAAQ,OAAA,CAAA,EAAA,CAAA,QAAA,EAAA,GAAA,EAAA,GAAA,MAAA;EAAxB,OAAA,CAAA,EAAA,MAAA;;;;;ACxCd;AA4BA;AAWA;AAQiB,UCpCA,QDoCiB,CAAA,MAAA,EAAA,OAGpB,EAAA,OCvCoC,ODuC3B,CAAA,CAAA;EAOX,EAAA,EAAA,CAAA,KAAA,EC7CE,MD6CQ,EAAA,YAAA,CAAA,EAAA,MAAA,EAAA,GC7C0B,OD6C1B,CC7CkC,ID6ClC,CAAA;EACV,WAAA,CAAA,EAAA,CAAA,MAAA,EC7Ca,ID6Cb,EAAA,GC7CsB,OD6CtB;EACF,oBAAA,CAAA,EAAA,CAAA,MAAA,EC7CwB,ID6CxB,EAAA,GAAA,OAAA;EACE,OAAA,CAAA,EAAA,CAAA,MAAA,EC7CS,ID6CT,EAAA,GAAA,MAAA;;;;;AAOZ;;;;;;;AAOuB,iBC9CP,QD8CO,CAAA,SAAA,OAAA,EAAA,UAAA,OAAA,CAAA,CAAA,GAAA,EAAA,MAAA,EAAA,MAAA,CAAA,EC5Cb,cD4Ca,CC5CE,OD4CF,CAAA,CAAA,EC3CpB,QD2CoB,CC3CX,MD2CW,EC3CH,OD2CG,CAAA;;AAOvB;;;;;AAeA;AASA;;;;;;AAYA;AAGC;;;;;;;;;AAasB,iBCnBP,EDmBO,CAAA,MAAA,EAAA,gBAAA,MAAA,EAAA,OCnBmC,ODmBnC,CAAA,CAAA,MAAA,EClBb,QDkBa,CClBJ,MDkBI,EClBI,ODkBJ,EClBa,IDkBb,CAAA,CAAA,ECjBpB,QDiBoB,CCjBX,MDiBW,ECjBH,ODiBG,CAAA;AAWvB;;;;;AAUA;;;;;;;;;;;AAaA;AAWA;;;;;AAQU,iBCjCM,IDiCN,CAAA,MAAA,EAAA,gBAAA,MAAA,CAAA,CAAA,WAAA,EChCK,ODgCL,EAAA,GAAA,CAAA,CAAA,KAAA,EChC0B,MDgC1B,EAAA,YAAA,CAAA,EAAA,MAAA,EAAA,GChC4D,ODgC5D,CAAA,CAAA,EC/BP,QD+BO,CC/BE,MD+BF,EC/BU,OD+BV,CAAA;;;ADjLV;AAgBiB,iBGLD,QAAA,CHKgB,SAAA,EAAA,MAAA,CAAA,EGLa,UHKb,CAAA,MAAA,CAAA;;AAKI,iBGFpB,MHEoB,CAAA,CAAA,CAAA,CAAA,MAAA,EAAA;EAAvB,OAAA,EAAA,CAAA,QAAA,EGDS,CHCT,EAAA,MAAA,EGDoB,CHCpB,EAAA,OAAA,CAAA,EGDiC,iBHCjC,EAAA,GAAA,OAAA;CAAc,CAAA,EGAvB,UHAuB,CGAZ,CHAY,CAAA;AAO3B;AAG8B,iBGFd,IAAA,CHEc,QAAA,EAAA,OAAA,EAAA,MAAA,EAAA,OAAA,CAAA,EGF4B,gBHE5B;;AAAhB,iBGmBE,KAAA,CHnBF,QAAA,EAAA,OAAA,EAAA,MAAA,EAAA,OAAA,CAAA,EGmB6C,gBHnB7C;;iBG4BE,IAAA,sCAA0C;;cAwC7C,kDACwB;EF7GzB,QAAA,EAAA,CAAA,QAAY,EAAA,OAAA,EAAA,MAAA,EAAA,OAAA,EAAA,GE+GyB,gBF/GzB;AA4BxB,CAAA;AAWA;AAQiB,iBE+FD,KF/FkB,CAAA,UAGpB,MAAS,CAAA,CAAA,aAAA,EAAA,SE6FG,CF7FH,EAAA,CAAA,EE8FpB,UF9FoB,CE8FT,CF9FS,CAAA;AAOvB;AACY,iBEqGI,QAAA,CFrGJ,QAAA,EAAA,OAAA,EAAA,MAAA,EAAA,OAAA,CAAA,EEqGkD,gBFrGlD;;AAEA,iBE2GI,MAAA,CF3GJ,MAAA,EAAA;EACP,SAAA,EAAA,MAAA;EAA2B,IAAA,CAAA,EAAA,YAAA,GAAA,UAAA;CAAR,CAAA,EE6GpB,UF7GoB,CAAA,MAAA,CAAA;;AAMP,UE6HA,gBAAA,CF7HsB;EAEzB,QAAA,CAAA,EE4HD,YF5HC;EACF;EACE,MAAA,CAAA,EAAA,MAAA;EACT,YAAA,CAAA,EAAA,MAAA;;;;;AASL;;AAIM,iBE2JU,UAAA,CF3JV,MAAA,EE2J6B,gBF3J7B,CAAA,EE2JgD,UF3JhD;;;AAWN;AASA;;;;;;AAYA;AAGC;;;;;;;;;;AAwBD;;;AAGI,iBEwKY,SFxKZ,CAAA,CAAA,CAAA,CAAA,UAAA,EEyKU,UFzKV,CEyKqB,CFzKrB,CAAA,GEyK0B,sBFzK1B,CAAA,EE0KD,sBF1KC,CE0KsB,CF1KtB,CAAA;;;AD1IJ;AAgBA;;AAKoC,iBIhBd,QJgBc,CAAA,MAAA,EAAA,OAAA,CAAA,CAAA,MAAA,EIf1B,UJe0B,CIff,MJee,EIfP,OJeO,CAAA,CAAA,EIdjC,OJciC,CIdzB,UJcyB,CIdd,MJcc,EIdN,OJcM,CAAA,CAAA;;;AALnB,iBKOK,QLPU,CAAA,MAAA,EAAA,OAAA,CAAA,CAAA,UAAA,EKQlB,ULRkB,CKQP,MLRO,EKQC,OLRD,CAAA,EAAA,MAAA,EKStB,cLTsB,CAAA,EKU7B,OLV6B,CKUrB,cLVqB,CKUN,MLVM,EKUE,OLVF,CAAA,CAAA;;;;;;iBMyCvB,sCACC,WAAW,QAAQ;YAAuB;ALnEpD,CAAA,CAAA,EKoEG,OLpES,CKoED,cLpEa,CKoEE,MLpEF,EKoEU,OLpEV,CAAA,CAAA;AA4BxB,iBKyCS,YLzCwB,CAAA,MAAA,EAAA,OAAA,CAAA,CAAA,MAAA,EK0CvB,UL1CuB,CK0CZ,ML1CY,EK0CJ,OL1CI,CAAA,GAAA;EAWhB,QAAA,CAAA,EAAA,SAAS;AAQ1B,CAAA,CAAA,EKwBG,OLxBc,CKwBN,ULxBM,CKwBK,MLxBY,EKwBJ,OLrBhB,CAAA,CAAA;AAOd,iBKeS,YLfa,CAAA,MAAA,EAAA,OAAA,CAAA,CAAA,MAAA,EKgBZ,ULhBY,CKgBD,MLhBC,EKgBO,OLhBP,CAAA,CAAA,EKiBnB,OLjBmB,CKiBX,ULjBW,CKiBA,MLjBA,EKiBQ,OLjBR,CAAA,GKiBmB,cLjBnB,CKiBkC,MLjBlC,EKiB0C,OLjB1C,CAAA,CAAA;;;;;;;;AAUtB;;;;;;;;;AAcA;;;;;AAeA;AASA;;;;AAGK,cKGQ,QLHR,EAAA;EAAO;AASZ;AAGC;EAOoB,IAAA,EAAA,mBAAA;EAAQ;;;EACC,QAAA,CAAA,MAAA,EAAA,OAAA,CAAA,CAAA,UAAA,EKPd,ULOc,CKPH,MLOG,EKPK,OLOL,CAAA,EAAA,MAAA,EKNlB,cLMkB,CAAA,EKLzB,OLKyB,CKLjB,cLKiB,CKLF,MLKE,EKLM,OLKN,CAAA,CAAA;EAAjB;;;EAKU,QAAA,CAAA,SAAA,OAAA,EAAA,UAAA,OAAA,CAAA,CAAA,GAAA,EAAA,MAAA,EAAA,MAAA,CAAA,EKDV,cLCU,CKDK,OLCL,CAAA,CAAA,EKAlB,QLAkB,CKAT,MLAS,EKAD,OLAC,CAAA;EAWX;;;EAGR,EAAA,CAAA,MAAA,EAAA,gBAAA,MAAA,CAAA,CAAA,MAAA,EKNQ,QLMR,CKNiB,MLMjB,EKNyB,OLMzB,CAAA,CAAA,EKLC,QLKD,CKLU,MLKV,EKLkB,OLKlB,CAAA;CAAU"}
|
package/dist/index.d.mts
CHANGED
|
@@ -1,10 +1,70 @@
|
|
|
1
|
+
//#region src/optimizer/types.d.ts
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Inline optimization config for didactic.eval().
|
|
5
|
+
*/
|
|
6
|
+
type OptimizeConfig = {
|
|
7
|
+
systemPrompt: string;
|
|
8
|
+
patchSystemPrompt?: string;
|
|
9
|
+
mergeSystemPrompt?: string;
|
|
10
|
+
targetSuccessRate: number;
|
|
11
|
+
maxIterations?: number;
|
|
12
|
+
maxCost?: number;
|
|
13
|
+
apiKey: string;
|
|
14
|
+
storeLogs?: boolean | string;
|
|
15
|
+
provider: LLMProviders;
|
|
16
|
+
thinking?: boolean;
|
|
17
|
+
};
|
|
18
|
+
/**
|
|
19
|
+
* Result for a single optimization iteration.
|
|
20
|
+
*/
|
|
21
|
+
interface IterationResult<TInput = unknown, TOutput = unknown> {
|
|
22
|
+
iteration: number;
|
|
23
|
+
systemPrompt: string;
|
|
24
|
+
passed: number;
|
|
25
|
+
total: number;
|
|
26
|
+
testCases: TestCaseResult<TInput, TOutput>[];
|
|
27
|
+
cost: number;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Final result from optimization.
|
|
31
|
+
*/
|
|
32
|
+
interface OptimizeResult<TInput = unknown, TOutput = unknown> {
|
|
33
|
+
success: boolean;
|
|
34
|
+
finalPrompt: string;
|
|
35
|
+
iterations: IterationResult<TInput, TOutput>[];
|
|
36
|
+
totalCost: number;
|
|
37
|
+
logFolder?: string;
|
|
38
|
+
}
|
|
39
|
+
//#endregion
|
|
1
40
|
//#region src/types.d.ts
|
|
41
|
+
/**
|
|
42
|
+
* Supported LLM providers.
|
|
43
|
+
* Used by both optimizer and LLM-based comparators.
|
|
44
|
+
*/
|
|
45
|
+
declare enum LLMProviders {
|
|
46
|
+
anthropic_claude_opus = "anthropic_claude_opus",
|
|
47
|
+
anthropic_claude_sonnet = "anthropic_claude_sonnet",
|
|
48
|
+
anthropic_claude_haiku = "anthropic_claude_haiku",
|
|
49
|
+
openai_gpt5 = "openai_gpt5",
|
|
50
|
+
openai_gpt5_mini = "openai_gpt5_mini",
|
|
51
|
+
}
|
|
2
52
|
/**
|
|
3
53
|
* Result returned by a comparator function.
|
|
4
54
|
*/
|
|
5
55
|
interface ComparatorResult {
|
|
6
56
|
passed: boolean;
|
|
7
57
|
similarity?: number;
|
|
58
|
+
rationale?: string;
|
|
59
|
+
cost?: number;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* LLM configuration for use by LLM-based comparators.
|
|
63
|
+
* Can be specified at the top level of eval config to avoid repeating apiKey.
|
|
64
|
+
*/
|
|
65
|
+
interface LLMConfig {
|
|
66
|
+
apiKey: string;
|
|
67
|
+
provider?: LLMProviders;
|
|
8
68
|
}
|
|
9
69
|
/**
|
|
10
70
|
* Context passed to comparators for cross-field access.
|
|
@@ -12,15 +72,29 @@ interface ComparatorResult {
|
|
|
12
72
|
interface ComparatorContext {
|
|
13
73
|
expectedParent: unknown;
|
|
14
74
|
actualParent: unknown;
|
|
75
|
+
llmConfig?: LLMConfig;
|
|
15
76
|
}
|
|
16
77
|
/**
|
|
17
78
|
* A comparator function that compares expected vs actual.
|
|
79
|
+
* Can be synchronous or asynchronous (for LLM-based comparators).
|
|
18
80
|
*/
|
|
19
|
-
type Comparator<T = unknown> = (expected: T, actual: T, context?: ComparatorContext) => ComparatorResult
|
|
81
|
+
type Comparator<T = unknown> = (expected: T, actual: T, context?: ComparatorContext) => ComparatorResult | Promise<ComparatorResult>;
|
|
20
82
|
/**
|
|
21
|
-
*
|
|
83
|
+
* Marker interface for comparators with ordering metadata.
|
|
84
|
+
* Created by the unordered() wrapper function.
|
|
22
85
|
*/
|
|
23
|
-
|
|
86
|
+
interface ComparatorWithOrdering<T = unknown> {
|
|
87
|
+
(expected: T, actual: T, context?: ComparatorContext): ComparatorResult | Promise<ComparatorResult>;
|
|
88
|
+
_unordered: true;
|
|
89
|
+
_nestedComparators?: NestedComparatorConfig;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Recursive comparator configuration that matches data shape.
|
|
93
|
+
* Can be a comparator function, a comparator with ordering, or a nested object.
|
|
94
|
+
*/
|
|
95
|
+
type NestedComparatorConfig = {
|
|
96
|
+
[key: string]: Comparator<any> | ComparatorWithOrdering<any> | NestedComparatorConfig;
|
|
97
|
+
};
|
|
24
98
|
/**
|
|
25
99
|
* The result returned by an executor.
|
|
26
100
|
*/
|
|
@@ -40,19 +114,6 @@ interface TestCase<TInput = unknown, TOutput = unknown> {
|
|
|
40
114
|
input: TInput;
|
|
41
115
|
expected: TOutput;
|
|
42
116
|
}
|
|
43
|
-
/**
|
|
44
|
-
* Inline optimization config for didactic.eval().
|
|
45
|
-
*/
|
|
46
|
-
type OptimizeConfig = {
|
|
47
|
-
systemPrompt: string;
|
|
48
|
-
targetSuccessRate: number;
|
|
49
|
-
maxIterations?: number;
|
|
50
|
-
maxCost?: number;
|
|
51
|
-
apiKey: string;
|
|
52
|
-
storeLogs?: boolean | string;
|
|
53
|
-
provider: LLMProviders;
|
|
54
|
-
thinking?: boolean;
|
|
55
|
-
};
|
|
56
117
|
/**
|
|
57
118
|
* Base eval configuration shared by both modes.
|
|
58
119
|
*/
|
|
@@ -61,23 +122,27 @@ interface BaseEvalConfig<TInput = unknown, TOutput = unknown> {
|
|
|
61
122
|
executor: Executor<TInput, TOutput>;
|
|
62
123
|
testCases: TestCase<TInput, TOutput>[];
|
|
63
124
|
perTestThreshold?: number;
|
|
64
|
-
unorderedList?: boolean;
|
|
65
125
|
optimize?: OptimizeConfig;
|
|
66
126
|
rateLimitBatch?: number;
|
|
67
127
|
rateLimitPause?: number;
|
|
128
|
+
llmConfig?: LLMConfig;
|
|
129
|
+
storeLogs?: boolean | string;
|
|
68
130
|
}
|
|
69
131
|
/**
|
|
70
|
-
*
|
|
71
|
-
*
|
|
72
|
-
*
|
|
132
|
+
* Top-level comparators configuration.
|
|
133
|
+
* Can be:
|
|
134
|
+
* - A single comparator function for root-level primitives/arrays (e.g., `exact`, `numeric`)
|
|
135
|
+
* - A ComparatorWithOrdering for unordered root-level arrays (e.g., `unordered(exact)`)
|
|
136
|
+
* - A nested object structure matching your data shape
|
|
73
137
|
*/
|
|
74
|
-
type ComparatorsConfig =
|
|
138
|
+
type ComparatorsConfig = NestedComparatorConfig | ComparatorWithOrdering<unknown> | Comparator<unknown>;
|
|
75
139
|
/**
|
|
76
140
|
* Main eval configuration.
|
|
77
|
-
*
|
|
141
|
+
* - `comparators` (optional): field mapping or single comparator. Defaults to `exact` for entire shape.
|
|
142
|
+
* - `comparatorOverride`: whole-object comparison (bypasses field-level comparison).
|
|
78
143
|
*/
|
|
79
144
|
type EvalConfig<TInput = unknown, TOutput = unknown> = (BaseEvalConfig<TInput, TOutput> & {
|
|
80
|
-
comparators
|
|
145
|
+
comparators?: ComparatorsConfig;
|
|
81
146
|
comparatorOverride?: undefined;
|
|
82
147
|
}) | (BaseEvalConfig<TInput, TOutput> & {
|
|
83
148
|
comparatorOverride: Comparator<TOutput>;
|
|
@@ -90,6 +155,8 @@ interface FieldResult {
|
|
|
90
155
|
passed: boolean;
|
|
91
156
|
expected: unknown;
|
|
92
157
|
actual: unknown;
|
|
158
|
+
rationale?: string;
|
|
159
|
+
cost?: number;
|
|
93
160
|
}
|
|
94
161
|
/**
|
|
95
162
|
* Result for a single test case.
|
|
@@ -100,6 +167,7 @@ interface TestCaseResult<TInput = unknown, TOutput = unknown> {
|
|
|
100
167
|
actual?: TOutput;
|
|
101
168
|
additionalContext?: unknown;
|
|
102
169
|
cost?: number;
|
|
170
|
+
comparatorCost?: number;
|
|
103
171
|
passed: boolean;
|
|
104
172
|
fields: Record<string, FieldResult>;
|
|
105
173
|
error?: string;
|
|
@@ -120,47 +188,11 @@ interface EvalResult<TInput = unknown, TOutput = unknown> {
|
|
|
120
188
|
totalFields: number;
|
|
121
189
|
accuracy: number;
|
|
122
190
|
cost: number;
|
|
123
|
-
|
|
124
|
-
/**
|
|
125
|
-
* Chat message for LLM calls.
|
|
126
|
-
*/
|
|
127
|
-
interface Message {
|
|
128
|
-
role: 'system' | 'user' | 'assistant';
|
|
129
|
-
content: string;
|
|
130
|
-
}
|
|
131
|
-
/**
|
|
132
|
-
* Supported LLM providers.
|
|
133
|
-
*/
|
|
134
|
-
declare enum LLMProviders {
|
|
135
|
-
anthropic_claude_opus = "anthropic_claude_opus",
|
|
136
|
-
anthropic_claude_sonnet = "anthropic_claude_sonnet",
|
|
137
|
-
anthropic_claude_haiku = "anthropic_claude_haiku",
|
|
138
|
-
openai_gpt5 = "openai_gpt5",
|
|
139
|
-
openai_gpt5_mini = "openai_gpt5_mini",
|
|
140
|
-
}
|
|
141
|
-
/**
|
|
142
|
-
* Result for a single optimization iteration.
|
|
143
|
-
*/
|
|
144
|
-
interface IterationResult<TInput = unknown, TOutput = unknown> {
|
|
145
|
-
iteration: number;
|
|
146
|
-
systemPrompt: string;
|
|
147
|
-
passed: number;
|
|
148
|
-
total: number;
|
|
149
|
-
testCases: TestCaseResult<TInput, TOutput>[];
|
|
150
|
-
cost: number;
|
|
151
|
-
}
|
|
152
|
-
/**
|
|
153
|
-
* Final result from optimization.
|
|
154
|
-
*/
|
|
155
|
-
interface OptimizeResult<TInput = unknown, TOutput = unknown> {
|
|
156
|
-
success: boolean;
|
|
157
|
-
finalPrompt: string;
|
|
158
|
-
iterations: IterationResult<TInput, TOutput>[];
|
|
159
|
-
totalCost: number;
|
|
191
|
+
comparatorCost: number;
|
|
160
192
|
logFolder?: string;
|
|
161
193
|
}
|
|
162
194
|
//#endregion
|
|
163
|
-
//#region src/executors.d.ts
|
|
195
|
+
//#region src/eval/executors.d.ts
|
|
164
196
|
/**
|
|
165
197
|
* Configuration for endpoint executor.
|
|
166
198
|
*/
|
|
@@ -243,7 +275,7 @@ declare function fn<TInput, TOutput extends object, TRaw = TOutput>(config: FnCo
|
|
|
243
275
|
*/
|
|
244
276
|
declare function mock<TInput, TOutput extends object>(outputsOrFn: TOutput[] | ((input: TInput, systemPrompt?: string) => TOutput)): Executor<TInput, TOutput>;
|
|
245
277
|
//#endregion
|
|
246
|
-
//#region src/comparators.d.ts
|
|
278
|
+
//#region src/eval/comparators/comparators.d.ts
|
|
247
279
|
/** Checks if actual string contains a substring. */
|
|
248
280
|
declare function contains(substring: string): Comparator<string>;
|
|
249
281
|
/** Creates a comparator with custom logic. */
|
|
@@ -269,14 +301,51 @@ declare function within(config: {
|
|
|
269
301
|
tolerance: number;
|
|
270
302
|
mode?: 'percentage' | 'absolute';
|
|
271
303
|
}): Comparator<number>;
|
|
304
|
+
/** Configuration for LLM-based comparison. */
|
|
305
|
+
interface LLMCompareConfig {
|
|
306
|
+
provider?: LLMProviders;
|
|
307
|
+
/** API key for LLM provider. If not provided, uses llmConfig.apiKey from eval config. */
|
|
308
|
+
apiKey?: string;
|
|
309
|
+
systemPrompt?: string;
|
|
310
|
+
}
|
|
311
|
+
/**
|
|
312
|
+
* Uses an LLM to compare expected vs actual values.
|
|
313
|
+
* Returns a comparison result with rationale and cost tracking.
|
|
314
|
+
* Default provider: anthropic_claude_haiku (fastest, cheapest).
|
|
315
|
+
*/
|
|
316
|
+
declare function llmCompare(config: LLMCompareConfig): Comparator;
|
|
317
|
+
/**
|
|
318
|
+
* Marks a comparator or comparator config as unordered.
|
|
319
|
+
* When applied to an array field, items will be matched by similarity
|
|
320
|
+
* rather than index position (using Hungarian algorithm).
|
|
321
|
+
*
|
|
322
|
+
* @example
|
|
323
|
+
* // Unordered array of objects
|
|
324
|
+
* lineItems: unordered({
|
|
325
|
+
* description: name,
|
|
326
|
+
* price: within({ tolerance: 5 })
|
|
327
|
+
* })
|
|
328
|
+
*
|
|
329
|
+
* @example
|
|
330
|
+
* // Unordered array of primitives
|
|
331
|
+
* tags: unordered(exact)
|
|
332
|
+
*
|
|
333
|
+
* @example
|
|
334
|
+
* // When entire output is an array
|
|
335
|
+
* comparators: unordered({
|
|
336
|
+
* carrier: exact,
|
|
337
|
+
* premium: within({ tolerance: 0.05 })
|
|
338
|
+
* })
|
|
339
|
+
*/
|
|
340
|
+
declare function unordered<T>(comparator: Comparator<T> | NestedComparatorConfig): ComparatorWithOrdering<T>;
|
|
272
341
|
//#endregion
|
|
273
|
-
//#region src/eval.d.ts
|
|
342
|
+
//#region src/eval/eval.d.ts
|
|
274
343
|
/**
|
|
275
344
|
* Run all test cases and return results.
|
|
276
345
|
*/
|
|
277
346
|
declare function evaluate<TInput, TOutput>(config: EvalConfig<TInput, TOutput>): Promise<EvalResult<TInput, TOutput>>;
|
|
278
347
|
//#endregion
|
|
279
|
-
//#region src/optimizer.d.ts
|
|
348
|
+
//#region src/optimizer/optimizer.d.ts
|
|
280
349
|
declare function optimize<TInput, TOutput>(evalConfig: EvalConfig<TInput, TOutput>, config: OptimizeConfig): Promise<OptimizeResult<TInput, TOutput>>;
|
|
281
350
|
//#endregion
|
|
282
351
|
//#region src/index.d.ts
|
|
@@ -335,5 +404,5 @@ declare const didactic: {
|
|
|
335
404
|
fn<TInput, TOutput extends object>(config: FnConfig<TInput, TOutput>): Executor<TInput, TOutput>;
|
|
336
405
|
};
|
|
337
406
|
//#endregion
|
|
338
|
-
export { type Comparator, type ComparatorContext, type
|
|
407
|
+
export { type Comparator, type ComparatorContext, type ComparatorResult, type EndpointConfig, type EvalConfig, type EvalResult, type Executor, type ExecutorResult, type FnConfig, type LLMCompareConfig, type LLMConfig, LLMProviders, type OptimizeConfig, type OptimizeResult, type TestCase, type TestCaseResult, contains, custom, date, didactic as default, didactic as didact, didactic, endpoint, evaluate, exact, fn, llmCompare, mock, name, numeric, oneOf, optimize, presence, unordered, within };
|
|
339
408
|
//# sourceMappingURL=index.d.mts.map
|