orchestrated 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +243 -0
- package/index.js +113508 -303
- package/index.js.map +990 -12
- package/package.json +4 -35
- package/chunk-1fxw3qys.js +0 -131
- package/chunk-1fxw3qys.js.map +0 -10
- package/chunk-41g79vhc.js +0 -49
- package/chunk-41g79vhc.js.map +0 -10
- package/chunk-5va59f7m.js +0 -22
- package/chunk-5va59f7m.js.map +0 -9
- package/chunk-83qdt756.js +0 -424
- package/chunk-83qdt756.js.map +0 -15
package/index.d.ts
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Orchestrated - LLM Evaluation Framework
|
|
3
|
+
*
|
|
4
|
+
* A comprehensive evaluation framework for LLM applications with batch processing,
|
|
5
|
+
* data sources, and multi-backend export capabilities.
|
|
6
|
+
*
|
|
7
|
+
* @packageDocumentation
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { Score, Scorer } from "autoevals";
|
|
11
|
+
|
|
12
|
+
// Core Evaluation API
|
|
13
|
+
/**
 * Core evaluation entry point.
 *
 * @param name - Name given to the evaluation.
 * @param config - Data, optional task and scorers (see `EvalConfig`).
 * @param options - Optional reporters and exporters (see `EvalOptions`).
 * @returns A promise that resolves to an `EvalSummary`.
 */
export declare function Eval(
  name: string,
  config: EvalConfig,
  options?: EvalOptions
): Promise<EvalSummary>;
|
|
18
|
+
|
|
19
|
+
/**
 * Configuration object accepted by `Eval`.
 */
export interface EvalConfig {
  /** Evaluation data in any of the shapes allowed by `EvalData`. */
  data: EvalData;
  /** Optional task function. NOTE(review): presumably produces the output that gets scored — confirm. */
  task?: TaskFunction;
  /** Scorers, each given as a string or as an autoevals `Scorer`. NOTE(review): strings presumably name a registered scorer — confirm. */
  scores: Array<string | Scorer>;
  /** Optional untyped context value. NOTE(review): presumably passed to the task as `ctx` — confirm. */
  ctx?: any;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Accepted shapes for evaluation data:
 * an array of records, a `DataSourceDefinition`, or a zero-argument
 * async function that resolves to an array of records.
 */
export type EvalData =
  | Record<string, any>[]
  | DataSourceDefinition
  | (() => Promise<Record<string, any>[]>);
|
|
30
|
+
|
|
31
|
+
/**
 * Descriptor for a data source: a `type` string plus an optional,
 * untyped `config` payload.
 */
export interface DataSourceDefinition {
  /** Data source type identifier; the set of valid values is not declared here. */
  type: string;
  /** Optional configuration; its shape is not constrained by this declaration. */
  config?: any;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Optional third argument of `Eval`.
 */
export interface EvalOptions {
  /** Reporters to use for the evaluation (see `Reporter`). */
  reporters?: Reporter[];
  /** Exporters; element type is untyped (`any`) in this declaration. */
  exporters?: any[];
}
|
|
40
|
+
|
|
41
|
+
/**
 * Result record for a single evaluated item.
 */
export interface EvalResult {
  /** Input value (untyped). */
  input: any;
  /** Output value, when present (untyped). */
  output?: any;
  /** Expected value, when present (untyped). */
  expected?: any;
  /** Scores keyed by string. NOTE(review): keys are presumably scorer names — confirm. */
  scores: Record<string, Score>;
  /** Error associated with this result, if any. */
  error?: Error;
  /** Optional list of tags. */
  tags?: string[];
}
|
|
49
|
+
|
|
50
|
+
/**
 * Value that the promise returned by `Eval` resolves to.
 */
export interface EvalSummary {
  /** Evaluation name. */
  name: string;
  /** Individual results. */
  results: EvalResult[];
  /** Aggregate counts and per-score statistics. */
  summary: {
    total: number;
    passed: number;
    failed: number;
    /** Score summaries keyed by string. NOTE(review): keys are presumably scorer names — confirm. */
    scores: Record<string, ScoreSummary>;
  };
  /** Optional flag. NOTE(review): presumably set while batch work is still outstanding — confirm. */
  hasPendingBatch?: boolean;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Numeric summary for one named score.
 *
 * NOTE(review): `p10`/`p25`/`p75`/`p90` are presumably percentiles and
 * `stddev` the standard deviation; the declaration only fixes them as
 * numbers — confirm against the implementation.
 */
export interface ScoreSummary {
  name: string;
  mean: number;
  median: number;
  min: number;
  max: number;
  p10: number;
  p25: number;
  p75: number;
  p90: number;
  stddev: number;
  count: number;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Signature of a task: receives an untyped input and an optional untyped
 * context, and may return its result either directly or as a promise.
 */
export type TaskFunction = (input: any, ctx?: any) => Promise<any> | any;
|
|
80
|
+
|
|
81
|
+
// Evaluation Registry & Management
|
|
82
|
+
/**
 * Accepts a promise and returns nothing.
 * NOTE(review): presumably tracks the promise as a running evaluation — confirm.
 */
export declare function registerEvaluation(promise: Promise<any>): void;
|
|
83
|
+
/**
 * Takes no arguments and returns nothing.
 * NOTE(review): presumably empties the set of registered evaluations — confirm.
 */
export declare function clearEvaluations(): void;
|
|
84
|
+
/**
 * Returns a number.
 * NOTE(review): presumably the count of evaluations still running — confirm.
 */
export declare function getRunningEvaluationCount(): number;
|
|
85
|
+
/**
 * Returns a promise that resolves with no value.
 * NOTE(review): presumably settles once registered evaluations finish — confirm.
 */
export declare function waitForEvaluations(): Promise<void>;
|
|
86
|
+
|
|
87
|
+
// Data Sources
|
|
88
|
+
/**
 * Builds a `DataSourceDefinition` from optional dataset options.
 *
 * @param options - Optional filters (see `InteractionsDatasetOptions`).
 * @returns A data source definition, which `EvalData` accepts as evaluation data.
 */
export declare function interactions(
  options?: InteractionsDatasetOptions
): DataSourceDefinition;
|
|
91
|
+
|
|
92
|
+
/**
 * Options accepted by `interactions`. Every field is an optional string.
 *
 * NOTE(review): the expected formats of `month`, `startDate` and `endDate`
 * are not declared here — confirm against the implementation.
 */
export interface InteractionsDatasetOptions {
  tenantId?: string;
  serviceName?: string;
  environment?: string;
  month?: string;
  startDate?: string;
  endDate?: string;
}
|
|
100
|
+
|
|
101
|
+
// Project API (Scorers)
|
|
102
|
+
/**
 * Project API object exposing a single `create` method.
 */
export declare const projects: {
  /**
   * @param options - Optional project settings (see `ProjectOptions`).
   * @returns A `Project` instance.
   */
  create(options?: ProjectOptions): Project;
};
|
|
105
|
+
|
|
106
|
+
/**
 * Options accepted by `projects.create`. Every field is an optional string.
 */
export interface ProjectOptions {
  tenantId?: string;
  serviceName?: string;
  environment?: string;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Project handle; the only declared member is its scorer registry.
 */
export declare class Project {
  /** Registry used to create scorers for this project. */
  scorers: ScorerRegistry;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Registry that turns scorer configurations into autoevals `Scorer` values.
 */
export declare class ScorerRegistry {
  /**
   * @param config - Either a `TypedScorerConfig` or a `CustomScorerConfig`.
   * @returns A `Scorer`.
   */
  create(config: ScorerConfig): Scorer;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Fields shared by every scorer configuration.
 */
export interface BaseScorerConfig {
  name: string;
  slug: string;
  description: string;
  /** Zod schema (typed as `any` in this declaration). */
  parameters: any;
  /** Optional free-form metadata. */
  metadata?: Record<string, any>;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Scorer configuration defined by a prompt template and a choice-to-score map,
 * in addition to the `BaseScorerConfig` fields.
 */
export interface TypedScorerConfig extends BaseScorerConfig {
  /** Prompt template text; the templating syntax is not declared here. */
  promptTemplate: string;
  /** Numeric score for each choice key. */
  choiceScores: Record<string, number>;
  /** Optional model identifier. */
  model?: string;
  /** Optional flag. NOTE(review): presumably enables chain-of-thought prompting — confirm. */
  useCoT?: boolean;
  /** Optional temperature value. */
  temperature?: number;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Scorer configuration backed by a user-supplied handler, in addition to the
 * `BaseScorerConfig` fields.
 */
export interface CustomScorerConfig extends BaseScorerConfig {
  /** Async function that receives untyped arguments and resolves to a `Score`. */
  handler: (args: any) => Promise<Score>;
}
|
|
139
|
+
|
|
140
|
+
/** Union of the two scorer configuration shapes: prompt-template based or handler based. */
export type ScorerConfig = TypedScorerConfig | CustomScorerConfig;
|
|
141
|
+
|
|
142
|
+
// State Management
|
|
143
|
+
/**
 * Asynchronously initializes state.
 *
 * @param partial - Optional subset of `EvalState` fields.
 *   NOTE(review): presumably overrides defaults — confirm.
 * @param skipAuth - Optional flag. NOTE(review): presumably bypasses
 *   authentication during initialization — confirm.
 * @returns A promise that resolves with no value.
 */
export declare function initState(
  partial?: PartialEvalState,
  skipAuth?: boolean
): Promise<void>;
|
|
147
|
+
|
|
148
|
+
/** Returns the state typed as a read-only `EvalState`. */
export declare function getState(): Readonly<EvalState>;
|
|
149
|
+
/**
 * Takes no arguments and returns nothing.
 * NOTE(review): presumably restores state to its uninitialized form — confirm.
 */
export declare function resetState(): void;
|
|
150
|
+
/**
 * Returns a boolean.
 * NOTE(review): presumably true once `initState` has completed — confirm.
 */
export declare function isStateInitialized(): boolean;
|
|
151
|
+
|
|
152
|
+
/**
 * Shape of the state returned by `getState`.
 *
 * All fields are required. Credential and endpoint fields typed
 * `string | null` may be null.
 */
export interface EvalState {
  apiUrl: string;
  tenantId: string;
  serviceName: string;
  loggedInUser: string | null;
  accessToken: string | null;
  apiKey: string | null;
  environment: string;
  appUrl: string;
  appClientId: string;
  otelEndpoint: string | null;
  sendNoLogs: boolean;
  lazyLoad: boolean;
  awsAccessKeyId: string | null;
  awsSecretAccessKey: string | null;
  awsSessionToken: string | null;
  disableBundleCache: boolean;
}
|
|
170
|
+
|
|
171
|
+
/** `EvalState` with every field optional; the type of `initState`'s first parameter. */
export type PartialEvalState = Partial<EvalState>;
|
|
172
|
+
|
|
173
|
+
// Reporters
|
|
174
|
+
/** A ready-made `Reporter` value; its output format is not described by this declaration. */
export declare const legacyReporter: Reporter;
|
|
175
|
+
|
|
176
|
+
/**
 * Set of optional callbacks. Each one receives an untyped context first and
 * may return either nothing or a promise.
 *
 * NOTE(review): the call order (start, per result, completion) is inferred
 * from the names — confirm.
 */
export interface Reporter {
  onStart?: (ctx: any) => void | Promise<void>;
  /** Receives one `EvalResult`. */
  onResult?: (ctx: any, result: EvalResult) => void | Promise<void>;
  /** Receives the `EvalSummary`. */
  onComplete?: (ctx: any, summary: EvalSummary) => void | Promise<void>;
}
|
|
181
|
+
|
|
182
|
+
// Utilities
|
|
183
|
+
/**
 * String-to-string helpers, one per named color plus `bold`.
 *
 * NOTE(review): presumably each wraps the text in terminal styling codes;
 * the declaration only fixes the `(text: string) => string` signature — confirm.
 */
export declare const colors: {
  gray: (text: string) => string;
  green: (text: string) => string;
  red: (text: string) => string;
  yellow: (text: string) => string;
  blue: (text: string) => string;
  magenta: (text: string) => string;
  cyan: (text: string) => string;
  white: (text: string) => string;
  bold: (text: string) => string;
};
|
|
194
|
+
|
|
195
|
+
/**
 * Time utility object.
 */
export declare const iso: {
  /** Returns a string. NOTE(review): presumably the current time in ISO 8601 form — confirm. */
  now: () => string;
};
|
|
198
|
+
|
|
199
|
+
// Tracing
|
|
200
|
+
/**
 * Takes a function and returns a value of the same function type.
 *
 * NOTE(review): presumably the returned function is a tracing wrapper and
 * `options.name` labels the trace — confirm.
 *
 * @param fn - Any function.
 * @param options - Optional settings; only `name` is declared.
 * @returns A value typed identically to `fn`.
 */
export declare function traced<T extends (...args: any[]) => any>(
  fn: T,
  options?: { name?: string }
): T;
|
|
204
|
+
|
|
205
|
+
// Serialization (for advanced users)
|
|
206
|
+
/**
 * Serializable form of a prompt-based scorer, tagged with `type: "prompt"`.
 * Unlike `TypedScorerConfig`, `slug` and `description` are optional here.
 */
export interface SerializableScorerDefinition {
  /** Tag identifying this shape. */
  type: "prompt";
  name: string;
  slug?: string;
  description?: string;
  /** Schema value (untyped). */
  schema: any;
  promptTemplate: string;
  choiceScores: Record<string, number>;
  model?: string;
  useCoT?: boolean;
  temperature?: number;
  metadata?: Record<string, any>;
  /** Optional fingerprint string; how it is computed is not declared here. */
  fingerprint?: string;
}
|
|
220
|
+
|
|
221
|
+
/**
 * Serializable form of a handler-based scorer, tagged with
 * `type: "custom_scorer"`.
 */
export interface SerializableCustomScorer {
  /** Tag identifying this shape. */
  type: "custom_scorer";
  name: string;
  slug: string;
  description: string;
  /** Schema value (untyped). */
  schema: any;
  /** Handler value (untyped); its serialized representation is not declared here. */
  handler: any;
  metadata?: Record<string, any>;
  /** Optional fingerprint string; how it is computed is not declared here. */
  fingerprint?: string;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Serializable form of an evaluation. Apart from `slug`, `name` and
 * `fingerprint`, every field is untyped (`any`).
 */
export interface SerializableEvaluation {
  slug: string;
  name: string;
  data: any;
  task?: any;
  scorers: any[];
  /** Optional fingerprint string; how it is computed is not declared here. */
  fingerprint?: string;
  options?: any;
}
|
|
241
|
+
|
|
242
|
+
// Re-export common types from autoevals for convenience
|
|
243
|
+
export type { Score, Scorer } from "autoevals";
|