orchestrated 0.1.0 → 0.1.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/index.d.ts ADDED
@@ -0,0 +1,243 @@
1
+ /**
2
+ * Orchestrated - LLM Evaluation Framework
3
+ *
4
+ * A comprehensive evaluation framework for LLM applications with batch processing,
5
+ * data sources, and multi-backend export capabilities.
6
+ *
7
+ * @packageDocumentation
8
+ */
9
+
10
+ import type { Score, Scorer } from "autoevals";
11
+
12
+ // Core Evaluation API
13
+ export declare function Eval( // Evaluation entry point; the returned Promise resolves to an EvalSummary.
14
+ name: string, // required evaluation name — presumably echoed in EvalSummary.name; confirm
15
+ config: EvalConfig, // required: data and scores, plus optional task and ctx
16
+ options?: EvalOptions // optional reporters / exporters
17
+ ): Promise<EvalSummary>;
18
+
19
+ export interface EvalConfig { // Shape of the `config` argument of Eval().
20
+ data: EvalData; // required; an array of records, a DataSourceDefinition, or an async loader (see EvalData)
21
+ task?: TaskFunction; // optional — NOTE(review): behavior when omitted is not visible in this declaration
22
+ scores: (string | Scorer)[]; // required; each entry is a string or an autoevals Scorer — presumably strings reference scorers by name/slug; confirm
23
+ ctx?: any; // optional and untyped — presumably forwarded as TaskFunction's `ctx` argument; confirm
24
+ }
25
+
26
+ export type EvalData = // Accepted forms for EvalConfig.data.
27
+ | Array<Record<string, any>> // an array of string-keyed records
28
+ | DataSourceDefinition // a { type, config? } descriptor, such as the value returned by interactions()
29
+ | (() => Promise<Array<Record<string, any>>>); // a zero-argument function returning a Promise of such an array
30
+
31
+ export interface DataSourceDefinition { // Data-source descriptor; one member of the EvalData union.
32
+ type: string; // required string tag — the set of accepted values is not declared here
33
+ config?: any; // optional, untyped configuration
34
+ }
35
+
36
+ export interface EvalOptions { // Shape of the optional third argument of Eval().
37
+ reporters?: Reporter[]; // optional list of Reporter callback objects
38
+ exporters?: any[]; // optional and untyped — NOTE(review): no exporter interface is declared in this file
39
+ }
40
+
41
+ export interface EvalResult { // Element type of EvalSummary.results; also the second argument of Reporter.onResult.
42
+ input: any; // required, untyped
43
+ output?: any; // optional, untyped
44
+ expected?: any; // optional, untyped
45
+ scores: Record<string, Score>; // autoevals Score values keyed by string — presumably the scorer name; confirm
46
+ error?: Error; // optional — presumably present when processing this item failed; confirm
47
+ tags?: string[]; // optional list of string tags
48
+ }
49
+
50
+ export interface EvalSummary { // Resolved value of Eval(); also the second argument of Reporter.onComplete.
51
+ name: string;
52
+ results: EvalResult[]; // array of EvalResult entries
53
+ summary: { // nested object holding counters and per-score statistics
54
+ total: number;
55
+ passed: number; // NOTE(review): the pass/fail criterion is not declared in this file
56
+ failed: number;
57
+ scores: Record<string, ScoreSummary>; // ScoreSummary values keyed by string — presumably the score name; confirm
58
+ };
59
+ hasPendingBatch?: boolean; // optional flag — presumably signals unfinished batch work; confirm against the implementation
60
+ }
61
+
62
+ export interface ScoreSummary { // Value type of EvalSummary.summary.scores; every statistic is a required number.
63
+ name: string;
64
+ mean: number;
65
+ median: number;
66
+ min: number;
67
+ max: number;
68
+ p10: number; // p10/p25/p75/p90: presumably the 10th/25th/75th/90th percentiles — confirm the interpolation method
69
+ p25: number;
70
+ p75: number;
71
+ p90: number;
72
+ stddev: number; // NOTE(review): population vs. sample standard deviation is not stated here
73
+ count: number; // presumably the number of values aggregated; confirm
74
+ }
75
+
76
+ export type TaskFunction = ( // Type of EvalConfig.task.
77
+ input: any, // required, untyped
78
+ ctx?: any // optional, untyped
79
+ ) => Promise<any> | any; // may return a value directly or a Promise
80
+
81
+ // Evaluation Registry & Management
82
+ export declare function registerEvaluation(promise: Promise<any>): void; // Accepts a Promise and returns nothing — presumably tracked until waitForEvaluations(); confirm
83
+ export declare function clearEvaluations(): void; // No arguments, no return value — presumably empties the set of registered evaluations; confirm
84
+ export declare function getRunningEvaluationCount(): number; // Returns a number — presumably the count of registered, unfinished evaluations; confirm
85
+ export declare function waitForEvaluations(): Promise<void>; // Returns a Promise with no value — NOTE(review): rejection behavior when an evaluation fails is not visible here
86
+
87
+ // Data Sources
88
+ export declare function interactions( // Builds a DataSourceDefinition, which is a valid EvalConfig.data value.
89
+ options?: InteractionsDatasetOptions // optional; every field of the options type is itself optional
90
+ ): DataSourceDefinition;
91
+
92
+ export interface InteractionsDatasetOptions { // Options accepted by interactions(); all fields are optional strings.
93
+ tenantId?: string;
94
+ serviceName?: string;
95
+ environment?: string;
96
+ month?: string; // NOTE(review): expected string format is not declared here
97
+ startDate?: string; // NOTE(review): date format and range inclusivity are not declared here
98
+ endDate?: string;
99
+ }
100
+
101
+ // Project API (Scorers)
102
+ export declare const projects: { // Exported object with a single `create` method.
103
+ create(options?: ProjectOptions): Project; // options are optional; returns a Project, whose `scorers` is a ScorerRegistry
104
+ };
105
+
106
+ export interface ProjectOptions { // Options accepted by projects.create(); all fields are optional strings.
107
+ tenantId?: string; // same field names also appear on EvalState and InteractionsDatasetOptions
108
+ serviceName?: string;
109
+ environment?: string;
110
+ }
111
+
112
+ export declare class Project { // Return type of projects.create(); no constructor signature is declared here.
113
+ scorers: ScorerRegistry; // registry exposing create(config) for building scorers
114
+ }
115
+
116
+ export declare class ScorerRegistry { // Type of Project.scorers.
117
+ create(config: ScorerConfig): Scorer; // takes a TypedScorerConfig or CustomScorerConfig and returns an autoevals Scorer
118
+ }
119
+
120
+ export interface BaseScorerConfig { // Fields shared by TypedScorerConfig and CustomScorerConfig.
121
+ name: string;
122
+ slug: string;
123
+ description: string;
124
+ parameters: any; // Zod schema (typed as `any` in this declaration, so it is not checked at compile time)
125
+ metadata?: Record<string, any>; // optional string-keyed, untyped metadata
126
+ }
127
+
128
+ export interface TypedScorerConfig extends BaseScorerConfig { // Scorer config built from a prompt template; member of ScorerConfig.
129
+ promptTemplate: string; // required — NOTE(review): template placeholder syntax is not declared here
130
+ choiceScores: Record<string, number>; // numeric values keyed by string — presumably maps each choice label to its score; confirm
131
+ model?: string; // optional — NOTE(review): default is not visible here
132
+ useCoT?: boolean; // optional — presumably toggles chain-of-thought prompting; confirm
133
+ temperature?: number; // optional — NOTE(review): default is not visible here
134
+ }
135
+
136
+ export interface CustomScorerConfig extends BaseScorerConfig { // Scorer config backed by a user-supplied handler; member of ScorerConfig.
137
+ handler: (args: any) => Promise<Score>; // async function taking one untyped argument and resolving to an autoevals Score
138
+ }
139
+
140
+ export type ScorerConfig = TypedScorerConfig | CustomScorerConfig; // Argument type of ScorerRegistry.create(); the two members differ by promptTemplate/choiceScores vs. handler
141
+
142
+ // State Management
143
+ export declare function initState( // Asynchronous state initialization; the returned Promise carries no value.
144
+ partial?: PartialEvalState, // optional subset of EvalState fields
145
+ skipAuth?: boolean // optional — presumably bypasses authentication during initialization; confirm
146
+ ): Promise<void>;
147
+
148
+ export declare function getState(): Readonly<EvalState>; // Returns the state typed as Readonly, so its properties are read-only at the type level
149
+ export declare function resetState(): void; // No arguments, no return value — presumably clears the initialized state; confirm
150
+ export declare function isStateInitialized(): boolean; // Returns a boolean — presumably true once initState() has completed; confirm
151
+
152
+ export interface EvalState { // State object returned (read-only) by getState(); every field is required, some are nullable.
153
+ apiUrl: string;
154
+ tenantId: string;
155
+ serviceName: string;
156
+ loggedInUser: string | null; // nullable
157
+ accessToken: string | null; // nullable credential
158
+ apiKey: string | null; // nullable credential
159
+ environment: string;
160
+ appUrl: string;
161
+ appClientId: string;
162
+ otelEndpoint: string | null; // nullable — presumably an OpenTelemetry endpoint URL; confirm
163
+ sendNoLogs: boolean;
164
+ lazyLoad: boolean;
165
+ awsAccessKeyId: string | null; // the three aws* fields are nullable strings
166
+ awsSecretAccessKey: string | null;
167
+ awsSessionToken: string | null;
168
+ disableBundleCache: boolean;
169
+ }
170
+
171
+ export type PartialEvalState = Partial<EvalState>; // EvalState with every field optional; type of initState()'s first argument
172
+
173
+ // Reporters
174
+ export declare const legacyReporter: Reporter; // A Reporter value exported by the package; usable in EvalOptions.reporters
175
+
176
+ export interface Reporter { // Set of optional callbacks; each may return void or a Promise<void>.
177
+ onStart?: (ctx: any) => void | Promise<void>; // receives an untyped ctx
178
+ onResult?: (ctx: any, result: EvalResult) => void | Promise<void>; // receives ctx and one EvalResult
179
+ onComplete?: (ctx: any, summary: EvalSummary) => void | Promise<void>; // receives ctx and the EvalSummary
180
+ }
181
+
182
+ // Utilities
183
+ export declare const colors: { // Object of string-to-string helpers — presumably each wraps text in terminal styling codes; confirm
184
+ gray: (text: string) => string;
185
+ green: (text: string) => string;
186
+ red: (text: string) => string;
187
+ yellow: (text: string) => string;
188
+ blue: (text: string) => string;
189
+ magenta: (text: string) => string;
190
+ cyan: (text: string) => string;
191
+ white: (text: string) => string;
192
+ bold: (text: string) => string;
193
+ };
194
+
195
+ export declare const iso: { // Object with a single `now` helper.
196
+ now: () => string; // returns a string — presumably the current time in ISO-8601 form; confirm
197
+ };
198
+
199
+ // Tracing
200
+ export declare function traced<T extends (...args: any[]) => any>( // Generic over any function type T; the return type is T itself.
201
+ fn: T, // the function to wrap
202
+ options?: { name?: string } // optional object with an optional name
203
+ ): T; // callers keep fn's exact signature
204
+
205
+ // Serialization (for advanced users)
206
+ export interface SerializableScorerDefinition { // Serializable scorer shape tagged with the literal type "prompt".
207
+ type: "prompt"; // literal tag; SerializableCustomScorer uses "custom_scorer" instead
208
+ name: string;
209
+ slug?: string; // optional here, although required on BaseScorerConfig
210
+ description?: string; // optional here, although required on BaseScorerConfig
211
+ schema: any; // untyped — presumably the serialized form of the scorer's `parameters` schema; confirm
212
+ promptTemplate: string;
213
+ choiceScores: Record<string, number>;
214
+ model?: string;
215
+ useCoT?: boolean;
216
+ temperature?: number;
217
+ metadata?: Record<string, any>;
218
+ fingerprint?: string; // optional — NOTE(review): how the fingerprint is computed is not visible here
219
+ }
220
+
221
+ export interface SerializableCustomScorer { // Serializable scorer shape tagged with the literal type "custom_scorer".
222
+ type: "custom_scorer"; // literal tag; SerializableScorerDefinition uses "prompt" instead
223
+ name: string;
224
+ slug: string;
225
+ description: string;
226
+ schema: any; // untyped
227
+ handler: any; // untyped — NOTE(review): the serialized representation of the handler is not declared here
228
+ metadata?: Record<string, any>;
229
+ fingerprint?: string; // optional
230
+ }
231
+
232
+ export interface SerializableEvaluation { // Serializable shape of an evaluation; most members are untyped (`any`).
233
+ slug: string;
234
+ name: string;
235
+ data: any; // untyped
236
+ task?: any; // optional, untyped
237
+ scorers: any[]; // untyped array — presumably SerializableScorerDefinition / SerializableCustomScorer entries; confirm
238
+ fingerprint?: string; // optional
239
+ options?: any; // optional, untyped
240
+ }
241
+
242
+ // Re-export common types from autoevals for convenience
243
+ export type { Score, Scorer } from "autoevals";