@runtypelabs/sdk 1.7.1 → 1.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +7026 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +5147 -0
- package/dist/index.d.ts +5146 -27
- package/dist/index.js +6955 -77
- package/dist/index.js.map +1 -1
- package/package.json +14 -7
- package/dist/batch-builder.d.ts +0 -106
- package/dist/batch-builder.d.ts.map +0 -1
- package/dist/batch-builder.js +0 -124
- package/dist/batch-builder.js.map +0 -1
- package/dist/batches-namespace.d.ts +0 -132
- package/dist/batches-namespace.d.ts.map +0 -1
- package/dist/batches-namespace.js +0 -128
- package/dist/batches-namespace.js.map +0 -1
- package/dist/case-types.d.ts +0 -42
- package/dist/case-types.d.ts.map +0 -1
- package/dist/case-types.js +0 -16
- package/dist/case-types.js.map +0 -1
- package/dist/client-token-types.d.ts +0 -143
- package/dist/client-token-types.d.ts.map +0 -1
- package/dist/client-token-types.js +0 -11
- package/dist/client-token-types.js.map +0 -1
- package/dist/client.d.ts +0 -131
- package/dist/client.d.ts.map +0 -1
- package/dist/client.js +0 -501
- package/dist/client.js.map +0 -1
- package/dist/endpoints.d.ts +0 -1248
- package/dist/endpoints.d.ts.map +0 -1
- package/dist/endpoints.js +0 -1649
- package/dist/endpoints.js.map +0 -1
- package/dist/error-handling-types.d.ts +0 -71
- package/dist/error-handling-types.d.ts.map +0 -1
- package/dist/error-handling-types.js +0 -12
- package/dist/error-handling-types.js.map +0 -1
- package/dist/eval-builder.d.ts +0 -216
- package/dist/eval-builder.d.ts.map +0 -1
- package/dist/eval-builder.js +0 -225
- package/dist/eval-builder.js.map +0 -1
- package/dist/evals-namespace.d.ts +0 -205
- package/dist/evals-namespace.d.ts.map +0 -1
- package/dist/evals-namespace.js +0 -208
- package/dist/evals-namespace.js.map +0 -1
- package/dist/flow-builder.d.ts +0 -717
- package/dist/flow-builder.d.ts.map +0 -1
- package/dist/flow-builder.js +0 -592
- package/dist/flow-builder.js.map +0 -1
- package/dist/flow-result.d.ts +0 -117
- package/dist/flow-result.d.ts.map +0 -1
- package/dist/flow-result.js +0 -175
- package/dist/flow-result.js.map +0 -1
- package/dist/flows-namespace.d.ts +0 -442
- package/dist/flows-namespace.d.ts.map +0 -1
- package/dist/flows-namespace.js +0 -686
- package/dist/flows-namespace.js.map +0 -1
- package/dist/generated-tool-gate.d.ts +0 -75
- package/dist/generated-tool-gate.d.ts.map +0 -1
- package/dist/generated-tool-gate.js +0 -314
- package/dist/generated-tool-gate.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/prompts-namespace.d.ts +0 -237
- package/dist/prompts-namespace.d.ts.map +0 -1
- package/dist/prompts-namespace.js +0 -222
- package/dist/prompts-namespace.js.map +0 -1
- package/dist/runtype.d.ts +0 -232
- package/dist/runtype.d.ts.map +0 -1
- package/dist/runtype.js +0 -367
- package/dist/runtype.js.map +0 -1
- package/dist/stream-utils.d.ts +0 -58
- package/dist/stream-utils.d.ts.map +0 -1
- package/dist/stream-utils.js +0 -373
- package/dist/stream-utils.js.map +0 -1
- package/dist/transform.d.ts +0 -30
- package/dist/transform.d.ts.map +0 -1
- package/dist/transform.js +0 -196
- package/dist/transform.js.map +0 -1
- package/dist/types.d.ts +0 -717
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -7
- package/dist/types.js.map +0 -1
|
@@ -1,205 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* EvalsNamespace - Static namespace for evaluation operations
|
|
3
|
-
*
|
|
4
|
-
* Provides methods for running evaluations and comparing model performance.
|
|
5
|
-
* Evals can be streamed for real-time results or submitted as batch jobs.
|
|
6
|
-
*/
|
|
7
|
-
import type { RuntypeClient } from './runtype';
|
|
8
|
-
import { FlowResult } from './flow-result';
|
|
9
|
-
import type { StreamCallbacks } from './flow-builder';
|
|
10
|
-
/**
 * Override of the model settings used by one named step of a flow.
 *
 * Instances are supplied through `EvalRunConfig.models` (a single set of
 * overrides) or `EvalRunConfig.compareModels` (several sets to compare).
 */
export interface ModelOverride {
    /** Name of the step to override */
    stepName: string;
    /** Model to use for this step */
    model: string;
    /** Optional temperature override */
    temperature?: number;
    /** Optional max tokens override */
    maxTokens?: number;
}
|
|
20
|
-
/**
 * Configuration accepted by `EvalsNamespace.run()`.
 *
 * Several fields come in pairs of alternatives. The payload builder of
 * `EvalRunner` forwards only the first field of each pair when both are set:
 * `flowId` over `flow`, `recordType` over `records`, and `models` over
 * `compareModels`.
 */
export interface EvalRunConfig {
    /** Flow ID to evaluate */
    flowId?: string;
    /** Virtual flow definition (alternative to flowId; ignored when flowId is set) */
    flow?: {
        name: string;
        steps: any[];
    };
    /** Record type to evaluate against */
    recordType?: string;
    /** Inline records for ad-hoc evaluation (ignored when recordType is set) */
    records?: Array<{
        name: string;
        type: string;
        metadata: Record<string, any>;
    }>;
    /** Model overrides for single-model evaluation */
    models?: ModelOverride[];
    /** Multiple model configurations for comparison (ignored when models is set) */
    compareModels?: ModelOverride[][];
    /** Store evaluation results */
    storeResults?: boolean;
    /** Run evaluations in parallel */
    parallel?: boolean;
    /** Maximum concurrent evaluations */
    concurrency?: number;
    /** Continue on individual record failures */
    continueOnError?: boolean;
    /** Optional filter for records */
    filter?: Record<string, any>;
    /** Optional limit on number of records */
    limit?: number;
}
|
|
53
|
-
/**
 * Evaluation status object, as resolved by `EvalRunner.submit()`,
 * `EvalsNamespace.get()` and (in a list) `EvalsNamespace.list()`.
 */
export interface EvalStatus {
    /** Identifier of the evaluation; accepted by `EvalsNamespace.get()` */
    evalId: string;
    /** Current state of the evaluation */
    status: 'queued' | 'running' | 'completed' | 'failed';
    totalRecords: number;
    completedRecords: number;
    failedRecords: number;
    /** Optional result groups, each pairing one model configuration with its per-record outcomes */
    results?: Array<{
        modelConfig: ModelOverride[];
        records: Array<{
            recordId: string;
            status: 'success' | 'error';
            result?: any;
            error?: string;
            /** NOTE(review): unit is not stated in this declaration - confirm against the API */
            executionTime: number;
        }>;
    }>;
}
|
|
70
|
-
/**
 * Optional query parameters accepted by `EvalsNamespace.list()`.
 */
export interface EvalListParams {
    /** Filter by status */
    status?: 'queued' | 'running' | 'completed' | 'failed';
    /** Filter by flow ID */
    flowId?: string;
    /** Pagination limit */
    limit?: number;
    /** Pagination offset */
    offset?: number;
}
|
|
80
|
-
/**
 * EvalRunner - Builder returned by Runtype.evals.run()
 *
 * Provides terminal methods for executing evaluations:
 * - .stream() - Execute and stream results in real-time
 * - .submit() - Submit as a batch job for async processing
 */
export declare class EvalRunner {
    private getClient;
    private config;
    constructor(getClient: () => RuntypeClient, config: EvalRunConfig);
    /**
     * Execute the evaluation with streaming results
     *
     * Streams evaluation results as they complete.
     * Good for real-time feedback during development/testing.
     *
     * @param callbacks - Optional stream callbacks for the evaluation stream.
     * @returns The FlowResult wrapping the streaming response.
     *
     * @example
     * ```typescript
     * const result = await Runtype.evals.run({
     *   flowId: 'flow_123',
     *   recordType: 'test_data',
     *   models: [{ stepName: 'Analyze', model: 'gpt-4o' }]
     * }).stream()
     *
     * // Process with callbacks
     * await result.stream({
     *   onStepComplete: (result, event) => {
     *     console.log('Step completed:', event.name, result)
     *   },
     * })
     * ```
     */
    stream(callbacks?: StreamCallbacks): Promise<FlowResult>;
    /**
     * Submit the evaluation as a batch job
     *
     * Queues the evaluation to run asynchronously.
     * Good for large evaluations or scheduled runs.
     *
     * @returns The status object of the queued evaluation.
     *
     * @example
     * ```typescript
     * // `eval` cannot be used as a binding name in strict-mode/module code,
     * // so the example uses `evalJob`.
     * const evalJob = await Runtype.evals.run({
     *   flowId: 'flow_123',
     *   recordType: 'test_data',
     *   compareModels: [
     *     [{ stepName: 'Analyze', model: 'gpt-4o' }],
     *     [{ stepName: 'Analyze', model: 'claude-3-opus' }],
     *   ]
     * }).submit()
     *
     * console.log('Eval queued:', evalJob.evalId)
     *
     * // Check status later
     * const status = await Runtype.evals.get(evalJob.evalId)
     * ```
     */
    submit(): Promise<EvalStatus>;
    /**
     * Build the evaluation payload
     */
    private buildPayload;
}
|
|
143
|
-
/**
 * Namespace object exposing evaluation operations: `run`, `get` and `list`.
 */
export declare class EvalsNamespace {
    private getClient;
    constructor(getClient: () => RuntypeClient);
    /**
     * Run an evaluation
     *
     * Returns an EvalRunner with terminal methods:
     * - .stream() - Execute and stream results
     * - .submit() - Submit as batch job
     *
     * @example
     * ```typescript
     * // Single model evaluation with streaming
     * const result = await Runtype.evals.run({
     *   flowId: 'flow_123',
     *   recordType: 'test_data',
     *   models: [{ stepName: 'Analyze', model: 'gpt-4o' }]
     * }).stream()
     *
     * // Multi-model comparison as batch
     * // (`eval` is not a legal binding name in strict-mode/module code)
     * const evalJob = await Runtype.evals.run({
     *   flowId: 'flow_123',
     *   recordType: 'test_data',
     *   compareModels: [
     *     [{ stepName: 'Analyze', model: 'gpt-4o' }],
     *     [{ stepName: 'Analyze', model: 'claude-3-opus' }],
     *     [{ stepName: 'Analyze', model: 'gemini-pro' }],
     *   ]
     * }).submit()
     * ```
     */
    run(config: EvalRunConfig): EvalRunner;
    /**
     * Get evaluation status by ID
     *
     * @example
     * ```typescript
     * const status = await Runtype.evals.get('eval_123')
     * console.log(status.status, status.completedRecords, '/', status.totalRecords)
     * ```
     */
    get(evalId: string): Promise<EvalStatus>;
    /**
     * List evaluations
     *
     * @example
     * ```typescript
     * // List all evals
     * const evals = await Runtype.evals.list()
     *
     * // Filter by status
     * const running = await Runtype.evals.list({ status: 'running' })
     *
     * // Filter by flow
     * const flowEvals = await Runtype.evals.list({ flowId: 'flow_123' })
     * ```
     */
    list(params?: EvalListParams): Promise<{
        data: EvalStatus[];
        total: number;
    }>;
}
|
|
205
|
-
//# sourceMappingURL=evals-namespace.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"evals-namespace.d.ts","sourceRoot":"","sources":["../src/evals-namespace.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,WAAW,CAAA;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAA;AAC1C,OAAO,KAAK,EAAE,eAAe,EAAe,MAAM,gBAAgB,CAAA;AAMlE,MAAM,WAAW,aAAa;IAC5B,mCAAmC;IACnC,QAAQ,EAAE,MAAM,CAAA;IAChB,iCAAiC;IACjC,KAAK,EAAE,MAAM,CAAA;IACb,oCAAoC;IACpC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,mCAAmC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAA;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,0BAA0B;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,sDAAsD;IACtD,IAAI,CAAC,EAAE;QACL,IAAI,EAAE,MAAM,CAAA;QACZ,KAAK,EAAE,GAAG,EAAE,CAAA;KACb,CAAA;IACD,sCAAsC;IACtC,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,2CAA2C;IAC3C,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAA;QACZ,IAAI,EAAE,MAAM,CAAA;QACZ,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;KAC9B,CAAC,CAAA;IACF,kDAAkD;IAClD,MAAM,CAAC,EAAE,aAAa,EAAE,CAAA;IACxB,mDAAmD;IACnD,aAAa,CAAC,EAAE,aAAa,EAAE,EAAE,CAAA;IACjC,+BAA+B;IAC/B,YAAY,CAAC,EAAE,OAAO,CAAA;IACtB,kCAAkC;IAClC,QAAQ,CAAC,EAAE,OAAO,CAAA;IAClB,qCAAqC;IACrC,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,6CAA6C;IAC7C,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB,kCAAkC;IAClC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;IAC5B,0CAA0C;IAC1C,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,CAAA;IACrD,YAAY,EAAE,MAAM,CAAA;IACpB,gBAAgB,EAAE,MAAM,CAAA;IACxB,aAAa,EAAE,MAAM,CAAA;IACrB,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,WAAW,EAAE,aAAa,EAAE,CAAA;QAC5B,OAAO,EAAE,KAAK,CAAC;YACb,QAAQ,EAAE,MAAM,CAAA;YAChB,MAAM,EAAE,SAAS,GAAG,OAAO,CAAA;YAC3B,MAAM,CAAC,EAAE,GAAG,CAAA;YACZ,KAAK,CAAC,EAAE,MAAM,CAAA;YACd,aAAa,EAAE,MAAM,CAAA;SACtB,CAAC,CAAA;KACH,CAAC,CAAA;CACH;AAED,MAAM,WAAW,cAAc;IAC7B,uBAAuB;IACvB,MAAM,CAAC,EAAE,QAAQ,GAAG,SAAS,GAAG,WAAW,GAAG,QAAQ,CAAA;IACtD,wBAAwB;IACxB,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,uBAAuB;IACvB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,wBAAwB;IACxB,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB;AAMD;;;;;;GAMG;AACH,qBAAa,UAAU;IAEnB,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,MAAM;gBADN,SAAS,EAAE,MAAM,aA
Aa,EAC9B,MAAM,EAAE,aAAa;IAG/B;;;;;;;;;;;;;;;;;;;;;OAqBG;IACG,MAAM,CAAC,SAAS,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,UAAU,CAAC;IAoB9D;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACG,MAAM,IAAI,OAAO,CAAC,UAAU,CAAC;IASnC;;OAEG;IACH,OAAO,CAAC,YAAY;CA0CrB;AAMD,qBAAa,cAAc;IACb,OAAO,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM,aAAa;IAElD;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2BG;IACH,GAAG,CAAC,MAAM,EAAE,aAAa,GAAG,UAAU;IAItC;;;;;;;;OAQG;IACG,GAAG,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC;IAK9C;;;;;;;;;;;;;;OAcG;IACG,IAAI,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC;QAAE,IAAI,EAAE,UAAU,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;CAIpF"}
|
package/dist/evals-namespace.js
DELETED
|
@@ -1,208 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
/**
|
|
3
|
-
* EvalsNamespace - Static namespace for evaluation operations
|
|
4
|
-
*
|
|
5
|
-
* Provides methods for running evaluations and comparing model performance.
|
|
6
|
-
* Evals can be streamed for real-time results or submitted as batch jobs.
|
|
7
|
-
*/
|
|
8
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
-
exports.EvalsNamespace = exports.EvalRunner = void 0;
|
|
10
|
-
const flow_result_1 = require("./flow-result");
|
|
11
|
-
// ============================================================================
|
|
12
|
-
// EvalRunner
|
|
13
|
-
// ============================================================================
|
|
14
|
-
/**
|
|
15
|
-
* EvalRunner - Builder returned by Runtype.evals.run()
|
|
16
|
-
*
|
|
17
|
-
* Provides terminal methods for executing evaluations:
|
|
18
|
-
* - .stream() - Execute and stream results in real-time
|
|
19
|
-
* - .submit() - Submit as a batch job for async processing
|
|
20
|
-
*/
|
|
21
|
-
class EvalRunner {
|
|
22
|
-
constructor(getClient, config) {
|
|
23
|
-
this.getClient = getClient;
|
|
24
|
-
this.config = config;
|
|
25
|
-
}
|
|
26
|
-
/**
|
|
27
|
-
* Execute the evaluation with streaming results
|
|
28
|
-
*
|
|
29
|
-
* Streams evaluation results as they complete.
|
|
30
|
-
* Good for real-time feedback during development/testing.
|
|
31
|
-
*
|
|
32
|
-
* @example
|
|
33
|
-
* ```typescript
|
|
34
|
-
* const result = await Runtype.evals.run({
|
|
35
|
-
* flowId: 'flow_123',
|
|
36
|
-
* recordType: 'test_data',
|
|
37
|
-
* models: [{ stepName: 'Analyze', model: 'gpt-4o' }]
|
|
38
|
-
* }).stream()
|
|
39
|
-
*
|
|
40
|
-
* // Process with callbacks
|
|
41
|
-
* await result.stream({
|
|
42
|
-
* onStepComplete: (result, event) => {
|
|
43
|
-
* console.log('Step completed:', event.name, result)
|
|
44
|
-
* },
|
|
45
|
-
* })
|
|
46
|
-
* ```
|
|
47
|
-
*/
|
|
48
|
-
async stream(callbacks) {
|
|
49
|
-
const client = this.getClient();
|
|
50
|
-
const payload = this.buildPayload();
|
|
51
|
-
payload.stream = true;
|
|
52
|
-
const response = await client.requestStream('/eval/stream', {
|
|
53
|
-
method: 'POST',
|
|
54
|
-
body: JSON.stringify(payload),
|
|
55
|
-
});
|
|
56
|
-
const result = new flow_result_1.FlowResult(response);
|
|
57
|
-
if (callbacks) {
|
|
58
|
-
await result.stream(callbacks);
|
|
59
|
-
}
|
|
60
|
-
return result;
|
|
61
|
-
}
|
|
62
|
-
/**
|
|
63
|
-
* Submit the evaluation as a batch job
|
|
64
|
-
*
|
|
65
|
-
* Queues the evaluation to run asynchronously.
|
|
66
|
-
* Good for large evaluations or scheduled runs.
|
|
67
|
-
*
|
|
68
|
-
* @example
|
|
69
|
-
* ```typescript
|
|
70
|
-
* const eval = await Runtype.evals.run({
|
|
71
|
-
* flowId: 'flow_123',
|
|
72
|
-
* recordType: 'test_data',
|
|
73
|
-
* compareModels: [
|
|
74
|
-
* [{ stepName: 'Analyze', model: 'gpt-4o' }],
|
|
75
|
-
* [{ stepName: 'Analyze', model: 'claude-3-opus' }],
|
|
76
|
-
* ]
|
|
77
|
-
* }).submit()
|
|
78
|
-
*
|
|
79
|
-
* console.log('Eval queued:', eval.evalId)
|
|
80
|
-
*
|
|
81
|
-
* // Check status later
|
|
82
|
-
* const status = await Runtype.evals.get(eval.evalId)
|
|
83
|
-
* ```
|
|
84
|
-
*/
|
|
85
|
-
async submit() {
|
|
86
|
-
const client = this.getClient();
|
|
87
|
-
const payload = this.buildPayload();
|
|
88
|
-
payload.async = true;
|
|
89
|
-
return client.post('/evals', payload);
|
|
90
|
-
}
|
|
91
|
-
/**
|
|
92
|
-
* Build the evaluation payload
|
|
93
|
-
*/
|
|
94
|
-
buildPayload() {
|
|
95
|
-
const payload = {};
|
|
96
|
-
if (this.config.flowId) {
|
|
97
|
-
payload.flowId = this.config.flowId;
|
|
98
|
-
}
|
|
99
|
-
else if (this.config.flow) {
|
|
100
|
-
payload.flow = this.config.flow;
|
|
101
|
-
}
|
|
102
|
-
if (this.config.recordType) {
|
|
103
|
-
payload.recordType = this.config.recordType;
|
|
104
|
-
}
|
|
105
|
-
else if (this.config.records) {
|
|
106
|
-
payload.records = this.config.records;
|
|
107
|
-
}
|
|
108
|
-
if (this.config.models) {
|
|
109
|
-
payload.modelOverrides = this.config.models;
|
|
110
|
-
}
|
|
111
|
-
else if (this.config.compareModels) {
|
|
112
|
-
payload.modelConfigs = this.config.compareModels;
|
|
113
|
-
}
|
|
114
|
-
const options = {};
|
|
115
|
-
if (this.config.storeResults !== undefined)
|
|
116
|
-
options.storeResults = this.config.storeResults;
|
|
117
|
-
if (this.config.parallel !== undefined)
|
|
118
|
-
options.parallel = this.config.parallel;
|
|
119
|
-
if (this.config.concurrency !== undefined)
|
|
120
|
-
options.concurrency = this.config.concurrency;
|
|
121
|
-
if (this.config.continueOnError !== undefined)
|
|
122
|
-
options.continueOnError = this.config.continueOnError;
|
|
123
|
-
if (Object.keys(options).length > 0) {
|
|
124
|
-
payload.options = options;
|
|
125
|
-
}
|
|
126
|
-
if (this.config.filter) {
|
|
127
|
-
payload.filter = this.config.filter;
|
|
128
|
-
}
|
|
129
|
-
if (this.config.limit !== undefined) {
|
|
130
|
-
payload.limit = this.config.limit;
|
|
131
|
-
}
|
|
132
|
-
return payload;
|
|
133
|
-
}
|
|
134
|
-
}
|
|
135
|
-
exports.EvalRunner = EvalRunner;
|
|
136
|
-
// ============================================================================
|
|
137
|
-
// EvalsNamespace
|
|
138
|
-
// ============================================================================
|
|
139
|
-
class EvalsNamespace {
|
|
140
|
-
constructor(getClient) {
|
|
141
|
-
this.getClient = getClient;
|
|
142
|
-
}
|
|
143
|
-
/**
|
|
144
|
-
* Run an evaluation
|
|
145
|
-
*
|
|
146
|
-
* Returns an EvalRunner with terminal methods:
|
|
147
|
-
* - .stream() - Execute and stream results
|
|
148
|
-
* - .submit() - Submit as batch job
|
|
149
|
-
*
|
|
150
|
-
* @example
|
|
151
|
-
* ```typescript
|
|
152
|
-
* // Single model evaluation with streaming
|
|
153
|
-
* const result = await Runtype.evals.run({
|
|
154
|
-
* flowId: 'flow_123',
|
|
155
|
-
* recordType: 'test_data',
|
|
156
|
-
* models: [{ stepName: 'Analyze', model: 'gpt-4o' }]
|
|
157
|
-
* }).stream()
|
|
158
|
-
*
|
|
159
|
-
* // Multi-model comparison as batch
|
|
160
|
-
* const eval = await Runtype.evals.run({
|
|
161
|
-
* flowId: 'flow_123',
|
|
162
|
-
* recordType: 'test_data',
|
|
163
|
-
* compareModels: [
|
|
164
|
-
* [{ stepName: 'Analyze', model: 'gpt-4o' }],
|
|
165
|
-
* [{ stepName: 'Analyze', model: 'claude-3-opus' }],
|
|
166
|
-
* [{ stepName: 'Analyze', model: 'gemini-pro' }],
|
|
167
|
-
* ]
|
|
168
|
-
* }).submit()
|
|
169
|
-
* ```
|
|
170
|
-
*/
|
|
171
|
-
run(config) {
|
|
172
|
-
return new EvalRunner(this.getClient, config);
|
|
173
|
-
}
|
|
174
|
-
/**
|
|
175
|
-
* Get evaluation status by ID
|
|
176
|
-
*
|
|
177
|
-
* @example
|
|
178
|
-
* ```typescript
|
|
179
|
-
* const status = await Runtype.evals.get('eval_123')
|
|
180
|
-
* console.log(status.status, status.completedRecords, '/', status.totalRecords)
|
|
181
|
-
* ```
|
|
182
|
-
*/
|
|
183
|
-
async get(evalId) {
|
|
184
|
-
const client = this.getClient();
|
|
185
|
-
return client.get(`/evals/${evalId}`);
|
|
186
|
-
}
|
|
187
|
-
/**
|
|
188
|
-
* List evaluations
|
|
189
|
-
*
|
|
190
|
-
* @example
|
|
191
|
-
* ```typescript
|
|
192
|
-
* // List all evals
|
|
193
|
-
* const evals = await Runtype.evals.list()
|
|
194
|
-
*
|
|
195
|
-
* // Filter by status
|
|
196
|
-
* const running = await Runtype.evals.list({ status: 'running' })
|
|
197
|
-
*
|
|
198
|
-
* // Filter by flow
|
|
199
|
-
* const flowEvals = await Runtype.evals.list({ flowId: 'flow_123' })
|
|
200
|
-
* ```
|
|
201
|
-
*/
|
|
202
|
-
async list(params) {
|
|
203
|
-
const client = this.getClient();
|
|
204
|
-
return client.get('/evals', params);
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
exports.EvalsNamespace = EvalsNamespace;
|
|
208
|
-
//# sourceMappingURL=evals-namespace.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"evals-namespace.js","sourceRoot":"","sources":["../src/evals-namespace.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;AAGH,+CAA0C;AAiF1C,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;GAMG;AACH,MAAa,UAAU;IACrB,YACU,SAA8B,EAC9B,MAAqB;QADrB,cAAS,GAAT,SAAS,CAAqB;QAC9B,WAAM,GAAN,MAAM,CAAe;IAC5B,CAAC;IAEJ;;;;;;;;;;;;;;;;;;;;;OAqBG;IACH,KAAK,CAAC,MAAM,CAAC,SAA2B;QACtC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAA;QAE/B,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,EAAE,CAAA;QACnC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAA;QAErB,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,cAAc,EAAE;YAC1D,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;SAC9B,CAAC,CAAA;QAEF,MAAM,MAAM,GAAG,IAAI,wBAAU,CAAC,QAAQ,CAAC,CAAA;QAEvC,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;QAChC,CAAC;QAED,OAAO,MAAM,CAAA;IACf,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,KAAK,CAAC,MAAM;QACV,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAA;QAE/B,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,EAAE,CAAA;QACnC,OAAO,CAAC,KAAK,GAAG,IAAI,CAAA;QAEpB,OAAO,MAAM,CAAC,IAAI,CAAa,QAAQ,EAAE,OAAO,CAAC,CAAA;IACnD,CAAC;IAED;;OAEG;IACK,YAAY;QAClB,MAAM,OAAO,GAAQ,EAAE,CAAA;QAEvB,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACvB,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAA;QACrC,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;YAC5B,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAA;QACjC,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;YAC3B,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,CAAA;QAC7C,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YAC/B,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAA;QACvC,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACvB,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAA;QAC7C,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC;YACrC,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,CAAA;QAClD,CAAC;QAED,MAAM,OAAO,GAAQ,EAAE,CAAA;QACvB,IAAI,IAAI,CAAC,MAAM,CAAC,YAAY,KAAK,SAAS;YAAE,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAA;QAC3F,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ,KAAK,SAAS;YAAE,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,C
AAA;QAC/E,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,KAAK,SAAS;YAAE,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,CAAA;QACxF,IAAI,IAAI,CAAC,MAAM,CAAC,eAAe,KAAK,SAAS;YAC3C,OAAO,CAAC,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAA;QAEvD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpC,OAAO,CAAC,OAAO,GAAG,OAAO,CAAA;QAC3B,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACvB,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAA;QACrC,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;YACpC,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAA;QACnC,CAAC;QAED,OAAO,OAAO,CAAA;IAChB,CAAC;CACF;AA7HD,gCA6HC;AAED,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E,MAAa,cAAc;IACzB,YAAoB,SAA8B;QAA9B,cAAS,GAAT,SAAS,CAAqB;IAAG,CAAC;IAEtD;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2BG;IACH,GAAG,CAAC,MAAqB;QACvB,OAAO,IAAI,UAAU,CAAC,IAAI,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;IAC/C,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,GAAG,CAAC,MAAc;QACtB,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAA;QAC/B,OAAO,MAAM,CAAC,GAAG,CAAa,UAAU,MAAM,EAAE,CAAC,CAAA;IACnD,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,KAAK,CAAC,IAAI,CAAC,MAAuB;QAChC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAA;QAC/B,OAAO,MAAM,CAAC,GAAG,CAAwC,QAAQ,EAAE,MAAM,CAAC,CAAA;IAC5E,CAAC;CACF;AApED,wCAoEC"}
|