llama-stack-client 0.1.5 → 0.1.6
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/resources/agents/agents.d.ts +83 -2
- package/resources/agents/agents.d.ts.map +1 -1
- package/resources/agents/agents.js +6 -0
- package/resources/agents/agents.js.map +1 -1
- package/resources/agents/agents.mjs +6 -0
- package/resources/agents/agents.mjs.map +1 -1
- package/resources/agents/session.d.ts +15 -0
- package/resources/agents/session.d.ts.map +1 -1
- package/resources/agents/session.js +6 -0
- package/resources/agents/session.js.map +1 -1
- package/resources/agents/session.mjs +6 -0
- package/resources/agents/session.mjs.map +1 -1
- package/resources/agents/steps.d.ts +6 -0
- package/resources/agents/steps.d.ts.map +1 -1
- package/resources/agents/steps.js +3 -0
- package/resources/agents/steps.js.map +1 -1
- package/resources/agents/steps.mjs +3 -0
- package/resources/agents/steps.mjs.map +1 -1
- package/resources/agents/turn.d.ts +61 -7
- package/resources/agents/turn.d.ts.map +1 -1
- package/resources/agents/turn.js +3 -0
- package/resources/agents/turn.js.map +1 -1
- package/resources/agents/turn.mjs +3 -0
- package/resources/agents/turn.mjs.map +1 -1
- package/resources/datasetio.d.ts +27 -0
- package/resources/datasetio.d.ts.map +1 -1
- package/resources/datasetio.js +3 -0
- package/resources/datasetio.js.map +1 -1
- package/resources/datasetio.mjs +3 -0
- package/resources/datasetio.mjs.map +1 -1
- package/resources/eval/eval.d.ts +82 -5
- package/resources/eval/eval.d.ts.map +1 -1
- package/resources/eval/eval.js +12 -0
- package/resources/eval/eval.js.map +1 -1
- package/resources/eval/eval.mjs +12 -0
- package/resources/eval/eval.mjs.map +1 -1
- package/resources/eval/jobs.d.ts +9 -0
- package/resources/eval/jobs.d.ts.map +1 -1
- package/resources/eval/jobs.js +9 -0
- package/resources/eval/jobs.js.map +1 -1
- package/resources/eval/jobs.mjs +9 -0
- package/resources/eval/jobs.mjs.map +1 -1
- package/resources/scoring.d.ts +15 -0
- package/resources/scoring.d.ts.map +1 -1
- package/resources/scoring.js +3 -0
- package/resources/scoring.js.map +1 -1
- package/resources/scoring.mjs +3 -0
- package/resources/scoring.mjs.map +1 -1
- package/resources/shared.d.ts +9 -0
- package/resources/shared.d.ts.map +1 -1
- package/src/resources/agents/agents.ts +83 -2
- package/src/resources/agents/session.ts +15 -0
- package/src/resources/agents/steps.ts +6 -0
- package/src/resources/agents/turn.ts +61 -8
- package/src/resources/datasetio.ts +27 -0
- package/src/resources/eval/eval.ts +84 -7
- package/src/resources/eval/jobs.ts +9 -0
- package/src/resources/scoring.ts +15 -0
- package/src/resources/shared.ts +9 -0
- package/src/version.ts +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
|
@@ -33,10 +33,16 @@ export class Agents extends APIResource {
|
|
|
33
33
|
steps: StepsAPI.Steps = new StepsAPI.Steps(this._client);
|
|
34
34
|
turn: TurnAPI.TurnResource = new TurnAPI.TurnResource(this._client);
|
|
35
35
|
|
|
36
|
+
/**
|
|
37
|
+
* Create an agent with the given configuration.
|
|
38
|
+
*/
|
|
36
39
|
create(body: AgentCreateParams, options?: Core.RequestOptions): Core.APIPromise<AgentCreateResponse> {
|
|
37
40
|
return this._client.post('/v1/agents', { body, ...options });
|
|
38
41
|
}
|
|
39
42
|
|
|
43
|
+
/**
|
|
44
|
+
* Delete an agent by its ID.
|
|
45
|
+
*/
|
|
40
46
|
delete(agentId: string, options?: Core.RequestOptions): Core.APIPromise<void> {
|
|
41
47
|
return this._client.delete(`/v1/agents/${agentId}`, {
|
|
42
48
|
...options,
|
|
@@ -45,69 +51,141 @@ export class Agents extends APIResource {
|
|
|
45
51
|
}
|
|
46
52
|
}
|
|
47
53
|
|
|
54
|
+
/**
|
|
55
|
+
* An inference step in an agent turn.
|
|
56
|
+
*/
|
|
48
57
|
export interface InferenceStep {
|
|
49
58
|
/**
|
|
50
|
-
*
|
|
59
|
+
* The response from the LLM.
|
|
51
60
|
*/
|
|
52
61
|
model_response: Shared.CompletionMessage;
|
|
53
62
|
|
|
63
|
+
/**
|
|
64
|
+
* The ID of the step.
|
|
65
|
+
*/
|
|
54
66
|
step_id: string;
|
|
55
67
|
|
|
56
68
|
step_type: 'inference';
|
|
57
69
|
|
|
70
|
+
/**
|
|
71
|
+
* The ID of the turn.
|
|
72
|
+
*/
|
|
58
73
|
turn_id: string;
|
|
59
74
|
|
|
75
|
+
/**
|
|
76
|
+
* The time the step completed.
|
|
77
|
+
*/
|
|
60
78
|
completed_at?: string;
|
|
61
79
|
|
|
80
|
+
/**
|
|
81
|
+
* The time the step started.
|
|
82
|
+
*/
|
|
62
83
|
started_at?: string;
|
|
63
84
|
}
|
|
64
85
|
|
|
86
|
+
/**
|
|
87
|
+
* A memory retrieval step in an agent turn.
|
|
88
|
+
*/
|
|
65
89
|
export interface MemoryRetrievalStep {
|
|
66
90
|
/**
|
|
67
|
-
*
|
|
91
|
+
* The context retrieved from the vector databases.
|
|
68
92
|
*/
|
|
69
93
|
inserted_context: Shared.InterleavedContent;
|
|
70
94
|
|
|
95
|
+
/**
|
|
96
|
+
* The ID of the step.
|
|
97
|
+
*/
|
|
71
98
|
step_id: string;
|
|
72
99
|
|
|
73
100
|
step_type: 'memory_retrieval';
|
|
74
101
|
|
|
102
|
+
/**
|
|
103
|
+
* The ID of the turn.
|
|
104
|
+
*/
|
|
75
105
|
turn_id: string;
|
|
76
106
|
|
|
107
|
+
/**
|
|
108
|
+
* The IDs of the vector databases to retrieve context from.
|
|
109
|
+
*/
|
|
77
110
|
vector_db_ids: string;
|
|
78
111
|
|
|
112
|
+
/**
|
|
113
|
+
* The time the step completed.
|
|
114
|
+
*/
|
|
79
115
|
completed_at?: string;
|
|
80
116
|
|
|
117
|
+
/**
|
|
118
|
+
* The time the step started.
|
|
119
|
+
*/
|
|
81
120
|
started_at?: string;
|
|
82
121
|
}
|
|
83
122
|
|
|
123
|
+
/**
|
|
124
|
+
* A shield call step in an agent turn.
|
|
125
|
+
*/
|
|
84
126
|
export interface ShieldCallStep {
|
|
127
|
+
/**
|
|
128
|
+
* The ID of the step.
|
|
129
|
+
*/
|
|
85
130
|
step_id: string;
|
|
86
131
|
|
|
87
132
|
step_type: 'shield_call';
|
|
88
133
|
|
|
134
|
+
/**
|
|
135
|
+
* The ID of the turn.
|
|
136
|
+
*/
|
|
89
137
|
turn_id: string;
|
|
90
138
|
|
|
139
|
+
/**
|
|
140
|
+
* The time the step completed.
|
|
141
|
+
*/
|
|
91
142
|
completed_at?: string;
|
|
92
143
|
|
|
144
|
+
/**
|
|
145
|
+
* The time the step started.
|
|
146
|
+
*/
|
|
93
147
|
started_at?: string;
|
|
94
148
|
|
|
149
|
+
/**
|
|
150
|
+
* The violation from the shield call.
|
|
151
|
+
*/
|
|
95
152
|
violation?: Shared.SafetyViolation;
|
|
96
153
|
}
|
|
97
154
|
|
|
155
|
+
/**
|
|
156
|
+
* A tool execution step in an agent turn.
|
|
157
|
+
*/
|
|
98
158
|
export interface ToolExecutionStep {
|
|
159
|
+
/**
|
|
160
|
+
* The ID of the step.
|
|
161
|
+
*/
|
|
99
162
|
step_id: string;
|
|
100
163
|
|
|
101
164
|
step_type: 'tool_execution';
|
|
102
165
|
|
|
166
|
+
/**
|
|
167
|
+
* The tool calls to execute.
|
|
168
|
+
*/
|
|
103
169
|
tool_calls: Array<Shared.ToolCall>;
|
|
104
170
|
|
|
171
|
+
/**
|
|
172
|
+
* The tool responses from the tool calls.
|
|
173
|
+
*/
|
|
105
174
|
tool_responses: Array<ToolResponse>;
|
|
106
175
|
|
|
176
|
+
/**
|
|
177
|
+
* The ID of the turn.
|
|
178
|
+
*/
|
|
107
179
|
turn_id: string;
|
|
108
180
|
|
|
181
|
+
/**
|
|
182
|
+
* The time the step completed.
|
|
183
|
+
*/
|
|
109
184
|
completed_at?: string;
|
|
110
185
|
|
|
186
|
+
/**
|
|
187
|
+
* The time the step started.
|
|
188
|
+
*/
|
|
111
189
|
started_at?: string;
|
|
112
190
|
}
|
|
113
191
|
|
|
@@ -129,6 +207,9 @@ export interface AgentCreateResponse {
|
|
|
129
207
|
}
|
|
130
208
|
|
|
131
209
|
export interface AgentCreateParams {
|
|
210
|
+
/**
|
|
211
|
+
* The configuration for the agent.
|
|
212
|
+
*/
|
|
132
213
|
agent_config: Shared.AgentConfig;
|
|
133
214
|
}
|
|
134
215
|
|
|
@@ -6,6 +6,9 @@ import * as Core from '../../core';
|
|
|
6
6
|
import * as TurnAPI from './turn';
|
|
7
7
|
|
|
8
8
|
export class SessionResource extends APIResource {
|
|
9
|
+
/**
|
|
10
|
+
* Create a new session for an agent.
|
|
11
|
+
*/
|
|
9
12
|
create(
|
|
10
13
|
agentId: string,
|
|
11
14
|
body: SessionCreateParams,
|
|
@@ -14,6 +17,9 @@ export class SessionResource extends APIResource {
|
|
|
14
17
|
return this._client.post(`/v1/agents/${agentId}/session`, { body, ...options });
|
|
15
18
|
}
|
|
16
19
|
|
|
20
|
+
/**
|
|
21
|
+
* Retrieve an agent session by its ID.
|
|
22
|
+
*/
|
|
17
23
|
retrieve(
|
|
18
24
|
agentId: string,
|
|
19
25
|
sessionId: string,
|
|
@@ -33,6 +39,9 @@ export class SessionResource extends APIResource {
|
|
|
33
39
|
return this._client.get(`/v1/agents/${agentId}/session/${sessionId}`, { query, ...options });
|
|
34
40
|
}
|
|
35
41
|
|
|
42
|
+
/**
|
|
43
|
+
* Delete an agent session by its ID.
|
|
44
|
+
*/
|
|
36
45
|
delete(agentId: string, sessionId: string, options?: Core.RequestOptions): Core.APIPromise<void> {
|
|
37
46
|
return this._client.delete(`/v1/agents/${agentId}/session/${sessionId}`, {
|
|
38
47
|
...options,
|
|
@@ -59,10 +68,16 @@ export interface SessionCreateResponse {
|
|
|
59
68
|
}
|
|
60
69
|
|
|
61
70
|
export interface SessionCreateParams {
|
|
71
|
+
/**
|
|
72
|
+
* The name of the session to create.
|
|
73
|
+
*/
|
|
62
74
|
session_name: string;
|
|
63
75
|
}
|
|
64
76
|
|
|
65
77
|
export interface SessionRetrieveParams {
|
|
78
|
+
/**
|
|
79
|
+
* (Optional) List of turn IDs to filter the session by.
|
|
80
|
+
*/
|
|
66
81
|
turn_ids?: Array<string>;
|
|
67
82
|
}
|
|
68
83
|
|
|
@@ -5,6 +5,9 @@ import * as Core from '../../core';
|
|
|
5
5
|
import * as AgentsAPI from './agents';
|
|
6
6
|
|
|
7
7
|
export class Steps extends APIResource {
|
|
8
|
+
/**
|
|
9
|
+
* Retrieve an agent step by its ID.
|
|
10
|
+
*/
|
|
8
11
|
retrieve(
|
|
9
12
|
agentId: string,
|
|
10
13
|
sessionId: string,
|
|
@@ -20,6 +23,9 @@ export class Steps extends APIResource {
|
|
|
20
23
|
}
|
|
21
24
|
|
|
22
25
|
export interface StepRetrieveResponse {
|
|
26
|
+
/**
|
|
27
|
+
* An inference step in an agent turn.
|
|
28
|
+
*/
|
|
23
29
|
step:
|
|
24
30
|
| AgentsAPI.InferenceStep
|
|
25
31
|
| AgentsAPI.ToolExecutionStep
|
|
@@ -9,6 +9,9 @@ import * as AgentsAPI from './agents';
|
|
|
9
9
|
import { Stream } from '../../streaming';
|
|
10
10
|
|
|
11
11
|
export class TurnResource extends APIResource {
|
|
12
|
+
/**
|
|
13
|
+
* Create a new turn for an agent.
|
|
14
|
+
*/
|
|
12
15
|
create(
|
|
13
16
|
agentId: string,
|
|
14
17
|
sessionId: string,
|
|
@@ -40,6 +43,9 @@ export class TurnResource extends APIResource {
|
|
|
40
43
|
}) as APIPromise<Turn> | APIPromise<Stream<AgentTurnResponseStreamChunk>>;
|
|
41
44
|
}
|
|
42
45
|
|
|
46
|
+
/**
|
|
47
|
+
* Retrieve an agent turn by its ID.
|
|
48
|
+
*/
|
|
43
49
|
retrieve(
|
|
44
50
|
agentId: string,
|
|
45
51
|
sessionId: string,
|
|
@@ -128,9 +134,12 @@ export interface Turn {
|
|
|
128
134
|
}
|
|
129
135
|
|
|
130
136
|
export namespace Turn {
|
|
137
|
+
/**
|
|
138
|
+
* An attachment to an agent turn.
|
|
139
|
+
*/
|
|
131
140
|
export interface OutputAttachment {
|
|
132
141
|
/**
|
|
133
|
-
*
|
|
142
|
+
* The content of the attachment.
|
|
134
143
|
*/
|
|
135
144
|
content:
|
|
136
145
|
| string
|
|
@@ -139,6 +148,9 @@ export namespace Turn {
|
|
|
139
148
|
| Array<Shared.InterleavedContentItem>
|
|
140
149
|
| OutputAttachment.URL;
|
|
141
150
|
|
|
151
|
+
/**
|
|
152
|
+
* The MIME type of the attachment.
|
|
153
|
+
*/
|
|
142
154
|
mime_type: string;
|
|
143
155
|
}
|
|
144
156
|
|
|
@@ -225,6 +237,9 @@ export namespace TurnResponseEventPayload {
|
|
|
225
237
|
|
|
226
238
|
step_id: string;
|
|
227
239
|
|
|
240
|
+
/**
|
|
241
|
+
* Type of the step in an agent turn.
|
|
242
|
+
*/
|
|
228
243
|
step_type: 'inference' | 'tool_execution' | 'shield_call' | 'memory_retrieval';
|
|
229
244
|
|
|
230
245
|
metadata?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
@@ -237,12 +252,18 @@ export namespace TurnResponseEventPayload {
|
|
|
237
252
|
|
|
238
253
|
step_id: string;
|
|
239
254
|
|
|
255
|
+
/**
|
|
256
|
+
* Type of the step in an agent turn.
|
|
257
|
+
*/
|
|
240
258
|
step_type: 'inference' | 'tool_execution' | 'shield_call' | 'memory_retrieval';
|
|
241
259
|
}
|
|
242
260
|
|
|
243
261
|
export interface AgentTurnResponseStepCompletePayload {
|
|
244
262
|
event_type: 'step_complete';
|
|
245
263
|
|
|
264
|
+
/**
|
|
265
|
+
* An inference step in an agent turn.
|
|
266
|
+
*/
|
|
246
267
|
step_details:
|
|
247
268
|
| AgentsAPI.InferenceStep
|
|
248
269
|
| AgentsAPI.ToolExecutionStep
|
|
@@ -251,6 +272,9 @@ export namespace TurnResponseEventPayload {
|
|
|
251
272
|
|
|
252
273
|
step_id: string;
|
|
253
274
|
|
|
275
|
+
/**
|
|
276
|
+
* Type of the step in an agent turn.
|
|
277
|
+
*/
|
|
254
278
|
step_type: 'inference' | 'tool_execution' | 'shield_call' | 'memory_retrieval';
|
|
255
279
|
}
|
|
256
280
|
|
|
@@ -282,26 +306,42 @@ export namespace TurnResponseEventPayload {
|
|
|
282
306
|
export type TurnCreateParams = TurnCreateParamsNonStreaming | TurnCreateParamsStreaming;
|
|
283
307
|
|
|
284
308
|
export interface TurnCreateParamsBase {
|
|
309
|
+
/**
|
|
310
|
+
* List of messages to start the turn with.
|
|
311
|
+
*/
|
|
285
312
|
messages: Array<Shared.UserMessage | Shared.ToolResponseMessage>;
|
|
286
313
|
|
|
287
|
-
|
|
288
|
-
|
|
314
|
+
/**
|
|
315
|
+
* (Optional) List of documents to create the turn with.
|
|
316
|
+
*/
|
|
289
317
|
documents?: Array<TurnCreateParams.Document>;
|
|
290
318
|
|
|
319
|
+
/**
|
|
320
|
+
* (Optional) If True, generate an SSE event stream of the response. Defaults to
|
|
321
|
+
* False.
|
|
322
|
+
*/
|
|
291
323
|
stream?: boolean;
|
|
292
324
|
|
|
293
325
|
/**
|
|
294
|
-
*
|
|
326
|
+
* (Optional) The tool configuration to create the turn with, will be used to
|
|
327
|
+
* override the agent's tool_config.
|
|
295
328
|
*/
|
|
296
329
|
tool_config?: TurnCreateParams.ToolConfig;
|
|
297
330
|
|
|
331
|
+
/**
|
|
332
|
+
* (Optional) List of toolgroups to create the turn with, will be used in addition
|
|
333
|
+
* to the agent's config toolgroups for the request.
|
|
334
|
+
*/
|
|
298
335
|
toolgroups?: Array<string | TurnCreateParams.AgentToolGroupWithArgs>;
|
|
299
336
|
}
|
|
300
337
|
|
|
301
338
|
export namespace TurnCreateParams {
|
|
339
|
+
/**
|
|
340
|
+
* A document to be used by an agent.
|
|
341
|
+
*/
|
|
302
342
|
export interface Document {
|
|
303
343
|
/**
|
|
304
|
-
*
|
|
344
|
+
* The content of the document.
|
|
305
345
|
*/
|
|
306
346
|
content:
|
|
307
347
|
| string
|
|
@@ -310,6 +350,9 @@ export namespace TurnCreateParams {
|
|
|
310
350
|
| Array<Shared.InterleavedContentItem>
|
|
311
351
|
| Document.URL;
|
|
312
352
|
|
|
353
|
+
/**
|
|
354
|
+
* The MIME type of the document.
|
|
355
|
+
*/
|
|
313
356
|
mime_type: string;
|
|
314
357
|
}
|
|
315
358
|
|
|
@@ -378,7 +421,8 @@ export namespace TurnCreateParams {
|
|
|
378
421
|
}
|
|
379
422
|
|
|
380
423
|
/**
|
|
381
|
-
*
|
|
424
|
+
* (Optional) The tool configuration to create the turn with, will be used to
|
|
425
|
+
* override the agent's tool_config.
|
|
382
426
|
*/
|
|
383
427
|
export interface ToolConfig {
|
|
384
428
|
/**
|
|
@@ -419,10 +463,18 @@ export namespace TurnCreateParams {
|
|
|
419
463
|
}
|
|
420
464
|
|
|
421
465
|
export interface TurnCreateParamsNonStreaming extends TurnCreateParamsBase {
|
|
466
|
+
/**
|
|
467
|
+
* (Optional) If True, generate an SSE event stream of the response. Defaults to
|
|
468
|
+
* False.
|
|
469
|
+
*/
|
|
422
470
|
stream?: false;
|
|
423
471
|
}
|
|
424
472
|
|
|
425
473
|
export interface TurnCreateParamsStreaming extends TurnCreateParamsBase {
|
|
474
|
+
/**
|
|
475
|
+
* (Optional) If True, generate an SSE event stream of the response. Defaults to
|
|
476
|
+
* False.
|
|
477
|
+
*/
|
|
426
478
|
stream: true;
|
|
427
479
|
}
|
|
428
480
|
|
|
@@ -430,9 +482,10 @@ export type TurnResumeParams = TurnResumeParamsNonStreaming | TurnResumeParamsSt
|
|
|
430
482
|
|
|
431
483
|
export interface TurnResumeParamsBase {
|
|
432
484
|
/**
|
|
433
|
-
* The tool call responses to resume the turn with.
|
|
485
|
+
* The tool call responses to resume the turn with. NOTE: ToolResponseMessage will
|
|
486
|
+
* be deprecated. Use ToolResponse.
|
|
434
487
|
*/
|
|
435
|
-
tool_responses: Array<Shared.ToolResponseMessage>;
|
|
488
|
+
tool_responses: Array<AgentsAPI.ToolResponse> | Array<Shared.ToolResponseMessage>;
|
|
436
489
|
|
|
437
490
|
/**
|
|
438
491
|
* Whether to stream the response.
|
|
@@ -12,6 +12,9 @@ export class Datasetio extends APIResource {
|
|
|
12
12
|
});
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
/**
|
|
16
|
+
* Get a paginated list of rows from a dataset.
|
|
17
|
+
*/
|
|
15
18
|
getRowsPaginated(
|
|
16
19
|
query: DatasetioGetRowsPaginatedParams,
|
|
17
20
|
options?: Core.RequestOptions,
|
|
@@ -20,11 +23,23 @@ export class Datasetio extends APIResource {
|
|
|
20
23
|
}
|
|
21
24
|
}
|
|
22
25
|
|
|
26
|
+
/**
|
|
27
|
+
* A paginated list of rows from a dataset.
|
|
28
|
+
*/
|
|
23
29
|
export interface PaginatedRowsResult {
|
|
30
|
+
/**
|
|
31
|
+
* The rows in the current page.
|
|
32
|
+
*/
|
|
24
33
|
rows: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
|
|
25
34
|
|
|
35
|
+
/**
|
|
36
|
+
* The total number of rows in the dataset.
|
|
37
|
+
*/
|
|
26
38
|
total_count: number;
|
|
27
39
|
|
|
40
|
+
/**
|
|
41
|
+
* The token to get the next page of rows.
|
|
42
|
+
*/
|
|
28
43
|
next_page_token?: string;
|
|
29
44
|
}
|
|
30
45
|
|
|
@@ -35,12 +50,24 @@ export interface DatasetioAppendRowsParams {
|
|
|
35
50
|
}
|
|
36
51
|
|
|
37
52
|
export interface DatasetioGetRowsPaginatedParams {
|
|
53
|
+
/**
|
|
54
|
+
* The ID of the dataset to get the rows from.
|
|
55
|
+
*/
|
|
38
56
|
dataset_id: string;
|
|
39
57
|
|
|
58
|
+
/**
|
|
59
|
+
* The number of rows to get per page.
|
|
60
|
+
*/
|
|
40
61
|
rows_in_page: number;
|
|
41
62
|
|
|
63
|
+
/**
|
|
64
|
+
* (Optional) A condition to filter the rows by.
|
|
65
|
+
*/
|
|
42
66
|
filter_condition?: string;
|
|
43
67
|
|
|
68
|
+
/**
|
|
69
|
+
* The token to get the next page of rows.
|
|
70
|
+
*/
|
|
44
71
|
page_token?: string;
|
|
45
72
|
}
|
|
46
73
|
|
|
@@ -10,6 +10,9 @@ import { JobStatusResponse, Jobs } from './jobs';
|
|
|
10
10
|
export class Eval extends APIResource {
|
|
11
11
|
jobs: JobsAPI.Jobs = new JobsAPI.Jobs(this._client);
|
|
12
12
|
|
|
13
|
+
/**
|
|
14
|
+
* Evaluate a list of rows on a benchmark.
|
|
15
|
+
*/
|
|
13
16
|
evaluateRows(
|
|
14
17
|
benchmarkId: string,
|
|
15
18
|
body: EvalEvaluateRowsParams,
|
|
@@ -18,6 +21,9 @@ export class Eval extends APIResource {
|
|
|
18
21
|
return this._client.post(`/v1/eval/benchmarks/${benchmarkId}/evaluations`, { body, ...options });
|
|
19
22
|
}
|
|
20
23
|
|
|
24
|
+
/**
|
|
25
|
+
* Evaluate a list of rows on a benchmark.
|
|
26
|
+
*/
|
|
21
27
|
evaluateRowsAlpha(
|
|
22
28
|
benchmarkId: string,
|
|
23
29
|
body: EvalEvaluateRowsAlphaParams,
|
|
@@ -26,10 +32,16 @@ export class Eval extends APIResource {
|
|
|
26
32
|
return this._client.post(`/v1/eval/benchmarks/${benchmarkId}/evaluations`, { body, ...options });
|
|
27
33
|
}
|
|
28
34
|
|
|
35
|
+
/**
|
|
36
|
+
* Run an evaluation on a benchmark.
|
|
37
|
+
*/
|
|
29
38
|
runEval(benchmarkId: string, body: EvalRunEvalParams, options?: Core.RequestOptions): Core.APIPromise<Job> {
|
|
30
39
|
return this._client.post(`/v1/eval/benchmarks/${benchmarkId}/jobs`, { body, ...options });
|
|
31
40
|
}
|
|
32
41
|
|
|
42
|
+
/**
|
|
43
|
+
* Run an evaluation on a benchmark.
|
|
44
|
+
*/
|
|
33
45
|
runEvalAlpha(
|
|
34
46
|
benchmarkId: string,
|
|
35
47
|
body: EvalRunEvalAlphaParams,
|
|
@@ -39,40 +51,81 @@ export class Eval extends APIResource {
|
|
|
39
51
|
}
|
|
40
52
|
}
|
|
41
53
|
|
|
54
|
+
/**
|
|
55
|
+
* A benchmark configuration for evaluation.
|
|
56
|
+
*/
|
|
42
57
|
export interface BenchmarkConfig {
|
|
58
|
+
/**
|
|
59
|
+
* The candidate to evaluate.
|
|
60
|
+
*/
|
|
43
61
|
eval_candidate: EvalCandidate;
|
|
44
62
|
|
|
63
|
+
/**
|
|
64
|
+
* Map between scoring function id and parameters for each scoring function you
|
|
65
|
+
* want to run
|
|
66
|
+
*/
|
|
45
67
|
scoring_params: Record<string, ScoringFunctionsAPI.ScoringFnParams>;
|
|
46
68
|
|
|
69
|
+
/**
|
|
70
|
+
* (Optional) The number of examples to evaluate. If not provided, all examples in
|
|
71
|
+
* the dataset will be evaluated
|
|
72
|
+
*/
|
|
47
73
|
num_examples?: number;
|
|
48
74
|
}
|
|
49
75
|
|
|
76
|
+
/**
|
|
77
|
+
* A model candidate for evaluation.
|
|
78
|
+
*/
|
|
50
79
|
export type EvalCandidate = EvalCandidate.ModelCandidate | EvalCandidate.AgentCandidate;
|
|
51
80
|
|
|
52
81
|
export namespace EvalCandidate {
|
|
82
|
+
/**
|
|
83
|
+
* A model candidate for evaluation.
|
|
84
|
+
*/
|
|
53
85
|
export interface ModelCandidate {
|
|
86
|
+
/**
|
|
87
|
+
* The model ID to evaluate.
|
|
88
|
+
*/
|
|
54
89
|
model: string;
|
|
55
90
|
|
|
91
|
+
/**
|
|
92
|
+
* The sampling parameters for the model.
|
|
93
|
+
*/
|
|
56
94
|
sampling_params: Shared.SamplingParams;
|
|
57
95
|
|
|
58
96
|
type: 'model';
|
|
59
97
|
|
|
60
98
|
/**
|
|
61
|
-
*
|
|
99
|
+
* (Optional) The system message providing instructions or context to the model.
|
|
62
100
|
*/
|
|
63
101
|
system_message?: Shared.SystemMessage;
|
|
64
102
|
}
|
|
65
103
|
|
|
104
|
+
/**
|
|
105
|
+
* An agent candidate for evaluation.
|
|
106
|
+
*/
|
|
66
107
|
export interface AgentCandidate {
|
|
108
|
+
/**
|
|
109
|
+
* The configuration for the agent candidate.
|
|
110
|
+
*/
|
|
67
111
|
config: Shared.AgentConfig;
|
|
68
112
|
|
|
69
113
|
type: 'agent';
|
|
70
114
|
}
|
|
71
115
|
}
|
|
72
116
|
|
|
117
|
+
/**
|
|
118
|
+
* The response from an evaluation.
|
|
119
|
+
*/
|
|
73
120
|
export interface EvaluateResponse {
|
|
121
|
+
/**
|
|
122
|
+
* The generations from the evaluation.
|
|
123
|
+
*/
|
|
74
124
|
generations: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
|
|
75
125
|
|
|
126
|
+
/**
|
|
127
|
+
* The scores from the evaluation.
|
|
128
|
+
*/
|
|
76
129
|
scores: Record<string, Shared.ScoringResult>;
|
|
77
130
|
}
|
|
78
131
|
|
|
@@ -81,27 +134,51 @@ export interface Job {
|
|
|
81
134
|
}
|
|
82
135
|
|
|
83
136
|
export interface EvalEvaluateRowsParams {
|
|
137
|
+
/**
|
|
138
|
+
* The configuration for the benchmark.
|
|
139
|
+
*/
|
|
140
|
+
benchmark_config: BenchmarkConfig;
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* The rows to evaluate.
|
|
144
|
+
*/
|
|
84
145
|
input_rows: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
|
|
85
146
|
|
|
147
|
+
/**
|
|
148
|
+
* The scoring functions to use for the evaluation.
|
|
149
|
+
*/
|
|
86
150
|
scoring_functions: Array<string>;
|
|
87
|
-
|
|
88
|
-
task_config: BenchmarkConfig;
|
|
89
151
|
}
|
|
90
152
|
|
|
91
153
|
export interface EvalEvaluateRowsAlphaParams {
|
|
154
|
+
/**
|
|
155
|
+
* The configuration for the benchmark.
|
|
156
|
+
*/
|
|
157
|
+
benchmark_config: BenchmarkConfig;
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* The rows to evaluate.
|
|
161
|
+
*/
|
|
92
162
|
input_rows: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
|
|
93
163
|
|
|
164
|
+
/**
|
|
165
|
+
* The scoring functions to use for the evaluation.
|
|
166
|
+
*/
|
|
94
167
|
scoring_functions: Array<string>;
|
|
95
|
-
|
|
96
|
-
task_config: BenchmarkConfig;
|
|
97
168
|
}
|
|
98
169
|
|
|
99
170
|
export interface EvalRunEvalParams {
|
|
100
|
-
|
|
171
|
+
/**
|
|
172
|
+
* The configuration for the benchmark.
|
|
173
|
+
*/
|
|
174
|
+
benchmark_config: BenchmarkConfig;
|
|
101
175
|
}
|
|
102
176
|
|
|
103
177
|
export interface EvalRunEvalAlphaParams {
|
|
104
|
-
|
|
178
|
+
/**
|
|
179
|
+
* The configuration for the benchmark.
|
|
180
|
+
*/
|
|
181
|
+
benchmark_config: BenchmarkConfig;
|
|
105
182
|
}
|
|
106
183
|
|
|
107
184
|
Eval.Jobs = Jobs;
|
|
@@ -5,6 +5,9 @@ import * as Core from '../../core';
|
|
|
5
5
|
import * as EvalAPI from './eval';
|
|
6
6
|
|
|
7
7
|
export class Jobs extends APIResource {
|
|
8
|
+
/**
|
|
9
|
+
* Get the result of a job.
|
|
10
|
+
*/
|
|
8
11
|
retrieve(
|
|
9
12
|
benchmarkId: string,
|
|
10
13
|
jobId: string,
|
|
@@ -13,6 +16,9 @@ export class Jobs extends APIResource {
|
|
|
13
16
|
return this._client.get(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}/result`, options);
|
|
14
17
|
}
|
|
15
18
|
|
|
19
|
+
/**
|
|
20
|
+
* Cancel a job.
|
|
21
|
+
*/
|
|
16
22
|
cancel(benchmarkId: string, jobId: string, options?: Core.RequestOptions): Core.APIPromise<void> {
|
|
17
23
|
return this._client.delete(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}`, {
|
|
18
24
|
...options,
|
|
@@ -20,6 +26,9 @@ export class Jobs extends APIResource {
|
|
|
20
26
|
});
|
|
21
27
|
}
|
|
22
28
|
|
|
29
|
+
/**
|
|
30
|
+
* Get the status of a job.
|
|
31
|
+
*/
|
|
23
32
|
status(
|
|
24
33
|
benchmarkId: string,
|
|
25
34
|
jobId: string,
|