llama-stack-client 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.mts +8 -5
- package/index.d.ts +8 -5
- package/index.d.ts.map +1 -1
- package/index.js +3 -0
- package/index.js.map +1 -1
- package/index.mjs +4 -1
- package/index.mjs.map +1 -1
- package/internal/decoders/line.d.ts +2 -2
- package/internal/decoders/line.d.ts.map +1 -1
- package/internal/decoders/line.js +69 -34
- package/internal/decoders/line.js.map +1 -1
- package/internal/decoders/line.mjs +69 -34
- package/internal/decoders/line.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/agents/turn.d.ts +61 -5
- package/resources/agents/turn.d.ts.map +1 -1
- package/resources/benchmarks.d.ts +32 -0
- package/resources/benchmarks.d.ts.map +1 -0
- package/resources/benchmarks.js +22 -0
- package/resources/benchmarks.js.map +1 -0
- package/resources/benchmarks.mjs +18 -0
- package/resources/benchmarks.mjs.map +1 -0
- package/resources/datasets.d.ts +18 -3
- package/resources/datasets.d.ts.map +1 -1
- package/resources/eval/eval.d.ts +19 -18
- package/resources/eval/eval.d.ts.map +1 -1
- package/resources/eval/eval.js +6 -0
- package/resources/eval/eval.js.map +1 -1
- package/resources/eval/eval.mjs +6 -0
- package/resources/eval/eval.mjs.map +1 -1
- package/resources/eval/index.d.ts +1 -1
- package/resources/eval/index.d.ts.map +1 -1
- package/resources/eval/index.js.map +1 -1
- package/resources/eval/index.mjs.map +1 -1
- package/resources/eval/jobs.d.ts +3 -3
- package/resources/eval/jobs.d.ts.map +1 -1
- package/resources/eval/jobs.js +6 -6
- package/resources/eval/jobs.js.map +1 -1
- package/resources/eval/jobs.mjs +6 -6
- package/resources/eval/jobs.mjs.map +1 -1
- package/resources/eval-tasks.d.ts +6 -17
- package/resources/eval-tasks.d.ts.map +1 -1
- package/resources/eval-tasks.js.map +1 -1
- package/resources/eval-tasks.mjs.map +1 -1
- package/resources/index.d.ts +3 -2
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +3 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +2 -1
- package/resources/index.mjs.map +1 -1
- package/resources/inference.d.ts +11 -0
- package/resources/inference.d.ts.map +1 -1
- package/resources/shared.d.ts +84 -11
- package/resources/shared.d.ts.map +1 -1
- package/resources/telemetry.d.ts +3 -3
- package/resources/telemetry.d.ts.map +1 -1
- package/resources/tool-runtime/tool-runtime.d.ts +6 -1
- package/resources/tool-runtime/tool-runtime.d.ts.map +1 -1
- package/resources/tool-runtime/tool-runtime.js.map +1 -1
- package/resources/tool-runtime/tool-runtime.mjs.map +1 -1
- package/resources/toolgroups.d.ts +12 -3
- package/resources/toolgroups.d.ts.map +1 -1
- package/resources/toolgroups.js.map +1 -1
- package/resources/toolgroups.mjs.map +1 -1
- package/src/index.ts +25 -12
- package/src/internal/decoders/line.ts +69 -38
- package/src/resources/agents/turn.ts +70 -6
- package/src/resources/benchmarks.ts +69 -0
- package/src/resources/datasets.ts +21 -3
- package/src/resources/eval/eval.ts +43 -26
- package/src/resources/eval/index.ts +3 -1
- package/src/resources/eval/jobs.ts +6 -6
- package/src/resources/eval-tasks.ts +11 -27
- package/src/resources/index.ts +11 -8
- package/src/resources/inference.ts +20 -0
- package/src/resources/shared.ts +104 -14
- package/src/resources/telemetry.ts +3 -3
- package/src/resources/tool-runtime/tool-runtime.ts +7 -1
- package/src/resources/toolgroups.ts +14 -3
- package/src/streaming.ts +5 -1
- package/src/version.ts +1 -1
- package/streaming.d.ts +3 -1
- package/streaming.d.ts.map +1 -1
- package/streaming.js +4 -1
- package/streaming.js.map +1 -1
- package/streaming.mjs +4 -1
- package/streaming.mjs.map +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
|
@@ -13,52 +13,58 @@ export class LineDecoder {
|
|
|
13
13
|
static NEWLINE_CHARS = new Set(['\n', '\r']);
|
|
14
14
|
static NEWLINE_REGEXP = /\r\n|[\n\r]/g;
|
|
15
15
|
|
|
16
|
-
buffer:
|
|
17
|
-
|
|
16
|
+
buffer: Uint8Array;
|
|
17
|
+
#carriageReturnIndex: number | null;
|
|
18
18
|
textDecoder: any; // TextDecoder found in browsers; not typed to avoid pulling in either "dom" or "node" types.
|
|
19
19
|
|
|
20
20
|
constructor() {
|
|
21
|
-
this.buffer =
|
|
22
|
-
this
|
|
21
|
+
this.buffer = new Uint8Array();
|
|
22
|
+
this.#carriageReturnIndex = null;
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
decode(chunk: Bytes): string[] {
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
if (this.trailingCR) {
|
|
29
|
-
text = '\r' + text;
|
|
30
|
-
this.trailingCR = false;
|
|
31
|
-
}
|
|
32
|
-
if (text.endsWith('\r')) {
|
|
33
|
-
this.trailingCR = true;
|
|
34
|
-
text = text.slice(0, -1);
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
if (!text) {
|
|
26
|
+
if (chunk == null) {
|
|
38
27
|
return [];
|
|
39
28
|
}
|
|
40
29
|
|
|
41
|
-
const
|
|
42
|
-
|
|
30
|
+
const binaryChunk =
|
|
31
|
+
chunk instanceof ArrayBuffer ? new Uint8Array(chunk)
|
|
32
|
+
: typeof chunk === 'string' ? new TextEncoder().encode(chunk)
|
|
33
|
+
: chunk;
|
|
34
|
+
|
|
35
|
+
let newData = new Uint8Array(this.buffer.length + binaryChunk.length);
|
|
36
|
+
newData.set(this.buffer);
|
|
37
|
+
newData.set(binaryChunk, this.buffer.length);
|
|
38
|
+
this.buffer = newData;
|
|
39
|
+
|
|
40
|
+
const lines: string[] = [];
|
|
41
|
+
let patternIndex;
|
|
42
|
+
while ((patternIndex = findNewlineIndex(this.buffer, this.#carriageReturnIndex)) != null) {
|
|
43
|
+
if (patternIndex.carriage && this.#carriageReturnIndex == null) {
|
|
44
|
+
// skip until we either get a corresponding `\n`, a new `\r` or nothing
|
|
45
|
+
this.#carriageReturnIndex = patternIndex.index;
|
|
46
|
+
continue;
|
|
47
|
+
}
|
|
43
48
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
+
// we got double \r or \rtext\n
|
|
50
|
+
if (
|
|
51
|
+
this.#carriageReturnIndex != null &&
|
|
52
|
+
(patternIndex.index !== this.#carriageReturnIndex + 1 || patternIndex.carriage)
|
|
53
|
+
) {
|
|
54
|
+
lines.push(this.decodeText(this.buffer.slice(0, this.#carriageReturnIndex - 1)));
|
|
55
|
+
this.buffer = this.buffer.slice(this.#carriageReturnIndex);
|
|
56
|
+
this.#carriageReturnIndex = null;
|
|
57
|
+
continue;
|
|
58
|
+
}
|
|
49
59
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
return [];
|
|
53
|
-
}
|
|
60
|
+
const endIndex =
|
|
61
|
+
this.#carriageReturnIndex !== null ? patternIndex.preceding - 1 : patternIndex.preceding;
|
|
54
62
|
|
|
55
|
-
|
|
56
|
-
lines
|
|
57
|
-
this.buffer = [];
|
|
58
|
-
}
|
|
63
|
+
const line = this.decodeText(this.buffer.slice(0, endIndex));
|
|
64
|
+
lines.push(line);
|
|
59
65
|
|
|
60
|
-
|
|
61
|
-
this
|
|
66
|
+
this.buffer = this.buffer.slice(patternIndex.index);
|
|
67
|
+
this.#carriageReturnIndex = null;
|
|
62
68
|
}
|
|
63
69
|
|
|
64
70
|
return lines;
|
|
@@ -102,13 +108,38 @@ export class LineDecoder {
|
|
|
102
108
|
}
|
|
103
109
|
|
|
104
110
|
flush(): string[] {
|
|
105
|
-
if (!this.buffer.length
|
|
111
|
+
if (!this.buffer.length) {
|
|
106
112
|
return [];
|
|
107
113
|
}
|
|
114
|
+
return this.decode('\n');
|
|
115
|
+
}
|
|
116
|
+
}
|
|
108
117
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
118
|
+
/**
|
|
119
|
+
* This function searches the buffer for the next line terminator (\r or \n)
|
|
120
|
+
* and returns an object with the index of the matched newline char (`preceding`, usable as an exclusive slice end) and the
|
|
121
|
+
* index after the newline char. `null` is returned if no new line is found.
|
|
122
|
+
*
|
|
123
|
+
* ```ts
|
|
124
|
+
* findNewlineIndex(new TextEncoder().encode('abc\ndef'), null) -> { preceding: 3, index: 4, carriage: false }
|
|
125
|
+
* ```
|
|
126
|
+
*/
|
|
127
|
+
function findNewlineIndex(
|
|
128
|
+
buffer: Uint8Array,
|
|
129
|
+
startIndex: number | null,
|
|
130
|
+
): { preceding: number; index: number; carriage: boolean } | null {
|
|
131
|
+
const newline = 0x0a; // \n
|
|
132
|
+
const carriage = 0x0d; // \r
|
|
133
|
+
|
|
134
|
+
for (let i = startIndex ?? 0; i < buffer.length; i++) {
|
|
135
|
+
if (buffer[i] === newline) {
|
|
136
|
+
return { preceding: i, index: i + 1, carriage: false };
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
if (buffer[i] === carriage) {
|
|
140
|
+
return { preceding: i, index: i + 1, carriage: true };
|
|
141
|
+
}
|
|
113
142
|
}
|
|
143
|
+
|
|
144
|
+
return null;
|
|
114
145
|
}
|
|
@@ -63,8 +63,6 @@ export interface AgentTurnResponseStreamChunk {
|
|
|
63
63
|
export interface Turn {
|
|
64
64
|
input_messages: Array<Shared.UserMessage | Shared.ToolResponseMessage>;
|
|
65
65
|
|
|
66
|
-
output_attachments: Array<Turn.OutputAttachment>;
|
|
67
|
-
|
|
68
66
|
/**
|
|
69
67
|
* A message containing the model's (assistant) response in a chat conversation.
|
|
70
68
|
*/
|
|
@@ -84,6 +82,8 @@ export interface Turn {
|
|
|
84
82
|
turn_id: string;
|
|
85
83
|
|
|
86
84
|
completed_at?: string;
|
|
85
|
+
|
|
86
|
+
output_attachments?: Array<Turn.OutputAttachment>;
|
|
87
87
|
}
|
|
88
88
|
|
|
89
89
|
export namespace Turn {
|
|
@@ -96,7 +96,7 @@ export namespace Turn {
|
|
|
96
96
|
| OutputAttachment.ImageContentItem
|
|
97
97
|
| OutputAttachment.TextContentItem
|
|
98
98
|
| Array<Shared.InterleavedContentItem>
|
|
99
|
-
|
|
|
99
|
+
| OutputAttachment.URL;
|
|
100
100
|
|
|
101
101
|
mime_type: string;
|
|
102
102
|
}
|
|
@@ -131,7 +131,17 @@ export namespace Turn {
|
|
|
131
131
|
* A URL of the image or data URL in the format of data:image/{type};base64,{data}.
|
|
132
132
|
* Note that URL could have length limits.
|
|
133
133
|
*/
|
|
134
|
-
url?:
|
|
134
|
+
url?: Image.URL;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
export namespace Image {
|
|
138
|
+
/**
|
|
139
|
+
* A URL of the image or data URL in the format of data:image/{type};base64,{data}.
|
|
140
|
+
* Note that URL could have length limits.
|
|
141
|
+
*/
|
|
142
|
+
export interface URL {
|
|
143
|
+
uri: string;
|
|
144
|
+
}
|
|
135
145
|
}
|
|
136
146
|
}
|
|
137
147
|
|
|
@@ -149,6 +159,10 @@ export namespace Turn {
|
|
|
149
159
|
*/
|
|
150
160
|
type: 'text';
|
|
151
161
|
}
|
|
162
|
+
|
|
163
|
+
export interface URL {
|
|
164
|
+
uri: string;
|
|
165
|
+
}
|
|
152
166
|
}
|
|
153
167
|
}
|
|
154
168
|
|
|
@@ -223,6 +237,11 @@ export interface TurnCreateParamsBase {
|
|
|
223
237
|
|
|
224
238
|
stream?: boolean;
|
|
225
239
|
|
|
240
|
+
/**
|
|
241
|
+
* Configuration for tool use.
|
|
242
|
+
*/
|
|
243
|
+
tool_config?: TurnCreateParams.ToolConfig;
|
|
244
|
+
|
|
226
245
|
toolgroups?: Array<string | TurnCreateParams.UnionMember1>;
|
|
227
246
|
}
|
|
228
247
|
|
|
@@ -236,7 +255,7 @@ export namespace TurnCreateParams {
|
|
|
236
255
|
| Document.ImageContentItem
|
|
237
256
|
| Document.TextContentItem
|
|
238
257
|
| Array<Shared.InterleavedContentItem>
|
|
239
|
-
|
|
|
258
|
+
| Document.URL;
|
|
240
259
|
|
|
241
260
|
mime_type: string;
|
|
242
261
|
}
|
|
@@ -271,7 +290,17 @@ export namespace TurnCreateParams {
|
|
|
271
290
|
* A URL of the image or data URL in the format of data:image/{type};base64,{data}.
|
|
272
291
|
* Note that URL could have length limits.
|
|
273
292
|
*/
|
|
274
|
-
url?:
|
|
293
|
+
url?: Image.URL;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
export namespace Image {
|
|
297
|
+
/**
|
|
298
|
+
* A URL of the image or data URL in the format of data:image/{type};base64,{data}.
|
|
299
|
+
* Note that URL could have length limits.
|
|
300
|
+
*/
|
|
301
|
+
export interface URL {
|
|
302
|
+
uri: string;
|
|
303
|
+
}
|
|
275
304
|
}
|
|
276
305
|
}
|
|
277
306
|
|
|
@@ -289,6 +318,41 @@ export namespace TurnCreateParams {
|
|
|
289
318
|
*/
|
|
290
319
|
type: 'text';
|
|
291
320
|
}
|
|
321
|
+
|
|
322
|
+
export interface URL {
|
|
323
|
+
uri: string;
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
/**
|
|
328
|
+
* Configuration for tool use.
|
|
329
|
+
*/
|
|
330
|
+
export interface ToolConfig {
|
|
331
|
+
/**
|
|
332
|
+
* (Optional) Config for how to override the default system prompt. -
|
|
333
|
+
* `SystemMessageBehavior.append`: Appends the provided system message to the
|
|
334
|
+
* default system prompt. - `SystemMessageBehavior.replace`: Replaces the default
|
|
335
|
+
* system prompt with the provided system message. The system message can include
|
|
336
|
+
* the string '{{function_definitions}}' to indicate where the function definitions
|
|
337
|
+
* should be inserted.
|
|
338
|
+
*/
|
|
339
|
+
system_message_behavior: 'append' | 'replace';
|
|
340
|
+
|
|
341
|
+
/**
|
|
342
|
+
* (Optional) Whether tool use is required or automatic. Defaults to
|
|
343
|
+
* ToolChoice.auto.
|
|
344
|
+
*/
|
|
345
|
+
tool_choice?: 'auto' | 'required';
|
|
346
|
+
|
|
347
|
+
/**
|
|
348
|
+
* (Optional) Instructs the model how to format tool calls. By default, Llama Stack
|
|
349
|
+
* will attempt to use a format that is best adapted to the model. -
|
|
350
|
+
* `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
|
|
351
|
+
* `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
|
|
352
|
+
* <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
|
|
353
|
+
* are output as Python syntax -- a list of function calls.
|
|
354
|
+
*/
|
|
355
|
+
tool_prompt_format?: 'json' | 'function_tag' | 'python_list';
|
|
292
356
|
}
|
|
293
357
|
|
|
294
358
|
export interface UnionMember1 {
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
import { APIResource } from '../resource';
|
|
4
|
+
import * as Core from '../core';
|
|
5
|
+
|
|
6
|
+
export class Benchmarks extends APIResource {
|
|
7
|
+
retrieve(benchmarkId: string, options?: Core.RequestOptions): Core.APIPromise<Benchmark | null> {
|
|
8
|
+
return this._client.get(`/v1/eval/benchmarks/${benchmarkId}`, options);
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
list(options?: Core.RequestOptions): Core.APIPromise<BenchmarkListResponse> {
|
|
12
|
+
return (
|
|
13
|
+
this._client.get('/v1/eval/benchmarks', options) as Core.APIPromise<{ data: BenchmarkListResponse }>
|
|
14
|
+
)._thenUnwrap((obj) => obj.data);
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
register(body: BenchmarkRegisterParams, options?: Core.RequestOptions): Core.APIPromise<void> {
|
|
18
|
+
return this._client.post('/v1/eval/benchmarks', {
|
|
19
|
+
body,
|
|
20
|
+
...options,
|
|
21
|
+
headers: { Accept: '*/*', ...options?.headers },
|
|
22
|
+
});
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export interface Benchmark {
|
|
27
|
+
dataset_id: string;
|
|
28
|
+
|
|
29
|
+
identifier: string;
|
|
30
|
+
|
|
31
|
+
metadata: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
32
|
+
|
|
33
|
+
provider_id: string;
|
|
34
|
+
|
|
35
|
+
provider_resource_id: string;
|
|
36
|
+
|
|
37
|
+
scoring_functions: Array<string>;
|
|
38
|
+
|
|
39
|
+
type: 'benchmark';
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export interface ListBenchmarksResponse {
|
|
43
|
+
data: BenchmarkListResponse;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
export type BenchmarkListResponse = Array<Benchmark>;
|
|
47
|
+
|
|
48
|
+
export interface BenchmarkRegisterParams {
|
|
49
|
+
benchmark_id: string;
|
|
50
|
+
|
|
51
|
+
dataset_id: string;
|
|
52
|
+
|
|
53
|
+
scoring_functions: Array<string>;
|
|
54
|
+
|
|
55
|
+
metadata?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
56
|
+
|
|
57
|
+
provider_benchmark_id?: string;
|
|
58
|
+
|
|
59
|
+
provider_id?: string;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
export declare namespace Benchmarks {
|
|
63
|
+
export {
|
|
64
|
+
type Benchmark as Benchmark,
|
|
65
|
+
type ListBenchmarksResponse as ListBenchmarksResponse,
|
|
66
|
+
type BenchmarkListResponse as BenchmarkListResponse,
|
|
67
|
+
type BenchmarkRegisterParams as BenchmarkRegisterParams,
|
|
68
|
+
};
|
|
69
|
+
}
|
|
@@ -51,7 +51,13 @@ export interface DatasetRetrieveResponse {
|
|
|
51
51
|
|
|
52
52
|
type: 'dataset';
|
|
53
53
|
|
|
54
|
-
url:
|
|
54
|
+
url: DatasetRetrieveResponse.URL;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export namespace DatasetRetrieveResponse {
|
|
58
|
+
export interface URL {
|
|
59
|
+
uri: string;
|
|
60
|
+
}
|
|
55
61
|
}
|
|
56
62
|
|
|
57
63
|
export type DatasetListResponse = Array<DatasetListResponse.DatasetListResponseItem>;
|
|
@@ -70,7 +76,13 @@ export namespace DatasetListResponse {
|
|
|
70
76
|
|
|
71
77
|
type: 'dataset';
|
|
72
78
|
|
|
73
|
-
url:
|
|
79
|
+
url: DatasetListResponseItem.URL;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
export namespace DatasetListResponseItem {
|
|
83
|
+
export interface URL {
|
|
84
|
+
uri: string;
|
|
85
|
+
}
|
|
74
86
|
}
|
|
75
87
|
}
|
|
76
88
|
|
|
@@ -79,7 +91,7 @@ export interface DatasetRegisterParams {
|
|
|
79
91
|
|
|
80
92
|
dataset_schema: Record<string, Shared.ParamType>;
|
|
81
93
|
|
|
82
|
-
url:
|
|
94
|
+
url: DatasetRegisterParams.URL;
|
|
83
95
|
|
|
84
96
|
metadata?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
85
97
|
|
|
@@ -88,6 +100,12 @@ export interface DatasetRegisterParams {
|
|
|
88
100
|
provider_id?: string;
|
|
89
101
|
}
|
|
90
102
|
|
|
103
|
+
export namespace DatasetRegisterParams {
|
|
104
|
+
export interface URL {
|
|
105
|
+
uri: string;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
91
109
|
export declare namespace Datasets {
|
|
92
110
|
export {
|
|
93
111
|
type ListDatasetsResponse as ListDatasetsResponse,
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import { APIResource } from '../../resource';
|
|
4
4
|
import * as Core from '../../core';
|
|
5
|
-
import * as EvalAPI from './eval';
|
|
6
5
|
import * as ScoringFunctionsAPI from '../scoring-functions';
|
|
7
6
|
import * as Shared from '../shared';
|
|
8
7
|
import * as JobsAPI from './jobs';
|
|
@@ -19,9 +18,35 @@ export class Eval extends APIResource {
|
|
|
19
18
|
return this._client.post(`/v1/eval/tasks/${taskId}/evaluations`, { body, ...options });
|
|
20
19
|
}
|
|
21
20
|
|
|
21
|
+
evaluateRowsAlpha(
|
|
22
|
+
benchmarkId: string,
|
|
23
|
+
body: EvalEvaluateRowsAlphaParams,
|
|
24
|
+
options?: Core.RequestOptions,
|
|
25
|
+
): Core.APIPromise<EvaluateResponse> {
|
|
26
|
+
return this._client.post(`/v1/eval/benchmarks/${benchmarkId}/evaluations`, { body, ...options });
|
|
27
|
+
}
|
|
28
|
+
|
|
22
29
|
runEval(taskId: string, body: EvalRunEvalParams, options?: Core.RequestOptions): Core.APIPromise<Job> {
|
|
23
30
|
return this._client.post(`/v1/eval/tasks/${taskId}/jobs`, { body, ...options });
|
|
24
31
|
}
|
|
32
|
+
|
|
33
|
+
runEvalAlpha(
|
|
34
|
+
benchmarkId: string,
|
|
35
|
+
body: EvalRunEvalAlphaParams,
|
|
36
|
+
options?: Core.RequestOptions,
|
|
37
|
+
): Core.APIPromise<Job> {
|
|
38
|
+
return this._client.post(`/v1/eval/benchmarks/${benchmarkId}/jobs`, { body, ...options });
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export interface BenchmarkConfig {
|
|
43
|
+
eval_candidate: EvalCandidate;
|
|
44
|
+
|
|
45
|
+
scoring_params: Record<string, ScoringFunctionsAPI.ScoringFnParams>;
|
|
46
|
+
|
|
47
|
+
type: 'benchmark';
|
|
48
|
+
|
|
49
|
+
num_examples?: number;
|
|
25
50
|
}
|
|
26
51
|
|
|
27
52
|
export type EvalCandidate = EvalCandidate.ModelCandidate | EvalCandidate.AgentCandidate;
|
|
@@ -47,28 +72,6 @@ export namespace EvalCandidate {
|
|
|
47
72
|
}
|
|
48
73
|
}
|
|
49
74
|
|
|
50
|
-
export type EvalTaskConfig = EvalTaskConfig.BenchmarkEvalTaskConfig | EvalTaskConfig.AppEvalTaskConfig;
|
|
51
|
-
|
|
52
|
-
export namespace EvalTaskConfig {
|
|
53
|
-
export interface BenchmarkEvalTaskConfig {
|
|
54
|
-
eval_candidate: EvalAPI.EvalCandidate;
|
|
55
|
-
|
|
56
|
-
type: 'benchmark';
|
|
57
|
-
|
|
58
|
-
num_examples?: number;
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
export interface AppEvalTaskConfig {
|
|
62
|
-
eval_candidate: EvalAPI.EvalCandidate;
|
|
63
|
-
|
|
64
|
-
scoring_params: Record<string, ScoringFunctionsAPI.ScoringFnParams>;
|
|
65
|
-
|
|
66
|
-
type: 'app';
|
|
67
|
-
|
|
68
|
-
num_examples?: number;
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
|
|
72
75
|
export interface EvaluateResponse {
|
|
73
76
|
generations: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
|
|
74
77
|
|
|
@@ -84,23 +87,37 @@ export interface EvalEvaluateRowsParams {
|
|
|
84
87
|
|
|
85
88
|
scoring_functions: Array<string>;
|
|
86
89
|
|
|
87
|
-
task_config:
|
|
90
|
+
task_config: BenchmarkConfig;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
export interface EvalEvaluateRowsAlphaParams {
|
|
94
|
+
input_rows: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
|
|
95
|
+
|
|
96
|
+
scoring_functions: Array<string>;
|
|
97
|
+
|
|
98
|
+
task_config: BenchmarkConfig;
|
|
88
99
|
}
|
|
89
100
|
|
|
90
101
|
export interface EvalRunEvalParams {
|
|
91
|
-
task_config:
|
|
102
|
+
task_config: BenchmarkConfig;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
export interface EvalRunEvalAlphaParams {
|
|
106
|
+
task_config: BenchmarkConfig;
|
|
92
107
|
}
|
|
93
108
|
|
|
94
109
|
Eval.Jobs = Jobs;
|
|
95
110
|
|
|
96
111
|
export declare namespace Eval {
|
|
97
112
|
export {
|
|
113
|
+
type BenchmarkConfig as BenchmarkConfig,
|
|
98
114
|
type EvalCandidate as EvalCandidate,
|
|
99
|
-
type EvalTaskConfig as EvalTaskConfig,
|
|
100
115
|
type EvaluateResponse as EvaluateResponse,
|
|
101
116
|
type Job as Job,
|
|
102
117
|
type EvalEvaluateRowsParams as EvalEvaluateRowsParams,
|
|
118
|
+
type EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams,
|
|
103
119
|
type EvalRunEvalParams as EvalRunEvalParams,
|
|
120
|
+
type EvalRunEvalAlphaParams as EvalRunEvalAlphaParams,
|
|
104
121
|
};
|
|
105
122
|
|
|
106
123
|
export { Jobs as Jobs, type JobStatusResponse as JobStatusResponse };
|
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
export {
|
|
4
4
|
Eval,
|
|
5
|
+
type BenchmarkConfig,
|
|
5
6
|
type EvalCandidate,
|
|
6
|
-
type EvalTaskConfig,
|
|
7
7
|
type EvaluateResponse,
|
|
8
8
|
type Job,
|
|
9
9
|
type EvalEvaluateRowsParams,
|
|
10
|
+
type EvalEvaluateRowsAlphaParams,
|
|
10
11
|
type EvalRunEvalParams,
|
|
12
|
+
type EvalRunEvalAlphaParams,
|
|
11
13
|
} from './eval';
|
|
12
14
|
export { Jobs, type JobStatusResponse } from './jobs';
|
|
@@ -6,26 +6,26 @@ import * as EvalAPI from './eval';
|
|
|
6
6
|
|
|
7
7
|
export class Jobs extends APIResource {
|
|
8
8
|
retrieve(
|
|
9
|
-
|
|
9
|
+
benchmarkId: string,
|
|
10
10
|
jobId: string,
|
|
11
11
|
options?: Core.RequestOptions,
|
|
12
12
|
): Core.APIPromise<EvalAPI.EvaluateResponse> {
|
|
13
|
-
return this._client.get(`/v1/eval/
|
|
13
|
+
return this._client.get(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}/result`, options);
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
-
cancel(
|
|
17
|
-
return this._client.delete(`/v1/eval/
|
|
16
|
+
cancel(benchmarkId: string, jobId: string, options?: Core.RequestOptions): Core.APIPromise<void> {
|
|
17
|
+
return this._client.delete(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}`, {
|
|
18
18
|
...options,
|
|
19
19
|
headers: { Accept: '*/*', ...options?.headers },
|
|
20
20
|
});
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
status(
|
|
24
|
-
|
|
24
|
+
benchmarkId: string,
|
|
25
25
|
jobId: string,
|
|
26
26
|
options?: Core.RequestOptions,
|
|
27
27
|
): Core.APIPromise<JobStatusResponse | null> {
|
|
28
|
-
return this._client.get(`/v1/eval/
|
|
28
|
+
return this._client.get(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}`, options);
|
|
29
29
|
}
|
|
30
30
|
}
|
|
31
31
|
|
|
@@ -2,15 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
import { APIResource } from '../resource';
|
|
4
4
|
import * as Core from '../core';
|
|
5
|
+
import * as BenchmarksAPI from './benchmarks';
|
|
5
6
|
|
|
6
7
|
export class EvalTasks extends APIResource {
|
|
7
|
-
retrieve(
|
|
8
|
+
retrieve(
|
|
9
|
+
evalTaskId: string,
|
|
10
|
+
options?: Core.RequestOptions,
|
|
11
|
+
): Core.APIPromise<BenchmarksAPI.Benchmark | null> {
|
|
8
12
|
return this._client.get(`/v1/eval-tasks/${evalTaskId}`, options);
|
|
9
13
|
}
|
|
10
14
|
|
|
11
|
-
list(options?: Core.RequestOptions): Core.APIPromise<
|
|
15
|
+
list(options?: Core.RequestOptions): Core.APIPromise<BenchmarksAPI.BenchmarkListResponse> {
|
|
12
16
|
return (
|
|
13
|
-
this._client.get('/v1/eval-tasks', options) as Core.APIPromise<{
|
|
17
|
+
this._client.get('/v1/eval-tasks', options) as Core.APIPromise<{
|
|
18
|
+
data: BenchmarksAPI.BenchmarkListResponse;
|
|
19
|
+
}>
|
|
14
20
|
)._thenUnwrap((obj) => obj.data);
|
|
15
21
|
}
|
|
16
22
|
|
|
@@ -23,27 +29,7 @@ export class EvalTasks extends APIResource {
|
|
|
23
29
|
}
|
|
24
30
|
}
|
|
25
31
|
|
|
26
|
-
export
|
|
27
|
-
dataset_id: string;
|
|
28
|
-
|
|
29
|
-
identifier: string;
|
|
30
|
-
|
|
31
|
-
metadata: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
32
|
-
|
|
33
|
-
provider_id: string;
|
|
34
|
-
|
|
35
|
-
provider_resource_id: string;
|
|
36
|
-
|
|
37
|
-
scoring_functions: Array<string>;
|
|
38
|
-
|
|
39
|
-
type: 'eval_task';
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
export interface ListEvalTasksResponse {
|
|
43
|
-
data: EvalTaskListResponse;
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
export type EvalTaskListResponse = Array<EvalTask>;
|
|
32
|
+
export type EvalTaskListResponse = Array<BenchmarksAPI.Benchmark>;
|
|
47
33
|
|
|
48
34
|
export interface EvalTaskRegisterParams {
|
|
49
35
|
dataset_id: string;
|
|
@@ -54,15 +40,13 @@ export interface EvalTaskRegisterParams {
|
|
|
54
40
|
|
|
55
41
|
metadata?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
56
42
|
|
|
57
|
-
|
|
43
|
+
provider_benchmark_id?: string;
|
|
58
44
|
|
|
59
45
|
provider_id?: string;
|
|
60
46
|
}
|
|
61
47
|
|
|
62
48
|
export declare namespace EvalTasks {
|
|
63
49
|
export {
|
|
64
|
-
type EvalTask as EvalTask,
|
|
65
|
-
type ListEvalTasksResponse as ListEvalTasksResponse,
|
|
66
50
|
type EvalTaskListResponse as EvalTaskListResponse,
|
|
67
51
|
type EvalTaskRegisterParams as EvalTaskRegisterParams,
|
|
68
52
|
};
|
package/src/resources/index.ts
CHANGED
|
@@ -17,6 +17,13 @@ export {
|
|
|
17
17
|
type BatchInferenceChatCompletionParams,
|
|
18
18
|
type BatchInferenceCompletionParams,
|
|
19
19
|
} from './batch-inference';
|
|
20
|
+
export {
|
|
21
|
+
Benchmarks,
|
|
22
|
+
type Benchmark,
|
|
23
|
+
type ListBenchmarksResponse,
|
|
24
|
+
type BenchmarkListResponse,
|
|
25
|
+
type BenchmarkRegisterParams,
|
|
26
|
+
} from './benchmarks';
|
|
20
27
|
export {
|
|
21
28
|
Datasetio,
|
|
22
29
|
type PaginatedRowsResult,
|
|
@@ -32,20 +39,16 @@ export {
|
|
|
32
39
|
} from './datasets';
|
|
33
40
|
export {
|
|
34
41
|
Eval,
|
|
42
|
+
type BenchmarkConfig,
|
|
35
43
|
type EvalCandidate,
|
|
36
|
-
type EvalTaskConfig,
|
|
37
44
|
type EvaluateResponse,
|
|
38
45
|
type Job,
|
|
39
46
|
type EvalEvaluateRowsParams,
|
|
47
|
+
type EvalEvaluateRowsAlphaParams,
|
|
40
48
|
type EvalRunEvalParams,
|
|
49
|
+
type EvalRunEvalAlphaParams,
|
|
41
50
|
} from './eval/eval';
|
|
42
|
-
export {
|
|
43
|
-
EvalTasks,
|
|
44
|
-
type EvalTask,
|
|
45
|
-
type ListEvalTasksResponse,
|
|
46
|
-
type EvalTaskListResponse,
|
|
47
|
-
type EvalTaskRegisterParams,
|
|
48
|
-
} from './eval-tasks';
|
|
51
|
+
export { EvalTasks, type EvalTaskListResponse, type EvalTaskRegisterParams } from './eval-tasks';
|
|
49
52
|
export {
|
|
50
53
|
Inference,
|
|
51
54
|
type ChatCompletionResponseStreamChunk,
|
|
@@ -79,6 +79,8 @@ export interface ChatCompletionResponseStreamChunk {
|
|
|
79
79
|
* The event containing the new content
|
|
80
80
|
*/
|
|
81
81
|
event: ChatCompletionResponseStreamChunk.Event;
|
|
82
|
+
|
|
83
|
+
metrics?: Array<ChatCompletionResponseStreamChunk.Metric>;
|
|
82
84
|
}
|
|
83
85
|
|
|
84
86
|
export namespace ChatCompletionResponseStreamChunk {
|
|
@@ -107,6 +109,24 @@ export namespace ChatCompletionResponseStreamChunk {
|
|
|
107
109
|
*/
|
|
108
110
|
stop_reason?: 'end_of_turn' | 'end_of_message' | 'out_of_tokens';
|
|
109
111
|
}
|
|
112
|
+
|
|
113
|
+
export interface Metric {
|
|
114
|
+
metric: string;
|
|
115
|
+
|
|
116
|
+
span_id: string;
|
|
117
|
+
|
|
118
|
+
timestamp: string;
|
|
119
|
+
|
|
120
|
+
trace_id: string;
|
|
121
|
+
|
|
122
|
+
type: 'metric';
|
|
123
|
+
|
|
124
|
+
unit: string;
|
|
125
|
+
|
|
126
|
+
value: number;
|
|
127
|
+
|
|
128
|
+
attributes?: Record<string, string | number | boolean | null>;
|
|
129
|
+
}
|
|
110
130
|
}
|
|
111
131
|
|
|
112
132
|
/**
|