llama-stack-client 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/index.d.mts +8 -5
  2. package/index.d.ts +8 -5
  3. package/index.d.ts.map +1 -1
  4. package/index.js +3 -0
  5. package/index.js.map +1 -1
  6. package/index.mjs +4 -1
  7. package/index.mjs.map +1 -1
  8. package/internal/decoders/line.d.ts +2 -2
  9. package/internal/decoders/line.d.ts.map +1 -1
  10. package/internal/decoders/line.js +69 -34
  11. package/internal/decoders/line.js.map +1 -1
  12. package/internal/decoders/line.mjs +69 -34
  13. package/internal/decoders/line.mjs.map +1 -1
  14. package/package.json +1 -1
  15. package/resources/agents/turn.d.ts +61 -5
  16. package/resources/agents/turn.d.ts.map +1 -1
  17. package/resources/benchmarks.d.ts +32 -0
  18. package/resources/benchmarks.d.ts.map +1 -0
  19. package/resources/benchmarks.js +22 -0
  20. package/resources/benchmarks.js.map +1 -0
  21. package/resources/benchmarks.mjs +18 -0
  22. package/resources/benchmarks.mjs.map +1 -0
  23. package/resources/datasets.d.ts +18 -3
  24. package/resources/datasets.d.ts.map +1 -1
  25. package/resources/eval/eval.d.ts +19 -18
  26. package/resources/eval/eval.d.ts.map +1 -1
  27. package/resources/eval/eval.js +6 -0
  28. package/resources/eval/eval.js.map +1 -1
  29. package/resources/eval/eval.mjs +6 -0
  30. package/resources/eval/eval.mjs.map +1 -1
  31. package/resources/eval/index.d.ts +1 -1
  32. package/resources/eval/index.d.ts.map +1 -1
  33. package/resources/eval/index.js.map +1 -1
  34. package/resources/eval/index.mjs.map +1 -1
  35. package/resources/eval/jobs.d.ts +3 -3
  36. package/resources/eval/jobs.d.ts.map +1 -1
  37. package/resources/eval/jobs.js +6 -6
  38. package/resources/eval/jobs.js.map +1 -1
  39. package/resources/eval/jobs.mjs +6 -6
  40. package/resources/eval/jobs.mjs.map +1 -1
  41. package/resources/eval-tasks.d.ts +6 -17
  42. package/resources/eval-tasks.d.ts.map +1 -1
  43. package/resources/eval-tasks.js.map +1 -1
  44. package/resources/eval-tasks.mjs.map +1 -1
  45. package/resources/index.d.ts +3 -2
  46. package/resources/index.d.ts.map +1 -1
  47. package/resources/index.js +3 -1
  48. package/resources/index.js.map +1 -1
  49. package/resources/index.mjs +2 -1
  50. package/resources/index.mjs.map +1 -1
  51. package/resources/inference.d.ts +11 -0
  52. package/resources/inference.d.ts.map +1 -1
  53. package/resources/shared.d.ts +84 -11
  54. package/resources/shared.d.ts.map +1 -1
  55. package/resources/telemetry.d.ts +3 -3
  56. package/resources/telemetry.d.ts.map +1 -1
  57. package/resources/tool-runtime/tool-runtime.d.ts +6 -1
  58. package/resources/tool-runtime/tool-runtime.d.ts.map +1 -1
  59. package/resources/tool-runtime/tool-runtime.js.map +1 -1
  60. package/resources/tool-runtime/tool-runtime.mjs.map +1 -1
  61. package/resources/toolgroups.d.ts +12 -3
  62. package/resources/toolgroups.d.ts.map +1 -1
  63. package/resources/toolgroups.js.map +1 -1
  64. package/resources/toolgroups.mjs.map +1 -1
  65. package/src/index.ts +25 -12
  66. package/src/internal/decoders/line.ts +69 -38
  67. package/src/resources/agents/turn.ts +70 -6
  68. package/src/resources/benchmarks.ts +69 -0
  69. package/src/resources/datasets.ts +21 -3
  70. package/src/resources/eval/eval.ts +43 -26
  71. package/src/resources/eval/index.ts +3 -1
  72. package/src/resources/eval/jobs.ts +6 -6
  73. package/src/resources/eval-tasks.ts +11 -27
  74. package/src/resources/index.ts +11 -8
  75. package/src/resources/inference.ts +20 -0
  76. package/src/resources/shared.ts +104 -14
  77. package/src/resources/telemetry.ts +3 -3
  78. package/src/resources/tool-runtime/tool-runtime.ts +7 -1
  79. package/src/resources/toolgroups.ts +14 -3
  80. package/src/streaming.ts +5 -1
  81. package/src/version.ts +1 -1
  82. package/streaming.d.ts +3 -1
  83. package/streaming.d.ts.map +1 -1
  84. package/streaming.js +4 -1
  85. package/streaming.js.map +1 -1
  86. package/streaming.mjs +4 -1
  87. package/streaming.mjs.map +1 -1
  88. package/version.d.ts +1 -1
  89. package/version.js +1 -1
  90. package/version.mjs +1 -1
@@ -13,52 +13,58 @@ export class LineDecoder {
13
13
  static NEWLINE_CHARS = new Set(['\n', '\r']);
14
14
  static NEWLINE_REGEXP = /\r\n|[\n\r]/g;
15
15
 
16
- buffer: string[];
17
- trailingCR: boolean;
16
+ buffer: Uint8Array;
17
+ #carriageReturnIndex: number | null;
18
18
  textDecoder: any; // TextDecoder found in browsers; not typed to avoid pulling in either "dom" or "node" types.
19
19
 
20
20
  constructor() {
21
- this.buffer = [];
22
- this.trailingCR = false;
21
+ this.buffer = new Uint8Array();
22
+ this.#carriageReturnIndex = null;
23
23
  }
24
24
 
25
25
  decode(chunk: Bytes): string[] {
26
- let text = this.decodeText(chunk);
27
-
28
- if (this.trailingCR) {
29
- text = '\r' + text;
30
- this.trailingCR = false;
31
- }
32
- if (text.endsWith('\r')) {
33
- this.trailingCR = true;
34
- text = text.slice(0, -1);
35
- }
36
-
37
- if (!text) {
26
+ if (chunk == null) {
38
27
  return [];
39
28
  }
40
29
 
41
- const trailingNewline = LineDecoder.NEWLINE_CHARS.has(text[text.length - 1] || '');
42
- let lines = text.split(LineDecoder.NEWLINE_REGEXP);
30
+ const binaryChunk =
31
+ chunk instanceof ArrayBuffer ? new Uint8Array(chunk)
32
+ : typeof chunk === 'string' ? new TextEncoder().encode(chunk)
33
+ : chunk;
34
+
35
+ let newData = new Uint8Array(this.buffer.length + binaryChunk.length);
36
+ newData.set(this.buffer);
37
+ newData.set(binaryChunk, this.buffer.length);
38
+ this.buffer = newData;
39
+
40
+ const lines: string[] = [];
41
+ let patternIndex;
42
+ while ((patternIndex = findNewlineIndex(this.buffer, this.#carriageReturnIndex)) != null) {
43
+ if (patternIndex.carriage && this.#carriageReturnIndex == null) {
44
+ // skip until we either get a corresponding `\n`, a new `\r` or nothing
45
+ this.#carriageReturnIndex = patternIndex.index;
46
+ continue;
47
+ }
43
48
 
44
- // if there is a trailing new line then the last entry will be an empty
45
- // string which we don't care about
46
- if (trailingNewline) {
47
- lines.pop();
48
- }
49
+ // we got double \r or \rtext\n
50
+ if (
51
+ this.#carriageReturnIndex != null &&
52
+ (patternIndex.index !== this.#carriageReturnIndex + 1 || patternIndex.carriage)
53
+ ) {
54
+ lines.push(this.decodeText(this.buffer.slice(0, this.#carriageReturnIndex - 1)));
55
+ this.buffer = this.buffer.slice(this.#carriageReturnIndex);
56
+ this.#carriageReturnIndex = null;
57
+ continue;
58
+ }
49
59
 
50
- if (lines.length === 1 && !trailingNewline) {
51
- this.buffer.push(lines[0]!);
52
- return [];
53
- }
60
+ const endIndex =
61
+ this.#carriageReturnIndex !== null ? patternIndex.preceding - 1 : patternIndex.preceding;
54
62
 
55
- if (this.buffer.length > 0) {
56
- lines = [this.buffer.join('') + lines[0], ...lines.slice(1)];
57
- this.buffer = [];
58
- }
63
+ const line = this.decodeText(this.buffer.slice(0, endIndex));
64
+ lines.push(line);
59
65
 
60
- if (!trailingNewline) {
61
- this.buffer = [lines.pop() || ''];
66
+ this.buffer = this.buffer.slice(patternIndex.index);
67
+ this.#carriageReturnIndex = null;
62
68
  }
63
69
 
64
70
  return lines;
@@ -102,13 +108,38 @@ export class LineDecoder {
102
108
  }
103
109
 
104
110
  flush(): string[] {
105
- if (!this.buffer.length && !this.trailingCR) {
111
+ if (!this.buffer.length) {
106
112
  return [];
107
113
  }
114
+ return this.decode('\n');
115
+ }
116
+ }
108
117
 
109
- const lines = [this.buffer.join('')];
110
- this.buffer = [];
111
- this.trailingCR = false;
112
- return lines;
118
+ /**
119
+ * This function searches the buffer for the end patterns (\r or \n)
120
+ * and returns an object with the index of the matched newline byte (`preceding`),
121
+ * the index just after it (`index`), and whether the match was a `\r` (`carriage`). `null` is returned if no newline is found.
122
+ *
123
+ * ```ts
124
+ * findNewlineIndex(new TextEncoder().encode('abc\ndef'), null) -> { preceding: 3, index: 4, carriage: false }
125
+ * ```
126
+ */
127
+ function findNewlineIndex(
128
+ buffer: Uint8Array,
129
+ startIndex: number | null,
130
+ ): { preceding: number; index: number; carriage: boolean } | null {
131
+ const newline = 0x0a; // \n
132
+ const carriage = 0x0d; // \r
133
+
134
+ for (let i = startIndex ?? 0; i < buffer.length; i++) {
135
+ if (buffer[i] === newline) {
136
+ return { preceding: i, index: i + 1, carriage: false };
137
+ }
138
+
139
+ if (buffer[i] === carriage) {
140
+ return { preceding: i, index: i + 1, carriage: true };
141
+ }
113
142
  }
143
+
144
+ return null;
114
145
  }
@@ -63,8 +63,6 @@ export interface AgentTurnResponseStreamChunk {
63
63
  export interface Turn {
64
64
  input_messages: Array<Shared.UserMessage | Shared.ToolResponseMessage>;
65
65
 
66
- output_attachments: Array<Turn.OutputAttachment>;
67
-
68
66
  /**
69
67
  * A message containing the model's (assistant) response in a chat conversation.
70
68
  */
@@ -84,6 +82,8 @@ export interface Turn {
84
82
  turn_id: string;
85
83
 
86
84
  completed_at?: string;
85
+
86
+ output_attachments?: Array<Turn.OutputAttachment>;
87
87
  }
88
88
 
89
89
  export namespace Turn {
@@ -96,7 +96,7 @@ export namespace Turn {
96
96
  | OutputAttachment.ImageContentItem
97
97
  | OutputAttachment.TextContentItem
98
98
  | Array<Shared.InterleavedContentItem>
99
- | Shared.URL;
99
+ | OutputAttachment.URL;
100
100
 
101
101
  mime_type: string;
102
102
  }
@@ -131,7 +131,17 @@ export namespace Turn {
131
131
  * A URL of the image or data URL in the format of data:image/{type};base64,{data}.
132
132
  * Note that URL could have length limits.
133
133
  */
134
- url?: Shared.URL;
134
+ url?: Image.URL;
135
+ }
136
+
137
+ export namespace Image {
138
+ /**
139
+ * A URL of the image or data URL in the format of data:image/{type};base64,{data}.
140
+ * Note that URL could have length limits.
141
+ */
142
+ export interface URL {
143
+ uri: string;
144
+ }
135
145
  }
136
146
  }
137
147
 
@@ -149,6 +159,10 @@ export namespace Turn {
149
159
  */
150
160
  type: 'text';
151
161
  }
162
+
163
+ export interface URL {
164
+ uri: string;
165
+ }
152
166
  }
153
167
  }
154
168
 
@@ -223,6 +237,11 @@ export interface TurnCreateParamsBase {
223
237
 
224
238
  stream?: boolean;
225
239
 
240
+ /**
241
+ * Configuration for tool use.
242
+ */
243
+ tool_config?: TurnCreateParams.ToolConfig;
244
+
226
245
  toolgroups?: Array<string | TurnCreateParams.UnionMember1>;
227
246
  }
228
247
 
@@ -236,7 +255,7 @@ export namespace TurnCreateParams {
236
255
  | Document.ImageContentItem
237
256
  | Document.TextContentItem
238
257
  | Array<Shared.InterleavedContentItem>
239
- | Shared.URL;
258
+ | Document.URL;
240
259
 
241
260
  mime_type: string;
242
261
  }
@@ -271,7 +290,17 @@ export namespace TurnCreateParams {
271
290
  * A URL of the image or data URL in the format of data:image/{type};base64,{data}.
272
291
  * Note that URL could have length limits.
273
292
  */
274
- url?: Shared.URL;
293
+ url?: Image.URL;
294
+ }
295
+
296
+ export namespace Image {
297
+ /**
298
+ * A URL of the image or data URL in the format of data:image/{type};base64,{data}.
299
+ * Note that URL could have length limits.
300
+ */
301
+ export interface URL {
302
+ uri: string;
303
+ }
275
304
  }
276
305
  }
277
306
 
@@ -289,6 +318,41 @@ export namespace TurnCreateParams {
289
318
  */
290
319
  type: 'text';
291
320
  }
321
+
322
+ export interface URL {
323
+ uri: string;
324
+ }
325
+ }
326
+
327
+ /**
328
+ * Configuration for tool use.
329
+ */
330
+ export interface ToolConfig {
331
+ /**
332
+ * (Optional) Config for how to override the default system prompt. -
333
+ * `SystemMessageBehavior.append`: Appends the provided system message to the
334
+ * default system prompt. - `SystemMessageBehavior.replace`: Replaces the default
335
+ * system prompt with the provided system message. The system message can include
336
+ * the string '{{function_definitions}}' to indicate where the function definitions
337
+ * should be inserted.
338
+ */
339
+ system_message_behavior: 'append' | 'replace';
340
+
341
+ /**
342
+ * (Optional) Whether tool use is required or automatic. Defaults to
343
+ * ToolChoice.auto.
344
+ */
345
+ tool_choice?: 'auto' | 'required';
346
+
347
+ /**
348
+ * (Optional) Instructs the model how to format tool calls. By default, Llama Stack
349
+ * will attempt to use a format that is best adapted to the model. -
350
+ * `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
351
+ * `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
352
+ * <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
353
+ * are output as Python syntax -- a list of function calls.
354
+ */
355
+ tool_prompt_format?: 'json' | 'function_tag' | 'python_list';
292
356
  }
293
357
 
294
358
  export interface UnionMember1 {
@@ -0,0 +1,69 @@
1
+ // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ import { APIResource } from '../resource';
4
+ import * as Core from '../core';
5
+
6
+ export class Benchmarks extends APIResource {
7
+ retrieve(benchmarkId: string, options?: Core.RequestOptions): Core.APIPromise<Benchmark | null> {
8
+ return this._client.get(`/v1/eval/benchmarks/${benchmarkId}`, options);
9
+ }
10
+
11
+ list(options?: Core.RequestOptions): Core.APIPromise<BenchmarkListResponse> {
12
+ return (
13
+ this._client.get('/v1/eval/benchmarks', options) as Core.APIPromise<{ data: BenchmarkListResponse }>
14
+ )._thenUnwrap((obj) => obj.data);
15
+ }
16
+
17
+ register(body: BenchmarkRegisterParams, options?: Core.RequestOptions): Core.APIPromise<void> {
18
+ return this._client.post('/v1/eval/benchmarks', {
19
+ body,
20
+ ...options,
21
+ headers: { Accept: '*/*', ...options?.headers },
22
+ });
23
+ }
24
+ }
25
+
26
+ export interface Benchmark {
27
+ dataset_id: string;
28
+
29
+ identifier: string;
30
+
31
+ metadata: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
32
+
33
+ provider_id: string;
34
+
35
+ provider_resource_id: string;
36
+
37
+ scoring_functions: Array<string>;
38
+
39
+ type: 'benchmark';
40
+ }
41
+
42
+ export interface ListBenchmarksResponse {
43
+ data: BenchmarkListResponse;
44
+ }
45
+
46
+ export type BenchmarkListResponse = Array<Benchmark>;
47
+
48
+ export interface BenchmarkRegisterParams {
49
+ benchmark_id: string;
50
+
51
+ dataset_id: string;
52
+
53
+ scoring_functions: Array<string>;
54
+
55
+ metadata?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
56
+
57
+ provider_benchmark_id?: string;
58
+
59
+ provider_id?: string;
60
+ }
61
+
62
+ export declare namespace Benchmarks {
63
+ export {
64
+ type Benchmark as Benchmark,
65
+ type ListBenchmarksResponse as ListBenchmarksResponse,
66
+ type BenchmarkListResponse as BenchmarkListResponse,
67
+ type BenchmarkRegisterParams as BenchmarkRegisterParams,
68
+ };
69
+ }
@@ -51,7 +51,13 @@ export interface DatasetRetrieveResponse {
51
51
 
52
52
  type: 'dataset';
53
53
 
54
- url: Shared.URL;
54
+ url: DatasetRetrieveResponse.URL;
55
+ }
56
+
57
+ export namespace DatasetRetrieveResponse {
58
+ export interface URL {
59
+ uri: string;
60
+ }
55
61
  }
56
62
 
57
63
  export type DatasetListResponse = Array<DatasetListResponse.DatasetListResponseItem>;
@@ -70,7 +76,13 @@ export namespace DatasetListResponse {
70
76
 
71
77
  type: 'dataset';
72
78
 
73
- url: Shared.URL;
79
+ url: DatasetListResponseItem.URL;
80
+ }
81
+
82
+ export namespace DatasetListResponseItem {
83
+ export interface URL {
84
+ uri: string;
85
+ }
74
86
  }
75
87
  }
76
88
 
@@ -79,7 +91,7 @@ export interface DatasetRegisterParams {
79
91
 
80
92
  dataset_schema: Record<string, Shared.ParamType>;
81
93
 
82
- url: Shared.URL;
94
+ url: DatasetRegisterParams.URL;
83
95
 
84
96
  metadata?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
85
97
 
@@ -88,6 +100,12 @@ export interface DatasetRegisterParams {
88
100
  provider_id?: string;
89
101
  }
90
102
 
103
+ export namespace DatasetRegisterParams {
104
+ export interface URL {
105
+ uri: string;
106
+ }
107
+ }
108
+
91
109
  export declare namespace Datasets {
92
110
  export {
93
111
  type ListDatasetsResponse as ListDatasetsResponse,
@@ -2,7 +2,6 @@
2
2
 
3
3
  import { APIResource } from '../../resource';
4
4
  import * as Core from '../../core';
5
- import * as EvalAPI from './eval';
6
5
  import * as ScoringFunctionsAPI from '../scoring-functions';
7
6
  import * as Shared from '../shared';
8
7
  import * as JobsAPI from './jobs';
@@ -19,9 +18,35 @@ export class Eval extends APIResource {
19
18
  return this._client.post(`/v1/eval/tasks/${taskId}/evaluations`, { body, ...options });
20
19
  }
21
20
 
21
+ evaluateRowsAlpha(
22
+ benchmarkId: string,
23
+ body: EvalEvaluateRowsAlphaParams,
24
+ options?: Core.RequestOptions,
25
+ ): Core.APIPromise<EvaluateResponse> {
26
+ return this._client.post(`/v1/eval/benchmarks/${benchmarkId}/evaluations`, { body, ...options });
27
+ }
28
+
22
29
  runEval(taskId: string, body: EvalRunEvalParams, options?: Core.RequestOptions): Core.APIPromise<Job> {
23
30
  return this._client.post(`/v1/eval/tasks/${taskId}/jobs`, { body, ...options });
24
31
  }
32
+
33
+ runEvalAlpha(
34
+ benchmarkId: string,
35
+ body: EvalRunEvalAlphaParams,
36
+ options?: Core.RequestOptions,
37
+ ): Core.APIPromise<Job> {
38
+ return this._client.post(`/v1/eval/benchmarks/${benchmarkId}/jobs`, { body, ...options });
39
+ }
40
+ }
41
+
42
+ export interface BenchmarkConfig {
43
+ eval_candidate: EvalCandidate;
44
+
45
+ scoring_params: Record<string, ScoringFunctionsAPI.ScoringFnParams>;
46
+
47
+ type: 'benchmark';
48
+
49
+ num_examples?: number;
25
50
  }
26
51
 
27
52
  export type EvalCandidate = EvalCandidate.ModelCandidate | EvalCandidate.AgentCandidate;
@@ -47,28 +72,6 @@ export namespace EvalCandidate {
47
72
  }
48
73
  }
49
74
 
50
- export type EvalTaskConfig = EvalTaskConfig.BenchmarkEvalTaskConfig | EvalTaskConfig.AppEvalTaskConfig;
51
-
52
- export namespace EvalTaskConfig {
53
- export interface BenchmarkEvalTaskConfig {
54
- eval_candidate: EvalAPI.EvalCandidate;
55
-
56
- type: 'benchmark';
57
-
58
- num_examples?: number;
59
- }
60
-
61
- export interface AppEvalTaskConfig {
62
- eval_candidate: EvalAPI.EvalCandidate;
63
-
64
- scoring_params: Record<string, ScoringFunctionsAPI.ScoringFnParams>;
65
-
66
- type: 'app';
67
-
68
- num_examples?: number;
69
- }
70
- }
71
-
72
75
  export interface EvaluateResponse {
73
76
  generations: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
74
77
 
@@ -84,23 +87,37 @@ export interface EvalEvaluateRowsParams {
84
87
 
85
88
  scoring_functions: Array<string>;
86
89
 
87
- task_config: EvalTaskConfig;
90
+ task_config: BenchmarkConfig;
91
+ }
92
+
93
+ export interface EvalEvaluateRowsAlphaParams {
94
+ input_rows: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
95
+
96
+ scoring_functions: Array<string>;
97
+
98
+ task_config: BenchmarkConfig;
88
99
  }
89
100
 
90
101
  export interface EvalRunEvalParams {
91
- task_config: EvalTaskConfig;
102
+ task_config: BenchmarkConfig;
103
+ }
104
+
105
+ export interface EvalRunEvalAlphaParams {
106
+ task_config: BenchmarkConfig;
92
107
  }
93
108
 
94
109
  Eval.Jobs = Jobs;
95
110
 
96
111
  export declare namespace Eval {
97
112
  export {
113
+ type BenchmarkConfig as BenchmarkConfig,
98
114
  type EvalCandidate as EvalCandidate,
99
- type EvalTaskConfig as EvalTaskConfig,
100
115
  type EvaluateResponse as EvaluateResponse,
101
116
  type Job as Job,
102
117
  type EvalEvaluateRowsParams as EvalEvaluateRowsParams,
118
+ type EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams,
103
119
  type EvalRunEvalParams as EvalRunEvalParams,
120
+ type EvalRunEvalAlphaParams as EvalRunEvalAlphaParams,
104
121
  };
105
122
 
106
123
  export { Jobs as Jobs, type JobStatusResponse as JobStatusResponse };
@@ -2,11 +2,13 @@
2
2
 
3
3
  export {
4
4
  Eval,
5
+ type BenchmarkConfig,
5
6
  type EvalCandidate,
6
- type EvalTaskConfig,
7
7
  type EvaluateResponse,
8
8
  type Job,
9
9
  type EvalEvaluateRowsParams,
10
+ type EvalEvaluateRowsAlphaParams,
10
11
  type EvalRunEvalParams,
12
+ type EvalRunEvalAlphaParams,
11
13
  } from './eval';
12
14
  export { Jobs, type JobStatusResponse } from './jobs';
@@ -6,26 +6,26 @@ import * as EvalAPI from './eval';
6
6
 
7
7
  export class Jobs extends APIResource {
8
8
  retrieve(
9
- taskId: string,
9
+ benchmarkId: string,
10
10
  jobId: string,
11
11
  options?: Core.RequestOptions,
12
12
  ): Core.APIPromise<EvalAPI.EvaluateResponse> {
13
- return this._client.get(`/v1/eval/tasks/${taskId}/jobs/${jobId}/result`, options);
13
+ return this._client.get(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}/result`, options);
14
14
  }
15
15
 
16
- cancel(taskId: string, jobId: string, options?: Core.RequestOptions): Core.APIPromise<void> {
17
- return this._client.delete(`/v1/eval/tasks/${taskId}/jobs/${jobId}`, {
16
+ cancel(benchmarkId: string, jobId: string, options?: Core.RequestOptions): Core.APIPromise<void> {
17
+ return this._client.delete(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}`, {
18
18
  ...options,
19
19
  headers: { Accept: '*/*', ...options?.headers },
20
20
  });
21
21
  }
22
22
 
23
23
  status(
24
- taskId: string,
24
+ benchmarkId: string,
25
25
  jobId: string,
26
26
  options?: Core.RequestOptions,
27
27
  ): Core.APIPromise<JobStatusResponse | null> {
28
- return this._client.get(`/v1/eval/tasks/${taskId}/jobs/${jobId}`, options);
28
+ return this._client.get(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}`, options);
29
29
  }
30
30
  }
31
31
 
@@ -2,15 +2,21 @@
2
2
 
3
3
  import { APIResource } from '../resource';
4
4
  import * as Core from '../core';
5
+ import * as BenchmarksAPI from './benchmarks';
5
6
 
6
7
  export class EvalTasks extends APIResource {
7
- retrieve(evalTaskId: string, options?: Core.RequestOptions): Core.APIPromise<EvalTask | null> {
8
+ retrieve(
9
+ evalTaskId: string,
10
+ options?: Core.RequestOptions,
11
+ ): Core.APIPromise<BenchmarksAPI.Benchmark | null> {
8
12
  return this._client.get(`/v1/eval-tasks/${evalTaskId}`, options);
9
13
  }
10
14
 
11
- list(options?: Core.RequestOptions): Core.APIPromise<EvalTaskListResponse> {
15
+ list(options?: Core.RequestOptions): Core.APIPromise<BenchmarksAPI.BenchmarkListResponse> {
12
16
  return (
13
- this._client.get('/v1/eval-tasks', options) as Core.APIPromise<{ data: EvalTaskListResponse }>
17
+ this._client.get('/v1/eval-tasks', options) as Core.APIPromise<{
18
+ data: BenchmarksAPI.BenchmarkListResponse;
19
+ }>
14
20
  )._thenUnwrap((obj) => obj.data);
15
21
  }
16
22
 
@@ -23,27 +29,7 @@ export class EvalTasks extends APIResource {
23
29
  }
24
30
  }
25
31
 
26
- export interface EvalTask {
27
- dataset_id: string;
28
-
29
- identifier: string;
30
-
31
- metadata: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
32
-
33
- provider_id: string;
34
-
35
- provider_resource_id: string;
36
-
37
- scoring_functions: Array<string>;
38
-
39
- type: 'eval_task';
40
- }
41
-
42
- export interface ListEvalTasksResponse {
43
- data: EvalTaskListResponse;
44
- }
45
-
46
- export type EvalTaskListResponse = Array<EvalTask>;
32
+ export type EvalTaskListResponse = Array<BenchmarksAPI.Benchmark>;
47
33
 
48
34
  export interface EvalTaskRegisterParams {
49
35
  dataset_id: string;
@@ -54,15 +40,13 @@ export interface EvalTaskRegisterParams {
54
40
 
55
41
  metadata?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
56
42
 
57
- provider_eval_task_id?: string;
43
+ provider_benchmark_id?: string;
58
44
 
59
45
  provider_id?: string;
60
46
  }
61
47
 
62
48
  export declare namespace EvalTasks {
63
49
  export {
64
- type EvalTask as EvalTask,
65
- type ListEvalTasksResponse as ListEvalTasksResponse,
66
50
  type EvalTaskListResponse as EvalTaskListResponse,
67
51
  type EvalTaskRegisterParams as EvalTaskRegisterParams,
68
52
  };
@@ -17,6 +17,13 @@ export {
17
17
  type BatchInferenceChatCompletionParams,
18
18
  type BatchInferenceCompletionParams,
19
19
  } from './batch-inference';
20
+ export {
21
+ Benchmarks,
22
+ type Benchmark,
23
+ type ListBenchmarksResponse,
24
+ type BenchmarkListResponse,
25
+ type BenchmarkRegisterParams,
26
+ } from './benchmarks';
20
27
  export {
21
28
  Datasetio,
22
29
  type PaginatedRowsResult,
@@ -32,20 +39,16 @@ export {
32
39
  } from './datasets';
33
40
  export {
34
41
  Eval,
42
+ type BenchmarkConfig,
35
43
  type EvalCandidate,
36
- type EvalTaskConfig,
37
44
  type EvaluateResponse,
38
45
  type Job,
39
46
  type EvalEvaluateRowsParams,
47
+ type EvalEvaluateRowsAlphaParams,
40
48
  type EvalRunEvalParams,
49
+ type EvalRunEvalAlphaParams,
41
50
  } from './eval/eval';
42
- export {
43
- EvalTasks,
44
- type EvalTask,
45
- type ListEvalTasksResponse,
46
- type EvalTaskListResponse,
47
- type EvalTaskRegisterParams,
48
- } from './eval-tasks';
51
+ export { EvalTasks, type EvalTaskListResponse, type EvalTaskRegisterParams } from './eval-tasks';
49
52
  export {
50
53
  Inference,
51
54
  type ChatCompletionResponseStreamChunk,
@@ -79,6 +79,8 @@ export interface ChatCompletionResponseStreamChunk {
79
79
  * The event containing the new content
80
80
  */
81
81
  event: ChatCompletionResponseStreamChunk.Event;
82
+
83
+ metrics?: Array<ChatCompletionResponseStreamChunk.Metric>;
82
84
  }
83
85
 
84
86
  export namespace ChatCompletionResponseStreamChunk {
@@ -107,6 +109,24 @@ export namespace ChatCompletionResponseStreamChunk {
107
109
  */
108
110
  stop_reason?: 'end_of_turn' | 'end_of_message' | 'out_of_tokens';
109
111
  }
112
+
113
+ export interface Metric {
114
+ metric: string;
115
+
116
+ span_id: string;
117
+
118
+ timestamp: string;
119
+
120
+ trace_id: string;
121
+
122
+ type: 'metric';
123
+
124
+ unit: string;
125
+
126
+ value: number;
127
+
128
+ attributes?: Record<string, string | number | boolean | null>;
129
+ }
110
130
  }
111
131
 
112
132
  /**