llama-stack-client 0.2.0 → 0.2.3-rc5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_shims/index.d.ts +2 -0
- package/_shims/index.js +5 -1
- package/_shims/index.mjs +5 -1
- package/core.d.ts +12 -1
- package/core.d.ts.map +1 -1
- package/core.js +8 -6
- package/core.js.map +1 -1
- package/core.mjs +9 -7
- package/core.mjs.map +1 -1
- package/index.d.mts +10 -7
- package/index.d.ts +10 -7
- package/index.d.ts.map +1 -1
- package/index.js +6 -3
- package/index.js.map +1 -1
- package/index.mjs +6 -3
- package/index.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/chat/chat.d.ts +153 -0
- package/resources/chat/chat.d.ts.map +1 -0
- package/resources/chat/chat.js +39 -0
- package/resources/chat/chat.js.map +1 -0
- package/resources/chat/chat.mjs +12 -0
- package/resources/chat/chat.mjs.map +1 -0
- package/resources/chat/completions.d.ts +632 -0
- package/resources/chat/completions.d.ts.map +1 -0
- package/resources/chat/completions.js +16 -0
- package/resources/chat/completions.js.map +1 -0
- package/resources/chat/completions.mjs +12 -0
- package/resources/chat/completions.mjs.map +1 -0
- package/resources/chat/index.d.ts +3 -0
- package/resources/chat/index.d.ts.map +1 -0
- package/resources/chat/index.js +9 -0
- package/resources/chat/index.js.map +1 -0
- package/resources/chat/index.mjs +4 -0
- package/resources/chat/index.mjs.map +1 -0
- package/resources/chat.d.ts +2 -0
- package/resources/chat.d.ts.map +1 -0
- package/resources/chat.js +19 -0
- package/resources/chat.js.map +1 -0
- package/resources/chat.mjs +3 -0
- package/resources/chat.mjs.map +1 -0
- package/resources/completions.d.ts +193 -0
- package/resources/completions.d.ts.map +1 -0
- package/resources/completions.js +16 -0
- package/resources/completions.js.map +1 -0
- package/resources/completions.mjs +12 -0
- package/resources/completions.mjs.map +1 -0
- package/resources/datasets.d.ts +13 -6
- package/resources/datasets.d.ts.map +1 -1
- package/resources/datasets.js.map +1 -1
- package/resources/datasets.mjs.map +1 -1
- package/resources/eval/eval.d.ts +1 -1
- package/resources/eval/eval.d.ts.map +1 -1
- package/resources/index.d.ts +4 -3
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +5 -3
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +2 -1
- package/resources/index.mjs.map +1 -1
- package/resources/inference.d.ts +86 -1
- package/resources/inference.d.ts.map +1 -1
- package/resources/inference.js +6 -0
- package/resources/inference.js.map +1 -1
- package/resources/inference.mjs +6 -0
- package/resources/inference.mjs.map +1 -1
- package/resources/inspect.d.ts +2 -1
- package/resources/inspect.d.ts.map +1 -1
- package/resources/post-training/job.d.ts +1 -1
- package/resources/post-training/job.d.ts.map +1 -1
- package/resources/post-training/post-training.d.ts +19 -19
- package/resources/post-training/post-training.d.ts.map +1 -1
- package/resources/shared.d.ts +40 -1
- package/resources/shared.d.ts.map +1 -1
- package/resources/tool-runtime/index.d.ts +1 -1
- package/resources/tool-runtime/index.d.ts.map +1 -1
- package/resources/tool-runtime/index.js.map +1 -1
- package/resources/tool-runtime/index.mjs.map +1 -1
- package/resources/tool-runtime/tool-runtime.d.ts +4 -4
- package/resources/tool-runtime/tool-runtime.d.ts.map +1 -1
- package/resources/tool-runtime/tool-runtime.js +1 -9
- package/resources/tool-runtime/tool-runtime.js.map +1 -1
- package/resources/tool-runtime/tool-runtime.mjs +1 -9
- package/resources/tool-runtime/tool-runtime.mjs.map +1 -1
- package/src/_shims/index.d.ts +2 -0
- package/src/_shims/index.js +5 -1
- package/src/_shims/index.mjs +5 -1
- package/src/core.ts +24 -5
- package/src/index.ts +30 -15
- package/src/resources/chat/chat.ts +206 -0
- package/src/resources/chat/completions.ts +890 -0
- package/src/resources/chat/index.ts +10 -0
- package/src/resources/chat.ts +3 -0
- package/src/resources/completions.ts +268 -0
- package/src/resources/datasets.ts +13 -6
- package/src/resources/eval/eval.ts +1 -1
- package/src/resources/index.ts +12 -6
- package/src/resources/inference.ts +121 -0
- package/src/resources/inspect.ts +3 -1
- package/src/resources/post-training/job.ts +1 -1
- package/src/resources/post-training/post-training.ts +32 -32
- package/src/resources/shared.ts +42 -1
- package/src/resources/tool-runtime/index.ts +1 -0
- package/src/resources/tool-runtime/tool-runtime.ts +11 -12
- package/src/version.ts +1 -1
- package/version.d.ts +1 -1
- package/version.d.ts.map +1 -1
- package/version.js +1 -1
- package/version.js.map +1 -1
- package/version.mjs +1 -1
- package/version.mjs.map +1 -1
- package/internal/decoders/jsonl.d.ts +0 -12
- package/internal/decoders/jsonl.d.ts.map +0 -1
- package/internal/decoders/jsonl.js +0 -35
- package/internal/decoders/jsonl.js.map +0 -1
- package/internal/decoders/jsonl.mjs +0 -31
- package/internal/decoders/jsonl.mjs.map +0 -1
- package/resources/batch-inference.d.ts +0 -66
- package/resources/batch-inference.d.ts.map +0 -1
- package/resources/batch-inference.js +0 -15
- package/resources/batch-inference.js.map +0 -1
- package/resources/batch-inference.mjs +0 -11
- package/resources/batch-inference.mjs.map +0 -1
- package/src/internal/decoders/jsonl.ts +0 -41
- package/src/resources/batch-inference.ts +0 -103
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
export { Chat, type ChatCompletionChunk } from './chat';
|
|
4
|
+
export {
|
|
5
|
+
Completions,
|
|
6
|
+
type CompletionCreateResponse,
|
|
7
|
+
type CompletionCreateParams,
|
|
8
|
+
type CompletionCreateParamsNonStreaming,
|
|
9
|
+
type CompletionCreateParamsStreaming,
|
|
10
|
+
} from './completions';
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
import { APIResource } from '../resource';
|
|
4
|
+
import { APIPromise } from '../core';
|
|
5
|
+
import * as Core from '../core';
|
|
6
|
+
import * as CompletionsAPI from './completions';
|
|
7
|
+
import { Stream } from '../streaming';
|
|
8
|
+
|
|
9
|
+
export class Completions extends APIResource {
|
|
10
|
+
/**
|
|
11
|
+
* Generate an OpenAI-compatible completion for the given prompt using the
|
|
12
|
+
* specified model.
|
|
13
|
+
*/
|
|
14
|
+
create(
|
|
15
|
+
body: CompletionCreateParamsNonStreaming,
|
|
16
|
+
options?: Core.RequestOptions,
|
|
17
|
+
): APIPromise<CompletionCreateResponse>;
|
|
18
|
+
create(
|
|
19
|
+
body: CompletionCreateParamsStreaming,
|
|
20
|
+
options?: Core.RequestOptions,
|
|
21
|
+
): APIPromise<Stream<CompletionCreateResponse>>;
|
|
22
|
+
create(
|
|
23
|
+
body: CompletionCreateParamsBase,
|
|
24
|
+
options?: Core.RequestOptions,
|
|
25
|
+
): APIPromise<Stream<CompletionCreateResponse> | CompletionCreateResponse>;
|
|
26
|
+
create(
|
|
27
|
+
body: CompletionCreateParams,
|
|
28
|
+
options?: Core.RequestOptions,
|
|
29
|
+
): APIPromise<CompletionCreateResponse> | APIPromise<Stream<CompletionCreateResponse>> {
|
|
30
|
+
return this._client.post('/v1/openai/v1/completions', {
|
|
31
|
+
body,
|
|
32
|
+
...options,
|
|
33
|
+
stream: body.stream ?? false,
|
|
34
|
+
}) as APIPromise<CompletionCreateResponse> | APIPromise<Stream<CompletionCreateResponse>>;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Response from an OpenAI-compatible completion request.
|
|
40
|
+
*/
|
|
41
|
+
export interface CompletionCreateResponse {
|
|
42
|
+
id: string;
|
|
43
|
+
|
|
44
|
+
choices: Array<CompletionCreateResponse.Choice>;
|
|
45
|
+
|
|
46
|
+
created: number;
|
|
47
|
+
|
|
48
|
+
model: string;
|
|
49
|
+
|
|
50
|
+
object: 'text_completion';
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
export namespace CompletionCreateResponse {
|
|
54
|
+
/**
|
|
55
|
+
* A choice from an OpenAI-compatible completion response.
|
|
56
|
+
*/
|
|
57
|
+
export interface Choice {
|
|
58
|
+
finish_reason: string;
|
|
59
|
+
|
|
60
|
+
index: number;
|
|
61
|
+
|
|
62
|
+
text: string;
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* The log probabilities for the tokens in the message from an OpenAI-compatible
|
|
66
|
+
* chat completion response.
|
|
67
|
+
*/
|
|
68
|
+
logprobs?: Choice.Logprobs;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
export namespace Choice {
|
|
72
|
+
/**
|
|
73
|
+
* The log probabilities for the tokens in the message from an OpenAI-compatible
|
|
74
|
+
* chat completion response.
|
|
75
|
+
*/
|
|
76
|
+
export interface Logprobs {
|
|
77
|
+
/**
|
|
78
|
+
* (Optional) The log probabilities for the tokens in the message
|
|
79
|
+
*/
|
|
80
|
+
content?: Array<Logprobs.Content>;
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* (Optional) The log probabilities for the tokens in the message
|
|
84
|
+
*/
|
|
85
|
+
refusal?: Array<Logprobs.Refusal>;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
export namespace Logprobs {
|
|
89
|
+
/**
|
|
90
|
+
* The log probability for a token from an OpenAI-compatible chat completion
|
|
91
|
+
* response.
|
|
92
|
+
*/
|
|
93
|
+
export interface Content {
|
|
94
|
+
token: string;
|
|
95
|
+
|
|
96
|
+
logprob: number;
|
|
97
|
+
|
|
98
|
+
top_logprobs: Array<Content.TopLogprob>;
|
|
99
|
+
|
|
100
|
+
bytes?: Array<number>;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
export namespace Content {
|
|
104
|
+
/**
|
|
105
|
+
* The top log probability for a token from an OpenAI-compatible chat completion
|
|
106
|
+
* response.
|
|
107
|
+
*/
|
|
108
|
+
export interface TopLogprob {
|
|
109
|
+
token: string;
|
|
110
|
+
|
|
111
|
+
logprob: number;
|
|
112
|
+
|
|
113
|
+
bytes?: Array<number>;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* The log probability for a token from an OpenAI-compatible chat completion
|
|
119
|
+
* response.
|
|
120
|
+
*/
|
|
121
|
+
export interface Refusal {
|
|
122
|
+
token: string;
|
|
123
|
+
|
|
124
|
+
logprob: number;
|
|
125
|
+
|
|
126
|
+
top_logprobs: Array<Refusal.TopLogprob>;
|
|
127
|
+
|
|
128
|
+
bytes?: Array<number>;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
export namespace Refusal {
|
|
132
|
+
/**
|
|
133
|
+
* The top log probability for a token from an OpenAI-compatible chat completion
|
|
134
|
+
* response.
|
|
135
|
+
*/
|
|
136
|
+
export interface TopLogprob {
|
|
137
|
+
token: string;
|
|
138
|
+
|
|
139
|
+
logprob: number;
|
|
140
|
+
|
|
141
|
+
bytes?: Array<number>;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
export type CompletionCreateParams = CompletionCreateParamsNonStreaming | CompletionCreateParamsStreaming;
|
|
149
|
+
|
|
150
|
+
export interface CompletionCreateParamsBase {
|
|
151
|
+
/**
|
|
152
|
+
* The identifier of the model to use. The model must be registered with Llama
|
|
153
|
+
* Stack and available via the /models endpoint.
|
|
154
|
+
*/
|
|
155
|
+
model: string;
|
|
156
|
+
|
|
157
|
+
/**
|
|
158
|
+
* The prompt to generate a completion for
|
|
159
|
+
*/
|
|
160
|
+
prompt: string | Array<string> | Array<number> | Array<Array<number>>;
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* (Optional) The number of completions to generate
|
|
164
|
+
*/
|
|
165
|
+
best_of?: number;
|
|
166
|
+
|
|
167
|
+
/**
|
|
168
|
+
* (Optional) Whether to echo the prompt
|
|
169
|
+
*/
|
|
170
|
+
echo?: boolean;
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* (Optional) The penalty for repeated tokens
|
|
174
|
+
*/
|
|
175
|
+
frequency_penalty?: number;
|
|
176
|
+
|
|
177
|
+
guided_choice?: Array<string>;
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* (Optional) The logit bias to use
|
|
181
|
+
*/
|
|
182
|
+
logit_bias?: Record<string, number>;
|
|
183
|
+
|
|
184
|
+
/**
|
|
185
|
+
* (Optional) The log probabilities to use
|
|
186
|
+
*/
|
|
187
|
+
logprobs?: boolean;
|
|
188
|
+
|
|
189
|
+
/**
|
|
190
|
+
* (Optional) The maximum number of tokens to generate
|
|
191
|
+
*/
|
|
192
|
+
max_tokens?: number;
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* (Optional) The number of completions to generate
|
|
196
|
+
*/
|
|
197
|
+
n?: number;
|
|
198
|
+
|
|
199
|
+
/**
|
|
200
|
+
* (Optional) The penalty for repeated tokens
|
|
201
|
+
*/
|
|
202
|
+
presence_penalty?: number;
|
|
203
|
+
|
|
204
|
+
prompt_logprobs?: number;
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* (Optional) The seed to use
|
|
208
|
+
*/
|
|
209
|
+
seed?: number;
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* (Optional) The stop tokens to use
|
|
213
|
+
*/
|
|
214
|
+
stop?: string | Array<string>;
|
|
215
|
+
|
|
216
|
+
/**
|
|
217
|
+
* (Optional) Whether to stream the response
|
|
218
|
+
*/
|
|
219
|
+
stream?: boolean;
|
|
220
|
+
|
|
221
|
+
/**
|
|
222
|
+
* (Optional) The stream options to use
|
|
223
|
+
*/
|
|
224
|
+
stream_options?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
225
|
+
|
|
226
|
+
/**
|
|
227
|
+
* (Optional) The temperature to use
|
|
228
|
+
*/
|
|
229
|
+
temperature?: number;
|
|
230
|
+
|
|
231
|
+
/**
|
|
232
|
+
* (Optional) The top p to use
|
|
233
|
+
*/
|
|
234
|
+
top_p?: number;
|
|
235
|
+
|
|
236
|
+
/**
|
|
237
|
+
* (Optional) The user to use
|
|
238
|
+
*/
|
|
239
|
+
user?: string;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
export namespace CompletionCreateParams {
|
|
243
|
+
export type CompletionCreateParamsNonStreaming = CompletionsAPI.CompletionCreateParamsNonStreaming;
|
|
244
|
+
export type CompletionCreateParamsStreaming = CompletionsAPI.CompletionCreateParamsStreaming;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
export interface CompletionCreateParamsNonStreaming extends CompletionCreateParamsBase {
|
|
248
|
+
/**
|
|
249
|
+
* (Optional) Whether to stream the response
|
|
250
|
+
*/
|
|
251
|
+
stream?: false;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
export interface CompletionCreateParamsStreaming extends CompletionCreateParamsBase {
|
|
255
|
+
/**
|
|
256
|
+
* (Optional) Whether to stream the response
|
|
257
|
+
*/
|
|
258
|
+
stream: true;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
export declare namespace Completions {
|
|
262
|
+
export {
|
|
263
|
+
type CompletionCreateResponse as CompletionCreateResponse,
|
|
264
|
+
type CompletionCreateParams as CompletionCreateParams,
|
|
265
|
+
type CompletionCreateParamsNonStreaming as CompletionCreateParamsNonStreaming,
|
|
266
|
+
type CompletionCreateParamsStreaming as CompletionCreateParamsStreaming,
|
|
267
|
+
};
|
|
268
|
+
}
|
|
@@ -16,7 +16,15 @@ export class Datasets extends APIResource {
|
|
|
16
16
|
}
|
|
17
17
|
|
|
18
18
|
/**
|
|
19
|
-
* Get a paginated list of rows from a dataset. Uses
|
|
19
|
+
* Get a paginated list of rows from a dataset. Uses offset-based pagination where:
|
|
20
|
+
*
|
|
21
|
+
* - start_index: The starting index (0-based). If None, starts from beginning.
|
|
22
|
+
* - limit: Number of items to return. If None or -1, returns all items.
|
|
23
|
+
*
|
|
24
|
+
* The response includes:
|
|
25
|
+
*
|
|
26
|
+
* - data: List of items for the current page
|
|
27
|
+
* - has_more: Whether there are more items available after this set
|
|
20
28
|
*/
|
|
21
29
|
iterrows(
|
|
22
30
|
datasetId: string,
|
|
@@ -166,19 +174,18 @@ export namespace DatasetListResponse {
|
|
|
166
174
|
}
|
|
167
175
|
|
|
168
176
|
/**
|
|
169
|
-
* A paginated
|
|
177
|
+
* A generic paginated response that follows a simple format.
|
|
170
178
|
*/
|
|
171
179
|
export interface DatasetIterrowsResponse {
|
|
172
180
|
/**
|
|
173
|
-
* The
|
|
181
|
+
* The list of items for the current page
|
|
174
182
|
*/
|
|
175
183
|
data: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
|
|
176
184
|
|
|
177
185
|
/**
|
|
178
|
-
*
|
|
179
|
-
* rows.
|
|
186
|
+
* Whether there are more items available after this set
|
|
180
187
|
*/
|
|
181
|
-
|
|
188
|
+
has_more: boolean;
|
|
182
189
|
}
|
|
183
190
|
|
|
184
191
|
export interface DatasetRegisterResponse {
|
|
@@ -132,7 +132,7 @@ export interface EvaluateResponse {
|
|
|
132
132
|
export interface Job {
|
|
133
133
|
job_id: string;
|
|
134
134
|
|
|
135
|
-
status: 'completed' | 'in_progress' | 'failed' | 'scheduled';
|
|
135
|
+
status: 'completed' | 'in_progress' | 'failed' | 'scheduled' | 'cancelled';
|
|
136
136
|
}
|
|
137
137
|
|
|
138
138
|
export interface EvalEvaluateRowsParams {
|
package/src/resources/index.ts
CHANGED
|
@@ -11,12 +11,6 @@ export {
|
|
|
11
11
|
type AgentCreateResponse,
|
|
12
12
|
type AgentCreateParams,
|
|
13
13
|
} from './agents/agents';
|
|
14
|
-
export {
|
|
15
|
-
BatchInference,
|
|
16
|
-
type BatchInferenceChatCompletionResponse,
|
|
17
|
-
type BatchInferenceChatCompletionParams,
|
|
18
|
-
type BatchInferenceCompletionParams,
|
|
19
|
-
} from './batch-inference';
|
|
20
14
|
export {
|
|
21
15
|
Benchmarks,
|
|
22
16
|
type Benchmark,
|
|
@@ -24,6 +18,14 @@ export {
|
|
|
24
18
|
type BenchmarkListResponse,
|
|
25
19
|
type BenchmarkRegisterParams,
|
|
26
20
|
} from './benchmarks';
|
|
21
|
+
export { Chat, type ChatCompletionChunk } from './chat/chat';
|
|
22
|
+
export {
|
|
23
|
+
Completions,
|
|
24
|
+
type CompletionCreateResponse,
|
|
25
|
+
type CompletionCreateParams,
|
|
26
|
+
type CompletionCreateParamsNonStreaming,
|
|
27
|
+
type CompletionCreateParamsStreaming,
|
|
28
|
+
} from './completions';
|
|
27
29
|
export {
|
|
28
30
|
Datasets,
|
|
29
31
|
type ListDatasetsResponse,
|
|
@@ -51,6 +53,9 @@ export {
|
|
|
51
53
|
type CompletionResponse,
|
|
52
54
|
type EmbeddingsResponse,
|
|
53
55
|
type TokenLogProbs,
|
|
56
|
+
type InferenceBatchChatCompletionResponse,
|
|
57
|
+
type InferenceBatchChatCompletionParams,
|
|
58
|
+
type InferenceBatchCompletionParams,
|
|
54
59
|
type InferenceChatCompletionParams,
|
|
55
60
|
type InferenceChatCompletionParamsNonStreaming,
|
|
56
61
|
type InferenceChatCompletionParamsStreaming,
|
|
@@ -126,6 +131,7 @@ export {
|
|
|
126
131
|
ToolRuntime,
|
|
127
132
|
type ToolDef,
|
|
128
133
|
type ToolInvocationResult,
|
|
134
|
+
type ToolRuntimeListToolsResponse,
|
|
129
135
|
type ToolRuntimeInvokeToolParams,
|
|
130
136
|
type ToolRuntimeListToolsParams,
|
|
131
137
|
} from './tool-runtime/tool-runtime';
|
|
@@ -8,6 +8,20 @@ import * as Shared from './shared';
|
|
|
8
8
|
import { Stream } from '../streaming';
|
|
9
9
|
|
|
10
10
|
export class Inference extends APIResource {
|
|
11
|
+
batchChatCompletion(
|
|
12
|
+
body: InferenceBatchChatCompletionParams,
|
|
13
|
+
options?: Core.RequestOptions,
|
|
14
|
+
): Core.APIPromise<InferenceBatchChatCompletionResponse> {
|
|
15
|
+
return this._client.post('/v1/inference/batch-chat-completion', { body, ...options });
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
batchCompletion(
|
|
19
|
+
body: InferenceBatchCompletionParams,
|
|
20
|
+
options?: Core.RequestOptions,
|
|
21
|
+
): Core.APIPromise<Shared.BatchCompletion> {
|
|
22
|
+
return this._client.post('/v1/inference/batch-completion', { body, ...options });
|
|
23
|
+
}
|
|
24
|
+
|
|
11
25
|
/**
|
|
12
26
|
* Generate a chat completion for the given messages using the specified model.
|
|
13
27
|
*/
|
|
@@ -173,6 +187,110 @@ export interface TokenLogProbs {
|
|
|
173
187
|
logprobs_by_token: Record<string, number>;
|
|
174
188
|
}
|
|
175
189
|
|
|
190
|
+
export interface InferenceBatchChatCompletionResponse {
|
|
191
|
+
batch: Array<Shared.ChatCompletionResponse>;
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
export interface InferenceBatchChatCompletionParams {
|
|
195
|
+
messages_batch: Array<Array<Shared.Message>>;
|
|
196
|
+
|
|
197
|
+
model_id: string;
|
|
198
|
+
|
|
199
|
+
logprobs?: InferenceBatchChatCompletionParams.Logprobs;
|
|
200
|
+
|
|
201
|
+
/**
|
|
202
|
+
* Configuration for JSON schema-guided response generation.
|
|
203
|
+
*/
|
|
204
|
+
response_format?: Shared.ResponseFormat;
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Sampling parameters.
|
|
208
|
+
*/
|
|
209
|
+
sampling_params?: Shared.SamplingParams;
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* Configuration for tool use.
|
|
213
|
+
*/
|
|
214
|
+
tool_config?: InferenceBatchChatCompletionParams.ToolConfig;
|
|
215
|
+
|
|
216
|
+
tools?: Array<InferenceBatchChatCompletionParams.Tool>;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
export namespace InferenceBatchChatCompletionParams {
|
|
220
|
+
export interface Logprobs {
|
|
221
|
+
/**
|
|
222
|
+
* How many tokens (for each position) to return log probabilities for.
|
|
223
|
+
*/
|
|
224
|
+
top_k?: number;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
/**
|
|
228
|
+
* Configuration for tool use.
|
|
229
|
+
*/
|
|
230
|
+
export interface ToolConfig {
|
|
231
|
+
/**
|
|
232
|
+
* (Optional) Config for how to override the default system prompt. -
|
|
233
|
+
* `SystemMessageBehavior.append`: Appends the provided system message to the
|
|
234
|
+
* default system prompt. - `SystemMessageBehavior.replace`: Replaces the default
|
|
235
|
+
* system prompt with the provided system message. The system message can include
|
|
236
|
+
* the string '{{function_definitions}}' to indicate where the function definitions
|
|
237
|
+
* should be inserted.
|
|
238
|
+
*/
|
|
239
|
+
system_message_behavior?: 'append' | 'replace';
|
|
240
|
+
|
|
241
|
+
/**
|
|
242
|
+
* (Optional) Whether tool use is automatic, required, or none. Can also specify a
|
|
243
|
+
* tool name to use a specific tool. Defaults to ToolChoice.auto.
|
|
244
|
+
*/
|
|
245
|
+
tool_choice?: 'auto' | 'required' | 'none' | (string & {});
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* (Optional) Instructs the model how to format tool calls. By default, Llama Stack
|
|
249
|
+
* will attempt to use a format that is best adapted to the model. -
|
|
250
|
+
* `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
|
|
251
|
+
* `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
|
|
252
|
+
* <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
|
|
253
|
+
* are output as Python syntax -- a list of function calls.
|
|
254
|
+
*/
|
|
255
|
+
tool_prompt_format?: 'json' | 'function_tag' | 'python_list';
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
export interface Tool {
|
|
259
|
+
tool_name: 'brave_search' | 'wolfram_alpha' | 'photogen' | 'code_interpreter' | (string & {});
|
|
260
|
+
|
|
261
|
+
description?: string;
|
|
262
|
+
|
|
263
|
+
parameters?: Record<string, Shared.ToolParamDefinition>;
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
export interface InferenceBatchCompletionParams {
|
|
268
|
+
content_batch: Array<Shared.InterleavedContent>;
|
|
269
|
+
|
|
270
|
+
model_id: string;
|
|
271
|
+
|
|
272
|
+
logprobs?: InferenceBatchCompletionParams.Logprobs;
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* Configuration for JSON schema-guided response generation.
|
|
276
|
+
*/
|
|
277
|
+
response_format?: Shared.ResponseFormat;
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* Sampling parameters.
|
|
281
|
+
*/
|
|
282
|
+
sampling_params?: Shared.SamplingParams;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
export namespace InferenceBatchCompletionParams {
|
|
286
|
+
export interface Logprobs {
|
|
287
|
+
/**
|
|
288
|
+
* How many tokens (for each position) to return log probabilities for.
|
|
289
|
+
*/
|
|
290
|
+
top_k?: number;
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
|
|
176
294
|
export type InferenceChatCompletionParams =
|
|
177
295
|
| InferenceChatCompletionParamsNonStreaming
|
|
178
296
|
| InferenceChatCompletionParamsStreaming;
|
|
@@ -424,6 +542,9 @@ export declare namespace Inference {
|
|
|
424
542
|
type CompletionResponse as CompletionResponse,
|
|
425
543
|
type EmbeddingsResponse as EmbeddingsResponse,
|
|
426
544
|
type TokenLogProbs as TokenLogProbs,
|
|
545
|
+
type InferenceBatchChatCompletionResponse as InferenceBatchChatCompletionResponse,
|
|
546
|
+
type InferenceBatchChatCompletionParams as InferenceBatchChatCompletionParams,
|
|
547
|
+
type InferenceBatchCompletionParams as InferenceBatchCompletionParams,
|
|
427
548
|
type InferenceChatCompletionParams as InferenceChatCompletionParams,
|
|
428
549
|
type InferenceChatCompletionParamsNonStreaming as InferenceChatCompletionParamsNonStreaming,
|
|
429
550
|
type InferenceChatCompletionParamsStreaming as InferenceChatCompletionParamsStreaming,
|
package/src/resources/inspect.ts
CHANGED
|
@@ -14,7 +14,7 @@ export class Inspect extends APIResource {
|
|
|
14
14
|
}
|
|
15
15
|
|
|
16
16
|
export interface HealthInfo {
|
|
17
|
-
status:
|
|
17
|
+
status: 'OK' | 'Error' | 'Not Implemented';
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
export interface ProviderInfo {
|
|
@@ -22,6 +22,8 @@ export interface ProviderInfo {
|
|
|
22
22
|
|
|
23
23
|
config: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
24
24
|
|
|
25
|
+
health: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
26
|
+
|
|
25
27
|
provider_id: string;
|
|
26
28
|
|
|
27
29
|
provider_type: string;
|
|
@@ -101,21 +101,21 @@ export namespace PostTrainingPreferenceOptimizeParams {
|
|
|
101
101
|
}
|
|
102
102
|
|
|
103
103
|
export interface TrainingConfig {
|
|
104
|
-
data_config: TrainingConfig.DataConfig;
|
|
105
|
-
|
|
106
104
|
gradient_accumulation_steps: number;
|
|
107
105
|
|
|
108
106
|
max_steps_per_epoch: number;
|
|
109
107
|
|
|
110
|
-
max_validation_steps: number;
|
|
111
|
-
|
|
112
108
|
n_epochs: number;
|
|
113
109
|
|
|
114
|
-
|
|
110
|
+
data_config?: TrainingConfig.DataConfig;
|
|
115
111
|
|
|
116
112
|
dtype?: string;
|
|
117
113
|
|
|
118
114
|
efficiency_config?: TrainingConfig.EfficiencyConfig;
|
|
115
|
+
|
|
116
|
+
max_validation_steps?: number;
|
|
117
|
+
|
|
118
|
+
optimizer_config?: TrainingConfig.OptimizerConfig;
|
|
119
119
|
}
|
|
120
120
|
|
|
121
121
|
export namespace TrainingConfig {
|
|
@@ -135,16 +135,6 @@ export namespace PostTrainingPreferenceOptimizeParams {
|
|
|
135
135
|
validation_dataset_id?: string;
|
|
136
136
|
}
|
|
137
137
|
|
|
138
|
-
export interface OptimizerConfig {
|
|
139
|
-
lr: number;
|
|
140
|
-
|
|
141
|
-
num_warmup_steps: number;
|
|
142
|
-
|
|
143
|
-
optimizer_type: 'adam' | 'adamw' | 'sgd';
|
|
144
|
-
|
|
145
|
-
weight_decay: number;
|
|
146
|
-
}
|
|
147
|
-
|
|
148
138
|
export interface EfficiencyConfig {
|
|
149
139
|
enable_activation_checkpointing?: boolean;
|
|
150
140
|
|
|
@@ -154,6 +144,16 @@ export namespace PostTrainingPreferenceOptimizeParams {
|
|
|
154
144
|
|
|
155
145
|
memory_efficient_fsdp_wrap?: boolean;
|
|
156
146
|
}
|
|
147
|
+
|
|
148
|
+
export interface OptimizerConfig {
|
|
149
|
+
lr: number;
|
|
150
|
+
|
|
151
|
+
num_warmup_steps: number;
|
|
152
|
+
|
|
153
|
+
optimizer_type: 'adam' | 'adamw' | 'sgd';
|
|
154
|
+
|
|
155
|
+
weight_decay: number;
|
|
156
|
+
}
|
|
157
157
|
}
|
|
158
158
|
}
|
|
159
159
|
|
|
@@ -164,32 +164,32 @@ export interface PostTrainingSupervisedFineTuneParams {
|
|
|
164
164
|
|
|
165
165
|
logger_config: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
166
166
|
|
|
167
|
-
model: string;
|
|
168
|
-
|
|
169
167
|
training_config: PostTrainingSupervisedFineTuneParams.TrainingConfig;
|
|
170
168
|
|
|
171
169
|
algorithm_config?: AlgorithmConfig;
|
|
172
170
|
|
|
173
171
|
checkpoint_dir?: string;
|
|
172
|
+
|
|
173
|
+
model?: string;
|
|
174
174
|
}
|
|
175
175
|
|
|
176
176
|
export namespace PostTrainingSupervisedFineTuneParams {
|
|
177
177
|
export interface TrainingConfig {
|
|
178
|
-
data_config: TrainingConfig.DataConfig;
|
|
179
|
-
|
|
180
178
|
gradient_accumulation_steps: number;
|
|
181
179
|
|
|
182
180
|
max_steps_per_epoch: number;
|
|
183
181
|
|
|
184
|
-
max_validation_steps: number;
|
|
185
|
-
|
|
186
182
|
n_epochs: number;
|
|
187
183
|
|
|
188
|
-
|
|
184
|
+
data_config?: TrainingConfig.DataConfig;
|
|
189
185
|
|
|
190
186
|
dtype?: string;
|
|
191
187
|
|
|
192
188
|
efficiency_config?: TrainingConfig.EfficiencyConfig;
|
|
189
|
+
|
|
190
|
+
max_validation_steps?: number;
|
|
191
|
+
|
|
192
|
+
optimizer_config?: TrainingConfig.OptimizerConfig;
|
|
193
193
|
}
|
|
194
194
|
|
|
195
195
|
export namespace TrainingConfig {
|
|
@@ -209,16 +209,6 @@ export namespace PostTrainingSupervisedFineTuneParams {
|
|
|
209
209
|
validation_dataset_id?: string;
|
|
210
210
|
}
|
|
211
211
|
|
|
212
|
-
export interface OptimizerConfig {
|
|
213
|
-
lr: number;
|
|
214
|
-
|
|
215
|
-
num_warmup_steps: number;
|
|
216
|
-
|
|
217
|
-
optimizer_type: 'adam' | 'adamw' | 'sgd';
|
|
218
|
-
|
|
219
|
-
weight_decay: number;
|
|
220
|
-
}
|
|
221
|
-
|
|
222
212
|
export interface EfficiencyConfig {
|
|
223
213
|
enable_activation_checkpointing?: boolean;
|
|
224
214
|
|
|
@@ -228,6 +218,16 @@ export namespace PostTrainingSupervisedFineTuneParams {
|
|
|
228
218
|
|
|
229
219
|
memory_efficient_fsdp_wrap?: boolean;
|
|
230
220
|
}
|
|
221
|
+
|
|
222
|
+
export interface OptimizerConfig {
|
|
223
|
+
lr: number;
|
|
224
|
+
|
|
225
|
+
num_warmup_steps: number;
|
|
226
|
+
|
|
227
|
+
optimizer_type: 'adam' | 'adamw' | 'sgd';
|
|
228
|
+
|
|
229
|
+
weight_decay: number;
|
|
230
|
+
}
|
|
231
231
|
}
|
|
232
232
|
}
|
|
233
233
|
|