@huggingface/tasks 0.13.16 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commonjs/model-data.d.ts +3 -0
- package/dist/commonjs/model-data.d.ts.map +1 -1
- package/dist/commonjs/model-libraries-snippets.d.ts +3 -1
- package/dist/commonjs/model-libraries-snippets.d.ts.map +1 -1
- package/dist/commonjs/model-libraries-snippets.js +134 -22
- package/dist/commonjs/model-libraries.d.ts +12 -8
- package/dist/commonjs/model-libraries.d.ts.map +1 -1
- package/dist/commonjs/model-libraries.js +15 -8
- package/dist/commonjs/tasks/audio-classification/inference.d.ts +1 -1
- package/dist/commonjs/tasks/audio-classification/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/automatic-speech-recognition/inference.d.ts +1 -1
- package/dist/commonjs/tasks/automatic-speech-recognition/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/automatic-speech-recognition/inference.js +0 -5
- package/dist/commonjs/tasks/chat-completion/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/chat-completion/inference.js +0 -5
- package/dist/commonjs/tasks/depth-estimation/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/depth-estimation/inference.js +0 -5
- package/dist/commonjs/tasks/feature-extraction/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/feature-extraction/inference.js +0 -5
- package/dist/commonjs/tasks/image-classification/inference.d.ts +1 -1
- package/dist/commonjs/tasks/image-classification/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/image-segmentation/inference.d.ts +1 -1
- package/dist/commonjs/tasks/image-segmentation/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/image-to-image/inference.d.ts +3 -3
- package/dist/commonjs/tasks/image-to-image/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/image-to-image/inference.js +0 -5
- package/dist/commonjs/tasks/image-to-text/inference.d.ts +1 -1
- package/dist/commonjs/tasks/image-to-text/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/image-to-text/inference.js +0 -5
- package/dist/commonjs/tasks/index.d.ts +1 -0
- package/dist/commonjs/tasks/index.d.ts.map +1 -1
- package/dist/commonjs/tasks/object-detection/inference.d.ts +1 -1
- package/dist/commonjs/tasks/object-detection/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/sentence-similarity/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/sentence-similarity/inference.js +0 -5
- package/dist/commonjs/tasks/summarization/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/summarization/inference.js +0 -5
- package/dist/commonjs/tasks/text-generation/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/text-generation/inference.js +0 -5
- package/dist/commonjs/tasks/text-to-audio/inference.d.ts +14 -15
- package/dist/commonjs/tasks/text-to-audio/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/text-to-audio/inference.js +0 -5
- package/dist/commonjs/tasks/text-to-image/inference.d.ts +2 -2
- package/dist/commonjs/tasks/text-to-image/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/text-to-image/inference.js +0 -5
- package/dist/commonjs/tasks/text-to-speech/inference.d.ts +14 -17
- package/dist/commonjs/tasks/text-to-speech/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/text-to-speech/inference.js +0 -5
- package/dist/commonjs/tasks/text-to-video/inference.d.ts +58 -0
- package/dist/commonjs/tasks/text-to-video/inference.d.ts.map +1 -0
- package/dist/commonjs/tasks/text-to-video/inference.js +2 -0
- package/dist/commonjs/tasks/text2text-generation/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/text2text-generation/inference.js +0 -5
- package/dist/commonjs/tasks/translation/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/translation/inference.js +0 -5
- package/dist/commonjs/tasks/visual-question-answering/inference.d.ts +1 -1
- package/dist/commonjs/tasks/visual-question-answering/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/zero-shot-image-classification/inference.d.ts +1 -1
- package/dist/commonjs/tasks/zero-shot-image-classification/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/zero-shot-object-detection/inference.d.ts +1 -1
- package/dist/commonjs/tasks/zero-shot-object-detection/inference.d.ts.map +1 -1
- package/dist/esm/model-data.d.ts +3 -0
- package/dist/esm/model-data.d.ts.map +1 -1
- package/dist/esm/model-libraries-snippets.d.ts +3 -1
- package/dist/esm/model-libraries-snippets.d.ts.map +1 -1
- package/dist/esm/model-libraries-snippets.js +129 -19
- package/dist/esm/model-libraries.d.ts +12 -8
- package/dist/esm/model-libraries.d.ts.map +1 -1
- package/dist/esm/model-libraries.js +15 -8
- package/dist/esm/tasks/audio-classification/inference.d.ts +1 -1
- package/dist/esm/tasks/audio-classification/inference.d.ts.map +1 -1
- package/dist/esm/tasks/automatic-speech-recognition/inference.d.ts +1 -1
- package/dist/esm/tasks/automatic-speech-recognition/inference.d.ts.map +1 -1
- package/dist/esm/tasks/automatic-speech-recognition/inference.js +0 -5
- package/dist/esm/tasks/chat-completion/inference.d.ts.map +1 -1
- package/dist/esm/tasks/chat-completion/inference.js +0 -5
- package/dist/esm/tasks/depth-estimation/inference.d.ts.map +1 -1
- package/dist/esm/tasks/depth-estimation/inference.js +0 -5
- package/dist/esm/tasks/feature-extraction/inference.d.ts.map +1 -1
- package/dist/esm/tasks/feature-extraction/inference.js +0 -5
- package/dist/esm/tasks/image-classification/inference.d.ts +1 -1
- package/dist/esm/tasks/image-classification/inference.d.ts.map +1 -1
- package/dist/esm/tasks/image-segmentation/inference.d.ts +1 -1
- package/dist/esm/tasks/image-segmentation/inference.d.ts.map +1 -1
- package/dist/esm/tasks/image-to-image/inference.d.ts +3 -3
- package/dist/esm/tasks/image-to-image/inference.d.ts.map +1 -1
- package/dist/esm/tasks/image-to-image/inference.js +0 -5
- package/dist/esm/tasks/image-to-text/inference.d.ts +1 -1
- package/dist/esm/tasks/image-to-text/inference.d.ts.map +1 -1
- package/dist/esm/tasks/image-to-text/inference.js +0 -5
- package/dist/esm/tasks/index.d.ts +1 -0
- package/dist/esm/tasks/index.d.ts.map +1 -1
- package/dist/esm/tasks/object-detection/inference.d.ts +1 -1
- package/dist/esm/tasks/object-detection/inference.d.ts.map +1 -1
- package/dist/esm/tasks/sentence-similarity/inference.d.ts.map +1 -1
- package/dist/esm/tasks/sentence-similarity/inference.js +0 -5
- package/dist/esm/tasks/summarization/inference.d.ts.map +1 -1
- package/dist/esm/tasks/summarization/inference.js +0 -5
- package/dist/esm/tasks/text-generation/inference.d.ts.map +1 -1
- package/dist/esm/tasks/text-generation/inference.js +0 -5
- package/dist/esm/tasks/text-to-audio/inference.d.ts +14 -15
- package/dist/esm/tasks/text-to-audio/inference.d.ts.map +1 -1
- package/dist/esm/tasks/text-to-audio/inference.js +0 -5
- package/dist/esm/tasks/text-to-image/inference.d.ts +2 -2
- package/dist/esm/tasks/text-to-image/inference.d.ts.map +1 -1
- package/dist/esm/tasks/text-to-image/inference.js +0 -5
- package/dist/esm/tasks/text-to-speech/inference.d.ts +14 -17
- package/dist/esm/tasks/text-to-speech/inference.d.ts.map +1 -1
- package/dist/esm/tasks/text-to-speech/inference.js +0 -5
- package/dist/esm/tasks/text-to-video/inference.d.ts +58 -0
- package/dist/esm/tasks/text-to-video/inference.d.ts.map +1 -0
- package/dist/esm/tasks/text-to-video/inference.js +1 -0
- package/dist/esm/tasks/text2text-generation/inference.d.ts.map +1 -1
- package/dist/esm/tasks/text2text-generation/inference.js +0 -5
- package/dist/esm/tasks/translation/inference.d.ts.map +1 -1
- package/dist/esm/tasks/translation/inference.js +0 -5
- package/dist/esm/tasks/visual-question-answering/inference.d.ts +1 -1
- package/dist/esm/tasks/visual-question-answering/inference.d.ts.map +1 -1
- package/dist/esm/tasks/zero-shot-image-classification/inference.d.ts +1 -1
- package/dist/esm/tasks/zero-shot-image-classification/inference.d.ts.map +1 -1
- package/dist/esm/tasks/zero-shot-object-detection/inference.d.ts +1 -1
- package/dist/esm/tasks/zero-shot-object-detection/inference.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/model-data.ts +3 -0
- package/src/model-libraries-snippets.ts +141 -19
- package/src/model-libraries.ts +15 -8
- package/src/tasks/audio-classification/inference.ts +1 -1
- package/src/tasks/audio-classification/spec/input.json +2 -1
- package/src/tasks/automatic-speech-recognition/inference.ts +1 -7
- package/src/tasks/automatic-speech-recognition/spec/input.json +2 -1
- package/src/tasks/chat-completion/inference.ts +0 -33
- package/src/tasks/depth-estimation/inference.ts +3 -3
- package/src/tasks/document-question-answering/spec/input.json +2 -1
- package/src/tasks/feature-extraction/inference.ts +0 -3
- package/src/tasks/image-classification/inference.ts +1 -1
- package/src/tasks/image-classification/spec/input.json +2 -1
- package/src/tasks/image-segmentation/inference.ts +1 -1
- package/src/tasks/image-segmentation/spec/input.json +2 -1
- package/src/tasks/image-to-image/inference.ts +3 -7
- package/src/tasks/image-to-image/spec/input.json +4 -6
- package/src/tasks/image-to-text/inference.ts +1 -6
- package/src/tasks/image-to-text/spec/input.json +2 -1
- package/src/tasks/index.ts +1 -0
- package/src/tasks/object-detection/inference.ts +1 -1
- package/src/tasks/object-detection/spec/input.json +2 -1
- package/src/tasks/sentence-similarity/inference.ts +3 -4
- package/src/tasks/summarization/inference.ts +3 -5
- package/src/tasks/text-generation/inference.ts +0 -13
- package/src/tasks/text-to-audio/inference.ts +14 -20
- package/src/tasks/text-to-audio/spec/output.json +3 -2
- package/src/tasks/text-to-image/inference.ts +2 -6
- package/src/tasks/text-to-image/spec/input.json +2 -5
- package/src/tasks/text-to-speech/inference.ts +14 -22
- package/src/tasks/text-to-speech/spec/output.json +13 -2
- package/src/tasks/text-to-video/inference.ts +57 -0
- package/src/tasks/text-to-video/spec/input.json +49 -0
- package/src/tasks/text-to-video/spec/output.json +13 -0
- package/src/tasks/text2text-generation/inference.ts +3 -5
- package/src/tasks/translation/inference.ts +3 -5
- package/src/tasks/visual-question-answering/inference.ts +1 -1
- package/src/tasks/visual-question-answering/spec/input.json +4 -2
- package/src/tasks/zero-shot-image-classification/inference.ts +1 -1
- package/src/tasks/zero-shot-image-classification/spec/input.json +2 -1
- package/src/tasks/zero-shot-object-detection/inference.ts +1 -1
- package/src/tasks/zero-shot-object-detection/spec/input.json +2 -1
|
@@ -7,7 +7,8 @@
|
|
|
7
7
|
"properties": {
|
|
8
8
|
"inputs": {
|
|
9
9
|
"description": "The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.",
|
|
10
|
-
"type": "string"
|
|
10
|
+
"type": "string",
|
|
11
|
+
"comment": "type=binary"
|
|
11
12
|
},
|
|
12
13
|
"parameters": {
|
|
13
14
|
"description": "Additional inference parameters for Automatic Speech Recognition",
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
/**
|
|
8
7
|
* Chat Completion Input.
|
|
9
8
|
*
|
|
@@ -105,30 +104,24 @@ export interface ChatCompletionInput {
|
|
|
105
104
|
top_p?: number;
|
|
106
105
|
[property: string]: unknown;
|
|
107
106
|
}
|
|
108
|
-
|
|
109
107
|
export interface ChatCompletionInputMessage {
|
|
110
108
|
content: ChatCompletionInputMessageContent;
|
|
111
109
|
name?: string;
|
|
112
110
|
role: string;
|
|
113
111
|
[property: string]: unknown;
|
|
114
112
|
}
|
|
115
|
-
|
|
116
113
|
export type ChatCompletionInputMessageContent = ChatCompletionInputMessageChunk[] | string;
|
|
117
|
-
|
|
118
114
|
export interface ChatCompletionInputMessageChunk {
|
|
119
115
|
image_url?: ChatCompletionInputURL;
|
|
120
116
|
text?: string;
|
|
121
117
|
type: ChatCompletionInputMessageChunkType;
|
|
122
118
|
[property: string]: unknown;
|
|
123
119
|
}
|
|
124
|
-
|
|
125
120
|
export interface ChatCompletionInputURL {
|
|
126
121
|
url: string;
|
|
127
122
|
[property: string]: unknown;
|
|
128
123
|
}
|
|
129
|
-
|
|
130
124
|
export type ChatCompletionInputMessageChunkType = "text" | "image_url";
|
|
131
|
-
|
|
132
125
|
export interface ChatCompletionInputGrammarType {
|
|
133
126
|
type: ChatCompletionInputGrammarTypeType;
|
|
134
127
|
/**
|
|
@@ -140,9 +133,7 @@ export interface ChatCompletionInputGrammarType {
|
|
|
140
133
|
value: unknown;
|
|
141
134
|
[property: string]: unknown;
|
|
142
135
|
}
|
|
143
|
-
|
|
144
136
|
export type ChatCompletionInputGrammarTypeType = "json" | "regex";
|
|
145
|
-
|
|
146
137
|
export interface ChatCompletionInputStreamOptions {
|
|
147
138
|
/**
|
|
148
139
|
* If set, an additional chunk will be streamed before the data: [DONE] message. The usage
|
|
@@ -153,13 +144,11 @@ export interface ChatCompletionInputStreamOptions {
|
|
|
153
144
|
include_usage: boolean;
|
|
154
145
|
[property: string]: unknown;
|
|
155
146
|
}
|
|
156
|
-
|
|
157
147
|
/**
|
|
158
148
|
*
|
|
159
149
|
* <https://platform.openai.com/docs/guides/function-calling/configuring-function-calling-behavior-using-the-tool_choice-parameter>
|
|
160
150
|
*/
|
|
161
151
|
export type ChatCompletionInputToolChoice = ChatCompletionInputToolChoiceEnum | ChatCompletionInputToolChoiceObject;
|
|
162
|
-
|
|
163
152
|
/**
|
|
164
153
|
* Means the model can pick between generating a message or calling one or more tools.
|
|
165
154
|
*
|
|
@@ -168,30 +157,25 @@ export type ChatCompletionInputToolChoice = ChatCompletionInputToolChoiceEnum |
|
|
|
168
157
|
* Means the model must call one or more tools.
|
|
169
158
|
*/
|
|
170
159
|
export type ChatCompletionInputToolChoiceEnum = "auto" | "none" | "required";
|
|
171
|
-
|
|
172
160
|
export interface ChatCompletionInputToolChoiceObject {
|
|
173
161
|
function: ChatCompletionInputFunctionName;
|
|
174
162
|
[property: string]: unknown;
|
|
175
163
|
}
|
|
176
|
-
|
|
177
164
|
export interface ChatCompletionInputFunctionName {
|
|
178
165
|
name: string;
|
|
179
166
|
[property: string]: unknown;
|
|
180
167
|
}
|
|
181
|
-
|
|
182
168
|
export interface ChatCompletionInputTool {
|
|
183
169
|
function: ChatCompletionInputFunctionDefinition;
|
|
184
170
|
type: string;
|
|
185
171
|
[property: string]: unknown;
|
|
186
172
|
}
|
|
187
|
-
|
|
188
173
|
export interface ChatCompletionInputFunctionDefinition {
|
|
189
174
|
arguments: unknown;
|
|
190
175
|
description?: string;
|
|
191
176
|
name: string;
|
|
192
177
|
[property: string]: unknown;
|
|
193
178
|
}
|
|
194
|
-
|
|
195
179
|
/**
|
|
196
180
|
* Chat Completion Output.
|
|
197
181
|
*
|
|
@@ -208,7 +192,6 @@ export interface ChatCompletionOutput {
|
|
|
208
192
|
usage: ChatCompletionOutputUsage;
|
|
209
193
|
[property: string]: unknown;
|
|
210
194
|
}
|
|
211
|
-
|
|
212
195
|
export interface ChatCompletionOutputComplete {
|
|
213
196
|
finish_reason: string;
|
|
214
197
|
index: number;
|
|
@@ -216,53 +199,45 @@ export interface ChatCompletionOutputComplete {
|
|
|
216
199
|
message: ChatCompletionOutputMessage;
|
|
217
200
|
[property: string]: unknown;
|
|
218
201
|
}
|
|
219
|
-
|
|
220
202
|
export interface ChatCompletionOutputLogprobs {
|
|
221
203
|
content: ChatCompletionOutputLogprob[];
|
|
222
204
|
[property: string]: unknown;
|
|
223
205
|
}
|
|
224
|
-
|
|
225
206
|
export interface ChatCompletionOutputLogprob {
|
|
226
207
|
logprob: number;
|
|
227
208
|
token: string;
|
|
228
209
|
top_logprobs: ChatCompletionOutputTopLogprob[];
|
|
229
210
|
[property: string]: unknown;
|
|
230
211
|
}
|
|
231
|
-
|
|
232
212
|
export interface ChatCompletionOutputTopLogprob {
|
|
233
213
|
logprob: number;
|
|
234
214
|
token: string;
|
|
235
215
|
[property: string]: unknown;
|
|
236
216
|
}
|
|
237
|
-
|
|
238
217
|
export interface ChatCompletionOutputMessage {
|
|
239
218
|
content?: string;
|
|
240
219
|
role: string;
|
|
241
220
|
tool_calls?: ChatCompletionOutputToolCall[];
|
|
242
221
|
[property: string]: unknown;
|
|
243
222
|
}
|
|
244
|
-
|
|
245
223
|
export interface ChatCompletionOutputToolCall {
|
|
246
224
|
function: ChatCompletionOutputFunctionDefinition;
|
|
247
225
|
id: string;
|
|
248
226
|
type: string;
|
|
249
227
|
[property: string]: unknown;
|
|
250
228
|
}
|
|
251
|
-
|
|
252
229
|
export interface ChatCompletionOutputFunctionDefinition {
|
|
253
230
|
arguments: unknown;
|
|
254
231
|
description?: string;
|
|
255
232
|
name: string;
|
|
256
233
|
[property: string]: unknown;
|
|
257
234
|
}
|
|
258
|
-
|
|
259
235
|
export interface ChatCompletionOutputUsage {
|
|
260
236
|
completion_tokens: number;
|
|
261
237
|
prompt_tokens: number;
|
|
262
238
|
total_tokens: number;
|
|
263
239
|
[property: string]: unknown;
|
|
264
240
|
}
|
|
265
|
-
|
|
266
241
|
/**
|
|
267
242
|
* Chat Completion Stream Output.
|
|
268
243
|
*
|
|
@@ -279,7 +254,6 @@ export interface ChatCompletionStreamOutput {
|
|
|
279
254
|
usage?: ChatCompletionStreamOutputUsage;
|
|
280
255
|
[property: string]: unknown;
|
|
281
256
|
}
|
|
282
|
-
|
|
283
257
|
export interface ChatCompletionStreamOutputChoice {
|
|
284
258
|
delta: ChatCompletionStreamOutputDelta;
|
|
285
259
|
finish_reason?: string;
|
|
@@ -287,14 +261,12 @@ export interface ChatCompletionStreamOutputChoice {
|
|
|
287
261
|
logprobs?: ChatCompletionStreamOutputLogprobs;
|
|
288
262
|
[property: string]: unknown;
|
|
289
263
|
}
|
|
290
|
-
|
|
291
264
|
export interface ChatCompletionStreamOutputDelta {
|
|
292
265
|
content?: string;
|
|
293
266
|
role: string;
|
|
294
267
|
tool_calls?: ChatCompletionStreamOutputDeltaToolCall;
|
|
295
268
|
[property: string]: unknown;
|
|
296
269
|
}
|
|
297
|
-
|
|
298
270
|
export interface ChatCompletionStreamOutputDeltaToolCall {
|
|
299
271
|
function: ChatCompletionStreamOutputFunction;
|
|
300
272
|
id: string;
|
|
@@ -302,31 +274,26 @@ export interface ChatCompletionStreamOutputDeltaToolCall {
|
|
|
302
274
|
type: string;
|
|
303
275
|
[property: string]: unknown;
|
|
304
276
|
}
|
|
305
|
-
|
|
306
277
|
export interface ChatCompletionStreamOutputFunction {
|
|
307
278
|
arguments: string;
|
|
308
279
|
name?: string;
|
|
309
280
|
[property: string]: unknown;
|
|
310
281
|
}
|
|
311
|
-
|
|
312
282
|
export interface ChatCompletionStreamOutputLogprobs {
|
|
313
283
|
content: ChatCompletionStreamOutputLogprob[];
|
|
314
284
|
[property: string]: unknown;
|
|
315
285
|
}
|
|
316
|
-
|
|
317
286
|
export interface ChatCompletionStreamOutputLogprob {
|
|
318
287
|
logprob: number;
|
|
319
288
|
token: string;
|
|
320
289
|
top_logprobs: ChatCompletionStreamOutputTopLogprob[];
|
|
321
290
|
[property: string]: unknown;
|
|
322
291
|
}
|
|
323
|
-
|
|
324
292
|
export interface ChatCompletionStreamOutputTopLogprob {
|
|
325
293
|
logprob: number;
|
|
326
294
|
token: string;
|
|
327
295
|
[property: string]: unknown;
|
|
328
296
|
}
|
|
329
|
-
|
|
330
297
|
export interface ChatCompletionStreamOutputUsage {
|
|
331
298
|
completion_tokens: number;
|
|
332
299
|
prompt_tokens: number;
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
/**
|
|
8
7
|
* Inputs for Depth Estimation inference
|
|
9
8
|
*/
|
|
@@ -15,10 +14,11 @@ export interface DepthEstimationInput {
|
|
|
15
14
|
/**
|
|
16
15
|
* Additional inference parameters for Depth Estimation
|
|
17
16
|
*/
|
|
18
|
-
parameters?: {
|
|
17
|
+
parameters?: {
|
|
18
|
+
[key: string]: unknown;
|
|
19
|
+
};
|
|
19
20
|
[property: string]: unknown;
|
|
20
21
|
}
|
|
21
|
-
|
|
22
22
|
/**
|
|
23
23
|
* Outputs of inference for the Depth Estimation task
|
|
24
24
|
*/
|
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
"title": "DocumentQuestionAnsweringInputData",
|
|
12
12
|
"properties": {
|
|
13
13
|
"image": {
|
|
14
|
-
"description": "The image on which the question is asked"
|
|
14
|
+
"description": "The image on which the question is asked",
|
|
15
|
+
"comment": "type=binary"
|
|
15
16
|
},
|
|
16
17
|
"question": {
|
|
17
18
|
"type": "string",
|
|
@@ -3,9 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
export type FeatureExtractionOutput = Array<number[]>;
|
|
8
|
-
|
|
9
7
|
/**
|
|
10
8
|
* Feature Extraction Input.
|
|
11
9
|
*
|
|
@@ -36,5 +34,4 @@ export interface FeatureExtractionInput {
|
|
|
36
34
|
truncation_direction?: FeatureExtractionInputTruncationDirection;
|
|
37
35
|
[property: string]: unknown;
|
|
38
36
|
}
|
|
39
|
-
|
|
40
37
|
export type FeatureExtractionInputTruncationDirection = "Left" | "Right";
|
|
@@ -11,7 +11,7 @@ export interface ImageClassificationInput {
|
|
|
11
11
|
* The input image data as a base64-encoded string. If no `parameters` are provided, you can
|
|
12
12
|
* also provide the image data as a raw bytes payload.
|
|
13
13
|
*/
|
|
14
|
-
inputs:
|
|
14
|
+
inputs: Blob;
|
|
15
15
|
/**
|
|
16
16
|
* Additional inference parameters for Image Classification
|
|
17
17
|
*/
|
|
@@ -7,7 +7,8 @@
|
|
|
7
7
|
"properties": {
|
|
8
8
|
"inputs": {
|
|
9
9
|
"type": "string",
|
|
10
|
-
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
|
|
10
|
+
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
|
|
11
|
+
"comment": "type=binary"
|
|
11
12
|
},
|
|
12
13
|
"parameters": {
|
|
13
14
|
"description": "Additional inference parameters for Image Classification",
|
|
@@ -11,7 +11,7 @@ export interface ImageSegmentationInput {
|
|
|
11
11
|
* The input image data as a base64-encoded string. If no `parameters` are provided, you can
|
|
12
12
|
* also provide the image data as a raw bytes payload.
|
|
13
13
|
*/
|
|
14
|
-
inputs:
|
|
14
|
+
inputs: Blob;
|
|
15
15
|
/**
|
|
16
16
|
* Additional inference parameters for Image Segmentation
|
|
17
17
|
*/
|
|
@@ -7,7 +7,8 @@
|
|
|
7
7
|
"properties": {
|
|
8
8
|
"inputs": {
|
|
9
9
|
"type": "string",
|
|
10
|
-
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
|
|
10
|
+
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
|
|
11
|
+
"comment": "type=binary"
|
|
11
12
|
},
|
|
12
13
|
"parameters": {
|
|
13
14
|
"description": "Additional inference parameters for Image Segmentation",
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
/**
|
|
8
7
|
* Inputs for Image To Image inference
|
|
9
8
|
*/
|
|
@@ -12,14 +11,13 @@ export interface ImageToImageInput {
|
|
|
12
11
|
* The input image data as a base64-encoded string. If no `parameters` are provided, you can
|
|
13
12
|
* also provide the image data as a raw bytes payload.
|
|
14
13
|
*/
|
|
15
|
-
inputs:
|
|
14
|
+
inputs: Blob;
|
|
16
15
|
/**
|
|
17
16
|
* Additional inference parameters for Image To Image
|
|
18
17
|
*/
|
|
19
18
|
parameters?: ImageToImageParameters;
|
|
20
19
|
[property: string]: unknown;
|
|
21
20
|
}
|
|
22
|
-
|
|
23
21
|
/**
|
|
24
22
|
* Additional inference parameters for Image To Image
|
|
25
23
|
*/
|
|
@@ -30,9 +28,9 @@ export interface ImageToImageParameters {
|
|
|
30
28
|
*/
|
|
31
29
|
guidance_scale?: number;
|
|
32
30
|
/**
|
|
33
|
-
* One
|
|
31
|
+
* One prompt to guide what NOT to include in image generation.
|
|
34
32
|
*/
|
|
35
|
-
negative_prompt?: string
|
|
33
|
+
negative_prompt?: string;
|
|
36
34
|
/**
|
|
37
35
|
* For diffusion models. The number of denoising steps. More denoising steps usually lead to
|
|
38
36
|
* a higher quality image at the expense of slower inference.
|
|
@@ -44,7 +42,6 @@ export interface ImageToImageParameters {
|
|
|
44
42
|
target_size?: TargetSize;
|
|
45
43
|
[property: string]: unknown;
|
|
46
44
|
}
|
|
47
|
-
|
|
48
45
|
/**
|
|
49
46
|
* The size in pixel of the output image.
|
|
50
47
|
*/
|
|
@@ -53,7 +50,6 @@ export interface TargetSize {
|
|
|
53
50
|
width: number;
|
|
54
51
|
[property: string]: unknown;
|
|
55
52
|
}
|
|
56
|
-
|
|
57
53
|
/**
|
|
58
54
|
* Outputs of inference for the Image To Image task
|
|
59
55
|
*/
|
|
@@ -7,7 +7,8 @@
|
|
|
7
7
|
"properties": {
|
|
8
8
|
"inputs": {
|
|
9
9
|
"type": "string",
|
|
10
|
-
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
|
|
10
|
+
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
|
|
11
|
+
"comment": "type=binary"
|
|
11
12
|
},
|
|
12
13
|
"parameters": {
|
|
13
14
|
"description": "Additional inference parameters for Image To Image",
|
|
@@ -24,11 +25,8 @@
|
|
|
24
25
|
"description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
|
|
25
26
|
},
|
|
26
27
|
"negative_prompt": {
|
|
27
|
-
"type": "
|
|
28
|
-
"
|
|
29
|
-
"type": "string"
|
|
30
|
-
},
|
|
31
|
-
"description": "One or several prompt to guide what NOT to include in image generation."
|
|
28
|
+
"type": "string",
|
|
29
|
+
"description": "One prompt to guide what NOT to include in image generation."
|
|
32
30
|
},
|
|
33
31
|
"num_inference_steps": {
|
|
34
32
|
"type": "integer",
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
/**
|
|
8
7
|
* Inputs for Image To Text inference
|
|
9
8
|
*/
|
|
@@ -11,14 +10,13 @@ export interface ImageToTextInput {
|
|
|
11
10
|
/**
|
|
12
11
|
* The input image data
|
|
13
12
|
*/
|
|
14
|
-
inputs:
|
|
13
|
+
inputs: Blob;
|
|
15
14
|
/**
|
|
16
15
|
* Additional inference parameters for Image To Text
|
|
17
16
|
*/
|
|
18
17
|
parameters?: ImageToTextParameters;
|
|
19
18
|
[property: string]: unknown;
|
|
20
19
|
}
|
|
21
|
-
|
|
22
20
|
/**
|
|
23
21
|
* Additional inference parameters for Image To Text
|
|
24
22
|
*/
|
|
@@ -33,7 +31,6 @@ export interface ImageToTextParameters {
|
|
|
33
31
|
max_new_tokens?: number;
|
|
34
32
|
[property: string]: unknown;
|
|
35
33
|
}
|
|
36
|
-
|
|
37
34
|
/**
|
|
38
35
|
* Parametrization of the text generation process
|
|
39
36
|
*/
|
|
@@ -120,12 +117,10 @@ export interface GenerationParameters {
|
|
|
120
117
|
use_cache?: boolean;
|
|
121
118
|
[property: string]: unknown;
|
|
122
119
|
}
|
|
123
|
-
|
|
124
120
|
/**
|
|
125
121
|
* Controls the stopping condition for beam-based methods.
|
|
126
122
|
*/
|
|
127
123
|
export type EarlyStoppingUnion = boolean | "never";
|
|
128
|
-
|
|
129
124
|
/**
|
|
130
125
|
* Outputs of inference for the Image To Text task
|
|
131
126
|
*/
|
package/src/tasks/index.ts
CHANGED
|
@@ -73,6 +73,7 @@ export type * from "./sentence-similarity/inference.js";
|
|
|
73
73
|
export type * from "./summarization/inference.js";
|
|
74
74
|
export type * from "./table-question-answering/inference.js";
|
|
75
75
|
export type { TextToImageInput, TextToImageOutput, TextToImageParameters } from "./text-to-image/inference.js";
|
|
76
|
+
export type { TextToVideoParameters, TextToVideoOutput, TextToVideoInput } from "./text-to-video/inference.js";
|
|
76
77
|
export type { TextToSpeechParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference.js";
|
|
77
78
|
export type * from "./token-classification/inference.js";
|
|
78
79
|
export type { TranslationInput, TranslationOutput } from "./translation/inference.js";
|
|
@@ -11,7 +11,7 @@ export interface ObjectDetectionInput {
|
|
|
11
11
|
* The input image data as a base64-encoded string. If no `parameters` are provided, you can
|
|
12
12
|
* also provide the image data as a raw bytes payload.
|
|
13
13
|
*/
|
|
14
|
-
inputs:
|
|
14
|
+
inputs: Blob;
|
|
15
15
|
/**
|
|
16
16
|
* Additional inference parameters for Object Detection
|
|
17
17
|
*/
|
|
@@ -7,7 +7,8 @@
|
|
|
7
7
|
"properties": {
|
|
8
8
|
"inputs": {
|
|
9
9
|
"type": "string",
|
|
10
|
-
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
|
|
10
|
+
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
|
|
11
|
+
"comment": "type=binary"
|
|
11
12
|
},
|
|
12
13
|
"parameters": {
|
|
13
14
|
"description": "Additional inference parameters for Object Detection",
|
|
@@ -3,9 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
export type SentenceSimilarityOutput = number[];
|
|
8
|
-
|
|
9
7
|
/**
|
|
10
8
|
* Inputs for Sentence similarity inference
|
|
11
9
|
*/
|
|
@@ -14,10 +12,11 @@ export interface SentenceSimilarityInput {
|
|
|
14
12
|
/**
|
|
15
13
|
* Additional inference parameters for Sentence Similarity
|
|
16
14
|
*/
|
|
17
|
-
parameters?: {
|
|
15
|
+
parameters?: {
|
|
16
|
+
[key: string]: unknown;
|
|
17
|
+
};
|
|
18
18
|
[property: string]: unknown;
|
|
19
19
|
}
|
|
20
|
-
|
|
21
20
|
export interface SentenceSimilarityInputData {
|
|
22
21
|
/**
|
|
23
22
|
* A list of strings which will be compared against the source_sentence.
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
/**
|
|
8
7
|
* Inputs for Summarization inference
|
|
9
8
|
*/
|
|
@@ -18,7 +17,6 @@ export interface SummarizationInput {
|
|
|
18
17
|
parameters?: SummarizationParameters;
|
|
19
18
|
[property: string]: unknown;
|
|
20
19
|
}
|
|
21
|
-
|
|
22
20
|
/**
|
|
23
21
|
* Additional inference parameters for summarization.
|
|
24
22
|
*/
|
|
@@ -30,19 +28,19 @@ export interface SummarizationParameters {
|
|
|
30
28
|
/**
|
|
31
29
|
* Additional parametrization of the text generation algorithm.
|
|
32
30
|
*/
|
|
33
|
-
generate_parameters?: {
|
|
31
|
+
generate_parameters?: {
|
|
32
|
+
[key: string]: unknown;
|
|
33
|
+
};
|
|
34
34
|
/**
|
|
35
35
|
* The truncation strategy to use.
|
|
36
36
|
*/
|
|
37
37
|
truncation?: SummarizationTruncationStrategy;
|
|
38
38
|
[property: string]: unknown;
|
|
39
39
|
}
|
|
40
|
-
|
|
41
40
|
/**
|
|
42
41
|
* The truncation strategy to use.
|
|
43
42
|
*/
|
|
44
43
|
export type SummarizationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
|
|
45
|
-
|
|
46
44
|
/**
|
|
47
45
|
* Outputs of inference for the Summarization task
|
|
48
46
|
*/
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
/**
|
|
8
7
|
* Text Generation Input.
|
|
9
8
|
*
|
|
@@ -17,7 +16,6 @@ export interface TextGenerationInput {
|
|
|
17
16
|
stream?: boolean;
|
|
18
17
|
[property: string]: unknown;
|
|
19
18
|
}
|
|
20
|
-
|
|
21
19
|
export interface TextGenerationInputGenerateParameters {
|
|
22
20
|
/**
|
|
23
21
|
* Lora adapter id
|
|
@@ -100,7 +98,6 @@ export interface TextGenerationInputGenerateParameters {
|
|
|
100
98
|
watermark?: boolean;
|
|
101
99
|
[property: string]: unknown;
|
|
102
100
|
}
|
|
103
|
-
|
|
104
101
|
export interface TextGenerationInputGrammarType {
|
|
105
102
|
type: Type;
|
|
106
103
|
/**
|
|
@@ -112,9 +109,7 @@ export interface TextGenerationInputGrammarType {
|
|
|
112
109
|
value: unknown;
|
|
113
110
|
[property: string]: unknown;
|
|
114
111
|
}
|
|
115
|
-
|
|
116
112
|
export type Type = "json" | "regex";
|
|
117
|
-
|
|
118
113
|
/**
|
|
119
114
|
* Text Generation Output.
|
|
120
115
|
*
|
|
@@ -127,7 +122,6 @@ export interface TextGenerationOutput {
|
|
|
127
122
|
generated_text: string;
|
|
128
123
|
[property: string]: unknown;
|
|
129
124
|
}
|
|
130
|
-
|
|
131
125
|
export interface TextGenerationOutputDetails {
|
|
132
126
|
best_of_sequences?: TextGenerationOutputBestOfSequence[];
|
|
133
127
|
finish_reason: TextGenerationOutputFinishReason;
|
|
@@ -138,7 +132,6 @@ export interface TextGenerationOutputDetails {
|
|
|
138
132
|
top_tokens?: Array<TextGenerationOutputToken[]>;
|
|
139
133
|
[property: string]: unknown;
|
|
140
134
|
}
|
|
141
|
-
|
|
142
135
|
export interface TextGenerationOutputBestOfSequence {
|
|
143
136
|
finish_reason: TextGenerationOutputFinishReason;
|
|
144
137
|
generated_text: string;
|
|
@@ -149,16 +142,13 @@ export interface TextGenerationOutputBestOfSequence {
|
|
|
149
142
|
top_tokens?: Array<TextGenerationOutputToken[]>;
|
|
150
143
|
[property: string]: unknown;
|
|
151
144
|
}
|
|
152
|
-
|
|
153
145
|
export type TextGenerationOutputFinishReason = "length" | "eos_token" | "stop_sequence";
|
|
154
|
-
|
|
155
146
|
export interface TextGenerationOutputPrefillToken {
|
|
156
147
|
id: number;
|
|
157
148
|
logprob: number;
|
|
158
149
|
text: string;
|
|
159
150
|
[property: string]: unknown;
|
|
160
151
|
}
|
|
161
|
-
|
|
162
152
|
export interface TextGenerationOutputToken {
|
|
163
153
|
id: number;
|
|
164
154
|
logprob: number;
|
|
@@ -166,7 +156,6 @@ export interface TextGenerationOutputToken {
|
|
|
166
156
|
text: string;
|
|
167
157
|
[property: string]: unknown;
|
|
168
158
|
}
|
|
169
|
-
|
|
170
159
|
/**
|
|
171
160
|
* Text Generation Stream Output.
|
|
172
161
|
*
|
|
@@ -182,7 +171,6 @@ export interface TextGenerationStreamOutput {
|
|
|
182
171
|
top_tokens?: TextGenerationStreamOutputToken[];
|
|
183
172
|
[property: string]: unknown;
|
|
184
173
|
}
|
|
185
|
-
|
|
186
174
|
export interface TextGenerationStreamOutputStreamDetails {
|
|
187
175
|
finish_reason: TextGenerationOutputFinishReason;
|
|
188
176
|
generated_tokens: number;
|
|
@@ -190,7 +178,6 @@ export interface TextGenerationStreamOutputStreamDetails {
|
|
|
190
178
|
seed?: number;
|
|
191
179
|
[property: string]: unknown;
|
|
192
180
|
}
|
|
193
|
-
|
|
194
181
|
export interface TextGenerationStreamOutputToken {
|
|
195
182
|
id: number;
|
|
196
183
|
logprob: number;
|
|
@@ -1,9 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Outputs of inference for the Text To Audio task
|
|
3
|
+
*/
|
|
4
|
+
export interface TextToAudioOutput {
|
|
5
|
+
/**
|
|
6
|
+
* The generated audio waveform.
|
|
7
|
+
*/
|
|
8
|
+
audio: Blob;
|
|
9
|
+
/**
|
|
10
|
+
* The sampling rate of the generated audio waveform.
|
|
11
|
+
*/
|
|
12
|
+
sampling_rate: number;
|
|
13
|
+
[property: string]: unknown;
|
|
14
|
+
}
|
|
1
15
|
/**
|
|
2
16
|
* Inference code generated from the JSON schema spec in ./spec
|
|
3
17
|
*
|
|
4
18
|
* Using src/scripts/inference-codegen
|
|
5
19
|
*/
|
|
6
|
-
|
|
7
20
|
/**
|
|
8
21
|
* Inputs for Text To Audio inference
|
|
9
22
|
*/
|
|
@@ -18,7 +31,6 @@ export interface TextToAudioInput {
|
|
|
18
31
|
parameters?: TextToAudioParameters;
|
|
19
32
|
[property: string]: unknown;
|
|
20
33
|
}
|
|
21
|
-
|
|
22
34
|
/**
|
|
23
35
|
* Additional inference parameters for Text To Audio
|
|
24
36
|
*/
|
|
@@ -29,7 +41,6 @@ export interface TextToAudioParameters {
|
|
|
29
41
|
generation_parameters?: GenerationParameters;
|
|
30
42
|
[property: string]: unknown;
|
|
31
43
|
}
|
|
32
|
-
|
|
33
44
|
/**
|
|
34
45
|
* Parametrization of the text generation process
|
|
35
46
|
*/
|
|
@@ -116,24 +127,7 @@ export interface GenerationParameters {
|
|
|
116
127
|
use_cache?: boolean;
|
|
117
128
|
[property: string]: unknown;
|
|
118
129
|
}
|
|
119
|
-
|
|
120
130
|
/**
|
|
121
131
|
* Controls the stopping condition for beam-based methods.
|
|
122
132
|
*/
|
|
123
133
|
export type EarlyStoppingUnion = boolean | "never";
|
|
124
|
-
|
|
125
|
-
/**
|
|
126
|
-
* Outputs of inference for the Text To Audio task
|
|
127
|
-
*/
|
|
128
|
-
export interface TextToAudioOutput {
|
|
129
|
-
/**
|
|
130
|
-
* The generated audio waveform.
|
|
131
|
-
*/
|
|
132
|
-
audio: unknown;
|
|
133
|
-
samplingRate: unknown;
|
|
134
|
-
/**
|
|
135
|
-
* The sampling rate of the generated audio waveform.
|
|
136
|
-
*/
|
|
137
|
-
sampling_rate?: number;
|
|
138
|
-
[property: string]: unknown;
|
|
139
|
-
}
|