@huggingface/tasks 0.13.16 → 0.14.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. package/dist/commonjs/model-data.d.ts +3 -0
  2. package/dist/commonjs/model-data.d.ts.map +1 -1
  3. package/dist/commonjs/model-libraries-snippets.d.ts +3 -1
  4. package/dist/commonjs/model-libraries-snippets.d.ts.map +1 -1
  5. package/dist/commonjs/model-libraries-snippets.js +134 -22
  6. package/dist/commonjs/model-libraries.d.ts +12 -8
  7. package/dist/commonjs/model-libraries.d.ts.map +1 -1
  8. package/dist/commonjs/model-libraries.js +15 -8
  9. package/dist/commonjs/tasks/audio-classification/inference.d.ts +1 -1
  10. package/dist/commonjs/tasks/audio-classification/inference.d.ts.map +1 -1
  11. package/dist/commonjs/tasks/automatic-speech-recognition/inference.d.ts +1 -1
  12. package/dist/commonjs/tasks/automatic-speech-recognition/inference.d.ts.map +1 -1
  13. package/dist/commonjs/tasks/automatic-speech-recognition/inference.js +0 -5
  14. package/dist/commonjs/tasks/chat-completion/inference.d.ts.map +1 -1
  15. package/dist/commonjs/tasks/chat-completion/inference.js +0 -5
  16. package/dist/commonjs/tasks/depth-estimation/inference.d.ts.map +1 -1
  17. package/dist/commonjs/tasks/depth-estimation/inference.js +0 -5
  18. package/dist/commonjs/tasks/feature-extraction/inference.d.ts.map +1 -1
  19. package/dist/commonjs/tasks/feature-extraction/inference.js +0 -5
  20. package/dist/commonjs/tasks/image-classification/inference.d.ts +1 -1
  21. package/dist/commonjs/tasks/image-classification/inference.d.ts.map +1 -1
  22. package/dist/commonjs/tasks/image-segmentation/inference.d.ts +1 -1
  23. package/dist/commonjs/tasks/image-segmentation/inference.d.ts.map +1 -1
  24. package/dist/commonjs/tasks/image-to-image/inference.d.ts +3 -3
  25. package/dist/commonjs/tasks/image-to-image/inference.d.ts.map +1 -1
  26. package/dist/commonjs/tasks/image-to-image/inference.js +0 -5
  27. package/dist/commonjs/tasks/image-to-text/inference.d.ts +1 -1
  28. package/dist/commonjs/tasks/image-to-text/inference.d.ts.map +1 -1
  29. package/dist/commonjs/tasks/image-to-text/inference.js +0 -5
  30. package/dist/commonjs/tasks/index.d.ts +1 -0
  31. package/dist/commonjs/tasks/index.d.ts.map +1 -1
  32. package/dist/commonjs/tasks/object-detection/inference.d.ts +1 -1
  33. package/dist/commonjs/tasks/object-detection/inference.d.ts.map +1 -1
  34. package/dist/commonjs/tasks/sentence-similarity/inference.d.ts.map +1 -1
  35. package/dist/commonjs/tasks/sentence-similarity/inference.js +0 -5
  36. package/dist/commonjs/tasks/summarization/inference.d.ts.map +1 -1
  37. package/dist/commonjs/tasks/summarization/inference.js +0 -5
  38. package/dist/commonjs/tasks/text-generation/inference.d.ts.map +1 -1
  39. package/dist/commonjs/tasks/text-generation/inference.js +0 -5
  40. package/dist/commonjs/tasks/text-to-audio/inference.d.ts +14 -15
  41. package/dist/commonjs/tasks/text-to-audio/inference.d.ts.map +1 -1
  42. package/dist/commonjs/tasks/text-to-audio/inference.js +0 -5
  43. package/dist/commonjs/tasks/text-to-image/inference.d.ts +2 -2
  44. package/dist/commonjs/tasks/text-to-image/inference.d.ts.map +1 -1
  45. package/dist/commonjs/tasks/text-to-image/inference.js +0 -5
  46. package/dist/commonjs/tasks/text-to-speech/inference.d.ts +14 -17
  47. package/dist/commonjs/tasks/text-to-speech/inference.d.ts.map +1 -1
  48. package/dist/commonjs/tasks/text-to-speech/inference.js +0 -5
  49. package/dist/commonjs/tasks/text-to-video/inference.d.ts +58 -0
  50. package/dist/commonjs/tasks/text-to-video/inference.d.ts.map +1 -0
  51. package/dist/commonjs/tasks/text-to-video/inference.js +2 -0
  52. package/dist/commonjs/tasks/text2text-generation/inference.d.ts.map +1 -1
  53. package/dist/commonjs/tasks/text2text-generation/inference.js +0 -5
  54. package/dist/commonjs/tasks/translation/inference.d.ts.map +1 -1
  55. package/dist/commonjs/tasks/translation/inference.js +0 -5
  56. package/dist/commonjs/tasks/visual-question-answering/inference.d.ts +1 -1
  57. package/dist/commonjs/tasks/visual-question-answering/inference.d.ts.map +1 -1
  58. package/dist/commonjs/tasks/zero-shot-image-classification/inference.d.ts +1 -1
  59. package/dist/commonjs/tasks/zero-shot-image-classification/inference.d.ts.map +1 -1
  60. package/dist/commonjs/tasks/zero-shot-object-detection/inference.d.ts +1 -1
  61. package/dist/commonjs/tasks/zero-shot-object-detection/inference.d.ts.map +1 -1
  62. package/dist/esm/model-data.d.ts +3 -0
  63. package/dist/esm/model-data.d.ts.map +1 -1
  64. package/dist/esm/model-libraries-snippets.d.ts +3 -1
  65. package/dist/esm/model-libraries-snippets.d.ts.map +1 -1
  66. package/dist/esm/model-libraries-snippets.js +129 -19
  67. package/dist/esm/model-libraries.d.ts +12 -8
  68. package/dist/esm/model-libraries.d.ts.map +1 -1
  69. package/dist/esm/model-libraries.js +15 -8
  70. package/dist/esm/tasks/audio-classification/inference.d.ts +1 -1
  71. package/dist/esm/tasks/audio-classification/inference.d.ts.map +1 -1
  72. package/dist/esm/tasks/automatic-speech-recognition/inference.d.ts +1 -1
  73. package/dist/esm/tasks/automatic-speech-recognition/inference.d.ts.map +1 -1
  74. package/dist/esm/tasks/automatic-speech-recognition/inference.js +0 -5
  75. package/dist/esm/tasks/chat-completion/inference.d.ts.map +1 -1
  76. package/dist/esm/tasks/chat-completion/inference.js +0 -5
  77. package/dist/esm/tasks/depth-estimation/inference.d.ts.map +1 -1
  78. package/dist/esm/tasks/depth-estimation/inference.js +0 -5
  79. package/dist/esm/tasks/feature-extraction/inference.d.ts.map +1 -1
  80. package/dist/esm/tasks/feature-extraction/inference.js +0 -5
  81. package/dist/esm/tasks/image-classification/inference.d.ts +1 -1
  82. package/dist/esm/tasks/image-classification/inference.d.ts.map +1 -1
  83. package/dist/esm/tasks/image-segmentation/inference.d.ts +1 -1
  84. package/dist/esm/tasks/image-segmentation/inference.d.ts.map +1 -1
  85. package/dist/esm/tasks/image-to-image/inference.d.ts +3 -3
  86. package/dist/esm/tasks/image-to-image/inference.d.ts.map +1 -1
  87. package/dist/esm/tasks/image-to-image/inference.js +0 -5
  88. package/dist/esm/tasks/image-to-text/inference.d.ts +1 -1
  89. package/dist/esm/tasks/image-to-text/inference.d.ts.map +1 -1
  90. package/dist/esm/tasks/image-to-text/inference.js +0 -5
  91. package/dist/esm/tasks/index.d.ts +1 -0
  92. package/dist/esm/tasks/index.d.ts.map +1 -1
  93. package/dist/esm/tasks/object-detection/inference.d.ts +1 -1
  94. package/dist/esm/tasks/object-detection/inference.d.ts.map +1 -1
  95. package/dist/esm/tasks/sentence-similarity/inference.d.ts.map +1 -1
  96. package/dist/esm/tasks/sentence-similarity/inference.js +0 -5
  97. package/dist/esm/tasks/summarization/inference.d.ts.map +1 -1
  98. package/dist/esm/tasks/summarization/inference.js +0 -5
  99. package/dist/esm/tasks/text-generation/inference.d.ts.map +1 -1
  100. package/dist/esm/tasks/text-generation/inference.js +0 -5
  101. package/dist/esm/tasks/text-to-audio/inference.d.ts +14 -15
  102. package/dist/esm/tasks/text-to-audio/inference.d.ts.map +1 -1
  103. package/dist/esm/tasks/text-to-audio/inference.js +0 -5
  104. package/dist/esm/tasks/text-to-image/inference.d.ts +2 -2
  105. package/dist/esm/tasks/text-to-image/inference.d.ts.map +1 -1
  106. package/dist/esm/tasks/text-to-image/inference.js +0 -5
  107. package/dist/esm/tasks/text-to-speech/inference.d.ts +14 -17
  108. package/dist/esm/tasks/text-to-speech/inference.d.ts.map +1 -1
  109. package/dist/esm/tasks/text-to-speech/inference.js +0 -5
  110. package/dist/esm/tasks/text-to-video/inference.d.ts +58 -0
  111. package/dist/esm/tasks/text-to-video/inference.d.ts.map +1 -0
  112. package/dist/esm/tasks/text-to-video/inference.js +1 -0
  113. package/dist/esm/tasks/text2text-generation/inference.d.ts.map +1 -1
  114. package/dist/esm/tasks/text2text-generation/inference.js +0 -5
  115. package/dist/esm/tasks/translation/inference.d.ts.map +1 -1
  116. package/dist/esm/tasks/translation/inference.js +0 -5
  117. package/dist/esm/tasks/visual-question-answering/inference.d.ts +1 -1
  118. package/dist/esm/tasks/visual-question-answering/inference.d.ts.map +1 -1
  119. package/dist/esm/tasks/zero-shot-image-classification/inference.d.ts +1 -1
  120. package/dist/esm/tasks/zero-shot-image-classification/inference.d.ts.map +1 -1
  121. package/dist/esm/tasks/zero-shot-object-detection/inference.d.ts +1 -1
  122. package/dist/esm/tasks/zero-shot-object-detection/inference.d.ts.map +1 -1
  123. package/package.json +1 -1
  124. package/src/model-data.ts +3 -0
  125. package/src/model-libraries-snippets.ts +141 -19
  126. package/src/model-libraries.ts +15 -8
  127. package/src/tasks/audio-classification/inference.ts +1 -1
  128. package/src/tasks/audio-classification/spec/input.json +2 -1
  129. package/src/tasks/automatic-speech-recognition/inference.ts +1 -7
  130. package/src/tasks/automatic-speech-recognition/spec/input.json +2 -1
  131. package/src/tasks/chat-completion/inference.ts +0 -33
  132. package/src/tasks/depth-estimation/inference.ts +3 -3
  133. package/src/tasks/document-question-answering/spec/input.json +2 -1
  134. package/src/tasks/feature-extraction/inference.ts +0 -3
  135. package/src/tasks/image-classification/inference.ts +1 -1
  136. package/src/tasks/image-classification/spec/input.json +2 -1
  137. package/src/tasks/image-segmentation/inference.ts +1 -1
  138. package/src/tasks/image-segmentation/spec/input.json +2 -1
  139. package/src/tasks/image-to-image/inference.ts +3 -7
  140. package/src/tasks/image-to-image/spec/input.json +4 -6
  141. package/src/tasks/image-to-text/inference.ts +1 -6
  142. package/src/tasks/image-to-text/spec/input.json +2 -1
  143. package/src/tasks/index.ts +1 -0
  144. package/src/tasks/object-detection/inference.ts +1 -1
  145. package/src/tasks/object-detection/spec/input.json +2 -1
  146. package/src/tasks/sentence-similarity/inference.ts +3 -4
  147. package/src/tasks/summarization/inference.ts +3 -5
  148. package/src/tasks/text-generation/inference.ts +0 -13
  149. package/src/tasks/text-to-audio/inference.ts +14 -20
  150. package/src/tasks/text-to-audio/spec/output.json +3 -2
  151. package/src/tasks/text-to-image/inference.ts +2 -6
  152. package/src/tasks/text-to-image/spec/input.json +2 -5
  153. package/src/tasks/text-to-speech/inference.ts +14 -22
  154. package/src/tasks/text-to-speech/spec/output.json +13 -2
  155. package/src/tasks/text-to-video/inference.ts +57 -0
  156. package/src/tasks/text-to-video/spec/input.json +49 -0
  157. package/src/tasks/text-to-video/spec/output.json +13 -0
  158. package/src/tasks/text2text-generation/inference.ts +3 -5
  159. package/src/tasks/translation/inference.ts +3 -5
  160. package/src/tasks/visual-question-answering/inference.ts +1 -1
  161. package/src/tasks/visual-question-answering/spec/input.json +4 -2
  162. package/src/tasks/zero-shot-image-classification/inference.ts +1 -1
  163. package/src/tasks/zero-shot-image-classification/spec/input.json +2 -1
  164. package/src/tasks/zero-shot-object-detection/inference.ts +1 -1
  165. package/src/tasks/zero-shot-object-detection/spec/input.json +2 -1
@@ -7,7 +7,8 @@
7
7
  "properties": {
8
8
  "inputs": {
9
9
  "description": "The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.",
10
- "type": "string"
10
+ "type": "string",
11
+ "comment": "type=binary"
11
12
  },
12
13
  "parameters": {
13
14
  "description": "Additional inference parameters for Automatic Speech Recognition",
@@ -3,7 +3,6 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  /**
8
7
  * Chat Completion Input.
9
8
  *
@@ -105,30 +104,24 @@ export interface ChatCompletionInput {
105
104
  top_p?: number;
106
105
  [property: string]: unknown;
107
106
  }
108
-
109
107
  export interface ChatCompletionInputMessage {
110
108
  content: ChatCompletionInputMessageContent;
111
109
  name?: string;
112
110
  role: string;
113
111
  [property: string]: unknown;
114
112
  }
115
-
116
113
  export type ChatCompletionInputMessageContent = ChatCompletionInputMessageChunk[] | string;
117
-
118
114
  export interface ChatCompletionInputMessageChunk {
119
115
  image_url?: ChatCompletionInputURL;
120
116
  text?: string;
121
117
  type: ChatCompletionInputMessageChunkType;
122
118
  [property: string]: unknown;
123
119
  }
124
-
125
120
  export interface ChatCompletionInputURL {
126
121
  url: string;
127
122
  [property: string]: unknown;
128
123
  }
129
-
130
124
  export type ChatCompletionInputMessageChunkType = "text" | "image_url";
131
-
132
125
  export interface ChatCompletionInputGrammarType {
133
126
  type: ChatCompletionInputGrammarTypeType;
134
127
  /**
@@ -140,9 +133,7 @@ export interface ChatCompletionInputGrammarType {
140
133
  value: unknown;
141
134
  [property: string]: unknown;
142
135
  }
143
-
144
136
  export type ChatCompletionInputGrammarTypeType = "json" | "regex";
145
-
146
137
  export interface ChatCompletionInputStreamOptions {
147
138
  /**
148
139
  * If set, an additional chunk will be streamed before the data: [DONE] message. The usage
@@ -153,13 +144,11 @@ export interface ChatCompletionInputStreamOptions {
153
144
  include_usage: boolean;
154
145
  [property: string]: unknown;
155
146
  }
156
-
157
147
  /**
158
148
  *
159
149
  * <https://platform.openai.com/docs/guides/function-calling/configuring-function-calling-behavior-using-the-tool_choice-parameter>
160
150
  */
161
151
  export type ChatCompletionInputToolChoice = ChatCompletionInputToolChoiceEnum | ChatCompletionInputToolChoiceObject;
162
-
163
152
  /**
164
153
  * Means the model can pick between generating a message or calling one or more tools.
165
154
  *
@@ -168,30 +157,25 @@ export type ChatCompletionInputToolChoice = ChatCompletionInputToolChoiceEnum |
168
157
  * Means the model must call one or more tools.
169
158
  */
170
159
  export type ChatCompletionInputToolChoiceEnum = "auto" | "none" | "required";
171
-
172
160
  export interface ChatCompletionInputToolChoiceObject {
173
161
  function: ChatCompletionInputFunctionName;
174
162
  [property: string]: unknown;
175
163
  }
176
-
177
164
  export interface ChatCompletionInputFunctionName {
178
165
  name: string;
179
166
  [property: string]: unknown;
180
167
  }
181
-
182
168
  export interface ChatCompletionInputTool {
183
169
  function: ChatCompletionInputFunctionDefinition;
184
170
  type: string;
185
171
  [property: string]: unknown;
186
172
  }
187
-
188
173
  export interface ChatCompletionInputFunctionDefinition {
189
174
  arguments: unknown;
190
175
  description?: string;
191
176
  name: string;
192
177
  [property: string]: unknown;
193
178
  }
194
-
195
179
  /**
196
180
  * Chat Completion Output.
197
181
  *
@@ -208,7 +192,6 @@ export interface ChatCompletionOutput {
208
192
  usage: ChatCompletionOutputUsage;
209
193
  [property: string]: unknown;
210
194
  }
211
-
212
195
  export interface ChatCompletionOutputComplete {
213
196
  finish_reason: string;
214
197
  index: number;
@@ -216,53 +199,45 @@ export interface ChatCompletionOutputComplete {
216
199
  message: ChatCompletionOutputMessage;
217
200
  [property: string]: unknown;
218
201
  }
219
-
220
202
  export interface ChatCompletionOutputLogprobs {
221
203
  content: ChatCompletionOutputLogprob[];
222
204
  [property: string]: unknown;
223
205
  }
224
-
225
206
  export interface ChatCompletionOutputLogprob {
226
207
  logprob: number;
227
208
  token: string;
228
209
  top_logprobs: ChatCompletionOutputTopLogprob[];
229
210
  [property: string]: unknown;
230
211
  }
231
-
232
212
  export interface ChatCompletionOutputTopLogprob {
233
213
  logprob: number;
234
214
  token: string;
235
215
  [property: string]: unknown;
236
216
  }
237
-
238
217
  export interface ChatCompletionOutputMessage {
239
218
  content?: string;
240
219
  role: string;
241
220
  tool_calls?: ChatCompletionOutputToolCall[];
242
221
  [property: string]: unknown;
243
222
  }
244
-
245
223
  export interface ChatCompletionOutputToolCall {
246
224
  function: ChatCompletionOutputFunctionDefinition;
247
225
  id: string;
248
226
  type: string;
249
227
  [property: string]: unknown;
250
228
  }
251
-
252
229
  export interface ChatCompletionOutputFunctionDefinition {
253
230
  arguments: unknown;
254
231
  description?: string;
255
232
  name: string;
256
233
  [property: string]: unknown;
257
234
  }
258
-
259
235
  export interface ChatCompletionOutputUsage {
260
236
  completion_tokens: number;
261
237
  prompt_tokens: number;
262
238
  total_tokens: number;
263
239
  [property: string]: unknown;
264
240
  }
265
-
266
241
  /**
267
242
  * Chat Completion Stream Output.
268
243
  *
@@ -279,7 +254,6 @@ export interface ChatCompletionStreamOutput {
279
254
  usage?: ChatCompletionStreamOutputUsage;
280
255
  [property: string]: unknown;
281
256
  }
282
-
283
257
  export interface ChatCompletionStreamOutputChoice {
284
258
  delta: ChatCompletionStreamOutputDelta;
285
259
  finish_reason?: string;
@@ -287,14 +261,12 @@ export interface ChatCompletionStreamOutputChoice {
287
261
  logprobs?: ChatCompletionStreamOutputLogprobs;
288
262
  [property: string]: unknown;
289
263
  }
290
-
291
264
  export interface ChatCompletionStreamOutputDelta {
292
265
  content?: string;
293
266
  role: string;
294
267
  tool_calls?: ChatCompletionStreamOutputDeltaToolCall;
295
268
  [property: string]: unknown;
296
269
  }
297
-
298
270
  export interface ChatCompletionStreamOutputDeltaToolCall {
299
271
  function: ChatCompletionStreamOutputFunction;
300
272
  id: string;
@@ -302,31 +274,26 @@ export interface ChatCompletionStreamOutputDeltaToolCall {
302
274
  type: string;
303
275
  [property: string]: unknown;
304
276
  }
305
-
306
277
  export interface ChatCompletionStreamOutputFunction {
307
278
  arguments: string;
308
279
  name?: string;
309
280
  [property: string]: unknown;
310
281
  }
311
-
312
282
  export interface ChatCompletionStreamOutputLogprobs {
313
283
  content: ChatCompletionStreamOutputLogprob[];
314
284
  [property: string]: unknown;
315
285
  }
316
-
317
286
  export interface ChatCompletionStreamOutputLogprob {
318
287
  logprob: number;
319
288
  token: string;
320
289
  top_logprobs: ChatCompletionStreamOutputTopLogprob[];
321
290
  [property: string]: unknown;
322
291
  }
323
-
324
292
  export interface ChatCompletionStreamOutputTopLogprob {
325
293
  logprob: number;
326
294
  token: string;
327
295
  [property: string]: unknown;
328
296
  }
329
-
330
297
  export interface ChatCompletionStreamOutputUsage {
331
298
  completion_tokens: number;
332
299
  prompt_tokens: number;
@@ -3,7 +3,6 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  /**
8
7
  * Inputs for Depth Estimation inference
9
8
  */
@@ -15,10 +14,11 @@ export interface DepthEstimationInput {
15
14
  /**
16
15
  * Additional inference parameters for Depth Estimation
17
16
  */
18
- parameters?: { [key: string]: unknown };
17
+ parameters?: {
18
+ [key: string]: unknown;
19
+ };
19
20
  [property: string]: unknown;
20
21
  }
21
-
22
22
  /**
23
23
  * Outputs of inference for the Depth Estimation task
24
24
  */
@@ -11,7 +11,8 @@
11
11
  "title": "DocumentQuestionAnsweringInputData",
12
12
  "properties": {
13
13
  "image": {
14
- "description": "The image on which the question is asked"
14
+ "description": "The image on which the question is asked",
15
+ "comment": "type=binary"
15
16
  },
16
17
  "question": {
17
18
  "type": "string",
@@ -3,9 +3,7 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  export type FeatureExtractionOutput = Array<number[]>;
8
-
9
7
  /**
10
8
  * Feature Extraction Input.
11
9
  *
@@ -36,5 +34,4 @@ export interface FeatureExtractionInput {
36
34
  truncation_direction?: FeatureExtractionInputTruncationDirection;
37
35
  [property: string]: unknown;
38
36
  }
39
-
40
37
  export type FeatureExtractionInputTruncationDirection = "Left" | "Right";
@@ -11,7 +11,7 @@ export interface ImageClassificationInput {
11
11
  * The input image data as a base64-encoded string. If no `parameters` are provided, you can
12
12
  * also provide the image data as a raw bytes payload.
13
13
  */
14
- inputs: string;
14
+ inputs: Blob;
15
15
  /**
16
16
  * Additional inference parameters for Image Classification
17
17
  */
@@ -7,7 +7,8 @@
7
7
  "properties": {
8
8
  "inputs": {
9
9
  "type": "string",
10
- "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
10
+ "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
11
+ "comment": "type=binary"
11
12
  },
12
13
  "parameters": {
13
14
  "description": "Additional inference parameters for Image Classification",
@@ -11,7 +11,7 @@ export interface ImageSegmentationInput {
11
11
  * The input image data as a base64-encoded string. If no `parameters` are provided, you can
12
12
  * also provide the image data as a raw bytes payload.
13
13
  */
14
- inputs: string;
14
+ inputs: Blob;
15
15
  /**
16
16
  * Additional inference parameters for Image Segmentation
17
17
  */
@@ -7,7 +7,8 @@
7
7
  "properties": {
8
8
  "inputs": {
9
9
  "type": "string",
10
- "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
10
+ "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
11
+ "comment": "type=binary"
11
12
  },
12
13
  "parameters": {
13
14
  "description": "Additional inference parameters for Image Segmentation",
@@ -3,7 +3,6 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  /**
8
7
  * Inputs for Image To Image inference
9
8
  */
@@ -12,14 +11,13 @@ export interface ImageToImageInput {
12
11
  * The input image data as a base64-encoded string. If no `parameters` are provided, you can
13
12
  * also provide the image data as a raw bytes payload.
14
13
  */
15
- inputs: string;
14
+ inputs: Blob;
16
15
  /**
17
16
  * Additional inference parameters for Image To Image
18
17
  */
19
18
  parameters?: ImageToImageParameters;
20
19
  [property: string]: unknown;
21
20
  }
22
-
23
21
  /**
24
22
  * Additional inference parameters for Image To Image
25
23
  */
@@ -30,9 +28,9 @@ export interface ImageToImageParameters {
30
28
  */
31
29
  guidance_scale?: number;
32
30
  /**
33
- * One or several prompt to guide what NOT to include in image generation.
31
+ * One prompt to guide what NOT to include in image generation.
34
32
  */
35
- negative_prompt?: string[];
33
+ negative_prompt?: string;
36
34
  /**
37
35
  * For diffusion models. The number of denoising steps. More denoising steps usually lead to
38
36
  * a higher quality image at the expense of slower inference.
@@ -44,7 +42,6 @@ export interface ImageToImageParameters {
44
42
  target_size?: TargetSize;
45
43
  [property: string]: unknown;
46
44
  }
47
-
48
45
  /**
49
46
  * The size in pixel of the output image.
50
47
  */
@@ -53,7 +50,6 @@ export interface TargetSize {
53
50
  width: number;
54
51
  [property: string]: unknown;
55
52
  }
56
-
57
53
  /**
58
54
  * Outputs of inference for the Image To Image task
59
55
  */
@@ -7,7 +7,8 @@
7
7
  "properties": {
8
8
  "inputs": {
9
9
  "type": "string",
10
- "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
10
+ "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
11
+ "comment": "type=binary"
11
12
  },
12
13
  "parameters": {
13
14
  "description": "Additional inference parameters for Image To Image",
@@ -24,11 +25,8 @@
24
25
  "description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
25
26
  },
26
27
  "negative_prompt": {
27
- "type": "array",
28
- "items": {
29
- "type": "string"
30
- },
31
- "description": "One or several prompt to guide what NOT to include in image generation."
28
+ "type": "string",
29
+ "description": "One prompt to guide what NOT to include in image generation."
32
30
  },
33
31
  "num_inference_steps": {
34
32
  "type": "integer",
@@ -3,7 +3,6 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  /**
8
7
  * Inputs for Image To Text inference
9
8
  */
@@ -11,14 +10,13 @@ export interface ImageToTextInput {
11
10
  /**
12
11
  * The input image data
13
12
  */
14
- inputs: unknown;
13
+ inputs: Blob;
15
14
  /**
16
15
  * Additional inference parameters for Image To Text
17
16
  */
18
17
  parameters?: ImageToTextParameters;
19
18
  [property: string]: unknown;
20
19
  }
21
-
22
20
  /**
23
21
  * Additional inference parameters for Image To Text
24
22
  */
@@ -33,7 +31,6 @@ export interface ImageToTextParameters {
33
31
  max_new_tokens?: number;
34
32
  [property: string]: unknown;
35
33
  }
36
-
37
34
  /**
38
35
  * Parametrization of the text generation process
39
36
  */
@@ -120,12 +117,10 @@ export interface GenerationParameters {
120
117
  use_cache?: boolean;
121
118
  [property: string]: unknown;
122
119
  }
123
-
124
120
  /**
125
121
  * Controls the stopping condition for beam-based methods.
126
122
  */
127
123
  export type EarlyStoppingUnion = boolean | "never";
128
-
129
124
  /**
130
125
  * Outputs of inference for the Image To Text task
131
126
  */
@@ -6,7 +6,8 @@
6
6
  "type": "object",
7
7
  "properties": {
8
8
  "inputs": {
9
- "description": "The input image data"
9
+ "description": "The input image data",
10
+ "comment": "type=binary"
10
11
  },
11
12
  "parameters": {
12
13
  "description": "Additional inference parameters for Image To Text",
@@ -73,6 +73,7 @@ export type * from "./sentence-similarity/inference.js";
73
73
  export type * from "./summarization/inference.js";
74
74
  export type * from "./table-question-answering/inference.js";
75
75
  export type { TextToImageInput, TextToImageOutput, TextToImageParameters } from "./text-to-image/inference.js";
76
+ export type { TextToVideoParameters, TextToVideoOutput, TextToVideoInput } from "./text-to-video/inference.js";
76
77
  export type { TextToSpeechParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference.js";
77
78
  export type * from "./token-classification/inference.js";
78
79
  export type { TranslationInput, TranslationOutput } from "./translation/inference.js";
@@ -11,7 +11,7 @@ export interface ObjectDetectionInput {
11
11
  * The input image data as a base64-encoded string. If no `parameters` are provided, you can
12
12
  * also provide the image data as a raw bytes payload.
13
13
  */
14
- inputs: string;
14
+ inputs: Blob;
15
15
  /**
16
16
  * Additional inference parameters for Object Detection
17
17
  */
@@ -7,7 +7,8 @@
7
7
  "properties": {
8
8
  "inputs": {
9
9
  "type": "string",
10
- "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
10
+ "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
11
+ "comment": "type=binary"
11
12
  },
12
13
  "parameters": {
13
14
  "description": "Additional inference parameters for Object Detection",
@@ -3,9 +3,7 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  export type SentenceSimilarityOutput = number[];
8
-
9
7
  /**
10
8
  * Inputs for Sentence similarity inference
11
9
  */
@@ -14,10 +12,11 @@ export interface SentenceSimilarityInput {
14
12
  /**
15
13
  * Additional inference parameters for Sentence Similarity
16
14
  */
17
- parameters?: { [key: string]: unknown };
15
+ parameters?: {
16
+ [key: string]: unknown;
17
+ };
18
18
  [property: string]: unknown;
19
19
  }
20
-
21
20
  export interface SentenceSimilarityInputData {
22
21
  /**
23
22
  * A list of strings which will be compared against the source_sentence.
@@ -3,7 +3,6 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  /**
8
7
  * Inputs for Summarization inference
9
8
  */
@@ -18,7 +17,6 @@ export interface SummarizationInput {
18
17
  parameters?: SummarizationParameters;
19
18
  [property: string]: unknown;
20
19
  }
21
-
22
20
  /**
23
21
  * Additional inference parameters for summarization.
24
22
  */
@@ -30,19 +28,19 @@ export interface SummarizationParameters {
30
28
  /**
31
29
  * Additional parametrization of the text generation algorithm.
32
30
  */
33
- generate_parameters?: { [key: string]: unknown };
31
+ generate_parameters?: {
32
+ [key: string]: unknown;
33
+ };
34
34
  /**
35
35
  * The truncation strategy to use.
36
36
  */
37
37
  truncation?: SummarizationTruncationStrategy;
38
38
  [property: string]: unknown;
39
39
  }
40
-
41
40
  /**
42
41
  * The truncation strategy to use.
43
42
  */
44
43
  export type SummarizationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
45
-
46
44
  /**
47
45
  * Outputs of inference for the Summarization task
48
46
  */
@@ -3,7 +3,6 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  /**
8
7
  * Text Generation Input.
9
8
  *
@@ -17,7 +16,6 @@ export interface TextGenerationInput {
17
16
  stream?: boolean;
18
17
  [property: string]: unknown;
19
18
  }
20
-
21
19
  export interface TextGenerationInputGenerateParameters {
22
20
  /**
23
21
  * Lora adapter id
@@ -100,7 +98,6 @@ export interface TextGenerationInputGenerateParameters {
100
98
  watermark?: boolean;
101
99
  [property: string]: unknown;
102
100
  }
103
-
104
101
  export interface TextGenerationInputGrammarType {
105
102
  type: Type;
106
103
  /**
@@ -112,9 +109,7 @@ export interface TextGenerationInputGrammarType {
112
109
  value: unknown;
113
110
  [property: string]: unknown;
114
111
  }
115
-
116
112
  export type Type = "json" | "regex";
117
-
118
113
  /**
119
114
  * Text Generation Output.
120
115
  *
@@ -127,7 +122,6 @@ export interface TextGenerationOutput {
127
122
  generated_text: string;
128
123
  [property: string]: unknown;
129
124
  }
130
-
131
125
  export interface TextGenerationOutputDetails {
132
126
  best_of_sequences?: TextGenerationOutputBestOfSequence[];
133
127
  finish_reason: TextGenerationOutputFinishReason;
@@ -138,7 +132,6 @@ export interface TextGenerationOutputDetails {
138
132
  top_tokens?: Array<TextGenerationOutputToken[]>;
139
133
  [property: string]: unknown;
140
134
  }
141
-
142
135
  export interface TextGenerationOutputBestOfSequence {
143
136
  finish_reason: TextGenerationOutputFinishReason;
144
137
  generated_text: string;
@@ -149,16 +142,13 @@ export interface TextGenerationOutputBestOfSequence {
149
142
  top_tokens?: Array<TextGenerationOutputToken[]>;
150
143
  [property: string]: unknown;
151
144
  }
152
-
153
145
  export type TextGenerationOutputFinishReason = "length" | "eos_token" | "stop_sequence";
154
-
155
146
  export interface TextGenerationOutputPrefillToken {
156
147
  id: number;
157
148
  logprob: number;
158
149
  text: string;
159
150
  [property: string]: unknown;
160
151
  }
161
-
162
152
  export interface TextGenerationOutputToken {
163
153
  id: number;
164
154
  logprob: number;
@@ -166,7 +156,6 @@ export interface TextGenerationOutputToken {
166
156
  text: string;
167
157
  [property: string]: unknown;
168
158
  }
169
-
170
159
  /**
171
160
  * Text Generation Stream Output.
172
161
  *
@@ -182,7 +171,6 @@ export interface TextGenerationStreamOutput {
182
171
  top_tokens?: TextGenerationStreamOutputToken[];
183
172
  [property: string]: unknown;
184
173
  }
185
-
186
174
  export interface TextGenerationStreamOutputStreamDetails {
187
175
  finish_reason: TextGenerationOutputFinishReason;
188
176
  generated_tokens: number;
@@ -190,7 +178,6 @@ export interface TextGenerationStreamOutputStreamDetails {
190
178
  seed?: number;
191
179
  [property: string]: unknown;
192
180
  }
193
-
194
181
  export interface TextGenerationStreamOutputToken {
195
182
  id: number;
196
183
  logprob: number;
@@ -1,9 +1,22 @@
1
+ /**
2
+ * Outputs of inference for the Text To Audio task
3
+ */
4
+ export interface TextToAudioOutput {
5
+ /**
6
+ * The generated audio waveform.
7
+ */
8
+ audio: Blob;
9
+ /**
10
+ * The sampling rate of the generated audio waveform.
11
+ */
12
+ sampling_rate: number;
13
+ [property: string]: unknown;
14
+ }
1
15
  /**
2
16
  * Inference code generated from the JSON schema spec in ./spec
3
17
  *
4
18
  * Using src/scripts/inference-codegen
5
19
  */
6
-
7
20
  /**
8
21
  * Inputs for Text To Audio inference
9
22
  */
@@ -18,7 +31,6 @@ export interface TextToAudioInput {
18
31
  parameters?: TextToAudioParameters;
19
32
  [property: string]: unknown;
20
33
  }
21
-
22
34
  /**
23
35
  * Additional inference parameters for Text To Audio
24
36
  */
@@ -29,7 +41,6 @@ export interface TextToAudioParameters {
29
41
  generation_parameters?: GenerationParameters;
30
42
  [property: string]: unknown;
31
43
  }
32
-
33
44
  /**
34
45
  * Parametrization of the text generation process
35
46
  */
@@ -116,24 +127,7 @@ export interface GenerationParameters {
116
127
  use_cache?: boolean;
117
128
  [property: string]: unknown;
118
129
  }
119
-
120
130
  /**
121
131
  * Controls the stopping condition for beam-based methods.
122
132
  */
123
133
  export type EarlyStoppingUnion = boolean | "never";
124
-
125
- /**
126
- * Outputs of inference for the Text To Audio task
127
- */
128
- export interface TextToAudioOutput {
129
- /**
130
- * The generated audio waveform.
131
- */
132
- audio: unknown;
133
- samplingRate: unknown;
134
- /**
135
- * The sampling rate of the generated audio waveform.
136
- */
137
- sampling_rate?: number;
138
- [property: string]: unknown;
139
- }