@huggingface/tasks 0.13.16 → 0.14.0

This diff compares two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (165)
  1. package/dist/commonjs/model-data.d.ts +3 -0
  2. package/dist/commonjs/model-data.d.ts.map +1 -1
  3. package/dist/commonjs/model-libraries-snippets.d.ts +3 -1
  4. package/dist/commonjs/model-libraries-snippets.d.ts.map +1 -1
  5. package/dist/commonjs/model-libraries-snippets.js +134 -22
  6. package/dist/commonjs/model-libraries.d.ts +12 -8
  7. package/dist/commonjs/model-libraries.d.ts.map +1 -1
  8. package/dist/commonjs/model-libraries.js +15 -8
  9. package/dist/commonjs/tasks/audio-classification/inference.d.ts +1 -1
  10. package/dist/commonjs/tasks/audio-classification/inference.d.ts.map +1 -1
  11. package/dist/commonjs/tasks/automatic-speech-recognition/inference.d.ts +1 -1
  12. package/dist/commonjs/tasks/automatic-speech-recognition/inference.d.ts.map +1 -1
  13. package/dist/commonjs/tasks/automatic-speech-recognition/inference.js +0 -5
  14. package/dist/commonjs/tasks/chat-completion/inference.d.ts.map +1 -1
  15. package/dist/commonjs/tasks/chat-completion/inference.js +0 -5
  16. package/dist/commonjs/tasks/depth-estimation/inference.d.ts.map +1 -1
  17. package/dist/commonjs/tasks/depth-estimation/inference.js +0 -5
  18. package/dist/commonjs/tasks/feature-extraction/inference.d.ts.map +1 -1
  19. package/dist/commonjs/tasks/feature-extraction/inference.js +0 -5
  20. package/dist/commonjs/tasks/image-classification/inference.d.ts +1 -1
  21. package/dist/commonjs/tasks/image-classification/inference.d.ts.map +1 -1
  22. package/dist/commonjs/tasks/image-segmentation/inference.d.ts +1 -1
  23. package/dist/commonjs/tasks/image-segmentation/inference.d.ts.map +1 -1
  24. package/dist/commonjs/tasks/image-to-image/inference.d.ts +3 -3
  25. package/dist/commonjs/tasks/image-to-image/inference.d.ts.map +1 -1
  26. package/dist/commonjs/tasks/image-to-image/inference.js +0 -5
  27. package/dist/commonjs/tasks/image-to-text/inference.d.ts +1 -1
  28. package/dist/commonjs/tasks/image-to-text/inference.d.ts.map +1 -1
  29. package/dist/commonjs/tasks/image-to-text/inference.js +0 -5
  30. package/dist/commonjs/tasks/index.d.ts +1 -0
  31. package/dist/commonjs/tasks/index.d.ts.map +1 -1
  32. package/dist/commonjs/tasks/object-detection/inference.d.ts +1 -1
  33. package/dist/commonjs/tasks/object-detection/inference.d.ts.map +1 -1
  34. package/dist/commonjs/tasks/sentence-similarity/inference.d.ts.map +1 -1
  35. package/dist/commonjs/tasks/sentence-similarity/inference.js +0 -5
  36. package/dist/commonjs/tasks/summarization/inference.d.ts.map +1 -1
  37. package/dist/commonjs/tasks/summarization/inference.js +0 -5
  38. package/dist/commonjs/tasks/text-generation/inference.d.ts.map +1 -1
  39. package/dist/commonjs/tasks/text-generation/inference.js +0 -5
  40. package/dist/commonjs/tasks/text-to-audio/inference.d.ts +14 -15
  41. package/dist/commonjs/tasks/text-to-audio/inference.d.ts.map +1 -1
  42. package/dist/commonjs/tasks/text-to-audio/inference.js +0 -5
  43. package/dist/commonjs/tasks/text-to-image/inference.d.ts +2 -2
  44. package/dist/commonjs/tasks/text-to-image/inference.d.ts.map +1 -1
  45. package/dist/commonjs/tasks/text-to-image/inference.js +0 -5
  46. package/dist/commonjs/tasks/text-to-speech/inference.d.ts +14 -17
  47. package/dist/commonjs/tasks/text-to-speech/inference.d.ts.map +1 -1
  48. package/dist/commonjs/tasks/text-to-speech/inference.js +0 -5
  49. package/dist/commonjs/tasks/text-to-video/inference.d.ts +58 -0
  50. package/dist/commonjs/tasks/text-to-video/inference.d.ts.map +1 -0
  51. package/dist/commonjs/tasks/text-to-video/inference.js +2 -0
  52. package/dist/commonjs/tasks/text2text-generation/inference.d.ts.map +1 -1
  53. package/dist/commonjs/tasks/text2text-generation/inference.js +0 -5
  54. package/dist/commonjs/tasks/translation/inference.d.ts.map +1 -1
  55. package/dist/commonjs/tasks/translation/inference.js +0 -5
  56. package/dist/commonjs/tasks/visual-question-answering/inference.d.ts +1 -1
  57. package/dist/commonjs/tasks/visual-question-answering/inference.d.ts.map +1 -1
  58. package/dist/commonjs/tasks/zero-shot-image-classification/inference.d.ts +1 -1
  59. package/dist/commonjs/tasks/zero-shot-image-classification/inference.d.ts.map +1 -1
  60. package/dist/commonjs/tasks/zero-shot-object-detection/inference.d.ts +1 -1
  61. package/dist/commonjs/tasks/zero-shot-object-detection/inference.d.ts.map +1 -1
  62. package/dist/esm/model-data.d.ts +3 -0
  63. package/dist/esm/model-data.d.ts.map +1 -1
  64. package/dist/esm/model-libraries-snippets.d.ts +3 -1
  65. package/dist/esm/model-libraries-snippets.d.ts.map +1 -1
  66. package/dist/esm/model-libraries-snippets.js +129 -19
  67. package/dist/esm/model-libraries.d.ts +12 -8
  68. package/dist/esm/model-libraries.d.ts.map +1 -1
  69. package/dist/esm/model-libraries.js +15 -8
  70. package/dist/esm/tasks/audio-classification/inference.d.ts +1 -1
  71. package/dist/esm/tasks/audio-classification/inference.d.ts.map +1 -1
  72. package/dist/esm/tasks/automatic-speech-recognition/inference.d.ts +1 -1
  73. package/dist/esm/tasks/automatic-speech-recognition/inference.d.ts.map +1 -1
  74. package/dist/esm/tasks/automatic-speech-recognition/inference.js +0 -5
  75. package/dist/esm/tasks/chat-completion/inference.d.ts.map +1 -1
  76. package/dist/esm/tasks/chat-completion/inference.js +0 -5
  77. package/dist/esm/tasks/depth-estimation/inference.d.ts.map +1 -1
  78. package/dist/esm/tasks/depth-estimation/inference.js +0 -5
  79. package/dist/esm/tasks/feature-extraction/inference.d.ts.map +1 -1
  80. package/dist/esm/tasks/feature-extraction/inference.js +0 -5
  81. package/dist/esm/tasks/image-classification/inference.d.ts +1 -1
  82. package/dist/esm/tasks/image-classification/inference.d.ts.map +1 -1
  83. package/dist/esm/tasks/image-segmentation/inference.d.ts +1 -1
  84. package/dist/esm/tasks/image-segmentation/inference.d.ts.map +1 -1
  85. package/dist/esm/tasks/image-to-image/inference.d.ts +3 -3
  86. package/dist/esm/tasks/image-to-image/inference.d.ts.map +1 -1
  87. package/dist/esm/tasks/image-to-image/inference.js +0 -5
  88. package/dist/esm/tasks/image-to-text/inference.d.ts +1 -1
  89. package/dist/esm/tasks/image-to-text/inference.d.ts.map +1 -1
  90. package/dist/esm/tasks/image-to-text/inference.js +0 -5
  91. package/dist/esm/tasks/index.d.ts +1 -0
  92. package/dist/esm/tasks/index.d.ts.map +1 -1
  93. package/dist/esm/tasks/object-detection/inference.d.ts +1 -1
  94. package/dist/esm/tasks/object-detection/inference.d.ts.map +1 -1
  95. package/dist/esm/tasks/sentence-similarity/inference.d.ts.map +1 -1
  96. package/dist/esm/tasks/sentence-similarity/inference.js +0 -5
  97. package/dist/esm/tasks/summarization/inference.d.ts.map +1 -1
  98. package/dist/esm/tasks/summarization/inference.js +0 -5
  99. package/dist/esm/tasks/text-generation/inference.d.ts.map +1 -1
  100. package/dist/esm/tasks/text-generation/inference.js +0 -5
  101. package/dist/esm/tasks/text-to-audio/inference.d.ts +14 -15
  102. package/dist/esm/tasks/text-to-audio/inference.d.ts.map +1 -1
  103. package/dist/esm/tasks/text-to-audio/inference.js +0 -5
  104. package/dist/esm/tasks/text-to-image/inference.d.ts +2 -2
  105. package/dist/esm/tasks/text-to-image/inference.d.ts.map +1 -1
  106. package/dist/esm/tasks/text-to-image/inference.js +0 -5
  107. package/dist/esm/tasks/text-to-speech/inference.d.ts +14 -17
  108. package/dist/esm/tasks/text-to-speech/inference.d.ts.map +1 -1
  109. package/dist/esm/tasks/text-to-speech/inference.js +0 -5
  110. package/dist/esm/tasks/text-to-video/inference.d.ts +58 -0
  111. package/dist/esm/tasks/text-to-video/inference.d.ts.map +1 -0
  112. package/dist/esm/tasks/text-to-video/inference.js +1 -0
  113. package/dist/esm/tasks/text2text-generation/inference.d.ts.map +1 -1
  114. package/dist/esm/tasks/text2text-generation/inference.js +0 -5
  115. package/dist/esm/tasks/translation/inference.d.ts.map +1 -1
  116. package/dist/esm/tasks/translation/inference.js +0 -5
  117. package/dist/esm/tasks/visual-question-answering/inference.d.ts +1 -1
  118. package/dist/esm/tasks/visual-question-answering/inference.d.ts.map +1 -1
  119. package/dist/esm/tasks/zero-shot-image-classification/inference.d.ts +1 -1
  120. package/dist/esm/tasks/zero-shot-image-classification/inference.d.ts.map +1 -1
  121. package/dist/esm/tasks/zero-shot-object-detection/inference.d.ts +1 -1
  122. package/dist/esm/tasks/zero-shot-object-detection/inference.d.ts.map +1 -1
  123. package/package.json +1 -1
  124. package/src/model-data.ts +3 -0
  125. package/src/model-libraries-snippets.ts +141 -19
  126. package/src/model-libraries.ts +15 -8
  127. package/src/tasks/audio-classification/inference.ts +1 -1
  128. package/src/tasks/audio-classification/spec/input.json +2 -1
  129. package/src/tasks/automatic-speech-recognition/inference.ts +1 -7
  130. package/src/tasks/automatic-speech-recognition/spec/input.json +2 -1
  131. package/src/tasks/chat-completion/inference.ts +0 -33
  132. package/src/tasks/depth-estimation/inference.ts +3 -3
  133. package/src/tasks/document-question-answering/spec/input.json +2 -1
  134. package/src/tasks/feature-extraction/inference.ts +0 -3
  135. package/src/tasks/image-classification/inference.ts +1 -1
  136. package/src/tasks/image-classification/spec/input.json +2 -1
  137. package/src/tasks/image-segmentation/inference.ts +1 -1
  138. package/src/tasks/image-segmentation/spec/input.json +2 -1
  139. package/src/tasks/image-to-image/inference.ts +3 -7
  140. package/src/tasks/image-to-image/spec/input.json +4 -6
  141. package/src/tasks/image-to-text/inference.ts +1 -6
  142. package/src/tasks/image-to-text/spec/input.json +2 -1
  143. package/src/tasks/index.ts +1 -0
  144. package/src/tasks/object-detection/inference.ts +1 -1
  145. package/src/tasks/object-detection/spec/input.json +2 -1
  146. package/src/tasks/sentence-similarity/inference.ts +3 -4
  147. package/src/tasks/summarization/inference.ts +3 -5
  148. package/src/tasks/text-generation/inference.ts +0 -13
  149. package/src/tasks/text-to-audio/inference.ts +14 -20
  150. package/src/tasks/text-to-audio/spec/output.json +3 -2
  151. package/src/tasks/text-to-image/inference.ts +2 -6
  152. package/src/tasks/text-to-image/spec/input.json +2 -5
  153. package/src/tasks/text-to-speech/inference.ts +14 -22
  154. package/src/tasks/text-to-speech/spec/output.json +13 -2
  155. package/src/tasks/text-to-video/inference.ts +57 -0
  156. package/src/tasks/text-to-video/spec/input.json +49 -0
  157. package/src/tasks/text-to-video/spec/output.json +13 -0
  158. package/src/tasks/text2text-generation/inference.ts +3 -5
  159. package/src/tasks/translation/inference.ts +3 -5
  160. package/src/tasks/visual-question-answering/inference.ts +1 -1
  161. package/src/tasks/visual-question-answering/spec/input.json +4 -2
  162. package/src/tasks/zero-shot-image-classification/inference.ts +1 -1
  163. package/src/tasks/zero-shot-image-classification/spec/input.json +2 -1
  164. package/src/tasks/zero-shot-object-detection/inference.ts +1 -1
  165. package/src/tasks/zero-shot-object-detection/spec/input.json +2 -1
@@ -6,12 +6,13 @@
6
6
  "type": "object",
7
7
  "properties": {
8
8
  "audio": {
9
- "description": "The generated audio waveform."
9
+ "description": "The generated audio waveform.",
10
+ "comment": "type=binary"
10
11
  },
11
12
  "sampling_rate": {
12
13
  "type": "number",
13
14
  "description": "The sampling rate of the generated audio waveform."
14
15
  }
15
16
  },
16
- "required": ["audio", "samplingRate"]
17
+ "required": ["audio", "sampling_rate"]
17
18
  }
@@ -3,7 +3,6 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  /**
8
7
  * Inputs for Text To Image inference
9
8
  */
@@ -18,7 +17,6 @@ export interface TextToImageInput {
18
17
  parameters?: TextToImageParameters;
19
18
  [property: string]: unknown;
20
19
  }
21
-
22
20
  /**
23
21
  * Additional inference parameters for Text To Image
24
22
  */
@@ -29,9 +27,9 @@ export interface TextToImageParameters {
29
27
  */
30
28
  guidance_scale?: number;
31
29
  /**
32
- * One or several prompt to guide what NOT to include in image generation.
30
+ * One prompt to guide what NOT to include in image generation.
33
31
  */
34
- negative_prompt?: string[];
32
+ negative_prompt?: string;
35
33
  /**
36
34
  * The number of denoising steps. More denoising steps usually lead to a higher quality
37
35
  * image at the expense of slower inference.
@@ -51,7 +49,6 @@ export interface TextToImageParameters {
51
49
  target_size?: TargetSize;
52
50
  [property: string]: unknown;
53
51
  }
54
-
55
52
  /**
56
53
  * The size in pixel of the output image
57
54
  */
@@ -60,7 +57,6 @@ export interface TargetSize {
60
57
  width: number;
61
58
  [property: string]: unknown;
62
59
  }
63
-
64
60
  /**
65
61
  * Outputs of inference for the Text To Image task
66
62
  */
@@ -24,11 +24,8 @@
24
24
  "description": "A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts."
25
25
  },
26
26
  "negative_prompt": {
27
- "type": "array",
28
- "items": {
29
- "type": "string"
30
- },
31
- "description": "One or several prompt to guide what NOT to include in image generation."
27
+ "type": "string",
28
+ "description": "One prompt to guide what NOT to include in image generation."
32
29
  },
33
30
  "num_inference_steps": {
34
31
  "type": "integer",
@@ -1,9 +1,22 @@
1
+ /**
2
+ * Outputs of inference for the Text To Speech task
3
+ */
4
+ export interface TextToSpeechOutput {
5
+ /**
6
+ * The generated audio
7
+ */
8
+ audio: Blob;
9
+ /**
10
+ * The sampling rate of the generated audio waveform.
11
+ */
12
+ sampling_rate?: number;
13
+ [property: string]: unknown;
14
+ }
1
15
  /**
2
16
  * Inference code generated from the JSON schema spec in ./spec
3
17
  *
4
18
  * Using src/scripts/inference-codegen
5
19
  */
6
-
7
20
  /**
8
21
  * Inputs for Text To Speech inference
9
22
  */
@@ -18,7 +31,6 @@ export interface TextToSpeechInput {
18
31
  parameters?: TextToSpeechParameters;
19
32
  [property: string]: unknown;
20
33
  }
21
-
22
34
  /**
23
35
  * Additional inference parameters for Text To Speech
24
36
  */
@@ -29,7 +41,6 @@ export interface TextToSpeechParameters {
29
41
  generation_parameters?: GenerationParameters;
30
42
  [property: string]: unknown;
31
43
  }
32
-
33
44
  /**
34
45
  * Parametrization of the text generation process
35
46
  */
@@ -116,26 +127,7 @@ export interface GenerationParameters {
116
127
  use_cache?: boolean;
117
128
  [property: string]: unknown;
118
129
  }
119
-
120
130
  /**
121
131
  * Controls the stopping condition for beam-based methods.
122
132
  */
123
133
  export type EarlyStoppingUnion = boolean | "never";
124
-
125
- /**
126
- * Outputs for Text to Speech inference
127
- *
128
- * Outputs of inference for the Text To Audio task
129
- */
130
- export interface TextToSpeechOutput {
131
- /**
132
- * The generated audio waveform.
133
- */
134
- audio: unknown;
135
- samplingRate: unknown;
136
- /**
137
- * The sampling rate of the generated audio waveform.
138
- */
139
- sampling_rate?: number;
140
- [property: string]: unknown;
141
- }
@@ -1,7 +1,18 @@
1
1
  {
2
- "$ref": "/inference/schemas/text-to-audio/output.json",
3
2
  "$id": "/inference/schemas/text-to-speech/output.json",
4
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Outputs of inference for the Text To Speech task",
5
5
  "title": "TextToSpeechOutput",
6
- "description": "Outputs for Text to Speech inference"
6
+ "type": "object",
7
+ "properties": {
8
+ "audio": {
9
+ "description": "The generated audio",
10
+ "comment": "type=binary"
11
+ },
12
+ "sampling_rate": {
13
+ "type": "number",
14
+ "description": "The sampling rate of the generated audio waveform."
15
+ }
16
+ },
17
+ "required": ["audio"]
7
18
  }
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Inference code generated from the JSON schema spec in ./spec
3
+ *
4
+ * Using src/scripts/inference-codegen
5
+ */
6
+ /**
7
+ * Inputs for Text To Video inference
8
+ */
9
+ export interface TextToVideoInput {
10
+ /**
11
+ * The input text data (sometimes called "prompt")
12
+ */
13
+ inputs: string;
14
+ /**
15
+ * Additional inference parameters for Text To Video
16
+ */
17
+ parameters?: TextToVideoParameters;
18
+ [property: string]: unknown;
19
+ }
20
+ /**
21
+ * Additional inference parameters for Text To Video
22
+ */
23
+ export interface TextToVideoParameters {
24
+ /**
25
+ * A higher guidance scale value encourages the model to generate images closely linked to
26
+ * the text prompt, but values too high may cause saturation and other artifacts.
27
+ */
28
+ guidance_scale?: number;
29
+ /**
30
+ * One or several prompt to guide what NOT to include in image generation.
31
+ */
32
+ negative_prompt?: string[];
33
+ /**
34
+ * The num_frames parameter determines how many video frames are generated.
35
+ */
36
+ num_frames?: number;
37
+ /**
38
+ * The number of denoising steps. More denoising steps usually lead to a higher quality
39
+ * image at the expense of slower inference.
40
+ */
41
+ num_inference_steps?: number;
42
+ /**
43
+ * Seed for the random number generator.
44
+ */
45
+ seed?: number;
46
+ [property: string]: unknown;
47
+ }
48
+ /**
49
+ * Outputs of inference for the Text To Video task
50
+ */
51
+ export interface TextToVideoOutput {
52
+ /**
53
+ * The generated video returned as raw bytes in the payload.
54
+ */
55
+ video: unknown;
56
+ [property: string]: unknown;
57
+ }
@@ -0,0 +1,49 @@
1
+ {
2
+ "$id": "/inference/schemas/text-to-video/input.json",
3
+ "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Inputs for Text To Video inference",
5
+ "title": "TextToVideoInput",
6
+ "type": "object",
7
+ "properties": {
8
+ "inputs": {
9
+ "description": "The input text data (sometimes called \"prompt\")",
10
+ "type": "string"
11
+ },
12
+ "parameters": {
13
+ "description": "Additional inference parameters for Text To Video",
14
+ "$ref": "#/$defs/TextToVideoParameters"
15
+ }
16
+ },
17
+ "$defs": {
18
+ "TextToVideoParameters": {
19
+ "title": "TextToVideoParameters",
20
+ "type": "object",
21
+ "properties": {
22
+ "num_frames": {
23
+ "type": "number",
24
+ "description": "The num_frames parameter determines how many video frames are generated."
25
+ },
26
+ "guidance_scale": {
27
+ "type": "number",
28
+ "description": "A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts."
29
+ },
30
+ "negative_prompt": {
31
+ "type": "array",
32
+ "items": {
33
+ "type": "string"
34
+ },
35
+ "description": "One or several prompt to guide what NOT to include in image generation."
36
+ },
37
+ "num_inference_steps": {
38
+ "type": "integer",
39
+ "description": "The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
40
+ },
41
+ "seed": {
42
+ "type": "integer",
43
+ "description": "Seed for the random number generator."
44
+ }
45
+ }
46
+ }
47
+ },
48
+ "required": ["inputs"]
49
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "$id": "/inference/schemas/text-to-video/output.json",
3
+ "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Outputs of inference for the Text To Video task",
5
+ "title": "TextToVideoOutput",
6
+ "type": "object",
7
+ "properties": {
8
+ "video": {
9
+ "description": "The generated video returned as raw bytes in the payload."
10
+ }
11
+ },
12
+ "required": ["video"]
13
+ }
@@ -3,7 +3,6 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  /**
8
7
  * Inputs for Text2text Generation inference
9
8
  */
@@ -18,7 +17,6 @@ export interface Text2TextGenerationInput {
18
17
  parameters?: Text2TextGenerationParameters;
19
18
  [property: string]: unknown;
20
19
  }
21
-
22
20
  /**
23
21
  * Additional inference parameters for Text2text Generation
24
22
  */
@@ -30,16 +28,16 @@ export interface Text2TextGenerationParameters {
30
28
  /**
31
29
  * Additional parametrization of the text generation algorithm
32
30
  */
33
- generate_parameters?: { [key: string]: unknown };
31
+ generate_parameters?: {
32
+ [key: string]: unknown;
33
+ };
34
34
  /**
35
35
  * The truncation strategy to use
36
36
  */
37
37
  truncation?: Text2TextGenerationTruncationStrategy;
38
38
  [property: string]: unknown;
39
39
  }
40
-
41
40
  export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
42
-
43
41
  /**
44
42
  * Outputs of inference for the Text2text Generation task
45
43
  */
@@ -3,7 +3,6 @@
3
3
  *
4
4
  * Using src/scripts/inference-codegen
5
5
  */
6
-
7
6
  /**
8
7
  * Inputs for Translation inference
9
8
  */
@@ -18,7 +17,6 @@ export interface TranslationInput {
18
17
  parameters?: TranslationParameters;
19
18
  [property: string]: unknown;
20
19
  }
21
-
22
20
  /**
23
21
  * Additional inference parameters for Translation
24
22
  */
@@ -30,7 +28,9 @@ export interface TranslationParameters {
30
28
  /**
31
29
  * Additional parametrization of the text generation algorithm.
32
30
  */
33
- generate_parameters?: { [key: string]: unknown };
31
+ generate_parameters?: {
32
+ [key: string]: unknown;
33
+ };
34
34
  /**
35
35
  * The source language of the text. Required for models that can translate from multiple
36
36
  * languages.
@@ -47,12 +47,10 @@ export interface TranslationParameters {
47
47
  truncation?: TranslationTruncationStrategy;
48
48
  [property: string]: unknown;
49
49
  }
50
-
51
50
  /**
52
51
  * The truncation strategy to use.
53
52
  */
54
53
  export type TranslationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
55
-
56
54
  /**
57
55
  * Outputs of inference for the Translation task
58
56
  */
@@ -28,7 +28,7 @@ export interface VisualQuestionAnsweringInputData {
28
28
  /**
29
29
  * The question to answer based on the image.
30
30
  */
31
- question: unknown;
31
+ question: string;
32
32
  [property: string]: unknown;
33
33
  }
34
34
  /**
@@ -11,10 +11,12 @@
11
11
  "title": "VisualQuestionAnsweringInputData",
12
12
  "properties": {
13
13
  "image": {
14
- "description": "The image."
14
+ "description": "The image.",
15
+ "comment": "type=binary"
15
16
  },
16
17
  "question": {
17
- "description": "The question to answer based on the image."
18
+ "description": "The question to answer based on the image.",
19
+ "type": "string"
18
20
  }
19
21
  },
20
22
  "required": ["question", "image"]
@@ -10,7 +10,7 @@ export interface ZeroShotImageClassificationInput {
10
10
  /**
11
11
  * The input image data to classify as a base64-encoded string.
12
12
  */
13
- inputs: string;
13
+ inputs: Blob;
14
14
  /**
15
15
  * Additional inference parameters for Zero Shot Image Classification
16
16
  */
@@ -7,7 +7,8 @@
7
7
  "properties": {
8
8
  "inputs": {
9
9
  "type": "string",
10
- "description": "The input image data to classify as a base64-encoded string."
10
+ "description": "The input image data to classify as a base64-encoded string.",
11
+ "comment": "type=binary"
11
12
  },
12
13
  "parameters": {
13
14
  "description": "Additional inference parameters for Zero Shot Image Classification",
@@ -10,7 +10,7 @@ export interface ZeroShotObjectDetectionInput {
10
10
  /**
11
11
  * The input image data as a base64-encoded string.
12
12
  */
13
- inputs: string;
13
+ inputs: Blob;
14
14
  /**
15
15
  * Additional inference parameters for Zero Shot Object Detection
16
16
  */
@@ -7,7 +7,8 @@
7
7
  "properties": {
8
8
  "inputs": {
9
9
  "description": "The input image data as a base64-encoded string.",
10
- "type": "string"
10
+ "type": "string",
11
+ "comment": "type=binary"
11
12
  },
12
13
  "parameters": {
13
14
  "description": "Additional inference parameters for Zero Shot Object Detection",