@huggingface/tasks 0.13.16 → 0.14.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/commonjs/model-data.d.ts +3 -0
- package/dist/commonjs/model-data.d.ts.map +1 -1
- package/dist/commonjs/model-libraries-snippets.d.ts +3 -1
- package/dist/commonjs/model-libraries-snippets.d.ts.map +1 -1
- package/dist/commonjs/model-libraries-snippets.js +134 -22
- package/dist/commonjs/model-libraries.d.ts +12 -8
- package/dist/commonjs/model-libraries.d.ts.map +1 -1
- package/dist/commonjs/model-libraries.js +15 -8
- package/dist/commonjs/tasks/audio-classification/inference.d.ts +1 -1
- package/dist/commonjs/tasks/audio-classification/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/automatic-speech-recognition/inference.d.ts +1 -1
- package/dist/commonjs/tasks/automatic-speech-recognition/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/automatic-speech-recognition/inference.js +0 -5
- package/dist/commonjs/tasks/chat-completion/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/chat-completion/inference.js +0 -5
- package/dist/commonjs/tasks/depth-estimation/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/depth-estimation/inference.js +0 -5
- package/dist/commonjs/tasks/feature-extraction/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/feature-extraction/inference.js +0 -5
- package/dist/commonjs/tasks/image-classification/inference.d.ts +1 -1
- package/dist/commonjs/tasks/image-classification/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/image-segmentation/inference.d.ts +1 -1
- package/dist/commonjs/tasks/image-segmentation/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/image-to-image/inference.d.ts +3 -3
- package/dist/commonjs/tasks/image-to-image/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/image-to-image/inference.js +0 -5
- package/dist/commonjs/tasks/image-to-text/inference.d.ts +1 -1
- package/dist/commonjs/tasks/image-to-text/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/image-to-text/inference.js +0 -5
- package/dist/commonjs/tasks/index.d.ts +1 -0
- package/dist/commonjs/tasks/index.d.ts.map +1 -1
- package/dist/commonjs/tasks/object-detection/inference.d.ts +1 -1
- package/dist/commonjs/tasks/object-detection/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/sentence-similarity/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/sentence-similarity/inference.js +0 -5
- package/dist/commonjs/tasks/summarization/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/summarization/inference.js +0 -5
- package/dist/commonjs/tasks/text-generation/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/text-generation/inference.js +0 -5
- package/dist/commonjs/tasks/text-to-audio/inference.d.ts +14 -15
- package/dist/commonjs/tasks/text-to-audio/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/text-to-audio/inference.js +0 -5
- package/dist/commonjs/tasks/text-to-image/inference.d.ts +2 -2
- package/dist/commonjs/tasks/text-to-image/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/text-to-image/inference.js +0 -5
- package/dist/commonjs/tasks/text-to-speech/inference.d.ts +14 -17
- package/dist/commonjs/tasks/text-to-speech/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/text-to-speech/inference.js +0 -5
- package/dist/commonjs/tasks/text-to-video/inference.d.ts +58 -0
- package/dist/commonjs/tasks/text-to-video/inference.d.ts.map +1 -0
- package/dist/commonjs/tasks/text-to-video/inference.js +2 -0
- package/dist/commonjs/tasks/text2text-generation/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/text2text-generation/inference.js +0 -5
- package/dist/commonjs/tasks/translation/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/translation/inference.js +0 -5
- package/dist/commonjs/tasks/visual-question-answering/inference.d.ts +1 -1
- package/dist/commonjs/tasks/visual-question-answering/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/zero-shot-image-classification/inference.d.ts +1 -1
- package/dist/commonjs/tasks/zero-shot-image-classification/inference.d.ts.map +1 -1
- package/dist/commonjs/tasks/zero-shot-object-detection/inference.d.ts +1 -1
- package/dist/commonjs/tasks/zero-shot-object-detection/inference.d.ts.map +1 -1
- package/dist/esm/model-data.d.ts +3 -0
- package/dist/esm/model-data.d.ts.map +1 -1
- package/dist/esm/model-libraries-snippets.d.ts +3 -1
- package/dist/esm/model-libraries-snippets.d.ts.map +1 -1
- package/dist/esm/model-libraries-snippets.js +129 -19
- package/dist/esm/model-libraries.d.ts +12 -8
- package/dist/esm/model-libraries.d.ts.map +1 -1
- package/dist/esm/model-libraries.js +15 -8
- package/dist/esm/tasks/audio-classification/inference.d.ts +1 -1
- package/dist/esm/tasks/audio-classification/inference.d.ts.map +1 -1
- package/dist/esm/tasks/automatic-speech-recognition/inference.d.ts +1 -1
- package/dist/esm/tasks/automatic-speech-recognition/inference.d.ts.map +1 -1
- package/dist/esm/tasks/automatic-speech-recognition/inference.js +0 -5
- package/dist/esm/tasks/chat-completion/inference.d.ts.map +1 -1
- package/dist/esm/tasks/chat-completion/inference.js +0 -5
- package/dist/esm/tasks/depth-estimation/inference.d.ts.map +1 -1
- package/dist/esm/tasks/depth-estimation/inference.js +0 -5
- package/dist/esm/tasks/feature-extraction/inference.d.ts.map +1 -1
- package/dist/esm/tasks/feature-extraction/inference.js +0 -5
- package/dist/esm/tasks/image-classification/inference.d.ts +1 -1
- package/dist/esm/tasks/image-classification/inference.d.ts.map +1 -1
- package/dist/esm/tasks/image-segmentation/inference.d.ts +1 -1
- package/dist/esm/tasks/image-segmentation/inference.d.ts.map +1 -1
- package/dist/esm/tasks/image-to-image/inference.d.ts +3 -3
- package/dist/esm/tasks/image-to-image/inference.d.ts.map +1 -1
- package/dist/esm/tasks/image-to-image/inference.js +0 -5
- package/dist/esm/tasks/image-to-text/inference.d.ts +1 -1
- package/dist/esm/tasks/image-to-text/inference.d.ts.map +1 -1
- package/dist/esm/tasks/image-to-text/inference.js +0 -5
- package/dist/esm/tasks/index.d.ts +1 -0
- package/dist/esm/tasks/index.d.ts.map +1 -1
- package/dist/esm/tasks/object-detection/inference.d.ts +1 -1
- package/dist/esm/tasks/object-detection/inference.d.ts.map +1 -1
- package/dist/esm/tasks/sentence-similarity/inference.d.ts.map +1 -1
- package/dist/esm/tasks/sentence-similarity/inference.js +0 -5
- package/dist/esm/tasks/summarization/inference.d.ts.map +1 -1
- package/dist/esm/tasks/summarization/inference.js +0 -5
- package/dist/esm/tasks/text-generation/inference.d.ts.map +1 -1
- package/dist/esm/tasks/text-generation/inference.js +0 -5
- package/dist/esm/tasks/text-to-audio/inference.d.ts +14 -15
- package/dist/esm/tasks/text-to-audio/inference.d.ts.map +1 -1
- package/dist/esm/tasks/text-to-audio/inference.js +0 -5
- package/dist/esm/tasks/text-to-image/inference.d.ts +2 -2
- package/dist/esm/tasks/text-to-image/inference.d.ts.map +1 -1
- package/dist/esm/tasks/text-to-image/inference.js +0 -5
- package/dist/esm/tasks/text-to-speech/inference.d.ts +14 -17
- package/dist/esm/tasks/text-to-speech/inference.d.ts.map +1 -1
- package/dist/esm/tasks/text-to-speech/inference.js +0 -5
- package/dist/esm/tasks/text-to-video/inference.d.ts +58 -0
- package/dist/esm/tasks/text-to-video/inference.d.ts.map +1 -0
- package/dist/esm/tasks/text-to-video/inference.js +1 -0
- package/dist/esm/tasks/text2text-generation/inference.d.ts.map +1 -1
- package/dist/esm/tasks/text2text-generation/inference.js +0 -5
- package/dist/esm/tasks/translation/inference.d.ts.map +1 -1
- package/dist/esm/tasks/translation/inference.js +0 -5
- package/dist/esm/tasks/visual-question-answering/inference.d.ts +1 -1
- package/dist/esm/tasks/visual-question-answering/inference.d.ts.map +1 -1
- package/dist/esm/tasks/zero-shot-image-classification/inference.d.ts +1 -1
- package/dist/esm/tasks/zero-shot-image-classification/inference.d.ts.map +1 -1
- package/dist/esm/tasks/zero-shot-object-detection/inference.d.ts +1 -1
- package/dist/esm/tasks/zero-shot-object-detection/inference.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/model-data.ts +3 -0
- package/src/model-libraries-snippets.ts +141 -19
- package/src/model-libraries.ts +15 -8
- package/src/tasks/audio-classification/inference.ts +1 -1
- package/src/tasks/audio-classification/spec/input.json +2 -1
- package/src/tasks/automatic-speech-recognition/inference.ts +1 -7
- package/src/tasks/automatic-speech-recognition/spec/input.json +2 -1
- package/src/tasks/chat-completion/inference.ts +0 -33
- package/src/tasks/depth-estimation/inference.ts +3 -3
- package/src/tasks/document-question-answering/spec/input.json +2 -1
- package/src/tasks/feature-extraction/inference.ts +0 -3
- package/src/tasks/image-classification/inference.ts +1 -1
- package/src/tasks/image-classification/spec/input.json +2 -1
- package/src/tasks/image-segmentation/inference.ts +1 -1
- package/src/tasks/image-segmentation/spec/input.json +2 -1
- package/src/tasks/image-to-image/inference.ts +3 -7
- package/src/tasks/image-to-image/spec/input.json +4 -6
- package/src/tasks/image-to-text/inference.ts +1 -6
- package/src/tasks/image-to-text/spec/input.json +2 -1
- package/src/tasks/index.ts +1 -0
- package/src/tasks/object-detection/inference.ts +1 -1
- package/src/tasks/object-detection/spec/input.json +2 -1
- package/src/tasks/sentence-similarity/inference.ts +3 -4
- package/src/tasks/summarization/inference.ts +3 -5
- package/src/tasks/text-generation/inference.ts +0 -13
- package/src/tasks/text-to-audio/inference.ts +14 -20
- package/src/tasks/text-to-audio/spec/output.json +3 -2
- package/src/tasks/text-to-image/inference.ts +2 -6
- package/src/tasks/text-to-image/spec/input.json +2 -5
- package/src/tasks/text-to-speech/inference.ts +14 -22
- package/src/tasks/text-to-speech/spec/output.json +13 -2
- package/src/tasks/text-to-video/inference.ts +57 -0
- package/src/tasks/text-to-video/spec/input.json +49 -0
- package/src/tasks/text-to-video/spec/output.json +13 -0
- package/src/tasks/text2text-generation/inference.ts +3 -5
- package/src/tasks/translation/inference.ts +3 -5
- package/src/tasks/visual-question-answering/inference.ts +1 -1
- package/src/tasks/visual-question-answering/spec/input.json +4 -2
- package/src/tasks/zero-shot-image-classification/inference.ts +1 -1
- package/src/tasks/zero-shot-image-classification/spec/input.json +2 -1
- package/src/tasks/zero-shot-object-detection/inference.ts +1 -1
- package/src/tasks/zero-shot-object-detection/spec/input.json +2 -1
|
@@ -6,12 +6,13 @@
|
|
|
6
6
|
"type": "object",
|
|
7
7
|
"properties": {
|
|
8
8
|
"audio": {
|
|
9
|
-
"description": "The generated audio waveform."
|
|
9
|
+
"description": "The generated audio waveform.",
|
|
10
|
+
"comment": "type=binary"
|
|
10
11
|
},
|
|
11
12
|
"sampling_rate": {
|
|
12
13
|
"type": "number",
|
|
13
14
|
"description": "The sampling rate of the generated audio waveform."
|
|
14
15
|
}
|
|
15
16
|
},
|
|
16
|
-
"required": ["audio", "samplingRate"]
|
|
17
|
+
"required": ["audio", "sampling_rate"]
|
|
17
18
|
}
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
/**
|
|
8
7
|
* Inputs for Text To Image inference
|
|
9
8
|
*/
|
|
@@ -18,7 +17,6 @@ export interface TextToImageInput {
|
|
|
18
17
|
parameters?: TextToImageParameters;
|
|
19
18
|
[property: string]: unknown;
|
|
20
19
|
}
|
|
21
|
-
|
|
22
20
|
/**
|
|
23
21
|
* Additional inference parameters for Text To Image
|
|
24
22
|
*/
|
|
@@ -29,9 +27,9 @@ export interface TextToImageParameters {
|
|
|
29
27
|
*/
|
|
30
28
|
guidance_scale?: number;
|
|
31
29
|
/**
|
|
32
|
-
* One or several prompt to guide what NOT to include in image generation.
|
|
30
|
+
* One prompt to guide what NOT to include in image generation.
|
|
33
31
|
*/
|
|
34
|
-
negative_prompt?: string[];
|
|
32
|
+
negative_prompt?: string;
|
|
35
33
|
/**
|
|
36
34
|
* The number of denoising steps. More denoising steps usually lead to a higher quality
|
|
37
35
|
* image at the expense of slower inference.
|
|
@@ -51,7 +49,6 @@ export interface TextToImageParameters {
|
|
|
51
49
|
target_size?: TargetSize;
|
|
52
50
|
[property: string]: unknown;
|
|
53
51
|
}
|
|
54
|
-
|
|
55
52
|
/**
|
|
56
53
|
* The size in pixel of the output image
|
|
57
54
|
*/
|
|
@@ -60,7 +57,6 @@ export interface TargetSize {
|
|
|
60
57
|
width: number;
|
|
61
58
|
[property: string]: unknown;
|
|
62
59
|
}
|
|
63
|
-
|
|
64
60
|
/**
|
|
65
61
|
* Outputs of inference for the Text To Image task
|
|
66
62
|
*/
|
|
@@ -24,11 +24,8 @@
|
|
|
24
24
|
"description": "A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts."
|
|
25
25
|
},
|
|
26
26
|
"negative_prompt": {
|
|
27
|
-
"type": "array",
|
|
28
|
-
"items": {
|
|
29
|
-
"type": "string"
|
|
30
|
-
},
|
|
31
|
-
"description": "One or several prompt to guide what NOT to include in image generation."
|
|
27
|
+
"type": "string",
|
|
28
|
+
"description": "One prompt to guide what NOT to include in image generation."
|
|
32
29
|
},
|
|
33
30
|
"num_inference_steps": {
|
|
34
31
|
"type": "integer",
|
|
@@ -1,9 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Outputs of inference for the Text To Speech task
|
|
3
|
+
*/
|
|
4
|
+
export interface TextToSpeechOutput {
|
|
5
|
+
/**
|
|
6
|
+
* The generated audio
|
|
7
|
+
*/
|
|
8
|
+
audio: Blob;
|
|
9
|
+
/**
|
|
10
|
+
* The sampling rate of the generated audio waveform.
|
|
11
|
+
*/
|
|
12
|
+
sampling_rate?: number;
|
|
13
|
+
[property: string]: unknown;
|
|
14
|
+
}
|
|
1
15
|
/**
|
|
2
16
|
* Inference code generated from the JSON schema spec in ./spec
|
|
3
17
|
*
|
|
4
18
|
* Using src/scripts/inference-codegen
|
|
5
19
|
*/
|
|
6
|
-
|
|
7
20
|
/**
|
|
8
21
|
* Inputs for Text To Speech inference
|
|
9
22
|
*/
|
|
@@ -18,7 +31,6 @@ export interface TextToSpeechInput {
|
|
|
18
31
|
parameters?: TextToSpeechParameters;
|
|
19
32
|
[property: string]: unknown;
|
|
20
33
|
}
|
|
21
|
-
|
|
22
34
|
/**
|
|
23
35
|
* Additional inference parameters for Text To Speech
|
|
24
36
|
*/
|
|
@@ -29,7 +41,6 @@ export interface TextToSpeechParameters {
|
|
|
29
41
|
generation_parameters?: GenerationParameters;
|
|
30
42
|
[property: string]: unknown;
|
|
31
43
|
}
|
|
32
|
-
|
|
33
44
|
/**
|
|
34
45
|
* Parametrization of the text generation process
|
|
35
46
|
*/
|
|
@@ -116,26 +127,7 @@ export interface GenerationParameters {
|
|
|
116
127
|
use_cache?: boolean;
|
|
117
128
|
[property: string]: unknown;
|
|
118
129
|
}
|
|
119
|
-
|
|
120
130
|
/**
|
|
121
131
|
* Controls the stopping condition for beam-based methods.
|
|
122
132
|
*/
|
|
123
133
|
export type EarlyStoppingUnion = boolean | "never";
|
|
124
|
-
|
|
125
|
-
/**
|
|
126
|
-
* Outputs for Text to Speech inference
|
|
127
|
-
*
|
|
128
|
-
* Outputs of inference for the Text To Audio task
|
|
129
|
-
*/
|
|
130
|
-
export interface TextToSpeechOutput {
|
|
131
|
-
/**
|
|
132
|
-
* The generated audio waveform.
|
|
133
|
-
*/
|
|
134
|
-
audio: unknown;
|
|
135
|
-
samplingRate: unknown;
|
|
136
|
-
/**
|
|
137
|
-
* The sampling rate of the generated audio waveform.
|
|
138
|
-
*/
|
|
139
|
-
sampling_rate?: number;
|
|
140
|
-
[property: string]: unknown;
|
|
141
|
-
}
|
|
@@ -1,7 +1,18 @@
|
|
|
1
1
|
{
|
|
2
|
-
"$ref": "/inference/schemas/text-to-audio/output.json",
|
|
3
2
|
"$id": "/inference/schemas/text-to-speech/output.json",
|
|
4
3
|
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Outputs of inference for the Text To Speech task",
|
|
5
5
|
"title": "TextToSpeechOutput",
|
|
6
|
-
"
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"audio": {
|
|
9
|
+
"description": "The generated audio",
|
|
10
|
+
"comment": "type=binary"
|
|
11
|
+
},
|
|
12
|
+
"sampling_rate": {
|
|
13
|
+
"type": "number",
|
|
14
|
+
"description": "The sampling rate of the generated audio waveform."
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"required": ["audio"]
|
|
7
18
|
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inference code generated from the JSON schema spec in ./spec
|
|
3
|
+
*
|
|
4
|
+
* Using src/scripts/inference-codegen
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Inputs for Text To Video inference
|
|
8
|
+
*/
|
|
9
|
+
export interface TextToVideoInput {
|
|
10
|
+
/**
|
|
11
|
+
* The input text data (sometimes called "prompt")
|
|
12
|
+
*/
|
|
13
|
+
inputs: string;
|
|
14
|
+
/**
|
|
15
|
+
* Additional inference parameters for Text To Video
|
|
16
|
+
*/
|
|
17
|
+
parameters?: TextToVideoParameters;
|
|
18
|
+
[property: string]: unknown;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Additional inference parameters for Text To Video
|
|
22
|
+
*/
|
|
23
|
+
export interface TextToVideoParameters {
|
|
24
|
+
/**
|
|
25
|
+
* A higher guidance scale value encourages the model to generate images closely linked to
|
|
26
|
+
* the text prompt, but values too high may cause saturation and other artifacts.
|
|
27
|
+
*/
|
|
28
|
+
guidance_scale?: number;
|
|
29
|
+
/**
|
|
30
|
+
* One or several prompt to guide what NOT to include in image generation.
|
|
31
|
+
*/
|
|
32
|
+
negative_prompt?: string[];
|
|
33
|
+
/**
|
|
34
|
+
* The num_frames parameter determines how many video frames are generated.
|
|
35
|
+
*/
|
|
36
|
+
num_frames?: number;
|
|
37
|
+
/**
|
|
38
|
+
* The number of denoising steps. More denoising steps usually lead to a higher quality
|
|
39
|
+
* image at the expense of slower inference.
|
|
40
|
+
*/
|
|
41
|
+
num_inference_steps?: number;
|
|
42
|
+
/**
|
|
43
|
+
* Seed for the random number generator.
|
|
44
|
+
*/
|
|
45
|
+
seed?: number;
|
|
46
|
+
[property: string]: unknown;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Outputs of inference for the Text To Video task
|
|
50
|
+
*/
|
|
51
|
+
export interface TextToVideoOutput {
|
|
52
|
+
/**
|
|
53
|
+
* The generated video returned as raw bytes in the payload.
|
|
54
|
+
*/
|
|
55
|
+
video: unknown;
|
|
56
|
+
[property: string]: unknown;
|
|
57
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/text-to-video/input.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Inputs for Text To Video inference",
|
|
5
|
+
"title": "TextToVideoInput",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"inputs": {
|
|
9
|
+
"description": "The input text data (sometimes called \"prompt\")",
|
|
10
|
+
"type": "string"
|
|
11
|
+
},
|
|
12
|
+
"parameters": {
|
|
13
|
+
"description": "Additional inference parameters for Text To Video",
|
|
14
|
+
"$ref": "#/$defs/TextToVideoParameters"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"$defs": {
|
|
18
|
+
"TextToVideoParameters": {
|
|
19
|
+
"title": "TextToVideoParameters",
|
|
20
|
+
"type": "object",
|
|
21
|
+
"properties": {
|
|
22
|
+
"num_frames": {
|
|
23
|
+
"type": "number",
|
|
24
|
+
"description": "The num_frames parameter determines how many video frames are generated."
|
|
25
|
+
},
|
|
26
|
+
"guidance_scale": {
|
|
27
|
+
"type": "number",
|
|
28
|
+
"description": "A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts."
|
|
29
|
+
},
|
|
30
|
+
"negative_prompt": {
|
|
31
|
+
"type": "array",
|
|
32
|
+
"items": {
|
|
33
|
+
"type": "string"
|
|
34
|
+
},
|
|
35
|
+
"description": "One or several prompt to guide what NOT to include in image generation."
|
|
36
|
+
},
|
|
37
|
+
"num_inference_steps": {
|
|
38
|
+
"type": "integer",
|
|
39
|
+
"description": "The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
|
|
40
|
+
},
|
|
41
|
+
"seed": {
|
|
42
|
+
"type": "integer",
|
|
43
|
+
"description": "Seed for the random number generator."
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
},
|
|
48
|
+
"required": ["inputs"]
|
|
49
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/text-to-video/output.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Outputs of inference for the Text To Video task",
|
|
5
|
+
"title": "TextToVideoOutput",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"video": {
|
|
9
|
+
"description": "The generated video returned as raw bytes in the payload."
|
|
10
|
+
}
|
|
11
|
+
},
|
|
12
|
+
"required": ["video"]
|
|
13
|
+
}
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
/**
|
|
8
7
|
* Inputs for Text2text Generation inference
|
|
9
8
|
*/
|
|
@@ -18,7 +17,6 @@ export interface Text2TextGenerationInput {
|
|
|
18
17
|
parameters?: Text2TextGenerationParameters;
|
|
19
18
|
[property: string]: unknown;
|
|
20
19
|
}
|
|
21
|
-
|
|
22
20
|
/**
|
|
23
21
|
* Additional inference parameters for Text2text Generation
|
|
24
22
|
*/
|
|
@@ -30,16 +28,16 @@ export interface Text2TextGenerationParameters {
|
|
|
30
28
|
/**
|
|
31
29
|
* Additional parametrization of the text generation algorithm
|
|
32
30
|
*/
|
|
33
|
-
generate_parameters?: { [key: string]: unknown };
|
|
31
|
+
generate_parameters?: {
|
|
32
|
+
[key: string]: unknown;
|
|
33
|
+
};
|
|
34
34
|
/**
|
|
35
35
|
* The truncation strategy to use
|
|
36
36
|
*/
|
|
37
37
|
truncation?: Text2TextGenerationTruncationStrategy;
|
|
38
38
|
[property: string]: unknown;
|
|
39
39
|
}
|
|
40
|
-
|
|
41
40
|
export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
|
|
42
|
-
|
|
43
41
|
/**
|
|
44
42
|
* Outputs of inference for the Text2text Generation task
|
|
45
43
|
*/
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Using src/scripts/inference-codegen
|
|
5
5
|
*/
|
|
6
|
-
|
|
7
6
|
/**
|
|
8
7
|
* Inputs for Translation inference
|
|
9
8
|
*/
|
|
@@ -18,7 +17,6 @@ export interface TranslationInput {
|
|
|
18
17
|
parameters?: TranslationParameters;
|
|
19
18
|
[property: string]: unknown;
|
|
20
19
|
}
|
|
21
|
-
|
|
22
20
|
/**
|
|
23
21
|
* Additional inference parameters for Translation
|
|
24
22
|
*/
|
|
@@ -30,7 +28,9 @@ export interface TranslationParameters {
|
|
|
30
28
|
/**
|
|
31
29
|
* Additional parametrization of the text generation algorithm.
|
|
32
30
|
*/
|
|
33
|
-
generate_parameters?: { [key: string]: unknown };
|
|
31
|
+
generate_parameters?: {
|
|
32
|
+
[key: string]: unknown;
|
|
33
|
+
};
|
|
34
34
|
/**
|
|
35
35
|
* The source language of the text. Required for models that can translate from multiple
|
|
36
36
|
* languages.
|
|
@@ -47,12 +47,10 @@ export interface TranslationParameters {
|
|
|
47
47
|
truncation?: TranslationTruncationStrategy;
|
|
48
48
|
[property: string]: unknown;
|
|
49
49
|
}
|
|
50
|
-
|
|
51
50
|
/**
|
|
52
51
|
* The truncation strategy to use.
|
|
53
52
|
*/
|
|
54
53
|
export type TranslationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
|
|
55
|
-
|
|
56
54
|
/**
|
|
57
55
|
* Outputs of inference for the Translation task
|
|
58
56
|
*/
|
|
@@ -11,10 +11,12 @@
|
|
|
11
11
|
"title": "VisualQuestionAnsweringInputData",
|
|
12
12
|
"properties": {
|
|
13
13
|
"image": {
|
|
14
|
-
"description": "The image."
|
|
14
|
+
"description": "The image.",
|
|
15
|
+
"comment": "type=binary"
|
|
15
16
|
},
|
|
16
17
|
"question": {
|
|
17
|
-
"description": "The question to answer based on the image."
|
|
18
|
+
"description": "The question to answer based on the image.",
|
|
19
|
+
"type": "string"
|
|
18
20
|
}
|
|
19
21
|
},
|
|
20
22
|
"required": ["question", "image"]
|
|
@@ -7,7 +7,8 @@
|
|
|
7
7
|
"properties": {
|
|
8
8
|
"inputs": {
|
|
9
9
|
"type": "string",
|
|
10
|
-
"description": "The input image data to classify as a base64-encoded string."
|
|
10
|
+
"description": "The input image data to classify as a base64-encoded string.",
|
|
11
|
+
"comment": "type=binary"
|
|
11
12
|
},
|
|
12
13
|
"parameters": {
|
|
13
14
|
"description": "Additional inference parameters for Zero Shot Image Classification",
|
|
@@ -7,7 +7,8 @@
|
|
|
7
7
|
"properties": {
|
|
8
8
|
"inputs": {
|
|
9
9
|
"description": "The input image data as a base64-encoded string.",
|
|
10
|
-
"type": "string"
|
|
10
|
+
"type": "string",
|
|
11
|
+
"comment": "type=binary"
|
|
11
12
|
},
|
|
12
13
|
"parameters": {
|
|
13
14
|
"description": "Additional inference parameters for Zero Shot Object Detection",
|