@huggingface/tasks 0.12.7 → 0.12.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +58 -2
- package/dist/index.js +58 -2
- package/dist/src/model-libraries.d.ts +9 -2
- package/dist/src/model-libraries.d.ts.map +1 -1
- package/dist/src/tasks/automatic-speech-recognition/inference.d.ts +2 -2
- package/dist/src/tasks/chat-completion/inference.d.ts +58 -21
- package/dist/src/tasks/chat-completion/inference.d.ts.map +1 -1
- package/dist/src/tasks/image-to-text/inference.d.ts +2 -2
- package/dist/src/tasks/index.d.ts +1 -1
- package/dist/src/tasks/index.d.ts.map +1 -1
- package/dist/src/tasks/text-generation/inference.d.ts +62 -0
- package/dist/src/tasks/text-generation/inference.d.ts.map +1 -1
- package/dist/src/tasks/text-to-audio/inference.d.ts +2 -2
- package/dist/src/tasks/text-to-speech/inference.d.ts +6 -8
- package/dist/src/tasks/text-to-speech/inference.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/model-libraries-snippets.ts +1 -1
- package/src/model-libraries.ts +7 -0
- package/src/tasks/automatic-speech-recognition/inference.ts +2 -2
- package/src/tasks/chat-completion/inference.ts +66 -21
- package/src/tasks/chat-completion/spec/input.json +163 -40
- package/src/tasks/chat-completion/spec/output.json +28 -18
- package/src/tasks/chat-completion/spec/stream_output.json +57 -14
- package/src/tasks/common-definitions.json +2 -2
- package/src/tasks/image-to-text/inference.ts +2 -2
- package/src/tasks/index.ts +3 -2
- package/src/tasks/text-generation/inference.ts +62 -0
- package/src/tasks/text-generation/spec/input.json +24 -0
- package/src/tasks/text-generation/spec/stream_output.json +7 -1
- package/src/tasks/text-to-audio/inference.ts +2 -2
- package/src/tasks/text-to-speech/inference.ts +6 -8
- package/src/tasks/text-to-speech/spec/input.json +26 -2
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"description": "Chat Completion Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
|
|
5
5
|
"title": "ChatCompletionStreamOutput",
|
|
6
6
|
"type": "object",
|
|
7
|
-
"required": ["id", "
|
|
7
|
+
"required": ["id", "created", "model", "system_fingerprint", "choices"],
|
|
8
8
|
"properties": {
|
|
9
9
|
"choices": {
|
|
10
10
|
"type": "array",
|
|
@@ -25,11 +25,16 @@
|
|
|
25
25
|
"type": "string",
|
|
26
26
|
"example": "mistralai/Mistral-7B-Instruct-v0.2"
|
|
27
27
|
},
|
|
28
|
-
"object": {
|
|
29
|
-
"type": "string"
|
|
30
|
-
},
|
|
31
28
|
"system_fingerprint": {
|
|
32
29
|
"type": "string"
|
|
30
|
+
},
|
|
31
|
+
"usage": {
|
|
32
|
+
"allOf": [
|
|
33
|
+
{
|
|
34
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputUsage"
|
|
35
|
+
}
|
|
36
|
+
],
|
|
37
|
+
"nullable": true
|
|
33
38
|
}
|
|
34
39
|
},
|
|
35
40
|
"$defs": {
|
|
@@ -61,28 +66,44 @@
|
|
|
61
66
|
"title": "ChatCompletionStreamOutputChoice"
|
|
62
67
|
},
|
|
63
68
|
"ChatCompletionStreamOutputDelta": {
|
|
69
|
+
"oneOf": [
|
|
70
|
+
{
|
|
71
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputTextMessage"
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputToolCallDelta"
|
|
75
|
+
}
|
|
76
|
+
],
|
|
77
|
+
"title": "ChatCompletionStreamOutputDelta"
|
|
78
|
+
},
|
|
79
|
+
"ChatCompletionStreamOutputTextMessage": {
|
|
64
80
|
"type": "object",
|
|
65
|
-
"required": ["role"],
|
|
81
|
+
"required": ["role", "content"],
|
|
66
82
|
"properties": {
|
|
67
83
|
"content": {
|
|
68
84
|
"type": "string",
|
|
69
|
-
"example": "
|
|
70
|
-
"nullable": true
|
|
85
|
+
"example": "My name is David and I"
|
|
71
86
|
},
|
|
72
87
|
"role": {
|
|
73
88
|
"type": "string",
|
|
74
89
|
"example": "user"
|
|
90
|
+
}
|
|
91
|
+
},
|
|
92
|
+
"title": "ChatCompletionStreamOutputTextMessage"
|
|
93
|
+
},
|
|
94
|
+
"ChatCompletionStreamOutputToolCallDelta": {
|
|
95
|
+
"type": "object",
|
|
96
|
+
"required": ["role", "tool_calls"],
|
|
97
|
+
"properties": {
|
|
98
|
+
"role": {
|
|
99
|
+
"type": "string",
|
|
100
|
+
"example": "assistant"
|
|
75
101
|
},
|
|
76
102
|
"tool_calls": {
|
|
77
|
-
"
|
|
78
|
-
{
|
|
79
|
-
"$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall"
|
|
80
|
-
}
|
|
81
|
-
],
|
|
82
|
-
"nullable": true
|
|
103
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall"
|
|
83
104
|
}
|
|
84
105
|
},
|
|
85
|
-
"title": "
|
|
106
|
+
"title": "ChatCompletionStreamOutputToolCallDelta"
|
|
86
107
|
},
|
|
87
108
|
"ChatCompletionStreamOutputDeltaToolCall": {
|
|
88
109
|
"type": "object",
|
|
@@ -165,6 +186,28 @@
|
|
|
165
186
|
}
|
|
166
187
|
},
|
|
167
188
|
"title": "ChatCompletionStreamOutputTopLogprob"
|
|
189
|
+
},
|
|
190
|
+
"ChatCompletionStreamOutputUsage": {
|
|
191
|
+
"type": "object",
|
|
192
|
+
"required": ["prompt_tokens", "completion_tokens", "total_tokens"],
|
|
193
|
+
"properties": {
|
|
194
|
+
"completion_tokens": {
|
|
195
|
+
"type": "integer",
|
|
196
|
+
"format": "int32",
|
|
197
|
+
"minimum": 0
|
|
198
|
+
},
|
|
199
|
+
"prompt_tokens": {
|
|
200
|
+
"type": "integer",
|
|
201
|
+
"format": "int32",
|
|
202
|
+
"minimum": 0
|
|
203
|
+
},
|
|
204
|
+
"total_tokens": {
|
|
205
|
+
"type": "integer",
|
|
206
|
+
"format": "int32",
|
|
207
|
+
"minimum": 0
|
|
208
|
+
}
|
|
209
|
+
},
|
|
210
|
+
"title": "ChatCompletionStreamOutputUsage"
|
|
168
211
|
}
|
|
169
212
|
}
|
|
170
213
|
}
|
|
@@ -59,7 +59,7 @@
|
|
|
59
59
|
},
|
|
60
60
|
"max_new_tokens": {
|
|
61
61
|
"type": "integer",
|
|
62
|
-
"description": "The maximum number of tokens to generate. Takes precedence over
|
|
62
|
+
"description": "The maximum number of tokens to generate. Takes precedence over max_length."
|
|
63
63
|
},
|
|
64
64
|
"min_length": {
|
|
65
65
|
"type": "integer",
|
|
@@ -67,7 +67,7 @@
|
|
|
67
67
|
},
|
|
68
68
|
"min_new_tokens": {
|
|
69
69
|
"type": "integer",
|
|
70
|
-
"description": "The minimum number of tokens to generate. Takes precedence over
|
|
70
|
+
"description": "The minimum number of tokens to generate. Takes precedence over min_length."
|
|
71
71
|
},
|
|
72
72
|
"do_sample": {
|
|
73
73
|
"type": "boolean",
|
|
@@ -72,7 +72,7 @@ export interface GenerationParameters {
|
|
|
72
72
|
*/
|
|
73
73
|
max_length?: number;
|
|
74
74
|
/**
|
|
75
|
-
* The maximum number of tokens to generate. Takes precedence over
|
|
75
|
+
* The maximum number of tokens to generate. Takes precedence over max_length.
|
|
76
76
|
*/
|
|
77
77
|
max_new_tokens?: number;
|
|
78
78
|
/**
|
|
@@ -80,7 +80,7 @@ export interface GenerationParameters {
|
|
|
80
80
|
*/
|
|
81
81
|
min_length?: number;
|
|
82
82
|
/**
|
|
83
|
-
* The minimum number of tokens to generate. Takes precedence over
|
|
83
|
+
* The minimum number of tokens to generate. Takes precedence over min_length.
|
|
84
84
|
*/
|
|
85
85
|
min_new_tokens?: number;
|
|
86
86
|
/**
|
package/src/tasks/index.ts
CHANGED
|
@@ -39,6 +39,7 @@ import zeroShotImageClassification from "./zero-shot-image-classification/data";
|
|
|
39
39
|
import zeroShotObjectDetection from "./zero-shot-object-detection/data";
|
|
40
40
|
import imageTo3D from "./image-to-3d/data";
|
|
41
41
|
import textTo3D from "./text-to-3d/data";
|
|
42
|
+
import keypointDetection from "./keypoint-detection/data";
|
|
42
43
|
|
|
43
44
|
export type * from "./audio-classification/inference";
|
|
44
45
|
export type * from "./automatic-speech-recognition/inference";
|
|
@@ -71,7 +72,7 @@ export type * from "./sentence-similarity/inference";
|
|
|
71
72
|
export type * from "./summarization/inference";
|
|
72
73
|
export type * from "./table-question-answering/inference";
|
|
73
74
|
export type { TextToImageInput, TextToImageOutput, TextToImageParameters } from "./text-to-image/inference";
|
|
74
|
-
export type { TextToAudioParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference";
|
|
75
|
+
export type { TextToSpeechParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference";
|
|
75
76
|
export type * from "./token-classification/inference";
|
|
76
77
|
export type { TranslationInput, TranslationOutput } from "./translation/inference";
|
|
77
78
|
export type {
|
|
@@ -208,7 +209,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
|
|
|
208
209
|
"image-text-to-text": getData("image-text-to-text", imageTextToText),
|
|
209
210
|
"image-to-text": getData("image-to-text", imageToText),
|
|
210
211
|
"image-to-video": undefined,
|
|
211
|
-
"keypoint-detection": getData("keypoint-detection",
|
|
212
|
+
"keypoint-detection": getData("keypoint-detection", keypointDetection),
|
|
212
213
|
"mask-generation": getData("mask-generation", maskGeneration),
|
|
213
214
|
"multiple-choice": undefined,
|
|
214
215
|
"object-detection": getData("object-detection", objectDetection),
|
|
@@ -19,23 +19,84 @@ export interface TextGenerationInput {
|
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
export interface TextGenerationInputGenerateParameters {
|
|
22
|
+
/**
|
|
23
|
+
* Lora adapter id
|
|
24
|
+
*/
|
|
25
|
+
adapter_id?: string;
|
|
26
|
+
/**
|
|
27
|
+
* Generate best_of sequences and return the one if the highest token logprobs.
|
|
28
|
+
*/
|
|
22
29
|
best_of?: number;
|
|
30
|
+
/**
|
|
31
|
+
* Whether to return decoder input token logprobs and ids.
|
|
32
|
+
*/
|
|
23
33
|
decoder_input_details?: boolean;
|
|
34
|
+
/**
|
|
35
|
+
* Whether to return generation details.
|
|
36
|
+
*/
|
|
24
37
|
details?: boolean;
|
|
38
|
+
/**
|
|
39
|
+
* Activate logits sampling.
|
|
40
|
+
*/
|
|
25
41
|
do_sample?: boolean;
|
|
42
|
+
/**
|
|
43
|
+
* The parameter for frequency penalty. 1.0 means no penalty
|
|
44
|
+
* Penalize new tokens based on their existing frequency in the text so far,
|
|
45
|
+
* decreasing the model's likelihood to repeat the same line verbatim.
|
|
46
|
+
*/
|
|
26
47
|
frequency_penalty?: number;
|
|
27
48
|
grammar?: TextGenerationInputGrammarType;
|
|
49
|
+
/**
|
|
50
|
+
* Maximum number of tokens to generate.
|
|
51
|
+
*/
|
|
28
52
|
max_new_tokens?: number;
|
|
53
|
+
/**
|
|
54
|
+
* The parameter for repetition penalty. 1.0 means no penalty.
|
|
55
|
+
* See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
|
|
56
|
+
*/
|
|
29
57
|
repetition_penalty?: number;
|
|
58
|
+
/**
|
|
59
|
+
* Whether to prepend the prompt to the generated text
|
|
60
|
+
*/
|
|
30
61
|
return_full_text?: boolean;
|
|
62
|
+
/**
|
|
63
|
+
* Random sampling seed.
|
|
64
|
+
*/
|
|
31
65
|
seed?: number;
|
|
66
|
+
/**
|
|
67
|
+
* Stop generating tokens if a member of `stop` is generated.
|
|
68
|
+
*/
|
|
32
69
|
stop?: string[];
|
|
70
|
+
/**
|
|
71
|
+
* The value used to module the logits distribution.
|
|
72
|
+
*/
|
|
33
73
|
temperature?: number;
|
|
74
|
+
/**
|
|
75
|
+
* The number of highest probability vocabulary tokens to keep for top-k-filtering.
|
|
76
|
+
*/
|
|
34
77
|
top_k?: number;
|
|
78
|
+
/**
|
|
79
|
+
* The number of highest probability vocabulary tokens to keep for top-n-filtering.
|
|
80
|
+
*/
|
|
35
81
|
top_n_tokens?: number;
|
|
82
|
+
/**
|
|
83
|
+
* Top-p value for nucleus sampling.
|
|
84
|
+
*/
|
|
36
85
|
top_p?: number;
|
|
86
|
+
/**
|
|
87
|
+
* Truncate inputs tokens to the given size.
|
|
88
|
+
*/
|
|
37
89
|
truncate?: number;
|
|
90
|
+
/**
|
|
91
|
+
* Typical Decoding mass
|
|
92
|
+
* See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666)
|
|
93
|
+
* for more information.
|
|
94
|
+
*/
|
|
38
95
|
typical_p?: number;
|
|
96
|
+
/**
|
|
97
|
+
* Watermarking with [A Watermark for Large Language
|
|
98
|
+
* Models](https://arxiv.org/abs/2301.10226).
|
|
99
|
+
*/
|
|
39
100
|
watermark?: boolean;
|
|
40
101
|
[property: string]: unknown;
|
|
41
102
|
}
|
|
@@ -125,6 +186,7 @@ export interface TextGenerationStreamOutput {
|
|
|
125
186
|
export interface TextGenerationStreamOutputStreamDetails {
|
|
126
187
|
finish_reason: TextGenerationOutputFinishReason;
|
|
127
188
|
generated_tokens: number;
|
|
189
|
+
input_length: number;
|
|
128
190
|
seed?: number;
|
|
129
191
|
[property: string]: unknown;
|
|
130
192
|
}
|
|
@@ -22,8 +22,16 @@
|
|
|
22
22
|
"TextGenerationInputGenerateParameters": {
|
|
23
23
|
"type": "object",
|
|
24
24
|
"properties": {
|
|
25
|
+
"adapter_id": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"description": "Lora adapter id",
|
|
28
|
+
"default": "null",
|
|
29
|
+
"example": "null",
|
|
30
|
+
"nullable": true
|
|
31
|
+
},
|
|
25
32
|
"best_of": {
|
|
26
33
|
"type": "integer",
|
|
34
|
+
"description": "Generate best_of sequences and return the one if the highest token logprobs.",
|
|
27
35
|
"default": "null",
|
|
28
36
|
"example": 1,
|
|
29
37
|
"nullable": true,
|
|
@@ -32,20 +40,24 @@
|
|
|
32
40
|
},
|
|
33
41
|
"decoder_input_details": {
|
|
34
42
|
"type": "boolean",
|
|
43
|
+
"description": "Whether to return decoder input token logprobs and ids.",
|
|
35
44
|
"default": "false"
|
|
36
45
|
},
|
|
37
46
|
"details": {
|
|
38
47
|
"type": "boolean",
|
|
48
|
+
"description": "Whether to return generation details.",
|
|
39
49
|
"default": "true"
|
|
40
50
|
},
|
|
41
51
|
"do_sample": {
|
|
42
52
|
"type": "boolean",
|
|
53
|
+
"description": "Activate logits sampling.",
|
|
43
54
|
"default": "false",
|
|
44
55
|
"example": true
|
|
45
56
|
},
|
|
46
57
|
"frequency_penalty": {
|
|
47
58
|
"type": "number",
|
|
48
59
|
"format": "float",
|
|
60
|
+
"description": "The parameter for frequency penalty. 1.0 means no penalty\nPenalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
|
|
49
61
|
"default": "null",
|
|
50
62
|
"example": 0.1,
|
|
51
63
|
"nullable": true,
|
|
@@ -63,6 +75,7 @@
|
|
|
63
75
|
"max_new_tokens": {
|
|
64
76
|
"type": "integer",
|
|
65
77
|
"format": "int32",
|
|
78
|
+
"description": "Maximum number of tokens to generate.",
|
|
66
79
|
"default": "100",
|
|
67
80
|
"example": "20",
|
|
68
81
|
"nullable": true,
|
|
@@ -71,6 +84,7 @@
|
|
|
71
84
|
"repetition_penalty": {
|
|
72
85
|
"type": "number",
|
|
73
86
|
"format": "float",
|
|
87
|
+
"description": "The parameter for repetition penalty. 1.0 means no penalty.\nSee [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.",
|
|
74
88
|
"default": "null",
|
|
75
89
|
"example": 1.03,
|
|
76
90
|
"nullable": true,
|
|
@@ -78,6 +92,7 @@
|
|
|
78
92
|
},
|
|
79
93
|
"return_full_text": {
|
|
80
94
|
"type": "boolean",
|
|
95
|
+
"description": "Whether to prepend the prompt to the generated text",
|
|
81
96
|
"default": "null",
|
|
82
97
|
"example": false,
|
|
83
98
|
"nullable": true
|
|
@@ -85,6 +100,7 @@
|
|
|
85
100
|
"seed": {
|
|
86
101
|
"type": "integer",
|
|
87
102
|
"format": "int64",
|
|
103
|
+
"description": "Random sampling seed.",
|
|
88
104
|
"default": "null",
|
|
89
105
|
"example": "null",
|
|
90
106
|
"nullable": true,
|
|
@@ -96,12 +112,14 @@
|
|
|
96
112
|
"items": {
|
|
97
113
|
"type": "string"
|
|
98
114
|
},
|
|
115
|
+
"description": "Stop generating tokens if a member of `stop` is generated.",
|
|
99
116
|
"example": ["photographer"],
|
|
100
117
|
"maxItems": 4
|
|
101
118
|
},
|
|
102
119
|
"temperature": {
|
|
103
120
|
"type": "number",
|
|
104
121
|
"format": "float",
|
|
122
|
+
"description": "The value used to module the logits distribution.",
|
|
105
123
|
"default": "null",
|
|
106
124
|
"example": 0.5,
|
|
107
125
|
"nullable": true,
|
|
@@ -110,6 +128,7 @@
|
|
|
110
128
|
"top_k": {
|
|
111
129
|
"type": "integer",
|
|
112
130
|
"format": "int32",
|
|
131
|
+
"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering.",
|
|
113
132
|
"default": "null",
|
|
114
133
|
"example": 10,
|
|
115
134
|
"nullable": true,
|
|
@@ -118,6 +137,7 @@
|
|
|
118
137
|
"top_n_tokens": {
|
|
119
138
|
"type": "integer",
|
|
120
139
|
"format": "int32",
|
|
140
|
+
"description": "The number of highest probability vocabulary tokens to keep for top-n-filtering.",
|
|
121
141
|
"default": "null",
|
|
122
142
|
"example": 5,
|
|
123
143
|
"nullable": true,
|
|
@@ -127,6 +147,7 @@
|
|
|
127
147
|
"top_p": {
|
|
128
148
|
"type": "number",
|
|
129
149
|
"format": "float",
|
|
150
|
+
"description": "Top-p value for nucleus sampling.",
|
|
130
151
|
"default": "null",
|
|
131
152
|
"example": 0.95,
|
|
132
153
|
"nullable": true,
|
|
@@ -135,6 +156,7 @@
|
|
|
135
156
|
},
|
|
136
157
|
"truncate": {
|
|
137
158
|
"type": "integer",
|
|
159
|
+
"description": "Truncate inputs tokens to the given size.",
|
|
138
160
|
"default": "null",
|
|
139
161
|
"example": "null",
|
|
140
162
|
"nullable": true,
|
|
@@ -143,6 +165,7 @@
|
|
|
143
165
|
"typical_p": {
|
|
144
166
|
"type": "number",
|
|
145
167
|
"format": "float",
|
|
168
|
+
"description": "Typical Decoding mass\nSee [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.",
|
|
146
169
|
"default": "null",
|
|
147
170
|
"example": 0.95,
|
|
148
171
|
"nullable": true,
|
|
@@ -151,6 +174,7 @@
|
|
|
151
174
|
},
|
|
152
175
|
"watermark": {
|
|
153
176
|
"type": "boolean",
|
|
177
|
+
"description": "Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226).",
|
|
154
178
|
"default": "false",
|
|
155
179
|
"example": true
|
|
156
180
|
}
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
"$defs": {
|
|
40
40
|
"TextGenerationStreamOutputStreamDetails": {
|
|
41
41
|
"type": "object",
|
|
42
|
-
"required": ["finish_reason", "generated_tokens"],
|
|
42
|
+
"required": ["finish_reason", "generated_tokens", "input_length"],
|
|
43
43
|
"properties": {
|
|
44
44
|
"finish_reason": {
|
|
45
45
|
"$ref": "#/$defs/TextGenerationStreamOutputFinishReason"
|
|
@@ -50,6 +50,12 @@
|
|
|
50
50
|
"example": 1,
|
|
51
51
|
"minimum": 0
|
|
52
52
|
},
|
|
53
|
+
"input_length": {
|
|
54
|
+
"type": "integer",
|
|
55
|
+
"format": "int32",
|
|
56
|
+
"example": 1,
|
|
57
|
+
"minimum": 0
|
|
58
|
+
},
|
|
53
59
|
"seed": {
|
|
54
60
|
"type": "integer",
|
|
55
61
|
"format": "int64",
|
|
@@ -68,7 +68,7 @@ export interface GenerationParameters {
|
|
|
68
68
|
*/
|
|
69
69
|
max_length?: number;
|
|
70
70
|
/**
|
|
71
|
-
* The maximum number of tokens to generate. Takes precedence over
|
|
71
|
+
* The maximum number of tokens to generate. Takes precedence over max_length.
|
|
72
72
|
*/
|
|
73
73
|
max_new_tokens?: number;
|
|
74
74
|
/**
|
|
@@ -76,7 +76,7 @@ export interface GenerationParameters {
|
|
|
76
76
|
*/
|
|
77
77
|
min_length?: number;
|
|
78
78
|
/**
|
|
79
|
-
* The minimum number of tokens to generate. Takes precedence over
|
|
79
|
+
* The minimum number of tokens to generate. Takes precedence over min_length.
|
|
80
80
|
*/
|
|
81
81
|
min_new_tokens?: number;
|
|
82
82
|
/**
|
|
@@ -5,9 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
/**
|
|
8
|
-
* Inputs for Text
|
|
9
|
-
*
|
|
10
|
-
* Inputs for Text To Audio inference
|
|
8
|
+
* Inputs for Text To Speech inference
|
|
11
9
|
*/
|
|
12
10
|
export interface TextToSpeechInput {
|
|
13
11
|
/**
|
|
@@ -17,16 +15,16 @@ export interface TextToSpeechInput {
|
|
|
17
15
|
/**
|
|
18
16
|
* Additional inference parameters
|
|
19
17
|
*/
|
|
20
|
-
parameters?: TextToAudioParameters;
|
|
18
|
+
parameters?: TextToSpeechParameters;
|
|
21
19
|
[property: string]: unknown;
|
|
22
20
|
}
|
|
23
21
|
|
|
24
22
|
/**
|
|
25
23
|
* Additional inference parameters
|
|
26
24
|
*
|
|
27
|
-
* Additional inference parameters for Text To Audio
|
|
25
|
+
* Additional inference parameters for Text To Speech
|
|
28
26
|
*/
|
|
29
|
-
export interface TextToAudioParameters {
|
|
27
|
+
export interface TextToSpeechParameters {
|
|
30
28
|
/**
|
|
31
29
|
* Parametrization of the text generation process
|
|
32
30
|
*/
|
|
@@ -70,7 +68,7 @@ export interface GenerationParameters {
|
|
|
70
68
|
*/
|
|
71
69
|
max_length?: number;
|
|
72
70
|
/**
|
|
73
|
-
* The maximum number of tokens to generate. Takes precedence over
|
|
71
|
+
* The maximum number of tokens to generate. Takes precedence over max_length.
|
|
74
72
|
*/
|
|
75
73
|
max_new_tokens?: number;
|
|
76
74
|
/**
|
|
@@ -78,7 +76,7 @@ export interface GenerationParameters {
|
|
|
78
76
|
*/
|
|
79
77
|
min_length?: number;
|
|
80
78
|
/**
|
|
81
|
-
* The minimum number of tokens to generate. Takes precedence over
|
|
79
|
+
* The minimum number of tokens to generate. Takes precedence over min_length.
|
|
82
80
|
*/
|
|
83
81
|
min_new_tokens?: number;
|
|
84
82
|
/**
|
|
@@ -1,7 +1,31 @@
|
|
|
1
1
|
{
|
|
2
|
-
"$ref": "/inference/schemas/text-to-audio/input.json",
|
|
3
2
|
"$id": "/inference/schemas/text-to-speech/input.json",
|
|
4
3
|
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Inputs for Text To Speech inference",
|
|
5
5
|
"title": "TextToSpeechInput",
|
|
6
|
-
"
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"inputs": {
|
|
9
|
+
"description": "The input text data",
|
|
10
|
+
"type": "string"
|
|
11
|
+
},
|
|
12
|
+
"parameters": {
|
|
13
|
+
"description": "Additional inference parameters",
|
|
14
|
+
"$ref": "#/$defs/TextToSpeechParameters"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"$defs": {
|
|
18
|
+
"TextToSpeechParameters": {
|
|
19
|
+
"title": "TextToSpeechParameters",
|
|
20
|
+
"description": "Additional inference parameters for Text To Speech",
|
|
21
|
+
"type": "object",
|
|
22
|
+
"properties": {
|
|
23
|
+
"generate": {
|
|
24
|
+
"description": "Parametrization of the text generation process",
|
|
25
|
+
"$ref": "/inference/schemas/common-definitions.json#/definitions/GenerationParameters"
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
"required": ["inputs"]
|
|
7
31
|
}
|