@huggingface/tasks 0.12.7 → 0.12.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/dist/index.cjs +58 -2
  2. package/dist/index.js +58 -2
  3. package/dist/src/model-libraries.d.ts +9 -2
  4. package/dist/src/model-libraries.d.ts.map +1 -1
  5. package/dist/src/tasks/automatic-speech-recognition/inference.d.ts +2 -2
  6. package/dist/src/tasks/chat-completion/inference.d.ts +58 -21
  7. package/dist/src/tasks/chat-completion/inference.d.ts.map +1 -1
  8. package/dist/src/tasks/image-to-text/inference.d.ts +2 -2
  9. package/dist/src/tasks/index.d.ts +1 -1
  10. package/dist/src/tasks/index.d.ts.map +1 -1
  11. package/dist/src/tasks/text-generation/inference.d.ts +62 -0
  12. package/dist/src/tasks/text-generation/inference.d.ts.map +1 -1
  13. package/dist/src/tasks/text-to-audio/inference.d.ts +2 -2
  14. package/dist/src/tasks/text-to-speech/inference.d.ts +6 -8
  15. package/dist/src/tasks/text-to-speech/inference.d.ts.map +1 -1
  16. package/package.json +1 -1
  17. package/src/model-libraries-snippets.ts +1 -1
  18. package/src/model-libraries.ts +7 -0
  19. package/src/tasks/automatic-speech-recognition/inference.ts +2 -2
  20. package/src/tasks/chat-completion/inference.ts +66 -21
  21. package/src/tasks/chat-completion/spec/input.json +163 -40
  22. package/src/tasks/chat-completion/spec/output.json +28 -18
  23. package/src/tasks/chat-completion/spec/stream_output.json +57 -14
  24. package/src/tasks/common-definitions.json +2 -2
  25. package/src/tasks/image-to-text/inference.ts +2 -2
  26. package/src/tasks/index.ts +3 -2
  27. package/src/tasks/text-generation/inference.ts +62 -0
  28. package/src/tasks/text-generation/spec/input.json +24 -0
  29. package/src/tasks/text-generation/spec/stream_output.json +7 -1
  30. package/src/tasks/text-to-audio/inference.ts +2 -2
  31. package/src/tasks/text-to-speech/inference.ts +6 -8
  32. package/src/tasks/text-to-speech/spec/input.json +26 -2
@@ -4,7 +4,7 @@
4
4
  "description": "Chat Completion Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
5
5
  "title": "ChatCompletionStreamOutput",
6
6
  "type": "object",
7
- "required": ["id", "object", "created", "model", "system_fingerprint", "choices"],
7
+ "required": ["id", "created", "model", "system_fingerprint", "choices"],
8
8
  "properties": {
9
9
  "choices": {
10
10
  "type": "array",
@@ -25,11 +25,16 @@
25
25
  "type": "string",
26
26
  "example": "mistralai/Mistral-7B-Instruct-v0.2"
27
27
  },
28
- "object": {
29
- "type": "string"
30
- },
31
28
  "system_fingerprint": {
32
29
  "type": "string"
30
+ },
31
+ "usage": {
32
+ "allOf": [
33
+ {
34
+ "$ref": "#/$defs/ChatCompletionStreamOutputUsage"
35
+ }
36
+ ],
37
+ "nullable": true
33
38
  }
34
39
  },
35
40
  "$defs": {
@@ -61,28 +66,44 @@
61
66
  "title": "ChatCompletionStreamOutputChoice"
62
67
  },
63
68
  "ChatCompletionStreamOutputDelta": {
69
+ "oneOf": [
70
+ {
71
+ "$ref": "#/$defs/ChatCompletionStreamOutputTextMessage"
72
+ },
73
+ {
74
+ "$ref": "#/$defs/ChatCompletionStreamOutputToolCallDelta"
75
+ }
76
+ ],
77
+ "title": "ChatCompletionStreamOutputDelta"
78
+ },
79
+ "ChatCompletionStreamOutputTextMessage": {
64
80
  "type": "object",
65
- "required": ["role"],
81
+ "required": ["role", "content"],
66
82
  "properties": {
67
83
  "content": {
68
84
  "type": "string",
69
- "example": "What is Deep Learning?",
70
- "nullable": true
85
+ "example": "My name is David and I"
71
86
  },
72
87
  "role": {
73
88
  "type": "string",
74
89
  "example": "user"
90
+ }
91
+ },
92
+ "title": "ChatCompletionStreamOutputTextMessage"
93
+ },
94
+ "ChatCompletionStreamOutputToolCallDelta": {
95
+ "type": "object",
96
+ "required": ["role", "tool_calls"],
97
+ "properties": {
98
+ "role": {
99
+ "type": "string",
100
+ "example": "assistant"
75
101
  },
76
102
  "tool_calls": {
77
- "allOf": [
78
- {
79
- "$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall"
80
- }
81
- ],
82
- "nullable": true
103
+ "$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall"
83
104
  }
84
105
  },
85
- "title": "ChatCompletionStreamOutputDelta"
106
+ "title": "ChatCompletionStreamOutputToolCallDelta"
86
107
  },
87
108
  "ChatCompletionStreamOutputDeltaToolCall": {
88
109
  "type": "object",
@@ -165,6 +186,28 @@
165
186
  }
166
187
  },
167
188
  "title": "ChatCompletionStreamOutputTopLogprob"
189
+ },
190
+ "ChatCompletionStreamOutputUsage": {
191
+ "type": "object",
192
+ "required": ["prompt_tokens", "completion_tokens", "total_tokens"],
193
+ "properties": {
194
+ "completion_tokens": {
195
+ "type": "integer",
196
+ "format": "int32",
197
+ "minimum": 0
198
+ },
199
+ "prompt_tokens": {
200
+ "type": "integer",
201
+ "format": "int32",
202
+ "minimum": 0
203
+ },
204
+ "total_tokens": {
205
+ "type": "integer",
206
+ "format": "int32",
207
+ "minimum": 0
208
+ }
209
+ },
210
+ "title": "ChatCompletionStreamOutputUsage"
168
211
  }
169
212
  }
170
213
  }
@@ -59,7 +59,7 @@
59
59
  },
60
60
  "max_new_tokens": {
61
61
  "type": "integer",
62
- "description": "The maximum number of tokens to generate. Takes precedence over maxLength."
62
+ "description": "The maximum number of tokens to generate. Takes precedence over max_length."
63
63
  },
64
64
  "min_length": {
65
65
  "type": "integer",
@@ -67,7 +67,7 @@
67
67
  },
68
68
  "min_new_tokens": {
69
69
  "type": "integer",
70
- "description": "The minimum number of tokens to generate. Takes precedence over maxLength."
70
+ "description": "The minimum number of tokens to generate. Takes precedence over min_length."
71
71
  },
72
72
  "do_sample": {
73
73
  "type": "boolean",
@@ -72,7 +72,7 @@ export interface GenerationParameters {
72
72
  */
73
73
  max_length?: number;
74
74
  /**
75
- * The maximum number of tokens to generate. Takes precedence over maxLength.
75
+ * The maximum number of tokens to generate. Takes precedence over max_length.
76
76
  */
77
77
  max_new_tokens?: number;
78
78
  /**
@@ -80,7 +80,7 @@ export interface GenerationParameters {
80
80
  */
81
81
  min_length?: number;
82
82
  /**
83
- * The minimum number of tokens to generate. Takes precedence over maxLength.
83
+ * The minimum number of tokens to generate. Takes precedence over min_length.
84
84
  */
85
85
  min_new_tokens?: number;
86
86
  /**
@@ -39,6 +39,7 @@ import zeroShotImageClassification from "./zero-shot-image-classification/data";
39
39
  import zeroShotObjectDetection from "./zero-shot-object-detection/data";
40
40
  import imageTo3D from "./image-to-3d/data";
41
41
  import textTo3D from "./text-to-3d/data";
42
+ import keypointDetection from "./keypoint-detection/data";
42
43
 
43
44
  export type * from "./audio-classification/inference";
44
45
  export type * from "./automatic-speech-recognition/inference";
@@ -71,7 +72,7 @@ export type * from "./sentence-similarity/inference";
71
72
  export type * from "./summarization/inference";
72
73
  export type * from "./table-question-answering/inference";
73
74
  export type { TextToImageInput, TextToImageOutput, TextToImageParameters } from "./text-to-image/inference";
74
- export type { TextToAudioParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference";
75
+ export type { TextToSpeechParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference";
75
76
  export type * from "./token-classification/inference";
76
77
  export type { TranslationInput, TranslationOutput } from "./translation/inference";
77
78
  export type {
@@ -208,7 +209,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
208
209
  "image-text-to-text": getData("image-text-to-text", imageTextToText),
209
210
  "image-to-text": getData("image-to-text", imageToText),
210
211
  "image-to-video": undefined,
211
- "keypoint-detection": getData("keypoint-detection", placeholder),
212
+ "keypoint-detection": getData("keypoint-detection", keypointDetection),
212
213
  "mask-generation": getData("mask-generation", maskGeneration),
213
214
  "multiple-choice": undefined,
214
215
  "object-detection": getData("object-detection", objectDetection),
@@ -19,23 +19,84 @@ export interface TextGenerationInput {
19
19
  }
20
20
 
21
21
  export interface TextGenerationInputGenerateParameters {
22
+ /**
23
+ * Lora adapter id
24
+ */
25
+ adapter_id?: string;
26
+ /**
27
+ * Generate best_of sequences and return the one with the highest token logprobs.
28
+ */
22
29
  best_of?: number;
30
+ /**
31
+ * Whether to return decoder input token logprobs and ids.
32
+ */
23
33
  decoder_input_details?: boolean;
34
+ /**
35
+ * Whether to return generation details.
36
+ */
24
37
  details?: boolean;
38
+ /**
39
+ * Activate logits sampling.
40
+ */
25
41
  do_sample?: boolean;
42
+ /**
43
+ * The parameter for frequency penalty. 1.0 means no penalty
44
+ * Penalize new tokens based on their existing frequency in the text so far,
45
+ * decreasing the model's likelihood to repeat the same line verbatim.
46
+ */
26
47
  frequency_penalty?: number;
27
48
  grammar?: TextGenerationInputGrammarType;
49
+ /**
50
+ * Maximum number of tokens to generate.
51
+ */
28
52
  max_new_tokens?: number;
53
+ /**
54
+ * The parameter for repetition penalty. 1.0 means no penalty.
55
+ * See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
56
+ */
29
57
  repetition_penalty?: number;
58
+ /**
59
+ * Whether to prepend the prompt to the generated text
60
+ */
30
61
  return_full_text?: boolean;
62
+ /**
63
+ * Random sampling seed.
64
+ */
31
65
  seed?: number;
66
+ /**
67
+ * Stop generating tokens if a member of `stop` is generated.
68
+ */
32
69
  stop?: string[];
70
+ /**
71
+ * The value used to modulate the logits distribution.
72
+ */
33
73
  temperature?: number;
74
+ /**
75
+ * The number of highest probability vocabulary tokens to keep for top-k-filtering.
76
+ */
34
77
  top_k?: number;
78
+ /**
79
+ * The number of highest probability vocabulary tokens to keep for top-n-filtering.
80
+ */
35
81
  top_n_tokens?: number;
82
+ /**
83
+ * Top-p value for nucleus sampling.
84
+ */
36
85
  top_p?: number;
86
+ /**
87
+ * Truncate input tokens to the given size.
88
+ */
37
89
  truncate?: number;
90
+ /**
91
+ * Typical Decoding mass
92
+ * See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666)
93
+ * for more information.
94
+ */
38
95
  typical_p?: number;
96
+ /**
97
+ * Watermarking with [A Watermark for Large Language
98
+ * Models](https://arxiv.org/abs/2301.10226).
99
+ */
39
100
  watermark?: boolean;
40
101
  [property: string]: unknown;
41
102
  }
@@ -125,6 +186,7 @@ export interface TextGenerationStreamOutput {
125
186
  export interface TextGenerationStreamOutputStreamDetails {
126
187
  finish_reason: TextGenerationOutputFinishReason;
127
188
  generated_tokens: number;
189
+ input_length: number;
128
190
  seed?: number;
129
191
  [property: string]: unknown;
130
192
  }
@@ -22,8 +22,16 @@
22
22
  "TextGenerationInputGenerateParameters": {
23
23
  "type": "object",
24
24
  "properties": {
25
+ "adapter_id": {
26
+ "type": "string",
27
+ "description": "Lora adapter id",
28
+ "default": "null",
29
+ "example": "null",
30
+ "nullable": true
31
+ },
25
32
  "best_of": {
26
33
  "type": "integer",
34
+ "description": "Generate best_of sequences and return the one with the highest token logprobs.",
27
35
  "default": "null",
28
36
  "example": 1,
29
37
  "nullable": true,
@@ -32,20 +40,24 @@
32
40
  },
33
41
  "decoder_input_details": {
34
42
  "type": "boolean",
43
+ "description": "Whether to return decoder input token logprobs and ids.",
35
44
  "default": "false"
36
45
  },
37
46
  "details": {
38
47
  "type": "boolean",
48
+ "description": "Whether to return generation details.",
39
49
  "default": "true"
40
50
  },
41
51
  "do_sample": {
42
52
  "type": "boolean",
53
+ "description": "Activate logits sampling.",
43
54
  "default": "false",
44
55
  "example": true
45
56
  },
46
57
  "frequency_penalty": {
47
58
  "type": "number",
48
59
  "format": "float",
60
+ "description": "The parameter for frequency penalty. 1.0 means no penalty\nPenalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
49
61
  "default": "null",
50
62
  "example": 0.1,
51
63
  "nullable": true,
@@ -63,6 +75,7 @@
63
75
  "max_new_tokens": {
64
76
  "type": "integer",
65
77
  "format": "int32",
78
+ "description": "Maximum number of tokens to generate.",
66
79
  "default": "100",
67
80
  "example": "20",
68
81
  "nullable": true,
@@ -71,6 +84,7 @@
71
84
  "repetition_penalty": {
72
85
  "type": "number",
73
86
  "format": "float",
87
+ "description": "The parameter for repetition penalty. 1.0 means no penalty.\nSee [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.",
74
88
  "default": "null",
75
89
  "example": 1.03,
76
90
  "nullable": true,
@@ -78,6 +92,7 @@
78
92
  },
79
93
  "return_full_text": {
80
94
  "type": "boolean",
95
+ "description": "Whether to prepend the prompt to the generated text",
81
96
  "default": "null",
82
97
  "example": false,
83
98
  "nullable": true
@@ -85,6 +100,7 @@
85
100
  "seed": {
86
101
  "type": "integer",
87
102
  "format": "int64",
103
+ "description": "Random sampling seed.",
88
104
  "default": "null",
89
105
  "example": "null",
90
106
  "nullable": true,
@@ -96,12 +112,14 @@
96
112
  "items": {
97
113
  "type": "string"
98
114
  },
115
+ "description": "Stop generating tokens if a member of `stop` is generated.",
99
116
  "example": ["photographer"],
100
117
  "maxItems": 4
101
118
  },
102
119
  "temperature": {
103
120
  "type": "number",
104
121
  "format": "float",
122
+ "description": "The value used to modulate the logits distribution.",
105
123
  "default": "null",
106
124
  "example": 0.5,
107
125
  "nullable": true,
@@ -110,6 +128,7 @@
110
128
  "top_k": {
111
129
  "type": "integer",
112
130
  "format": "int32",
131
+ "description": "The number of highest probability vocabulary tokens to keep for top-k-filtering.",
113
132
  "default": "null",
114
133
  "example": 10,
115
134
  "nullable": true,
@@ -118,6 +137,7 @@
118
137
  "top_n_tokens": {
119
138
  "type": "integer",
120
139
  "format": "int32",
140
+ "description": "The number of highest probability vocabulary tokens to keep for top-n-filtering.",
121
141
  "default": "null",
122
142
  "example": 5,
123
143
  "nullable": true,
@@ -127,6 +147,7 @@
127
147
  "top_p": {
128
148
  "type": "number",
129
149
  "format": "float",
150
+ "description": "Top-p value for nucleus sampling.",
130
151
  "default": "null",
131
152
  "example": 0.95,
132
153
  "nullable": true,
@@ -135,6 +156,7 @@
135
156
  },
136
157
  "truncate": {
137
158
  "type": "integer",
159
+ "description": "Truncate input tokens to the given size.",
138
160
  "default": "null",
139
161
  "example": "null",
140
162
  "nullable": true,
@@ -143,6 +165,7 @@
143
165
  "typical_p": {
144
166
  "type": "number",
145
167
  "format": "float",
168
+ "description": "Typical Decoding mass\nSee [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.",
146
169
  "default": "null",
147
170
  "example": 0.95,
148
171
  "nullable": true,
@@ -151,6 +174,7 @@
151
174
  },
152
175
  "watermark": {
153
176
  "type": "boolean",
177
+ "description": "Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226).",
154
178
  "default": "false",
155
179
  "example": true
156
180
  }
@@ -39,7 +39,7 @@
39
39
  "$defs": {
40
40
  "TextGenerationStreamOutputStreamDetails": {
41
41
  "type": "object",
42
- "required": ["finish_reason", "generated_tokens"],
42
+ "required": ["finish_reason", "generated_tokens", "input_length"],
43
43
  "properties": {
44
44
  "finish_reason": {
45
45
  "$ref": "#/$defs/TextGenerationStreamOutputFinishReason"
@@ -50,6 +50,12 @@
50
50
  "example": 1,
51
51
  "minimum": 0
52
52
  },
53
+ "input_length": {
54
+ "type": "integer",
55
+ "format": "int32",
56
+ "example": 1,
57
+ "minimum": 0
58
+ },
53
59
  "seed": {
54
60
  "type": "integer",
55
61
  "format": "int64",
@@ -68,7 +68,7 @@ export interface GenerationParameters {
68
68
  */
69
69
  max_length?: number;
70
70
  /**
71
- * The maximum number of tokens to generate. Takes precedence over maxLength.
71
+ * The maximum number of tokens to generate. Takes precedence over max_length.
72
72
  */
73
73
  max_new_tokens?: number;
74
74
  /**
@@ -76,7 +76,7 @@ export interface GenerationParameters {
76
76
  */
77
77
  min_length?: number;
78
78
  /**
79
- * The minimum number of tokens to generate. Takes precedence over maxLength.
79
+ * The minimum number of tokens to generate. Takes precedence over min_length.
80
80
  */
81
81
  min_new_tokens?: number;
82
82
  /**
@@ -5,9 +5,7 @@
5
5
  */
6
6
 
7
7
  /**
8
- * Inputs for Text to Speech inference
9
- *
10
- * Inputs for Text To Audio inference
8
+ * Inputs for Text To Speech inference
11
9
  */
12
10
  export interface TextToSpeechInput {
13
11
  /**
@@ -17,16 +15,16 @@ export interface TextToSpeechInput {
17
15
  /**
18
16
  * Additional inference parameters
19
17
  */
20
- parameters?: TextToAudioParameters;
18
+ parameters?: TextToSpeechParameters;
21
19
  [property: string]: unknown;
22
20
  }
23
21
 
24
22
  /**
25
23
  * Additional inference parameters
26
24
  *
27
- * Additional inference parameters for Text To Audio
25
+ * Additional inference parameters for Text To Speech
28
26
  */
29
- export interface TextToAudioParameters {
27
+ export interface TextToSpeechParameters {
30
28
  /**
31
29
  * Parametrization of the text generation process
32
30
  */
@@ -70,7 +68,7 @@ export interface GenerationParameters {
70
68
  */
71
69
  max_length?: number;
72
70
  /**
73
- * The maximum number of tokens to generate. Takes precedence over maxLength.
71
+ * The maximum number of tokens to generate. Takes precedence over max_length.
74
72
  */
75
73
  max_new_tokens?: number;
76
74
  /**
@@ -78,7 +76,7 @@ export interface GenerationParameters {
78
76
  */
79
77
  min_length?: number;
80
78
  /**
81
- * The minimum number of tokens to generate. Takes precedence over maxLength.
79
+ * The minimum number of tokens to generate. Takes precedence over min_length.
82
80
  */
83
81
  min_new_tokens?: number;
84
82
  /**
@@ -1,7 +1,31 @@
1
1
  {
2
- "$ref": "/inference/schemas/text-to-audio/input.json",
3
2
  "$id": "/inference/schemas/text-to-speech/input.json",
4
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
+ "description": "Inputs for Text To Speech inference",
5
5
  "title": "TextToSpeechInput",
6
- "description": "Inputs for Text to Speech inference"
6
+ "type": "object",
7
+ "properties": {
8
+ "inputs": {
9
+ "description": "The input text data",
10
+ "type": "string"
11
+ },
12
+ "parameters": {
13
+ "description": "Additional inference parameters",
14
+ "$ref": "#/$defs/TextToSpeechParameters"
15
+ }
16
+ },
17
+ "$defs": {
18
+ "TextToSpeechParameters": {
19
+ "title": "TextToSpeechParameters",
20
+ "description": "Additional inference parameters for Text To Speech",
21
+ "type": "object",
22
+ "properties": {
23
+ "generate": {
24
+ "description": "Parametrization of the text generation process",
25
+ "$ref": "/inference/schemas/common-definitions.json#/definitions/GenerationParameters"
26
+ }
27
+ }
28
+ }
29
+ },
30
+ "required": ["inputs"]
7
31
  }