@huggingface/tasks 0.5.2 → 0.6.1

This diff shows the published contents of two package versions as they appear in their public registries. It is provided for informational purposes only.
@@ -0,0 +1,158 @@
+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+
+/**
+ * Inputs for ChatCompletion inference
+ */
+export interface ChatCompletionInput {
+  /**
+   * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
+   * frequency in the text so far, decreasing the model's likelihood to repeat the same line
+   * verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * The maximum number of tokens that can be generated in the chat completion.
+   */
+  max_tokens?: number;
+  messages: ChatCompletionInputMessage[];
+  /**
+   * The random sampling seed.
+   */
+  seed?: number;
+  /**
+   * Stop generating tokens if a stop token is generated.
+   */
+  stop?: ChatCompletionInputStopReason;
+  /**
+   * If set, partial message deltas will be sent.
+   */
+  stream?: boolean;
+  /**
+   * The value used to modulate the logits distribution.
+   */
+  temperature?: number;
+  /**
+   * If set to < 1, only the smallest set of most probable tokens with probabilities that add
+   * up to `top_p` or higher are kept for generation.
+   */
+  top_p?: number;
+  [property: string]: unknown;
+}
+
+export interface ChatCompletionInputMessage {
+  /**
+   * The content of the message.
+   */
+  content: string;
+  role: ChatCompletionMessageRole;
+  [property: string]: unknown;
+}
+
+/**
+ * The role of the message author.
+ */
+export type ChatCompletionMessageRole = "assistant" | "system" | "user";
+
+/**
+ * Stop generating tokens if a stop token is generated.
+ */
+export type ChatCompletionInputStopReason = string[] | string;
+
+/**
+ * Outputs for Chat Completion inference
+ */
+export interface ChatCompletionOutput {
+  /**
+   * A list of chat completion choices.
+   */
+  choices: ChatCompletionOutputChoice[];
+  /**
+   * The Unix timestamp (in seconds) of when the chat completion was created.
+   */
+  created: number;
+  [property: string]: unknown;
+}
+
+export interface ChatCompletionOutputChoice {
+  /**
+   * The reason why the generation was stopped.
+   */
+  finish_reason: ChatCompletionFinishReason;
+  /**
+   * The index of the choice in the list of choices.
+   */
+  index: number;
+  message: ChatCompletionOutputChoiceMessage;
+  [property: string]: unknown;
+}
+
+/**
+ * The reason why the generation was stopped.
+ *
+ * The generated sequence reached the maximum allowed length
+ *
+ * The model generated an end-of-sentence (EOS) token
+ *
+ * One of the sequences in stop_sequences was generated
+ */
+export type ChatCompletionFinishReason = "length" | "eos_token" | "stop_sequence";
+
+export interface ChatCompletionOutputChoiceMessage {
+  /**
+   * The content of the chat completion message.
+   */
+  content: string;
+  role: ChatCompletionMessageRole;
+  [property: string]: unknown;
+}
+
+/**
+ * Chat Completion Stream Output
+ */
+export interface ChatCompletionStreamOutput {
+  /**
+   * A list of chat completion choices.
+   */
+  choices: ChatCompletionStreamOutputChoice[];
+  /**
+   * The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has
+   * the same timestamp.
+   */
+  created: number;
+  [property: string]: unknown;
+}
+
+export interface ChatCompletionStreamOutputChoice {
+  /**
+   * A chat completion delta generated by streamed model responses.
+   */
+  delta: ChatCompletionStreamOutputDelta;
+  /**
+   * The reason why the generation was stopped.
+   */
+  finish_reason?: ChatCompletionFinishReason;
+  /**
+   * The index of the choice in the list of choices.
+   */
+  index: number;
+  [property: string]: unknown;
+}
+
+/**
+ * A chat completion delta generated by streamed model responses.
+ */
+export interface ChatCompletionStreamOutputDelta {
+  /**
+   * The contents of the chunk message.
+   */
+  content?: string;
+  /**
+   * The role of the author of this message.
+   */
+  role?: string;
+  [property: string]: unknown;
+}
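For orientation, here is a minimal usage sketch of the generated types. The types are exactly those added above and are re-exported from the package root (see the index hunk further down); the endpoint URL and the plain fetch wiring are illustrative assumptions, not part of @huggingface/tasks.

import type { ChatCompletionInput, ChatCompletionOutput } from "@huggingface/tasks";

async function complete(): Promise<string> {
  const input: ChatCompletionInput = {
    messages: [
      { role: "system", content: "You are a helpful assistant." },
      { role: "user", content: "What does temperature do?" },
    ],
    max_tokens: 256,
    temperature: 0.7,
  };
  // Hypothetical server implementing this schema; not defined by the package.
  const res = await fetch("https://example.com/v1/chat/completions", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(input),
  });
  const output = (await res.json()) as ChatCompletionOutput;
  return output.choices[0].message.content;
}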
@@ -0,0 +1,63 @@
+{
+  "title": "ChatCompletionInput",
+  "$id": "/inference/schemas/chat-completion/input.json",
+  "$schema": "http://json-schema.org/draft-06/schema#",
+  "description": "Inputs for ChatCompletion inference",
+  "type": "object",
+  "properties": {
+    "messages": {
+      "type": "array",
+      "title": "ChatCompletionInputMessage",
+      "items": {
+        "type": "object",
+        "properties": {
+          "role": {
+            "$ref": "#/definitions/Role"
+          },
+          "content": {
+            "type": "string",
+            "description": "The content of the message."
+          }
+        },
+        "required": ["role", "content"]
+      }
+    },
+    "frequency_penalty": {
+      "type": "number",
+      "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
+    },
+    "max_tokens": {
+      "type": "integer",
+      "description": "The maximum number of tokens that can be generated in the chat completion."
+    },
+    "seed": {
+      "type": "integer",
+      "description": "The random sampling seed."
+    },
+    "stop": {
+      "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }],
+      "title": "ChatCompletionInputStopReason",
+      "description": "Stop generating tokens if a stop token is generated."
+    },
+    "stream": {
+      "type": "boolean",
+      "description": "If set, partial message deltas will be sent."
+    },
+    "temperature": {
+      "type": "number",
+      "description": "The value used to modulate the logits distribution."
+    },
+    "top_p": {
+      "type": "number",
+      "description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
+    }
+  },
+  "required": ["messages"],
+  "definitions": {
+    "Role": {
+      "oneOf": [{ "const": "assistant" }, { "const": "system" }, { "const": "user" }],
+      "title": "ChatCompletionMessageRole",
+      "description": "The role of the message author."
+    }
+  }
+}
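This spec declares JSON Schema draft-06, so a validator has to register that meta-schema before compiling. A sketch with Ajv follows; the tooling choice is an assumption, the package itself only ships the spec file.

// Sketch only: assumes Ajv is installed and JSON module resolution is enabled.
import Ajv from "ajv";
import draft6MetaSchema from "ajv/dist/refs/json-schema-draft-06.json";
import inputSchema from "./input.json";

const ajv = new Ajv();
ajv.addMetaSchema(draft6MetaSchema); // the spec declares draft-06
const validate = ajv.compile(inputSchema);

// Valid: `messages` is the only required property.
console.log(validate({ messages: [{ role: "user", content: "Hi" }] })); // true
// Invalid: `role` must be one of assistant | system | user.
console.log(validate({ messages: [{ role: "bot", content: "Hi" }] })); // false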
@@ -0,0 +1,58 @@
+{
+  "$id": "/inference/schemas/chat-completion/output.json",
+  "$schema": "http://json-schema.org/draft-06/schema#",
+  "description": "Outputs for Chat Completion inference",
+  "title": "ChatCompletionOutput",
+  "type": "object",
+  "properties": {
+    "choices": {
+      "type": "array",
+      "description": "A list of chat completion choices.",
+      "title": "ChatCompletionOutputChoice",
+      "items": {
+        "type": "object",
+        "properties": {
+          "finish_reason": {
+            "$ref": "#/definitions/FinishReason",
+            "description": "The reason why the generation was stopped."
+          },
+          "index": {
+            "type": "integer",
+            "description": "The index of the choice in the list of choices."
+          },
+          "message": {
+            "type": "object",
+            "properties": {
+              "role": {
+                "$ref": "/inference/schemas/chat-completion/input.json#/definitions/Role"
+              },
+              "content": {
+                "type": "string",
+                "description": "The content of the chat completion message."
+              }
+            },
+            "title": "ChatCompletionOutputChoiceMessage",
+            "required": ["content", "role"]
+          }
+        },
+        "required": ["finish_reason", "index", "message"]
+      }
+    },
+    "created": {
+      "type": "integer",
+      "description": "The Unix timestamp (in seconds) of when the chat completion was created."
+    }
+  },
+  "required": ["choices", "created"],
+  "definitions": {
+    "FinishReason": {
+      "type": "string",
+      "title": "ChatCompletionFinishReason",
+      "oneOf": [
+        { "const": "length", "description": "The generated sequence reached the maximum allowed length" },
+        { "const": "eos_token", "description": "The model generated an end-of-sentence (EOS) token" },
+        { "const": "stop_sequence", "description": "One of the sequences in stop_sequences was generated" }
+      ]
+    }
+  }
+}
@@ -0,0 +1,48 @@
+{
+  "$id": "/inference/schemas/chat-completion/output_stream.json",
+  "$schema": "http://json-schema.org/draft-06/schema#",
+  "description": "Chat Completion Stream Output",
+  "title": "ChatCompletionStreamOutput",
+  "type": "object",
+  "properties": {
+    "choices": {
+      "type": "array",
+      "title": "ChatCompletionStreamOutputChoice",
+      "description": "A list of chat completion choices.",
+      "items": {
+        "type": "object",
+        "properties": {
+          "delta": {
+            "type": "object",
+            "title": "ChatCompletionStreamOutputDelta",
+            "description": "A chat completion delta generated by streamed model responses.",
+            "properties": {
+              "content": {
+                "type": "string",
+                "description": "The contents of the chunk message."
+              },
+              "role": {
+                "type": "string",
+                "description": "The role of the author of this message."
+              }
+            }
+          },
+          "finish_reason": {
+            "$ref": "/inference/schemas/chat-completion/output.json#/definitions/FinishReason",
+            "description": "The reason why the generation was stopped."
+          },
+          "index": {
+            "type": "integer",
+            "description": "The index of the choice in the list of choices."
+          }
+        },
+        "required": ["delta", "index"]
+      }
+    },
+    "created": {
+      "type": "integer",
+      "description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp."
+    }
+  },
+  "required": ["choices", "created"]
+}
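Since `delta.content` is optional (a first chunk may carry only the role), a consumer has to fold chunks defensively. A sketch, assuming the chunks were already parsed from the transport (for example, server-sent events):

import type { ChatCompletionStreamOutput } from "@huggingface/tasks";

function accumulate(chunks: ChatCompletionStreamOutput[]): string {
  let text = "";
  for (const chunk of chunks) {
    for (const choice of chunk.choices) {
      text += choice.delta.content ?? ""; // role-only deltas contribute nothing
    }
  }
  return text;
}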
@@ -38,6 +38,17 @@ import zeroShotObjectDetection from "./zero-shot-object-detection/data";
 
 export type * from "./audio-classification/inference";
 export type * from "./automatic-speech-recognition/inference";
+export type {
+  ChatCompletionInput,
+  ChatCompletionInputMessage,
+  ChatCompletionOutput,
+  ChatCompletionOutputChoice,
+  ChatCompletionFinishReason,
+  ChatCompletionOutputChoiceMessage,
+  ChatCompletionStreamOutput,
+  ChatCompletionStreamOutputChoice,
+  ChatCompletionStreamOutputDelta,
+} from "./chat-completion/inference";
 export type * from "./document-question-answering/inference";
 export type * from "./feature-extraction/inference";
 export type * from "./fill-mask/inference";
@@ -73,14 +84,14 @@ export type {
   TextClassificationParameters,
 } from "./text-classification/inference";
 export type {
-  FinishReason,
-  PrefillToken,
+  TextGenerationFinishReason,
+  TextGenerationPrefillToken,
   TextGenerationInput,
   TextGenerationOutput,
   TextGenerationOutputDetails,
   TextGenerationParameters,
-  TextGenerationSequenceDetails,
-  Token,
+  TextGenerationOutputSequenceDetails,
+  TextGenerationOutputToken,
 } from "./text-generation/inference";
 export type * from "./video-classification/inference";
 export type * from "./visual-question-answering/inference";
@@ -16,6 +16,10 @@ export interface TextGenerationInput {
    * Additional inference parameters
    */
   parameters?: TextGenerationParameters;
+  /**
+   * Whether to stream output tokens
+   */
+  stream?: boolean;
   [property: string]: unknown;
 }
 
@@ -114,16 +118,16 @@ export interface TextGenerationOutputDetails {
   /**
    * Details about additional sequences when best_of is provided
    */
-  best_of_sequences?: TextGenerationSequenceDetails[];
+  best_of_sequences?: TextGenerationOutputSequenceDetails[];
   /**
    * The reason why the generation was stopped.
    */
-  finish_reason: FinishReason;
+  finish_reason: TextGenerationFinishReason;
   /**
    * The number of generated tokens
    */
   generated_tokens: number;
-  prefill: PrefillToken[];
+  prefill: TextGenerationPrefillToken[];
   /**
    * The random seed used for generation
    */
@@ -131,24 +135,25 @@ export interface TextGenerationOutputDetails {
   /**
    * The generated tokens and associated details
    */
-  tokens: Token[];
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
   [property: string]: unknown;
 }
 
-export interface TextGenerationSequenceDetails {
-  /**
-   * The reason why the generation was stopped.
-   */
-  finish_reason: FinishReason;
+export interface TextGenerationOutputSequenceDetails {
+  finish_reason: TextGenerationFinishReason;
   /**
    * The generated text
    */
-  generated_text: number;
+  generated_text: string;
   /**
    * The number of generated tokens
    */
   generated_tokens: number;
-  prefill: PrefillToken[];
+  prefill: TextGenerationPrefillToken[];
   /**
    * The random seed used for generation
    */
@@ -156,20 +161,26 @@ export interface TextGenerationSequenceDetails {
   /**
    * The generated tokens and associated details
    */
-  tokens: Token[];
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
   [property: string]: unknown;
 }
 
 /**
- * The generated sequence reached the maximum allowed length
+ * The reason why the generation was stopped.
+ *
+ * length: The generated sequence reached the maximum allowed length
  *
- * The model generated an end-of-sentence (EOS) token
+ * eos_token: The model generated an end-of-sentence (EOS) token
  *
- * One of the sequence in stop_sequences was generated
+ * stop_sequence: One of the sequences in stop_sequences was generated
  */
-export type FinishReason = "length" | "eos_token" | "stop_sequence";
+export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
 
-export interface PrefillToken {
+export interface TextGenerationPrefillToken {
   id: number;
   logprob: number;
   /**
@@ -179,9 +190,12 @@ export interface PrefillToken {
   [property: string]: unknown;
 }
 
-export interface Token {
+/**
+ * Generated token.
+ */
+export interface TextGenerationOutputToken {
   id: number;
-  logprob: number;
+  logprob?: number;
   /**
    * Whether or not that token is a special one
    */
209
+
210
+ /**
211
+ * Text Generation Stream Output
212
+ */
213
+ export interface TextGenerationStreamOutput {
214
+ /**
215
+ * Generation details. Only available when the generation is finished.
216
+ */
217
+ details?: TextGenerationStreamDetails;
218
+ /**
219
+ * The complete generated text. Only available when the generation is finished.
220
+ */
221
+ generated_text?: string;
222
+ /**
223
+ * The token index within the stream. Optional to support older clients that omit it.
224
+ */
225
+ index?: number;
226
+ /**
227
+ * Generated token.
228
+ */
229
+ token: TextGenerationOutputToken;
230
+ [property: string]: unknown;
231
+ }
232
+
233
+ /**
234
+ * Generation details. Only available when the generation is finished.
235
+ */
236
+ export interface TextGenerationStreamDetails {
237
+ /**
238
+ * The reason why the generation was stopped.
239
+ */
240
+ finish_reason: TextGenerationFinishReason;
241
+ /**
242
+ * The number of generated tokens
243
+ */
244
+ generated_tokens: number;
245
+ /**
246
+ * The random seed used for generation
247
+ */
248
+ seed: number;
249
+ [property: string]: unknown;
250
+ }
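A sketch of consuming these events. The import uses the module path from this diff because the streaming types are not added to the package-root export list above; the `special` flag on the token comes from the unchanged part of the interface.

import type { TextGenerationStreamOutput } from "./text-generation/inference";

function collect(events: TextGenerationStreamOutput[]): string {
  let text = "";
  for (const ev of events) {
    // The final event may carry the complete text; prefer it when present.
    if (ev.generated_text != null) return ev.generated_text;
    if (!ev.token.special) text += ev.token.text; // skip special tokens
  }
  return text;
}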
@@ -12,6 +12,10 @@
     "parameters": {
       "description": "Additional inference parameters",
       "$ref": "#/$defs/TextGenerationParameters"
+    },
+    "stream": {
+      "description": "Whether to stream output tokens",
+      "type": "boolean"
     }
   },
   "$defs": {