huggingface-hub 0.22.1__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)
  1. huggingface_hub/__init__.py +51 -19
  2. huggingface_hub/_commit_api.py +10 -9
  3. huggingface_hub/_commit_scheduler.py +2 -2
  4. huggingface_hub/_inference_endpoints.py +10 -17
  5. huggingface_hub/_local_folder.py +229 -0
  6. huggingface_hub/_login.py +4 -3
  7. huggingface_hub/_multi_commits.py +1 -1
  8. huggingface_hub/_snapshot_download.py +16 -38
  9. huggingface_hub/_tensorboard_logger.py +16 -6
  10. huggingface_hub/_webhooks_payload.py +22 -1
  11. huggingface_hub/_webhooks_server.py +24 -20
  12. huggingface_hub/commands/download.py +11 -34
  13. huggingface_hub/commands/huggingface_cli.py +2 -0
  14. huggingface_hub/commands/tag.py +159 -0
  15. huggingface_hub/constants.py +3 -5
  16. huggingface_hub/errors.py +58 -0
  17. huggingface_hub/file_download.py +545 -376
  18. huggingface_hub/hf_api.py +758 -629
  19. huggingface_hub/hf_file_system.py +14 -5
  20. huggingface_hub/hub_mixin.py +127 -43
  21. huggingface_hub/inference/_client.py +402 -183
  22. huggingface_hub/inference/_common.py +19 -29
  23. huggingface_hub/inference/_generated/_async_client.py +402 -184
  24. huggingface_hub/inference/_generated/types/__init__.py +23 -6
  25. huggingface_hub/inference/_generated/types/chat_completion.py +197 -43
  26. huggingface_hub/inference/_generated/types/text_generation.py +57 -79
  27. huggingface_hub/inference/_templating.py +2 -4
  28. huggingface_hub/keras_mixin.py +0 -3
  29. huggingface_hub/lfs.py +16 -4
  30. huggingface_hub/repository.py +1 -0
  31. huggingface_hub/utils/__init__.py +19 -6
  32. huggingface_hub/utils/_fixes.py +1 -0
  33. huggingface_hub/utils/_headers.py +2 -4
  34. huggingface_hub/utils/_http.py +16 -5
  35. huggingface_hub/utils/_paths.py +13 -1
  36. huggingface_hub/utils/_runtime.py +10 -0
  37. huggingface_hub/utils/_safetensors.py +0 -13
  38. huggingface_hub/utils/_validators.py +2 -7
  39. huggingface_hub/utils/tqdm.py +124 -46
  40. {huggingface_hub-0.22.1.dist-info → huggingface_hub-0.23.0.dist-info}/METADATA +5 -1
  41. {huggingface_hub-0.22.1.dist-info → huggingface_hub-0.23.0.dist-info}/RECORD +45 -43
  42. {huggingface_hub-0.22.1.dist-info → huggingface_hub-0.23.0.dist-info}/LICENSE +0 -0
  43. {huggingface_hub-0.22.1.dist-info → huggingface_hub-0.23.0.dist-info}/WHEEL +0 -0
  44. {huggingface_hub-0.22.1.dist-info → huggingface_hub-0.23.0.dist-info}/entry_points.txt +0 -0
  45. {huggingface_hub-0.22.1.dist-info → huggingface_hub-0.23.0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_generated/types/__init__.py CHANGED
@@ -19,13 +19,28 @@ from .automatic_speech_recognition import (
  from .base import BaseInferenceType
  from .chat_completion import (
      ChatCompletionInput,
+     ChatCompletionInputFunctionDefinition,
      ChatCompletionInputMessage,
+     ChatCompletionInputTool,
+     ChatCompletionInputToolCall,
+     ChatCompletionInputToolTypeClass,
      ChatCompletionOutput,
-     ChatCompletionOutputChoice,
-     ChatCompletionOutputChoiceMessage,
+     ChatCompletionOutputComplete,
+     ChatCompletionOutputFunctionDefinition,
+     ChatCompletionOutputLogprob,
+     ChatCompletionOutputLogprobs,
+     ChatCompletionOutputMessage,
+     ChatCompletionOutputToolCall,
+     ChatCompletionOutputTopLogprob,
+     ChatCompletionOutputUsage,
      ChatCompletionStreamOutput,
      ChatCompletionStreamOutputChoice,
      ChatCompletionStreamOutputDelta,
+     ChatCompletionStreamOutputDeltaToolCall,
+     ChatCompletionStreamOutputFunction,
+     ChatCompletionStreamOutputLogprob,
+     ChatCompletionStreamOutputLogprobs,
+     ChatCompletionStreamOutputTopLogprob,
  )
  from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
  from .document_question_answering import (
@@ -67,14 +82,16 @@ from .text2text_generation import Text2TextGenerationInput, Text2TextGenerationO
  from .text_classification import TextClassificationInput, TextClassificationOutputElement, TextClassificationParameters
  from .text_generation import (
      TextGenerationInput,
+     TextGenerationInputGenerateParameters,
+     TextGenerationInputGrammarType,
      TextGenerationOutput,
+     TextGenerationOutputBestOfSequence,
      TextGenerationOutputDetails,
-     TextGenerationOutputSequenceDetails,
+     TextGenerationOutputPrefillToken,
      TextGenerationOutputToken,
-     TextGenerationParameters,
-     TextGenerationPrefillToken,
-     TextGenerationStreamDetails,
      TextGenerationStreamOutput,
+     TextGenerationStreamOutputStreamDetails,
+     TextGenerationStreamOutputToken,
  )
  from .text_to_audio import TextToAudioGenerationParameters, TextToAudioInput, TextToAudioOutput, TextToAudioParameters
  from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters, TextToImageTargetSize
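
Reviewer note: besides the new exports, this list records renames — ChatCompletionOutputChoice becomes ChatCompletionOutputComplete, ChatCompletionOutputChoiceMessage becomes ChatCompletionOutputMessage, and TextGenerationParameters becomes TextGenerationInputGenerateParameters — so imports of the old names break on upgrade. A minimal before/after sketch, assuming downstream code imports the generated types from this module directly:

# 0.22.x imports that no longer resolve in 0.23.0:
# from huggingface_hub.inference._generated.types import (
#     ChatCompletionOutputChoice,
#     ChatCompletionOutputChoiceMessage,
#     TextGenerationParameters,
# )

# 0.23.0 equivalents, matching the export list above:
from huggingface_hub.inference._generated.types import (
    ChatCompletionOutputComplete,           # was ChatCompletionOutputChoice
    ChatCompletionOutputMessage,            # was ChatCompletionOutputChoiceMessage
    TextGenerationInputGenerateParameters,  # was TextGenerationParameters
)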
huggingface_hub/inference/_generated/types/chat_completion.py CHANGED
@@ -4,103 +4,257 @@
  # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
  # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
  from dataclasses import dataclass
- from typing import List, Literal, Optional, Union
+ from typing import Any, List, Literal, Optional, Union

  from .base import BaseInferenceType


- ChatCompletionMessageRole = Literal["assistant", "system", "user"]
+ @dataclass
+ class ChatCompletionInputFunctionDefinition(BaseInferenceType):
+     arguments: Any
+     name: str
+     description: Optional[str] = None
+
+
+ @dataclass
+ class ChatCompletionInputToolCall(BaseInferenceType):
+     function: ChatCompletionInputFunctionDefinition
+     id: int
+     type: str


  @dataclass
  class ChatCompletionInputMessage(BaseInferenceType):
-     content: str
-     """The content of the message."""
-     role: "ChatCompletionMessageRole"
+     role: str
+     content: Optional[str] = None
+     name: Optional[str] = None
+     tool_calls: Optional[List[ChatCompletionInputToolCall]] = None
+
+
+ @dataclass
+ class ChatCompletionInputToolTypeClass(BaseInferenceType):
+     function_name: str
+
+
+ ChatCompletionInputToolTypeEnum = Literal["OneOf"]
+
+
+ @dataclass
+ class ChatCompletionInputTool(BaseInferenceType):
+     function: ChatCompletionInputFunctionDefinition
+     type: str


  @dataclass
  class ChatCompletionInput(BaseInferenceType):
-     """Inputs for ChatCompletion inference"""
+     """Chat Completion Input.
+     Auto-generated from TGI specs.
+     For more details, check out
+     https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+     """

      messages: List[ChatCompletionInputMessage]
+     """A list of messages comprising the conversation so far."""
+     model: str
+     """[UNUSED] ID of the model to use. See the model endpoint compatibility table for details
+     on which models work with the Chat API.
+     """
      frequency_penalty: Optional[float] = None
      """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
-     frequency in the text so far, decreasing the model's likelihood to repeat the same line
-     verbatim.
+     frequency in the text so far,
+     decreasing the model's likelihood to repeat the same line verbatim.
+     """
+     logit_bias: Optional[List[float]] = None
+     """UNUSED
+     Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON
+     object that maps tokens
+     (specified by their token ID in the tokenizer) to an associated bias value from -100 to
+     100. Mathematically,
+     the bias is added to the logits generated by the model prior to sampling. The exact
+     effect will vary per model,
+     but values between -1 and 1 should decrease or increase likelihood of selection; values
+     like -100 or 100 should
+     result in a ban or exclusive selection of the relevant token.
+     """
+     logprobs: Optional[bool] = None
+     """Whether to return log probabilities of the output tokens or not. If true, returns the log
+     probabilities of each
+     output token returned in the content of message.
      """
      max_tokens: Optional[int] = None
      """The maximum number of tokens that can be generated in the chat completion."""
+     n: Optional[int] = None
+     """UNUSED
+     How many chat completion choices to generate for each input message. Note that you will
+     be charged based on the
+     number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
+     """
+     presence_penalty: Optional[float] = None
+     """Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they
+     appear in the text so far,
+     increasing the model's likelihood to talk about new topics
+     """
      seed: Optional[int] = None
-     """The random sampling seed."""
-     stop: Optional[Union[List[str], str]] = None
-     """Stop generating tokens if a stop token is generated."""
+     stop: Optional[List[str]] = None
+     """Up to 4 sequences where the API will stop generating further tokens."""
      stream: Optional[bool] = None
-     """If set, partial message deltas will be sent."""
      temperature: Optional[float] = None
-     """The value used to modulate the logits distribution."""
+     """What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
+     output more random, while
+     lower values like 0.2 will make it more focused and deterministic.
+     We generally recommend altering this or `top_p` but not both.
+     """
+     tool_choice: Optional[Union[ChatCompletionInputToolTypeClass, "ChatCompletionInputToolTypeEnum"]] = None
+     tool_prompt: Optional[str] = None
+     """A prompt to be appended before the tools"""
+     tools: Optional[List[ChatCompletionInputTool]] = None
+     """A list of tools the model may call. Currently, only functions are supported as a tool.
+     Use this to provide a list of
+     functions the model may generate JSON inputs for.
+     """
+     top_logprobs: Optional[int] = None
+     """An integer between 0 and 5 specifying the number of most likely tokens to return at each
+     token position, each with
+     an associated log probability. logprobs must be set to true if this parameter is used.
+     """
      top_p: Optional[float] = None
-     """If set to < 1, only the smallest set of most probable tokens with probabilities that add
-     up to `top_p` or higher are kept for generation.
+     """An alternative to sampling with temperature, called nucleus sampling, where the model
+     considers the results of the
+     tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%
+     probability mass are considered.
      """


- ChatCompletionFinishReason = Literal["length", "eos_token", "stop_sequence"]
+ @dataclass
+ class ChatCompletionOutputTopLogprob(BaseInferenceType):
+     logprob: float
+     token: str


  @dataclass
- class ChatCompletionOutputChoiceMessage(BaseInferenceType):
-     content: str
-     """The content of the chat completion message."""
-     role: "ChatCompletionMessageRole"
+ class ChatCompletionOutputLogprob(BaseInferenceType):
+     logprob: float
+     token: str
+     top_logprobs: List[ChatCompletionOutputTopLogprob]


  @dataclass
- class ChatCompletionOutputChoice(BaseInferenceType):
-     finish_reason: "ChatCompletionFinishReason"
-     """The reason why the generation was stopped."""
+ class ChatCompletionOutputLogprobs(BaseInferenceType):
+     content: List[ChatCompletionOutputLogprob]
+
+
+ @dataclass
+ class ChatCompletionOutputFunctionDefinition(BaseInferenceType):
+     arguments: Any
+     name: str
+     description: Optional[str] = None
+
+
+ @dataclass
+ class ChatCompletionOutputToolCall(BaseInferenceType):
+     function: ChatCompletionOutputFunctionDefinition
+     id: int
+     type: str
+
+
+ @dataclass
+ class ChatCompletionOutputMessage(BaseInferenceType):
+     role: str
+     content: Optional[str] = None
+     name: Optional[str] = None
+     tool_calls: Optional[List[ChatCompletionOutputToolCall]] = None
+
+
+ @dataclass
+ class ChatCompletionOutputComplete(BaseInferenceType):
+     finish_reason: str
      index: int
-     """The index of the choice in the list of choices."""
-     message: ChatCompletionOutputChoiceMessage
+     message: ChatCompletionOutputMessage
+     logprobs: Optional[ChatCompletionOutputLogprobs] = None
+
+
+ @dataclass
+ class ChatCompletionOutputUsage(BaseInferenceType):
+     completion_tokens: int
+     prompt_tokens: int
+     total_tokens: int


  @dataclass
  class ChatCompletionOutput(BaseInferenceType):
-     """Outputs for Chat Completion inference"""
+     """Chat Completion Output.
+     Auto-generated from TGI specs.
+     For more details, check out
+     https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+     """

-     choices: List[ChatCompletionOutputChoice]
-     """A list of chat completion choices."""
+     choices: List[ChatCompletionOutputComplete]
      created: int
-     """The Unix timestamp (in seconds) of when the chat completion was created."""
+     id: str
+     model: str
+     object: str
+     system_fingerprint: str
+     usage: ChatCompletionOutputUsage


  @dataclass
- class ChatCompletionStreamOutputDelta(BaseInferenceType):
-     """A chat completion delta generated by streamed model responses."""
+ class ChatCompletionStreamOutputFunction(BaseInferenceType):
+     arguments: str
+     name: Optional[str] = None
+
+
+ @dataclass
+ class ChatCompletionStreamOutputDeltaToolCall(BaseInferenceType):
+     function: ChatCompletionStreamOutputFunction
+     id: str
+     index: int
+     type: str

+
+ @dataclass
+ class ChatCompletionStreamOutputDelta(BaseInferenceType):
+     role: str
      content: Optional[str] = None
-     """The contents of the chunk message."""
-     role: Optional[str] = None
-     """The role of the author of this message."""
+     tool_calls: Optional[ChatCompletionStreamOutputDeltaToolCall] = None
+
+
+ @dataclass
+ class ChatCompletionStreamOutputTopLogprob(BaseInferenceType):
+     logprob: float
+     token: str
+
+
+ @dataclass
+ class ChatCompletionStreamOutputLogprob(BaseInferenceType):
+     logprob: float
+     token: str
+     top_logprobs: List[ChatCompletionStreamOutputTopLogprob]
+
+
+ @dataclass
+ class ChatCompletionStreamOutputLogprobs(BaseInferenceType):
+     content: List[ChatCompletionStreamOutputLogprob]


  @dataclass
  class ChatCompletionStreamOutputChoice(BaseInferenceType):
      delta: ChatCompletionStreamOutputDelta
-     """A chat completion delta generated by streamed model responses."""
      index: int
-     """The index of the choice in the list of choices."""
-     finish_reason: Optional["ChatCompletionFinishReason"] = None
-     """The reason why the generation was stopped."""
+     finish_reason: Optional[str] = None
+     logprobs: Optional[ChatCompletionStreamOutputLogprobs] = None


  @dataclass
  class ChatCompletionStreamOutput(BaseInferenceType):
-     """Chat Completion Stream Output"""
+     """Chat Completion Stream Output.
+     Auto-generated from TGI specs.
+     For more details, check out
+     https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+     """

      choices: List[ChatCompletionStreamOutputChoice]
-     """A list of chat completion choices."""
      created: int
-     """The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has
-     the same timestamp.
-     """
+     id: str
+     model: str
+     object: str
+     system_fingerprint: str
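
Reviewer note: the rewritten chat_completion.py makes tool calling explicit in the input schema. A short sketch of constructing a tool-enabled request with these dataclasses — the function schema, model name, and message text are illustrative values, and actually sending the request (e.g. via InferenceClient.chat_completion) is not shown:

from huggingface_hub.inference._generated.types.chat_completion import (
    ChatCompletionInput,
    ChatCompletionInputFunctionDefinition,
    ChatCompletionInputMessage,
    ChatCompletionInputTool,
)

# One callable function; `arguments` is typed as Any and here carries a
# JSON-Schema-like dict (an assumption for illustration, not mandated by the spec).
get_weather = ChatCompletionInputFunctionDefinition(
    name="get_weather",
    description="Get the current weather for a city.",
    arguments={"type": "object", "properties": {"city": {"type": "string"}}},
)

request = ChatCompletionInput(
    messages=[ChatCompletionInputMessage(role="user", content="Weather in Paris?")],
    model="tgi",  # required field, even though the spec marks it [UNUSED]
    tools=[ChatCompletionInputTool(function=get_weather, type="function")],
    tool_prompt="You may call the provided tools when helpful.",
    max_tokens=256,
)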
huggingface_hub/inference/_generated/types/text_generation.py CHANGED
@@ -4,158 +4,136 @@
  # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
  # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
  from dataclasses import dataclass
- from typing import List, Literal, Optional
+ from typing import Any, List, Literal, Optional

  from .base import BaseInferenceType


+ TypeEnum = Literal["json", "regex"]
+
+
  @dataclass
- class TextGenerationParameters(BaseInferenceType):
-     """Additional inference parameters
-     Additional inference parameters for Text Generation
+ class TextGenerationInputGrammarType(BaseInferenceType):
+     type: "TypeEnum"
+     value: Any
+     """A string that represents a [JSON Schema](https://json-schema.org/).
+     JSON Schema is a declarative language that allows to annotate JSON documents
+     with types and descriptions.
      """

+
+ @dataclass
+ class TextGenerationInputGenerateParameters(BaseInferenceType):
      best_of: Optional[int] = None
-     """The number of sampling queries to run. Only the best one (in terms of total logprob) will
-     be returned.
-     """
      decoder_input_details: Optional[bool] = None
-     """Whether or not to output decoder input details"""
      details: Optional[bool] = None
-     """Whether or not to output details"""
      do_sample: Optional[bool] = None
-     """Whether to use logits sampling instead of greedy decoding when generating new tokens."""
+     frequency_penalty: Optional[float] = None
+     grammar: Optional[TextGenerationInputGrammarType] = None
      max_new_tokens: Optional[int] = None
-     """The maximum number of tokens to generate."""
      repetition_penalty: Optional[float] = None
-     """The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
-     paper](https://hf.co/papers/1909.05858) for more details.
-     """
      return_full_text: Optional[bool] = None
-     """Whether to prepend the prompt to the generated text."""
      seed: Optional[int] = None
-     """The random sampling seed."""
-     stop_sequences: Optional[List[str]] = None
-     """Stop generating tokens if a member of `stop_sequences` is generated."""
+     stop: Optional[List[str]] = None
      temperature: Optional[float] = None
-     """The value used to modulate the logits distribution."""
      top_k: Optional[int] = None
-     """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
+     top_n_tokens: Optional[int] = None
      top_p: Optional[float] = None
-     """If set to < 1, only the smallest set of most probable tokens with probabilities that add
-     up to `top_p` or higher are kept for generation.
-     """
      truncate: Optional[int] = None
-     """Truncate input tokens to the given size."""
      typical_p: Optional[float] = None
-     """Typical Decoding mass. See [Typical Decoding for Natural Language
-     Generation](https://hf.co/papers/2202.00666) for more information
-     """
      watermark: Optional[bool] = None
-     """Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)"""


  @dataclass
  class TextGenerationInput(BaseInferenceType):
-     """Inputs for Text Generation inference"""
+     """Text Generation Input.
+     Auto-generated from TGI specs.
+     For more details, check out
+     https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+     """

      inputs: str
-     """The text to initialize generation with"""
-     parameters: Optional[TextGenerationParameters] = None
-     """Additional inference parameters"""
+     parameters: Optional[TextGenerationInputGenerateParameters] = None
      stream: Optional[bool] = None
-     """Whether to stream output tokens"""


- TextGenerationFinishReason = Literal["length", "eos_token", "stop_sequence"]
+ TextGenerationOutputFinishReason = Literal["length", "eos_token", "stop_sequence"]


  @dataclass
- class TextGenerationPrefillToken(BaseInferenceType):
+ class TextGenerationOutputPrefillToken(BaseInferenceType):
      id: int
      logprob: float
      text: str
-     """The text associated with that token"""


  @dataclass
  class TextGenerationOutputToken(BaseInferenceType):
-     """Generated token."""
-
      id: int
+     logprob: float
      special: bool
-     """Whether or not that token is a special one"""
      text: str
-     """The text associated with that token"""
-     logprob: Optional[float] = None


  @dataclass
- class TextGenerationOutputSequenceDetails(BaseInferenceType):
-     finish_reason: "TextGenerationFinishReason"
+ class TextGenerationOutputBestOfSequence(BaseInferenceType):
+     finish_reason: "TextGenerationOutputFinishReason"
      generated_text: str
-     """The generated text"""
      generated_tokens: int
-     """The number of generated tokens"""
-     prefill: List[TextGenerationPrefillToken]
+     prefill: List[TextGenerationOutputPrefillToken]
      tokens: List[TextGenerationOutputToken]
-     """The generated tokens and associated details"""
      seed: Optional[int] = None
-     """The random seed used for generation"""
      top_tokens: Optional[List[List[TextGenerationOutputToken]]] = None
-     """Most likely tokens"""


  @dataclass
  class TextGenerationOutputDetails(BaseInferenceType):
-     """When enabled, details about the generation"""
-
-     finish_reason: "TextGenerationFinishReason"
-     """The reason why the generation was stopped."""
+     finish_reason: "TextGenerationOutputFinishReason"
      generated_tokens: int
-     """The number of generated tokens"""
-     prefill: List[TextGenerationPrefillToken]
+     prefill: List[TextGenerationOutputPrefillToken]
      tokens: List[TextGenerationOutputToken]
-     """The generated tokens and associated details"""
-     best_of_sequences: Optional[List[TextGenerationOutputSequenceDetails]] = None
-     """Details about additional sequences when best_of is provided"""
+     best_of_sequences: Optional[List[TextGenerationOutputBestOfSequence]] = None
      seed: Optional[int] = None
-     """The random seed used for generation"""
      top_tokens: Optional[List[List[TextGenerationOutputToken]]] = None
-     """Most likely tokens"""


  @dataclass
  class TextGenerationOutput(BaseInferenceType):
-     """Outputs for Text Generation inference"""
+     """Text Generation Output.
+     Auto-generated from TGI specs.
+     For more details, check out
+     https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+     """

      generated_text: str
-     """The generated text"""
      details: Optional[TextGenerationOutputDetails] = None
-     """When enabled, details about the generation"""


  @dataclass
- class TextGenerationStreamDetails(BaseInferenceType):
-     """Generation details. Only available when the generation is finished."""
-
-     finish_reason: "TextGenerationFinishReason"
-     """The reason why the generation was stopped."""
+ class TextGenerationStreamOutputStreamDetails(BaseInferenceType):
+     finish_reason: "TextGenerationOutputFinishReason"
      generated_tokens: int
-     """The number of generated tokens"""
-     seed: int
-     """The random seed used for generation"""
+     seed: Optional[int] = None
+
+
+ @dataclass
+ class TextGenerationStreamOutputToken(BaseInferenceType):
+     id: int
+     logprob: float
+     special: bool
+     text: str


  @dataclass
  class TextGenerationStreamOutput(BaseInferenceType):
-     """Text Generation Stream Output"""
+     """Text Generation Stream Output.
+     Auto-generated from TGI specs.
+     For more details, check out
+     https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
+     """

-     token: TextGenerationOutputToken
-     """Generated token."""
-     details: Optional[TextGenerationStreamDetails] = None
-     """Generation details. Only available when the generation is finished."""
+     index: int
+     token: TextGenerationStreamOutputToken
+     details: Optional[TextGenerationStreamOutputStreamDetails] = None
      generated_text: Optional[str] = None
-     """The complete generated text. Only available when the generation is finished."""
-     index: Optional[int] = None
-     """The token index within the stream. Optional to support older clients that omit it."""
+     top_tokens: Optional[List[TextGenerationStreamOutputToken]] = None
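
Reviewer note: the new `grammar` parameter and TextGenerationInputGrammarType appear to surface TGI's constrained (grammar-guided) generation in the typed input. A minimal sketch of building a JSON-constrained request with these dataclasses — the schema and prompt are illustrative, and the transport step is omitted:

from huggingface_hub.inference._generated.types.text_generation import (
    TextGenerationInput,
    TextGenerationInputGenerateParameters,
    TextGenerationInputGrammarType,
)

# Illustrative JSON Schema; per the docstring above, `value` carries a schema when type="json".
person_schema = {
    "type": "object",
    "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
    "required": ["name", "age"],
}

request = TextGenerationInput(
    inputs="Extract the person mentioned in: 'Ada Lovelace was born in 1815.'",
    parameters=TextGenerationInputGenerateParameters(
        grammar=TextGenerationInputGrammarType(type="json", value=person_schema),
        max_new_tokens=100,
        stop=["\n\n"],  # `stop` replaces the former `stop_sequences` field
    ),
)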
huggingface_hub/inference/_templating.py CHANGED
@@ -1,11 +1,9 @@
  from functools import lru_cache
  from typing import Callable, Dict, List, Optional, Union

- from ..utils import HfHubHTTPError, RepositoryNotFoundError, is_minijinja_available
-
+ from huggingface_hub.errors import TemplateError

- class TemplateError(Exception):
-     """Any error raised while trying to fetch or render a chat template."""
+ from ..utils import HfHubHTTPError, RepositoryNotFoundError, is_minijinja_available


  def _import_minijinja():
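
Reviewer note: TemplateError moves out of _templating.py into the new top-level huggingface_hub.errors module (see errors.py, +58 lines in the file list) and is re-imported here, so catching it from the new location is the forward-compatible spelling. A small sketch, where `apply_chat_template` is a placeholder for whatever callable renders the template and is not a huggingface_hub API:

from huggingface_hub.errors import TemplateError

def safe_render(apply_chat_template, messages):
    # Any error raised while fetching or rendering a chat template surfaces as TemplateError.
    try:
        return apply_chat_template(messages)
    except TemplateError as err:
        print(f"Template could not be rendered: {err}")
        return None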
huggingface_hub/keras_mixin.py CHANGED
@@ -265,9 +265,6 @@ def from_pretrained_keras(*args, **kwargs) -> "KerasModelHubMixin":
          force_download (`bool`, *optional*, defaults to `False`):
              Whether to force the (re-)download of the model weights and
              configuration files, overriding the cached versions if they exist.
-         resume_download (`bool`, *optional*, defaults to `False`):
-             Whether to delete incompletely received files. Will attempt to
-             resume the download if such a file exists.
          proxies (`Dict[str, str]`, *optional*):
              A dictionary of proxy servers to use by protocol or endpoint, e.g.,
              `{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The
huggingface_hub/lfs.py CHANGED
@@ -31,6 +31,7 @@ from huggingface_hub.constants import ENDPOINT, HF_HUB_ENABLE_HF_TRANSFER, REPO_
  from .utils import (
      build_hf_headers,
+     fix_hf_endpoint_in_url,
      get_session,
      hf_raise_for_status,
      http_backoff,
@@ -193,6 +194,7 @@ def lfs_upload(
      lfs_batch_action: Dict,
      token: Optional[str] = None,
      headers: Optional[Dict[str, str]] = None,
+     endpoint: Optional[str] = None,
  ) -> None:
      """
      Handles uploading a given object to the Hub with the LFS protocol.
@@ -230,6 +232,7 @@ def lfs_upload(
      # 2. Upload file (either single part or multi-part)
      header = upload_action.get("header", {})
      chunk_size = header.get("chunk_size")
+     upload_url = fix_hf_endpoint_in_url(upload_action["href"], endpoint=endpoint)
      if chunk_size is not None:
          try:
              chunk_size = int(chunk_size)
@@ -237,15 +240,16 @@ def lfs_upload(
              raise ValueError(
                  f"Malformed response from LFS batch endpoint: `chunk_size` should be an integer. Got '{chunk_size}'."
              )
-         _upload_multi_part(operation=operation, header=header, chunk_size=chunk_size, upload_url=upload_action["href"])
+         _upload_multi_part(operation=operation, header=header, chunk_size=chunk_size, upload_url=upload_url)
      else:
-         _upload_single_part(operation=operation, upload_url=upload_action["href"])
+         _upload_single_part(operation=operation, upload_url=upload_url)

      # 3. Verify upload went well
      if verify_action is not None:
          _validate_lfs_action(verify_action)
+         verify_url = fix_hf_endpoint_in_url(verify_action["href"], endpoint)
          verify_resp = get_session().post(
-             verify_action["href"],
+             verify_url,
              headers=build_hf_headers(token=token, headers=headers),
              json={"oid": operation.upload_info.sha256.hex(), "size": operation.upload_info.size},
          )
@@ -426,7 +430,15 @@ def _upload_parts_hf_transfer(
      # see https://github.com/huggingface/huggingface_hub/pull/2000
      disable = True if (logger.getEffectiveLevel() == logging.NOTSET) else None

-     with tqdm(unit="B", unit_scale=True, total=total, initial=0, desc=desc, disable=disable) as progress:
+     with tqdm(
+         unit="B",
+         unit_scale=True,
+         total=total,
+         initial=0,
+         desc=desc,
+         disable=disable,
+         name="huggingface_hub.lfs_upload",
+     ) as progress:
          try:
              output = multipart_upload(
                  file_path=operation.path_or_fileobj,
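
Reviewer note: the upload and verify URLs returned by the LFS batch endpoint are now routed through fix_hf_endpoint_in_url with the caller-supplied `endpoint`, presumably so uploads keep working against a non-default Hub endpoint (e.g. a private mirror). The real helper lives in huggingface_hub.utils; the snippet below is only a hypothetical stand-in sketching the idea, not the actual implementation:

from typing import Optional
from urllib.parse import urlparse, urlunparse

def rewrite_lfs_url(url: str, endpoint: Optional[str], default: str = "https://huggingface.co") -> str:
    # Hypothetical illustration: swap the default Hub host for a custom endpoint,
    # but leave third-party URLs (e.g. pre-signed S3 links) untouched.
    if not endpoint or endpoint == default:
        return url
    parsed = urlparse(url)
    if parsed.netloc != urlparse(default).netloc:
        return url
    custom = urlparse(endpoint)
    return urlunparse(parsed._replace(scheme=custom.scheme, netloc=custom.netloc))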
huggingface_hub/repository.py CHANGED
@@ -395,6 +395,7 @@ def _lfs_log_progress():
                  unit="B",
                  unit_scale=True,
                  unit_divisor=1024,
+                 name="huggingface_hub.lfs_upload",
              ),
              "past_bytes": int(current_bytes),
          }
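
Reviewer note: both LFS progress bars are now created with name="huggingface_hub.lfs_upload", which lines up with the reworked utils/tqdm.py (+124/-46 in this release). Assuming that rework adds group-level control of progress bars by name — an assumption based on the new `name=` argument, not verified here — callers could silence just the upload bars:

from huggingface_hub.utils import disable_progress_bars, enable_progress_bars

# Assumed 0.23.0 behaviour: passing a group name targets only bars created with that name.
disable_progress_bars("huggingface_hub.lfs_upload")
# ... run uploads without per-file progress bars ...
enable_progress_bars("huggingface_hub.lfs_upload")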