huggingface-hub 0.33.5__py3-none-any.whl → 0.35.0rc0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Note: this release of huggingface-hub has been flagged as potentially problematic.
- huggingface_hub/__init__.py +487 -525
- huggingface_hub/_commit_api.py +21 -28
- huggingface_hub/_jobs_api.py +145 -0
- huggingface_hub/_local_folder.py +7 -1
- huggingface_hub/_login.py +5 -5
- huggingface_hub/_oauth.py +1 -1
- huggingface_hub/_snapshot_download.py +11 -6
- huggingface_hub/_upload_large_folder.py +46 -23
- huggingface_hub/cli/__init__.py +27 -0
- huggingface_hub/cli/_cli_utils.py +69 -0
- huggingface_hub/cli/auth.py +210 -0
- huggingface_hub/cli/cache.py +405 -0
- huggingface_hub/cli/download.py +181 -0
- huggingface_hub/cli/hf.py +66 -0
- huggingface_hub/cli/jobs.py +522 -0
- huggingface_hub/cli/lfs.py +198 -0
- huggingface_hub/cli/repo.py +243 -0
- huggingface_hub/cli/repo_files.py +128 -0
- huggingface_hub/cli/system.py +52 -0
- huggingface_hub/cli/upload.py +316 -0
- huggingface_hub/cli/upload_large_folder.py +132 -0
- huggingface_hub/commands/_cli_utils.py +5 -0
- huggingface_hub/commands/delete_cache.py +3 -1
- huggingface_hub/commands/download.py +4 -0
- huggingface_hub/commands/env.py +3 -0
- huggingface_hub/commands/huggingface_cli.py +2 -0
- huggingface_hub/commands/repo.py +4 -0
- huggingface_hub/commands/repo_files.py +4 -0
- huggingface_hub/commands/scan_cache.py +3 -1
- huggingface_hub/commands/tag.py +3 -1
- huggingface_hub/commands/upload.py +4 -0
- huggingface_hub/commands/upload_large_folder.py +3 -1
- huggingface_hub/commands/user.py +11 -1
- huggingface_hub/commands/version.py +3 -0
- huggingface_hub/constants.py +1 -0
- huggingface_hub/file_download.py +16 -5
- huggingface_hub/hf_api.py +519 -7
- huggingface_hub/hf_file_system.py +8 -16
- huggingface_hub/hub_mixin.py +3 -3
- huggingface_hub/inference/_client.py +38 -39
- huggingface_hub/inference/_common.py +38 -11
- huggingface_hub/inference/_generated/_async_client.py +50 -51
- huggingface_hub/inference/_generated/types/__init__.py +1 -0
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_mcp/cli.py +36 -18
- huggingface_hub/inference/_mcp/constants.py +8 -0
- huggingface_hub/inference/_mcp/types.py +3 -0
- huggingface_hub/inference/_providers/__init__.py +4 -1
- huggingface_hub/inference/_providers/_common.py +3 -6
- huggingface_hub/inference/_providers/fal_ai.py +85 -42
- huggingface_hub/inference/_providers/hf_inference.py +17 -9
- huggingface_hub/inference/_providers/replicate.py +19 -1
- huggingface_hub/keras_mixin.py +2 -2
- huggingface_hub/repocard.py +1 -1
- huggingface_hub/repository.py +2 -2
- huggingface_hub/utils/_auth.py +1 -1
- huggingface_hub/utils/_cache_manager.py +2 -2
- huggingface_hub/utils/_dotenv.py +51 -0
- huggingface_hub/utils/_headers.py +1 -1
- huggingface_hub/utils/_runtime.py +1 -1
- huggingface_hub/utils/_xet.py +6 -2
- huggingface_hub/utils/_xet_progress_reporting.py +141 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/METADATA +7 -8
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/RECORD +68 -51
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/entry_points.txt +1 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_providers/fal_ai.py
CHANGED
@@ -6,7 +6,7 @@ from urllib.parse import urlparse
 
 from huggingface_hub import constants
 from huggingface_hub.hf_api import InferenceProviderMapping
-from huggingface_hub.inference._common import RequestParameters, _as_dict
+from huggingface_hub.inference._common import RequestParameters, _as_dict, _as_url
 from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
 from huggingface_hub.utils import get_session, hf_raise_for_status
 from huggingface_hub.utils.logging import get_logger
@@ -32,6 +32,60 @@ class FalAITask(TaskProviderHelper, ABC):
         return f"/{mapped_model}"
 
 
+class FalAIQueueTask(TaskProviderHelper, ABC):
+    def __init__(self, task: str):
+        super().__init__(provider="fal-ai", base_url="https://queue.fal.run", task=task)
+
+    def _prepare_headers(self, headers: Dict, api_key: str) -> Dict:
+        headers = super()._prepare_headers(headers, api_key)
+        if not api_key.startswith("hf_"):
+            headers["authorization"] = f"Key {api_key}"
+        return headers
+
+    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
+        if api_key.startswith("hf_"):
+            # Use the queue subdomain for HF routing
+            return f"/{mapped_model}?_subdomain=queue"
+        return f"/{mapped_model}"
+
+    def get_response(
+        self,
+        response: Union[bytes, Dict],
+        request_params: Optional[RequestParameters] = None,
+    ) -> Any:
+        response_dict = _as_dict(response)
+
+        request_id = response_dict.get("request_id")
+        if not request_id:
+            raise ValueError("No request ID found in the response")
+        if request_params is None:
+            raise ValueError(
+                f"A `RequestParameters` object should be provided to get {self.task} responses with Fal AI."
+            )
+
+        # extract the base url and query params
+        parsed_url = urlparse(request_params.url)
+        # a bit hacky way to concatenate the provider name without parsing `parsed_url.path`
+        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}{'/fal-ai' if parsed_url.netloc == 'router.huggingface.co' else ''}"
+        query_param = f"?{parsed_url.query}" if parsed_url.query else ""
+
+        # extracting the provider model id for status and result urls
+        # from the response as it might be different from the mapped model in `request_params.url`
+        model_id = urlparse(response_dict.get("response_url")).path
+        status_url = f"{base_url}{str(model_id)}/status{query_param}"
+        result_url = f"{base_url}{str(model_id)}{query_param}"
+
+        status = response_dict.get("status")
+        logger.info("Generating the output.. this can take several minutes.")
+        while status != "COMPLETED":
+            time.sleep(_POLLING_INTERVAL)
+            status_response = get_session().get(status_url, headers=request_params.headers)
+            hf_raise_for_status(status_response)
+            status = status_response.json().get("status")
+
+        return get_session().get(result_url, headers=request_params.headers).json()
+
+
 class FalAIAutomaticSpeechRecognitionTask(FalAITask):
     def __init__(self):
         super().__init__("automatic-speech-recognition")
@@ -110,23 +164,10 @@ class FalAITextToSpeechTask(FalAITask):
         return get_session().get(url).content
 
 
-class FalAITextToVideoTask(FalAITask):
+class FalAITextToVideoTask(FalAIQueueTask):
     def __init__(self):
         super().__init__("text-to-video")
 
-    def _prepare_base_url(self, api_key: str) -> str:
-        if api_key.startswith("hf_"):
-            return super()._prepare_base_url(api_key)
-        else:
-            logger.info(f"Calling '{self.provider}' provider directly.")
-            return "https://queue.fal.run"
-
-    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
-        if api_key.startswith("hf_"):
-            # Use the queue subdomain for HF routing
-            return f"/{mapped_model}?_subdomain=queue"
-        return f"/{mapped_model}"
-
     def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
@@ -137,36 +178,38 @@ class FalAITextToVideoTask(FalAITask):
         response: Union[bytes, Dict],
         request_params: Optional[RequestParameters] = None,
     ) -> Any:
-        response_dict = _as_dict(response)
+        output = super().get_response(response, request_params)
+        url = _as_dict(output)["video"]["url"]
+        return get_session().get(url).content
 
-        request_id = response_dict.get("request_id")
-        if not request_id:
-            raise ValueError("No request ID found in the response")
-        if request_params is None:
-            raise ValueError(
-                "A `RequestParameters` object should be provided to get text-to-video responses with Fal AI."
-            )
 
-        # extract the base url and query params
-        parsed_url = urlparse(request_params.url)
-        # a bit hacky way to concatenate the provider name without parsing `parsed_url.path`
-        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}{'/fal-ai' if parsed_url.netloc == 'router.huggingface.co' else ''}"
-        query_param = f"?{parsed_url.query}" if parsed_url.query else ""
+class FalAIImageToImageTask(FalAIQueueTask):
+    def __init__(self):
+        super().__init__("image-to-image")
 
-        # extracting the provider model id for status and result urls
-        # from the response as it might be different from the mapped model in `request_params.url`
-        model_id = urlparse(response_dict.get("response_url")).path
-        status_url = f"{base_url}{str(model_id)}/status{query_param}"
-        result_url = f"{base_url}{str(model_id)}{query_param}"
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        image_url = _as_url(inputs, default_mime_type="image/jpeg")
+        payload: Dict[str, Any] = {
+            "image_url": image_url,
+            **filter_none(parameters),
+        }
+        if provider_mapping_info.adapter_weights_path is not None:
+            lora_path = constants.HUGGINGFACE_CO_URL_TEMPLATE.format(
+                repo_id=provider_mapping_info.hf_model_id,
+                revision="main",
+                filename=provider_mapping_info.adapter_weights_path,
+            )
+            payload["loras"] = [{"path": lora_path, "scale": 1}]
 
-        status = response_dict.get("status")
-        logger.info("Generating the video.. this can take several minutes.")
-        while status != "COMPLETED":
-            time.sleep(_POLLING_INTERVAL)
-            status_response = get_session().get(status_url, headers=request_params.headers)
-            hf_raise_for_status(status_response)
-            status = status_response.json().get("status")
+        return payload
 
-        response = get_session().get(result_url, headers=request_params.headers).json()
-        url = _as_dict(response)["video"]["url"]
+    def get_response(
+        self,
+        response: Union[bytes, Dict],
+        request_params: Optional[RequestParameters] = None,
+    ) -> Any:
+        output = super().get_response(response, request_params)
+        url = _as_dict(output)["images"][0]["url"]
         return get_session().get(url).content
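The new `FalAIQueueTask` base class factors the fal.ai queue polling that previously lived inside `FalAITextToVideoTask.get_response` into a shared helper, so text-to-video and the new image-to-image task reuse it. A minimal sketch of the submit/poll/fetch flow it implements (the `COMPLETED` sentinel and URL shapes come from the diff above; the fake status sequence below stands in for the real HTTP calls):

```python
import time

# Fake status sequence standing in for successive GET {status_url} responses.
statuses = iter(["IN_QUEUE", "IN_PROGRESS", "COMPLETED"])

status = next(statuses)
while status != "COMPLETED":
    time.sleep(0.01)         # the real code sleeps _POLLING_INTERVAL between polls
    status = next(statuses)  # real code: get_session().get(status_url, ...).json().get("status")
print("COMPLETED -> GET result_url for the final JSON payload")
```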
huggingface_hub/inference/_providers/hf_inference.py
CHANGED
@@ -2,6 +2,7 @@ import json
 from functools import lru_cache
 from pathlib import Path
 from typing import Any, Dict, Optional, Union
+from urllib.parse import urlparse, urlunparse
 
 from huggingface_hub import constants
 from huggingface_hub.hf_api import InferenceProviderMapping
@@ -59,7 +60,7 @@ class HFInferenceTask(TaskProviderHelper):
             raise ValueError(f"Unexpected binary input for task {self.task}.")
         if isinstance(inputs, Path):
             raise ValueError(f"Unexpected path input for task {self.task} (got {inputs})")
-        return {"inputs": inputs, "parameters": filter_none(parameters)}
+        return filter_none({"inputs": inputs, "parameters": parameters})
 
 
 class HFInferenceBinaryInputTask(HFInferenceTask):
@@ -125,18 +126,25 @@ class HFInferenceConversational(HFInferenceTask):
 
 
 def _build_chat_completion_url(model_url: str) -> str:
-    # Strip trailing /
-    model_url = model_url.rstrip("/")
+    parsed = urlparse(model_url)
+    path = parsed.path.rstrip("/")
 
-    # Append /chat/completions if not already present
-    if model_url.endswith("/v1"):
-        model_url += "/chat/completions"
+    # If the path already ends with /chat/completions, we're done!
+    if path.endswith("/chat/completions"):
+        return model_url
 
+    # Append /chat/completions if not already present
+    if path.endswith("/v1"):
+        new_path = path + "/chat/completions"
+    # If path was empty or just "/", set the full path
+    elif not path:
+        new_path = "/v1/chat/completions"
     # Append /v1/chat/completions if not already present
-    if not model_url.endswith("/chat/completions"):
-        model_url = model_url + "/v1/chat/completions"
+    else:
+        new_path = path + "/v1/chat/completions"
 
-    return model_url
+    # Reconstruct the URL with the new path and original query parameters.
+    return urlunparse(parsed._replace(path=new_path))
 
 
 @lru_cache(maxsize=1)
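Rewriting `_build_chat_completion_url` on top of `urlparse`/`urlunparse` means query strings now survive the path manipulation. An illustrative standalone reproduction of the new logic (hostnames are made up):

```python
from urllib.parse import urlparse, urlunparse

def build(model_url: str) -> str:
    # Same branching as the hunk above, reproduced for illustration.
    parsed = urlparse(model_url)
    path = parsed.path.rstrip("/")
    if path.endswith("/chat/completions"):
        return model_url
    if path.endswith("/v1"):
        new_path = path + "/chat/completions"
    elif not path:
        new_path = "/v1/chat/completions"
    else:
        new_path = path + "/v1/chat/completions"
    return urlunparse(parsed._replace(path=new_path))

print(build("https://example.test/models/gpt"))  # https://example.test/models/gpt/v1/chat/completions
print(build("https://example.test/v1"))          # https://example.test/v1/chat/completions
print(build("https://example.test/v1?x=1"))      # https://example.test/v1/chat/completions?x=1
```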
huggingface_hub/inference/_providers/replicate.py
CHANGED
@@ -1,7 +1,7 @@
 from typing import Any, Dict, Optional, Union
 
 from huggingface_hub.hf_api import InferenceProviderMapping
-from huggingface_hub.inference._common import RequestParameters, _as_dict
+from huggingface_hub.inference._common import RequestParameters, _as_dict, _as_url
 from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
 from huggingface_hub.utils import get_session
 
@@ -70,3 +70,21 @@ class ReplicateTextToSpeechTask(ReplicateTask):
         payload: Dict = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)  # type: ignore[assignment]
         payload["input"]["text"] = payload["input"].pop("prompt")  # rename "prompt" to "text" for TTS
         return payload
+
+
+class ReplicateImageToImageTask(ReplicateTask):
+    def __init__(self):
+        super().__init__("image-to-image")
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        image_url = _as_url(inputs, default_mime_type="image/jpeg")
+
+        payload: Dict[str, Any] = {"input": {"input_image": image_url, **filter_none(parameters)}}
+
+        mapped_model = provider_mapping_info.provider_id
+        if ":" in mapped_model:
+            version = mapped_model.split(":", 1)[1]
+            payload["version"] = version
+        return payload
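`ReplicateImageToImageTask` pins an explicit model version whenever the provider id carries one after a colon. Illustrative only (the id and payload below are hypothetical):

```python
# Hypothetical pinned provider id of the form "owner/model:version".
mapped_model = "owner/some-model:9f8e7d6c"
payload = {"input": {"input_image": "data:image/jpeg;base64,..."}}
if ":" in mapped_model:
    payload["version"] = mapped_model.split(":", 1)[1]
print(payload["version"])  # 9f8e7d6c
```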
huggingface_hub/keras_mixin.py
CHANGED
@@ -45,7 +45,7 @@ def _requires_keras_2_model(fn: CallableT) -> CallableT:
         if not hasattr(model, "history"):  # hacky way to check if model is Keras 2.x
             raise NotImplementedError(
                 f"Cannot use '{fn.__name__}': Keras 3.x is not supported."
-                " Please save models manually and upload them using `upload_folder` or `huggingface-cli upload`."
+                " Please save models manually and upload them using `upload_folder` or `hf upload`."
             )
         return fn(model, *args, **kwargs)
 
@@ -338,7 +338,7 @@ def push_to_hub_keras(
         token (`str`, *optional*):
             The token to use as HTTP bearer authorization for remote files. If
             not set, will use the token set when logging in with
-            `huggingface-cli login` (stored in `~/.huggingface`).
+            `hf auth login` (stored in `~/.huggingface`).
         branch (`str`, *optional*):
             The git branch on which to push the model. This defaults to
             the default branch as specified in your repository, which
huggingface_hub/repocard.py
CHANGED
@@ -276,7 +276,7 @@ class RepoCard:
 
         with SoftTemporaryDirectory() as tmpdir:
             tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
-            tmp_path.write_text(str(self))
+            tmp_path.write_text(str(self), encoding="utf-8")
             url = upload_file(
                 path_or_fileobj=str(tmp_path),
                 path_in_repo=constants.REPOCARD_NAME,
huggingface_hub/repository.py
CHANGED
@@ -487,7 +487,7 @@ class Repository:
                 To set when cloning a repo from a repo_id. Default is model.
             token (`bool` or `str`, *optional*):
                 A valid authentication token (see https://huggingface.co/settings/token).
-                If `None` or `True` and machine is logged in (through `huggingface-cli login`
+                If `None` or `True` and machine is logged in (through `hf auth login`
                 or [`~huggingface_hub.login`]), token will be retrieved from the cache.
                 If `False`, token is not sent in the request header.
             git_user (`str`, *optional*):
@@ -878,7 +878,7 @@ class Repository:
         """
         try:
             lfs_config = "git config lfs.customtransfer.multipart"
-            run_subprocess(f"{lfs_config}.path huggingface-cli", self.local_dir)
+            run_subprocess(f"{lfs_config}.path hf", self.local_dir)
             run_subprocess(
                 f"{lfs_config}.args {LFS_MULTIPART_UPLOAD_COMMAND}",
                 self.local_dir,
huggingface_hub/utils/_auth.py
CHANGED
@@ -41,7 +41,7 @@ def get_token() -> Optional[str]:
 
     Token is retrieved in priority from the `HF_TOKEN` environment variable. Otherwise, we read the token file located
     in the Hugging Face home folder. Returns None if user is not logged in. To log in, use [`login`] or
-    `huggingface-cli login`.
+    `hf auth login`.
 
     Returns:
         `str` or `None`: The token, `None` if it doesn't exist.
huggingface_hub/utils/_cache_manager.py
CHANGED
@@ -632,9 +632,9 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
     )
     ```
 
-    You can also print a detailed report directly from the `huggingface-cli` using:
+    You can also print a detailed report directly from the `hf` command line using:
     ```text
-    > huggingface-cli scan-cache
+    > hf cache scan
     REPO ID                     REPO TYPE SIZE ON DISK NB FILES REFS                LOCAL PATH
     --------------------------- --------- ------------ -------- ------------------- -------------------------------------------------------------------------
     glue                        dataset         116.3K       15 1.17.0, main, 2.4.0 /Users/lucain/.cache/huggingface/hub/datasets--glue
huggingface_hub/utils/_dotenv.py
ADDED
@@ -0,0 +1,51 @@
+# AI-generated module (ChatGPT)
+import re
+from typing import Dict
+
+
+def load_dotenv(dotenv_str: str) -> Dict[str, str]:
+    """
+    Parse a DOTENV-format string and return a dictionary of key-value pairs.
+    Handles quoted values, comments, export keyword, and blank lines.
+    """
+    env: Dict[str, str] = {}
+    line_pattern = re.compile(
+        r"""
+        ^\s*
+        (?:export\s+)?              # optional export
+        ([A-Za-z_][A-Za-z0-9_]*)    # key
+        \s*=\s*
+        (                           # value group
+            (?:
+                '(?:\\'|[^'])*'     # single-quoted value
+                | "(?:\\"|[^"])*"   # double-quoted value
+                | [^#\n\r]+?        # unquoted value
+            )
+        )?
+        \s*(?:\#.*)?$               # optional inline comment
+        """,
+        re.VERBOSE,
+    )
+
+    for line in dotenv_str.splitlines():
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue  # Skip comments and empty lines
+
+        match = line_pattern.match(line)
+        if not match:
+            continue  # Skip malformed lines
+
+        key, raw_val = match.group(1), match.group(2) or ""
+        val = raw_val.strip()
+
+        # Remove surrounding quotes if quoted
+        if (val.startswith('"') and val.endswith('"')) or (val.startswith("'") and val.endswith("'")):
+            val = val[1:-1]
+            val = val.replace(r"\n", "\n").replace(r"\t", "\t").replace(r"\"", '"').replace(r"\\", "\\")
+            if raw_val.startswith('"'):
+                val = val.replace(r"\$", "$")  # only in double quotes
+
+        env[key] = val
+
+    return env
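A quick usage sketch of the new helper (input string is made up; note `load_dotenv` parses a string, not a file path):

```python
from huggingface_hub.utils._dotenv import load_dotenv

env = load_dotenv('export HF_TOKEN="hf_xxx"  # inline comment\nDEBUG=1\n')
assert env == {"HF_TOKEN": "hf_xxx", "DEBUG": "1"}
```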
huggingface_hub/utils/_headers.py
CHANGED
@@ -159,7 +159,7 @@ def get_token_to_send(token: Optional[Union[bool, str]]) -> Optional[str]:
             raise LocalTokenNotFoundError(
                 "Token is required (`token=True`), but no token found. You"
                 " need to provide a token or be logged in to Hugging Face with"
-                " `huggingface-cli login` or `huggingface_hub.login`. See"
+                " `hf auth login` or `huggingface_hub.login`. See"
                 " https://huggingface.co/settings/tokens."
             )
         return cached_token
huggingface_hub/utils/_runtime.py
CHANGED
@@ -155,7 +155,7 @@ def get_hf_transfer_version() -> str:
 # xet
 def is_xet_available() -> bool:
     # since hf_xet is automatically used if available, allow explicit disabling via environment variable
-    if constants.
+    if constants.HF_HUB_DISABLE_XET:
         return False
 
     return is_package_available("hf_xet")
huggingface_hub/utils/_xet.py
CHANGED
@@ -26,7 +26,9 @@ class XetConnectionInfo:
     endpoint: str
 
 
-def parse_xet_file_data_from_response(response: requests.Response) -> Optional[XetFileData]:
+def parse_xet_file_data_from_response(
+    response: requests.Response, endpoint: Optional[str] = None
+) -> Optional[XetFileData]:
     """
     Parse XET file metadata from an HTTP response.
 
@@ -52,7 +54,9 @@ def parse_xet_file_data_from_response(response: requests.Response) -> Optional[XetFileData]:
         refresh_route = response.headers[constants.HUGGINGFACE_HEADER_X_XET_REFRESH_ROUTE]
     except KeyError:
         return None
-
+    endpoint = endpoint if endpoint is not None else constants.ENDPOINT
+    if refresh_route.startswith(constants.HUGGINGFACE_CO_URL_HOME):
+        refresh_route = refresh_route.replace(constants.HUGGINGFACE_CO_URL_HOME.rstrip("/"), endpoint.rstrip("/"))
     return XetFileData(
         file_hash=file_hash,
         refresh_route=refresh_route,
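The new `endpoint` parameter lets refresh routes returned by the Hub be rebased onto a custom endpoint. A sketch with illustrative values (in the library, `HUGGINGFACE_CO_URL_HOME` is `https://huggingface.co/` and `endpoint` defaults to `constants.ENDPOINT`):

```python
home = "https://huggingface.co/"
endpoint = "https://hub.example.com"  # hypothetical private Hub endpoint

refresh_route = "https://huggingface.co/api/models/user/repo/xet-read-token/main"
if refresh_route.startswith(home):
    refresh_route = refresh_route.replace(home.rstrip("/"), endpoint.rstrip("/"))
print(refresh_route)  # https://hub.example.com/api/models/user/repo/xet-read-token/main
```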
huggingface_hub/utils/_xet_progress_reporting.py
ADDED
@@ -0,0 +1,141 @@
+from collections import OrderedDict
+from typing import List
+
+from hf_xet import PyItemProgressUpdate, PyTotalProgressUpdate
+
+from .tqdm import tqdm
+
+
+class XetProgressReporter:
+    def __init__(self, n_lines: int = 10, description_width: int = 40):
+        self.n_lines = n_lines
+        self.description_width = description_width
+
+        self.tqdm_settings = {
+            "unit": "B",
+            "unit_scale": True,
+            "leave": True,
+            "unit_divisor": 1000,
+            "nrows": n_lines + 3,
+            "miniters": 1,
+            "bar_format": "{l_bar}{bar}| {n_fmt:>5}B / {total_fmt:>5}B{postfix:>12}",
+        }
+
+        # Overall progress bars
+        self.data_processing_bar = tqdm(
+            total=0, desc=self.format_desc("Processing Files (0 / 0)", False), position=0, **self.tqdm_settings
+        )
+
+        self.upload_bar = tqdm(
+            total=0, desc=self.format_desc("New Data Upload", False), position=1, **self.tqdm_settings
+        )
+
+        self.known_items: set[str] = set()
+        self.completed_items: set[str] = set()
+
+        # Item bars (scrolling view)
+        self.item_state: OrderedDict[str, PyItemProgressUpdate] = OrderedDict()
+        self.current_bars: List = [None] * self.n_lines
+
+    def format_desc(self, name: str, indent: bool) -> str:
+        """
+        if name is longer than width characters, prints ... at the start and then the last width-3 characters of the name, otherwise
+        the whole name right justified into 20 characters. Also adds some padding.
+        """
+        padding = "  " if indent else ""
+        width = self.description_width - len(padding)
+
+        if len(name) > width:
+            name = f"...{name[-(width - 3) :]}"
+
+        return f"{padding}{name.ljust(width)}"
+
+    def update_progress(self, total_update: PyTotalProgressUpdate, item_updates: List[PyItemProgressUpdate]):
+        # Update all the per-item values.
+        for item in item_updates:
+            item_name = item.item_name
+
+            self.known_items.add(item_name)
+
+            # Only care about items where the processing has already started.
+            if item.bytes_completed == 0:
+                continue
+
+            # Overwrite the existing value in there.
+            self.item_state[item_name] = item
+
+        bar_idx = 0
+        new_completed = []
+
+        # Now, go through and update all the bars
+        for name, item in self.item_state.items():
+            # Is this ready to be removed on the next update?
+            if item.bytes_completed == item.total_bytes:
+                self.completed_items.add(name)
+                new_completed.append(name)
+
+            # If we've run out of bars to use, then collapse the last ones together.
+            if bar_idx >= len(self.current_bars):
+                bar = self.current_bars[-1]
+                in_final_bar_mode = True
+                final_bar_aggregation_count = bar_idx + 1 - len(self.current_bars)
+            else:
+                bar = self.current_bars[bar_idx]
+                in_final_bar_mode = False
+
+            if bar is None:
+                self.current_bars[bar_idx] = tqdm(
+                    desc=self.format_desc(name, True),
+                    position=2 + bar_idx,  # Set to the position past the initial bars.
+                    total=item.total_bytes,
+                    initial=item.bytes_completed,
+                    **self.tqdm_settings,
+                )
+
+            elif in_final_bar_mode:
+                bar.n += item.bytes_completed
+                bar.total += item.total_bytes
+                bar.set_description(self.format_desc(f"[+ {final_bar_aggregation_count} files]", True), refresh=False)
+            else:
+                bar.set_description(self.format_desc(name, True), refresh=False)
+                bar.n = item.bytes_completed
+                bar.total = item.total_bytes
+
+            bar_idx += 1
+
+        # Remove all the completed ones from the ordered dictionary
+        for name in new_completed:
+            # Only remove ones from consideration to make room for more items coming in.
+            if len(self.item_state) <= self.n_lines:
+                break
+
+            del self.item_state[name]
+
+        # Now manually refresh each of the bars
+        for bar in self.current_bars:
+            if bar:
+                bar.refresh()
+
+        # Update overall bars
+        def postfix(speed):
+            s = tqdm.format_sizeof(speed) if speed is not None else "???"
+            return f"{s}B/s ".rjust(10, " ")
+
+        self.data_processing_bar.total = total_update.total_bytes
+        self.data_processing_bar.set_description(
+            self.format_desc(f"Processing Files ({len(self.completed_items)} / {len(self.known_items)})", False),
+            refresh=False,
+        )
+        self.data_processing_bar.set_postfix_str(postfix(total_update.total_bytes_completion_rate), refresh=False)
+        self.data_processing_bar.update(total_update.total_bytes_completion_increment)
+
+        self.upload_bar.total = total_update.total_transfer_bytes
+        self.upload_bar.set_postfix_str(postfix(total_update.total_transfer_bytes_completion_rate), refresh=False)
+        self.upload_bar.update(total_update.total_transfer_bytes_completion_increment)
+
+    def close(self, _success):
+        self.data_processing_bar.close()
+        self.upload_bar.close()
+        for bar in self.current_bars:
+            if bar:
+                bar.close()
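`format_desc` keeps every bar label at a fixed width so the scrolling view lines up, truncating long names from the left. A standalone reproduction of that truncation (the file name is made up):

```python
name = "checkpoints/very/long/nested/path/model-00001-of-00042.safetensors"
width = 40  # description_width, minus any indent padding
if len(name) > width:
    name = f"...{name[-(width - 3):]}"
print(repr(name.ljust(width)))  # '...path/model-00001-of-00042.safetensors'
```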
{huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: huggingface-hub
-Version: 0.33.5
+Version: 0.35.0rc0
 Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
 Home-page: https://github.com/huggingface/huggingface_hub
 Author: Hugging Face, Inc.
@@ -32,7 +32,7 @@ Requires-Dist: pyyaml>=5.1
 Requires-Dist: requests
 Requires-Dist: tqdm>=4.42.1
 Requires-Dist: typing-extensions>=3.7.4.3
-Requires-Dist: hf-xet<2.0.0,>=1.1.
+Requires-Dist: hf-xet<2.0.0,>=1.1.3; platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "arm64" or platform_machine == "aarch64"
 Provides-Extra: all
 Requires-Dist: InquirerPy==0.3.4; extra == "all"
 Requires-Dist: aiohttp; extra == "all"
@@ -56,7 +56,7 @@ Requires-Dist: Pillow; extra == "all"
 Requires-Dist: gradio>=4.0.0; extra == "all"
 Requires-Dist: numpy; extra == "all"
 Requires-Dist: ruff>=0.9.0; extra == "all"
-Requires-Dist: libcst
+Requires-Dist: libcst>=1.4.0; extra == "all"
 Requires-Dist: typing-extensions>=4.8.0; extra == "all"
 Requires-Dist: types-PyYAML; extra == "all"
 Requires-Dist: types-requests; extra == "all"
@@ -91,7 +91,7 @@ Requires-Dist: Pillow; extra == "dev"
 Requires-Dist: gradio>=4.0.0; extra == "dev"
 Requires-Dist: numpy; extra == "dev"
 Requires-Dist: ruff>=0.9.0; extra == "dev"
-Requires-Dist: libcst
+Requires-Dist: libcst>=1.4.0; extra == "dev"
 Requires-Dist: typing-extensions>=4.8.0; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
 Requires-Dist: types-requests; extra == "dev"
@@ -122,7 +122,7 @@ Requires-Dist: httpx; extra == "oauth"
 Requires-Dist: itsdangerous; extra == "oauth"
 Provides-Extra: quality
 Requires-Dist: ruff>=0.9.0; extra == "quality"
-Requires-Dist: libcst
+Requires-Dist: libcst>=1.4.0; extra == "quality"
 Requires-Dist: mypy<1.15.0,>=1.14.1; python_version == "3.8" and extra == "quality"
 Requires-Dist: mypy==1.15.0; python_version >= "3.9" and extra == "quality"
 Provides-Extra: tensorflow
@@ -265,9 +265,9 @@ Files will be downloaded in a local cache folder. More details in [this guide](h
 The Hugging Face Hub uses tokens to authenticate applications (see [docs](https://huggingface.co/docs/hub/security-tokens)). To log in your machine, run the following CLI:
 
 ```bash
-huggingface-cli login
+hf auth login
 # or using an environment variable
-huggingface-cli login --token $HUGGINGFACE_TOKEN
+hf auth login --token $HUGGINGFACE_TOKEN
 ```
 
 ### Create a repository
@@ -314,7 +314,6 @@ The advantages are:
 
 - Free model or dataset hosting for libraries and their users.
 - Built-in file versioning, even with very large files, thanks to a git-based approach.
-- Serverless inference API for all models publicly available.
 - In-browser widgets to play with the uploaded models.
 - Anyone can upload a new model for your library, they just need to add the corresponding tag for the model to be discoverable.
 - Fast downloads! We use Cloudfront (a CDN) to geo-replicate downloads so they're blazing fast from anywhere on the globe.