huggingface-hub 1.0.0rc1__py3-none-any.whl → 1.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +4 -7
- huggingface_hub/_commit_api.py +126 -66
- huggingface_hub/_commit_scheduler.py +4 -7
- huggingface_hub/_login.py +10 -16
- huggingface_hub/_snapshot_download.py +119 -21
- huggingface_hub/_tensorboard_logger.py +2 -5
- huggingface_hub/_upload_large_folder.py +1 -2
- huggingface_hub/_webhooks_server.py +8 -20
- huggingface_hub/cli/_cli_utils.py +12 -6
- huggingface_hub/cli/download.py +32 -7
- huggingface_hub/cli/repo.py +137 -5
- huggingface_hub/dataclasses.py +122 -2
- huggingface_hub/errors.py +4 -0
- huggingface_hub/fastai_utils.py +22 -32
- huggingface_hub/file_download.py +234 -38
- huggingface_hub/hf_api.py +385 -424
- huggingface_hub/hf_file_system.py +55 -65
- huggingface_hub/inference/_client.py +27 -48
- huggingface_hub/inference/_generated/_async_client.py +27 -48
- huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
- huggingface_hub/inference/_mcp/agent.py +2 -5
- huggingface_hub/inference/_mcp/mcp_client.py +6 -8
- huggingface_hub/inference/_providers/__init__.py +16 -0
- huggingface_hub/inference/_providers/_common.py +2 -0
- huggingface_hub/inference/_providers/fal_ai.py +2 -0
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +14 -8
- huggingface_hub/repocard.py +12 -16
- huggingface_hub/serialization/_base.py +3 -6
- huggingface_hub/serialization/_torch.py +16 -34
- huggingface_hub/utils/__init__.py +1 -2
- huggingface_hub/utils/_cache_manager.py +42 -72
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_http.py +37 -68
- huggingface_hub/utils/_validators.py +2 -2
- huggingface_hub/utils/logging.py +8 -11
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/METADATA +2 -2
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/RECORD +44 -56
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/entry_points.txt +0 -1
- huggingface_hub/commands/__init__.py +0 -27
- huggingface_hub/commands/_cli_utils.py +0 -74
- huggingface_hub/commands/delete_cache.py +0 -476
- huggingface_hub/commands/download.py +0 -195
- huggingface_hub/commands/env.py +0 -39
- huggingface_hub/commands/huggingface_cli.py +0 -65
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo.py +0 -151
- huggingface_hub/commands/repo_files.py +0 -132
- huggingface_hub/commands/scan_cache.py +0 -183
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -318
- huggingface_hub/commands/upload_large_folder.py +0 -131
- huggingface_hub/commands/user.py +0 -207
- huggingface_hub/commands/version.py +0 -40
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/LICENSE +0 -0
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/WHEEL +0 -0
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_providers/fal_ai.py CHANGED

@@ -191,6 +191,8 @@ class FalAIImageToImageTask(FalAIQueueTask):
         self, inputs: Any, parameters: dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[dict]:
         image_url = _as_url(inputs, default_mime_type="image/jpeg")
+        if "target_size" in parameters:
+            parameters["image_size"] = parameters.pop("target_size")
         payload: dict[str, Any] = {
             "image_url": image_url,
             **filter_none(parameters),
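The two added lines rename the task-level `target_size` parameter to the `image_size` field that fal.ai expects before the payload is built. A minimal sketch of that remapping in isolation (parameter values are made up for illustration):

```python
# Illustration only: rename the generic `target_size` parameter to fal.ai's `image_size` field.
parameters = {"prompt": "turn the cat into a tiger", "target_size": {"width": 512, "height": 512}}
if "target_size" in parameters:
    parameters["image_size"] = parameters.pop("target_size")
# parameters == {"prompt": "turn the cat into a tiger", "image_size": {"width": 512, "height": 512}}
```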
huggingface_hub/inference/_providers/scaleway.py ADDED

@@ -0,0 +1,28 @@
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub.inference._common import RequestParameters, _as_dict
+
+from ._common import BaseConversationalTask, InferenceProviderMapping, TaskProviderHelper, filter_none
+
+
+class ScalewayConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider="scaleway", base_url="https://api.scaleway.ai")
+
+
+class ScalewayFeatureExtractionTask(TaskProviderHelper):
+    def __init__(self):
+        super().__init__(provider="scaleway", base_url="https://api.scaleway.ai", task="feature-extraction")
+
+    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
+        return "/v1/embeddings"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        parameters = filter_none(parameters)
+        return {"input": inputs, "model": provider_mapping_info.provider_id, **parameters}
+
+    def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
+        embeddings = _as_dict(response)["data"]
+        return [embedding["embedding"] for embedding in embeddings]
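A hedged usage sketch of the new provider: once registered in `_providers/__init__.py`, Scaleway should be selectable through `InferenceClient` for the two tasks above. The token and model ids below are placeholders, not values taken from this diff.

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="scaleway", api_key="hf_xxx")  # placeholder token

# Conversational task (routed through ScalewayConversationalTask)
chat = client.chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
    model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical model id
)

# Feature extraction (routed through ScalewayFeatureExtractionTask, POST /v1/embeddings)
vector = client.feature_extraction("Hello world", model="BAAI/bge-multilingual-gemma2")  # hypothetical model id
```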
huggingface_hub/inference/_providers/zai_org.py ADDED

@@ -0,0 +1,17 @@
+from typing import Any, Dict
+
+from huggingface_hub.inference._providers._common import BaseConversationalTask
+
+
+class ZaiConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider="zai-org", base_url="https://api.z.ai")
+
+    def _prepare_headers(self, headers: Dict, api_key: str) -> Dict[str, Any]:
+        headers = super()._prepare_headers(headers, api_key)
+        headers["Accept-Language"] = "en-US,en"
+        headers["x-source-channel"] = "hugging_face"
+        return headers
+
+    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
+        return "/api/paas/v4/chat/completions"
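Similarly, a hedged sketch for the new zai-org provider; the model id is a placeholder. The extra `Accept-Language` and `x-source-channel` headers shown above are added automatically by the helper.

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="zai-org", api_key="hf_xxx")  # placeholder token
completion = client.chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
    model="zai-org/GLM-4.5",  # hypothetical model id
)
print(completion.choices[0].message.content)
```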
huggingface_hub/lfs.py CHANGED

@@ -107,7 +107,8 @@ def post_lfs_batch_info(
     revision: Optional[str] = None,
     endpoint: Optional[str] = None,
     headers: Optional[dict[str, str]] = None,
-) -> tuple[list[dict], list[dict]]:
+    transfers: Optional[list[str]] = None,
+) -> tuple[list[dict], list[dict], Optional[str]]:
     """
     Requests the LFS batch endpoint to retrieve upload instructions
 

@@ -126,11 +127,14 @@ def post_lfs_batch_info(
             The git revision to upload to.
         headers (`dict`, *optional*):
             Additional headers to include in the request
+        transfers (`list`, *optional*):
+            List of transfer methods to use. Defaults to ["basic", "multipart"].
 
     Returns:
-        `LfsBatchInfo`: 2-tuple:
+        `LfsBatchInfo`: 3-tuple:
             - First element is the list of upload instructions from the server
-            - Second element is
+            - Second element is a list of errors, if any
+            - Third element is the chosen transfer adapter if provided by the server (e.g. "basic", "multipart", "xet")
 
     Raises:
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)

@@ -145,7 +149,7 @@ def post_lfs_batch_info(
     batch_url = f"{endpoint}/{url_prefix}{repo_id}.git/info/lfs/objects/batch"
     payload: dict = {
         "operation": "upload",
-        "transfers": ["basic", "multipart"],
+        "transfers": transfers if transfers is not None else ["basic", "multipart"],
         "objects": [
             {
                 "oid": upload.sha256.hex(),

@@ -171,9 +175,13 @@ def post_lfs_batch_info(
     if not isinstance(objects, list):
         raise ValueError("Malformed response from server")
 
+    chosen_transfer = batch_info.get("transfer")
+    chosen_transfer = chosen_transfer if isinstance(chosen_transfer, str) else None
+
     return (
         [_validate_batch_actions(obj) for obj in objects if "error" not in obj],
         [_validate_batch_error(obj) for obj in objects if "error" in obj],
+        chosen_transfer,
     )
 
 
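A hedged sketch of how a caller might use the extended API; parameter names other than `transfers` are assumed to keep the existing signature (not fully shown in this hunk), and the repo id is a placeholder.

```python
# Sketch only: advertise an extra transfer method and inspect the server's choice.
upload_actions, errors, chosen_transfer = post_lfs_batch_info(
    upload_infos=upload_infos,                # assumed to be prepared beforehand
    token=token,
    repo_type="model",
    repo_id="username/my-model",              # placeholder repo id
    transfers=["basic", "multipart", "xet"],  # omit to keep the ["basic", "multipart"] default
)
if chosen_transfer == "xet":
    ...  # the server opted into the xet transfer adapter
```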
@@ -313,7 +321,7 @@ def _upload_single_part(operation: "CommitOperationAdd", upload_url: str) -> None:
     """
     with operation.as_file(with_tqdm=True) as fileobj:
         # S3 might raise a transient 500 error -> let's retry if that happens
-        response = http_backoff("PUT", upload_url, data=fileobj, retry_on_status_codes=(500, 502, 503, 504))
+        response = http_backoff("PUT", upload_url, data=fileobj)
         hf_raise_for_status(response)
 
 

@@ -397,9 +405,7 @@ def _upload_parts_iteratively(
                 read_limit=chunk_size,
             ) as fileobj_slice:
                 # S3 might raise a transient 500 error -> let's retry if that happens
-                part_upload_res = http_backoff(
-                    "PUT", part_upload_url, data=fileobj_slice, retry_on_status_codes=(500, 502, 503, 504)
-                )
+                part_upload_res = http_backoff("PUT", part_upload_url, data=fileobj_slice)
                 hf_raise_for_status(part_upload_res)
                 headers.append(part_upload_res.headers)
     return headers  # type: ignore
huggingface_hub/repocard.py CHANGED

@@ -64,13 +64,11 @@ class RepoCard:
         '\\n# My repo\\n'
 
         ```
-
-        Raises the following error:
-
-
-
-
-        </Tip>
+        > [!TIP]
+        > Raises the following error:
+        >
+        > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+        >   when the content of the repo card metadata is not a dictionary.
         """
 
         # Set the content of the RepoCard, as well as underlying .data and .text attributes.

@@ -198,15 +196,13 @@ class RepoCard:
             The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
             If this function is called from a child class, the default will be the child class's `repo_type`.
 
-
-        Raises the following errors:
-
-
-
-
-
-
-        </Tip>
+        > [!TIP]
+        > Raises the following errors:
+        >
+        > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+        >   if the card fails validation checks.
+        > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+        >   if the request to the Hub API fails for any other reason.
         """
 
         # If repo type is provided, otherwise, use the repo type of the card.
huggingface_hub/serialization/_base.py CHANGED

@@ -62,12 +62,9 @@ def split_state_dict_into_shards_factory(
     have tensors of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB], [6+2+2GB] and not
     [6+2+2GB], [6+2GB], [6GB].
 
-
-
-
-    size greater than `max_shard_size`.
-
-    </Tip>
+    > [!WARNING]
+    > If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    > size greater than `max_shard_size`.
 
     Args:
         state_dict (`dict[str, Tensor]`):
huggingface_hub/serialization/_torch.py CHANGED

@@ -63,18 +63,12 @@ def save_torch_model(
 
     Before saving the model, the `save_directory` is cleaned from any previous shard files.
 
-
+    > [!WARNING]
+    > If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    > size greater than `max_shard_size`.
 
-
-
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
-
-    </Tip>
+    > [!WARNING]
+    > If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
 
     Args:
         model (`torch.nn.Module`):
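A hedged sketch of the recommendation in the second warning, assuming `model` is a `transformers.PreTrainedModel`; the directory name is a placeholder.

```python
from huggingface_hub import save_torch_model

save_torch_model(
    model,                      # assumed: a transformers.PreTrainedModel
    save_directory="my_model",  # placeholder directory
    max_shard_size="5GB",
    shared_tensors_to_discard=getattr(model, "_tied_weights_keys", None),
)
```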
@@ -163,18 +157,12 @@ def save_torch_state_dict(
 
     Before saving the model, the `save_directory` is cleaned from any previous shard files.
 
-
-
-
-    size greater than `max_shard_size`.
-
-    </Tip>
+    > [!WARNING]
+    > If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    > size greater than `max_shard_size`.
 
-
-
-    If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
-
-    </Tip>
+    > [!WARNING]
+    > If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
 
     Args:
         state_dict (`dict[str, torch.Tensor]`):
@@ -314,19 +302,13 @@ def split_torch_state_dict_into_shards(
     [6+2+2GB], [6+2GB], [6GB].
 
 
-
-
-
-    `split_torch_state_dict_into_shards` under the hood.
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
-    size greater than `max_shard_size`.
+    > [!TIP]
+    > To save a model state dictionary to the disk, see [`save_torch_state_dict`]. This helper uses
+    > `split_torch_state_dict_into_shards` under the hood.
 
-
+    > [!WARNING]
+    > If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    > size greater than `max_shard_size`.
 
     Args:
         state_dict (`dict[str, torch.Tensor]`):
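A hedged sketch of the split-then-save relationship described in the tip: the helper only plans the shard layout, nothing is written to disk by this call. Tensor shapes and the shard size below are arbitrary.

```python
import torch
from huggingface_hub import split_torch_state_dict_into_shards

state_dict = {"weight": torch.zeros(1000, 1000), "bias": torch.zeros(1000)}
split = split_torch_state_dict_into_shards(state_dict, max_shard_size="2MB")

# Maps each planned shard filename to the tensor names it should contain.
for filename, tensor_names in split.filename_to_tensors.items():
    print(filename, tensor_names)
print("is_sharded:", split.is_sharded)
```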
huggingface_hub/utils/__init__.py CHANGED

@@ -42,6 +42,7 @@ from ._cache_manager import (
     CachedRevisionInfo,
     DeleteCacheStrategy,
     HFCacheInfo,
+    _format_size,
     scan_cache_dir,
 )
 from ._chunk_utils import chunk_iterable

@@ -53,8 +54,6 @@ from ._headers import build_hf_headers, get_token_to_send
 from ._http import (
     ASYNC_CLIENT_FACTORY_T,
     CLIENT_FACTORY_T,
-    HfHubAsyncTransport,
-    HfHubTransport,
     close_session,
     fix_hf_endpoint_in_url,
     get_async_session,
huggingface_hub/utils/_cache_manager.py CHANGED

@@ -24,7 +24,7 @@ from typing import Literal, Optional, Union
 
 from huggingface_hub.errors import CacheNotFound, CorruptedCacheException
 
-from ..commands._cli_utils import tabulate
+from ..cli._cli_utils import tabulate
 from ..constants import HF_HUB_CACHE
 from . import logging
 
@@ -57,13 +57,10 @@ class CachedFileInfo:
         blob_last_modified (`float`):
             Timestamp of the last time the blob file has been modified/created.
 
-
-
-
-
-    for more details.
-
-    </Tip>
+    > [!WARNING]
+    > `blob_last_accessed` and `blob_last_modified` reliability can depend on the OS you
+    > are using. See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
+    > for more details.
     """
 
     file_name: str
@@ -130,20 +127,14 @@ class CachedRevisionInfo:
         last_modified (`float`):
             Timestamp of the last time the revision has been created/modified.
 
-
-
-
-    are shared across revisions.
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    `size_on_disk` is not necessarily the sum of all file sizes because of possible
-    duplicated files. Besides, only blobs are taken into account, not the (negligible)
-    size of folders and symlinks.
+    > [!WARNING]
+    > `last_accessed` cannot be determined correctly on a single revision as blob files
+    > are shared across revisions.
 
-
+    > [!WARNING]
+    > `size_on_disk` is not necessarily the sum of all file sizes because of possible
+    > duplicated files. Besides, only blobs are taken into account, not the (negligible)
+    > size of folders and symlinks.
     """
 
     commit_hash: str
@@ -203,21 +194,15 @@ class CachedRepoInfo:
         last_modified (`float`):
             Timestamp of the last time a blob file of the repo has been modified/created.
 
-
+    > [!WARNING]
+    > `size_on_disk` is not necessarily the sum of all revisions sizes because of
+    > duplicated files. Besides, only blobs are taken into account, not the (negligible)
+    > size of folders and symlinks.
 
-
-
-
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    `last_accessed` and `last_modified` reliability can depend on the OS you are using.
-    See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
-    for more details.
-
-    </Tip>
+    > [!WARNING]
+    > `last_accessed` and `last_modified` reliability can depend on the OS you are using.
+    > See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
+    > for more details.
     """
 
     repo_id: str
@@ -305,20 +290,14 @@ class DeleteCacheStrategy:
     def execute(self) -> None:
         """Execute the defined strategy.
 
-
-
-
-
-        files.
+        > [!WARNING]
+        > If this method is interrupted, the cache might get corrupted. Deletion order is
+        > implemented so that references and symlinks are deleted before the actual blob
+        > files.
 
-
-
-
-
-        This method is irreversible. If executed, cached files are erased and must be
-        downloaded again.
-
-        </Tip>
+        > [!WARNING]
+        > This method is irreversible. If executed, cached files are erased and must be
+        > downloaded again.
         """
         # Deletion order matters. Blobs are deleted in last so that the user can't end
         # up in a state where a `ref`` refers to a missing snapshot or a snapshot
@@ -360,12 +339,9 @@ class HFCacheInfo:
     Those exceptions are captured so that the scan can continue. Corrupted repos
     are skipped from the scan.
 
-
-
-
-    some cached repos are corrupted, their sizes are not taken into account.
-
-    </Tip>
+    > [!WARNING]
+    > Here `size_on_disk` is equal to the sum of all repo sizes (only blobs). However if
+    > some cached repos are corrupted, their sizes are not taken into account.
     """
 
     size_on_disk: int
@@ -412,13 +388,10 @@ class HFCacheInfo:
         Cache deletion done. Saved 8.6G.
         ```
 
-
-
-
-
-        allows having a dry run before actually executing the deletion.
-
-        </Tip>
+        > [!WARNING]
+        > `delete_revisions` returns a [`~utils.DeleteCacheStrategy`] object that needs to
+        > be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
+        > allows having a dry run before actually executing the deletion.
         """
         hashes_to_delete: set[str] = set(revisions)
 
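A hedged sketch of the dry-run workflow this warning describes; the revision hash is a placeholder.

```python
from huggingface_hub import scan_cache_dir

cache_info = scan_cache_dir()
strategy = cache_info.delete_revisions("81fd1d6e7847c99f5862c9fb81387956d99ec7aa")  # placeholder hash
print(f"Dry run: would free {strategy.expected_freed_size_str}")  # nothing deleted yet
strategy.execute()  # irreversible, see the warnings on DeleteCacheStrategy.execute above
```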
@@ -652,17 +625,14 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
         cache_dir (`str` or `Path`, `optional`):
             Cache directory to cache. Defaults to the default HF cache directory.
 
-
-
-
-
-
-
-
-
-    If the cache directory is a file, instead of a directory.
-
-    </Tip>
+    > [!WARNING]
+    > Raises:
+    >
+    > `CacheNotFound`
+    >     If the cache directory does not exist.
+    >
+    > [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+    >     If the cache directory is a file, instead of a directory.
 
     Returns: a [`~HFCacheInfo`] object.
     """
huggingface_hub/utils/_chunk_utils.py CHANGED

@@ -49,9 +49,8 @@ def chunk_iterable(iterable: Iterable[T], chunk_size: int) -> Iterable[Iterable[T]]:
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If `chunk_size` <= 0.
 
-
-
-    </Tip>
+    > [!WARNING]
+    > The last chunk can be smaller than `chunk_size`.
     """
     if not isinstance(chunk_size, int) or chunk_size <= 0:
         raise ValueError("`chunk_size` must be a strictly positive integer (>0).")
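A small sketch of the behaviour the warning describes:

```python
from huggingface_hub.utils import chunk_iterable

for chunk in chunk_iterable(range(10), chunk_size=4):
    print(list(chunk))
# [0, 1, 2, 3]
# [4, 5, 6, 7]
# [8, 9]  <- the last chunk can be smaller than `chunk_size`
```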
huggingface_hub/utils/_http.py CHANGED

@@ -69,49 +69,21 @@ REPO_API_REGEX = re.compile(
 )
 
 
-class HfHubTransport(httpx.HTTPTransport):
+def hf_request_event_hook(request: httpx.Request) -> None:
     """
-    Transport that will be used to make HTTP requests to the Hugging Face Hub.
+    Event hook that will be used to make HTTP requests to the Hugging Face Hub.
 
     What it does:
     - Block requests if offline mode is enabled
     - Add a request ID to the request headers
     - Log the request if debug mode is enabled
     """
+    if constants.HF_HUB_OFFLINE:
+        raise OfflineModeIsEnabled(
+            f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
+        )
 
-    def handle_request(self, request: httpx.Request) -> httpx.Response:
-        if constants.HF_HUB_OFFLINE:
-            raise OfflineModeIsEnabled(
-                f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
-            )
-        request_id = _add_request_id(request)
-        try:
-            return super().handle_request(request)
-        except httpx.RequestError as e:
-            if request_id is not None:
-                # Taken from https://stackoverflow.com/a/58270258
-                e.args = (*e.args, f"(Request ID: {request_id})")
-            raise
-
-
-class HfHubAsyncTransport(httpx.AsyncHTTPTransport):
-    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
-        if constants.HF_HUB_OFFLINE:
-            raise OfflineModeIsEnabled(
-                f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
-            )
-        request_id = _add_request_id(request)
-        try:
-            return await super().handle_async_request(request)
-        except httpx.RequestError as e:
-            if request_id is not None:
-                # Taken from https://stackoverflow.com/a/58270258
-                e.args = (*e.args, f"(Request ID: {request_id})")
-            raise
-
-
-def _add_request_id(request: httpx.Request) -> Optional[str]:
-    # Add random request ID => easier for server-side debug
+    # Add random request ID => easier for server-side debugging
     if X_AMZN_TRACE_ID not in request.headers:
         request.headers[X_AMZN_TRACE_ID] = request.headers.get(X_REQUEST_ID) or str(uuid.uuid4())
     request_id = request.headers.get(X_AMZN_TRACE_ID)
@@ -135,7 +107,7 @@ def default_client_factory() -> httpx.Client:
     Factory function to create a `httpx.Client` with the default transport.
     """
     return httpx.Client(
-        transport=HfHubTransport(),
+        event_hooks={"request": [hf_request_event_hook]},
         follow_redirects=True,
         timeout=httpx.Timeout(constants.DEFAULT_REQUEST_TIMEOUT, write=60.0),
     )
@@ -146,7 +118,7 @@ def default_async_client_factory() -> httpx.AsyncClient:
     Factory function to create a `httpx.AsyncClient` with the default transport.
     """
     return httpx.AsyncClient(
-        transport=HfHubAsyncTransport(),
+        event_hooks={"request": [hf_request_event_hook]},
         follow_redirects=True,
         timeout=httpx.Timeout(constants.DEFAULT_REQUEST_TIMEOUT, write=60.0),
    )
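With the custom transports removed, the same behaviour now comes from the request event hook. A hedged sketch mirroring the factories above, for code that builds its own `httpx.Client`:

```python
import httpx

from huggingface_hub import constants
from huggingface_hub.utils._http import hf_request_event_hook  # module path as shown in this diff

client = httpx.Client(
    event_hooks={"request": [hf_request_event_hook]},  # offline check + request ID + debug logging
    follow_redirects=True,
    timeout=httpx.Timeout(constants.DEFAULT_REQUEST_TIMEOUT, write=60.0),
)
```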
@@ -232,7 +204,7 @@ def close_session() -> None:
     """
     Close the global `httpx.Client` used by `huggingface_hub`.
 
-    If a Client is closed, it will be recreated on the next call to [`
+    If a Client is closed, it will be recreated on the next call to [`get_session`].
 
     Can be useful if e.g. an SSL certificate has been updated.
     """
@@ -397,17 +369,14 @@ def http_backoff(
     >>> response.raise_for_status()
     ```
 
-
-
-
-
-
-
-
-
-    issue on [Github](https://github.com/huggingface/huggingface_hub).
-
-    </Tip>
+    > [!WARNING]
+    > When using `requests` it is possible to stream data by passing an iterator to the
+    > `data` argument. On http backoff this is a problem as the iterator is not reset
+    > after a failed call. This issue is mitigated for file objects or any IO streams
+    > by saving the initial position of the cursor (with `data.tell()`) and resetting the
+    > cursor between each call (with `data.seek()`). For arbitrary iterators, http backoff
+    > will fail. If this is a hard constraint for you, please let us know by opening an
+    > issue on [Github](https://github.com/huggingface/huggingface_hub).
     """
     return next(
         _http_backoff_base(
@@ -536,25 +505,25 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: Optional[str] = None) -> None:
         endpoint_name (`str`, *optional*):
             Name of the endpoint that has been called. If provided, the error message will be more complete.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    > [!WARNING]
+    > Raises when the request has failed:
+    >
+    >     - [`~utils.RepositoryNotFoundError`]
+    >         If the repository to download from cannot be found. This may be because it
+    >         doesn't exist, because `repo_type` is not set correctly, or because the repo
+    >         is `private` and you do not have access.
+    >     - [`~utils.GatedRepoError`]
+    >         If the repository exists but is gated and the user is not on the authorized
+    >         list.
+    >     - [`~utils.RevisionNotFoundError`]
+    >         If the repository exists but the revision couldn't be find.
+    >     - [`~utils.EntryNotFoundError`]
+    >         If the repository exists but the entry (e.g. the requested file) couldn't be
+    >         find.
+    >     - [`~utils.BadRequestError`]
+    >         If request failed with a HTTP 400 BadRequest error.
+    >     - [`~utils.HfHubHTTPError`]
+    >         If request failed for a reason not listed above.
     """
     try:
         response.raise_for_status()
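A hedged sketch of catching the errors listed above, assuming `get_session` is still exported from `huggingface_hub.utils`; the repo id in the URL is a placeholder.

```python
from huggingface_hub.utils import RepositoryNotFoundError, get_session, hf_raise_for_status

response = get_session().get("https://huggingface.co/api/models/username/this-repo-does-not-exist")
try:
    hf_raise_for_status(response, endpoint_name="model_info")
except RepositoryNotFoundError as err:
    print(f"Repo missing, private, or wrong repo_type: {err}")
```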
huggingface_hub/utils/_validators.py CHANGED

@@ -133,8 +133,8 @@ def validate_repo_id(repo_id: str) -> None:
 
     if not REPO_ID_REGEX.match(repo_id):
         raise HFValidationError(
-            "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are"
-            " forbidden, '-' and '.' cannot start or end the name, max length is 96:"
+            "Repo id must use alphanumeric chars, '-', '_' or '.'."
+            " The name cannot start or end with '-' or '.' and the maximum length is 96:"
             f" '{repo_id}'."
         )
 
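A small sketch of the validation whose error message is reworded above:

```python
from huggingface_hub.errors import HFValidationError
from huggingface_hub.utils._validators import validate_repo_id  # module path as in this diff

try:
    validate_repo_id("invalid repo id!")  # spaces and '!' are not alphanumeric, '-', '_' or '.'
except HFValidationError as err:
    print(err)
```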