huggingface-hub 1.0.0rc1__py3-none-any.whl → 1.0.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (59)
  1. huggingface_hub/__init__.py +4 -7
  2. huggingface_hub/_commit_api.py +126 -66
  3. huggingface_hub/_commit_scheduler.py +4 -7
  4. huggingface_hub/_login.py +10 -16
  5. huggingface_hub/_snapshot_download.py +119 -21
  6. huggingface_hub/_tensorboard_logger.py +2 -5
  7. huggingface_hub/_upload_large_folder.py +1 -2
  8. huggingface_hub/_webhooks_server.py +8 -20
  9. huggingface_hub/cli/_cli_utils.py +12 -6
  10. huggingface_hub/cli/download.py +32 -7
  11. huggingface_hub/cli/repo.py +137 -5
  12. huggingface_hub/dataclasses.py +122 -2
  13. huggingface_hub/errors.py +4 -0
  14. huggingface_hub/fastai_utils.py +22 -32
  15. huggingface_hub/file_download.py +234 -38
  16. huggingface_hub/hf_api.py +385 -424
  17. huggingface_hub/hf_file_system.py +55 -65
  18. huggingface_hub/inference/_client.py +27 -48
  19. huggingface_hub/inference/_generated/_async_client.py +27 -48
  20. huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
  21. huggingface_hub/inference/_mcp/agent.py +2 -5
  22. huggingface_hub/inference/_mcp/mcp_client.py +6 -8
  23. huggingface_hub/inference/_providers/__init__.py +16 -0
  24. huggingface_hub/inference/_providers/_common.py +2 -0
  25. huggingface_hub/inference/_providers/fal_ai.py +2 -0
  26. huggingface_hub/inference/_providers/publicai.py +6 -0
  27. huggingface_hub/inference/_providers/scaleway.py +28 -0
  28. huggingface_hub/inference/_providers/zai_org.py +17 -0
  29. huggingface_hub/lfs.py +14 -8
  30. huggingface_hub/repocard.py +12 -16
  31. huggingface_hub/serialization/_base.py +3 -6
  32. huggingface_hub/serialization/_torch.py +16 -34
  33. huggingface_hub/utils/__init__.py +1 -2
  34. huggingface_hub/utils/_cache_manager.py +42 -72
  35. huggingface_hub/utils/_chunk_utils.py +2 -3
  36. huggingface_hub/utils/_http.py +37 -68
  37. huggingface_hub/utils/_validators.py +2 -2
  38. huggingface_hub/utils/logging.py +8 -11
  39. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/METADATA +2 -2
  40. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/RECORD +44 -56
  41. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/entry_points.txt +0 -1
  42. huggingface_hub/commands/__init__.py +0 -27
  43. huggingface_hub/commands/_cli_utils.py +0 -74
  44. huggingface_hub/commands/delete_cache.py +0 -476
  45. huggingface_hub/commands/download.py +0 -195
  46. huggingface_hub/commands/env.py +0 -39
  47. huggingface_hub/commands/huggingface_cli.py +0 -65
  48. huggingface_hub/commands/lfs.py +0 -200
  49. huggingface_hub/commands/repo.py +0 -151
  50. huggingface_hub/commands/repo_files.py +0 -132
  51. huggingface_hub/commands/scan_cache.py +0 -183
  52. huggingface_hub/commands/tag.py +0 -159
  53. huggingface_hub/commands/upload.py +0 -318
  54. huggingface_hub/commands/upload_large_folder.py +0 -131
  55. huggingface_hub/commands/user.py +0 -207
  56. huggingface_hub/commands/version.py +0 -40
  57. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/LICENSE +0 -0
  58. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/WHEEL +0 -0
  59. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_providers/_common.py CHANGED
@@ -33,7 +33,9 @@ HARDCODED_MODEL_INFERENCE_MAPPING: dict[str, dict[str, InferenceProviderMapping]
     "nscale": {},
     "replicate": {},
    "sambanova": {},
+    "scaleway": {},
     "together": {},
+    "zai-org": {},
 }
huggingface_hub/inference/_providers/fal_ai.py CHANGED
@@ -191,6 +191,8 @@ class FalAIImageToImageTask(FalAIQueueTask):
         self, inputs: Any, parameters: dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[dict]:
         image_url = _as_url(inputs, default_mime_type="image/jpeg")
+        if "target_size" in parameters:
+            parameters["image_size"] = parameters.pop("target_size")
         payload: dict[str, Any] = {
             "image_url": image_url,
             **filter_none(parameters),
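
For illustration only, the two added lines map the task-level `target_size` parameter onto Fal's `image_size` field before the payload is assembled; the parameter values below are made up.

```python
# Standalone illustration of the renaming above; values are invented.
parameters = {"target_size": {"width": 1024, "height": 768}, "strength": 0.6}

if "target_size" in parameters:
    parameters["image_size"] = parameters.pop("target_size")

print(parameters)
# {'strength': 0.6, 'image_size': {'width': 1024, 'height': 768}}
```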
huggingface_hub/inference/_providers/publicai.py ADDED
@@ -0,0 +1,6 @@
+from ._common import BaseConversationalTask
+
+
+class PublicAIConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider="publicai", base_url="https://api.publicai.co")
huggingface_hub/inference/_providers/scaleway.py ADDED
@@ -0,0 +1,28 @@
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub.inference._common import RequestParameters, _as_dict
+
+from ._common import BaseConversationalTask, InferenceProviderMapping, TaskProviderHelper, filter_none
+
+
+class ScalewayConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider="scaleway", base_url="https://api.scaleway.ai")
+
+
+class ScalewayFeatureExtractionTask(TaskProviderHelper):
+    def __init__(self):
+        super().__init__(provider="scaleway", base_url="https://api.scaleway.ai", task="feature-extraction")
+
+    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
+        return "/v1/embeddings"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        parameters = filter_none(parameters)
+        return {"input": inputs, "model": provider_mapping_info.provider_id, **parameters}
+
+    def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
+        embeddings = _as_dict(response)["data"]
+        return [embedding["embedding"] for embedding in embeddings]
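
As a rough sketch of what `get_response` does, assuming the Scaleway `/v1/embeddings` route returns an OpenAI-style body (the response below is fabricated):

```python
# Fabricated response shaped like an OpenAI-compatible /v1/embeddings reply.
fake_response = {
    "object": "list",
    "data": [
        {"object": "embedding", "index": 0, "embedding": [0.12, -0.034, 0.998]},
        {"object": "embedding", "index": 1, "embedding": [0.45, 0.001, -0.27]},
    ],
}

# Equivalent to what ScalewayFeatureExtractionTask.get_response() returns: one vector per input.
vectors = [item["embedding"] for item in fake_response["data"]]
print(len(vectors), len(vectors[0]))  # 2 3
```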
huggingface_hub/inference/_providers/zai_org.py ADDED
@@ -0,0 +1,17 @@
+from typing import Any, Dict
+
+from huggingface_hub.inference._providers._common import BaseConversationalTask
+
+
+class ZaiConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider="zai-org", base_url="https://api.z.ai")
+
+    def _prepare_headers(self, headers: Dict, api_key: str) -> Dict[str, Any]:
+        headers = super()._prepare_headers(headers, api_key)
+        headers["Accept-Language"] = "en-US,en"
+        headers["x-source-channel"] = "hugging_face"
+        return headers
+
+    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
+        return "/api/paas/v4/chat/completions"
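
Together with the `HARDCODED_MODEL_INFERENCE_MAPPING` update above, these modules make `publicai`, `scaleway` and `zai-org` selectable through `InferenceClient`. A hedged usage sketch; the model ID is a placeholder, not a verified deployment:

```python
from huggingface_hub import InferenceClient

# Placeholder model ID; pick one that the chosen provider actually serves.
client = InferenceClient(provider="scaleway")
response = client.chat_completion(
    model="mistralai/Mistral-Small-24B-Instruct-2501",
    messages=[{"role": "user", "content": "Say hello"}],
)
print(response.choices[0].message.content)

# The same pattern applies to the other new providers:
# InferenceClient(provider="publicai") and InferenceClient(provider="zai-org")
```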
huggingface_hub/lfs.py CHANGED
@@ -107,7 +107,8 @@ def post_lfs_batch_info(
     revision: Optional[str] = None,
     endpoint: Optional[str] = None,
     headers: Optional[dict[str, str]] = None,
-) -> tuple[list[dict], list[dict]]:
+    transfers: Optional[list[str]] = None,
+) -> tuple[list[dict], list[dict], Optional[str]]:
     """
     Requests the LFS batch endpoint to retrieve upload instructions
@@ -126,11 +127,14 @@
             The git revision to upload to.
         headers (`dict`, *optional*):
             Additional headers to include in the request
+        transfers (`list`, *optional*):
+            List of transfer methods to use. Defaults to ["basic", "multipart"].
 
     Returns:
-        `LfsBatchInfo`: 2-tuple:
+        `LfsBatchInfo`: 3-tuple:
             - First element is the list of upload instructions from the server
-            - Second element is an list of errors, if any
+            - Second element is a list of errors, if any
+            - Third element is the chosen transfer adapter if provided by the server (e.g. "basic", "multipart", "xet")
 
     Raises:
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
@@ -145,7 +149,7 @@
     batch_url = f"{endpoint}/{url_prefix}{repo_id}.git/info/lfs/objects/batch"
     payload: dict = {
         "operation": "upload",
-        "transfers": ["basic", "multipart"],
+        "transfers": transfers if transfers is not None else ["basic", "multipart"],
         "objects": [
             {
                 "oid": upload.sha256.hex(),
@@ -171,9 +175,13 @@
     if not isinstance(objects, list):
         raise ValueError("Malformed response from server")
 
+    chosen_transfer = batch_info.get("transfer")
+    chosen_transfer = chosen_transfer if isinstance(chosen_transfer, str) else None
+
     return (
         [_validate_batch_actions(obj) for obj in objects if "error" not in obj],
         [_validate_batch_error(obj) for obj in objects if "error" in obj],
+        chosen_transfer,
     )
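
For callers, the change means unpacking a third element and optionally passing `transfers`. A hypothetical call site; argument names other than `transfers` are assumptions based on the surrounding function, not copied from the library:

```python
# Hypothetical caller sketch.
upload_instructions, errors, chosen_transfer = post_lfs_batch_info(
    upload_infos=upload_infos,   # assumed to be prepared by the commit machinery
    token=token,
    repo_id=repo_id,
    repo_type="model",
    transfers=["basic", "multipart", "xet"],  # new optional argument
)

if chosen_transfer == "xet":
    ...  # the server opted into the xet transfer adapter; route uploads accordingly
```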
@@ -313,7 +321,7 @@ def _upload_single_part(operation: "CommitOperationAdd", upload_url: str) -> Non
     """
     with operation.as_file(with_tqdm=True) as fileobj:
         # S3 might raise a transient 500 error -> let's retry if that happens
-        response = http_backoff("PUT", upload_url, data=fileobj, retry_on_status_codes=(500, 502, 503, 504))
+        response = http_backoff("PUT", upload_url, data=fileobj)
         hf_raise_for_status(response)
@@ -397,9 +405,7 @@ def _upload_parts_iteratively(
             read_limit=chunk_size,
         ) as fileobj_slice:
             # S3 might raise a transient 500 error -> let's retry if that happens
-            part_upload_res = http_backoff(
-                "PUT", part_upload_url, data=fileobj_slice, retry_on_status_codes=(500, 502, 503, 504)
-            )
+            part_upload_res = http_backoff("PUT", part_upload_url, data=fileobj_slice)
             hf_raise_for_status(part_upload_res)
             headers.append(part_upload_res.headers)
     return headers  # type: ignore
huggingface_hub/repocard.py CHANGED
@@ -64,13 +64,11 @@ class RepoCard:
         '\\n# My repo\\n'
 
         ```
-        <Tip>
-        Raises the following error:
-
-        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
-          when the content of the repo card metadata is not a dictionary.
-
-        </Tip>
+        > [!TIP]
+        > Raises the following error:
+        >
+        > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+        >   when the content of the repo card metadata is not a dictionary.
         """
 
         # Set the content of the RepoCard, as well as underlying .data and .text attributes.
@@ -198,15 +196,13 @@ class RepoCard:
                 The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
                 If this function is called from a child class, the default will be the child class's `repo_type`.
 
-        <Tip>
-        Raises the following errors:
-
-        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
-          if the card fails validation checks.
-        - [`HfHubHTTPError`]
-          if the request to the Hub API fails for any other reason.
-
-        </Tip>
+        > [!TIP]
+        > Raises the following errors:
+        >
+        > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+        >   if the card fails validation checks.
+        > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+        >   if the request to the Hub API fails for any other reason.
         """
 
         # If repo type is provided, otherwise, use the repo type of the card.
huggingface_hub/serialization/_base.py CHANGED
@@ -62,12 +62,9 @@ def split_state_dict_into_shards_factory(
     have tensors of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB], [6+2+2GB] and not
     [6+2+2GB], [6+2GB], [6GB].
 
-    <Tip warning={true}>
-
-    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
-    size greater than `max_shard_size`.
-
-    </Tip>
+    > [!WARNING]
+    > If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    > size greater than `max_shard_size`.
 
     Args:
         state_dict (`dict[str, Tensor]`):
huggingface_hub/serialization/_torch.py CHANGED
@@ -63,18 +63,12 @@ def save_torch_model(
 
     Before saving the model, the `save_directory` is cleaned from any previous shard files.
 
-    <Tip warning={true}>
-
-    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
-    size greater than `max_shard_size`.
-
-    </Tip>
+    > [!WARNING]
+    > If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    > size greater than `max_shard_size`.
 
-    <Tip warning={true}>
-
-    If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
-
-    </Tip>
+    > [!WARNING]
+    > If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
 
     Args:
         model (`torch.nn.Module`):
@@ -163,18 +157,12 @@ def save_torch_state_dict(
 
     Before saving the model, the `save_directory` is cleaned from any previous shard files.
 
-    <Tip warning={true}>
-
-    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
-    size greater than `max_shard_size`.
-
-    </Tip>
+    > [!WARNING]
+    > If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    > size greater than `max_shard_size`.
 
-    <Tip warning={true}>
-
-    If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
-
-    </Tip>
+    > [!WARNING]
+    > If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
 
     Args:
         state_dict (`dict[str, torch.Tensor]`):
@@ -314,19 +302,13 @@ def split_torch_state_dict_into_shards(
     [6+2+2GB], [6+2GB], [6GB].
 
 
-    <Tip>
-
-    To save a model state dictionary to the disk, see [`save_torch_state_dict`]. This helper uses
-    `split_torch_state_dict_into_shards` under the hood.
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
-    size greater than `max_shard_size`.
-
-    </Tip>
+    > [!TIP]
+    > To save a model state dictionary to the disk, see [`save_torch_state_dict`]. This helper uses
+    > `split_torch_state_dict_into_shards` under the hood.
 
+    > [!WARNING]
+    > If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    > size greater than `max_shard_size`.
 
     Args:
         state_dict (`dict[str, torch.Tensor]`):
huggingface_hub/utils/__init__.py CHANGED
@@ -42,6 +42,7 @@ from ._cache_manager import (
     CachedRevisionInfo,
     DeleteCacheStrategy,
     HFCacheInfo,
+    _format_size,
     scan_cache_dir,
 )
 from ._chunk_utils import chunk_iterable
@@ -53,8 +54,6 @@ from ._headers import build_hf_headers, get_token_to_send
 from ._http import (
     ASYNC_CLIENT_FACTORY_T,
     CLIENT_FACTORY_T,
-    HfHubAsyncTransport,
-    HfHubTransport,
     close_session,
     fix_hf_endpoint_in_url,
     get_async_session,
huggingface_hub/utils/_cache_manager.py CHANGED
@@ -24,7 +24,7 @@ from typing import Literal, Optional, Union
 
 from huggingface_hub.errors import CacheNotFound, CorruptedCacheException
 
-from ..commands._cli_utils import tabulate
+from ..cli._cli_utils import tabulate
 from ..constants import HF_HUB_CACHE
 from . import logging
@@ -57,13 +57,10 @@ class CachedFileInfo:
         blob_last_modified (`float`):
             Timestamp of the last time the blob file has been modified/created.
 
-    <Tip warning={true}>
-
-    `blob_last_accessed` and `blob_last_modified` reliability can depend on the OS you
-    are using. See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
-    for more details.
-
-    </Tip>
+    > [!WARNING]
+    > `blob_last_accessed` and `blob_last_modified` reliability can depend on the OS you
+    > are using. See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
+    > for more details.
     """
 
     file_name: str
@@ -130,20 +127,14 @@ class CachedRevisionInfo:
         last_modified (`float`):
             Timestamp of the last time the revision has been created/modified.
 
-    <Tip warning={true}>
-
-    `last_accessed` cannot be determined correctly on a single revision as blob files
-    are shared across revisions.
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    `size_on_disk` is not necessarily the sum of all file sizes because of possible
-    duplicated files. Besides, only blobs are taken into account, not the (negligible)
-    size of folders and symlinks.
-
-    </Tip>
+    > [!WARNING]
+    > `last_accessed` cannot be determined correctly on a single revision as blob files
+    > are shared across revisions.
 
+    > [!WARNING]
+    > `size_on_disk` is not necessarily the sum of all file sizes because of possible
+    > duplicated files. Besides, only blobs are taken into account, not the (negligible)
+    > size of folders and symlinks.
     """
 
     commit_hash: str
@@ -203,21 +194,15 @@ class CachedRepoInfo:
         last_modified (`float`):
             Timestamp of the last time a blob file of the repo has been modified/created.
 
-    <Tip warning={true}>
-
-    `size_on_disk` is not necessarily the sum of all revisions sizes because of
-    duplicated files. Besides, only blobs are taken into account, not the (negligible)
-    size of folders and symlinks.
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    `last_accessed` and `last_modified` reliability can depend on the OS you are using.
-    See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
-    for more details.
-
-    </Tip>
+    > [!WARNING]
+    > `size_on_disk` is not necessarily the sum of all revisions sizes because of
+    > duplicated files. Besides, only blobs are taken into account, not the (negligible)
+    > size of folders and symlinks.
 
+    > [!WARNING]
+    > `last_accessed` and `last_modified` reliability can depend on the OS you are using.
+    > See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
+    > for more details.
     """
 
     repo_id: str
@@ -305,20 +290,14 @@ class DeleteCacheStrategy:
     def execute(self) -> None:
         """Execute the defined strategy.
 
-        <Tip warning={true}>
-
-        If this method is interrupted, the cache might get corrupted. Deletion order is
-        implemented so that references and symlinks are deleted before the actual blob
-        files.
-
-        </Tip>
-
-        <Tip warning={true}>
-
-        This method is irreversible. If executed, cached files are erased and must be
-        downloaded again.
-
-        </Tip>
+        > [!WARNING]
+        > If this method is interrupted, the cache might get corrupted. Deletion order is
+        > implemented so that references and symlinks are deleted before the actual blob
+        > files.
 
+        > [!WARNING]
+        > This method is irreversible. If executed, cached files are erased and must be
+        > downloaded again.
         """
         # Deletion order matters. Blobs are deleted in last so that the user can't end
        # up in a state where a `ref`` refers to a missing snapshot or a snapshot
@@ -360,12 +339,9 @@ class HFCacheInfo:
         Those exceptions are captured so that the scan can continue. Corrupted repos
         are skipped from the scan.
 
-    <Tip warning={true}>
-
-    Here `size_on_disk` is equal to the sum of all repo sizes (only blobs). However if
-    some cached repos are corrupted, their sizes are not taken into account.
-
-    </Tip>
+    > [!WARNING]
+    > Here `size_on_disk` is equal to the sum of all repo sizes (only blobs). However if
+    > some cached repos are corrupted, their sizes are not taken into account.
     """
 
     size_on_disk: int
@@ -412,13 +388,10 @@ class HFCacheInfo:
         Cache deletion done. Saved 8.6G.
         ```
 
-        <Tip warning={true}>
-
-        `delete_revisions` returns a [`~utils.DeleteCacheStrategy`] object that needs to
-        be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
-        allows having a dry run before actually executing the deletion.
-
-        </Tip>
+        > [!WARNING]
+        > `delete_revisions` returns a [`~utils.DeleteCacheStrategy`] object that needs to
+        > be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
+        > allows having a dry run before actually executing the deletion.
         """
         hashes_to_delete: set[str] = set(revisions)
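
In practice the dry-run-then-execute flow the warning describes looks like this (the revision hash is a placeholder; use hashes reported by `scan_cache_dir()`):

```python
from huggingface_hub import scan_cache_dir

# Placeholder revision hash.
strategy = scan_cache_dir().delete_revisions("81fd1d6e7847c99f5862c9fb81387956d99ec7aa")
print("Will free", strategy.expected_freed_size_str)  # dry run: nothing deleted yet
strategy.execute()  # irreversible
```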
@@ -652,17 +625,14 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
         cache_dir (`str` or `Path`, `optional`):
             Cache directory to cache. Defaults to the default HF cache directory.
 
-    <Tip warning={true}>
-
-    Raises:
-
-    `CacheNotFound`
-      If the cache directory does not exist.
-
-    [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
-      If the cache directory is a file, instead of a directory.
-
-    </Tip>
+    > [!WARNING]
+    > Raises:
+    >
+    > `CacheNotFound`
+    >   If the cache directory does not exist.
+    >
+    > [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+    >   If the cache directory is a file, instead of a directory.
 
     Returns: a [`~HFCacheInfo`] object.
     """
huggingface_hub/utils/_chunk_utils.py CHANGED
@@ -49,9 +49,8 @@ def chunk_iterable(iterable: Iterable[T], chunk_size: int) -> Iterable[Iterable[
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If `chunk_size` <= 0.
 
-    <Tip warning={true}>
-    The last chunk can be smaller than `chunk_size`.
-    </Tip>
+    > [!WARNING]
+    > The last chunk can be smaller than `chunk_size`.
     """
     if not isinstance(chunk_size, int) or chunk_size <= 0:
         raise ValueError("`chunk_size` must be a strictly positive integer (>0).")
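
A quick illustration of the behaviour the warning documents:

```python
from huggingface_hub.utils import chunk_iterable

# Chunks of at most 4 items; the final chunk here only has 2.
for chunk in chunk_iterable(range(10), chunk_size=4):
    print(list(chunk))
```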
huggingface_hub/utils/_http.py CHANGED
@@ -69,49 +69,21 @@ REPO_API_REGEX = re.compile(
 )
 
 
-class HfHubTransport(httpx.HTTPTransport):
+def hf_request_event_hook(request: httpx.Request) -> None:
     """
-    Transport that will be used to make HTTP requests to the Hugging Face Hub.
+    Event hook that will be used to make HTTP requests to the Hugging Face Hub.
 
     What it does:
     - Block requests if offline mode is enabled
     - Add a request ID to the request headers
     - Log the request if debug mode is enabled
     """
+    if constants.HF_HUB_OFFLINE:
+        raise OfflineModeIsEnabled(
+            f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
+        )
 
-    def handle_request(self, request: httpx.Request) -> httpx.Response:
-        if constants.HF_HUB_OFFLINE:
-            raise OfflineModeIsEnabled(
-                f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
-            )
-        request_id = _add_request_id(request)
-        try:
-            return super().handle_request(request)
-        except httpx.RequestError as e:
-            if request_id is not None:
-                # Taken from https://stackoverflow.com/a/58270258
-                e.args = (*e.args, f"(Request ID: {request_id})")
-            raise
-
-
-class HfHubAsyncTransport(httpx.AsyncHTTPTransport):
-    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
-        if constants.HF_HUB_OFFLINE:
-            raise OfflineModeIsEnabled(
-                f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
-            )
-        request_id = _add_request_id(request)
-        try:
-            return await super().handle_async_request(request)
-        except httpx.RequestError as e:
-            if request_id is not None:
-                # Taken from https://stackoverflow.com/a/58270258
-                e.args = (*e.args, f"(Request ID: {request_id})")
-            raise
-
-
-def _add_request_id(request: httpx.Request) -> Optional[str]:
-    # Add random request ID => easier for server-side debug
+    # Add random request ID => easier for server-side debugging
     if X_AMZN_TRACE_ID not in request.headers:
         request.headers[X_AMZN_TRACE_ID] = request.headers.get(X_REQUEST_ID) or str(uuid.uuid4())
     request_id = request.headers.get(X_AMZN_TRACE_ID)
@@ -135,7 +107,7 @@ def default_client_factory() -> httpx.Client:
     Factory function to create a `httpx.Client` with the default transport.
     """
     return httpx.Client(
-        transport=HfHubTransport(),
+        event_hooks={"request": [hf_request_event_hook]},
         follow_redirects=True,
         timeout=httpx.Timeout(constants.DEFAULT_REQUEST_TIMEOUT, write=60.0),
     )
@@ -146,7 +118,7 @@ def default_async_client_factory() -> httpx.AsyncClient:
     Factory function to create a `httpx.AsyncClient` with the default transport.
     """
     return httpx.AsyncClient(
-        transport=HfHubAsyncTransport(),
+        event_hooks={"request": [hf_request_event_hook]},
         follow_redirects=True,
         timeout=httpx.Timeout(constants.DEFAULT_REQUEST_TIMEOUT, write=60.0),
     )
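
Because the offline check and request-ID injection now live in a plain event hook instead of a transport subclass, the same behaviour can be attached to a custom client. A minimal sketch, assuming `hf_request_event_hook` stays importable from `huggingface_hub.utils._http`:

```python
import httpx

from huggingface_hub.utils._http import hf_request_event_hook

# Custom client that keeps the Hub's offline-mode guard and request-ID header.
client = httpx.Client(
    event_hooks={"request": [hf_request_event_hook]},
    follow_redirects=True,
    timeout=httpx.Timeout(30.0, write=60.0),
)
```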
@@ -232,7 +204,7 @@ def close_session() -> None:
     """
     Close the global `httpx.Client` used by `huggingface_hub`.
 
-    If a Client is closed, it will be recreated on the next call to [`get_client`].
+    If a Client is closed, it will be recreated on the next call to [`get_session`].
 
     Can be useful if e.g. an SSL certificate has been updated.
     """
@@ -397,17 +369,14 @@ def http_backoff(
     >>> response.raise_for_status()
     ```
 
-    <Tip warning={true}>
-
-    When using `httpx` it is possible to stream data by passing an iterator to the
-    `data` argument. On http backoff this is a problem as the iterator is not reset
-    after a failed call. This issue is mitigated for file objects or any IO streams
-    by saving the initial position of the cursor (with `data.tell()`) and resetting the
-    cursor between each call (with `data.seek()`). For arbitrary iterators, http backoff
-    will fail. If this is a hard constraint for you, please let us know by opening an
-    issue on [Github](https://github.com/huggingface/huggingface_hub).
-
-    </Tip>
+    > [!WARNING]
+    > When using `requests` it is possible to stream data by passing an iterator to the
+    > `data` argument. On http backoff this is a problem as the iterator is not reset
+    > after a failed call. This issue is mitigated for file objects or any IO streams
+    > by saving the initial position of the cursor (with `data.tell()`) and resetting the
+    > cursor between each call (with `data.seek()`). For arbitrary iterators, http backoff
+    > will fail. If this is a hard constraint for you, please let us know by opening an
+    > issue on [Github](https://github.com/huggingface/huggingface_hub).
     """
     return next(
         _http_backoff_base(
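
The cursor-reset behaviour described in the warning is what makes retries with a real file object safe. A hedged sketch; the URL and file path are placeholders:

```python
from huggingface_hub.utils import http_backoff

# On a transient failure, http_backoff seeks the file object back to its starting
# position before retrying, so the retried PUT sends the full content again.
with open("weights.bin", "rb") as f:
    response = http_backoff("PUT", "https://example.com/upload", data=f)
    response.raise_for_status()
```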
@@ -536,25 +505,25 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: Optional[str] =
         endpoint_name (`str`, *optional*):
             Name of the endpoint that has been called. If provided, the error message will be more complete.
 
-    <Tip warning={true}>
-
-    Raises when the request has failed:
-
-    - [`~utils.RepositoryNotFoundError`]
-        If the repository to download from cannot be found. This may be because it doesn't exist, because `repo_type`
-        is not set correctly, or because the repo is `private` and you do not have access.
-    - [`~utils.GatedRepoError`]
-        If the repository exists but is gated and the user is not on the authorized list.
-    - [`~utils.RevisionNotFoundError`]
-        If the repository exists but the revision couldn't be find.
-    - [`~utils.RemoteEntryNotFoundError`]
-        If the repository exists but the entry (e.g. the requested file) couldn't be find.
-    - [`~utils.BadRequestError`]
-        If request failed with a HTTP 400 BadRequest error.
-    - [`~utils.HfHubHTTPError`]
-        If request failed for a reason not listed above.
-
-    </Tip>
+    > [!WARNING]
+    > Raises when the request has failed:
+    >
+    > - [`~utils.RepositoryNotFoundError`]
+    >     If the repository to download from cannot be found. This may be because it
+    >     doesn't exist, because `repo_type` is not set correctly, or because the repo
+    >     is `private` and you do not have access.
+    > - [`~utils.GatedRepoError`]
+    >     If the repository exists but is gated and the user is not on the authorized
+    >     list.
+    > - [`~utils.RevisionNotFoundError`]
+    >     If the repository exists but the revision couldn't be find.
+    > - [`~utils.EntryNotFoundError`]
+    >     If the repository exists but the entry (e.g. the requested file) couldn't be
+    >     find.
+    > - [`~utils.BadRequestError`]
+    >     If request failed with a HTTP 400 BadRequest error.
+    > - [`~utils.HfHubHTTPError`]
+    >     If request failed for a reason not listed above.
     """
     try:
         response.raise_for_status()
huggingface_hub/utils/_validators.py CHANGED
@@ -133,8 +133,8 @@ def validate_repo_id(repo_id: str) -> None:
 
     if not REPO_ID_REGEX.match(repo_id):
         raise HFValidationError(
-            "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are"
-            " forbidden, '-' and '.' cannot start or end the name, max length is 96:"
+            "Repo id must use alphanumeric chars, '-', '_' or '.'."
+            " The name cannot start or end with '-' or '.' and the maximum length is 96:"
             f" '{repo_id}'."
         )
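
A small sanity check against the reworded error, assuming `validate_repo_id` remains importable from `huggingface_hub.utils` as in earlier releases:

```python
from huggingface_hub.errors import HFValidationError
from huggingface_hub.utils import validate_repo_id

validate_repo_id("username/my-cool-model")  # passes silently

try:
    validate_repo_id("username/my model")  # spaces are not allowed
except HFValidationError as err:
    print(err)
```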