huggingface-hub 0.33.5__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (68)
  1. huggingface_hub/__init__.py +487 -525
  2. huggingface_hub/_commit_api.py +21 -28
  3. huggingface_hub/_jobs_api.py +145 -0
  4. huggingface_hub/_local_folder.py +7 -1
  5. huggingface_hub/_login.py +5 -5
  6. huggingface_hub/_oauth.py +6 -10
  7. huggingface_hub/_snapshot_download.py +11 -6
  8. huggingface_hub/_upload_large_folder.py +46 -23
  9. huggingface_hub/cli/__init__.py +27 -0
  10. huggingface_hub/cli/_cli_utils.py +69 -0
  11. huggingface_hub/cli/auth.py +210 -0
  12. huggingface_hub/cli/cache.py +405 -0
  13. huggingface_hub/cli/download.py +181 -0
  14. huggingface_hub/cli/hf.py +66 -0
  15. huggingface_hub/cli/jobs.py +522 -0
  16. huggingface_hub/cli/lfs.py +198 -0
  17. huggingface_hub/cli/repo.py +243 -0
  18. huggingface_hub/cli/repo_files.py +128 -0
  19. huggingface_hub/cli/system.py +52 -0
  20. huggingface_hub/cli/upload.py +316 -0
  21. huggingface_hub/cli/upload_large_folder.py +132 -0
  22. huggingface_hub/commands/_cli_utils.py +5 -0
  23. huggingface_hub/commands/delete_cache.py +3 -1
  24. huggingface_hub/commands/download.py +4 -0
  25. huggingface_hub/commands/env.py +3 -0
  26. huggingface_hub/commands/huggingface_cli.py +2 -0
  27. huggingface_hub/commands/repo.py +4 -0
  28. huggingface_hub/commands/repo_files.py +4 -0
  29. huggingface_hub/commands/scan_cache.py +3 -1
  30. huggingface_hub/commands/tag.py +3 -1
  31. huggingface_hub/commands/upload.py +4 -0
  32. huggingface_hub/commands/upload_large_folder.py +3 -1
  33. huggingface_hub/commands/user.py +11 -1
  34. huggingface_hub/commands/version.py +3 -0
  35. huggingface_hub/constants.py +1 -0
  36. huggingface_hub/file_download.py +16 -5
  37. huggingface_hub/hf_api.py +519 -7
  38. huggingface_hub/hf_file_system.py +8 -16
  39. huggingface_hub/hub_mixin.py +3 -3
  40. huggingface_hub/inference/_client.py +38 -39
  41. huggingface_hub/inference/_common.py +38 -11
  42. huggingface_hub/inference/_generated/_async_client.py +50 -51
  43. huggingface_hub/inference/_generated/types/__init__.py +1 -0
  44. huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
  45. huggingface_hub/inference/_mcp/cli.py +36 -18
  46. huggingface_hub/inference/_mcp/constants.py +8 -0
  47. huggingface_hub/inference/_mcp/types.py +3 -0
  48. huggingface_hub/inference/_providers/__init__.py +4 -1
  49. huggingface_hub/inference/_providers/_common.py +3 -6
  50. huggingface_hub/inference/_providers/fal_ai.py +85 -42
  51. huggingface_hub/inference/_providers/hf_inference.py +17 -9
  52. huggingface_hub/inference/_providers/replicate.py +19 -1
  53. huggingface_hub/keras_mixin.py +2 -2
  54. huggingface_hub/repocard.py +1 -1
  55. huggingface_hub/repository.py +2 -2
  56. huggingface_hub/utils/_auth.py +1 -1
  57. huggingface_hub/utils/_cache_manager.py +2 -2
  58. huggingface_hub/utils/_dotenv.py +51 -0
  59. huggingface_hub/utils/_headers.py +1 -1
  60. huggingface_hub/utils/_runtime.py +1 -1
  61. huggingface_hub/utils/_xet.py +6 -2
  62. huggingface_hub/utils/_xet_progress_reporting.py +141 -0
  63. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/METADATA +7 -8
  64. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/RECORD +68 -51
  65. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/entry_points.txt +1 -0
  66. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/LICENSE +0 -0
  67. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/WHEEL +0 -0
  68. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ from urllib.parse import urlparse
6
6
 
7
7
  from huggingface_hub import constants
8
8
  from huggingface_hub.hf_api import InferenceProviderMapping
9
- from huggingface_hub.inference._common import RequestParameters, _as_dict
9
+ from huggingface_hub.inference._common import RequestParameters, _as_dict, _as_url
10
10
  from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
11
11
  from huggingface_hub.utils import get_session, hf_raise_for_status
12
12
  from huggingface_hub.utils.logging import get_logger
@@ -32,6 +32,60 @@ class FalAITask(TaskProviderHelper, ABC):
32
32
  return f"/{mapped_model}"
33
33
 
34
34
 
35
class FalAIQueueTask(TaskProviderHelper, ABC):
    """Base class for fal.ai tasks executed through the asynchronous queue API.

    Jobs are submitted to https://queue.fal.run (either directly or via the HF
    router) and `get_response` polls the queue until the job is COMPLETED, then
    fetches and returns the result payload.
    """

    def __init__(self, task: str):
        super().__init__(provider="fal-ai", base_url="https://queue.fal.run", task=task)

    def _prepare_headers(self, headers: Dict, api_key: str) -> Dict:
        headers = super()._prepare_headers(headers, api_key)
        # Direct fal.ai keys use the provider's own "Key ..." auth scheme;
        # "hf_" tokens keep the headers prepared by the parent (HF routing).
        if not api_key.startswith("hf_"):
            headers["authorization"] = f"Key {api_key}"
        return headers

    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
        if api_key.startswith("hf_"):
            # Use the queue subdomain for HF routing
            return f"/{mapped_model}?_subdomain=queue"
        return f"/{mapped_model}"

    def get_response(
        self,
        response: Union[bytes, Dict],
        request_params: Optional[RequestParameters] = None,
    ) -> Any:
        """Poll the fal.ai queue until the job completes and return the result JSON.

        Args:
            response: initial queue-submission response; must contain a
                `request_id` and a `response_url`.
            request_params: parameters of the original request. Required: the
                status/result URLs are rebuilt from `request_params.url` and the
                same auth headers are reused for polling.

        Raises:
            ValueError: if the response carries no `request_id`, or if
                `request_params` is not provided.
        """
        response_dict = _as_dict(response)

        request_id = response_dict.get("request_id")
        if not request_id:
            raise ValueError("No request ID found in the response")
        if request_params is None:
            raise ValueError(
                f"A `RequestParameters` object should be provided to get {self.task} responses with Fal AI."
            )

        # extract the base url and query params
        parsed_url = urlparse(request_params.url)
        # a bit hacky way to concatenate the provider name without parsing `parsed_url.path`
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}{'/fal-ai' if parsed_url.netloc == 'router.huggingface.co' else ''}"
        query_param = f"?{parsed_url.query}" if parsed_url.query else ""

        # extracting the provider model id for status and result urls
        # from the response as it might be different from the mapped model in `request_params.url`
        model_id = urlparse(response_dict.get("response_url")).path
        status_url = f"{base_url}{str(model_id)}/status{query_param}"
        result_url = f"{base_url}{str(model_id)}{query_param}"

        status = response_dict.get("status")
        logger.info("Generating the output.. this can take several minutes.")
        # Busy-poll the queue status endpoint until the provider reports COMPLETED.
        while status != "COMPLETED":
            time.sleep(_POLLING_INTERVAL)
            status_response = get_session().get(status_url, headers=request_params.headers)
            hf_raise_for_status(status_response)
            status = status_response.json().get("status")

        return get_session().get(result_url, headers=request_params.headers).json()
88
+
35
89
  class FalAIAutomaticSpeechRecognitionTask(FalAITask):
36
90
  def __init__(self):
37
91
  super().__init__("automatic-speech-recognition")
@@ -110,23 +164,10 @@ class FalAITextToSpeechTask(FalAITask):
110
164
  return get_session().get(url).content
111
165
 
112
166
 
113
- class FalAITextToVideoTask(FalAITask):
167
+ class FalAITextToVideoTask(FalAIQueueTask):
114
168
  def __init__(self):
115
169
  super().__init__("text-to-video")
116
170
 
117
- def _prepare_base_url(self, api_key: str) -> str:
118
- if api_key.startswith("hf_"):
119
- return super()._prepare_base_url(api_key)
120
- else:
121
- logger.info(f"Calling '{self.provider}' provider directly.")
122
- return "https://queue.fal.run"
123
-
124
- def _prepare_route(self, mapped_model: str, api_key: str) -> str:
125
- if api_key.startswith("hf_"):
126
- # Use the queue subdomain for HF routing
127
- return f"/{mapped_model}?_subdomain=queue"
128
- return f"/{mapped_model}"
129
-
130
171
  def _prepare_payload_as_dict(
131
172
  self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
132
173
  ) -> Optional[Dict]:
@@ -137,36 +178,38 @@ class FalAITextToVideoTask(FalAITask):
137
178
  response: Union[bytes, Dict],
138
179
  request_params: Optional[RequestParameters] = None,
139
180
  ) -> Any:
140
- response_dict = _as_dict(response)
181
+ output = super().get_response(response, request_params)
182
+ url = _as_dict(output)["video"]["url"]
183
+ return get_session().get(url).content
141
184
 
142
- request_id = response_dict.get("request_id")
143
- if not request_id:
144
- raise ValueError("No request ID found in the response")
145
- if request_params is None:
146
- raise ValueError(
147
- "A `RequestParameters` object should be provided to get text-to-video responses with Fal AI."
148
- )
149
185
 
150
- # extract the base url and query params
151
- parsed_url = urlparse(request_params.url)
152
- # a bit hacky way to concatenate the provider name without parsing `parsed_url.path`
153
- base_url = f"{parsed_url.scheme}://{parsed_url.netloc}{'/fal-ai' if parsed_url.netloc == 'router.huggingface.co' else ''}"
154
- query_param = f"?{parsed_url.query}" if parsed_url.query else ""
186
class FalAIImageToImageTask(FalAIQueueTask):
    """Image-to-image task routed through the fal.ai queue endpoint."""

    def __init__(self):
        super().__init__("image-to-image")

    def _prepare_payload_as_dict(
        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
    ) -> Optional[Dict]:
        # The input image is referenced by URL (a data: URI when raw bytes are given).
        payload: Dict[str, Any] = {"image_url": _as_url(inputs, default_mime_type="image/jpeg")}
        payload.update(filter_none(parameters))

        adapter_path = provider_mapping_info.adapter_weights_path
        if adapter_path is not None:
            # Point fal.ai at the LoRA weights hosted on the Hub.
            lora_path = constants.HUGGINGFACE_CO_URL_TEMPLATE.format(
                repo_id=provider_mapping_info.hf_model_id,
                revision="main",
                filename=adapter_path,
            )
            payload["loras"] = [{"path": lora_path, "scale": 1}]

        return payload

    def get_response(
        self,
        response: Union[bytes, Dict],
        request_params: Optional[RequestParameters] = None,
    ) -> Any:
        # Parent class polls the queue; we then download the first generated image.
        output = super().get_response(response, request_params)
        image_url = _as_dict(output)["images"][0]["url"]
        return get_session().get(image_url).content
@@ -2,6 +2,7 @@ import json
2
2
  from functools import lru_cache
3
3
  from pathlib import Path
4
4
  from typing import Any, Dict, Optional, Union
5
+ from urllib.parse import urlparse, urlunparse
5
6
 
6
7
  from huggingface_hub import constants
7
8
  from huggingface_hub.hf_api import InferenceProviderMapping
@@ -59,7 +60,7 @@ class HFInferenceTask(TaskProviderHelper):
59
60
  raise ValueError(f"Unexpected binary input for task {self.task}.")
60
61
  if isinstance(inputs, Path):
61
62
  raise ValueError(f"Unexpected path input for task {self.task} (got {inputs})")
62
- return {"inputs": inputs, "parameters": filter_none(parameters)}
63
+ return filter_none({"inputs": inputs, "parameters": parameters})
63
64
 
64
65
 
65
66
  class HFInferenceBinaryInputTask(HFInferenceTask):
@@ -125,18 +126,25 @@ class HFInferenceConversational(HFInferenceTask):
125
126
 
126
127
 
127
128
  def _build_chat_completion_url(model_url: str) -> str:
128
- # Strip trailing /
129
- model_url = model_url.rstrip("/")
129
+ parsed = urlparse(model_url)
130
+ path = parsed.path.rstrip("/")
130
131
 
131
- # Append /chat/completions if not already present
132
- if model_url.endswith("/v1"):
133
- model_url += "/chat/completions"
132
+ # If the path already ends with /chat/completions, we're done!
133
+ if path.endswith("/chat/completions"):
134
+ return model_url
134
135
 
136
+ # Append /chat/completions if not already present
137
+ if path.endswith("/v1"):
138
+ new_path = path + "/chat/completions"
139
+ # If path was empty or just "/", set the full path
140
+ elif not path:
141
+ new_path = "/v1/chat/completions"
135
142
  # Append /v1/chat/completions if not already present
136
- if not model_url.endswith("/chat/completions"):
137
- model_url += "/v1/chat/completions"
143
+ else:
144
+ new_path = path + "/v1/chat/completions"
138
145
 
139
- return model_url
146
+ # Reconstruct the URL with the new path and original query parameters.
147
+ return urlunparse(parsed._replace(path=new_path))
140
148
 
141
149
 
142
150
  @lru_cache(maxsize=1)
@@ -1,7 +1,7 @@
1
1
  from typing import Any, Dict, Optional, Union
2
2
 
3
3
  from huggingface_hub.hf_api import InferenceProviderMapping
4
- from huggingface_hub.inference._common import RequestParameters, _as_dict
4
+ from huggingface_hub.inference._common import RequestParameters, _as_dict, _as_url
5
5
  from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
6
6
  from huggingface_hub.utils import get_session
7
7
 
@@ -70,3 +70,21 @@ class ReplicateTextToSpeechTask(ReplicateTask):
70
70
  payload: Dict = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info) # type: ignore[assignment]
71
71
  payload["input"]["text"] = payload["input"].pop("prompt") # rename "prompt" to "text" for TTS
72
72
  return payload
73
+
74
+
75
class ReplicateImageToImageTask(ReplicateTask):
    """Image-to-image task for the Replicate provider."""

    def __init__(self):
        super().__init__("image-to-image")

    def _prepare_payload_as_dict(
        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
    ) -> Optional[Dict]:
        # The input image is referenced by URL (a data: URI when raw bytes are given).
        input_section = {"input_image": _as_url(inputs, default_mime_type="image/jpeg")}
        input_section.update(filter_none(parameters))
        payload: Dict[str, Any] = {"input": input_section}

        # "owner/model:version" ids carry an explicit version to pin on Replicate.
        provider_id = provider_mapping_info.provider_id
        if ":" in provider_id:
            payload["version"] = provider_id.split(":", 1)[1]
        return payload
@@ -45,7 +45,7 @@ def _requires_keras_2_model(fn: CallableT) -> CallableT:
45
45
  if not hasattr(model, "history"): # hacky way to check if model is Keras 2.x
46
46
  raise NotImplementedError(
47
47
  f"Cannot use '{fn.__name__}': Keras 3.x is not supported."
48
- " Please save models manually and upload them using `upload_folder` or `huggingface-cli upload`."
48
+ " Please save models manually and upload them using `upload_folder` or `hf upload`."
49
49
  )
50
50
  return fn(model, *args, **kwargs)
51
51
 
@@ -338,7 +338,7 @@ def push_to_hub_keras(
338
338
  token (`str`, *optional*):
339
339
  The token to use as HTTP bearer authorization for remote files. If
340
340
  not set, will use the token set when logging in with
341
- `huggingface-cli login` (stored in `~/.huggingface`).
341
+ `hf auth login` (stored in `~/.huggingface`).
342
342
  branch (`str`, *optional*):
343
343
  The git branch on which to push the model. This defaults to
344
344
  the default branch as specified in your repository, which
@@ -276,7 +276,7 @@ class RepoCard:
276
276
 
277
277
  with SoftTemporaryDirectory() as tmpdir:
278
278
  tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
279
- tmp_path.write_text(str(self))
279
+ tmp_path.write_text(str(self), encoding="utf-8")
280
280
  url = upload_file(
281
281
  path_or_fileobj=str(tmp_path),
282
282
  path_in_repo=constants.REPOCARD_NAME,
@@ -487,7 +487,7 @@ class Repository:
487
487
  To set when cloning a repo from a repo_id. Default is model.
488
488
  token (`bool` or `str`, *optional*):
489
489
  A valid authentication token (see https://huggingface.co/settings/token).
490
- If `None` or `True` and machine is logged in (through `huggingface-cli login`
490
+ If `None` or `True` and machine is logged in (through `hf auth login`
491
491
  or [`~huggingface_hub.login`]), token will be retrieved from the cache.
492
492
  If `False`, token is not sent in the request header.
493
493
  git_user (`str`, *optional*):
@@ -878,7 +878,7 @@ class Repository:
878
878
  """
879
879
  try:
880
880
  lfs_config = "git config lfs.customtransfer.multipart"
881
- run_subprocess(f"{lfs_config}.path huggingface-cli", self.local_dir)
881
+ run_subprocess(f"{lfs_config}.path hf", self.local_dir)
882
882
  run_subprocess(
883
883
  f"{lfs_config}.args {LFS_MULTIPART_UPLOAD_COMMAND}",
884
884
  self.local_dir,
@@ -41,7 +41,7 @@ def get_token() -> Optional[str]:
41
41
 
42
42
  Token is retrieved in priority from the `HF_TOKEN` environment variable. Otherwise, we read the token file located
43
43
  in the Hugging Face home folder. Returns None if user is not logged in. To log in, use [`login`] or
44
- `huggingface-cli login`.
44
+ `hf auth login`.
45
45
 
46
46
  Returns:
47
47
  `str` or `None`: The token, `None` if it doesn't exist.
@@ -632,9 +632,9 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
632
632
  )
633
633
  ```
634
634
 
635
- You can also print a detailed report directly from the `huggingface-cli` using:
635
+ You can also print a detailed report directly from the `hf` command line using:
636
636
  ```text
637
- > huggingface-cli scan-cache
637
+ > hf cache scan
638
638
  REPO ID REPO TYPE SIZE ON DISK NB FILES REFS LOCAL PATH
639
639
  --------------------------- --------- ------------ -------- ------------------- -------------------------------------------------------------------------
640
640
  glue dataset 116.3K 15 1.17.0, main, 2.4.0 /Users/lucain/.cache/huggingface/hub/datasets--glue
@@ -0,0 +1,51 @@
1
# AI-generated module (ChatGPT), reviewed: escape sequences are now only
# interpreted inside double-quoted values, per usual dotenv conventions.
import re
from typing import Dict

# Matches one `KEY=value` line: optional `export`, a shell-style identifier,
# then an optional single-quoted / double-quoted / unquoted value, followed by
# an optional inline `# comment`.
_LINE_PATTERN = re.compile(
    r"""
    ^\s*
    (?:export\s+)?               # optional export
    ([A-Za-z_][A-Za-z0-9_]*)     # key
    \s*=\s*
    (                            # value group
        (?:
            '(?:\\'|[^'])*'          # single-quoted value
            | "(?:\\"|[^"])*"        # double-quoted value
            | [^#\n\r]+?             # unquoted value
        )
    )?
    \s*(?:\#.*)?$                # optional inline comment
    """,
    re.VERBOSE,
)


def load_dotenv(dotenv_str: str) -> Dict[str, str]:
    """Parse a DOTENV-format string and return a dictionary of key-value pairs.

    Handles quoted values, full-line and inline comments, the `export` keyword,
    and blank lines. Following standard dotenv quoting rules:

    - double-quoted values have their quotes stripped and the escape sequences
      ``\\n``, ``\\t``, ``\\"``, ``\\\\`` and ``\\$`` expanded;
    - single-quoted values have their quotes stripped but are kept literal;
    - unquoted values are taken as-is (trimmed, inline comment removed).

    Args:
        dotenv_str: content of a `.env` file.

    Returns:
        Mapping of variable names to their string values. Malformed lines are
        silently skipped.
    """
    env: Dict[str, str] = {}

    for line in dotenv_str.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue  # Skip comments and empty lines

        match = _LINE_PATTERN.match(line)
        if not match:
            continue  # Skip malformed lines

        key, raw_val = match.group(1), match.group(2) or ""
        val = raw_val.strip()

        if val.startswith('"') and val.endswith('"'):
            # Double quotes: strip them and expand escape sequences.
            # NOTE: replacements are sequential, so `\\n` collapses to `\` + newline
            # handling consistent with the historical behavior of this parser.
            val = val[1:-1]
            val = (
                val.replace(r"\n", "\n")
                .replace(r"\t", "\t")
                .replace(r"\"", '"')
                .replace(r"\\", "\\")
                .replace(r"\$", "$")
            )
        elif val.startswith("'") and val.endswith("'"):
            # Single quotes: strip them, keep the content fully literal.
            val = val[1:-1]

        env[key] = val

    return env
@@ -159,7 +159,7 @@ def get_token_to_send(token: Optional[Union[bool, str]]) -> Optional[str]:
159
159
  raise LocalTokenNotFoundError(
160
160
  "Token is required (`token=True`), but no token found. You"
161
161
  " need to provide a token or be logged in to Hugging Face with"
162
- " `huggingface-cli login` or `huggingface_hub.login`. See"
162
+ " `hf auth login` or `huggingface_hub.login`. See"
163
163
  " https://huggingface.co/settings/tokens."
164
164
  )
165
165
  return cached_token
@@ -155,7 +155,7 @@ def get_hf_transfer_version() -> str:
155
155
  # xet
156
156
  def is_xet_available() -> bool:
157
157
  # since hf_xet is automatically used if available, allow explicit disabling via environment variable
158
- if constants._is_true(os.environ.get("HF_HUB_DISABLE_XET")): # type: ignore
158
+ if constants.HF_HUB_DISABLE_XET:
159
159
  return False
160
160
 
161
161
  return is_package_available("hf_xet")
@@ -26,7 +26,9 @@ class XetConnectionInfo:
26
26
  endpoint: str
27
27
 
28
28
 
29
- def parse_xet_file_data_from_response(response: requests.Response) -> Optional[XetFileData]:
29
+ def parse_xet_file_data_from_response(
30
+ response: requests.Response, endpoint: Optional[str] = None
31
+ ) -> Optional[XetFileData]:
30
32
  """
31
33
  Parse XET file metadata from an HTTP response.
32
34
 
@@ -52,7 +54,9 @@ def parse_xet_file_data_from_response(response: requests.Response) -> Optional[X
52
54
  refresh_route = response.headers[constants.HUGGINGFACE_HEADER_X_XET_REFRESH_ROUTE]
53
55
  except KeyError:
54
56
  return None
55
-
57
+ endpoint = endpoint if endpoint is not None else constants.ENDPOINT
58
+ if refresh_route.startswith(constants.HUGGINGFACE_CO_URL_HOME):
59
+ refresh_route = refresh_route.replace(constants.HUGGINGFACE_CO_URL_HOME.rstrip("/"), endpoint.rstrip("/"))
56
60
  return XetFileData(
57
61
  file_hash=file_hash,
58
62
  refresh_route=refresh_route,
@@ -0,0 +1,141 @@
1
from collections import OrderedDict
from typing import List

from hf_xet import PyItemProgressUpdate, PyTotalProgressUpdate

from .tqdm import tqdm


class XetProgressReporter:
    """Render Xet transfer progress as a fixed-height stack of `tqdm` bars.

    Layout: two persistent bars at the top (overall file processing and
    new-data upload), followed by up to `n_lines` per-file bars. Once more
    than `n_lines` files are in flight at the same time, the last visible bar
    aggregates the overflow.
    """

    def __init__(self, n_lines: int = 10, description_width: int = 40):
        # Maximum number of per-file bars displayed below the two overall bars.
        self.n_lines = n_lines
        # Fixed description width so all bars stay vertically aligned.
        self.description_width = description_width

        # Shared keyword arguments for every bar created by this reporter.
        self.tqdm_settings = {
            "unit": "B",
            "unit_scale": True,
            "leave": True,
            "unit_divisor": 1000,
            "nrows": n_lines + 3,
            "miniters": 1,
            "bar_format": "{l_bar}{bar}| {n_fmt:>5}B / {total_fmt:>5}B{postfix:>12}",
        }

        # Overall progress bars
        self.data_processing_bar = tqdm(
            total=0, desc=self.format_desc("Processing Files (0 / 0)", False), position=0, **self.tqdm_settings
        )

        self.upload_bar = tqdm(
            total=0, desc=self.format_desc("New Data Upload", False), position=1, **self.tqdm_settings
        )

        # Names of all files seen so far / of files fully processed.
        self.known_items: set[str] = set()
        self.completed_items: set[str] = set()

        # Item bars (scrolling view)
        # Latest progress snapshot per in-flight file, in insertion order.
        self.item_state: OrderedDict[str, PyItemProgressUpdate] = OrderedDict()
        # One slot per visible per-file bar; None until the slot is first used.
        self.current_bars: List = [None] * self.n_lines

    def format_desc(self, name: str, indent: bool) -> str:
        """Pad or truncate `name` to the configured description width.

        Names longer than the available width are truncated from the left with
        a leading "..."; shorter names are left-justified. When `indent` is
        True a leading space is added so per-file bars appear nested under the
        overall bars.
        """
        padding = " " if indent else ""
        width = self.description_width - len(padding)

        if len(name) > width:
            name = f"...{name[-(width - 3) :]}"

        return f"{padding}{name.ljust(width)}"

    def update_progress(self, total_update: PyTotalProgressUpdate, item_updates: List[PyItemProgressUpdate]):
        """Apply one progress callback from hf_xet and refresh all bars.

        Args:
            total_update: aggregate byte counters and completion rates.
            item_updates: latest per-file progress snapshots.
        """
        # Update all the per-item values.
        for item in item_updates:
            item_name = item.item_name

            self.known_items.add(item_name)

            # Only care about items where the processing has already started.
            if item.bytes_completed == 0:
                continue

            # Overwrite the existing value in there.
            self.item_state[item_name] = item

        bar_idx = 0
        new_completed = []

        # Now, go through and update all the bars
        for name, item in self.item_state.items():
            # Is this ready to be removed on the next update?
            if item.bytes_completed == item.total_bytes:
                self.completed_items.add(name)
                new_completed.append(name)

            # If we've run out of bars to use, then collapse the last ones together.
            if bar_idx >= len(self.current_bars):
                bar = self.current_bars[-1]
                in_final_bar_mode = True
                final_bar_aggregation_count = bar_idx + 1 - len(self.current_bars)
            else:
                bar = self.current_bars[bar_idx]
                in_final_bar_mode = False

            if bar is None:
                # First use of this slot: create its bar seeded with current progress.
                # NOTE(review): when in_final_bar_mode is True, bar_idx is out of
                # range here; this looks unreachable because slots fill in order
                # before overflow can occur — confirm upstream.
                self.current_bars[bar_idx] = tqdm(
                    desc=self.format_desc(name, True),
                    position=2 + bar_idx,  # Set to the position past the initial bars.
                    total=item.total_bytes,
                    initial=item.bytes_completed,
                    **self.tqdm_settings,
                )

            elif in_final_bar_mode:
                # Fold this item's counters into the shared overflow bar.
                bar.n += item.bytes_completed
                bar.total += item.total_bytes
                bar.set_description(self.format_desc(f"[+ {final_bar_aggregation_count} files]", True), refresh=False)
            else:
                # Reuse the slot's existing bar for (possibly) a different file.
                bar.set_description(self.format_desc(name, True), refresh=False)
                bar.n = item.bytes_completed
                bar.total = item.total_bytes

            bar_idx += 1

        # Remove all the completed ones from the ordered dictionary
        for name in new_completed:
            # Only remove ones from consideration to make room for more items coming in.
            if len(self.item_state) <= self.n_lines:
                break

            del self.item_state[name]

        # Now manually refresh each of the bars
        for bar in self.current_bars:
            if bar:
                bar.refresh()

        # Update overall bars
        def postfix(speed):
            # Human-readable transfer rate, right-aligned (e.g. "1.2MB/s").
            s = tqdm.format_sizeof(speed) if speed is not None else "???"
            return f"{s}B/s ".rjust(10, " ")

        self.data_processing_bar.total = total_update.total_bytes
        self.data_processing_bar.set_description(
            self.format_desc(f"Processing Files ({len(self.completed_items)} / {len(self.known_items)})", False),
            refresh=False,
        )
        self.data_processing_bar.set_postfix_str(postfix(total_update.total_bytes_completion_rate), refresh=False)
        self.data_processing_bar.update(total_update.total_bytes_completion_increment)

        self.upload_bar.total = total_update.total_transfer_bytes
        self.upload_bar.set_postfix_str(postfix(total_update.total_transfer_bytes_completion_rate), refresh=False)
        self.upload_bar.update(total_update.total_transfer_bytes_completion_increment)

    def close(self, _success):
        """Close every bar owned by this reporter, ending the tqdm display."""
        self.data_processing_bar.close()
        self.upload_bar.close()
        for bar in self.current_bars:
            if bar:
                bar.close()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: huggingface-hub
3
- Version: 0.33.5
3
+ Version: 0.34.0
4
4
  Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
5
5
  Home-page: https://github.com/huggingface/huggingface_hub
6
6
  Author: Hugging Face, Inc.
@@ -32,7 +32,7 @@ Requires-Dist: pyyaml>=5.1
32
32
  Requires-Dist: requests
33
33
  Requires-Dist: tqdm>=4.42.1
34
34
  Requires-Dist: typing-extensions>=3.7.4.3
35
- Requires-Dist: hf-xet<2.0.0,>=1.1.2; platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "arm64" or platform_machine == "aarch64"
35
+ Requires-Dist: hf-xet<2.0.0,>=1.1.3; platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "arm64" or platform_machine == "aarch64"
36
36
  Provides-Extra: all
37
37
  Requires-Dist: InquirerPy==0.3.4; extra == "all"
38
38
  Requires-Dist: aiohttp; extra == "all"
@@ -56,7 +56,7 @@ Requires-Dist: Pillow; extra == "all"
56
56
  Requires-Dist: gradio>=4.0.0; extra == "all"
57
57
  Requires-Dist: numpy; extra == "all"
58
58
  Requires-Dist: ruff>=0.9.0; extra == "all"
59
- Requires-Dist: libcst==1.4.0; extra == "all"
59
+ Requires-Dist: libcst>=1.4.0; extra == "all"
60
60
  Requires-Dist: typing-extensions>=4.8.0; extra == "all"
61
61
  Requires-Dist: types-PyYAML; extra == "all"
62
62
  Requires-Dist: types-requests; extra == "all"
@@ -91,7 +91,7 @@ Requires-Dist: Pillow; extra == "dev"
91
91
  Requires-Dist: gradio>=4.0.0; extra == "dev"
92
92
  Requires-Dist: numpy; extra == "dev"
93
93
  Requires-Dist: ruff>=0.9.0; extra == "dev"
94
- Requires-Dist: libcst==1.4.0; extra == "dev"
94
+ Requires-Dist: libcst>=1.4.0; extra == "dev"
95
95
  Requires-Dist: typing-extensions>=4.8.0; extra == "dev"
96
96
  Requires-Dist: types-PyYAML; extra == "dev"
97
97
  Requires-Dist: types-requests; extra == "dev"
@@ -122,7 +122,7 @@ Requires-Dist: httpx; extra == "oauth"
122
122
  Requires-Dist: itsdangerous; extra == "oauth"
123
123
  Provides-Extra: quality
124
124
  Requires-Dist: ruff>=0.9.0; extra == "quality"
125
- Requires-Dist: libcst==1.4.0; extra == "quality"
125
+ Requires-Dist: libcst>=1.4.0; extra == "quality"
126
126
  Requires-Dist: mypy<1.15.0,>=1.14.1; python_version == "3.8" and extra == "quality"
127
127
  Requires-Dist: mypy==1.15.0; python_version >= "3.9" and extra == "quality"
128
128
  Provides-Extra: tensorflow
@@ -265,9 +265,9 @@ Files will be downloaded in a local cache folder. More details in [this guide](h
265
265
  The Hugging Face Hub uses tokens to authenticate applications (see [docs](https://huggingface.co/docs/hub/security-tokens)). To log in your machine, run the following CLI:
266
266
 
267
267
  ```bash
268
- huggingface-cli login
268
+ hf auth login
269
269
  # or using an environment variable
270
- huggingface-cli login --token $HUGGINGFACE_TOKEN
270
+ hf auth login --token $HUGGINGFACE_TOKEN
271
271
  ```
272
272
 
273
273
  ### Create a repository
@@ -314,7 +314,6 @@ The advantages are:
314
314
 
315
315
  - Free model or dataset hosting for libraries and their users.
316
316
  - Built-in file versioning, even with very large files, thanks to a git-based approach.
317
- - Serverless inference API for all models publicly available.
318
317
  - In-browser widgets to play with the uploaded models.
319
318
  - Anyone can upload a new model for your library, they just need to add the corresponding tag for the model to be discoverable.
320
319
  - Fast downloads! We use Cloudfront (a CDN) to geo-replicate downloads so they're blazing fast from anywhere on the globe.