huggingface-hub 0.35.1__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +28 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +20 -20
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +5 -5
- huggingface_hub/_upload_large_folder.py +15 -15
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/__init__.py +0 -14
- huggingface_hub/cli/_cli_utils.py +80 -3
- huggingface_hub/cli/auth.py +104 -150
- huggingface_hub/cli/cache.py +102 -126
- huggingface_hub/cli/download.py +93 -110
- huggingface_hub/cli/hf.py +37 -41
- huggingface_hub/cli/jobs.py +689 -1017
- huggingface_hub/cli/lfs.py +120 -143
- huggingface_hub/cli/repo.py +158 -216
- huggingface_hub/cli/repo_files.py +50 -84
- huggingface_hub/cli/system.py +6 -25
- huggingface_hub/cli/upload.py +198 -212
- huggingface_hub/cli/upload_large_folder.py +90 -105
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +11 -11
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +4 -5
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -22
- huggingface_hub/errors.py +43 -30
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +154 -253
- huggingface_hub/hf_api.py +329 -558
- huggingface_hub/hf_file_system.py +104 -62
- huggingface_hub/hub_mixin.py +32 -54
- huggingface_hub/inference/_client.py +178 -163
- huggingface_hub/inference/_common.py +38 -54
- huggingface_hub/inference/_generated/_async_client.py +219 -259
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/constants.py +1 -2
- huggingface_hub/inference/_mcp/mcp_client.py +33 -22
- huggingface_hub/inference/_mcp/types.py +10 -10
- huggingface_hub/inference/_mcp/utils.py +4 -4
- huggingface_hub/inference/_providers/__init__.py +2 -13
- huggingface_hub/inference/_providers/_common.py +24 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +13 -13
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +24 -33
- huggingface_hub/repocard.py +16 -17
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +3 -3
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +369 -209
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +15 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +3 -3
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +1 -1
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/METADATA +17 -26
- huggingface_hub-1.0.0rc1.dist-info/RECORD +161 -0
- huggingface_hub/inference/_providers/publicai.py +0 -6
- huggingface_hub/inference/_providers/scaleway.py +0 -28
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.35.1.dist-info/RECORD +0 -168
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -14,9 +14,9 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""Contains utilities to handle pagination on Huggingface Hub."""
|
|
16
16
|
|
|
17
|
-
from typing import
|
|
17
|
+
from typing import Iterable, Optional
|
|
18
18
|
|
|
19
|
-
import
|
|
19
|
+
import httpx
|
|
20
20
|
|
|
21
21
|
from . import get_session, hf_raise_for_status, http_backoff, logging
|
|
22
22
|
|
|
@@ -24,7 +24,7 @@ from . import get_session, hf_raise_for_status, http_backoff, logging
|
|
|
24
24
|
logger = logging.get_logger(__name__)
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def paginate(path: str, params: Dict, headers: Dict) -> Iterable:
|
|
27
|
+
def paginate(path: str, params: dict, headers: dict) -> Iterable:
|
|
28
28
|
"""Fetch a list of models/datasets/spaces and paginate through results.
|
|
29
29
|
|
|
30
30
|
This is using the same "Link" header format as GitHub.
|
|
@@ -48,5 +48,5 @@ def paginate(path: str, params: Dict, headers: Dict) -> Iterable:
|
|
|
48
48
|
next_page = _get_next_page(r)
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
def _get_next_page(response:
|
|
51
|
+
def _get_next_page(response: httpx.Response) -> Optional[str]:
|
|
52
52
|
return response.links.get("next", {}).get("url")
|
huggingface_hub/utils/_paths.py
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
from fnmatch import fnmatch
|
|
18
18
|
from pathlib import Path
|
|
19
|
-
from typing import Callable, Generator, Iterable,
|
|
19
|
+
from typing import Callable, Generator, Iterable, Optional, TypeVar, Union
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
T = TypeVar("T")
|
|
@@ -39,8 +39,8 @@ FORBIDDEN_FOLDERS = [".git", ".cache"]
|
|
|
39
39
|
def filter_repo_objects(
|
|
40
40
|
items: Iterable[T],
|
|
41
41
|
*,
|
|
42
|
-
allow_patterns: Optional[Union[
|
|
43
|
-
ignore_patterns: Optional[Union[
|
|
42
|
+
allow_patterns: Optional[Union[list[str], str]] = None,
|
|
43
|
+
ignore_patterns: Optional[Union[list[str], str]] = None,
|
|
44
44
|
key: Optional[Callable[[T], str]] = None,
|
|
45
45
|
) -> Generator[T, None, None]:
|
|
46
46
|
"""Filter repo objects based on an allowlist and a denylist.
|
|
@@ -55,10 +55,10 @@ def filter_repo_objects(
|
|
|
55
55
|
Args:
|
|
56
56
|
items (`Iterable`):
|
|
57
57
|
List of items to filter.
|
|
58
|
-
allow_patterns (`str` or `
|
|
58
|
+
allow_patterns (`str` or `list[str]`, *optional*):
|
|
59
59
|
Patterns constituting the allowlist. If provided, item paths must match at
|
|
60
60
|
least one pattern from the allowlist.
|
|
61
|
-
ignore_patterns (`str` or `
|
|
61
|
+
ignore_patterns (`str` or `list[str]`, *optional*):
|
|
62
62
|
Patterns constituting the denylist. If provided, item paths must not match
|
|
63
63
|
any patterns from the denylist.
|
|
64
64
|
key (`Callable[[T], str]`, *optional*):
|
|
@@ -19,7 +19,7 @@ import os
|
|
|
19
19
|
import platform
|
|
20
20
|
import sys
|
|
21
21
|
import warnings
|
|
22
|
-
from typing import Any
|
|
22
|
+
from typing import Any
|
|
23
23
|
|
|
24
24
|
from .. import __version__, constants
|
|
25
25
|
|
|
@@ -38,6 +38,7 @@ _CANDIDATES = {
|
|
|
38
38
|
"hf_transfer": {"hf_transfer"},
|
|
39
39
|
"hf_xet": {"hf_xet"},
|
|
40
40
|
"jinja": {"Jinja2"},
|
|
41
|
+
"httpx": {"httpx"},
|
|
41
42
|
"keras": {"keras"},
|
|
42
43
|
"numpy": {"numpy"},
|
|
43
44
|
"pillow": {"Pillow"},
|
|
@@ -152,6 +153,15 @@ def get_hf_transfer_version() -> str:
|
|
|
152
153
|
return _get_version("hf_transfer")
|
|
153
154
|
|
|
154
155
|
|
|
156
|
+
# httpx
|
|
157
|
+
def is_httpx_available() -> bool:
|
|
158
|
+
return is_package_available("httpx")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def get_httpx_version() -> str:
|
|
162
|
+
return _get_version("httpx")
|
|
163
|
+
|
|
164
|
+
|
|
155
165
|
# xet
|
|
156
166
|
def is_xet_available() -> bool:
|
|
157
167
|
# since hf_xet is automatically used if available, allow explicit disabling via environment variable
|
|
@@ -312,7 +322,7 @@ def is_colab_enterprise() -> bool:
|
|
|
312
322
|
return os.environ.get("VERTEX_PRODUCT") == "COLAB_ENTERPRISE"
|
|
313
323
|
|
|
314
324
|
|
|
315
|
-
def dump_environment_info() -> Dict[str, Any]:
|
|
325
|
+
def dump_environment_info() -> dict[str, Any]:
|
|
316
326
|
"""Dump information about the machine to help debugging issues.
|
|
317
327
|
|
|
318
328
|
Similar helpers exist in:
|
|
@@ -326,7 +336,7 @@ def dump_environment_info() -> Dict[str, Any]:
|
|
|
326
336
|
token = get_token()
|
|
327
337
|
|
|
328
338
|
# Generic machine info
|
|
329
|
-
info:
|
|
339
|
+
info: dict[str, Any] = {
|
|
330
340
|
"huggingface_hub version": get_hf_hub_version(),
|
|
331
341
|
"Platform": platform.platform(),
|
|
332
342
|
"Python version": get_python_version(),
|
|
@@ -357,21 +367,13 @@ def dump_environment_info() -> Dict[str, Any]:
|
|
|
357
367
|
pass
|
|
358
368
|
|
|
359
369
|
# Installed dependencies
|
|
360
|
-
info["FastAI"] = get_fastai_version()
|
|
361
|
-
info["Tensorflow"] = get_tf_version()
|
|
362
370
|
info["Torch"] = get_torch_version()
|
|
363
|
-
info["
|
|
364
|
-
info["Graphviz"] = get_graphviz_version()
|
|
365
|
-
info["keras"] = get_keras_version()
|
|
366
|
-
info["Pydot"] = get_pydot_version()
|
|
367
|
-
info["Pillow"] = get_pillow_version()
|
|
371
|
+
info["httpx"] = get_httpx_version()
|
|
368
372
|
info["hf_transfer"] = get_hf_transfer_version()
|
|
373
|
+
info["hf_xet"] = get_xet_version()
|
|
369
374
|
info["gradio"] = get_gradio_version()
|
|
370
375
|
info["tensorboard"] = get_tensorboard_version()
|
|
371
|
-
info["numpy"] = get_numpy_version()
|
|
372
376
|
info["pydantic"] = get_pydantic_version()
|
|
373
|
-
info["aiohttp"] = get_aiohttp_version()
|
|
374
|
-
info["hf_xet"] = get_xet_version()
|
|
375
377
|
|
|
376
378
|
# Environment variables
|
|
377
379
|
info["ENDPOINT"] = constants.ENDPOINT
|
|
@@ -2,7 +2,7 @@ import functools
|
|
|
2
2
|
import operator
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import Literal, Optional
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
FILENAME_T = str
|
|
@@ -19,17 +19,17 @@ class TensorInfo:
|
|
|
19
19
|
Attributes:
|
|
20
20
|
dtype (`str`):
|
|
21
21
|
The data type of the tensor ("F64", "F32", "F16", "BF16", "I64", "I32", "I16", "I8", "U8", "BOOL").
|
|
22
|
-
shape (`
|
|
22
|
+
shape (`list[int]`):
|
|
23
23
|
The shape of the tensor.
|
|
24
|
-
data_offsets (`
|
|
24
|
+
data_offsets (`tuple[int, int]`):
|
|
25
25
|
The offsets of the data in the file as a tuple `[BEGIN, END]`.
|
|
26
26
|
parameter_count (`int`):
|
|
27
27
|
The number of parameters in the tensor.
|
|
28
28
|
"""
|
|
29
29
|
|
|
30
30
|
dtype: DTYPE_T
|
|
31
|
-
shape:
|
|
32
|
-
data_offsets:
|
|
31
|
+
shape: list[int]
|
|
32
|
+
data_offsets: tuple[int, int]
|
|
33
33
|
parameter_count: int = field(init=False)
|
|
34
34
|
|
|
35
35
|
def __post_init__(self) -> None:
|
|
@@ -49,22 +49,22 @@ class SafetensorsFileMetadata:
|
|
|
49
49
|
For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.
|
|
50
50
|
|
|
51
51
|
Attributes:
|
|
52
|
-
metadata (`
|
|
52
|
+
metadata (`dict`):
|
|
53
53
|
The metadata contained in the file.
|
|
54
|
-
tensors (`
|
|
54
|
+
tensors (`dict[str, TensorInfo]`):
|
|
55
55
|
A map of all tensors. Keys are tensor names and values are information about the corresponding tensor, as a
|
|
56
56
|
[`TensorInfo`] object.
|
|
57
|
-
parameter_count (`
|
|
57
|
+
parameter_count (`dict[str, int]`):
|
|
58
58
|
A map of the number of parameters per data type. Keys are data types and values are the number of parameters
|
|
59
59
|
of that data type.
|
|
60
60
|
"""
|
|
61
61
|
|
|
62
|
-
metadata:
|
|
63
|
-
tensors:
|
|
64
|
-
parameter_count:
|
|
62
|
+
metadata: dict[str, str]
|
|
63
|
+
tensors: dict[TENSOR_NAME_T, TensorInfo]
|
|
64
|
+
parameter_count: dict[DTYPE_T, int] = field(init=False)
|
|
65
65
|
|
|
66
66
|
def __post_init__(self) -> None:
|
|
67
|
-
parameter_count:
|
|
67
|
+
parameter_count: dict[DTYPE_T, int] = defaultdict(int)
|
|
68
68
|
for tensor in self.tensors.values():
|
|
69
69
|
parameter_count[tensor.dtype] += tensor.parameter_count
|
|
70
70
|
self.parameter_count = dict(parameter_count)
|
|
@@ -82,29 +82,29 @@ class SafetensorsRepoMetadata:
|
|
|
82
82
|
For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.
|
|
83
83
|
|
|
84
84
|
Attributes:
|
|
85
|
-
metadata (`
|
|
85
|
+
metadata (`dict`, *optional*):
|
|
86
86
|
The metadata contained in the 'model.safetensors.index.json' file, if it exists. Only populated for sharded
|
|
87
87
|
models.
|
|
88
88
|
sharded (`bool`):
|
|
89
89
|
Whether the repo contains a sharded model or not.
|
|
90
|
-
weight_map (`
|
|
90
|
+
weight_map (`dict[str, str]`):
|
|
91
91
|
A map of all weights. Keys are tensor names and values are filenames of the files containing the tensors.
|
|
92
|
-
files_metadata (`
|
|
92
|
+
files_metadata (`dict[str, SafetensorsFileMetadata]`):
|
|
93
93
|
A map of all files metadata. Keys are filenames and values are the metadata of the corresponding file, as
|
|
94
94
|
a [`SafetensorsFileMetadata`] object.
|
|
95
|
-
parameter_count (`
|
|
95
|
+
parameter_count (`dict[str, int]`):
|
|
96
96
|
A map of the number of parameters per data type. Keys are data types and values are the number of parameters
|
|
97
97
|
of that data type.
|
|
98
98
|
"""
|
|
99
99
|
|
|
100
|
-
metadata: Optional[
|
|
100
|
+
metadata: Optional[dict]
|
|
101
101
|
sharded: bool
|
|
102
|
-
weight_map:
|
|
103
|
-
files_metadata:
|
|
104
|
-
parameter_count:
|
|
102
|
+
weight_map: dict[TENSOR_NAME_T, FILENAME_T] # tensor name -> filename
|
|
103
|
+
files_metadata: dict[FILENAME_T, SafetensorsFileMetadata] # filename -> metadata
|
|
104
|
+
parameter_count: dict[DTYPE_T, int] = field(init=False)
|
|
105
105
|
|
|
106
106
|
def __post_init__(self) -> None:
|
|
107
|
-
parameter_count:
|
|
107
|
+
parameter_count: dict[DTYPE_T, int] = defaultdict(int)
|
|
108
108
|
for file_metadata in self.files_metadata.values():
|
|
109
109
|
for dtype, nb_parameters_ in file_metadata.parameter_count.items():
|
|
110
110
|
parameter_count[dtype] += nb_parameters_
|
|
@@ -20,7 +20,7 @@ import sys
|
|
|
20
20
|
from contextlib import contextmanager
|
|
21
21
|
from io import StringIO
|
|
22
22
|
from pathlib import Path
|
|
23
|
-
from typing import IO, Generator,
|
|
23
|
+
from typing import IO, Generator, Optional, Union
|
|
24
24
|
|
|
25
25
|
from .logging import get_logger
|
|
26
26
|
|
|
@@ -51,7 +51,7 @@ def capture_output() -> Generator[StringIO, None, None]:
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
def run_subprocess(
|
|
54
|
-
command: Union[str,
|
|
54
|
+
command: Union[str, list[str]],
|
|
55
55
|
folder: Optional[Union[str, Path]] = None,
|
|
56
56
|
check=True,
|
|
57
57
|
**kwargs,
|
|
@@ -62,7 +62,7 @@ def run_subprocess(
|
|
|
62
62
|
be captured.
|
|
63
63
|
|
|
64
64
|
Args:
|
|
65
|
-
command (`str` or `
|
|
65
|
+
command (`str` or `list[str]`):
|
|
66
66
|
The command to execute as a string or list of strings.
|
|
67
67
|
folder (`str`, *optional*):
|
|
68
68
|
The folder in which to run the command. Defaults to current working
|
|
@@ -70,7 +70,7 @@ def run_subprocess(
|
|
|
70
70
|
check (`bool`, *optional*, defaults to `True`):
|
|
71
71
|
Setting `check` to `True` will raise a `subprocess.CalledProcessError`
|
|
72
72
|
when the subprocess has a non-zero exit code.
|
|
73
|
-
kwargs (`
|
|
73
|
+
kwargs (`dict[str]`):
|
|
74
74
|
Keyword arguments to be passed to the `subprocess.run` underlying command.
|
|
75
75
|
|
|
76
76
|
Returns:
|
|
@@ -96,23 +96,23 @@ def run_subprocess(
|
|
|
96
96
|
|
|
97
97
|
@contextmanager
|
|
98
98
|
def run_interactive_subprocess(
|
|
99
|
-
command: Union[str,
|
|
99
|
+
command: Union[str, list[str]],
|
|
100
100
|
folder: Optional[Union[str, Path]] = None,
|
|
101
101
|
**kwargs,
|
|
102
|
-
) -> Generator[
|
|
102
|
+
) -> Generator[tuple[IO[str], IO[str]], None, None]:
|
|
103
103
|
"""Run a subprocess in an interactive mode in a context manager.
|
|
104
104
|
|
|
105
105
|
Args:
|
|
106
|
-
command (`str` or `
|
|
106
|
+
command (`str` or `list[str]`):
|
|
107
107
|
The command to execute as a string or list of strings.
|
|
108
108
|
folder (`str`, *optional*):
|
|
109
109
|
The folder in which to run the command. Defaults to current working
|
|
110
110
|
directory (from `os.getcwd()`).
|
|
111
|
-
kwargs (`
|
|
111
|
+
kwargs (`dict[str]`):
|
|
112
112
|
Keyword arguments to be passed to the `subprocess.run` underlying command.
|
|
113
113
|
|
|
114
114
|
Returns:
|
|
115
|
-
`
|
|
115
|
+
`tuple[IO[str], IO[str]]`: A tuple with `stdin` and `stdout` to interact
|
|
116
116
|
with the process (input and output are utf-8 encoded).
|
|
117
117
|
|
|
118
118
|
Example:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from queue import Queue
|
|
2
2
|
from threading import Lock, Thread
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Optional, Union
|
|
4
4
|
from urllib.parse import quote
|
|
5
5
|
|
|
6
6
|
from .. import constants, logging
|
|
@@ -22,7 +22,7 @@ def send_telemetry(
|
|
|
22
22
|
*,
|
|
23
23
|
library_name: Optional[str] = None,
|
|
24
24
|
library_version: Optional[str] = None,
|
|
25
|
-
user_agent: Union[
|
|
25
|
+
user_agent: Union[dict, str, None] = None,
|
|
26
26
|
) -> None:
|
|
27
27
|
"""
|
|
28
28
|
Sends telemetry that helps tracking usage of different HF libraries.
|
|
@@ -98,7 +98,7 @@ def _send_telemetry_in_thread(
|
|
|
98
98
|
*,
|
|
99
99
|
library_name: Optional[str] = None,
|
|
100
100
|
library_version: Optional[str] = None,
|
|
101
|
-
user_agent: Union[
|
|
101
|
+
user_agent: Union[dict, str, None] = None,
|
|
102
102
|
) -> None:
|
|
103
103
|
"""Contains the actual logic sending data to the Hub.
|
|
104
104
|
|
huggingface_hub/utils/_typing.py
CHANGED
|
@@ -15,10 +15,10 @@
|
|
|
15
15
|
"""Handle typing imports based on system compatibility."""
|
|
16
16
|
|
|
17
17
|
import sys
|
|
18
|
-
from typing import Any, Callable,
|
|
18
|
+
from typing import Any, Callable, Literal, Optional, Type, TypeVar, Union, get_args, get_origin
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
UNION_TYPES:
|
|
21
|
+
UNION_TYPES: list[Any] = [Union]
|
|
22
22
|
if sys.version_info >= (3, 10):
|
|
23
23
|
from types import UnionType
|
|
24
24
|
|
|
@@ -33,7 +33,7 @@ CallableT = TypeVar("CallableT", bound=Callable)
|
|
|
33
33
|
_JSON_SERIALIZABLE_TYPES = (int, float, str, bool, type(None))
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
def is_jsonable(obj: Any, _visited: Optional[
|
|
36
|
+
def is_jsonable(obj: Any, _visited: Optional[set[int]] = None) -> bool:
|
|
37
37
|
"""Check if an object is JSON serializable.
|
|
38
38
|
|
|
39
39
|
This is a weak check, as it does not check for the actual JSON serialization, but only for the types of the object.
|
|
@@ -19,7 +19,7 @@ import re
|
|
|
19
19
|
import warnings
|
|
20
20
|
from functools import wraps
|
|
21
21
|
from itertools import chain
|
|
22
|
-
from typing import Any
|
|
22
|
+
from typing import Any
|
|
23
23
|
|
|
24
24
|
from huggingface_hub.errors import HFValidationError
|
|
25
25
|
|
|
@@ -48,9 +48,7 @@ def validate_hf_hub_args(fn: CallableT) -> CallableT:
|
|
|
48
48
|
Validators:
|
|
49
49
|
- [`~utils.validate_repo_id`]: `repo_id` must be `"repo_name"`
|
|
50
50
|
or `"namespace/repo_name"`. Namespace is a username or an organization.
|
|
51
|
-
- [`~utils.
|
|
52
|
-
`use_auth_token` (only if `use_auth_token` is not expected by the decorated
|
|
53
|
-
function - in practice, always the case in `huggingface_hub`).
|
|
51
|
+
- [`~utils.smoothly_deprecate_legacy_arguments`]: Ignore `proxies` when downloading files (should be set globally).
|
|
54
52
|
|
|
55
53
|
Example:
|
|
56
54
|
```py
|
|
@@ -68,20 +66,6 @@ def validate_hf_hub_args(fn: CallableT) -> CallableT:
|
|
|
68
66
|
|
|
69
67
|
>>> my_cool_method(repo_id="other..repo..id")
|
|
70
68
|
huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in repo_id: 'other..repo..id'.
|
|
71
|
-
|
|
72
|
-
>>> @validate_hf_hub_args
|
|
73
|
-
... def my_cool_auth_method(token: str):
|
|
74
|
-
... print(token)
|
|
75
|
-
|
|
76
|
-
>>> my_cool_auth_method(token="a token")
|
|
77
|
-
"a token"
|
|
78
|
-
|
|
79
|
-
>>> my_cool_auth_method(use_auth_token="a use_auth_token")
|
|
80
|
-
"a use_auth_token"
|
|
81
|
-
|
|
82
|
-
>>> my_cool_auth_method(token="a token", use_auth_token="a use_auth_token")
|
|
83
|
-
UserWarning: Both `token` and `use_auth_token` are passed (...)
|
|
84
|
-
"a token"
|
|
85
69
|
```
|
|
86
70
|
|
|
87
71
|
Raises:
|
|
@@ -91,13 +75,8 @@ def validate_hf_hub_args(fn: CallableT) -> CallableT:
|
|
|
91
75
|
# TODO: add an argument to opt-out validation for specific argument?
|
|
92
76
|
signature = inspect.signature(fn)
|
|
93
77
|
|
|
94
|
-
# Should the validator switch `use_auth_token` values to `token`? In practice, always
|
|
95
|
-
# True in `huggingface_hub`. Might not be the case in a downstream library.
|
|
96
|
-
check_use_auth_token = "use_auth_token" not in signature.parameters and "token" in signature.parameters
|
|
97
|
-
|
|
98
78
|
@wraps(fn)
|
|
99
79
|
def _inner_fn(*args, **kwargs):
|
|
100
|
-
has_token = False
|
|
101
80
|
for arg_name, arg_value in chain(
|
|
102
81
|
zip(signature.parameters, args), # Args values
|
|
103
82
|
kwargs.items(), # Kwargs values
|
|
@@ -105,11 +84,7 @@ def validate_hf_hub_args(fn: CallableT) -> CallableT:
|
|
|
105
84
|
if arg_name in ["repo_id", "from_id", "to_id"]:
|
|
106
85
|
validate_repo_id(arg_value)
|
|
107
86
|
|
|
108
|
-
|
|
109
|
-
has_token = True
|
|
110
|
-
|
|
111
|
-
if check_use_auth_token:
|
|
112
|
-
kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
|
|
87
|
+
kwargs = smoothly_deprecate_legacy_arguments(fn_name=fn.__name__, kwargs=kwargs)
|
|
113
88
|
|
|
114
89
|
return fn(*args, **kwargs)
|
|
115
90
|
|
|
@@ -170,57 +145,63 @@ def validate_repo_id(repo_id: str) -> None:
|
|
|
170
145
|
raise HFValidationError(f"Repo_id cannot end by '.git': '{repo_id}'.")
|
|
171
146
|
|
|
172
147
|
|
|
173
|
-
def
|
|
174
|
-
"""Smoothly deprecate
|
|
175
|
-
|
|
176
|
-
The long-term goal is to remove any mention of `use_auth_token` in the codebase in
|
|
177
|
-
favor of a unique and less verbose `token` argument. This will be done a few steps:
|
|
148
|
+
def smoothly_deprecate_legacy_arguments(fn_name: str, kwargs: dict[str, Any]) -> dict[str, Any]:
|
|
149
|
+
"""Smoothly deprecate legacy arguments in the `huggingface_hub` codebase.
|
|
178
150
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
argument (`str`, `None`). This implicit rule exists to be able to not send the
|
|
182
|
-
token when not necessary (`use_auth_token=False`) even if logged in.
|
|
151
|
+
This function ignores some deprecated arguments from the kwargs and warns the user they are ignored.
|
|
152
|
+
The goal is to avoid breaking existing code while guiding the user to the new way of doing things.
|
|
183
153
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
a. Corner case: if both `use_auth_token` and `token` values are passed, a warning
|
|
189
|
-
is thrown and the `use_auth_token` value is ignored.
|
|
154
|
+
List of deprecated arguments:
|
|
155
|
+
- `proxies`:
|
|
156
|
+
To set up proxies, user must either use the HTTP_PROXY environment variable or configure the `httpx.Client`
|
|
157
|
+
manually using the [`set_client_factory`] function.
|
|
190
158
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
159
|
+
In huggingface_hub 0.x, `proxies` was a dictionary directly passed to `requests.request`.
|
|
160
|
+
In huggingface_hub 1.x, we migrated to `httpx` which does not support `proxies` the same way.
|
|
161
|
+
In particular, it is not possible to configure proxies on a per-request basis. The solution is to configure
|
|
162
|
+
it globally using the [`set_client_factory`] function or using the HTTP_PROXY environment variable.
|
|
194
163
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
In addition, unit tests in `huggingface_hub` must be adapted to expect warnings
|
|
199
|
-
to be thrown (but still use `use_auth_token` as before).
|
|
164
|
+
For more details, see:
|
|
165
|
+
- https://www.python-httpx.org/advanced/proxies/
|
|
166
|
+
- https://www.python-httpx.org/compatibility/#proxy-keys.
|
|
200
167
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
This has been discussed in:
|
|
206
|
-
- https://github.com/huggingface/huggingface_hub/issues/1094.
|
|
207
|
-
- https://github.com/huggingface/huggingface_hub/pull/928
|
|
208
|
-
- (related) https://github.com/huggingface/huggingface_hub/pull/1064
|
|
168
|
+
- `resume_download`: deprecated without replacement. `huggingface_hub` always resumes downloads whenever possible.
|
|
169
|
+
- `force_filename`: deprecated without replacement. Filename is always the same as on the Hub.
|
|
170
|
+
- `local_dir_use_symlinks`: deprecated without replacement. Downloading to a local directory does not use symlinks anymore.
|
|
209
171
|
"""
|
|
210
172
|
new_kwargs = kwargs.copy() # do not mutate input !
|
|
211
173
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
174
|
+
# proxies
|
|
175
|
+
proxies = new_kwargs.pop("proxies", None) # remove from kwargs
|
|
176
|
+
if proxies is not None:
|
|
177
|
+
warnings.warn(
|
|
178
|
+
f"The `proxies` argument is ignored in `{fn_name}`. To set up proxies, use the HTTP_PROXY / HTTPS_PROXY"
|
|
179
|
+
" environment variables or configure the `httpx.Client` manually using `huggingface_hub.set_client_factory`."
|
|
180
|
+
" See https://www.python-httpx.org/advanced/proxies/ for more details."
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# resume_download
|
|
184
|
+
resume_download = new_kwargs.pop("resume_download", None) # remove from kwargs
|
|
185
|
+
if resume_download is not None:
|
|
186
|
+
warnings.warn(
|
|
187
|
+
f"The `resume_download` argument is deprecated and ignored in `{fn_name}`. Downloads always resume"
|
|
188
|
+
" whenever possible."
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
# force_filename
|
|
192
|
+
force_filename = new_kwargs.pop("force_filename", None) # remove from kwargs
|
|
193
|
+
if force_filename is not None:
|
|
194
|
+
warnings.warn(
|
|
195
|
+
f"The `force_filename` argument is deprecated and ignored in `{fn_name}`. Filename is always the same "
|
|
196
|
+
"as on the Hub."
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
# local_dir_use_symlinks
|
|
200
|
+
local_dir_use_symlinks = new_kwargs.pop("local_dir_use_symlinks", None) # remove from kwargs
|
|
201
|
+
if local_dir_use_symlinks is not None:
|
|
202
|
+
warnings.warn(
|
|
203
|
+
f"The `local_dir_use_symlinks` argument is deprecated and ignored in `{fn_name}`. Downloading to a local"
|
|
204
|
+
" directory does not use symlinks anymore."
|
|
205
|
+
)
|
|
225
206
|
|
|
226
207
|
return new_kwargs
|
huggingface_hub/utils/_xet.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Optional
|
|
4
4
|
|
|
5
|
-
import
|
|
5
|
+
import httpx
|
|
6
6
|
|
|
7
7
|
from .. import constants
|
|
8
8
|
from . import get_session, hf_raise_for_status, validate_hf_hub_args
|
|
@@ -27,7 +27,7 @@ class XetConnectionInfo:
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def parse_xet_file_data_from_response(
|
|
30
|
-
response:
|
|
30
|
+
response: httpx.Response, endpoint: Optional[str] = None
|
|
31
31
|
) -> Optional[XetFileData]:
|
|
32
32
|
"""
|
|
33
33
|
Parse XET file metadata from an HTTP response.
|
|
@@ -36,7 +36,7 @@ def parse_xet_file_data_from_response(
|
|
|
36
36
|
of a given response object. If the required metadata is not found, it returns `None`.
|
|
37
37
|
|
|
38
38
|
Args:
|
|
39
|
-
response (`
|
|
39
|
+
response (`httpx.Response`):
|
|
40
40
|
The HTTP response object containing headers dict and links dict to extract the XET metadata from.
|
|
41
41
|
Returns:
|
|
42
42
|
`Optional[XetFileData]`:
|
|
@@ -63,11 +63,11 @@ def parse_xet_file_data_from_response(
|
|
|
63
63
|
)
|
|
64
64
|
|
|
65
65
|
|
|
66
|
-
def parse_xet_connection_info_from_headers(headers: Dict[str, str]) -> Optional[XetConnectionInfo]:
|
|
66
|
+
def parse_xet_connection_info_from_headers(headers: dict[str, str]) -> Optional[XetConnectionInfo]:
|
|
67
67
|
"""
|
|
68
68
|
Parse XET connection info from the HTTP headers or return None if not found.
|
|
69
69
|
Args:
|
|
70
|
-
headers (`
|
|
70
|
+
headers (`dict`):
|
|
71
71
|
HTTP headers to extract the XET metadata from.
|
|
72
72
|
Returns:
|
|
73
73
|
`XetConnectionInfo` or `None`:
|
|
@@ -92,7 +92,7 @@ def parse_xet_connection_info_from_headers(headers: Dict[str, str]) -> Optional[
|
|
|
92
92
|
def refresh_xet_connection_info(
|
|
93
93
|
*,
|
|
94
94
|
file_data: XetFileData,
|
|
95
|
-
headers:
|
|
95
|
+
headers: dict[str, str],
|
|
96
96
|
) -> XetConnectionInfo:
|
|
97
97
|
"""
|
|
98
98
|
Utilizes the information in the parsed metadata to request the Hub xet connection information.
|
|
@@ -100,7 +100,7 @@ def refresh_xet_connection_info(
|
|
|
100
100
|
Args:
|
|
101
101
|
file_data: (`XetFileData`):
|
|
102
102
|
The file data needed to refresh the xet connection information.
|
|
103
|
-
headers (`
|
|
103
|
+
headers (`dict[str, str]`):
|
|
104
104
|
Headers to use for the request, including authorization headers and user agent.
|
|
105
105
|
Returns:
|
|
106
106
|
`XetConnectionInfo`:
|
|
@@ -123,9 +123,9 @@ def fetch_xet_connection_info_from_repo_info(
|
|
|
123
123
|
repo_id: str,
|
|
124
124
|
repo_type: str,
|
|
125
125
|
revision: Optional[str] = None,
|
|
126
|
-
headers:
|
|
126
|
+
headers: dict[str, str],
|
|
127
127
|
endpoint: Optional[str] = None,
|
|
128
|
-
params: Optional[
|
|
128
|
+
params: Optional[dict[str, str]] = None,
|
|
129
129
|
) -> XetConnectionInfo:
|
|
130
130
|
"""
|
|
131
131
|
Uses the repo info to request a xet access token from Hub.
|
|
@@ -138,11 +138,11 @@ def fetch_xet_connection_info_from_repo_info(
|
|
|
138
138
|
Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
|
|
139
139
|
revision (`str`, `optional`):
|
|
140
140
|
The revision of the repo to get the token for.
|
|
141
|
-
headers (`
|
|
141
|
+
headers (`dict[str, str]`):
|
|
142
142
|
Headers to use for the request, including authorization headers and user agent.
|
|
143
143
|
endpoint (`str`, `optional`):
|
|
144
144
|
The endpoint to use for the request. Defaults to the Hub endpoint.
|
|
145
|
-
params (`
|
|
145
|
+
params (`dict[str, str]`, `optional`):
|
|
146
146
|
Additional parameters to pass with the request.
|
|
147
147
|
Returns:
|
|
148
148
|
`XetConnectionInfo`:
|
|
@@ -161,8 +161,8 @@ def fetch_xet_connection_info_from_repo_info(
|
|
|
161
161
|
@validate_hf_hub_args
|
|
162
162
|
def _fetch_xet_connection_info_with_url(
|
|
163
163
|
url: str,
|
|
164
|
-
headers:
|
|
165
|
-
params: Optional[
|
|
164
|
+
headers: dict[str, str],
|
|
165
|
+
params: Optional[dict[str, str]] = None,
|
|
166
166
|
) -> XetConnectionInfo:
|
|
167
167
|
"""
|
|
168
168
|
Requests the xet connection info from the supplied URL. This includes the
|
|
@@ -170,9 +170,9 @@ def _fetch_xet_connection_info_with_url(
|
|
|
170
170
|
Args:
|
|
171
171
|
url: (`str`):
|
|
172
172
|
The access token endpoint URL.
|
|
173
|
-
headers (`
|
|
173
|
+
headers (`dict[str, str]`):
|
|
174
174
|
Headers to use for the request, including authorization headers and user agent.
|
|
175
|
-
params (`
|
|
175
|
+
params (`dict[str, str]`, `optional`):
|
|
176
176
|
Additional parameters to pass with the request.
|
|
177
177
|
Returns:
|
|
178
178
|
`XetConnectionInfo`:
|
|
@@ -64,7 +64,7 @@ class XetProgressReporter:
|
|
|
64
64
|
|
|
65
65
|
return f"{padding}{name.ljust(width)}"
|
|
66
66
|
|
|
67
|
-
def update_progress(self, total_update: PyTotalProgressUpdate, item_updates:
|
|
67
|
+
def update_progress(self, total_update: PyTotalProgressUpdate, item_updates: list[PyItemProgressUpdate]):
|
|
68
68
|
# Update all the per-item values.
|
|
69
69
|
for item in item_updates:
|
|
70
70
|
item_name = item.item_name
|
|
@@ -25,14 +25,8 @@
|
|
|
25
25
|
# ```
|
|
26
26
|
import functools
|
|
27
27
|
import hashlib
|
|
28
|
-
import sys
|
|
29
28
|
|
|
30
29
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
sha256 = functools.partial(hashlib.sha256, usedforsecurity=False)
|
|
35
|
-
else:
|
|
36
|
-
md5 = hashlib.md5
|
|
37
|
-
sha1 = hashlib.sha1
|
|
38
|
-
sha256 = hashlib.sha256
|
|
30
|
+
md5 = functools.partial(hashlib.md5, usedforsecurity=False)
|
|
31
|
+
sha1 = functools.partial(hashlib.sha1, usedforsecurity=False)
|
|
32
|
+
sha256 = functools.partial(hashlib.sha256, usedforsecurity=False)
|