huggingface-hub 0.22.2__py3-none-any.whl → 0.23.0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +51 -19
- huggingface_hub/_commit_api.py +9 -8
- huggingface_hub/_commit_scheduler.py +2 -2
- huggingface_hub/_inference_endpoints.py +10 -17
- huggingface_hub/_local_folder.py +229 -0
- huggingface_hub/_login.py +4 -3
- huggingface_hub/_multi_commits.py +1 -1
- huggingface_hub/_snapshot_download.py +16 -38
- huggingface_hub/_tensorboard_logger.py +16 -6
- huggingface_hub/_webhooks_payload.py +22 -1
- huggingface_hub/_webhooks_server.py +24 -20
- huggingface_hub/commands/download.py +11 -34
- huggingface_hub/commands/huggingface_cli.py +2 -0
- huggingface_hub/commands/tag.py +159 -0
- huggingface_hub/constants.py +3 -5
- huggingface_hub/errors.py +58 -0
- huggingface_hub/file_download.py +545 -376
- huggingface_hub/hf_api.py +756 -622
- huggingface_hub/hf_file_system.py +14 -5
- huggingface_hub/hub_mixin.py +127 -43
- huggingface_hub/inference/_client.py +402 -183
- huggingface_hub/inference/_common.py +19 -29
- huggingface_hub/inference/_generated/_async_client.py +402 -184
- huggingface_hub/inference/_generated/types/__init__.py +23 -6
- huggingface_hub/inference/_generated/types/chat_completion.py +197 -43
- huggingface_hub/inference/_generated/types/text_generation.py +57 -79
- huggingface_hub/inference/_templating.py +2 -4
- huggingface_hub/keras_mixin.py +0 -3
- huggingface_hub/lfs.py +9 -1
- huggingface_hub/repository.py +1 -0
- huggingface_hub/utils/__init__.py +12 -6
- huggingface_hub/utils/_fixes.py +1 -0
- huggingface_hub/utils/_headers.py +2 -4
- huggingface_hub/utils/_http.py +2 -4
- huggingface_hub/utils/_paths.py +13 -1
- huggingface_hub/utils/_runtime.py +10 -0
- huggingface_hub/utils/_safetensors.py +0 -13
- huggingface_hub/utils/_validators.py +2 -7
- huggingface_hub/utils/tqdm.py +124 -46
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/METADATA +5 -1
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/RECORD +45 -43
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/top_level.txt +0 -0
The hunks shown below correspond to `huggingface_hub/inference/_common.py` (+19 -29 in the list above).

```diff
--- a/huggingface_hub/inference/_common.py
+++ b/huggingface_hub/inference/_common.py
@@ -18,7 +18,6 @@ import base64
 import io
 import json
 import logging
-import time
 from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
@@ -70,7 +69,7 @@ from ._generated.types import (
 
 if TYPE_CHECKING:
     from aiohttp import ClientResponse, ClientSession
-    from PIL import Image
+    from PIL.Image import Image
 
 # TYPES
 UrlT = str
```
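The second hunk swaps a type-checking-only import: `from PIL import Image` binds the `PIL.Image` module, while `from PIL.Image import Image` binds the image class itself, so string annotations such as `"Image"` resolve to the type that image objects actually have. A minimal sketch of the difference; the `to_grayscale` helper is hypothetical and only for illustration:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # `PIL.Image` is a module; `PIL.Image.Image` is the class that objects
    # returned by e.g. Image.open() belong to. Importing the class makes the
    # forward references below point at a real type for static checkers.
    from PIL.Image import Image


def to_grayscale(image: "Image") -> "Image":
    # Hypothetical helper, for illustration only; convert() exists on PIL images.
    return image.convert("L")
```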
```diff
@@ -307,30 +306,17 @@ def _format_text_generation_stream_output(
     return output.token.text if not details else output
 
 
-def _stream_chat_completion_response_from_text_generation(
-    text_generation_output: Iterable[TextGenerationStreamOutput],
-) -> Iterable[ChatCompletionStreamOutput]:
-    """Used in `InferenceClient.chat_completion`."""
-    created = int(time.time())
-    for item in text_generation_output:
-        yield _format_chat_completion_stream_output_from_text_generation(item, created)
-
-
-async def _async_stream_chat_completion_response_from_text_generation(
-    text_generation_output: AsyncIterable[TextGenerationStreamOutput],
-) -> AsyncIterable[ChatCompletionStreamOutput]:
-    """Used in `AsyncInferenceClient.chat_completion`."""
-    created = int(time.time())
-    async for item in text_generation_output:
-        yield _format_chat_completion_stream_output_from_text_generation(item, created)
-
-
 def _format_chat_completion_stream_output_from_text_generation(
     item: TextGenerationStreamOutput, created: int
 ) -> ChatCompletionStreamOutput:
     if item.details is None:
         # new token generated => return delta
         return ChatCompletionStreamOutput(
+            # explicitly set 'dummy' values to reduce expectations from users
+            id="dummy",
+            model="dummy",
+            object="dummy",
+            system_fingerprint="dummy",
             choices=[
                 ChatCompletionStreamOutputChoice(
                     delta=ChatCompletionStreamOutputDelta(
@@ -346,9 +332,14 @@ def _format_chat_completion_stream_output_from_text_generation(
     else:
         # generation is completed => return finish reason
         return ChatCompletionStreamOutput(
+            # explicitly set 'dummy' values to reduce expectations from users
+            id="dummy",
+            model="dummy",
+            object="dummy",
+            system_fingerprint="dummy",
             choices=[
                 ChatCompletionStreamOutputChoice(
-                    delta=ChatCompletionStreamOutputDelta(),
+                    delta=ChatCompletionStreamOutputDelta(role="assistant"),
                     finish_reason=item.details.finish_reason,
                     index=0,
                 )
```
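These two hunks change how streamed `chat_completion` chunks are built when the text-generation route is used: the OpenAI-style envelope fields (`id`, `model`, `object`, `system_fingerprint`) are filled with explicit `"dummy"` placeholders, and the final chunk's delta now carries `role="assistant"` instead of being empty. A hedged usage sketch of a caller reading those fields from the stream; the model name and prompt are placeholders, and it assumes network access (and a token for gated models):

```python
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")  # placeholder model name

for chunk in client.chat_completion(
    messages=[{"role": "user", "content": "Say hello"}],
    max_tokens=20,
    stream=True,
):
    choice = chunk.choices[0]
    if choice.finish_reason is None:
        # Intermediate chunk: the delta holds the new token; envelope fields
        # (chunk.id, chunk.model, ...) may be the "dummy" placeholders above.
        print(choice.delta.content or "", end="")
    else:
        # Final chunk: the delta only carries role="assistant", plus the finish reason.
        print(f"\nfinish_reason={choice.finish_reason}")
```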
```diff
@@ -403,8 +394,8 @@ async def _async_yield_from(client: "ClientSession", response: "ClientResponse")
 # Both approaches have very similar APIs, but not exactly the same. What we do first in
 # the `text_generation` method is to assume the model is served via TGI. If we realize
 # it's not the case (i.e. we receive an HTTP 400 Bad Request), we fallback to the
-# default API with a warning message.
-#
+# default API with a warning message. When that's the case, We remember the unsupported
+# attributes for this model in the `_UNSUPPORTED_TEXT_GENERATION_KWARGS` global variable.
 #
 # In addition, TGI servers have a built-in API route for chat-completion, which is not
 # available on the default API. We use this route to provide a more consistent behavior
@@ -413,22 +404,21 @@ async def _async_yield_from(client: "ClientSession", response: "ClientResponse")
 # For more details, see https://github.com/huggingface/text-generation-inference and
 # https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task.
 
-
+_UNSUPPORTED_TEXT_GENERATION_KWARGS: Dict[Optional[str], List[str]] = {}
 
 
-def
-
+def _set_unsupported_text_generation_kwargs(model: Optional[str], unsupported_kwargs: List[str]) -> None:
+    _UNSUPPORTED_TEXT_GENERATION_KWARGS.setdefault(model, []).extend(unsupported_kwargs)
 
 
-def
-    return model
+def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> List[str]:
+    return _UNSUPPORTED_TEXT_GENERATION_KWARGS.get(model, [])
 
 
 _NON_CHAT_COMPLETION_SERVER: Set[str] = set()
 
 
 def _set_as_non_chat_completion_server(model: str) -> None:
-    print("Set as non chat completion", model)
     _NON_CHAT_COMPLETION_SERVER.add(model)
 
 
```
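The last two hunks introduce a per-model record of text-generation kwargs that the non-TGI fallback API rejected, so later calls can drop them instead of failing again, as the updated module comment explains. The two `_set_unsupported_text_generation_kwargs` / `_get_unsupported_text_generation_kwargs` helpers below are copied from the diff; everything else (`_query`, `_BadRequest`, `generate`) is a hypothetical stand-in sketching the retry-and-remember flow, not the library's actual code:

```python
import warnings
from typing import Any, Dict, List, Optional

# Per-model record of kwargs the default (non-TGI) API rejected, as in the diff above.
_UNSUPPORTED_TEXT_GENERATION_KWARGS: Dict[Optional[str], List[str]] = {}


def _set_unsupported_text_generation_kwargs(model: Optional[str], unsupported_kwargs: List[str]) -> None:
    _UNSUPPORTED_TEXT_GENERATION_KWARGS.setdefault(model, []).extend(unsupported_kwargs)


def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> List[str]:
    return _UNSUPPORTED_TEXT_GENERATION_KWARGS.get(model, [])


class _BadRequest(Exception):
    """Stand-in for the HTTP 400 the real client receives for unsupported parameters."""

    def __init__(self, unsupported: List[str]) -> None:
        super().__init__(f"unsupported parameters: {unsupported}")
        self.unsupported = unsupported


def _query(model: str, prompt: str, **kwargs: Any) -> str:
    """Fake backend: pretends only `max_new_tokens` is supported for this model."""
    rejected = [k for k in kwargs if k != "max_new_tokens"]
    if rejected:
        raise _BadRequest(rejected)
    return f"{model} completed: {prompt}..."


def generate(model: str, prompt: str, **kwargs: Any) -> str:
    # Drop kwargs already known to be unsupported for this model, with a warning.
    known_bad = set(_get_unsupported_text_generation_kwargs(model))
    ignored = [k for k in kwargs if k in known_bad]
    for key in ignored:
        kwargs.pop(key)
    if ignored:
        warnings.warn(f"Ignoring unsupported text-generation kwargs for {model}: {ignored}")
    try:
        return _query(model, prompt, **kwargs)
    except _BadRequest as e:
        # First failure: remember the rejected kwargs, then retry without them.
        _set_unsupported_text_generation_kwargs(model, e.unsupported)
        for key in e.unsupported:
            kwargs.pop(key, None)
        return _query(model, prompt, **kwargs)


print(generate("some-model", "Hello", max_new_tokens=20, watermark=True))        # retries, records "watermark"
print(generate("some-model", "Hello again", max_new_tokens=20, watermark=True))  # warns and skips it up front
```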