huggingface-hub 0.31.4__py3-none-any.whl → 0.32.0rc0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

This release has been flagged as potentially problematic.
Files changed (41)
  1. huggingface_hub/__init__.py +42 -4
  2. huggingface_hub/_local_folder.py +8 -0
  3. huggingface_hub/_oauth.py +464 -0
  4. huggingface_hub/_snapshot_download.py +11 -3
  5. huggingface_hub/_upload_large_folder.py +16 -36
  6. huggingface_hub/commands/huggingface_cli.py +2 -0
  7. huggingface_hub/commands/repo.py +147 -0
  8. huggingface_hub/commands/user.py +2 -108
  9. huggingface_hub/constants.py +9 -1
  10. huggingface_hub/dataclasses.py +2 -2
  11. huggingface_hub/file_download.py +13 -11
  12. huggingface_hub/hf_api.py +48 -19
  13. huggingface_hub/hub_mixin.py +2 -2
  14. huggingface_hub/inference/_client.py +8 -7
  15. huggingface_hub/inference/_generated/_async_client.py +8 -7
  16. huggingface_hub/inference/_generated/types/__init__.py +4 -1
  17. huggingface_hub/inference/_generated/types/chat_completion.py +43 -9
  18. huggingface_hub/inference/_mcp/__init__.py +0 -0
  19. huggingface_hub/inference/_mcp/agent.py +99 -0
  20. huggingface_hub/inference/_mcp/cli.py +153 -0
  21. huggingface_hub/inference/_mcp/constants.py +80 -0
  22. huggingface_hub/inference/_mcp/mcp_client.py +322 -0
  23. huggingface_hub/inference/_mcp/utils.py +123 -0
  24. huggingface_hub/inference/_providers/__init__.py +13 -1
  25. huggingface_hub/inference/_providers/_common.py +1 -0
  26. huggingface_hub/inference/_providers/cerebras.py +1 -1
  27. huggingface_hub/inference/_providers/cohere.py +20 -3
  28. huggingface_hub/inference/_providers/fireworks_ai.py +18 -0
  29. huggingface_hub/inference/_providers/hf_inference.py +8 -1
  30. huggingface_hub/inference/_providers/nebius.py +28 -0
  31. huggingface_hub/inference/_providers/nscale.py +44 -0
  32. huggingface_hub/inference/_providers/sambanova.py +14 -0
  33. huggingface_hub/inference/_providers/together.py +15 -0
  34. huggingface_hub/utils/_experimental.py +7 -5
  35. huggingface_hub/utils/insecure_hashlib.py +8 -4
  36. {huggingface_hub-0.31.4.dist-info → huggingface_hub-0.32.0rc0.dist-info}/METADATA +30 -8
  37. {huggingface_hub-0.31.4.dist-info → huggingface_hub-0.32.0rc0.dist-info}/RECORD +41 -32
  38. {huggingface_hub-0.31.4.dist-info → huggingface_hub-0.32.0rc0.dist-info}/entry_points.txt +1 -0
  39. {huggingface_hub-0.31.4.dist-info → huggingface_hub-0.32.0rc0.dist-info}/LICENSE +0 -0
  40. {huggingface_hub-0.31.4.dist-info → huggingface_hub-0.32.0rc0.dist-info}/WHEEL +0 -0
  41. {huggingface_hub-0.31.4.dist-info → huggingface_hub-0.32.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_mcp/mcp_client.py
@@ -0,0 +1,322 @@
+import json
+import logging
+from contextlib import AsyncExitStack
+from datetime import timedelta
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Union, overload
+
+from typing_extensions import NotRequired, TypeAlias, TypedDict, Unpack
+
+from ...utils._runtime import get_hf_hub_version
+from .._generated._async_client import AsyncInferenceClient
+from .._generated.types import (
+    ChatCompletionInputMessage,
+    ChatCompletionInputTool,
+    ChatCompletionStreamOutput,
+    ChatCompletionStreamOutputDeltaToolCall,
+)
+from .._providers import PROVIDER_OR_POLICY_T
+from .utils import format_result
+
+
+if TYPE_CHECKING:
+    from mcp import ClientSession
+
+logger = logging.getLogger(__name__)
+
+# Type alias for tool names
+ToolName: TypeAlias = str
+
+ServerType: TypeAlias = Literal["stdio", "sse", "http"]
+
+
+class StdioServerParameters_T(TypedDict):
+    command: str
+    args: NotRequired[List[str]]
+    env: NotRequired[Dict[str, str]]
+    cwd: NotRequired[Union[str, Path, None]]
+
+
+class SSEServerParameters_T(TypedDict):
+    url: str
+    headers: NotRequired[Dict[str, Any]]
+    timeout: NotRequired[float]
+    sse_read_timeout: NotRequired[float]
+
+
+class StreamableHTTPParameters_T(TypedDict):
+    url: str
+    headers: NotRequired[dict[str, Any]]
+    timeout: NotRequired[timedelta]
+    sse_read_timeout: NotRequired[timedelta]
+    terminate_on_close: NotRequired[bool]
+
+
+class MCPClient:
+    """
+    Client for connecting to one or more MCP servers and processing chat completions with tools.
+
+    <Tip warning={true}>
+
+    This class is experimental and might be subject to breaking changes in the future without prior notice.
+
+    </Tip>
+
+    Args:
+        model (`str`, `optional`):
+            The model to run inference with. Can be a model id hosted on the Hugging Face Hub, e.g. `meta-llama/Meta-Llama-3-8B-Instruct`
+            or a URL to a deployed Inference Endpoint or other local or remote endpoint.
+        provider (`str`, *optional*):
+            Name of the provider to use for inference. Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
+            If model is a URL or `base_url` is passed, then `provider` is not used.
+        api_key (`str`, `optional`):
+            Token to use for authentication. Will default to the locally Hugging Face saved token if not provided. You can also use your own provider API key to interact directly with the provider's service.
+    """
+
+    def __init__(
+        self,
+        *,
+        model: str,
+        provider: Optional[PROVIDER_OR_POLICY_T] = None,
+        api_key: Optional[str] = None,
+    ):
+        # Initialize MCP sessions as a dictionary of ClientSession objects
+        self.sessions: Dict[ToolName, "ClientSession"] = {}
+        self.exit_stack = AsyncExitStack()
+        self.available_tools: List[ChatCompletionInputTool] = []
+
+        # Initialize the AsyncInferenceClient
+        self.client = AsyncInferenceClient(model=model, provider=provider, api_key=api_key)
+
+    async def __aenter__(self):
+        """Enter the context manager"""
+        await self.client.__aenter__()
+        await self.exit_stack.__aenter__()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Exit the context manager"""
+        await self.client.__aexit__(exc_type, exc_val, exc_tb)
+        await self.cleanup()
+
+    @overload
+    async def add_mcp_server(self, type: Literal["stdio"], **params: Unpack[StdioServerParameters_T]): ...
+
+    @overload
+    async def add_mcp_server(self, type: Literal["sse"], **params: Unpack[SSEServerParameters_T]): ...
+
+    @overload
+    async def add_mcp_server(self, type: Literal["http"], **params: Unpack[StreamableHTTPParameters_T]): ...
+
+    async def add_mcp_server(self, type: ServerType, **params: Any):
+        """Connect to an MCP server
+
+        Args:
+            type (`str`):
+                Type of the server to connect to. Can be one of:
+                - "stdio": Standard input/output server (local)
+                - "sse": Server-sent events (SSE) server
+                - "http": StreamableHTTP server
+            **params (`Dict[str, Any]`):
+                Server parameters that can be either:
+                - For stdio servers:
+                    - command (str): The command to run the MCP server
+                    - args (List[str], optional): Arguments for the command
+                    - env (Dict[str, str], optional): Environment variables for the command
+                    - cwd (Union[str, Path, None], optional): Working directory for the command
+                - For SSE servers:
+                    - url (str): The URL of the SSE server
+                    - headers (Dict[str, Any], optional): Headers for the SSE connection
+                    - timeout (float, optional): Connection timeout
+                    - sse_read_timeout (float, optional): SSE read timeout
+                - For StreamableHTTP servers:
+                    - url (str): The URL of the StreamableHTTP server
+                    - headers (Dict[str, Any], optional): Headers for the StreamableHTTP connection
+                    - timeout (timedelta, optional): Connection timeout
+                    - sse_read_timeout (timedelta, optional): SSE read timeout
+                    - terminate_on_close (bool, optional): Whether to terminate on close
+        """
+        from mcp import ClientSession, StdioServerParameters
+        from mcp import types as mcp_types
+
+        # Determine server type and create appropriate parameters
+        if type == "stdio":
+            # Handle stdio server
+            from mcp.client.stdio import stdio_client
+
+            logger.info(f"Connecting to stdio MCP server with command: {params['command']} {params.get('args', [])}")
+
+            client_kwargs = {"command": params["command"]}
+            for key in ["args", "env", "cwd"]:
+                if params.get(key) is not None:
+                    client_kwargs[key] = params[key]
+            server_params = StdioServerParameters(**client_kwargs)
+            read, write = await self.exit_stack.enter_async_context(stdio_client(server_params))
+        elif type == "sse":
+            # Handle SSE server
+            from mcp.client.sse import sse_client
+
+            logger.info(f"Connecting to SSE MCP server at: {params['url']}")
+
+            client_kwargs = {"url": params["url"]}
+            for key in ["headers", "timeout", "sse_read_timeout"]:
+                if params.get(key) is not None:
+                    client_kwargs[key] = params[key]
+            read, write = await self.exit_stack.enter_async_context(sse_client(**client_kwargs))
+        elif type == "http":
+            # Handle StreamableHTTP server
+            from mcp.client.streamable_http import streamablehttp_client
+
+            logger.info(f"Connecting to StreamableHTTP MCP server at: {params['url']}")
+
+            client_kwargs = {"url": params["url"]}
+            for key in ["headers", "timeout", "sse_read_timeout", "terminate_on_close"]:
+                if params.get(key) is not None:
+                    client_kwargs[key] = params[key]
+            read, write, _ = await self.exit_stack.enter_async_context(streamablehttp_client(**client_kwargs))
+            # ^ TODO: should be handle `get_session_id_callback`? (function to retrieve the current session ID)
+        else:
+            raise ValueError(f"Unsupported server type: {type}")
+
+        session = await self.exit_stack.enter_async_context(
+            ClientSession(
+                read_stream=read,
+                write_stream=write,
+                client_info=mcp_types.Implementation(
+                    name="huggingface_hub.MCPClient",
+                    version=get_hf_hub_version(),
+                ),
+            )
+        )
+
+        logger.debug("Initializing session...")
+        await session.initialize()
+
+        # List available tools
+        response = await session.list_tools()
+        logger.debug("Connected to server with tools:", [tool.name for tool in response.tools])
+
+        for tool in response.tools:
+            if tool.name in self.sessions:
+                logger.warning(f"Tool '{tool.name}' already defined by another server. Skipping.")
+                continue
+
+            # Map tool names to their server for later lookup
+            self.sessions[tool.name] = session
+
+            # Add tool to the list of available tools (for use in chat completions)
+            self.available_tools.append(
+                ChatCompletionInputTool.parse_obj_as_instance(
+                    {
+                        "type": "function",
+                        "function": {
+                            "name": tool.name,
+                            "description": tool.description,
+                            "parameters": tool.inputSchema,
+                        },
+                    }
+                )
+            )
+
+    async def process_single_turn_with_tools(
+        self,
+        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        exit_loop_tools: Optional[List[ChatCompletionInputTool]] = None,
+        exit_if_first_chunk_no_tool: bool = False,
+    ) -> AsyncIterable[Union[ChatCompletionStreamOutput, ChatCompletionInputMessage]]:
+        """Process a query using `self.model` and available tools, yielding chunks and tool outputs.
+
+        Args:
+            messages (`List[Dict]`):
+                List of message objects representing the conversation history
+            exit_loop_tools (`List[ChatCompletionInputTool]`, *optional*):
+                List of tools that should exit the generator when called
+            exit_if_first_chunk_no_tool (`bool`, *optional*):
+                Exit if no tool is present in the first chunks. Default to False.
+
+        Yields:
+            [`ChatCompletionStreamOutput`] chunks or [`ChatCompletionInputMessage`] objects
+        """
+        # Prepare tools list based on options
+        tools = self.available_tools
+        if exit_loop_tools is not None:
+            tools = [*exit_loop_tools, *self.available_tools]
+
+        # Create the streaming request
+        response = await self.client.chat.completions.create(
+            messages=messages,
+            tools=tools,
+            tool_choice="auto",
+            stream=True,
+        )
+
+        message = {"role": "unknown", "content": ""}
+        final_tool_calls: Dict[int, ChatCompletionStreamOutputDeltaToolCall] = {}
+        num_of_chunks = 0
+
+        # Read from stream
+        async for chunk in response:
+            # Yield each chunk to caller
+            yield chunk
+
+            num_of_chunks += 1
+            delta = chunk.choices[0].delta if chunk.choices and len(chunk.choices) > 0 else None
+            if not delta:
+                continue
+
+            # Process message
+            if delta.role:
+                message["role"] = delta.role
+            if delta.content:
+                message["content"] += delta.content
+
+            # Process tool calls
+            if delta.tool_calls:
+                for tool_call in delta.tool_calls:
+                    # Aggregate chunks into tool calls
+                    if tool_call.index not in final_tool_calls:
+                        if tool_call.function.arguments is None:  # Corner case (depends on provider)
+                            tool_call.function.arguments = ""
+                        final_tool_calls[tool_call.index] = tool_call
+
+                    if tool_call.function.arguments:
+                        final_tool_calls[tool_call.index].function.arguments += tool_call.function.arguments
+
+            # Optionally exit early if no tools in first chunks
+            if exit_if_first_chunk_no_tool and num_of_chunks <= 2 and len(final_tool_calls) == 0:
+                return
+
+        if message["content"]:
+            messages.append(message)
+
+        # Process tool calls one by one
+        for tool_call in final_tool_calls.values():
+            function_name = tool_call.function.name
+            function_args = json.loads(tool_call.function.arguments or "{}")
+
+            tool_message = {"role": "tool", "tool_call_id": tool_call.id, "content": "", "name": function_name}
+
+            # Check if this is an exit loop tool
+            if exit_loop_tools and function_name in [t.function.name for t in exit_loop_tools]:
+                tool_message_as_obj = ChatCompletionInputMessage.parse_obj_as_instance(tool_message)
+                messages.append(tool_message_as_obj)
+                yield tool_message_as_obj
+                return
+
+            # Execute tool call with the appropriate session
+            session = self.sessions.get(function_name)
+            if session is not None:
+                result = await session.call_tool(function_name, function_args)
+                tool_message["content"] = format_result(result)
+            else:
+                error_msg = f"Error: No session found for tool: {function_name}"
+                tool_message["content"] = error_msg
+
+            # Yield tool message
+            tool_message_as_obj = ChatCompletionInputMessage.parse_obj_as_instance(tool_message)
+            messages.append(tool_message_as_obj)
+            yield tool_message_as_obj
+
+    async def cleanup(self):
+        """Clean up resources"""
+        await self.exit_stack.aclose()
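For orientation, here is a minimal usage sketch of the new `MCPClient` based only on the API visible in the diff above. The model id, the server command, and the import via the private `_mcp` module path are illustrative assumptions (the package may also re-export the class at a higher level).

```python
import asyncio

from huggingface_hub.inference._mcp.mcp_client import MCPClient


async def main() -> None:
    async with MCPClient(model="meta-llama/Meta-Llama-3-8B-Instruct") as client:
        # Register a local stdio MCP server; its tools are appended to client.available_tools.
        await client.add_mcp_server(type="stdio", command="python", args=["my_mcp_server.py"])

        messages = [{"role": "user", "content": "What tools do you have access to?"}]
        # One conversational turn: yields streaming chunks, then tool-result messages.
        async for item in client.process_single_turn_with_tools(messages):
            print(item)


asyncio.run(main())
```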
huggingface_hub/inference/_mcp/utils.py
@@ -0,0 +1,123 @@
+"""
+Utility functions for MCPClient and Tiny Agents.
+
+Formatting utilities taken from the JS SDK: https://github.com/huggingface/huggingface.js/blob/main/packages/mcp-client/src/ResultFormatter.ts.
+"""
+
+import json
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+
+from huggingface_hub import snapshot_download
+from huggingface_hub.errors import EntryNotFoundError
+
+from .constants import DEFAULT_AGENT, DEFAULT_REPO_ID, FILENAME_CONFIG, FILENAME_PROMPT
+
+
+if TYPE_CHECKING:
+    from mcp import types as mcp_types
+
+
+def format_result(result: "mcp_types.CallToolResult") -> str:
+    """
+    Formats a mcp.types.CallToolResult content into a human-readable string.
+
+    Args:
+        result (CallToolResult)
+            Object returned by mcp.ClientSession.call_tool.
+
+    Returns:
+        str
+            A formatted string representing the content of the result.
+    """
+    content = result.content
+
+    if len(content) == 0:
+        return "[No content]"
+
+    formatted_parts: List[str] = []
+
+    for item in content:
+        if item.type == "text":
+            formatted_parts.append(item.text)
+
+        elif item.type == "image":
+            formatted_parts.append(
+                f"[Binary Content: Image {item.mimeType}, {_get_base64_size(item.data)} bytes]\n"
+                f"The task is complete and the content accessible to the User"
+            )
+
+        elif item.type == "audio":
+            formatted_parts.append(
+                f"[Binary Content: Audio {item.mimeType}, {_get_base64_size(item.data)} bytes]\n"
+                f"The task is complete and the content accessible to the User"
+            )
+
+        elif item.type == "resource":
+            resource = item.resource
+
+            if hasattr(resource, "text"):
+                formatted_parts.append(resource.text)
+
+            elif hasattr(resource, "blob"):
+                formatted_parts.append(
+                    f"[Binary Content ({resource.uri}): {resource.mimeType}, {_get_base64_size(resource.blob)} bytes]\n"
+                    f"The task is complete and the content accessible to the User"
+                )
+
+    return "\n".join(formatted_parts)
+
+
+def _get_base64_size(base64_str: str) -> int:
+    """Estimate the byte size of a base64-encoded string."""
+    # Remove any prefix like "data:image/png;base64,"
+    if "," in base64_str:
+        base64_str = base64_str.split(",")[1]
+
+    padding = 0
+    if base64_str.endswith("=="):
+        padding = 2
+    elif base64_str.endswith("="):
+        padding = 1
+
+    return (len(base64_str) * 3) // 4 - padding
+
+
+def _load_agent_config(agent_path: Optional[str]) -> Tuple[Dict[str, Any], Optional[str]]:
+    """Load server config and prompt."""
+
+    def _read_dir(directory: Path) -> Tuple[Dict[str, Any], Optional[str]]:
+        cfg_file = directory / FILENAME_CONFIG
+        if not cfg_file.exists():
+            raise FileNotFoundError(f" Config file not found in {directory}! Please make sure it exists locally")
+
+        config: Dict[str, Any] = json.loads(cfg_file.read_text(encoding="utf-8"))
+        prompt_file = directory / FILENAME_PROMPT
+        prompt: Optional[str] = prompt_file.read_text(encoding="utf-8") if prompt_file.exists() else None
+        return config, prompt
+
+    if agent_path is None:
+        return DEFAULT_AGENT, None
+
+    path = Path(agent_path).expanduser()
+
+    if path.is_file():
+        return json.loads(path.read_text(encoding="utf-8")), None
+
+    if path.is_dir():
+        return _read_dir(path)
+
+    # fetch from the Hub
+    try:
+        repo_dir = Path(
+            snapshot_download(
+                repo_id=DEFAULT_REPO_ID,
+                allow_patterns=f"{agent_path}/*",
+                repo_type="dataset",
+            )
+        )
+        return _read_dir(repo_dir / agent_path)
+    except Exception as err:
+        raise EntryNotFoundError(
+            f" Agent {agent_path} not found in tiny-agents/tiny-agents! Please make sure it exists in https://huggingface.co/datasets/tiny-agents/tiny-agents."
+        ) from err
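The byte counts reported by `format_result` come from `_get_base64_size`, which applies the standard base64 ratio: every 4 encoded characters decode to 3 bytes, minus one byte per `=` padding character. A quick standalone check of that arithmetic (not part of the package):

```python
import base64

for payload in (b"hello", b"hello world!"):
    encoded = base64.b64encode(payload).decode()
    padding = 2 if encoded.endswith("==") else 1 if encoded.endswith("=") else 0
    # Same formula as _get_base64_size: 4 base64 chars carry 3 bytes, minus padding.
    assert (len(encoded) * 3) // 4 - padding == len(payload)
```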
huggingface_hub/inference/_providers/__init__.py
@@ -20,8 +20,14 @@ from .hf_inference import (
     HFInferenceTask,
 )
 from .hyperbolic import HyperbolicTextGenerationTask, HyperbolicTextToImageTask
-from .nebius import NebiusConversationalTask, NebiusTextGenerationTask, NebiusTextToImageTask
+from .nebius import (
+    NebiusConversationalTask,
+    NebiusFeatureExtractionTask,
+    NebiusTextGenerationTask,
+    NebiusTextToImageTask,
+)
 from .novita import NovitaConversationalTask, NovitaTextGenerationTask, NovitaTextToVideoTask
+from .nscale import NscaleConversationalTask, NscaleTextToImageTask
 from .openai import OpenAIConversationalTask
 from .replicate import ReplicateTask, ReplicateTextToImageTask, ReplicateTextToSpeechTask
 from .sambanova import SambanovaConversationalTask, SambanovaFeatureExtractionTask
@@ -41,6 +47,7 @@ PROVIDER_T = Literal[
     "hyperbolic",
     "nebius",
     "novita",
+    "nscale",
     "openai",
     "replicate",
     "sambanova",
@@ -105,12 +112,17 @@ PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
         "text-to-image": NebiusTextToImageTask(),
         "conversational": NebiusConversationalTask(),
        "text-generation": NebiusTextGenerationTask(),
+        "feature-extraction": NebiusFeatureExtractionTask(),
     },
     "novita": {
         "text-generation": NovitaTextGenerationTask(),
         "conversational": NovitaConversationalTask(),
         "text-to-video": NovitaTextToVideoTask(),
     },
+    "nscale": {
+        "conversational": NscaleConversationalTask(),
+        "text-to-image": NscaleTextToImageTask(),
+    },
     "openai": {
         "conversational": OpenAIConversationalTask(),
     },
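The practical effect of these registry changes is that a (provider, task) pair now resolves to the new helpers. A small sketch of that lookup, assuming `PROVIDERS` is imported directly from the private `_providers` module:

```python
from huggingface_hub.inference._providers import PROVIDERS

# Both entries exist after this change: nebius gains feature-extraction and
# nscale appears as a new provider with conversational and text-to-image helpers.
print(type(PROVIDERS["nebius"]["feature-extraction"]).__name__)  # NebiusFeatureExtractionTask
print(type(PROVIDERS["nscale"]["text-to-image"]).__name__)       # NscaleTextToImageTask
```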
huggingface_hub/inference/_providers/_common.py
@@ -27,6 +27,7 @@ HARDCODED_MODEL_INFERENCE_MAPPING: Dict[str, Dict[str, InferenceProviderMapping]
     "hf-inference": {},
     "hyperbolic": {},
     "nebius": {},
+    "nscale": {},
     "replicate": {},
     "sambanova": {},
     "together": {},
huggingface_hub/inference/_providers/cerebras.py
@@ -1,4 +1,4 @@
-from huggingface_hub.inference._providers._common import BaseConversationalTask
+from ._common import BaseConversationalTask
 
 
 class CerebrasConversationalTask(BaseConversationalTask):
huggingface_hub/inference/_providers/cohere.py
@@ -1,6 +1,8 @@
-from huggingface_hub.inference._providers._common import (
-    BaseConversationalTask,
-)
+from typing import Any, Dict, Optional
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+
+from ._common import BaseConversationalTask
 
 
 _PROVIDER = "cohere"
@@ -13,3 +15,18 @@ class CohereConversationalTask(BaseConversationalTask):
 
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/compatibility/v1/chat/completions"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+
+        return payload
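The override above rewrites an OpenAI-style `json_schema` response format into the `json_object` + `schema` shape expected by Cohere's compatibility endpoint (Fireworks AI gets the same treatment below). A sketch of the transformation on plain dicts, with a placeholder schema:

```python
# OpenAI-style structured-output request parameter (schema contents are placeholders):
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "person",
        "schema": {"type": "object", "properties": {"name": {"type": "string"}}},
    },
}

# What the Cohere helper writes into the outgoing payload instead:
converted = {
    "type": "json_object",
    "schema": response_format["json_schema"]["schema"],
}
print(converted)
```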
huggingface_hub/inference/_providers/fireworks_ai.py
@@ -1,3 +1,7 @@
+from typing import Any, Dict, Optional
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+
 from ._common import BaseConversationalTask
 
 
@@ -7,3 +11,17 @@ class FireworksAIConversationalTask(BaseConversationalTask):
 
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/inference/v1/chat/completions"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["response_format"] = {  # type: ignore [index]
+                    "type": "json_object",
+                    "schema": json_schema_details["schema"],
+                }
+        return payload
huggingface_hub/inference/_providers/hf_inference.py
@@ -96,13 +96,20 @@ class HFInferenceConversational(HFInferenceTask):
     def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
+        payload = filter_none(parameters)
         mapped_model = provider_mapping_info.provider_id
         payload_model = parameters.get("model") or mapped_model
 
         if payload_model is None or payload_model.startswith(("http://", "https://")):
             payload_model = "dummy"
 
-        return {**filter_none(parameters), "model": payload_model, "messages": inputs}
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            payload["response_format"] = {
+                "type": "json_object",
+                "value": response_format["json_schema"]["schema"],
+            }
+        return {**payload, "model": payload_model, "messages": inputs}
 
     def _prepare_url(self, api_key: str, mapped_model: str) -> str:
         base_url = (
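From the caller's side the request shape stays the same; each helper translates `response_format` into its provider's dialect (HF Inference nests the schema under `value` rather than `schema`). A hedged user-facing sketch, assuming a placeholder model id and that the dict form shown here is passed through unchanged:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="hf-inference")  # token picked up from the local HF login
completion = client.chat_completion(
    model="meta-llama/Meta-Llama-3-8B-Instruct",  # placeholder model id
    messages=[{"role": "user", "content": "Return a JSON object with a 'name' field."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "person",
            "schema": {"type": "object", "properties": {"name": {"type": "string"}}},
        },
    },
)
print(completion.choices[0].message.content)
```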
huggingface_hub/inference/_providers/nebius.py
@@ -30,6 +30,17 @@ class NebiusConversationalTask(BaseConversationalTask):
     def __init__(self):
         super().__init__(provider="nebius", base_url="https://api.studio.nebius.ai")
 
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload = super()._prepare_payload_as_dict(inputs, parameters, provider_mapping_info)
+        response_format = parameters.get("response_format")
+        if isinstance(response_format, dict) and response_format.get("type") == "json_schema":
+            json_schema_details = response_format.get("json_schema")
+            if isinstance(json_schema_details, dict) and "schema" in json_schema_details:
+                payload["guided_json"] = json_schema_details["schema"]  # type: ignore [index]
+        return payload
+
 
 class NebiusTextToImageTask(TaskProviderHelper):
     def __init__(self):
@@ -53,3 +64,20 @@ class NebiusTextToImageTask(TaskProviderHelper):
     def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
         response_dict = _as_dict(response)
         return base64.b64decode(response_dict["data"][0]["b64_json"])
+
+
+class NebiusFeatureExtractionTask(TaskProviderHelper):
+    def __init__(self):
+        super().__init__(task="feature-extraction", provider="nebius", base_url="https://api.studio.nebius.ai")
+
+    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
+        return "/v1/embeddings"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        return {"input": inputs, "model": provider_mapping_info.provider_id}
+
+    def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
+        embeddings = _as_dict(response)["data"]
+        return [embedding["embedding"] for embedding in embeddings]
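With `NebiusFeatureExtractionTask` registered, embedding requests can go through the regular client API. A minimal sketch; the model id is a placeholder and must be one that Nebius actually maps for feature-extraction:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="nebius", api_key="hf_xxx")  # or a Nebius API key
embedding = client.feature_extraction(
    "Today is a sunny day.",
    model="BAAI/bge-multilingual-gemma2",  # placeholder model id
)
print(len(embedding))  # embedding vector(s) returned by the /v1/embeddings route
```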
huggingface_hub/inference/_providers/nscale.py
@@ -0,0 +1,44 @@
+import base64
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub.hf_api import InferenceProviderMapping
+from huggingface_hub.inference._common import RequestParameters, _as_dict
+
+from ._common import BaseConversationalTask, TaskProviderHelper, filter_none
+
+
+class NscaleConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider="nscale", base_url="https://inference.api.nscale.com")
+
+
+class NscaleTextToImageTask(TaskProviderHelper):
+    def __init__(self):
+        super().__init__(provider="nscale", base_url="https://inference.api.nscale.com", task="text-to-image")
+
+    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
+        return "/v1/images/generations"
+
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        mapped_model = provider_mapping_info.provider_id
+        # Combine all parameters except inputs and parameters
+        parameters = filter_none(parameters)
+        if "width" in parameters and "height" in parameters:
+            parameters["size"] = f"{parameters.pop('width')}x{parameters.pop('height')}"
+        if "num_inference_steps" in parameters:
+            parameters.pop("num_inference_steps")
+        if "cfg_scale" in parameters:
+            parameters.pop("cfg_scale")
+        payload = {
+            "response_format": "b64_json",
+            "prompt": inputs,
+            "model": mapped_model,
+            **parameters,
+        }
+        return payload
+
+    def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
+        response_dict = _as_dict(response)
+        return base64.b64decode(response_dict["data"][0]["b64_json"])
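A usage sketch for the new provider: `width`/`height` are folded into a single `size` string and `num_inference_steps`/`cfg_scale` are dropped before the request is sent. The model id and token are placeholders:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="nscale", api_key="hf_xxx")
image = client.text_to_image(
    "An astronaut riding a horse on the moon",
    model="black-forest-labs/FLUX.1-schnell",  # placeholder model id
    width=1024,
    height=1024,  # sent to the provider as size="1024x1024"
)
image.save("astronaut.png")
```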