blaxel 0.2.36__py3-none-any.whl → 0.2.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. blaxel/__init__.py +2 -2
  2. blaxel/core/client/models/create_job_execution_request_env.py +3 -3
  3. blaxel/core/client/models/preview.py +48 -1
  4. blaxel/core/client/models/sandbox.py +10 -0
  5. blaxel/core/jobs/__init__.py +2 -2
  6. blaxel/core/sandbox/__init__.py +12 -0
  7. blaxel/core/sandbox/client/api/system/__init__.py +0 -0
  8. blaxel/core/sandbox/client/api/system/get_health.py +134 -0
  9. blaxel/core/sandbox/client/api/system/post_upgrade.py +196 -0
  10. blaxel/core/sandbox/client/models/__init__.py +8 -0
  11. blaxel/core/sandbox/client/models/content_search_match.py +24 -25
  12. blaxel/core/sandbox/client/models/content_search_response.py +25 -29
  13. blaxel/core/sandbox/client/models/find_match.py +13 -14
  14. blaxel/core/sandbox/client/models/find_response.py +21 -24
  15. blaxel/core/sandbox/client/models/fuzzy_search_match.py +17 -19
  16. blaxel/core/sandbox/client/models/fuzzy_search_response.py +21 -24
  17. blaxel/core/sandbox/client/models/health_response.py +159 -0
  18. blaxel/core/sandbox/client/models/process_upgrade_state.py +20 -0
  19. blaxel/core/sandbox/client/models/upgrade_request.py +71 -0
  20. blaxel/core/sandbox/client/models/upgrade_status.py +125 -0
  21. blaxel/core/sandbox/default/__init__.py +2 -0
  22. blaxel/core/sandbox/default/filesystem.py +20 -6
  23. blaxel/core/sandbox/default/preview.py +48 -1
  24. blaxel/core/sandbox/default/process.py +66 -21
  25. blaxel/core/sandbox/default/sandbox.py +36 -5
  26. blaxel/core/sandbox/default/system.py +71 -0
  27. blaxel/core/sandbox/sync/__init__.py +2 -0
  28. blaxel/core/sandbox/sync/filesystem.py +19 -2
  29. blaxel/core/sandbox/sync/preview.py +50 -3
  30. blaxel/core/sandbox/sync/process.py +38 -15
  31. blaxel/core/sandbox/sync/sandbox.py +29 -4
  32. blaxel/core/sandbox/sync/system.py +71 -0
  33. blaxel/core/sandbox/types.py +212 -5
  34. blaxel/core/tools/__init__.py +4 -0
  35. blaxel/core/volume/volume.py +10 -0
  36. blaxel/crewai/model.py +81 -44
  37. blaxel/crewai/tools.py +85 -2
  38. blaxel/googleadk/model.py +22 -3
  39. blaxel/googleadk/tools.py +25 -6
  40. blaxel/langgraph/custom/gemini.py +19 -12
  41. blaxel/langgraph/model.py +26 -18
  42. blaxel/langgraph/tools.py +6 -12
  43. blaxel/livekit/model.py +7 -2
  44. blaxel/livekit/tools.py +3 -1
  45. blaxel/llamaindex/model.py +145 -84
  46. blaxel/llamaindex/tools.py +6 -4
  47. blaxel/openai/model.py +7 -1
  48. blaxel/openai/tools.py +13 -3
  49. blaxel/pydantic/model.py +38 -24
  50. blaxel/pydantic/tools.py +37 -4
  51. blaxel-0.2.38.dist-info/METADATA +528 -0
  52. {blaxel-0.2.36.dist-info → blaxel-0.2.38.dist-info}/RECORD +54 -45
  53. blaxel-0.2.36.dist-info/METADATA +0 -228
  54. {blaxel-0.2.36.dist-info → blaxel-0.2.38.dist-info}/WHEEL +0 -0
  55. {blaxel-0.2.36.dist-info → blaxel-0.2.38.dist-info}/licenses/LICENSE +0 -0
blaxel/langgraph/custom/gemini.py CHANGED
@@ -23,16 +23,18 @@ from typing import (
 
 import httpx
 import requests
-from langchain_core.callbacks.manager import (
+from langchain_core.callbacks.manager import (  # type: ignore[import-not-found]
     AsyncCallbackManagerForLLMRun,
     CallbackManagerForLLMRun,
 )
-from langchain_core.language_models import LanguageModelInput
-from langchain_core.language_models.chat_models import (
+from langchain_core.language_models import (  # type: ignore[import-not-found]
+    LanguageModelInput,
+)
+from langchain_core.language_models.chat_models import (  # type: ignore[import-not-found]
     BaseChatModel,
     LangSmithParams,
 )
-from langchain_core.messages import (
+from langchain_core.messages import (  # type: ignore[import-not-found]
     AIMessage,
     AIMessageChunk,
     BaseMessage,
@@ -41,25 +43,30 @@ from langchain_core.messages import (
     SystemMessage,
     ToolMessage,
 )
-from langchain_core.messages.ai import UsageMetadata
-from langchain_core.messages.tool import (
+from langchain_core.messages.ai import UsageMetadata  # type: ignore[import-not-found]
+from langchain_core.messages.tool import (  # type: ignore[import-not-found]
     invalid_tool_call,
     tool_call,
     tool_call_chunk,
 )
-from langchain_core.output_parsers.openai_tools import (
+from langchain_core.output_parsers.openai_tools import (  # type: ignore[import-not-found]
     JsonOutputKeyToolsParser,
     PydanticToolsParser,
     parse_tool_calls,
 )
-from langchain_core.outputs import (
+from langchain_core.outputs import (  # type: ignore[import-not-found]
     ChatGeneration,
     ChatGenerationChunk,
     ChatResult,
 )
-from langchain_core.runnables import Runnable, RunnablePassthrough
-from langchain_core.tools import BaseTool
-from langchain_core.utils.function_calling import convert_to_openai_tool
+from langchain_core.runnables import (  # type: ignore[import-not-found]
+    Runnable,
+    RunnablePassthrough,
+)
+from langchain_core.tools import BaseTool  # type: ignore[import-not-found]
+from langchain_core.utils.function_calling import (  # type: ignore[import-not-found]
+    convert_to_openai_tool,
+)
 from PIL import Image
 from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
 from tenacity import (
@@ -1467,4 +1474,4 @@ def image_bytes_to_b64_string(image_bytes: bytes, image_format: str = "jpeg") ->
     """Convert image bytes to base64 string."""
     import base64
 
-    return f"data:image/{image_format};base64,{base64.b64encode(image_bytes).decode('utf-8')}"
+    return f"data:image/{image_format};base64,{base64.b64encode(image_bytes).decode('utf-8')}"
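The recurring change across these integration modules is the optional-import pattern: type checkers see the import once under TYPE_CHECKING with an ignore marker, while runtime code defers the import until the dependency is actually needed. A minimal sketch of that pattern, using langchain_core as the optional dependency (the add tool is illustrative, not part of blaxel):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers; the ignore comment silences
    # "import-not-found" when the optional extra is not installed.
    from langchain_core.tools import StructuredTool  # type: ignore[import-not-found]


def make_add_tool() -> "StructuredTool":
    # Deferred runtime import: langchain_core is required only if this runs.
    from langchain_core.tools import StructuredTool  # type: ignore[import-not-found]

    def add(a: int, b: int) -> int:
        return a + b

    return StructuredTool.from_function(func=add, name="add", description="Add two integers")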
blaxel/langgraph/model.py CHANGED
@@ -7,11 +7,15 @@ from blaxel.core import bl_model as bl_model_core
 from blaxel.core import settings
 
 if TYPE_CHECKING:
-    from langchain_core.callbacks import Callbacks
-    from langchain_core.language_models import LanguageModelInput
-    from langchain_core.messages import BaseMessage
-    from langchain_core.outputs import LLMResult
-    from langchain_core.runnables import RunnableConfig
+    from langchain_core.callbacks import Callbacks  # type: ignore[import-not-found]
+    from langchain_core.language_models import (  # type: ignore[import-not-found]
+        LanguageModelInput,
+    )
+    from langchain_core.messages import BaseMessage  # type: ignore[import-not-found]
+    from langchain_core.outputs import LLMResult  # type: ignore[import-not-found]
+    from langchain_core.runnables import (  # type: ignore[import-not-found]
+        RunnableConfig,
+    )
 
 logger = getLogger(__name__)
 
@@ -32,7 +36,7 @@
         kwargs = config.get("kwargs", {})
 
         if model_type == "mistral":
-            from langchain_openai import ChatOpenAI
+            from langchain_openai import ChatOpenAI  # type: ignore[import-not-found]
 
             return ChatOpenAI(
                 api_key=settings.auth.token,
@@ -41,7 +45,7 @@
                 **kwargs,
             )
         elif model_type == "cohere":
-            from langchain_cohere import ChatCohere
+            from langchain_cohere import ChatCohere  # type: ignore[import-not-found]
 
             return ChatCohere(
                 cohere_api_key=settings.auth.token,
@@ -50,7 +54,7 @@
                 **kwargs,
             )
         elif model_type == "xai":
-            from langchain_xai import ChatXAI
+            from langchain_xai import ChatXAI  # type: ignore[import-not-found]
 
             return ChatXAI(
                 model=model,
@@ -59,7 +63,9 @@
                 **kwargs,
             )
         elif model_type == "deepseek":
-            from langchain_deepseek import ChatDeepSeek
+            from langchain_deepseek import (  # type: ignore[import-not-found]
+                ChatDeepSeek,
+            )
 
             return ChatDeepSeek(
                 api_key=settings.auth.token,
@@ -68,7 +74,9 @@
                 **kwargs,
             )
         elif model_type == "anthropic":
-            from langchain_anthropic import ChatAnthropic
+            from langchain_anthropic import (  # type: ignore[import-not-found]
+                ChatAnthropic,
+            )
 
             return ChatAnthropic(
                 api_key=settings.auth.token,
@@ -78,7 +86,9 @@
                 **kwargs,
             )
         elif model_type == "gemini":
-            from .custom.gemini import ChatGoogleGenerativeAI
+            from .custom.gemini import (
+                ChatGoogleGenerativeAI,  # type: ignore[import-not-found]
+            )
 
             return ChatGoogleGenerativeAI(
                 model=model,
@@ -88,7 +98,9 @@
                 **kwargs,
             )
         elif model_type == "cerebras":
-            from langchain_cerebras import ChatCerebras
+            from langchain_cerebras import (  # type: ignore[import-not-found]
+                ChatCerebras,
+            )
 
             return ChatCerebras(
                 api_key=settings.auth.token,
@@ -97,7 +109,7 @@
                 **kwargs,
             )
         else:
-            from langchain_openai import ChatOpenAI
+            from langchain_openai import ChatOpenAI  # type: ignore[import-not-found]
 
             if model_type != "openai":
                 logger.warning(f"Model {model} is not supported by Langchain, defaulting to OpenAI")
@@ -113,10 +125,6 @@
         # Only refresh if using ClientCredentials (which has get_token method)
         current_token = settings.auth.token
 
-        if hasattr(settings.auth, "get_token"):
-            # This will trigger token refresh if needed
-            settings.auth.get_token()
-
        new_token = settings.auth.token
 
         # If token changed, recreate the model
@@ -251,4 +259,4 @@
     model_config = {"type": type, "model": model, "url": url, "kwargs": kwargs}
 
     # Create and return the wrapper
-    return TokenRefreshingChatModel(model_config)
+    return TokenRefreshingChatModel(model_config)
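The wrapper's premise, reduced to a sketch: chat clients bake the credential in at construction time, so when the ambient token changes the wrapped model must be rebuilt rather than reused. Here `auth` and `build_model` are illustrative stand-ins for the blaxel internals, not the real API:

from typing import Any, Callable


class TokenRefreshingModel:
    def __init__(self, auth: Any, build_model: Callable[[str], Any]):
        self._auth = auth
        self._build = build_model
        self._token = auth.token  # token the current model was built with
        self._model = build_model(self._token)

    def _refresh(self) -> None:
        # Reading .token is assumed to refresh credentials lazily; a changed
        # value means the client object is stale and must be recreated.
        current = self._auth.token
        if current != self._token:
            self._token = current
            self._model = self._build(current)

    def invoke(self, *args: Any, **kwargs: Any) -> Any:
        self._refresh()
        return self._model.invoke(*args, **kwargs)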
blaxel/langgraph/tools.py CHANGED
@@ -4,8 +4,7 @@ from blaxel.core.tools import bl_tools as bl_tools_core
 from blaxel.core.tools.types import Tool, ToolException
 
 if TYPE_CHECKING:
-    from langchain_core.tools import StructuredTool
-    from mcp.types import EmbeddedResource, ImageContent
+    from langchain_core.tools import StructuredTool  # type: ignore[import-not-found]
 
 
 def _clean_schema_for_openai(schema: Dict[str, Any]) -> Dict[str, Any]:
@@ -38,19 +37,14 @@
 
 
 def get_langchain_tool(tool: Tool) -> "StructuredTool":
-    from langchain_core.tools import StructuredTool
-    from mcp.types import (
-        CallToolResult,
-        EmbeddedResource,
-        ImageContent,
-        TextContent,
-    )
-
-    NonTextContent = ImageContent | EmbeddedResource
+    from langchain_core.tools import StructuredTool  # type: ignore[import-not-found]
+    from mcp.types import CallToolResult, EmbeddedResource, ImageContent, TextContent
 
     async def langchain_coroutine(
         **arguments: dict[str, Any],
-    ) -> tuple[str | list[str], list[NonTextContent] | None]:
+    ) -> tuple[str | list[str], list[ImageContent | EmbeddedResource] | None]:
+        if not tool.coroutine:
+            raise ValueError(f"Tool {tool.name} does not have a coroutine defined")
         result: CallToolResult = await tool.coroutine(**arguments)
         text_contents: list[TextContent] = []
         non_text_contents = []
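The widened return annotation spells out what the coroutine hands back: a pair of text content plus non-text artifacts, matching LangChain's content-and-artifact tool convention. A simplified sketch of that split over an MCP CallToolResult (condensed relative to the converter above):

from mcp.types import CallToolResult, EmbeddedResource, ImageContent, TextContent


def split_result(
    result: CallToolResult,
) -> tuple[str | list[str], list[ImageContent | EmbeddedResource] | None]:
    # Text blocks become the tool's string content; images and embedded
    # resources travel separately as artifacts.
    texts = [c.text for c in result.content if isinstance(c, TextContent)]
    non_text = [c for c in result.content if isinstance(c, (ImageContent, EmbeddedResource))]
    content: str | list[str] = texts[0] if len(texts) == 1 else texts
    return content, non_text or None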
blaxel/livekit/model.py CHANGED
@@ -1,8 +1,8 @@
 from logging import getLogger
 
 import httpx
-from livekit.plugins import openai
-from openai import AsyncOpenAI
+from livekit.plugins import openai  # type: ignore[import-not-found]
+from openai import AsyncOpenAI  # type: ignore[import-not-found]
 
 from blaxel.core import bl_model as bl_model_core
 from blaxel.core import settings
@@ -20,6 +20,11 @@
     async def send(self, request, *args, **kwargs):
         # Update headers with the latest auth headers before each request
         auth_headers = settings.auth.get_headers()
+        # Remove the SDK's default "Authorization: Bearer replaced" header
+        # when our auth uses a different header (e.g. X-Blaxel-Authorization with API keys)
+        if "Authorization" not in auth_headers:
+            request.headers.pop("Authorization", None)
+            request.headers.pop("authorization", None)
         for key, value in auth_headers.items():
             request.headers[key] = value
         return await super().send(request, *args, **kwargs)
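For context, this is roughly how such a client plugs into an OpenAI-style SDK. The header name, key, and base URL below are illustrative assumptions, not blaxel's actual values; note that httpx header access is case-insensitive, so the double pop in the diff is defensive:

import httpx
from openai import AsyncOpenAI


class StaticHeadersClient(httpx.AsyncClient):
    """Toy stand-in for DynamicHeadersHTTPClient, with hard-coded headers."""

    async def send(self, request, *args, **kwargs):
        auth_headers = {"X-Blaxel-Authorization": "apikey <key>"}  # hypothetical header
        if "Authorization" not in auth_headers:
            # Drop the SDK's placeholder bearer token when auth rides elsewhere.
            request.headers.pop("Authorization", None)
        for key, value in auth_headers.items():
            request.headers[key] = value
        return await super().send(request, *args, **kwargs)


client = AsyncOpenAI(
    api_key="replaced",  # placeholder; real credentials travel in the headers above
    base_url="https://example.invalid/v1",
    http_client=StaticHeadersClient(),
)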
blaxel/livekit/tools.py CHANGED
@@ -1,4 +1,4 @@
-from livekit.agents import function_tool, llm
+from livekit.agents import function_tool, llm  # type: ignore[import-not-found]
 
 from blaxel.core.tools import bl_tools as bl_tools_core
 from blaxel.core.tools.types import Tool
@@ -6,6 +6,8 @@ from blaxel.core.tools.types import Tool
 
 def livekit_coroutine(tool: Tool):
     async def livekit_coroutine_wrapper(raw_arguments: dict[str, object]):
+        if not tool.coroutine:
+            raise ValueError(f"Tool {tool.name} does not have a coroutine defined")
         result = await tool.coroutine(**raw_arguments)
         return result.model_dump_json()
 
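Both tool adapters gain the same guard because Tool.coroutine is optional: checking it narrows the type for checkers and turns a cryptic "'NoneType' object is not callable" into an actionable error. A standalone sketch (this Tool dataclass is a stand-in for blaxel.core.tools.types.Tool):

import asyncio
from dataclasses import dataclass
from typing import Any, Awaitable, Callable, Optional


@dataclass
class Tool:
    name: str
    coroutine: Optional[Callable[..., Awaitable[Any]]] = None


async def call_tool(tool: Tool, **arguments: Any) -> Any:
    if not tool.coroutine:
        raise ValueError(f"Tool {tool.name} does not have a coroutine defined")
    return await tool.coroutine(**arguments)


async def echo(**kwargs: Any) -> dict:
    return kwargs


print(asyncio.run(call_tool(Tool("echo", echo), message="hi")))  # {'message': 'hi'}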
blaxel/llamaindex/model.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import os
 from logging import getLogger
-from typing import TYPE_CHECKING, Any, Sequence
+from typing import TYPE_CHECKING, Any, Dict, List, Sequence, Union
 
 from blaxel.core import bl_model as bl_model_core
 from blaxel.core import settings
@@ -11,7 +11,7 @@ from blaxel.core import settings
 os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
 
 if TYPE_CHECKING:
-    from llama_index.core.base.llms.types import (
+    from llama_index.core.base.llms.types import (  # type: ignore[import-not-found]
         ChatMessage,
         ChatResponse,
         ChatResponseAsyncGen,
@@ -20,27 +20,76 @@ if TYPE_CHECKING:
         CompletionResponseAsyncGen,
         CompletionResponseGen,
     )
+    from llama_index.core.llms.llm import (  # type: ignore[import-not-found]
+        ToolSelection,
+    )
+    from llama_index.core.tools.types import BaseTool  # type: ignore[import-not-found]
+
+# Runtime imports needed for class inheritance and construction
+from llama_index.core.base.llms.types import (  # type: ignore[import-not-found]
+    LLMMetadata,
+)
+from llama_index.core.llms.function_calling import (  # type: ignore[import-not-found]
+    FunctionCallingLLM,
+)
+from pydantic import PrivateAttr  # type: ignore[import-not-found]
 
 logger = getLogger(__name__)
 
+DEFAULT_CONTEXT_WINDOW = 128000
+DEFAULT_NUM_OUTPUT = 4096
+
+
+class TokenRefreshingLLM(FunctionCallingLLM):
+    """Wrapper for LlamaIndex LLMs that refreshes token before each call.
 
-class TokenRefreshingWrapper:
-    """Base wrapper class that refreshes token before each call."""
+    Inherits from FunctionCallingLLM to maintain type compatibility with
+    LlamaIndex's agents and components that validate isinstance(model, LLM).
+    """
+
+    _model_config_data: dict = PrivateAttr(default_factory=dict)
+    _wrapped: Any = PrivateAttr(default=None)
 
     def __init__(self, model_config: dict):
-        self.model_config = model_config
-        self.wrapped_model = self._create_model()
+        super().__init__()
+        self._model_config_data = model_config
+        self._wrapped = self._create_model()
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "TokenRefreshingLLM"
+
+    @property
+    def wrapped_model(self) -> Any:
+        """Access the underlying wrapped LLM model."""
+        return self._wrapped
+
+    @property
+    def metadata(self) -> LLMMetadata:
+        """Get LLM metadata, with fallback for unknown model names."""
+        try:
+            return self._wrapped.metadata
+        except (ValueError, KeyError) as e:
+            logger.warning(f"Could not get metadata from wrapped model: {e}. Using defaults.")
+            return LLMMetadata(
+                context_window=DEFAULT_CONTEXT_WINDOW,
+                num_output=DEFAULT_NUM_OUTPUT,
+                is_chat_model=True,
+                model_name=self._model_config_data.get("model", "unknown"),
+            )
 
     def _create_model(self):
         """Create the model instance with current token."""
-        config = self.model_config
+        config = self._model_config_data
         model_type = config["type"]
         model = config["model"]
         url = config["url"]
         kwargs = config.get("kwargs", {})
 
         if model_type == "anthropic":
-            from llama_index.llms.anthropic import Anthropic
+            from llama_index.llms.anthropic import (  # type: ignore[import-not-found]
+                Anthropic,
+            )
 
             return Anthropic(
                 model=model,
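The hunk above is the central refactor of this release: the duck-typed wrapper becomes a genuine FunctionCallingLLM subclass so isinstance checks in LlamaIndex agents pass. Because LlamaIndex LLMs are pydantic models, ad-hoc instance state has to live in PrivateAttr fields. A minimal illustration of that constraint in generic pydantic terms (not the blaxel class itself):

from pydantic import BaseModel, PrivateAttr


class Wrapper(BaseModel):
    # Private attrs are exempt from field validation and serialization,
    # which is what lets a pydantic model carry an arbitrary client object.
    _wrapped: object = PrivateAttr(default=None)

    def __init__(self, wrapped: object):
        super().__init__()
        self._wrapped = wrapped


w = Wrapper({"client": True})
print(w._wrapped)      # {'client': True}
print(w.model_dump())  # {} -- private state stays out of the schema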
@@ -50,7 +99,7 @@ class TokenRefreshingWrapper:
             **kwargs,
         )
         elif model_type == "xai":
-            from llama_index.llms.groq import Groq
+            from llama_index.llms.groq import Groq  # type: ignore[import-not-found]
 
             return Groq(
                 model=model,
@@ -60,7 +109,9 @@
         )
         elif model_type == "gemini":
             from google.genai.types import HttpOptions
-            from llama_index.llms.google_genai import GoogleGenAI
+            from llama_index.llms.google_genai import (  # type: ignore[import-not-found]
+                GoogleGenAI,
+            )
 
             return GoogleGenAI(
                 api_key=settings.auth.token,
@@ -73,11 +124,13 @@
             **kwargs,
         )
         elif model_type == "cohere":
-            from .custom.cohere import Cohere
+            from .custom.cohere import Cohere  # type: ignore[import-not-found]
 
             return Cohere(model=model, api_key=settings.auth.token, api_base=url, **kwargs)
         elif model_type == "deepseek":
-            from llama_index.llms.deepseek import DeepSeek
+            from llama_index.llms.deepseek import (  # type: ignore[import-not-found]
+                DeepSeek,
+            )
 
             return DeepSeek(
                 model=model,
@@ -86,11 +139,15 @@
             **kwargs,
         )
         elif model_type == "mistral":
-            from llama_index.llms.mistralai import MistralAI
+            from llama_index.llms.mistralai import (  # type: ignore[import-not-found]
+                MistralAI,
+            )
 
             return MistralAI(model=model, api_key=settings.auth.token, endpoint=url, **kwargs)
         elif model_type == "cerebras":
-            from llama_index.llms.cerebras import Cerebras
+            from llama_index.llms.cerebras import (  # type: ignore[import-not-found]
+                Cerebras,
+            )
 
             return Cerebras(
                 model=model,
@@ -99,7 +156,7 @@
             **kwargs,
         )
         else:
-            from llama_index.llms.openai import OpenAI
+            from llama_index.llms.openai import OpenAI  # type: ignore[import-not-found]
 
             if model_type != "openai":
                 logger.warning(
@@ -115,102 +172,106 @@
 
     def _refresh_token(self):
         """Refresh the token and recreate the model if needed."""
-        # Only refresh if using ClientCredentials (which has get_token method)
         current_token = settings.auth.token
 
-        if hasattr(settings.auth, "get_token"):
-            # This will trigger token refresh if needed
-            settings.auth.get_token()
-
         new_token = settings.auth.token
 
-        # If token changed, recreate the model
         if current_token != new_token:
-            self.wrapped_model = self._create_model()
-
-    def __getattr__(self, name):
-        """Delegate attribute access to wrapped model."""
-        return getattr(self.wrapped_model, name)
+            self._wrapped = self._create_model()
 
+    # --- Core LLM methods with token refresh ---
 
-class TokenRefreshingLLM(TokenRefreshingWrapper):
-    """Wrapper for LlamaIndex LLMs that refreshes token before each call."""
+    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
+        self._refresh_token()
+        return self._wrapped.chat(messages, **kwargs)
 
-    async def achat(
-        self,
-        messages: Sequence[ChatMessage],
-        **kwargs: Any,
-    ) -> ChatResponse:
-        """Async chat with token refresh."""
+    async def achat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         self._refresh_token()
-        return await self.wrapped_model.achat(messages, **kwargs)
+        return await self._wrapped.achat(messages, **kwargs)
 
-    def chat(
-        self,
-        messages: Sequence[ChatMessage],
-        **kwargs: Any,
-    ) -> ChatResponse:
-        """Sync chat with token refresh."""
+    def complete(self, prompt: str, formatted: bool = False, **kwargs: Any) -> CompletionResponse:
         self._refresh_token()
-        return self.wrapped_model.chat(messages, **kwargs)
+        return self._wrapped.complete(prompt, formatted=formatted, **kwargs)
 
-    async def astream_chat(
-        self,
-        messages: Sequence[ChatMessage],
-        **kwargs: Any,
-    ) -> ChatResponseAsyncGen:
-        """Async stream chat with token refresh."""
+    async def acomplete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponse:
         self._refresh_token()
-        async for chunk in self.wrapped_model.astream_chat(messages, **kwargs):
-            yield chunk
+        return await self._wrapped.acomplete(prompt, formatted=formatted, **kwargs)
 
     def stream_chat(
-        self,
-        messages: Sequence[ChatMessage],
-        **kwargs: Any,
+        self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
-        """Sync stream chat with token refresh."""
         self._refresh_token()
-        for chunk in self.wrapped_model.stream_chat(messages, **kwargs):
-            yield chunk
+        return self._wrapped.stream_chat(messages, **kwargs)
 
-    async def acomplete(
-        self,
-        prompt: str,
-        **kwargs: Any,
-    ) -> CompletionResponse:
-        """Async complete with token refresh."""
+    async def astream_chat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponseAsyncGen:
         self._refresh_token()
-        return await self.wrapped_model.acomplete(prompt, **kwargs)
+        result = self._wrapped.astream_chat(messages, **kwargs)
+        # Handle both coroutine and async generator patterns
+        if hasattr(result, "__aiter__"):
+            return result
+        return await result
 
-    def complete(
-        self,
-        prompt: str,
-        **kwargs: Any,
-    ) -> CompletionResponse:
-        """Sync complete with token refresh."""
+    def stream_complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponseGen:
         self._refresh_token()
-        return self.wrapped_model.complete(prompt, **kwargs)
+        return self._wrapped.stream_complete(prompt, formatted=formatted, **kwargs)
 
     async def astream_complete(
-        self,
-        prompt: str,
-        **kwargs: Any,
+        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseAsyncGen:
-        """Async stream complete with token refresh."""
         self._refresh_token()
-        async for chunk in self.wrapped_model.astream_complete(prompt, **kwargs):
-            yield chunk
+        result = self._wrapped.astream_complete(prompt, formatted=formatted, **kwargs)
+        # Handle both coroutine and async generator patterns
+        if hasattr(result, "__aiter__"):
+            return result
+        return await result
 
-    def stream_complete(
+    # --- FunctionCallingLLM methods (delegate to wrapped model) ---
+
+    def _prepare_chat_with_tools(
         self,
-        prompt: str,
+        tools: Sequence[BaseTool],
+        user_msg: Union[str, ChatMessage, None] = None,
+        chat_history: List[ChatMessage] | None = None,
+        verbose: bool = False,
+        allow_parallel_tool_calls: bool = False,
+        tool_required: Any = None,
         **kwargs: Any,
-    ) -> CompletionResponseGen:
-        """Sync stream complete with token refresh."""
-        self._refresh_token()
-        for chunk in self.wrapped_model.stream_complete(prompt, **kwargs):
-            yield chunk
+    ) -> Dict[str, Any]:
+        if hasattr(self._wrapped, "_prepare_chat_with_tools"):
+            return self._wrapped._prepare_chat_with_tools(
+                tools,
+                user_msg=user_msg,
+                chat_history=chat_history,
+                verbose=verbose,
+                allow_parallel_tool_calls=allow_parallel_tool_calls,
+                tool_required=tool_required,
+                **kwargs,
+            )
+        raise NotImplementedError(
+            f"The wrapped model ({type(self._wrapped).__name__}) does not support function calling"
+        )
+
+    def get_tool_calls_from_response(
+        self,
+        response: ChatResponse,
+        error_on_no_tool_call: bool = True,
+        **kwargs: Any,
+    ) -> List[ToolSelection]:
+        if hasattr(self._wrapped, "get_tool_calls_from_response"):
+            return self._wrapped.get_tool_calls_from_response(
+                response,
+                error_on_no_tool_call=error_on_no_tool_call,
+                **kwargs,
+            )
+        raise NotImplementedError(
+            f"The wrapped model ({type(self._wrapped).__name__}) does not support function calling"
        )
 
 
 async def bl_model(name, **kwargs):
@@ -220,4 +281,4 @@
     model_config = {"type": type, "model": model, "url": url, "kwargs": kwargs}
 
     # Create and return the wrapper
-    return TokenRefreshingLLM(model_config)
+    return TokenRefreshingLLM(model_config)
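The "coroutine and async generator patterns" comment reflects a real split among LlamaIndex backends: some implement astream_chat as an async generator function, others as an async def that returns the generator. A self-contained sketch of the normalization:

import asyncio


async def gen_style():  # async generator function
    yield "a"
    yield "b"


async def coro_style():  # coroutine that returns an async generator
    return gen_style()


async def normalize(result):
    # An async generator already exposes __aiter__; a coroutine must be
    # awaited first to obtain the generator it produces.
    if hasattr(result, "__aiter__"):
        return result
    return await result


async def main():
    for candidate in (gen_style(), coro_style()):
        stream = await normalize(candidate)
        print([chunk async for chunk in stream])  # ['a', 'b'] both times


asyncio.run(main())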
blaxel/llamaindex/tools.py CHANGED
@@ -5,12 +5,14 @@ from blaxel.core.tools.common import create_model_from_json_schema
 from blaxel.core.tools.types import Tool
 
 if TYPE_CHECKING:
-    from llama_index.core.tools import FunctionTool
+    from llama_index.core.tools import FunctionTool  # type: ignore[import-not-found]
 
 
 def get_llamaindex_tool(tool: Tool) -> "FunctionTool":
-    from llama_index.core.tools import FunctionTool
-    from llama_index.core.tools.types import ToolMetadata
+    from llama_index.core.tools import FunctionTool  # type: ignore[import-not-found]
+    from llama_index.core.tools.types import (  # type: ignore[import-not-found]
+        ToolMetadata,
+    )
 
     model_schema = create_model_from_json_schema(
         tool.input_schema, model_name=f"{tool.name}_Schema"
@@ -29,4 +31,4 @@
 async def bl_tools(tools_names: list[str], **kwargs) -> list["FunctionTool"]:
     tools = bl_tools_core(tools_names, **kwargs)
     await tools.initialize()
-    return [get_llamaindex_tool(tool) for tool in tools.get_tools()]
+    return [get_llamaindex_tool(tool) for tool in tools.get_tools()]
blaxel/openai/model.py CHANGED
@@ -1,5 +1,6 @@
 import httpx
-from agents import AsyncOpenAI, OpenAIChatCompletionsModel
+from agents import OpenAIChatCompletionsModel
+from openai import AsyncOpenAI
 
 from blaxel.core import bl_model as bl_model_core
 from blaxel.core import settings
@@ -14,6 +15,11 @@
     async def send(self, request, *args, **kwargs):
         # Update headers with the latest auth headers before each request
         auth_headers = settings.auth.get_headers()
+        # Remove the SDK's default "Authorization: Bearer replaced" header
+        # when our auth uses a different header (e.g. X-Blaxel-Authorization with API keys)
+        if "Authorization" not in auth_headers:
+            request.headers.pop("Authorization", None)
+            request.headers.pop("authorization", None)
         for key, value in auth_headers.items():
             request.headers[key] = value
         return await super().send(request, *args, **kwargs)
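The import fix matters because AsyncOpenAI lives in the openai package, not the agents SDK; the corrected pair composes as sketched below. Constructor arguments follow the commonly documented openai-agents API, and the key and URL are placeholders; treat this as illustrative rather than blaxel's exact wiring:

from agents import OpenAIChatCompletionsModel
from openai import AsyncOpenAI

# The raw OpenAI client owns transport concerns (base URL, custom http_client);
# the agents-SDK wrapper adapts it for agent runs.
client = AsyncOpenAI(api_key="replaced", base_url="https://example.invalid/v1")
model = OpenAIChatCompletionsModel(model="gpt-4o", openai_client=client)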