chatlas 0.8.1__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of chatlas might be problematic.

chatlas/_chat.py CHANGED
@@ -42,27 +42,41 @@ from ._display import (
     MockMarkdownDisplay,
 )
 from ._logging import log_tool_error
-from ._provider import Provider
+from ._mcp_manager import MCPSessionManager
+from ._provider import Provider, StandardModelParams, SubmitInputArgsT
+from ._tokens import compute_cost, get_token_pricing
 from ._tools import Tool, ToolRejectError
 from ._turn import Turn, user_turn
-from ._typing_extensions import TypedDict
-from ._utils import html_escape, wrap_async
+from ._typing_extensions import TypedDict, TypeGuard
+from ._utils import MISSING, MISSING_TYPE, html_escape, wrap_async
 
 
-class AnyTypeDict(TypedDict, total=False):
-    pass
+class TokensDict(TypedDict):
+    """
+    A TypedDict representing the token counts for a turn in the chat.
+
+    `role` is the role of the turn (i.e., "user" or "assistant").
+    `tokens` is the number of new tokens used in the turn.
+    `tokens_total` is the total number of tokens used in the turn.
+    For example, if a new user input of 2 tokens is sent along with 10 tokens
+    of context from prior turns (input and output), `tokens_total` would be 12.
+    """
 
+    role: Literal["user", "assistant"]
+    tokens: int
+    tokens_total: int
 
-SubmitInputArgsT = TypeVar("SubmitInputArgsT", bound=AnyTypeDict)
-"""
-A TypedDict representing the arguments that can be passed to the `.chat()`
-method of a [](`~chatlas.Chat`) instance.
-"""
 
 CompletionT = TypeVar("CompletionT")
 
 EchoOptions = Literal["output", "all", "none", "text"]
 
+T = TypeVar("T")
+
+
+def is_present(value: T | None | MISSING_TYPE) -> TypeGuard[T]:
+    return value is not None and not isinstance(value, MISSING_TYPE)
+
 
 class Chat(Generic[SubmitInputArgsT, CompletionT]):
     """
@@ -82,7 +96,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
     def __init__(
         self,
         provider: Provider,
-        turns: Optional[Sequence[Turn]] = None,
+        system_prompt: Optional[str] = None,
     ):
         """
         Create a new chat object.
@@ -91,11 +105,13 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         ----------
         provider
             A [](`~chatlas.Provider`) object.
-        turns
-            A list of [](`~chatlas.Turn`) objects to initialize the chat with.
+        system_prompt
+            A system prompt to set the behavior of the assistant.
         """
         self.provider = provider
-        self._turns: list[Turn] = list(turns or [])
+        self._turns: list[Turn] = []
+        self.system_prompt = system_prompt
+
         self._tools: dict[str, Tool] = {}
         self._on_tool_request_callbacks = CallbackManager()
         self._on_tool_result_callbacks = CallbackManager()
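One consequence of this change: `Chat.__init__` no longer accepts `turns`, so prior history has to be restored through `.set_turns()`. A sketch of the migration (using `ChatOpenAI` for illustration; `previous_turns` is hypothetical):

```python
from chatlas import ChatOpenAI

# chatlas 0.8.x: Chat(provider, turns=[...])
# chatlas 0.9.x: the constructor takes a system prompt instead...
chat = ChatOpenAI(system_prompt="Be terse.")

# ...and prior (non-system) history is restored separately, e.g.:
# chat.set_turns(previous_turns)  # `previous_turns` is a hypothetical list[Turn]
```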
@@ -105,6 +121,11 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             "rich_console": {},
             "css_styles": {},
         }
+        self._mcp_manager = MCPSessionManager()
+
+        # Chat input parameters from `set_model_params()`
+        self._standard_model_params: StandardModelParams = {}
+        self._submit_input_kwargs: Optional[SubmitInputArgsT] = None
 
     def get_turns(
         self,
@@ -149,8 +170,10 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         """
         Set the turns of the chat.
 
-        This method is primarily useful for clearing or setting the turns of the
-        chat (i.e., limiting the context window).
+        Replaces the current chat history state (i.e., turns) with the provided turns.
+        This can be useful for:
+        * Clearing (or trimming) the chat history (i.e., `.set_turns([])`).
+        * Restoring context from a previous chat.
 
         Parameters
         ----------
@@ -165,7 +188,28 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                 "Consider removing this turn and setting the `.system_prompt` separately "
                 "if you want to change the system prompt."
             )
-        self._turns = list(turns)
+
+        turns_list = list(turns)
+        # Preserve the system prompt if it exists
+        if self._turns and self._turns[0].role == "system":
+            turns_list.insert(0, self._turns[0])
+        self._turns = turns_list
+
+    def add_turn(self, turn: Turn):
+        """
+        Add a turn to the chat.
+
+        Parameters
+        ----------
+        turn
+            The turn to add. Turns with the role "system" are not allowed.
+        """
+        if turn.role == "system":
+            raise ValueError(
+                "Turns with the role 'system' are not allowed. "
+                "The system prompt must be set separately using the `.system_prompt` property."
+            )
+        self._turns.append(turn)
 
     @property
     def system_prompt(self) -> str | None:
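The reworked `.set_turns()` preserves an existing system turn, while the new `.add_turn()` rejects system turns outright. Roughly, assuming a `chat` like the one above:

```python
from chatlas import Turn

chat.set_turns([])  # clears the history; an existing system turn survives

chat.add_turn(Turn("user", "What's 1 + 1?"))
chat.add_turn(Turn("assistant", "2"))

# Raises ValueError; set the `.system_prompt` property instead:
# chat.add_turn(Turn("system", "Be terse."))
```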
@@ -188,43 +232,14 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         if value is not None:
             self._turns.insert(0, Turn("system", value))
 
-    @overload
-    def tokens(self) -> list[tuple[int, int] | None]: ...
-
-    @overload
-    def tokens(
-        self,
-        values: Literal["cumulative"],
-    ) -> list[tuple[int, int] | None]: ...
-
-    @overload
-    def tokens(
-        self,
-        values: Literal["discrete"],
-    ) -> list[int]: ...
-
-    def tokens(
-        self,
-        values: Literal["cumulative", "discrete"] = "discrete",
-    ) -> list[int] | list[tuple[int, int] | None]:
+    def get_tokens(self) -> list[TokensDict]:
         """
         Get the tokens for each turn in the chat.
 
-        Parameters
-        ----------
-        values
-            If "cumulative" (the default), the result can be summed to get the
-            chat's overall token usage (helpful for computing overall cost of
-            the chat). If "discrete", the result can be summed to get the number of
-            tokens the turns will cost to generate the next response (helpful
-            for estimating cost of the next response, or for determining if you
-            are about to exceed the token limit).
-
         Returns
         -------
-        list[int]
-            A list of token counts for each (non-system) turn in the chat. The
-            1st turn includes the tokens count for the system prompt (if any).
+        list[TokensDict]
+            A list of dictionaries with the token counts for each (non-system) turn.
 
         Raises
         ------
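`get_tokens()` replaces the old `tokens()` method and its "cumulative"/"discrete" modes with a single list of `TokensDict`s. A sketch of the result shape (values illustrative):

```python
tokens = chat.get_tokens()
# [
#     {"role": "user",      "tokens": 2,  "tokens_total": 2},
#     {"role": "assistant", "tokens": 10, "tokens_total": 10},
#     {"role": "user",      "tokens": 5,  "tokens_total": 17},  # 5 new + 12 context
#     {"role": "assistant", "tokens": 8,  "tokens_total": 8},
# ]

# Cumulative input-side usage, e.g. for cost accounting:
overall_input = sum(t["tokens_total"] for t in tokens if t["role"] == "user")
```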
@@ -238,9 +253,6 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
 
         turns = self.get_turns(include_system_prompt=False)
 
-        if values == "cumulative":
-            return [turn.tokens for turn in turns]
-
         if len(turns) == 0:
             return []
 
@@ -276,12 +288,21 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                 "Expected the 1st assistant turn to contain token counts. " + err_info
             )
 
-        res: list[int] = [
+        res: list[TokensDict] = [
             # Implied token count for the 1st user input
-            turns[1].tokens[0],
+            {
+                "role": "user",
+                "tokens": turns[1].tokens[0],
+                "tokens_total": turns[1].tokens[0],
+            },
             # The token count for the 1st assistant response
-            turns[1].tokens[1],
+            {
+                "role": "assistant",
+                "tokens": turns[1].tokens[1],
+                "tokens_total": turns[1].tokens[1],
+            },
         ]
+
         for i in range(1, len(turns) - 1, 2):
             ti = turns[i]
             tj = turns[i + 2]
@@ -296,15 +317,102 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             )
         res.extend(
             [
-                # Implied token count for the user input
-                tj.tokens[0] - sum(ti.tokens),
-                # The token count for the assistant response
-                tj.tokens[1],
+                {
+                    "role": "user",
+                    # Implied token count for the user input
+                    "tokens": tj.tokens[0] - sum(ti.tokens),
+                    # Total tokens = distinct new tokens + context sent for the turn
+                    "tokens_total": tj.tokens[0],
+                },
+                {
+                    "role": "assistant",
+                    # The token count for the assistant response
+                    "tokens": tj.tokens[1],
+                    # Total tokens = total assistant tokens used in the turn
+                    "tokens_total": tj.tokens[1],
+                },
             ]
         )
 
         return res
 
+    def get_cost(
+        self,
+        options: Literal["all", "last"] = "all",
+        token_price: Optional[tuple[float, float]] = None,
+    ) -> float:
+        """
+        Estimate the cost of the chat.
+
+        Note
+        ----
+        This is a rough estimate; treat it as such. Providers may change their
+        pricing frequently and without notice.
+
+        Parameters
+        ----------
+        options
+            One of the following (default is "all"):
+            - `"all"`: Return the total cost of all turns in the chat.
+            - `"last"`: Return the cost of the last turn in the chat.
+        token_price
+            An optional tuple in the format of (input_token_cost,
+            output_token_cost) for bringing your own cost information.
+            - `input_token_cost`: The cost per user token in USD per
+              million tokens.
+            - `output_token_cost`: The cost per assistant token in USD
+              per million tokens.
+
+        Returns
+        -------
+        float
+            The cost of the chat, in USD.
+        """
+
+        # Look up token cost for user and input tokens based on the provider and model
+        turns_tokens = self.get_tokens()
+        if token_price:
+            input_token_price = token_price[0] / 1e6
+            output_token_price = token_price[1] / 1e6
+        else:
+            price_token = get_token_pricing(self.provider.name, self.provider.model)
+            if not price_token:
+                raise KeyError(
+                    f"We could not locate pricing information for model '{self.provider.model}' from provider '{self.provider.name}'. "
+                    "If you know the pricing for this model, specify it in `token_price`."
+                )
+            input_token_price = price_token["input"] / 1e6
+            output_token_price = price_token["output"] / 1e6
+
+        if len(turns_tokens) == 0:
+            return 0.0
+
+        if options not in ("all", "last"):
+            raise ValueError(
+                f"Expected `options` to be one of 'all' or 'last', not '{options}'"
+            )
+
+        if options == "all":
+            asst_tokens = sum(
+                u["tokens_total"] for u in turns_tokens if u["role"] == "assistant"
+            )
+            user_tokens = sum(
+                u["tokens_total"] for u in turns_tokens if u["role"] == "user"
+            )
+            cost = (asst_tokens * output_token_price) + (
+                user_tokens * input_token_price
+            )
+            return cost
+
+        last_turn = turns_tokens[-1]
+        if last_turn["role"] == "assistant":
+            return last_turn["tokens"] * output_token_price
+        if last_turn["role"] == "user":
+            return last_turn["tokens_total"] * input_token_price
+        raise ValueError(
+            f"Expected last turn to have a role of 'user' or 'assistant', not '{last_turn['role']}'"
+        )
+
     def token_count(
         self,
         *args: Content | str,
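Typical `get_cost()` usage might look like the following sketch (prices are illustrative and expressed per million tokens, matching the `token_price` convention above):

```python
total_usd = chat.get_cost()               # whole conversation, built-in pricing
last_usd = chat.get_cost(options="last")  # only the most recent turn

# Bring-your-own pricing: (input USD, output USD) per million tokens
byo_usd = chat.get_cost(token_price=(2.50, 10.00))
```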
@@ -397,6 +505,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         *,
         stream: bool = True,
         port: int = 0,
+        host: str = "127.0.0.1",
         launch_browser: bool = True,
         bg_thread: Optional[bool] = None,
         echo: Optional[EchoOptions] = None,
@@ -412,6 +521,8 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             Whether to stream the response (i.e., have the response appear in chunks).
         port
             The port to run the app on (the default is 0, which will choose a random port).
+        host
+            The host to run the app on (the default is "127.0.0.1").
         launch_browser
             Whether to launch a browser window.
         bg_thread
@@ -479,7 +590,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         app = App(app_ui, server)
 
         def _run_app():
-            run_app(app, launch_browser=launch_browser, port=port)
+            run_app(app, launch_browser=launch_browser, port=port, host=host)
 
         # Use bg_thread by default in Jupyter and Positron
         if bg_thread is None:
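The new `host` parameter is threaded through to `run_app()`, so the browser-based chat app can bind to something other than the loopback default. For example:

```python
# Default stays local-only:
chat.app()

# Make the app reachable from other machines (only do this on a trusted network):
chat.app(host="0.0.0.0", port=8000)
```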
@@ -910,10 +1021,422 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             json = res[0]
         return json.value
 
+    def set_model_params(
+        self,
+        *,
+        temperature: float | None | MISSING_TYPE = MISSING,
+        top_p: float | None | MISSING_TYPE = MISSING,
+        top_k: int | None | MISSING_TYPE = MISSING,
+        frequency_penalty: float | None | MISSING_TYPE = MISSING,
+        presence_penalty: float | None | MISSING_TYPE = MISSING,
+        seed: int | None | MISSING_TYPE = MISSING,
+        max_tokens: int | None | MISSING_TYPE = MISSING,
+        log_probs: bool | None | MISSING_TYPE = MISSING,
+        stop_sequences: list[str] | None | MISSING_TYPE = MISSING,
+        kwargs: SubmitInputArgsT | None | MISSING_TYPE = MISSING,
+    ):
+        """
+        Set common model parameters for the chat.
+
+        A unified interface for setting model parameters that most providers
+        support, such as temperature, top_p, etc.
+
+        By default, if a parameter is not set (i.e., left as `MISSING`), the
+        provider's default value is used. To reset a parameter to its default
+        value, set it to `None`.
+
+        Parameters
+        ----------
+        temperature
+            Temperature of the sampling distribution.
+        top_p
+            The cumulative probability for token selection.
+        top_k
+            The number of highest probability vocabulary tokens to keep.
+        frequency_penalty
+            Frequency penalty for generated tokens.
+        presence_penalty
+            Presence penalty for generated tokens.
+        seed
+            Seed for the random number generator.
+        max_tokens
+            Maximum number of tokens to generate.
+        log_probs
+            Whether to include log probabilities in the output.
+        stop_sequences
+            A list of strings to stop generation on.
+        kwargs
+            Additional keyword arguments to use when submitting input to the
+            model. When calling this method repeatedly with different values,
+            only the `kwargs` from the last call are used.
+        """
+
+        params: StandardModelParams = {}
+
+        # Collect specified parameters
+        if is_present(temperature):
+            params["temperature"] = temperature
+        if is_present(top_p):
+            params["top_p"] = top_p
+        if is_present(top_k):
+            params["top_k"] = top_k
+        if is_present(frequency_penalty):
+            params["frequency_penalty"] = frequency_penalty
+        if is_present(presence_penalty):
+            params["presence_penalty"] = presence_penalty
+        if is_present(seed):
+            params["seed"] = seed
+        if is_present(max_tokens):
+            params["max_tokens"] = max_tokens
+        if is_present(log_probs):
+            params["log_probs"] = log_probs
+        if is_present(stop_sequences):
+            params["stop_sequences"] = stop_sequences
+
+        # Warn about unsupported parameters
+        supported = self.provider.supported_model_params()
+        unsupported = set(params.keys()) - set(supported)
+        if unsupported:
+            warnings.warn(
+                f"The following parameters are not supported by the provider: {unsupported}. "
+                "Please check the provider's documentation for supported parameters.",
+                UserWarning,
+            )
+            # Drop the unsupported parameters
+            for key in unsupported:
+                del params[key]
+
+        # Drop parameters that are set to None
+        discard = []
+        if temperature is None:
+            discard.append("temperature")
+        if top_p is None:
+            discard.append("top_p")
+        if top_k is None:
+            discard.append("top_k")
+        if frequency_penalty is None:
+            discard.append("frequency_penalty")
+        if presence_penalty is None:
+            discard.append("presence_penalty")
+        if seed is None:
+            discard.append("seed")
+        if max_tokens is None:
+            discard.append("max_tokens")
+        if log_probs is None:
+            discard.append("log_probs")
+        if stop_sequences is None:
+            discard.append("stop_sequences")
+
+        for key in discard:
+            if key in self._standard_model_params:
+                del self._standard_model_params[key]
+
+        # Update the standard model parameters
+        self._standard_model_params.update(params)
+
+        # Update the submit input kwargs
+        if kwargs is None:
+            self._submit_input_kwargs = None
+
+        if is_present(kwargs):
+            self._submit_input_kwargs = kwargs
+
+    async def register_mcp_tools_http_stream_async(
+        self,
+        *,
+        url: str,
+        include_tools: Sequence[str] = (),
+        exclude_tools: Sequence[str] = (),
+        name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        transport_kwargs: Optional[dict[str, Any]] = None,
+    ):
+        """
+        Register tools from an MCP server using streamable HTTP transport.
+
+        Connects to an MCP server (that communicates over a streamable HTTP
+        transport) and registers the available tools. This is useful for
+        utilizing tools provided by an MCP server running remotely (or
+        locally) over HTTP.
+
+        Pre-requisites
+        --------------
+
+        ::: {.callout-note}
+        Requires the `mcp` package to be installed. Install it with:
+
+        ```bash
+        pip install mcp
+        ```
+        :::
+
+        Parameters
+        ----------
+        url
+            URL endpoint where the streamable HTTP server is mounted (e.g.,
+            `http://localhost:8000/mcp`).
+        name
+            A unique name for the MCP server session. If not provided, the name
+            is derived from the MCP server information. This name is primarily
+            useful for cleanup purposes (i.e., to close a particular MCP
+            session).
+        include_tools
+            List of tool names to include. By default, all available tools are
+            included.
+        exclude_tools
+            List of tool names to exclude. This parameter and `include_tools`
+            are mutually exclusive.
+        namespace
+            A namespace to prepend to tool names (i.e., `namespace.tool_name`)
+            from this MCP server. This is primarily useful to avoid name
+            collisions with other tools already registered with the chat. This
+            namespace applies when tools are advertised to the LLM, so try
+            to use a meaningful name that describes the server and/or the tools
+            it provides. For example, if you have a server that provides tools
+            for mathematical operations, you might use `math` as the namespace.
+        transport_kwargs
+            Additional keyword arguments for the transport layer (i.e.,
+            `mcp.client.streamable_http.streamablehttp_client`).
+
+        Returns
+        -------
+        None
+
+        See Also
+        --------
+        * `.cleanup_mcp_tools()` : Clean up registered MCP tools.
+        * `.register_mcp_tools_stdio_async()` : Register tools from an MCP server using stdio transport.
+
+        Note
+        ----
+        Unlike the `.register_mcp_tools_stdio_async()` method, this method does
+        not launch an MCP server. Instead, it assumes an HTTP server is already
+        running at the specified URL. This is useful for connecting to an
+        existing MCP server that is already running and serving tools.
+
+        Examples
+        --------
+
+        Assuming you have a Python script `my_mcp_server.py` that implements an
+        MCP server like so:
+
+        ```python
+        from mcp.server.fastmcp import FastMCP
+
+        app = FastMCP("my_server")
+
+        @app.tool(description="Add two numbers.")
+        def add(x: int, y: int) -> int:
+            return x + y
+
+        app.run(transport="streamable-http")
+        ```
+
+        You can launch this server like so:
+
+        ```bash
+        python my_mcp_server.py
+        ```
+
+        Then, you can register this server with the chat as follows:
+
+        ```python
+        await chat.register_mcp_tools_http_stream_async(
+            url="http://localhost:8000/mcp"
+        )
+        ```
+        """
+        if isinstance(exclude_tools, str):
+            exclude_tools = [exclude_tools]
+        if isinstance(include_tools, str):
+            include_tools = [include_tools]
+
+        session_info = await self._mcp_manager.register_http_stream_tools(
+            name=name,
+            url=url,
+            include_tools=include_tools,
+            exclude_tools=exclude_tools,
+            namespace=namespace,
+            transport_kwargs=transport_kwargs or {},
+        )
+
+        overlapping_tools = set(self._tools.keys()) & set(session_info.tools)
+        if overlapping_tools:
+            await self._mcp_manager.close_sessions([session_info.name])
+            raise ValueError(
+                f"The following tools are already registered: {overlapping_tools}. "
+                "Consider providing a namespace when registering this MCP server "
+                "to avoid name collisions."
+            )
+
+        self._tools.update(session_info.tools)
+
+    async def register_mcp_tools_stdio_async(
+        self,
+        *,
+        command: str,
+        args: list[str],
+        name: Optional[str] = None,
+        include_tools: Sequence[str] = (),
+        exclude_tools: Sequence[str] = (),
+        namespace: Optional[str] = None,
+        transport_kwargs: Optional[dict[str, Any]] = None,
+    ):
+        """
+        Register tools from an MCP server using stdio (standard input/output) transport.
+
+        Useful for launching an MCP server and registering its tools with the chat -- all
+        from the same Python process.
+
+        In more detail, this method:
+
+        1. Executes the given `command` with the provided `args`.
+           * This should start an MCP server that communicates via stdio.
+        2. Establishes a client connection to the MCP server using the `mcp` package.
+        3. Registers the available tools from the MCP server with the chat.
+        4. Tracks the session so that it (and its tools) can later be closed via
+           `.cleanup_mcp_tools()`.
+
+        Pre-requisites
+        --------------
+
+        ::: {.callout-note}
+        Requires the `mcp` package to be installed. Install it with:
+
+        ```bash
+        pip install mcp
+        ```
+        :::
+
+        Parameters
+        ----------
+        command
+            System command to execute to start the MCP server (e.g., `python`).
+        args
+            Arguments to pass to the system command (e.g., `["-m",
+            "my_mcp_server"]`).
+        name
+            A unique name for the MCP server session. If not provided, the name
+            is derived from the MCP server information. This name is primarily
+            useful for cleanup purposes (i.e., to close a particular MCP
+            session).
+        include_tools
+            List of tool names to include. By default, all available tools are
+            included.
+        exclude_tools
+            List of tool names to exclude. This parameter and `include_tools`
+            are mutually exclusive.
+        namespace
+            A namespace to prepend to tool names (i.e., `namespace.tool_name`)
+            from this MCP server. This is primarily useful to avoid name
+            collisions with other tools already registered with the chat. This
+            namespace applies when tools are advertised to the LLM, so try
+            to use a meaningful name that describes the server and/or the tools
+            it provides. For example, if you have a server that provides tools
+            for mathematical operations, you might use `math` as the namespace.
+        transport_kwargs
+            Additional keyword arguments for the stdio transport layer (i.e.,
+            `mcp.client.stdio.stdio_client`).
+
+        Returns
+        -------
+        None
+
+        See Also
+        --------
+        * `.cleanup_mcp_tools()` : Clean up registered MCP tools.
+        * `.register_mcp_tools_http_stream_async()` : Register tools from an MCP server using streamable HTTP transport.
+
+        Examples
+        --------
+
+        Assuming you have a Python script `my_mcp_server.py` that implements an
+        MCP server like so:
+
+        ```python
+        from mcp.server.fastmcp import FastMCP
+
+        app = FastMCP("my_server")
+
+        @app.tool(description="Add two numbers.")
+        def add(y: int, z: int) -> int:
+            return y + z
+
+        app.run(transport="stdio")
+        ```
+
+        You can register this server with the chat as follows:
+
+        ```python
+        from chatlas import ChatOpenAI
+
+        chat = ChatOpenAI()
+
+        await chat.register_mcp_tools_stdio_async(
+            command="python",
+            args=["-m", "my_mcp_server"],
+        )
+        ```
+        """
+        if isinstance(exclude_tools, str):
+            exclude_tools = [exclude_tools]
+        if isinstance(include_tools, str):
+            include_tools = [include_tools]
+
+        session_info = await self._mcp_manager.register_stdio_tools(
+            command=command,
+            args=args,
+            name=name,
+            include_tools=include_tools,
+            exclude_tools=exclude_tools,
+            namespace=namespace,
+            transport_kwargs=transport_kwargs or {},
+        )
+
+        overlapping_tools = set(self._tools.keys()) & set(session_info.tools)
+        if overlapping_tools:
+            await self._mcp_manager.close_sessions([session_info.name])
+            raise ValueError(
+                f"The following tools are already registered: {overlapping_tools}. "
+                "Consider providing a namespace when registering this MCP server "
+                "to avoid name collisions."
+            )
+
+        self._tools.update(session_info.tools)
+
+    async def cleanup_mcp_tools(self, names: Optional[Sequence[str]] = None):
+        """
+        Close MCP server connections (and their corresponding tools).
+
+        This method closes the MCP client sessions and removes the tools registered
+        from the MCP servers. If `names` is provided, only the tools and sessions
+        associated with those names are cleaned up. If no names are provided,
+        all registered MCP tools and sessions are cleaned up.
+
+        Parameters
+        ----------
+        names
+            If provided, only clean up the tools and sessions associated
+            with these names. If not provided, clean up all registered MCP
+            tools and sessions.
+
+        Returns
+        -------
+        None
+        """
+        closed_sessions = await self._mcp_manager.close_sessions(names)
+
+        # Remove relevant MCP tools from the main tools registry
+        for session in closed_sessions:
+            for tool_name in session.tools:
+                if tool_name in self._tools:
+                    del self._tools[tool_name]
+
     def register_tool(
         self,
         func: Callable[..., Any] | Callable[..., Awaitable[Any]],
         *,
+        force: bool = False,
         model: Optional[type[BaseModel]] = None,
     ):
         """
@@ -930,7 +1453,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         recommended):
 
         ```python
-        from chatlas import ChatOpenAI, Tool
+        from chatlas import ChatOpenAI
 
 
         def add(a: int, b: int) -> int:
@@ -958,7 +1481,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         and also more directly document the input parameters:
 
         ```python
-        from chatlas import ChatOpenAI, Tool
+        from chatlas import ChatOpenAI
         from pydantic import BaseModel, Field
 
 
@@ -983,16 +1506,62 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         ----------
         func
             The function to be invoked when the tool is called.
+        force
+            If `True`, overwrite any existing tool with the same name. If `False`
+            (the default), raise an error if a tool with the same name already exists.
         model
             A Pydantic model that describes the input parameters for the function.
             If not provided, the model will be inferred from the function's type hints.
             The primary reason why you might want to provide a model in
             Note that the name and docstring of the model takes precedence over the
             name and docstring of the function.
+
+        Raises
+        ------
+        ValueError
+            If a tool with the same name already exists and `force` is `False`.
         """
-        tool = Tool(func, model=model)
+        tool = Tool.from_func(func, model=model)
+        if tool.name in self._tools and not force:
+            raise ValueError(
+                f"Tool with name '{tool.name}' is already registered. "
+                "Set `force=True` to overwrite it."
+            )
         self._tools[tool.name] = tool
 
+    def get_tools(self) -> list[Tool]:
+        """
+        Get the list of registered tools.
+
+        Returns
+        -------
+        list[Tool]
+            A list of `Tool` instances that are currently registered with the chat.
+        """
+        return list(self._tools.values())
+
+    def set_tools(
+        self, tools: list[Callable[..., Any] | Callable[..., Awaitable[Any]] | Tool]
+    ):
+        """
+        Set the tools for the chat.
+
+        This replaces any previously registered tools with the provided list of
+        tools. This is for advanced usage -- typically, you would use
+        `.register_tool()` to register individual tools as needed.
+
+        Parameters
+        ----------
+        tools
+            A list of `Tool` instances (or callables) to set as the chat's tools.
+        """
+        self._tools = {}
+        for tool in tools:
+            if isinstance(tool, Tool):
+                self._tools[tool.name] = tool
+            else:
+                self.register_tool(tool)
+
     def on_tool_request(self, callback: Callable[[ContentToolRequest], None]):
         """
         Register a callback for a tool request event.
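The `force` flag and the new `get_tools()`/`set_tools()` accessors combine roughly as follows (assuming a `chat` instance):

```python
def add(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b


chat.register_tool(add)
# chat.register_tool(add)            # would raise ValueError: already registered
chat.register_tool(add, force=True)  # overwrites instead of raising

tools = chat.get_tools()  # list[Tool]
chat.set_tools(tools)     # wholesale replacement of the registry
```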
@@ -1257,22 +1826,23 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             assert turn is not None
             user_turn_result = None
 
-            results: list[ContentToolResult] = []
+            all_results: list[ContentToolResult] = []
             for x in turn.contents:
                 if isinstance(x, ContentToolRequest):
                     if echo == "output":
                         self._echo_content(f"\n\n{x}\n\n")
                     if content == "all":
                         yield x
-                    res = self._invoke_tool(x)
-                    if echo == "output":
-                        self._echo_content(f"\n\n{res}\n\n")
-                    if content == "all":
-                        yield res
-                    results.append(res)
+                    results = self._invoke_tool(x)
+                    for res in results:
+                        if echo == "output":
+                            self._echo_content(f"\n\n{res}\n\n")
+                        if content == "all":
+                            yield res
+                        all_results.append(res)
 
-            if results:
-                user_turn_result = Turn("user", results)
+            if all_results:
+                user_turn_result = Turn("user", all_results)
 
     @overload
     def _chat_impl_async(
@@ -1316,24 +1886,25 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             assert turn is not None
             user_turn_result = None
 
-            results: list[ContentToolResult] = []
+            all_results: list[ContentToolResult] = []
             for x in turn.contents:
                 if isinstance(x, ContentToolRequest):
                     if echo == "output":
                         self._echo_content(f"\n\n{x}\n\n")
                     if content == "all":
                         yield x
-                    res = await self._invoke_tool_async(x)
-                    if echo == "output":
-                        self._echo_content(f"\n\n{res}\n\n")
-                    if content == "all":
-                        yield res
-                    else:
-                        yield "\n\n"
-                    results.append(res)
-
-            if results:
-                user_turn_result = Turn("user", results)
+                    results = self._invoke_tool_async(x)
+                    async for res in results:
+                        if echo == "output":
+                            self._echo_content(f"\n\n{res}\n\n")
+                        if content == "all":
+                            yield res
+                        else:
+                            yield "\n\n"
+                        all_results.append(res)
+
+            if all_results:
+                user_turn_result = Turn("user", all_results)
 
     def _submit_turns(
         self,
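Both tool loops now iterate over multiple results per request: the `_invoke_tool()`/`_invoke_tool_async()` rewrites further down normalize a plain return into a single-item generator, so a tool may also yield several results, each echoed, yielded, and recorded individually. A hedged sketch of such a yielding tool, inferred from this diff rather than from separate documentation:

```python
def search_files(pattern: str):
    """Search a set of files, reporting each match as it is found."""
    for name in ("a.txt", "b.txt", "c.txt"):  # stand-in for a real scan
        if pattern in name:
            yield f"match: {name}"


chat.register_tool(search_files)
```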
@@ -1354,13 +1925,25 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         if echo == "all":
             emit_user_contents(user_turn, emit)
 
+        # Start collecting additional keyword args (from model parameters)
+        all_kwargs = self.provider.translate_model_params(
+            params=self._standard_model_params,
+        )
+
+        # Add any additional kwargs provided by the user
+        if self._submit_input_kwargs:
+            all_kwargs.update(self._submit_input_kwargs)
+
+        if kwargs:
+            all_kwargs.update(kwargs)
+
         if stream:
             response = self.provider.chat_perform(
                 stream=True,
                 turns=[*self._turns, user_turn],
                 tools=self._tools,
                 data_model=data_model,
-                kwargs=kwargs,
+                kwargs=all_kwargs,
             )
 
             result = None
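The merge order here means per-call arguments win: provider-translated params from `set_model_params()` form the base, then `set_model_params(kwargs=...)`, then anything passed to the submitting call itself. A sketch (the `logprobs` key is an assumed provider-specific argument):

```python
chat.set_model_params(temperature=0.2, kwargs={"logprobs": True})

# Overrides temperature for this one request only; the stored 0.2 is untouched
chat.chat("Hello", kwargs={"temperature": 0.9})
```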
@@ -1385,7 +1968,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                 turns=[*self._turns, user_turn],
                 tools=self._tools,
                 data_model=data_model,
-                kwargs=kwargs,
+                kwargs=all_kwargs,
             )
 
             turn = self.provider.value_turn(
@@ -1462,54 +2045,56 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
 
         self._turns.extend([user_turn, turn])
 
-    def _invoke_tool(self, x: ContentToolRequest) -> ContentToolResult:
-        tool_def = self._tools.get(x.name, None)
+    def _invoke_tool(self, request: ContentToolRequest):
+        tool_def = self._tools.get(request.name, None)
         func = tool_def.func if tool_def is not None else None
 
         if func is None:
-            e = RuntimeError(f"Unknown tool: {x.name}")
-            return ContentToolResult(value=None, error=e, request=x)
+            yield self._handle_tool_error_result(
+                request,
+                error=RuntimeError("Unknown tool."),
+            )
+            return
 
         # First, invoke the request callbacks. If a ToolRejectError is raised,
         # treat it like a tool failure (i.e., gracefully handle it).
         result: ContentToolResult | None = None
         try:
-            self._on_tool_request_callbacks.invoke(x)
+            self._on_tool_request_callbacks.invoke(request)
         except ToolRejectError as e:
-            result = ContentToolResult(value=None, error=e, request=x)
+            yield self._handle_tool_error_result(request, e)
+            return
 
-        # Invoke the tool (if it hasn't been rejected).
-        if result is None:
-            try:
-                if isinstance(x.arguments, dict):
-                    res = func(**x.arguments)
-                else:
-                    res = func(x.arguments)
+        try:
+            if isinstance(request.arguments, dict):
+                res = func(**request.arguments)
+            else:
+                res = func(request.arguments)
+
+            # Normalize res as a generator of results.
+            if not inspect.isgenerator(res):
+
+                def _as_generator(res):
+                    yield res
 
-                if isinstance(res, ContentToolResult):
-                    result = res
+                res = _as_generator(res)
+
+            for x in res:
+                if isinstance(x, ContentToolResult):
+                    result = x
                 else:
-                    result = ContentToolResult(value=res)
+                    result = ContentToolResult(value=x)
 
-                result.request = x
-            except Exception as e:
-                result = ContentToolResult(value=None, error=e, request=x)
+                result.request = request
 
-        # If we've captured an error, notify and log it.
-        if result.error:
-            warnings.warn(
-                f"Calling tool '{x.name}' led to an error.",
-                ToolFailureWarning,
-                stacklevel=2,
-            )
-            traceback.print_exc()
-            log_tool_error(x.name, str(x.arguments), result.error)
+                self._on_tool_result_callbacks.invoke(result)
+                yield result
 
-        self._on_tool_result_callbacks.invoke(result)
-        return result
+        except Exception as e:
+            yield self._handle_tool_error_result(request, e)
 
-    async def _invoke_tool_async(self, x: ContentToolRequest) -> ContentToolResult:
-        tool_def = self._tools.get(x.name, None)
+    async def _invoke_tool_async(self, request: ContentToolRequest):
+        tool_def = self._tools.get(request.name, None)
         func = None
         if tool_def:
             if tool_def._is_async:
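Tool rejection keeps its short-circuit behavior under the new flow: an `on_tool_request` callback that raises `ToolRejectError` produces a single error result via `_handle_tool_error_result` and returns early. A sketch of a guard callback, assuming `ToolRejectError` is exported at the package top level as the `_tools` import suggests:

```python
from chatlas import ToolRejectError


def guard(request):
    if request.name == "delete_file":  # `delete_file` is a hypothetical tool
        raise ToolRejectError("File deletion is disabled in this session.")


chat.on_tool_request(guard)
```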
@@ -1518,45 +2103,59 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                 func = wrap_async(tool_def.func)
 
         if func is None:
-            e = RuntimeError(f"Unknown tool: {x.name}")
-            return ContentToolResult(value=None, error=e, request=x)
+            yield self._handle_tool_error_result(
+                request,
+                error=RuntimeError("Unknown tool."),
+            )
+            return
 
         # First, invoke the request callbacks. If a ToolRejectError is raised,
         # treat it like a tool failure (i.e., gracefully handle it).
         result: ContentToolResult | None = None
         try:
-            await self._on_tool_request_callbacks.invoke_async(x)
+            await self._on_tool_request_callbacks.invoke_async(request)
         except ToolRejectError as e:
-            result = ContentToolResult(value=None, error=e, request=x)
+            yield self._handle_tool_error_result(request, e)
+            return
 
         # Invoke the tool (if it hasn't been rejected).
-        if result is None:
-            try:
-                if isinstance(x.arguments, dict):
-                    res = await func(**x.arguments)
-                else:
-                    res = await func(x.arguments)
+        try:
+            if isinstance(request.arguments, dict):
+                res = await func(**request.arguments)
+            else:
+                res = await func(request.arguments)
+
+            # Normalize res into a generator of results.
+            if not inspect.isasyncgen(res):
+
+                async def _as_async_generator(res):
+                    yield res
 
-                if isinstance(res, ContentToolResult):
-                    result = res
+                res = _as_async_generator(res)
+
+            async for x in res:
+                if isinstance(x, ContentToolResult):
+                    result = x
                 else:
-                    result = ContentToolResult(value=res)
+                    result = ContentToolResult(value=x)
 
-                result.request = x
-            except Exception as e:
-                result = ContentToolResult(value=None, error=e, request=x)
+                result.request = request
+                await self._on_tool_result_callbacks.invoke_async(result)
+                yield result
 
-        # If we've captured an error, notify and log it.
-        if result.error:
-            warnings.warn(
-                f"Calling tool '{x.name}' led to an error.",
-                ToolFailureWarning,
-                stacklevel=2,
-            )
-            traceback.print_exc()
-            log_tool_error(x.name, str(x.arguments), result.error)
+        except Exception as e:
+            yield self._handle_tool_error_result(request, e)
 
-        await self._on_tool_result_callbacks.invoke_async(result)
+    def _handle_tool_error_result(self, request: ContentToolRequest, error: Exception):
+        warnings.warn(
+            f"Calling tool '{request.name}' led to an error: {error}",
+            ToolFailureWarning,
+            stacklevel=2,
+        )
+        traceback.print_exc()
+        log_tool_error(request.name, str(request.arguments), error)
+        result = ContentToolResult(value=None, error=error, request=request)
+        self._on_tool_result_callbacks.invoke(result)
         return result
 
     def _markdown_display(self, echo: EchoOptions) -> ChatMarkdownDisplay:
@@ -1571,10 +2170,10 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             return ChatMarkdownDisplay(MockMarkdownDisplay(), self)
 
         # rich does a lot to detect a notebook environment, but it doesn't
-        # detect Quarto (at least not yet).
+        # detect Quarto or a Positron notebook.
         from rich.console import Console
 
-        is_web = Console().is_jupyter or os.getenv("QUARTO_PYTHON", None) is not None
+        is_web = Console().is_jupyter or is_quarto() or is_positron_notebook()
 
         opts = self._echo_options
 
@@ -1622,8 +2221,23 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
 
     def __repr__(self):
         turns = self.get_turns(include_system_prompt=True)
-        tokens = sum(sum(turn.tokens) for turn in turns if turn.tokens)
-        res = f"<Chat turns={len(turns)} tokens={tokens}>"
+        tokens = self.get_tokens()
+        tokens_asst = sum(u["tokens_total"] for u in tokens if u["role"] == "assistant")
+        tokens_user = sum(u["tokens_total"] for u in tokens if u["role"] == "user")
+
+        res = f"<Chat {self.provider.name}/{self.provider.model} turns={len(turns)} tokens={tokens_user}/{tokens_asst}"
+
+        # Add cost info only if we can compute it
+        cost = compute_cost(
+            self.provider.name,
+            self.provider.model,
+            tokens_user,
+            tokens_asst,
+        )
+        if cost is not None:
+            res += f" ${round(cost, ndigits=2)}"
+
+        res += ">"
         for turn in turns:
             res += "\n" + turn.__repr__(indent=2)
         return res + "\n"
@@ -1818,3 +2432,15 @@ class ToolFailureWarning(RuntimeWarning):
 
 # By default warnings are shown once; we want to always show them.
 warnings.simplefilter("always", ToolFailureWarning)
+
+
+def is_quarto():
+    return os.getenv("QUARTO_PYTHON", None) is not None
+
+
+def is_positron_notebook():
+    try:
+        mode = get_ipython().session_mode  # noqa: F821 # type: ignore
+        return mode == "notebook"
+    except Exception:
+        return False