chatlas 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
- chatlas/__init__.py +2 -1
- chatlas/_anthropic.py +104 -6
- chatlas/_chat.py +246 -24
- chatlas/_content.py +20 -7
- chatlas/_google.py +312 -161
- chatlas/_merge.py +1 -1
- chatlas/_ollama.py +8 -0
- chatlas/_openai.py +64 -7
- chatlas/_provider.py +16 -8
- chatlas/py.typed +0 -0
- chatlas/types/__init__.py +5 -1
- chatlas/types/anthropic/_client.py +0 -8
- chatlas/types/anthropic/_submit.py +2 -3
- chatlas/types/google/_client.py +12 -91
- chatlas/types/google/_submit.py +40 -87
- chatlas/types/openai/_client.py +1 -0
- chatlas/types/openai/_client_azure.py +1 -0
- chatlas/types/openai/_submit.py +10 -2
- {chatlas-0.2.0.dist-info → chatlas-0.4.0.dist-info}/METADATA +25 -11
- chatlas-0.4.0.dist-info/RECORD +38 -0
- {chatlas-0.2.0.dist-info → chatlas-0.4.0.dist-info}/WHEEL +1 -1
- chatlas-0.2.0.dist-info/RECORD +0 -37
chatlas/__init__.py
CHANGED
@@ -3,7 +3,7 @@ from ._anthropic import ChatAnthropic, ChatBedrockAnthropic
 from ._chat import Chat
 from ._content_image import content_image_file, content_image_plot, content_image_url
 from ._github import ChatGithub
-from ._google import ChatGoogle
+from ._google import ChatGoogle, ChatVertex
 from ._groq import ChatGroq
 from ._interpolate import interpolate, interpolate_file
 from ._ollama import ChatOllama
@@ -24,6 +24,7 @@ __all__ = (
     "ChatOpenAI",
     "ChatAzureOpenAI",
     "ChatPerplexity",
+    "ChatVertex",
     "Chat",
     "content_image_file",
     "content_image_plot",
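Downstream, the new Vertex entry point is imported the same way as the existing constructors. A minimal sketch (the model name is illustrative, and the constructor's exact parameters are an assumption, not shown in this diff):

```python
from chatlas import ChatVertex

# Hypothetical usage: "gemini-1.5-flash" is an illustrative model name, and
# Vertex credentials/project are assumed to come from the environment.
chat = ChatVertex(model="gemini-1.5-flash")
chat.chat("Hello!")
```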
chatlas/_anthropic.py
CHANGED
@@ -20,7 +20,7 @@ from ._logging import log_model_default
 from ._provider import Provider
 from ._tokens import tokens_log
 from ._tools import Tool, basemodel_to_param_schema
-from ._turn import Turn, normalize_turns
+from ._turn import Turn, normalize_turns, user_turn
 
 if TYPE_CHECKING:
     from anthropic.types import (
@@ -311,7 +311,8 @@ class AnthropicProvider(Provider[Message, RawMessageStreamEvent, Message]):
         if stream:
             stream = False
             warnings.warn(
-                "Anthropic does not support structured data extraction in streaming mode."
+                "Anthropic does not support structured data extraction in streaming mode.",
+                stacklevel=2,
             )
 
         kwargs_full: "SubmitInputArgs" = {
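The only behavioral change in this hunk is `stacklevel=2`, which makes Python report the warning at the caller of the provider method instead of at the `warnings.warn()` line inside the library. A standalone illustration of that stdlib behavior:

```python
import warnings

def extract_data(stream: bool = True):
    if stream:
        # stacklevel=2 attributes the warning to whoever called
        # extract_data(), which is far more useful in a library.
        warnings.warn("streaming not supported; falling back", stacklevel=2)

extract_data()  # the warning is reported at this line
```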
@@ -371,15 +372,65 @@ class AnthropicProvider(Provider[Message, RawMessageStreamEvent, Message]):
 
         return completion
 
-    def stream_turn(self, completion, has_data_model, stream) -> Turn:
-        return self._as_turn(completion, has_data_model)
-
-    async def stream_turn_async(self, completion, has_data_model, stream) -> Turn:
+    def stream_turn(self, completion, has_data_model) -> Turn:
         return self._as_turn(completion, has_data_model)
 
     def value_turn(self, completion, has_data_model) -> Turn:
         return self._as_turn(completion, has_data_model)
 
+    def token_count(
+        self,
+        *args: Content | str,
+        tools: dict[str, Tool],
+        data_model: Optional[type[BaseModel]],
+    ) -> int:
+        kwargs = self._token_count_args(
+            *args,
+            tools=tools,
+            data_model=data_model,
+        )
+        res = self._client.messages.count_tokens(**kwargs)
+        return res.input_tokens
+
+    async def token_count_async(
+        self,
+        *args: Content | str,
+        tools: dict[str, Tool],
+        data_model: Optional[type[BaseModel]],
+    ) -> int:
+        kwargs = self._token_count_args(
+            *args,
+            tools=tools,
+            data_model=data_model,
+        )
+        res = await self._async_client.messages.count_tokens(**kwargs)
+        return res.input_tokens
+
+    def _token_count_args(
+        self,
+        *args: Content | str,
+        tools: dict[str, Tool],
+        data_model: Optional[type[BaseModel]],
+    ) -> dict[str, Any]:
+        turn = user_turn(*args)
+
+        kwargs = self._chat_perform_args(
+            stream=False,
+            turns=[turn],
+            tools=tools,
+            data_model=data_model,
+        )
+
+        args_to_keep = [
+            "messages",
+            "model",
+            "system",
+            "tools",
+            "tool_choice",
+        ]
+
+        return {arg: kwargs[arg] for arg in args_to_keep if arg in kwargs}
+
     def _as_message_params(self, turns: list[Turn]) -> list["MessageParam"]:
         messages: list["MessageParam"] = []
         for turn in turns:
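Per the new methods above, token counting delegates to Anthropic's token-counting endpoint and returns its `input_tokens` field. A minimal sketch of the underlying SDK call (the model name is illustrative; an `ANTHROPIC_API_KEY` is assumed):

```python
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
res = client.messages.count_tokens(
    model="claude-3-5-sonnet-20240620",  # illustrative model name
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
)
print(res.input_tokens)
```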
@@ -575,6 +626,53 @@ def ChatBedrockAnthropic(
         Additional arguments to pass to the `anthropic.AnthropicBedrock()`
         client constructor.
 
+    Troubleshooting
+    ---------------
+
+    If you encounter 400 or 403 errors when trying to use the model, keep the
+    following in mind:
+
+    ::: {.callout-note}
+    #### Incorrect model name
+
+    If the model name is completely incorrect, you'll see an error like
+    `Error code: 400 - {'message': 'The provided model identifier is invalid.'}`
+
+    Make sure the model name is correct and active in the specified region.
+    :::
+
+    ::: {.callout-note}
+    #### Models are region specific
+
+    If you encounter errors similar to `Error code: 403 - {'message': "You don't
+    have access to the model with the specified model ID."}`, make sure your
+    model is active in the relevant `aws_region`.
+
+    Keep in mind, if `aws_region` is not specified, and AWS_REGION is not set,
+    the region defaults to us-east-1, which may not match your AWS config's
+    default region.
+    :::
+
+    ::: {.callout-note}
+    #### Cross region inference ID
+
+    In some cases, even if you have the right model and the right region, you
+    may still encounter an error like `Error code: 400 - {'message':
+    'Invocation of model ID anthropic.claude-3-5-sonnet-20240620-v1:0 with
+    on-demand throughput isn't supported. Retry your request with the ID or ARN
+    of an inference profile that contains this model.'}`
+
+    In this case, you'll need to look up the 'cross region inference ID' for
+    your model. This might require opening your `aws-console` and navigating to
+    the 'Anthropic Bedrock' service page. From there, go to the 'cross region
+    inference' tab and copy the relevant ID.
+
+    For example, if the desired model ID is
+    `anthropic.claude-3-5-sonnet-20240620-v1:0`, the cross region ID might look
+    something like `us.anthropic.claude-3-5-sonnet-20240620-v1:0`.
+    :::
+
+
     Returns
     -------
     Chat
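Applying the cross-region advice from the docstring above, a hedged sketch (the `model`/`aws_region` argument names follow the docstring; the values are the docstring's own example):

```python
from chatlas import ChatBedrockAnthropic

# If the plain model ID fails with the "on-demand throughput" 400 error,
# retry with the cross region inference ID (note the "us." prefix).
chat = ChatBedrockAnthropic(
    model="us.anthropic.claude-3-5-sonnet-20240620-v1:0",
    aws_region="us-east-1",
)
```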
chatlas/_chat.py
CHANGED
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
+import inspect
 import os
+import sys
 from pathlib import Path
 from threading import Thread
 from typing import (
@@ -16,6 +18,7 @@ from typing import (
     Optional,
     Sequence,
     TypeVar,
+    overload,
 )
 
 from pydantic import BaseModel
@@ -39,7 +42,7 @@ from ._provider import Provider
 from ._tools import Tool
 from ._turn import Turn, user_turn
 from ._typing_extensions import TypedDict
-from ._utils import html_escape
+from ._utils import html_escape, wrap_async
 
 
 class AnyTypeDict(TypedDict, total=False):
@@ -176,17 +179,209 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         if value is not None:
             self._turns.insert(0, Turn("system", value))
 
-
+    @overload
+    def tokens(self) -> list[tuple[int, int] | None]: ...
+
+    @overload
+    def tokens(
+        self,
+        values: Literal["cumulative"],
+    ) -> list[tuple[int, int] | None]: ...
+
+    @overload
+    def tokens(
+        self,
+        values: Literal["discrete"],
+    ) -> list[int]: ...
+
+    def tokens(
+        self,
+        values: Literal["cumulative", "discrete"] = "discrete",
+    ) -> list[int] | list[tuple[int, int] | None]:
         """
         Get the tokens for each turn in the chat.
 
+        Parameters
+        ----------
+        values
+            If "cumulative", the result can be summed to get the
+            chat's overall token usage (helpful for computing overall cost of
+            the chat). If "discrete" (the default), the result can be summed to get
+            the number of tokens the turns will cost to generate the next response
+            (helpful for estimating cost of the next response, or for determining
+            if you are about to exceed the token limit).
+
+        Returns
+        -------
+        list[int]
+            A list of token counts for each (non-system) turn in the chat. The
+            1st turn includes the token count for the system prompt (if any).
+
+        Raises
+        ------
+        ValueError
+            If the chat's turns (i.e., `.get_turns()`) are not in an expected
+            format. This may happen if the chat history is manually set (i.e.,
+            `.set_turns()`). In this case, you can inspect the "raw" token
+            values via the `.get_turns()` method (each turn has a `.tokens`
+            attribute).
+        """
+
+        turns = self.get_turns(include_system_prompt=False)
+
+        if values == "cumulative":
+            return [turn.tokens for turn in turns]
+
+        if len(turns) == 0:
+            return []
+
+        err_info = (
+            "This can happen if the chat history is manually set (i.e., `.set_turns()`). "
+            "Consider getting the 'raw' token values via the `.get_turns()` method "
+            "(each turn has a `.tokens` attribute)."
+        )
+
+        # Sanity checks for the assumptions made to figure out user token counts
+        if len(turns) == 1:
+            raise ValueError(
+                "Expected at least two turns in the chat history. " + err_info
+            )
+
+        if len(turns) % 2 != 0:
+            raise ValueError(
+                "Expected an even number of turns in the chat history. " + err_info
+            )
+
+        if turns[0].role != "user":
+            raise ValueError(
+                "Expected the 1st non-system turn to have role='user'. " + err_info
+            )
+
+        if turns[1].role != "assistant":
+            raise ValueError(
+                "Expected the 2nd non-system turn to have role='assistant'. " + err_info
+            )
+
+        if turns[1].tokens is None:
+            raise ValueError(
+                "Expected the 1st assistant turn to contain token counts. " + err_info
+            )
+
+        res: list[int] = [
+            # Implied token count for the 1st user input
+            turns[1].tokens[0],
+            # The token count for the 1st assistant response
+            turns[1].tokens[1],
+        ]
+        for i in range(1, len(turns) - 1, 2):
+            ti = turns[i]
+            tj = turns[i + 2]
+            if ti.role != "assistant" or tj.role != "assistant":
+                raise ValueError(
+                    "Expected even turns to have role='assistant'. " + err_info
+                )
+            if ti.tokens is None or tj.tokens is None:
+                raise ValueError(
+                    "Expected role='assistant' turns to contain token counts. "
+                    + err_info
+                )
+            res.extend(
+                [
+                    # Implied token count for the user input
+                    tj.tokens[0] - sum(ti.tokens),
+                    # The token count for the assistant response
+                    tj.tokens[1],
+                ]
+            )
+
+        return res
+
+    def token_count(
+        self,
+        *args: Content | str,
+        data_model: Optional[type[BaseModel]] = None,
+    ) -> int:
+        """
+        Get an estimated token count for the given input.
+
+        Estimate the token size of input content. This can help determine whether input(s)
+        and/or conversation history (i.e., `.get_turns()`) should be reduced in size before
+        sending it to the model.
+
+        Parameters
+        ----------
+        args
+            The input to get a token count for.
+        data_model
+            If the input is meant for data extraction (i.e., `.extract_data()`), then
+            this should be the Pydantic model that describes the structure of the data to
+            extract.
+
+        Returns
+        -------
+        int
+            The token count for the input.
+
+        Note
+        ----
+        Remember that the token count is an estimate. Also, models based on
+        `ChatOpenAI()` currently do not take tools into account when
+        estimating token counts.
+
+        Examples
+        --------
+        ```python
+        from chatlas import ChatAnthropic
+
+        chat = ChatAnthropic()
+        # Estimate the token count before sending the input
+        print(chat.token_count("What is 2 + 2?"))
+
+        # Once input is sent, you can get the actual input and output
+        # token counts from the chat object
+        chat.chat("What is 2 + 2?", echo="none")
+        print(chat.token_usage())
+        ```
+        """
+
+        return self.provider.token_count(
+            *args,
+            tools=self._tools,
+            data_model=data_model,
+        )
+
+    async def token_count_async(
+        self,
+        *args: Content | str,
+        data_model: Optional[type[BaseModel]] = None,
+    ) -> int:
+        """
+        Get an estimated token count for the given input asynchronously.
+
+        Estimate the token size of input content. This can help determine whether input(s)
+        and/or conversation history (i.e., `.get_turns()`) should be reduced in size before
+        sending it to the model.
+
+        Parameters
+        ----------
+        args
+            The input to get a token count for.
+        data_model
+            If this input is meant for data extraction (i.e., `.extract_data_async()`),
+            then this should be the Pydantic model that describes the structure of the data
+            to extract.
+
         Returns
         -------
-
-
-            indices for a turn.
+        int
+            The token count for the input.
         """
-
+
+        return await self.provider.token_count_async(
+            *args,
+            tools=self._tools,
+            data_model=data_model,
+        )
 
     def app(
         self,
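To make the `"discrete"`/`"cumulative"` distinction concrete, a sketch based on the docstring above (all token numbers are made up):

```python
from chatlas import ChatAnthropic

chat = ChatAnthropic()
chat.chat("What is 2 + 2?", echo="none")
chat.chat("And times 3?", echo="none")

# One count per non-system turn; the sum estimates what the current
# history will cost on the *next* request.
print(chat.tokens("discrete"))    # e.g. [14, 5, 10, 7]

# Raw (input, output) pairs for assistant turns (None for user turns);
# the sum reflects the chat's overall usage so far.
print(chat.tokens("cumulative"))  # e.g. [None, (14, 5), None, (29, 7)]
```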
@@ -195,6 +390,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         port: int = 0,
         launch_browser: bool = True,
         bg_thread: Optional[bool] = None,
+        echo: Optional[Literal["text", "all", "none"]] = None,
         kwargs: Optional[SubmitInputArgsT] = None,
     ):
         """
@@ -211,6 +407,8 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         bg_thread
             Whether to run the app in a background thread. If `None`, the app will
             run in a background thread if the current environment is a notebook.
+        echo
+            Whether to echo text content, all content (i.e., tool calls), or no content. Defaults to `"none"` when `stream=True` and `"text"` when `stream=False`.
         kwargs
             Additional keyword arguments to pass to the method used for requesting
             the response.
@@ -245,10 +443,22 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                 return
             if stream:
                 await chat.append_message_stream(
-                    self.
+                    await self.stream_async(
+                        user_input,
+                        kwargs=kwargs,
+                        echo=echo or "none",
+                    )
                 )
             else:
-                await chat.append_message(
+                await chat.append_message(
+                    str(
+                        self.chat(
+                            user_input,
+                            kwargs=kwargs,
+                            echo=echo or "text",
+                        )
+                    )
+                )
 
         app = App(app_ui, server)
 
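The new `echo` parameter flows straight through to `.chat()`/`.stream_async()`, so the browser app can now surface tool calls. For example (the constructor choice here is arbitrary):

```python
from chatlas import ChatOpenAI

chat = ChatOpenAI()
# Echo all content, including tool calls; with the default echo=None,
# streaming uses "none" and non-streaming uses "text".
chat.app(echo="all")
```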
@@ -755,11 +965,11 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         is_html = filename.suffix == ".html"
 
         # Get contents from each turn
-
+        content_arr: list[str] = []
         for turn in turns:
             turn_content = "\n\n".join(
                 [
-                    str(content)
+                    str(content).strip()
                     for content in turn.contents
                     if include == "all" or isinstance(content, ContentText)
                 ]
@@ -770,7 +980,8 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                 turn_content = f"<shiny-{msg_type}-message content='{content_attr}'></shiny-{msg_type}-message>"
             else:
                 turn_content = f"## {turn.role.capitalize()}\n\n{turn_content}"
-
+            content_arr.append(turn_content)
+        contents = "\n\n".join(content_arr)
 
         # Shiny chat message components requires container elements
         if is_html:
@@ -900,7 +1111,6 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             turn = self.provider.stream_turn(
                 result,
                 has_data_model=data_model is not None,
-                stream=response,
             )
 
             if echo == "all":
@@ -961,10 +1171,9 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                 yield text
             result = self.provider.stream_merge_chunks(result, chunk)
 
-        turn = await self.provider.stream_turn_async(
+        turn = self.provider.stream_turn(
             result,
             has_data_model=data_model is not None,
-            stream=response,
         )
 
         if echo == "all":
@@ -1017,7 +1226,12 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         for x in turn.contents:
             if isinstance(x, ContentToolRequest):
                 tool_def = self._tools.get(x.name, None)
-                func =
+                func = None
+                if tool_def:
+                    if tool_def._is_async:
+                        func = tool_def.func
+                    else:
+                        func = wrap_async(tool_def.func)
                 results.append(await self._invoke_tool_async(func, x.arguments, x.id))
 
         if not results:
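Tool dispatch is now uniformly async: async tools are awaited as-is, and sync tools are wrapped with `wrap_async`. The helper's implementation isn't part of this diff; a minimal sketch of what such a wrapper typically looks like (an assumption, not chatlas' actual code):

```python
import asyncio
from typing import Any, Awaitable, Callable

def wrap_async(func: Callable[..., Any]) -> Callable[..., Awaitable[Any]]:
    # Run a synchronous tool in a worker thread so it doesn't block the
    # event loop while being awaited.
    async def wrapper(*args: Any, **kwargs: Any) -> Any:
        return await asyncio.to_thread(func, *args, **kwargs)

    return wrapper
```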
@@ -1032,7 +1246,9 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         id_: str,
     ) -> ContentToolResult:
         if func is None:
-            return ContentToolResult(id_, None, "Unknown tool")
+            return ContentToolResult(id_, value=None, error="Unknown tool")
+
+        name = func.__name__
 
         try:
             if isinstance(arguments, dict):
@@ -1040,10 +1256,10 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             else:
                 result = func(arguments)
 
-            return ContentToolResult(id_, result, None)
+            return ContentToolResult(id_, value=result, error=None, name=name)
         except Exception as e:
-            log_tool_error(func.__name__, str(arguments), e)
-            return ContentToolResult(id_, None, str(e))
+            log_tool_error(name, str(arguments), e)
+            return ContentToolResult(id_, value=None, error=str(e), name=name)
 
     @staticmethod
     async def _invoke_tool_async(
@@ -1052,7 +1268,9 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         id_: str,
     ) -> ContentToolResult:
         if func is None:
-            return ContentToolResult(id_, None, "Unknown tool")
+            return ContentToolResult(id_, value=None, error="Unknown tool")
+
+        name = func.__name__
 
         try:
             if isinstance(arguments, dict):
@@ -1060,10 +1278,10 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             else:
                 result = await func(arguments)
 
-            return ContentToolResult(id_, result, None)
+            return ContentToolResult(id_, value=result, error=None, name=name)
         except Exception as e:
             log_tool_error(func.__name__, str(arguments), e)
-            return ContentToolResult(id_, None, str(e))
+            return ContentToolResult(id_, value=None, error=str(e), name=name)
 
     def _markdown_display(
         self, echo: Literal["text", "all", "none"]
@@ -1180,7 +1398,7 @@ class ChatResponse:
 
     @property
     def consumed(self) -> bool:
-        return self._generator
+        return inspect.getgeneratorstate(self._generator) == inspect.GEN_CLOSED
 
     def __str__(self) -> str:
         return self.get_content()
@@ -1230,7 +1448,11 @@ class ChatResponseAsync:
 
     @property
     def consumed(self) -> bool:
-
+        if sys.version_info < (3, 12):
+            raise NotImplementedError(
+                "Checking for consumed state is only supported in Python 3.12+"
+            )
+        return inspect.getasyncgenstate(self._generator) == inspect.AGEN_CLOSED
 
 
 # ----------------------------------------------------------------------------
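Both `consumed` properties now inspect real generator state rather than returning a sentinel. `inspect.getasyncgenstate()` only exists on Python 3.12+, hence the version guard above; the sync counterpart works everywhere:

```python
import inspect

def gen():
    yield "hello"

g = gen()
print(inspect.getgeneratorstate(g) == inspect.GEN_CLOSED)  # False
for _ in g:
    pass
print(inspect.getgeneratorstate(g) == inspect.GEN_CLOSED)  # True
```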
chatlas/_content.py
CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import json
 from dataclasses import dataclass
+from pprint import pformat
 from typing import Any, Literal, Optional
 
 ImageContentTypes = Literal[
@@ -154,7 +155,7 @@ class ContentToolRequest(Content):
         args_str = self._arguments_str()
         func_call = f"{self.name}({args_str})"
         comment = f"# tool request ({self.id})"
-        return f"
+        return f"```python\n{comment}\n{func_call}\n```\n"
 
     def _repr_markdown_(self):
         return self.__str__()
@@ -187,18 +188,31 @@ class ContentToolResult(Content):
         The unique identifier of the tool request.
     value
         The value returned by the tool/function.
+    name
+        The name of the tool/function that was called.
     error
         An error message if the tool/function call failed.
     """
 
     id: str
     value: Any = None
+    name: Optional[str] = None
     error: Optional[str] = None
 
+    def _get_value_and_language(self) -> tuple[str, str]:
+        if self.error:
+            return f"Tool calling failed with error: '{self.error}'", ""
+        try:
+            json_val = json.loads(self.value)
+            return pformat(json_val, indent=2, sort_dicts=False), "python"
+        except:  # noqa: E722
+            return str(self.value), ""
+
     def __str__(self):
         comment = f"# tool result ({self.id})"
-
-
+        value, language = self._get_value_and_language()
+
+        return f"""```{language}\n{comment}\n{value}\n```"""
 
     def _repr_markdown_(self):
         return self.__str__()
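The new `_get_value_and_language()` helper pretty-prints values that parse as JSON and falls back to `str()` (or an error message) otherwise. A quick sketch of the resulting behavior (field values are illustrative):

```python
from chatlas._content import ContentToolResult

# A JSON-string value is parsed and pretty-printed (and __str__() renders
# it inside a python-tagged fenced block)
ok = ContentToolResult("abc123", value='{"temp": 21, "unit": "C"}', name="get_weather")
print(ok.get_final_value())  # {'temp': 21, 'unit': 'C'}

# An error short-circuits to a plain message with no language tag
bad = ContentToolResult("abc123", name="get_weather", error="boom")
print(bad.get_final_value())  # Tool calling failed with error: 'boom'
```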
@@ -211,9 +225,8 @@ class ContentToolResult(Content):
         return res + ">"
 
     def get_final_value(self) -> str:
-
-
-        return str(self.value)
+        value, _language = self._get_value_and_language()
+        return value
 
 
 @dataclass
@@ -236,7 +249,7 @@ class ContentJson(Content):
         return json.dumps(self.value, indent=2)
 
     def _repr_markdown_(self):
-        return f"""
+        return f"""```json\n{self.__str__()}\n```"""
 
     def __repr__(self, indent: int = 0):
         return " " * indent + f"<ContentJson value={self.value}>"