chatlas 0.12.0__py3-none-any.whl → 0.13.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatlas/__init__.py +10 -0
- chatlas/_batch_chat.py +211 -0
- chatlas/_batch_job.py +234 -0
- chatlas/_chat.py +171 -42
- chatlas/_content.py +2 -2
- chatlas/_provider.py +88 -0
- chatlas/_provider_anthropic.py +106 -2
- chatlas/_provider_github.py +2 -2
- chatlas/_provider_openai.py +143 -12
- chatlas/_version.py +2 -2
- {chatlas-0.12.0.dist-info → chatlas-0.13.1.dist-info}/METADATA +2 -1
- {chatlas-0.12.0.dist-info → chatlas-0.13.1.dist-info}/RECORD +14 -12
- {chatlas-0.12.0.dist-info → chatlas-0.13.1.dist-info}/WHEEL +0 -0
- {chatlas-0.12.0.dist-info → chatlas-0.13.1.dist-info}/licenses/LICENSE +0 -0
chatlas/_chat.py
CHANGED
@@ -78,6 +78,7 @@ CompletionT = TypeVar("CompletionT")
 EchoOptions = Literal["output", "all", "none", "text"]
 
 T = TypeVar("T")
+BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
 
 
 def is_present(value: T | None | MISSING_TYPE) -> TypeGuard[T]:
@@ -209,6 +210,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         self,
         *,
         include_system_prompt: bool = False,
+        tool_result_role: Literal["assistant", "user"] = "user",
     ) -> list[Turn[CompletionT]]:
         """
         Get all the turns (i.e., message contents) in the chat.
@@ -217,14 +219,50 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         ----------
         include_system_prompt
             Whether to include the system prompt in the turns.
+        tool_result_role
+            The role to assign to turns containing tool results. By default,
+            tool results are assigned a role of "user" since they represent
+            information provided to the assistant. If set to "assistant", tool
+            result content (plus the surrounding assistant turn contents) is
+            collected into a single assistant turn. This is convenient for
+            display purposes and more generally if you want the tool calling
+            loop to be contained in a single turn.
         """
 
         if not self._turns:
             return self._turns
 
         if not include_system_prompt and self._turns[0].role == "system":
-            return self._turns[1:]
-        return self._turns
+            turns = self._turns[1:]
+        else:
+            turns = self._turns
+
+        if tool_result_role == "user":
+            return turns
+
+        if tool_result_role != "assistant":
+            raise ValueError(
+                f"Expected `tool_result_role` to be one of 'user' or 'assistant', not '{tool_result_role}'"
+            )
+
+        # If a turn is purely a tool result, change its role
+        turns2 = copy.deepcopy(turns)
+        for turn in turns2:
+            if all(isinstance(c, ContentToolResult) for c in turn.contents):
+                turn.role = tool_result_role
+
+        # If two consecutive turns have the same role (i.e., assistant), collapse them into one
+        final_turns: list[Turn[CompletionT]] = []
+        for x in turns2:
+            if not final_turns:
+                final_turns.append(x)
+                continue
+            if x.role != final_turns[-1].role:
+                final_turns.append(x)
+            else:
+                final_turns[-1].contents.extend(x.contents)
+
+        return final_turns
 
     def get_last_turn(
         self,
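For orientation, a minimal sketch of the new `tool_result_role` option; the tool and prompt are hypothetical, while `ChatOpenAI`, `register_tool()`, `chat()`, and `get_turns()` are existing chatlas APIs:

    from chatlas import ChatOpenAI

    def get_weather(city: str) -> str:
        """Hypothetical tool: report the weather for a city."""
        return f"Sunny in {city}"

    chat = ChatOpenAI()  # assumes an OpenAI API key is configured
    chat.register_tool(get_weather)
    chat.chat("What's the weather in Oslo?")

    # Default: turns containing only tool results keep the "user" role
    turns = chat.get_turns()

    # New in 0.13: fold tool results (plus surrounding assistant content)
    # into a single assistant turn, e.g. for display
    display_turns = chat.get_turns(tool_result_role="assistant")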
@@ -531,7 +569,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         args
             The input to get a token count for.
         data_model
-            If the input is meant for data extraction (i.e., `.extract_data()`), then
+            If the input is meant for data extraction (i.e., `.chat_structured()`), then
             this should be the Pydantic model that describes the structure of the data to
             extract.
 
@@ -585,7 +623,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         args
             The input to get a token count for.
         data_model
-            If this input is meant for data extraction (i.e., `.extract_data_async()`),
+            If this input is meant for data extraction (i.e., `.chat_structured_async()`),
             then this should be the Pydantic model that describes the structure of the data
             to extract.
 
@@ -608,6 +646,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         port: int = 0,
         host: str = "127.0.0.1",
         launch_browser: bool = True,
+        bookmark_store: Literal["url", "server", "disable"] = "url",
         bg_thread: Optional[bool] = None,
         echo: Optional[EchoOptions] = None,
         content: Literal["text", "all"] = "all",
@@ -626,6 +665,12 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
            The host to run the app on (the default is "127.0.0.1").
        launch_browser
            Whether to launch a browser window.
+       bookmark_store
+           One of the following (default is "url"):
+           - `"url"`: Store bookmarks in the URL (default).
+           - `"server"`: Store bookmarks on the server (requires a server-side
+             storage backend).
+           - `"disable"`: Disable bookmarking.
        bg_thread
            Whether to run the app in a background thread. If `None`, the app will
            run in a background thread if the current environment is a notebook.
@@ -647,24 +692,37 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             from shiny import App, run_app, ui
         except ImportError:
             raise ImportError(
-                "The `shiny` package is required for the `app` method. "
+                "The `shiny` package is required for the `app()` method. "
                 "Install it with `pip install shiny`."
             )
 
-
-
-
-
+        try:
+            from shinychat import (
+                Chat,
+                chat_ui,
+                message_content,  # pyright: ignore[reportAttributeAccessIssue]
+            )
+        except ImportError:
+            raise ImportError(
+                "The `shinychat` package is required for the `app()` method. "
+                "Install it with `pip install shinychat`."
+            )
 
-
-
-
-
-
-
-
+        messages = [
+            message_content(x) for x in self.get_turns(tool_result_role="assistant")
+        ]
+
+        def app_ui(x):
+            return ui.page_fillable(
+                chat_ui("chat", messages=messages),
+                fillable_mobile=True,
             )
 
+        def server(input):  # noqa: A002
+            chat = Chat("chat")
+
+            chat.enable_bookmarking(self)
+
             @chat.on_user_submit
             async def _(user_input: str):
                 if stream:
@@ -688,7 +746,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                     )
                 )
 
-        app = App(app_ui, server)
+        app = App(app_ui, server, bookmark_store=bookmark_store)
 
         def _run_app():
             run_app(app, launch_browser=launch_browser, port=port, host=host)
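A minimal sketch of the new bookmarking knob, assuming `shiny` and `shinychat` are installed; `ChatOpenAI` stands in for any provider:

    from chatlas import ChatOpenAI

    chat = ChatOpenAI()
    # "url" (the default) encodes chat state in the URL so a session can be
    # restored from a link; "server" needs server-side storage; "disable" opts out.
    chat.app(bookmark_store="url", port=8080)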
@@ -997,20 +1055,22 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
 
         return wrapper()
 
-    def extract_data(
+    def chat_structured(
         self,
         *args: Content | str,
-        data_model: type[BaseModel],
+        data_model: type[BaseModelT],
         echo: EchoOptions = "none",
         stream: bool = False,
-    ) -> dict[str, Any]:
+    ) -> BaseModelT:
         """
-        Extract structured data from the given input.
+        Extract structured data.
 
         Parameters
         ----------
         args
-            The input to extract data from.
+            The input to send to the chatbot. This is typically the text you
+            want to extract data from, but it can be omitted if the data is
+            obvious from the existing conversation.
         data_model
             A Pydantic model describing the structure of the data to extract.
         echo
@@ -1024,10 +1084,47 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
 
         Returns
         -------
-
-
+        BaseModelT
+            An instance of the provided `data_model` containing the extracted data.
         """
+        dat = self._submit_and_extract_data(
+            *args,
+            data_model=data_model,
+            echo=echo,
+            stream=stream,
+        )
+        return data_model.model_validate(dat)
 
+    def extract_data(
+        self,
+        *args: Content | str,
+        data_model: type[BaseModel],
+        echo: EchoOptions = "none",
+        stream: bool = False,
+    ) -> dict[str, Any]:
+        """
+        Deprecated: use `.chat_structured()` instead.
+        """
+        warnings.warn(
+            "The `extract_data()` method is deprecated and will be removed in a future release. "
+            "Use the `chat_structured()` method instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._submit_and_extract_data(
+            *args,
+            data_model=data_model,
+            echo=echo,
+            stream=stream,
+        )
+
+    def _submit_and_extract_data(
+        self,
+        *args: Content | str,
+        data_model: type[BaseModel],
+        echo: EchoOptions = "none",
+        stream: bool = False,
+    ) -> dict[str, Any]:
         display = self._markdown_display(echo=echo)
 
         response = ChatResponse(
@@ -1046,33 +1143,24 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         turn = self.get_last_turn()
         assert turn is not None
 
-        res: list[ContentJson] = []
-        for x in turn.contents:
-            if isinstance(x, ContentJson):
-                res.append(x)
-
-        if len(res) != 1:
-            raise ValueError(
-                f"Data extraction failed: {len(res)} data results received."
-            )
-
-        json = res[0]
-        return json.value
+        return Chat._extract_turn_json(turn)
 
-    async def extract_data_async(
+    async def chat_structured_async(
         self,
         *args: Content | str,
-        data_model: type[BaseModel],
+        data_model: type[BaseModelT],
         echo: EchoOptions = "none",
         stream: bool = False,
-    ) -> dict[str, Any]:
+    ) -> BaseModelT:
         """
         Extract structured data from the given input asynchronously.
 
         Parameters
         ----------
         args
-            The input to extract data from.
+            The input to send to the chatbot. This is typically the text you
+            want to extract data from, but it can be omitted if the data is
+            obvious from the existing conversation.
         data_model
             A Pydantic model describing the structure of the data to extract.
         echo
@@ -1087,10 +1175,47 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
 
         Returns
         -------
-
-
+        BaseModelT
+            An instance of the provided `data_model` containing the extracted data.
         """
+        dat = await self._submit_and_extract_data_async(
+            *args,
+            data_model=data_model,
+            echo=echo,
+            stream=stream,
+        )
+        return data_model.model_validate(dat)
 
+    async def extract_data_async(
+        self,
+        *args: Content | str,
+        data_model: type[BaseModel],
+        echo: EchoOptions = "none",
+        stream: bool = False,
+    ) -> dict[str, Any]:
+        """
+        Deprecated: use `.chat_structured_async()` instead.
+        """
+        warnings.warn(
+            "The `extract_data_async()` method is deprecated and will be removed in a future release. "
+            "Use the `chat_structured_async()` method instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return await self._submit_and_extract_data_async(
+            *args,
+            data_model=data_model,
+            echo=echo,
+            stream=stream,
+        )
+
+    async def _submit_and_extract_data_async(
+        self,
+        *args: Content | str,
+        data_model: type[BaseModel],
+        echo: EchoOptions = "none",
+        stream: bool = False,
+    ) -> dict[str, Any]:
         display = self._markdown_display(echo=echo)
 
         response = ChatResponseAsync(
@@ -1109,6 +1234,10 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         turn = self.get_last_turn()
         assert turn is not None
 
+        return Chat._extract_turn_json(turn)
+
+    @staticmethod
+    def _extract_turn_json(turn: Turn) -> dict[str, Any]:
         res: list[ContentJson] = []
         for x in turn.contents:
             if isinstance(x, ContentJson):
chatlas/_content.py
CHANGED
@@ -603,7 +603,7 @@ class ContentJson(Content):
     JSON content
 
     This content type primarily exists to signal structured data extraction
-    (i.e., data extracted via [](`~chatlas.Chat`)'s `.extract_data()` method)
+    (i.e., data extracted via [](`~chatlas.Chat`)'s `.chat_structured()` method)
 
     Parameters
     ----------
@@ -630,7 +630,7 @@ class ContentPDF(Content):
     PDF content
 
     This content type primarily exists to signal PDF data extraction
-    (i.e., data extracted via [](`~chatlas.Chat`)'s `.extract_data()` method)
+    (i.e., data extracted via [](`~chatlas.Chat`)'s `.chat_structured()` method)
 
     Parameters
     ----------
chatlas/_provider.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 from abc import ABC, abstractmethod
 from datetime import date
 from typing import (
+    Any,
     AsyncIterable,
     Generic,
     Iterable,
@@ -100,6 +101,16 @@ StandardModelParamNames = Literal[
 ]
 
 
+# Provider-agnostic batch status info
+class BatchStatus(BaseModel):
+    """Status information for a batch job."""
+
+    working: bool
+    n_processing: int
+    n_succeeded: int
+    n_failed: int
+
+
 class Provider(
     ABC,
     Generic[
@@ -261,3 +272,80 @@ class Provider(
 
     @abstractmethod
     def supported_model_params(self) -> set[StandardModelParamNames]: ...
+
+    def has_batch_support(self) -> bool:
+        """
+        Returns whether this provider supports batch processing.
+        Override this method to return True for providers that implement batch methods.
+        """
+        return False
+
+    def batch_submit(
+        self,
+        conversations: list[list[Turn]],
+        data_model: Optional[type[BaseModel]] = None,
+    ) -> dict[str, Any]:
+        """
+        Submit a batch of conversations for processing.
+
+        Args:
+            conversations: List of conversation histories (each is a list of Turns)
+            data_model: Optional structured data model for responses
+
+        Returns:
+            BatchInfo containing batch job information
+        """
+        raise NotImplementedError("This provider does not support batch processing")
+
+    def batch_poll(self, batch: dict[str, Any]) -> dict[str, Any]:
+        """
+        Poll the status of a submitted batch.
+
+        Args:
+            batch: Batch information returned from batch_submit
+
+        Returns:
+            Updated batch information
+        """
+        raise NotImplementedError("This provider does not support batch processing")
+
+    def batch_status(self, batch: dict[str, Any]) -> BatchStatus:
+        """
+        Get the status of a batch.
+
+        Args:
+            batch: Batch information
+
+        Returns:
+            BatchStatus with processing status information
+        """
+        raise NotImplementedError("This provider does not support batch processing")
+
+    def batch_retrieve(self, batch: dict[str, Any]) -> list[dict[str, Any]]:
+        """
+        Retrieve results from a completed batch.
+
+        Args:
+            batch: Batch information
+
+        Returns:
+            List of BatchResult objects, one for each request in the batch
+        """
+        raise NotImplementedError("This provider does not support batch processing")
+
+    def batch_result_turn(
+        self,
+        result: dict[str, Any],
+        has_data_model: bool = False,
+    ) -> Turn | None:
+        """
+        Convert a batch result to a Turn.
+
+        Args:
+            result: Individual BatchResult from batch_retrieve
+            has_data_model: Whether the request used a structured data model
+
+        Returns:
+            Turn object or None if the result was an error
+        """
+        raise NotImplementedError("This provider does not support batch processing")
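These defaults raise `NotImplementedError`, so providers opt in by overriding them. The hooks imply a provider-agnostic loop along these lines (a sketch; `provider` is any `Provider` subclass with batch support and `conversations` is a list of `Turn` lists):

    import time

    if provider.has_batch_support():
        batch = provider.batch_submit(conversations)
        while provider.batch_status(batch).working:
            time.sleep(10)                      # poll interval is arbitrary here
            batch = provider.batch_poll(batch)  # refresh the stored batch state
        results = provider.batch_retrieve(batch)
        turns = [provider.batch_result_turn(r) for r in results]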
chatlas/_provider_anthropic.py
CHANGED
@@ -1,10 +1,12 @@
 from __future__ import annotations
 
 import base64
+import re
 import warnings
 from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast, overload
 
 import orjson
+from openai.types.chat import ChatCompletionToolParam
 from pydantic import BaseModel
 
 from ._chat import Chat
@@ -21,7 +23,13 @@ from ._content import (
     ContentToolResultResource,
 )
 from ._logging import log_model_default
-from ._provider import ModelInfo, Provider, StandardModelParamNames, StandardModelParams
+from ._provider import (
+    BatchStatus,
+    ModelInfo,
+    Provider,
+    StandardModelParamNames,
+    StandardModelParams,
+)
 from ._tokens import get_token_pricing, tokens_log
 from ._tools import Tool, basemodel_to_param_schema
 from ._turn import Turn, user_turn
@@ -38,11 +46,12 @@ if TYPE_CHECKING:
     )
     from anthropic.types.document_block_param import DocumentBlockParam
     from anthropic.types.image_block_param import ImageBlockParam
+    from anthropic.types.message_create_params import MessageCreateParamsNonStreaming
+    from anthropic.types.messages.batch_create_params import Request as BatchRequest
     from anthropic.types.model_param import ModelParam
     from anthropic.types.text_block_param import TextBlockParam
     from anthropic.types.tool_result_block_param import ToolResultBlockParam
     from anthropic.types.tool_use_block_param import ToolUseBlockParam
-    from openai.types.chat import ChatCompletionToolParam
 
     from .types.anthropic import ChatBedrockClientArgs, ChatClientArgs, SubmitInputArgs
 
@@ -631,6 +640,101 @@ class AnthropicProvider(
             completion=completion,
         )
 
+    def has_batch_support(self) -> bool:
+        return True
+
+    def batch_submit(
+        self,
+        conversations: list[list[Turn]],
+        data_model: Optional[type[BaseModel]] = None,
+    ):
+        from anthropic import NotGiven
+
+        requests: list["BatchRequest"] = []
+
+        for i, turns in enumerate(conversations):
+            kwargs = self._chat_perform_args(
+                stream=False,
+                turns=turns,
+                tools={},
+                data_model=data_model,
+            )
+
+            params: "MessageCreateParamsNonStreaming" = {
+                "messages": kwargs.get("messages", {}),
+                "model": self.model,
+                "max_tokens": kwargs.get("max_tokens", 4096),
+            }
+
+            # If data_model, tools/tool_choice should be present
+            tools = kwargs.get("tools")
+            tool_choice = kwargs.get("tool_choice")
+            if tools and not isinstance(tools, NotGiven):
+                params["tools"] = tools
+            if tool_choice and not isinstance(tool_choice, NotGiven):
+                params["tool_choice"] = tool_choice
+
+            requests.append({"custom_id": f"request-{i}", "params": params})
+
+        batch = self._client.messages.batches.create(requests=requests)
+        return batch.model_dump()
+
+    def batch_poll(self, batch):
+        from anthropic.types.messages import MessageBatch
+
+        batch = MessageBatch.model_validate(batch)
+        b = self._client.messages.batches.retrieve(batch.id)
+        return b.model_dump()
+
+    def batch_status(self, batch) -> "BatchStatus":
+        from anthropic.types.messages import MessageBatch
+
+        batch = MessageBatch.model_validate(batch)
+        status = batch.processing_status
+        counts = batch.request_counts
+
+        return BatchStatus(
+            working=status != "ended",
+            n_processing=counts.processing,
+            n_succeeded=counts.succeeded,
+            n_failed=counts.errored + counts.canceled + counts.expired,
+        )
+
+    # https://docs.anthropic.com/en/api/retrieving-message-batch-results
+    def batch_retrieve(self, batch):
+        from anthropic.types.messages import MessageBatch
+
+        batch = MessageBatch.model_validate(batch)
+        if batch.results_url is None:
+            raise ValueError("Batch has no results URL")
+
+        results: list[dict[str, Any]] = []
+        for res in self._client.messages.batches.results(batch.id):
+            results.append(res.model_dump())
+
+        # Sort by custom_id to maintain order
+        def extract_id(x: str):
+            match = re.search(r"-(\d+)$", x)
+            return int(match.group(1)) if match else 0
+
+        results.sort(key=lambda x: extract_id(x.get("custom_id", "")))
+
+        return results
+
+    def batch_result_turn(self, result, has_data_model: bool = False) -> Turn | None:
+        from anthropic.types.messages.message_batch_individual_response import (
+            MessageBatchIndividualResponse,
+        )
+
+        result = MessageBatchIndividualResponse.model_validate(result)
+        if result.result.type != "succeeded":
+            # TODO: offer advice on what to do?
+            warnings.warn(f"Batch request didn't succeed: {result.result}")
+            return None
+
+        message = result.result.message
+        return self._as_turn(message, has_data_model)
+
 
 def ChatBedrockAnthropic(
     *,
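Two design points are worth noting here: batch state crosses the `Provider` boundary as plain dicts (`model_dump()` going out, `model_validate()` coming back), which keeps it JSON-serializable so a pending job can presumably be persisted and resumed by the new `_batch_job.py` machinery; and retrieved results are re-sorted by the numeric suffix of `custom_id`, since Anthropic does not guarantee that batch results come back in submission order.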
chatlas/_provider_github.py
CHANGED
@@ -141,7 +141,7 @@ def ChatGithub(
 
 class GitHubProvider(OpenAIProvider):
     def __init__(self, base_url: str, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(base_url=base_url, **kwargs)
         self._base_url = base_url
 
     def list_models(self) -> list[ModelInfo]:
@@ -190,7 +190,7 @@ def list_models_gh_azure(base_url: str = "https://models.inference.ai.azure.com"
     for m in models:
         info: ModelInfo = {
             "id": m["name"],
-            "provider": m["publisher"]
+            "provider": m["publisher"],
         }
         res.append(info)
 
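The `__init__` change looks like a genuine fix: `GitHubProvider` previously stored `base_url` on itself but never forwarded it to `OpenAIProvider`, so the underlying client would presumably be constructed against the default OpenAI endpoint rather than the GitHub Models one. The `list_models_gh_azure` change only adds a trailing comma and is stylistic.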