chatlas 0.11.1__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of chatlas might be problematic.
- chatlas/__init__.py +10 -0
- chatlas/_auto.py +173 -61
- chatlas/_batch_chat.py +211 -0
- chatlas/_batch_job.py +234 -0
- chatlas/_chat.py +181 -43
- chatlas/_content.py +13 -8
- chatlas/_provider.py +88 -0
- chatlas/_provider_anthropic.py +106 -2
- chatlas/_provider_openai.py +143 -12
- chatlas/_tools.py +11 -3
- chatlas/_version.py +2 -2
- chatlas/types/anthropic/_submit.py +2 -2
- chatlas/types/openai/_client.py +2 -2
- chatlas/types/openai/_client_azure.py +2 -2
- chatlas/types/openai/_submit.py +2 -2
- {chatlas-0.11.1.dist-info → chatlas-0.13.0.dist-info}/METADATA +2 -1
- {chatlas-0.11.1.dist-info → chatlas-0.13.0.dist-info}/RECORD +19 -17
- {chatlas-0.11.1.dist-info → chatlas-0.13.0.dist-info}/WHEEL +0 -0
- {chatlas-0.11.1.dist-info → chatlas-0.13.0.dist-info}/licenses/LICENSE +0 -0
chatlas/_batch_job.py
ADDED
@@ -0,0 +1,234 @@
+from __future__ import annotations
+
+import hashlib
+import json
+import time
+from datetime import timedelta
+from pathlib import Path
+from typing import Any, Literal, Optional, TypeVar, Union
+
+from pydantic import BaseModel
+from rich.console import Console
+from rich.progress import Progress, SpinnerColumn, TextColumn
+
+from ._chat import Chat
+from ._content import Content
+from ._provider import BatchStatus
+from ._turn import Turn, user_turn
+from ._typing_extensions import TypedDict
+
+BatchStage = Literal["submitting", "waiting", "retrieving", "done"]
+
+
+class BatchStateHash(TypedDict):
+    provider: str
+    model: str
+    prompts: str
+    user_turns: str
+
+
+class BatchState(BaseModel):
+    version: int
+    stage: BatchStage
+    batch: dict[str, Any]
+    results: list[dict[str, Any]]
+    started_at: int
+    hash: BatchStateHash
+
+
+ContentT = TypeVar("ContentT", bound=Union[str, Content])
+
+
+class BatchJob:
+    """
+    Manages the lifecycle of a batch processing job.
+
+    A batch job goes through several stages:
+    1. "submitting" - Initial submission to the provider
+    2. "waiting" - Waiting for processing to complete
+    3. "retrieving" - Downloading results
+    4. "done" - Processing complete
+    """
+
+    def __init__(
+        self,
+        chat: Chat,
+        prompts: list[ContentT] | list[list[ContentT]],
+        path: Union[str, Path],
+        data_model: Optional[type[BaseModel]] = None,
+        wait: bool = True,
+    ):
+        if not chat.provider.has_batch_support():
+            raise ValueError("Batch requests are not supported by this provider")
+
+        self.chat = chat
+        self.prompts = prompts
+        self.path = Path(path)
+        self.data_model = data_model
+        self.should_wait = wait
+
+        # Convert prompts to user turns
+        self.user_turns: list[Turn] = []
+        for prompt in prompts:
+            if not isinstance(prompt, (str, Content)):
+                turn = user_turn(*prompt)
+            else:
+                turn = user_turn(prompt)
+            self.user_turns.append(turn)
+
+        # Job state management
+        self.provider = chat.provider
+        self.stage: BatchStage = "submitting"
+        self.batch: dict[str, Any] = {}
+        self.results: list[dict[str, Any]] = []
+
+        # Load existing state if file exists and is not empty
+        if self.path.exists() and self.path.stat().st_size > 0:
+            self._load_state()
+        else:
+            self.started_at = time.time()
+
+    def _load_state(self) -> None:
+        with open(self.path, "r") as f:
+            state = BatchState.model_validate_json(f.read())
+
+        self.stage = state.stage
+        self.batch = state.batch
+        self.results = state.results
+        self.started_at = state.started_at
+
+        # Verify hash to ensure consistency
+        stored_hash = state.hash
+        current_hash = self._compute_hash()
+
+        for key, value in current_hash.items():
+            if stored_hash.get(key) != value:
+                raise ValueError(
+                    f"Batch state mismatch: {key} doesn't match stored value. "
+                    f"Do you need to pick a different path?"
+                )
+
+    def _save_state(self) -> None:
+        state = BatchState(
+            version=1,
+            stage=self.stage,
+            batch=self.batch,
+            results=self.results,
+            started_at=int(self.started_at) if self.started_at else 0,
+            hash=self._compute_hash(),
+        )
+
+        with open(self.path, "w") as f:
+            f.write(state.model_dump_json(indent=2))
+
+    def _compute_hash(self) -> BatchStateHash:
+        turns = self.chat.get_turns(include_system_prompt=True)
+        return {
+            "provider": self.provider.name,
+            "model": self.provider.model,
+            "prompts": self._hash([str(p) for p in self.prompts]),
+            "user_turns": self._hash([str(turn) for turn in turns]),
+        }
+
+    @staticmethod
+    def _hash(x: Any) -> str:
+        return hashlib.md5(json.dumps(x, sort_keys=True).encode()).hexdigest()
+
+    def step(self) -> bool:
+        if self.stage == "submitting":
+            return self._submit()
+        elif self.stage == "waiting":
+            return self._wait()
+        elif self.stage == "retrieving":
+            return self._retrieve()
+        else:
+            raise ValueError(f"Unknown stage: {self.stage}")
+
+    def step_until_done(self) -> Optional["BatchJob"]:
+        while self.stage != "done":
+            if not self.step():
+                return None
+        return self
+
+    def _submit(self) -> bool:
+        existing_turns = self.chat.get_turns(include_system_prompt=True)
+
+        conversations = []
+        for turn in self.user_turns:
+            conversation = existing_turns + [turn]
+            conversations.append(conversation)
+
+        self.batch = self.provider.batch_submit(conversations, self.data_model)
+        self.stage = "waiting"
+        self._save_state()
+        return True
+
+    def _wait(self) -> bool:
+        # Always poll once, even when wait=False
+        status = self._poll()
+
+        if self.should_wait:
+            console = Console()
+
+            with Progress(
+                SpinnerColumn(),
+                TextColumn("Processing..."),
+                TextColumn("[{task.fields[elapsed]}]"),
+                TextColumn("{task.fields[n_processing]} pending |"),
+                TextColumn("[green]{task.fields[n_succeeded]}[/green] done |"),
+                TextColumn("[red]{task.fields[n_failed]}[/red] failed"),
+                console=console,
+            ) as progress:
+                task = progress.add_task(
+                    "processing",
+                    elapsed=self._elapsed(),
+                    n_processing=status.n_processing,
+                    n_succeeded=status.n_succeeded,
+                    n_failed=status.n_failed,
+                )
+
+                while status.working:
+                    time.sleep(0.5)
+                    status = self._poll()
+                    progress.update(
+                        task,
+                        elapsed=self._elapsed(),
+                        n_processing=status.n_processing,
+                        n_succeeded=status.n_succeeded,
+                        n_failed=status.n_failed,
+                    )
+
+        if not status.working:
+            self.stage = "retrieving"
+            self._save_state()
+            return True
+        else:
+            return False
+
+    def _poll(self) -> "BatchStatus":
+        if not self.batch:
+            raise ValueError("No batch to poll")
+        self.batch = self.provider.batch_poll(self.batch)
+        self._save_state()
+        return self.provider.batch_status(self.batch)
+
+    def _elapsed(self) -> str:
+        return str(timedelta(seconds=int(time.time()) - int(self.started_at)))
+
+    def _retrieve(self) -> bool:
+        if not self.batch:
+            raise ValueError("No batch to retrieve")
+        self.results = self.provider.batch_retrieve(self.batch)
+        self.stage = "done"
+        self._save_state()
+        return True
+
+    def result_turns(self) -> list[Turn | None]:
+        turns = []
+        for result in self.results:
+            turn = self.provider.batch_result_turn(
+                result, has_data_model=self.data_model is not None
+            )
+            turns.append(turn)
+
+        return turns
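
The class above is essentially a resumable state machine around the provider's batch API. A rough usage sketch follows (hedged: `_batch_job` is an internal module, and the new `chatlas/_batch_chat.py` in the file list suggests a higher-level public entry point whose API is not shown in this section; the provider, model name, and prompts are placeholders):

    from chatlas import ChatOpenAI

    from chatlas._batch_job import BatchJob  # internal module, shown in full above

    chat = ChatOpenAI(model="gpt-4.1-nano")  # placeholder model

    # Each prompt becomes its own conversation layered on the chat's existing
    # turns. Progress is checkpointed to the JSON file, so re-running the same
    # code resumes an interrupted job instead of resubmitting it.
    job = BatchJob(
        chat,
        prompts=["What is 1 + 1?", "Name a planet in our solar system."],
        path="answers_batch.json",
    )

    if job.step_until_done() is not None:
        for turn in job.result_turns():
            print(turn)  # None for requests that failed

Note that `_compute_hash()` folds the provider name, model, prompts, and existing turns into the saved state, so pointing a different job at the same path raises a "Batch state mismatch" error rather than silently mixing results.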
chatlas/_chat.py
CHANGED
@@ -78,6 +78,7 @@ CompletionT = TypeVar("CompletionT")
 EchoOptions = Literal["output", "all", "none", "text"]
 
 T = TypeVar("T")
+BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
 
 
 def is_present(value: T | None | MISSING_TYPE) -> TypeGuard[T]:
@@ -209,6 +210,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         self,
         *,
         include_system_prompt: bool = False,
+        tool_result_role: Literal["assistant", "user"] = "user",
     ) -> list[Turn[CompletionT]]:
         """
         Get all the turns (i.e., message contents) in the chat.
@@ -217,14 +219,50 @@
         ----------
         include_system_prompt
             Whether to include the system prompt in the turns.
+        tool_result_role
+            The role to assign to turns containing tool results. By default,
+            tool results are assigned a role of "user" since they represent
+            information provided to the assistant. If set to "assistant" tool
+            result content (plus the surrounding assistant turn contents) is
+            collected into a single assistant turn. This is convenient for
+            display purposes and more generally if you want the tool calling
+            loop to be contained in a single turn.
         """
 
         if not self._turns:
             return self._turns
 
         if not include_system_prompt and self._turns[0].role == "system":
-
-
+            turns = self._turns[1:]
+        else:
+            turns = self._turns
+
+        if tool_result_role == "user":
+            return turns
+
+        if tool_result_role != "assistant":
+            raise ValueError(
+                f"Expected `tool_result_role` to be one of 'user' or 'assistant', not '{tool_result_role}'"
+            )
+
+        # If a turn is purely a tool result, change its role
+        turns2 = copy.deepcopy(turns)
+        for turn in turns2:
+            if all(isinstance(c, ContentToolResult) for c in turn.contents):
+                turn.role = tool_result_role
+
+        # If two consecutive turns have the same role (i.e., assistant), collapse them into one
+        final_turns: list[Turn[CompletionT]] = []
+        for x in turns2:
+            if not final_turns:
+                final_turns.append(x)
+                continue
+            if x.role != final_turns[-1].role:
+                final_turns.append(x)
+            else:
+                final_turns[-1].contents.extend(x.contents)
+
+        return final_turns
 
     def get_last_turn(
         self,
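
Aside: a quick sketch of the new `tool_result_role` parameter in use (the provider, tool, and prompt are illustrative):

    from chatlas import ChatOpenAI

    chat = ChatOpenAI()  # any provider


    def get_answer() -> str:
        """Return the answer to everything."""
        return "42"


    chat.register_tool(get_answer)
    chat.chat("Call get_answer and report what it says.")

    # Default: tool results stay in their own "user" turns
    turns = chat.get_turns()

    # New: fold each tool-calling loop into a single assistant turn, e.g. for
    # display (the reworked app() method below relies on this)
    display_turns = chat.get_turns(tool_result_role="assistant")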
@@ -531,7 +569,7 @@
         args
             The input to get a token count for.
         data_model
-            If the input is meant for data extraction (i.e., `.
+            If the input is meant for data extraction (i.e., `.chat_structured()`), then
             this should be the Pydantic model that describes the structure of the data to
             extract.
 
@@ -585,7 +623,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         args
             The input to get a token count for.
         data_model
-            If this input is meant for data extraction (i.e., `.
+            If this input is meant for data extraction (i.e., `.chat_structured_async()`),
             then this should be the Pydantic model that describes the structure of the data
             to extract.
 
@@ -608,6 +646,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         port: int = 0,
         host: str = "127.0.0.1",
         launch_browser: bool = True,
+        bookmark_store: Literal["url", "server", "disable"] = "url",
         bg_thread: Optional[bool] = None,
         echo: Optional[EchoOptions] = None,
         content: Literal["text", "all"] = "all",
@@ -626,6 +665,12 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             The host to run the app on (the default is "127.0.0.1").
         launch_browser
             Whether to launch a browser window.
+        bookmark_store
+            One of the following (default is "url"):
+            - `"url"`: Store bookmarks in the URL (default).
+            - `"server"`: Store bookmarks on the server (requires a server-side
+              storage backend).
+            - `"disable"`: Disable bookmarking.
         bg_thread
             Whether to run the app in a background thread. If `None`, the app will
             run in a background thread if the current environment is a notebook.
@@ -647,24 +692,37 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             from shiny import App, run_app, ui
         except ImportError:
             raise ImportError(
-                "The `shiny` package is required for the `
+                "The `shiny` package is required for the `app()` method. "
                 "Install it with `pip install shiny`."
             )
 
-
-
-
-
+        try:
+            from shinychat import (
+                Chat,
+                chat_ui,
+                message_content,  # pyright: ignore[reportAttributeAccessIssue]
+            )
+        except ImportError:
+            raise ImportError(
+                "The `shinychat` package is required for the `app()` method. "
+                "Install it with `pip install shinychat`."
+            )
 
-
-
-
-
-
-
+        messages = [
+            message_content(x) for x in self.get_turns(tool_result_role="assistant")
+        ]
+
+        def app_ui(x):
+            return ui.page_fillable(
+                chat_ui("chat", messages=messages),
+                fillable_mobile=True,
             )
 
+        def server(input):  # noqa: A002
+            chat = Chat("chat")
+
+            chat.enable_bookmarking(self)
+
             @chat.on_user_submit
             async def _(user_input: str):
                 if stream:
@@ -688,7 +746,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                 )
             )
 
-        app = App(app_ui, server)
+        app = App(app_ui, server, bookmark_store=bookmark_store)
 
         def _run_app():
             run_app(app, launch_browser=launch_browser, port=port, host=host)
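
The new `bookmark_store` argument is passed straight through to Shiny's `App`, and the UI is now rebuilt from `get_turns(tool_result_role="assistant")` via the new `shinychat` dependency. A one-line sketch (assumes `shiny` and `shinychat` are installed and `chat` is an existing `Chat` instance):

    # Keep bookmark state on the Shiny server instead of in the URL;
    # "url" is the default and "disable" turns bookmarking off
    chat.app(bookmark_store="server")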
@@ -997,20 +1055,22 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
 
         return wrapper()
 
-    def
+    def chat_structured(
         self,
         *args: Content | str,
-        data_model: type[
+        data_model: type[BaseModelT],
         echo: EchoOptions = "none",
         stream: bool = False,
-    ) ->
+    ) -> BaseModelT:
         """
-        Extract structured data
+        Extract structured data.
 
         Parameters
         ----------
         args
-            The input to
+            The input to send to the chatbot. This is typically the text you
+            want to extract data from, but it can be omitted if the data is
+            obvious from the existing conversation.
         data_model
             A Pydantic model describing the structure of the data to extract.
         echo
@@ -1024,10 +1084,47 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
 
         Returns
         -------
-
-
+        BaseModelT
+            An instance of the provided `data_model` containing the extracted data.
+        """
+        dat = self._submit_and_extract_data(
+            *args,
+            data_model=data_model,
+            echo=echo,
+            stream=stream,
+        )
+        return data_model.model_validate(dat)
+
+    def extract_data(
+        self,
+        *args: Content | str,
+        data_model: type[BaseModel],
+        echo: EchoOptions = "none",
+        stream: bool = False,
+    ) -> dict[str, Any]:
         """
+        Deprecated: use `.chat_structured()` instead.
+        """
+        warnings.warn(
+            "The `extract_data()` method is deprecated and will be removed in a future release. "
+            "Use the `chat_structured()` method instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._submit_and_extract_data(
+            *args,
+            data_model=data_model,
+            echo=echo,
+            stream=stream,
+        )
 
+    def _submit_and_extract_data(
+        self,
+        *args: Content | str,
+        data_model: type[BaseModel],
+        echo: EchoOptions = "none",
+        stream: bool = False,
+    ) -> dict[str, Any]:
         display = self._markdown_display(echo=echo)
 
         response = ChatResponse(
@@ -1046,33 +1143,24 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         turn = self.get_last_turn()
         assert turn is not None
 
-
-        for x in turn.contents:
-            if isinstance(x, ContentJson):
-                res.append(x)
-
-        if len(res) != 1:
-            raise ValueError(
-                f"Data extraction failed: {len(res)} data results received."
-            )
-
-        json = res[0]
-        return json.value
+        return Chat._extract_turn_json(turn)
 
-    async def
+    async def chat_structured_async(
         self,
         *args: Content | str,
-        data_model: type[
+        data_model: type[BaseModelT],
         echo: EchoOptions = "none",
         stream: bool = False,
-    ) ->
+    ) -> BaseModelT:
         """
         Extract structured data from the given input asynchronously.
 
         Parameters
         ----------
         args
-            The input to
+            The input to send to the chatbot. This is typically the text you
+            want to extract data from, but it can be omitted if the data is
+            obvious from the existing conversation.
         data_model
             A Pydantic model describing the structure of the data to extract.
         echo
@@ -1087,10 +1175,47 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
 
         Returns
         -------
-
-
+        BaseModelT
+            An instance of the provided `data_model` containing the extracted data.
+        """
+        dat = await self._submit_and_extract_data_async(
+            *args,
+            data_model=data_model,
+            echo=echo,
+            stream=stream,
+        )
+        return data_model.model_validate(dat)
+
+    async def extract_data_async(
+        self,
+        *args: Content | str,
+        data_model: type[BaseModel],
+        echo: EchoOptions = "none",
+        stream: bool = False,
+    ) -> dict[str, Any]:
         """
+        Deprecated: use `.chat_structured_async()` instead.
+        """
+        warnings.warn(
+            "The `extract_data_async()` method is deprecated and will be removed in a future release. "
+            "Use the `chat_structured_async()` method instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return await self._submit_and_extract_data_async(
+            *args,
+            data_model=data_model,
+            echo=echo,
+            stream=stream,
+        )
 
+    async def _submit_and_extract_data_async(
+        self,
+        *args: Content | str,
+        data_model: type[BaseModel],
+        echo: EchoOptions = "none",
+        stream: bool = False,
+    ) -> dict[str, Any]:
         display = self._markdown_display(echo=echo)
 
         response = ChatResponseAsync(
@@ -1109,6 +1234,10 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         turn = self.get_last_turn()
         assert turn is not None
 
+        return Chat._extract_turn_json(turn)
+
+    @staticmethod
+    def _extract_turn_json(turn: Turn) -> dict[str, Any]:
         res: list[ContentJson] = []
         for x in turn.contents:
             if isinstance(x, ContentJson):
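
A minimal sketch of the renamed API (the provider, model, and prompt are illustrative). The practical difference is the return type: `chat_structured()` returns a validated instance of the `data_model` (typed via the new `BaseModelT` TypeVar), while the deprecated `extract_data()` keeps returning a plain dict:

    from pydantic import BaseModel

    from chatlas import ChatOpenAI


    class Person(BaseModel):
        name: str
        age: int


    chat = ChatOpenAI()

    # New: returns a validated Person instance
    person = chat.chat_structured("John, age 15, won first prize", data_model=Person)
    print(person.name, person.age)

    # Old: still works, but emits a DeprecationWarning and returns a dict
    data = chat.extract_data("John, age 15, won first prize", data_model=Person)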
@@ -1535,7 +1664,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
 
     def register_tool(
         self,
-        func: Callable[..., Any] | Callable[..., Awaitable[Any]],
+        func: Callable[..., Any] | Callable[..., Awaitable[Any]] | Tool,
         *,
         force: bool = False,
         name: Optional[str] = None,
@@ -1629,6 +1758,15 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         ValueError
             If a tool with the same name already exists and `force` is `False`.
         """
+        if isinstance(func, Tool):
+            name = name or func.name
+            annotations = annotations or func.annotations
+            if model is not None:
+                func = Tool.from_func(
+                    func.func, name=name, model=model, annotations=annotations
+                )
+            func = func.func
+
         tool = Tool.from_func(func, name=name, model=model, annotations=annotations)
         if tool.name in self._tools and not force:
             raise ValueError(
chatlas/_content.py
CHANGED
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
 import orjson
 from pydantic import BaseModel, ConfigDict
 
-from ._typing_extensions import
+from ._typing_extensions import TypedDict
 
 if TYPE_CHECKING:
     from ._tools import Tool
@@ -24,16 +24,16 @@ class ToolAnnotations(TypedDict, total=False):
     received from untrusted servers.
     """
 
-    title:
+    title: str
     """A human-readable title for the tool."""
 
-    readOnlyHint:
+    readOnlyHint: bool
     """
     If true, the tool does not modify its environment.
     Default: false
     """
 
-    destructiveHint:
+    destructiveHint: bool
     """
     If true, the tool may perform destructive updates to its environment.
     If false, the tool performs only additive updates.
@@ -41,7 +41,7 @@ class ToolAnnotations(TypedDict, total=False):
     Default: true
     """
 
-    idempotentHint:
+    idempotentHint: bool
     """
     If true, calling the tool repeatedly with the same arguments
     will have no additional effect on the its environment.
@@ -49,7 +49,7 @@ class ToolAnnotations(TypedDict, total=False):
     Default: false
     """
 
-    openWorldHint:
+    openWorldHint: bool
    """
     If true, this tool may interact with an "open world" of external
     entities. If false, the tool's domain of interaction is closed.
@@ -58,6 +58,11 @@ class ToolAnnotations(TypedDict, total=False):
     Default: true
     """
 
+    extra: dict[str, Any]
+    """
+    Additional metadata about the tool.
+    """
+
 
 ImageContentTypes = Literal[
     "image/png",
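
Since `ToolAnnotations` is declared with `total=False`, every key, including the new `extra` field, is optional. A hedged sketch reusing the `current_time` tool from the earlier example (the import path is the internal module shown here; it may also be re-exported elsewhere):

    from chatlas._content import ToolAnnotations

    annotations: ToolAnnotations = {
        "title": "Current time",
        "readOnlyHint": True,  # does not modify its environment
        "idempotentHint": True,  # repeated calls add no extra effect
        "openWorldHint": False,  # no external entities involved
        "extra": {"owner": "platform-team"},  # new in this release
    }

    chat.register_tool(current_time, annotations=annotations)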
@@ -598,7 +603,7 @@ class ContentJson(Content):
     JSON content
 
     This content type primarily exists to signal structured data extraction
-    (i.e., data extracted via [](`~chatlas.Chat`)'s `.
+    (i.e., data extracted via [](`~chatlas.Chat`)'s `.chat_structured()` method)
 
     Parameters
     ----------
@@ -625,7 +630,7 @@ class ContentPDF(Content):
     PDF content
 
     This content type primarily exists to signal PDF data extraction
-    (i.e., data extracted via [](`~chatlas.Chat`)'s `.
+    (i.e., data extracted via [](`~chatlas.Chat`)'s `.chat_structured()` method)
 
     Parameters
     ----------