PyPI - chatlas - Versions diffs - 0.9.1__tar.gz → 0.9.2__tar.gz - Mend

chatlas 0.9.1 (tar.gz) → 0.9.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of chatlas might be problematic. Click here for more details.

Files changed (200) hide show

{chatlas-0.9.1 → chatlas-0.9.2}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 -->
+## [0.9.2] - 2025-08-08
+### Improvements
+* `Chat.get_cost()` now covers many more models and also takes cached tokens into account. (#133)
+* Avoid erroring when tool calls occur with recent versions of `openai` (> v1.99.5). (#141)
 ## [0.9.1] - 2025-07-09
 ### Bug fixes

{chatlas-0.9.1 → chatlas-0.9.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatlas
-Version: 0.9.1
+Version: 0.9.2
 Summary: A simple and consistent interface for chatting with LLMs
 Project-URL: Homepage, https://posit-dev.github.io/chatlas
 Project-URL: Documentation, https://posit-dev.github.io/chatlas
@@ -69,7 +69,7 @@ Provides-Extra: ollama
 Provides-Extra: openai
 Provides-Extra: perplexity
 Provides-Extra: snowflake
-Requires-Dist: snowflake-ml-python; extra == 'snowflake'
+Requires-Dist: snowflake-ml-python<=1.9.0; extra == 'snowflake'
 Provides-Extra: test
 Requires-Dist: pyright>=1.1.379; extra == 'test'
 Requires-Dist: pytest-asyncio; extra == 'test'

{chatlas-0.9.1 → chatlas-0.9.2}/chatlas/__init__.py RENAMED Viewed

@@ -1,20 +1,20 @@
 from . import types
-from ._anthropic import ChatAnthropic, ChatBedrockAnthropic
 from ._auto import ChatAuto
 from ._chat import Chat
 from ._content import ContentToolRequest, ContentToolResult, ContentToolResultImage
 from ._content_image import content_image_file, content_image_plot, content_image_url
 from ._content_pdf import content_pdf_file, content_pdf_url
-from ._databricks import ChatDatabricks
-from ._github import ChatGithub
-from ._google import ChatGoogle, ChatVertex
-from ._groq import ChatGroq
 from ._interpolate import interpolate, interpolate_file
-from ._ollama import ChatOllama
-from ._openai import ChatAzureOpenAI, ChatOpenAI
-from ._perplexity import ChatPerplexity
 from ._provider import Provider
-from ._snowflake import ChatSnowflake
+from ._provider_anthropic import ChatAnthropic, ChatBedrockAnthropic
+from ._provider_databricks import ChatDatabricks
+from ._provider_github import ChatGithub
+from ._provider_google import ChatGoogle, ChatVertex
+from ._provider_groq import ChatGroq
+from ._provider_ollama import ChatOllama
+from ._provider_openai import ChatAzureOpenAI, ChatOpenAI
+from ._provider_perplexity import ChatPerplexity
+from ._provider_snowflake import ChatSnowflake
 from ._tokens import token_usage
 from ._tools import Tool, ToolRejectError
 from ._turn import Turn

{chatlas-0.9.1 → chatlas-0.9.2}/chatlas/_auto.py RENAMED Viewed

@@ -5,16 +5,16 @@ from typing import Callable, Literal, Optional
 import orjson
-from ._anthropic import ChatAnthropic, ChatBedrockAnthropic
 from ._chat import Chat
-from ._databricks import ChatDatabricks
-from ._github import ChatGithub
-from ._google import ChatGoogle, ChatVertex
-from ._groq import ChatGroq
-from ._ollama import ChatOllama
-from ._openai import ChatAzureOpenAI, ChatOpenAI
-from ._perplexity import ChatPerplexity
-from ._snowflake import ChatSnowflake
+from ._provider_anthropic import ChatAnthropic, ChatBedrockAnthropic
+from ._provider_databricks import ChatDatabricks
+from ._provider_github import ChatGithub
+from ._provider_google import ChatGoogle, ChatVertex
+from ._provider_groq import ChatGroq
+from ._provider_ollama import ChatOllama
+from ._provider_openai import ChatAzureOpenAI, ChatOpenAI
+from ._provider_perplexity import ChatPerplexity
+from ._provider_snowflake import ChatSnowflake
 AutoProviders = Literal[
     "anthropic",

{chatlas-0.9.1 → chatlas-0.9.2}/chatlas/_chat.py RENAMED Viewed

@@ -65,6 +65,7 @@ class TokensDict(TypedDict):
     role: Literal["user", "assistant"]
     tokens: int
     tokens_total: int
+    tokens_cached: int
 CompletionT = TypeVar("CompletionT")
@@ -293,12 +294,15 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             {
                 "role": "user",
                 "tokens": turns[1].tokens[0],
+                # Number of tokens currently cached (reduces input token usage)
+                "tokens_cached": turns[1].tokens[2],
                 "tokens_total": turns[1].tokens[0],
             },
             # The token count for the 1st assistant response
             {
                 "role": "assistant",
                 "tokens": turns[1].tokens[1],
+                "tokens_cached": 0,
                 "tokens_total": turns[1].tokens[1],
             },
         ]
@@ -319,8 +323,11 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                 [
                     {
                         "role": "user",
-                        # Implied token count for the user input
+                        # Implied new token count for the user input (input tokens - context - cached reads)
+                        # Cached reads are only subtracted for particular providers
                         "tokens": tj.tokens[0] - sum(ti.tokens),
+                        # Number of tokens currently cached (reduces input token usage depending on provider's API)
+                        "tokens_cached": tj.tokens[2],
                         # Total tokens = Total User Tokens for the Turn = Distinct new tokens + context sent
                         "tokens_total": tj.tokens[0],
                     },
@@ -329,6 +336,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                         # The token count for the assistant response
                         "tokens": tj.tokens[1],
                         # Total tokens = Total Assistant tokens used in the turn
+                        "tokens_cached": 0,
                         "tokens_total": tj.tokens[1],
                     },
                 ]
@@ -339,7 +347,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
     def get_cost(
         self,
         options: Literal["all", "last"] = "all",
-        token_price: Optional[tuple[float, float]] = None,
+        token_price: Optional[tuple[float, float, float]] = None,
     ) -> float:
         """
         Estimate the cost of the chat.
@@ -357,11 +365,13 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
               - `"last"`: Return the cost of the last turn in the chat.
         token_price
             An optional tuple in the format of (input_token_cost,
-            output_token_cost) for bringing your own cost information.
+            output_token_cost, cached_token_cost) for bringing your own cost information.
                  - `"input_token_cost"`: The cost per user token in USD per
                    million tokens.
                  - `"output_token_cost"`: The cost per assistant token in USD
                    per million tokens.
+                - `"cached_token_cost"`: The cost per cached token read in USD
+                   per million tokens.
         Returns
         -------
@@ -374,15 +384,19 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         if token_price:
             input_token_price = token_price[0] / 1e6
             output_token_price = token_price[1] / 1e6
+            cached_token_price = token_price[2] / 1e6
         else:
             price_token = get_token_pricing(self.provider.name, self.provider.model)
             if not price_token:
                 raise KeyError(
-                    f"We could not locate pricing information for model '{self.provider.model}' from provider '{self.provider.name}'. "
+                    f"We could not locate pricing information for model '{self.provider.model}'"
+                    f" from provider '{self.provider.name}'. "
                     "If you know the pricing for this model, specify it in `token_price`."
                 )
             input_token_price = price_token["input"] / 1e6
             output_token_price = price_token["output"] / 1e6
+            cached_token_price = price_token["cached_input"] / 1e6
         if len(turns_tokens) == 0:
             return 0.0
@@ -399,8 +413,16 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             user_tokens = sum(
                 u["tokens_total"] for u in turns_tokens if u["role"] == "user"
             )
-            cost = (asst_tokens * output_token_price) + (
-                user_tokens * input_token_price
+            # We add the cached tokens here because for relevant providers they have already been subtracted
+            # from the user tokens. This assumes the provider uses (reads) the cache each time.
+            cached_token_reads = sum(
+                u["tokens_cached"] for u in turns_tokens if u["role"] == "user"
+            )
+            cost = (
+                (asst_tokens * output_token_price)
+                + (user_tokens * input_token_price)
+                + (cached_token_reads * cached_token_price)
             )
             return cost
@@ -408,7 +430,9 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         if last_turn["role"] == "assistant":
             return last_turn["tokens"] * output_token_price
         if last_turn["role"] == "user":
-            return last_turn["tokens_total"] * input_token_price
+            return (last_turn["tokens_total"] * input_token_price) + (
+                last_turn["tokens_cached"] * cached_token_price
+            )
         raise ValueError(
             f"Expected last turn to have a role of 'user' or `'assistant'`, not '{last_turn['role']}'"
         )
@@ -2224,8 +2248,12 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         tokens = self.get_tokens()
         tokens_asst = sum(u["tokens_total"] for u in tokens if u["role"] == "assistant")
         tokens_user = sum(u["tokens_total"] for u in tokens if u["role"] == "user")
+        tokens_cached = sum(u["tokens_cached"] for u in tokens if u["role"] == "user")
-        res = f"<Chat {self.provider.name}/{self.provider.model} turns={len(turns)} tokens={tokens_user}/{tokens_asst}"
+        res = (
+            f"<Chat {self.provider.name}/{self.provider.model} turns={len(turns)}"
+            f" tokens={tokens_user + tokens_cached}/{tokens_asst}"
+        )
         # Add cost info only if we can compute it
         cost = compute_cost(
@@ -2233,6 +2261,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             self.provider.model,
             tokens_user,
             tokens_asst,
+            tokens_cached,
         )
         if cost is not None:
             res += f" ${round(cost, ndigits=2)}"

chatlas-0.9.1/chatlas/_anthropic.py → chatlas-0.9.2/chatlas/_provider_anthropic.py RENAMED Viewed

@@ -586,7 +586,16 @@ class AnthropicProvider(
                         )
                     )
-        tokens = completion.usage.input_tokens, completion.usage.output_tokens
+        usage = completion.usage
+        # N.B. Currently, Anthropic doesn't cache by default and we currently do not support
+        # manual caching in chatlas. Note also that this only tracks reads, NOT writes, which
+        # have their own cost. To track that properly, we would need another caching category and per-token cost.
+        tokens = (
+            completion.usage.input_tokens,
+            completion.usage.output_tokens,
+            usage.cache_read_input_tokens if usage.cache_read_input_tokens else 0,
+        )
         tokens_log(self, tokens)
@@ -764,7 +773,7 @@ class AnthropicBedrockProvider(AnthropicProvider):
         aws_session_token: str | None,
         max_tokens: int = 4096,
         base_url: str | None,
-        name: str = "AnthropicBedrock",
+        name: str = "AWS/Bedrock",
         kwargs: Optional["ChatBedrockClientArgs"] = None,
     ):
         super().__init__(name=name, model=model, max_tokens=max_tokens)

chatlas-0.9.1/chatlas/_databricks.py → chatlas-0.9.2/chatlas/_provider_databricks.py RENAMED Viewed

@@ -4,12 +4,12 @@ from typing import TYPE_CHECKING, Optional
 from ._chat import Chat
 from ._logging import log_model_default
-from ._openai import OpenAIProvider
+from ._provider_openai import OpenAIProvider
 if TYPE_CHECKING:
     from databricks.sdk import WorkspaceClient
-    from ._openai import ChatCompletion
+    from ._provider_openai import ChatCompletion
     from .types.openai import SubmitInputArgs

chatlas-0.9.1/chatlas/_github.py → chatlas-0.9.2/chatlas/_provider_github.py RENAMED Viewed

@@ -5,11 +5,11 @@ from typing import TYPE_CHECKING, Optional
 from ._chat import Chat
 from ._logging import log_model_default
-from ._openai import OpenAIProvider
+from ._provider_openai import OpenAIProvider
 from ._utils import MISSING, MISSING_TYPE, is_testing
 if TYPE_CHECKING:
-    from ._openai import ChatCompletion
+    from ._provider_openai import ChatCompletion
     from .types.openai import ChatClientArgs, SubmitInputArgs

chatlas-0.9.1/chatlas/_google.py → chatlas-0.9.2/chatlas/_provider_google.py RENAMED Viewed

@@ -426,9 +426,7 @@ class GoogleProvider(
                 )
             )
         elif isinstance(content, ContentToolResult):
-            if isinstance(
-                content, (ContentToolResultImage, ContentToolResultResource)
-            ):
+            if isinstance(content, (ContentToolResultImage, ContentToolResultResource)):
                 raise NotImplementedError(
                     "Tool results with images or resources aren't supported by Google (Gemini). "
                 )
@@ -507,11 +505,13 @@ class GoogleProvider(
                     )
         usage = message.get("usage_metadata")
-        tokens = (0, 0)
+        tokens = (0, 0, 0)
         if usage:
+            cached = usage.get("cached_content_token_count") or 0
             tokens = (
-                usage.get("prompt_token_count") or 0,
+                (usage.get("prompt_token_count") or 0) - cached,
                 usage.get("candidates_token_count") or 0,
+                usage.get("cached_content_token_count") or 0,
             )
         tokens_log(self, tokens)

chatlas-0.9.1/chatlas/_groq.py → chatlas-0.9.2/chatlas/_provider_groq.py RENAMED Viewed

@@ -5,11 +5,11 @@ from typing import TYPE_CHECKING, Optional
 from ._chat import Chat
 from ._logging import log_model_default
-from ._openai import OpenAIProvider
+from ._provider_openai import OpenAIProvider
 from ._utils import MISSING, MISSING_TYPE, is_testing
 if TYPE_CHECKING:
-    from ._openai import ChatCompletion
+    from ._provider_openai import ChatCompletion
     from .types.openai import ChatClientArgs, SubmitInputArgs

chatlas-0.9.1/chatlas/_ollama.py → chatlas-0.9.2/chatlas/_provider_ollama.py RENAMED Viewed

@@ -7,11 +7,11 @@ from typing import TYPE_CHECKING, Optional
 import orjson
 from ._chat import Chat
-from ._openai import OpenAIProvider
+from ._provider_openai import OpenAIProvider
 from ._utils import MISSING_TYPE, is_testing
 if TYPE_CHECKING:
-    from ._openai import ChatCompletion
+    from ._provider_openai import ChatCompletion
     from .types.openai import ChatClientArgs, SubmitInputArgs

chatlas-0.9.1/chatlas/_openai.py → chatlas-0.9.2/chatlas/_provider_openai.py RENAMED Viewed

@@ -531,6 +531,8 @@ class OpenAIProvider(
         if tool_calls is not None:
             for call in tool_calls:
+                if call.type != "function":
+                    continue
                 func = call.function
                 if func is None:
                     continue
@@ -557,14 +559,27 @@ class OpenAIProvider(
         usage = completion.usage
         if usage is None:
-            tokens = (0, 0)
+            tokens = (0, 0, 0)
         else:
-            tokens = usage.prompt_tokens, usage.completion_tokens
+            if usage.prompt_tokens_details is not None:
+                cached_tokens = (
+                    usage.prompt_tokens_details.cached_tokens
+                    if usage.prompt_tokens_details.cached_tokens
+                    else 0
+                )
+            else:
+                cached_tokens = 0
+            tokens = (
+                usage.prompt_tokens - cached_tokens,
+                usage.completion_tokens,
+                cached_tokens,
+            )
         # For some reason ChatGroq() includes tokens under completion.x_groq
+        # Groq does not support caching, so we set cached_tokens to 0
         if usage is None and hasattr(completion, "x_groq"):
             usage = completion.x_groq["usage"]  # type: ignore
-            tokens = usage["prompt_tokens"], usage["completion_tokens"]
+            tokens = usage["prompt_tokens"], usage["completion_tokens"], 0
         tokens_log(self, tokens)
@@ -703,7 +718,7 @@ class OpenAIAzureProvider(OpenAIProvider):
         api_version: Optional[str] = None,
         api_key: Optional[str] = None,
         seed: int | None = None,
-        name: str = "OpenAIAzure",
+        name: str = "Azure/OpenAI",
         model: Optional[str] = "UnusedValue",
         kwargs: Optional["ChatAzureClientArgs"] = None,
     ):

chatlas-0.9.1/chatlas/_perplexity.py → chatlas-0.9.2/chatlas/_provider_perplexity.py RENAMED Viewed

@@ -5,11 +5,11 @@ from typing import TYPE_CHECKING, Optional
 from ._chat import Chat
 from ._logging import log_model_default
-from ._openai import OpenAIProvider
+from ._provider_openai import OpenAIProvider
 from ._utils import MISSING, MISSING_TYPE, is_testing
 if TYPE_CHECKING:
-    from ._openai import ChatCompletion
+    from ._provider_openai import ChatCompletion
     from .types.openai import ChatClientArgs, SubmitInputArgs

chatlas-0.9.1/chatlas/_snowflake.py → chatlas-0.9.2/chatlas/_provider_snowflake.py RENAMED Viewed

@@ -537,12 +537,12 @@ class SnowflakeProvider(
                         arguments=params,
                     )
                 )
+        # Snowflake does not currently appear to support caching, so we set cached tokens to 0
         usage = completion.usage
         if usage is None:
-            tokens = (0, 0)
+            tokens = (0, 0, 0)
         else:
-            tokens = (usage.prompt_tokens or 0, usage.completion_tokens or 0)
+            tokens = (usage.prompt_tokens or 0, usage.completion_tokens or 0, 0)
         tokens_log(self, tokens)

{chatlas-0.9.1 → chatlas-0.9.2}/chatlas/_tokens.py RENAMED Viewed

@@ -23,6 +23,7 @@ class TokenUsage(TypedDict):
     model: str
     input: int
     output: int
+    cached_input: int
     cost: float | None
@@ -32,11 +33,16 @@ class ThreadSafeTokenCounter:
         self._tokens: dict[str, TokenUsage] = {}
     def log_tokens(
-        self, name: str, model: str, input_tokens: int, output_tokens: int
+        self,
+        name: str,
+        model: str,
+        input_tokens: int,
+        output_tokens: int,
+        cached_tokens: int,
     ) -> None:
         logger.info(
             f"Provider '{name}' generated a response of {output_tokens} tokens "
-            f"from an input of {input_tokens} tokens."
+            f"from an input of {input_tokens} tokens and {cached_tokens} cached input tokens."
         )
         with self._lock:
@@ -46,12 +52,18 @@ class ThreadSafeTokenCounter:
                     "model": model,
                     "input": input_tokens,
                     "output": output_tokens,
-                    "cost": compute_cost(name, model, input_tokens, output_tokens),
+                    "cached_input": cached_tokens,
+                    "cost": compute_cost(
+                        name, model, input_tokens, output_tokens, cached_tokens
+                    ),
                 }
             else:
                 self._tokens[name]["input"] += input_tokens
                 self._tokens[name]["output"] += output_tokens
-                price = compute_cost(name, model, input_tokens, output_tokens)
+                self._tokens[name]["cached_input"] += cached_tokens
+                price = compute_cost(
+                    name, model, input_tokens, output_tokens, cached_tokens
+                )
                 if price is not None:
                     cost = self._tokens[name]["cost"]
                     if cost is None:
@@ -71,11 +83,13 @@ class ThreadSafeTokenCounter:
 _token_counter = ThreadSafeTokenCounter()
-def tokens_log(provider: "Provider", tokens: tuple[int, int]) -> None:
+def tokens_log(provider: "Provider", tokens: tuple[int, int, int]) -> None:
     """
     Log token usage for a provider in a thread-safe manner.
     """
-    _token_counter.log_tokens(provider.name, provider.model, tokens[0], tokens[1])
+    _token_counter.log_tokens(
+        provider.name, provider.model, tokens[0], tokens[1], tokens[2]
+    )
 def tokens_reset() -> None:
@@ -132,7 +146,7 @@ def get_token_pricing(name: str, model: str) -> TokenPrice | None:
 def compute_cost(
-    name: str, model: str, input_tokens: int, output_tokens: int
+    name: str, model: str, input_tokens: int, output_tokens: int, cached_tokens: int = 0
 ) -> float | None:
     """
     Compute the cost of a turn.
@@ -147,7 +161,8 @@ def compute_cost(
         return None
     input_price = input_tokens * (price["input"] / 1e6)
     output_price = output_tokens * (price["output"] / 1e6)
-    return input_price + output_price
+    cached_price = cached_tokens * (price["cached_input"] / 1e6)
+    return input_price + output_price + cached_price
 def token_usage() -> list[TokenUsage] | None:

{chatlas-0.9.1 → chatlas-0.9.2}/chatlas/_turn.py RENAMED Viewed

@@ -55,7 +55,7 @@ class Turn(BaseModel, Generic[CompletionT]):
     contents
         A list of [](`~chatlas.types.Content`) objects.
     tokens
-        A numeric vector of length 2 representing the number of input and output
+        A numeric vector of length 3 representing the number of input, output, and cached
         tokens (respectively) used in this turn. Currently only recorded for
         assistant turns.
     finish_reason
@@ -69,7 +69,7 @@ class Turn(BaseModel, Generic[CompletionT]):
     role: Literal["user", "assistant", "system"]
     contents: list[ContentUnion] = Field(default_factory=list)
-    tokens: Optional[tuple[int, int]] = None
+    tokens: Optional[tuple[int, int, int]] = None
     finish_reason: Optional[str] = None
     completion: Optional[CompletionT] = Field(default=None, exclude=True)
@@ -80,7 +80,7 @@ class Turn(BaseModel, Generic[CompletionT]):
         role: Literal["user", "assistant", "system"],
         contents: str | Sequence[Content | str],
         *,
-        tokens: Optional[tuple[int, int]] = None,
+        tokens: Optional[tuple[int, int, int]] = None,
         finish_reason: Optional[str] = None,
         completion: Optional[CompletionT] = None,
         **kwargs,
@@ -134,4 +134,3 @@ def user_turn(*args: Content | str) -> Turn:
         raise ValueError("Must supply at least one input.")
     return Turn("user", args)

{chatlas-0.9.1 → chatlas-0.9.2}/chatlas/_version.py RENAMED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.9.1'
-__version_tuple__ = version_tuple = (0, 9, 1)
+__version__ = version = '0.9.2'
+__version_tuple__ = version_tuple = (0, 9, 2)

chatlas 0.9.1__tar.gz → 0.9.2__tar.gz

Potentially problematic release.

chatlas 0.9.1 (tar.gz) → 0.9.2 (tar.gz)