chatlas 0.9.0__tar.gz → 0.9.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {chatlas-0.9.0 → chatlas-0.9.2}/CHANGELOG.md +16 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/PKG-INFO +2 -2
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/__init__.py +9 -9
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_auto.py +9 -9
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_chat.py +64 -12
- chatlas-0.9.0/chatlas/_anthropic.py → chatlas-0.9.2/chatlas/_provider_anthropic.py +11 -2
- chatlas-0.9.0/chatlas/_databricks.py → chatlas-0.9.2/chatlas/_provider_databricks.py +2 -2
- chatlas-0.9.0/chatlas/_github.py → chatlas-0.9.2/chatlas/_provider_github.py +2 -2
- chatlas-0.9.0/chatlas/_google.py → chatlas-0.9.2/chatlas/_provider_google.py +5 -5
- chatlas-0.9.0/chatlas/_groq.py → chatlas-0.9.2/chatlas/_provider_groq.py +2 -2
- chatlas-0.9.0/chatlas/_ollama.py → chatlas-0.9.2/chatlas/_provider_ollama.py +2 -2
- chatlas-0.9.0/chatlas/_openai.py → chatlas-0.9.2/chatlas/_provider_openai.py +19 -4
- chatlas-0.9.0/chatlas/_perplexity.py → chatlas-0.9.2/chatlas/_provider_perplexity.py +2 -2
- chatlas-0.9.0/chatlas/_snowflake.py → chatlas-0.9.2/chatlas/_provider_snowflake.py +3 -3
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_tokens.py +25 -18
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_turn.py +3 -4
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_version.py +2 -2
- chatlas-0.9.2/chatlas/data/prices.json +2870 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/__init__.py +3 -3
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/anthropic/_submit.py +5 -5
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/google/_submit.py +23 -29
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/openai/_submit.py +25 -3
- {chatlas-0.9.0 → chatlas-0.9.2}/pyproject.toml +3 -1
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/__snapshots__/test_chat.ambr +2 -2
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_auto.py +3 -3
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_chat.py +16 -11
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_provider_anthropic.py +1 -1
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_provider_azure.py +3 -3
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_provider_databricks.py +1 -1
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_provider_google.py +4 -3
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_provider_openai.py +1 -1
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_tokens.py +35 -27
- chatlas-0.9.0/chatlas/data/prices.json +0 -264
- {chatlas-0.9.0 → chatlas-0.9.2}/.github/workflows/check-update-types.yml +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/.github/workflows/docs-publish.yml +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/.github/workflows/release.yml +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/.github/workflows/test.yml +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/.github/workflows/update-pricing.yml +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/.gitignore +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/.vscode/extensions.json +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/.vscode/settings.json +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/LICENSE +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/Makefile +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/README.md +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_callbacks.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_content.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_content_image.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_content_pdf.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_display.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_interpolate.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_live_render.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_logging.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_mcp_manager.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_merge.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_provider.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_tokens_old.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_tools.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_typing_extensions.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_utils.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/py.typed +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/anthropic/__init__.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/anthropic/_client.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/anthropic/_client_bedrock.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/google/__init__.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/google/_client.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/openai/__init__.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/openai/_client.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/chatlas/types/openai/_client_azure.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/.gitignore +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/_extensions/machow/interlinks/.gitignore +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/_extensions/machow/interlinks/_extension.yml +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/_extensions/machow/interlinks/interlinks.lua +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/_quarto.yml +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/_sidebar.yml +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/congressional-assets.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/async.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/chat.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/chatbots.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/debug.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/models.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/monitor.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/parameters.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/stream.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/structured-data.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/system-prompt.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/get-started/tools.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/chat-app.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/chat-console.mp4 +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/chat-console.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/chat-notebook.mp4 +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/chat-parameters.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/chatbot-gradio.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/chatbot-shiny.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/chatbot-streamlit.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/chatbot-textual.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/chatlas-hello.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/client-parameters.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/congressional-assets.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/hello-chat-console.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/model-parameters.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/model-type-hints.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/posit-logo.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/shiny-mcp-run-python.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/shiny-tool-call-display.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/shiny-tool-call-map.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/tool-calling-right.svg +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/images/tool-calling-wrong.svg +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/index.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/logos/hero/hero-old.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/logos/hero/hero.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/logos/hex/logo.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/logos/small/logo.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/misc/RAG.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/misc/examples.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/misc/mcp-tools.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/misc/vocabulary.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/Chat.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatAnthropic.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatAuto.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatAzureOpenAI.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatBedrockAnthropic.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatDatabricks.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatGithub.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatGoogle.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatGroq.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatOllama.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatOpenAI.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatPerplexity.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatSnowflake.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ChatVertex.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/Provider.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/Tool.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/ToolRejectError.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/Turn.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/content_image_file.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/content_image_plot.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/content_image_url.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/content_pdf_file.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/content_pdf_url.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/image_file.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/image_plot.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/image_url.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/index.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/interpolate.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/interpolate_file.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/token_usage.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.ChatResponse.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.ChatResponseAsync.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.Content.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.ContentImage.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.ContentImageInline.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.ContentImageRemote.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.ContentJson.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.ContentText.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.ContentToolRequest.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.ContentToolResult.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.ImageContentTypes.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.MISSING.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.MISSING_TYPE.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.SubmitInputArgsT.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/reference/types.TokenUsage.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/structured-data/article-summary.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/structured-data/classification.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/structured-data/entity-recognition.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/structured-data/multi-modal.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/structured-data/sentiment-analysis.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/styles.scss +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/tool-calling/approval.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/tool-calling/displays.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/tool-calling/how-it-works.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/docs/why-chatlas.qmd +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/pytest.ini +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/scripts/_generate_anthropic_types.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/scripts/_generate_google_types.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/scripts/_generate_openai_types.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/scripts/_utils.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/scripts/main.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/__init__.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/apples.pdf +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/conftest.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/images/dice.png +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/mcp_servers/http_add.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/mcp_servers/http_current_date.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/mcp_servers/stdio_current_date.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/mcp_servers/stdio_subtract_multiply.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_callbacks.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_content.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_content_html.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_content_image.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_content_pdf.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_content_tools.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_interpolate.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_mcp_client.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_provider_bedrock.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_provider_snowflake.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_set_model_params.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_tool_from_mcp.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_tools_enhanced.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_turns.py +0 -0
- {chatlas-0.9.0 → chatlas-0.9.2}/tests/test_utils_merge.py +0 -0
{chatlas-0.9.0 → chatlas-0.9.2}/CHANGELOG.md

@@ -7,6 +7,22 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 -->
 
+## [0.9.2] - 2025-08-08
+
+### Improvements
+
+* `Chat.get_cost()` now covers many more models and also takes cached tokens into account. (#133)
+* Avoid erroring when tool calls occur with recent versions of `openai` (> v1.99.5). (#141)
+
+
+## [0.9.1] - 2025-07-09
+
+### Bug fixes
+
+* Fixed an issue where `.chat()` wasn't streaming output properly in (the latest build of) Positron's Jupyter notebook. (#131)
+
+* Needless warnings and errors are no longer thrown when model pricing info is unavailable. (#132)
+
 ## [0.9.0] - 2025-07-02
 
 ### New features
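The headline 0.9.2 change, `Chat.get_cost()` with cached-token awareness, shows up throughout the hunks below. For orientation, here is a minimal sketch of the updated API; the model name and prices are illustrative placeholders, not real rates:

```python
from chatlas import ChatOpenAI

chat = ChatOpenAI(model="gpt-4o-mini")
chat.chat("What is the capital of France?")

# Uses the bundled pricing data (the new, much larger prices.json):
print(chat.get_cost())

# Or bring your own prices: USD per million input, output, and
# cached-input tokens, respectively. The third tuple element is new in 0.9.2.
print(chat.get_cost(options="all", token_price=(2.50, 10.00, 1.25)))
```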
{chatlas-0.9.0 → chatlas-0.9.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatlas
-Version: 0.9.0
+Version: 0.9.2
 Summary: A simple and consistent interface for chatting with LLMs
 Project-URL: Homepage, https://posit-dev.github.io/chatlas
 Project-URL: Documentation, https://posit-dev.github.io/chatlas
@@ -69,7 +69,7 @@ Provides-Extra: ollama
 Provides-Extra: openai
 Provides-Extra: perplexity
 Provides-Extra: snowflake
-Requires-Dist: snowflake-ml-python; extra == 'snowflake'
+Requires-Dist: snowflake-ml-python<=1.9.0; extra == 'snowflake'
 Provides-Extra: test
 Requires-Dist: pyright>=1.1.379; extra == 'test'
 Requires-Dist: pytest-asyncio; extra == 'test'
{chatlas-0.9.0 → chatlas-0.9.2}/chatlas/__init__.py

@@ -1,20 +1,20 @@
 from . import types
-from ._anthropic import ChatAnthropic, ChatBedrockAnthropic
 from ._auto import ChatAuto
 from ._chat import Chat
 from ._content import ContentToolRequest, ContentToolResult, ContentToolResultImage
 from ._content_image import content_image_file, content_image_plot, content_image_url
 from ._content_pdf import content_pdf_file, content_pdf_url
-from ._databricks import ChatDatabricks
-from ._github import ChatGithub
-from ._google import ChatGoogle, ChatVertex
-from ._groq import ChatGroq
 from ._interpolate import interpolate, interpolate_file
-from ._ollama import ChatOllama
-from ._openai import ChatAzureOpenAI, ChatOpenAI
-from ._perplexity import ChatPerplexity
 from ._provider import Provider
-from ._snowflake import ChatSnowflake
+from ._provider_anthropic import ChatAnthropic, ChatBedrockAnthropic
+from ._provider_databricks import ChatDatabricks
+from ._provider_github import ChatGithub
+from ._provider_google import ChatGoogle, ChatVertex
+from ._provider_groq import ChatGroq
+from ._provider_ollama import ChatOllama
+from ._provider_openai import ChatAzureOpenAI, ChatOpenAI
+from ._provider_perplexity import ChatPerplexity
+from ._provider_snowflake import ChatSnowflake
 from ._tokens import token_usage
 from ._tools import Tool, ToolRejectError
 from ._turn import Turn
{chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_auto.py

@@ -5,16 +5,16 @@ from typing import Callable, Literal, Optional
 
 import orjson
 
-from ._anthropic import ChatAnthropic, ChatBedrockAnthropic
 from ._chat import Chat
-from ._databricks import ChatDatabricks
-from ._github import ChatGithub
-from ._google import ChatGoogle, ChatVertex
-from ._groq import ChatGroq
-from ._ollama import ChatOllama
-from ._openai import ChatAzureOpenAI, ChatOpenAI
-from ._perplexity import ChatPerplexity
-from ._snowflake import ChatSnowflake
+from ._provider_anthropic import ChatAnthropic, ChatBedrockAnthropic
+from ._provider_databricks import ChatDatabricks
+from ._provider_github import ChatGithub
+from ._provider_google import ChatGoogle, ChatVertex
+from ._provider_groq import ChatGroq
+from ._provider_ollama import ChatOllama
+from ._provider_openai import ChatAzureOpenAI, ChatOpenAI
+from ._provider_perplexity import ChatPerplexity
+from ._provider_snowflake import ChatSnowflake
 
 AutoProviders = Literal[
     "anthropic",
{chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_chat.py

@@ -44,7 +44,7 @@ from ._display import (
 from ._logging import log_tool_error
 from ._mcp_manager import MCPSessionManager
 from ._provider import Provider, StandardModelParams, SubmitInputArgsT
-from ._tokens import get_token_pricing
+from ._tokens import compute_cost, get_token_pricing
 from ._tools import Tool, ToolRejectError
 from ._turn import Turn, user_turn
 from ._typing_extensions import TypedDict, TypeGuard
@@ -65,6 +65,7 @@ class TokensDict(TypedDict):
     role: Literal["user", "assistant"]
     tokens: int
     tokens_total: int
+    tokens_cached: int
 
 
 CompletionT = TypeVar("CompletionT")
@@ -293,12 +294,15 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
             {
                 "role": "user",
                 "tokens": turns[1].tokens[0],
+                # Number of tokens currently cached (reduces input token usage)
+                "tokens_cached": turns[1].tokens[2],
                 "tokens_total": turns[1].tokens[0],
             },
             # The token count for the 1st assistant response
             {
                 "role": "assistant",
                 "tokens": turns[1].tokens[1],
+                "tokens_cached": 0,
                 "tokens_total": turns[1].tokens[1],
             },
         ]
@@ -319,8 +323,11 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
         [
             {
                 "role": "user",
-                # Implied token count for the user input
+                # Implied new token count for the user input (input tokens - context - cached reads)
+                # Cached reads are only subtracted for particular providers
                 "tokens": tj.tokens[0] - sum(ti.tokens),
+                # Number of tokens currently cached (reduces input token usage depending on provider's API)
+                "tokens_cached": tj.tokens[2],
                 # Total tokens = Total User Tokens for the Turn = Distinct new tokens + context sent
                 "tokens_total": tj.tokens[0],
             },
@@ -329,6 +336,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
                 # The token count for the assistant response
                 "tokens": tj.tokens[1],
                 # Total tokens = Total Assistant tokens used in the turn
+                "tokens_cached": 0,
                 "tokens_total": tj.tokens[1],
             },
         ]
@@ -339,7 +347,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
     def get_cost(
         self,
         options: Literal["all", "last"] = "all",
-        token_price: Optional[tuple[float, float]] = None,
+        token_price: Optional[tuple[float, float, float]] = None,
     ) -> float:
         """
         Estimate the cost of the chat.
@@ -357,11 +365,13 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
            - `"last"`: Return the cost of the last turn in the chat.
         token_price
             An optional tuple in the format of (input_token_cost,
-            output_token_cost) for bringing your own cost information.
+            output_token_cost, cached_token_cost) for bringing your own cost information.
             - `"input_token_cost"`: The cost per user token in USD per
               million tokens.
             - `"output_token_cost"`: The cost per assistant token in USD
               per million tokens.
+            - `"cached_token_cost"`: The cost per cached token read in USD
+              per million tokens.
 
         Returns
         -------
@@ -374,15 +384,19 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
        if token_price:
            input_token_price = token_price[0] / 1e6
            output_token_price = token_price[1] / 1e6
+           cached_token_price = token_price[2] / 1e6
        else:
            price_token = get_token_pricing(self.provider.name, self.provider.model)
            if not price_token:
                raise KeyError(
-                   f"We could not locate pricing information for model '{self.provider.model}'
+                   f"We could not locate pricing information for model '{self.provider.model}'"
+                   f" from provider '{self.provider.name}'. "
                    "If you know the pricing for this model, specify it in `token_price`."
                )
+
            input_token_price = price_token["input"] / 1e6
            output_token_price = price_token["output"] / 1e6
+           cached_token_price = price_token["cached_input"] / 1e6
 
        if len(turns_tokens) == 0:
            return 0.0
@@ -399,8 +413,16 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
            user_tokens = sum(
                u["tokens_total"] for u in turns_tokens if u["role"] == "user"
            )
-           cost = (asst_tokens * output_token_price) + (
-               user_tokens * input_token_price
+           # We add the cached tokens here because for relevant providers they have already been subtracted
+           # from the user tokens. This assumes the provider uses (reads) the cache each time.
+           cached_token_reads = sum(
+               u["tokens_cached"] for u in turns_tokens if u["role"] == "user"
+           )
+
+           cost = (
+               (asst_tokens * output_token_price)
+               + (user_tokens * input_token_price)
+               + (cached_token_reads * cached_token_price)
            )
            return cost
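The arithmetic above is easy to sanity-check in isolation: cached reads are added back because, for providers that report them, they have already been subtracted from the user token counts. A self-contained sketch of the same math (the prices are invented):

```python
def estimate_cost(
    user_tokens: int,
    asst_tokens: int,
    cached_reads: int,
    price_per_million: tuple[float, float, float],
) -> float:
    """Mirror of the get_cost() math above; all prices are USD per million tokens."""
    input_p, output_p, cached_p = (p / 1e6 for p in price_per_million)
    return (asst_tokens * output_p) + (user_tokens * input_p) + (cached_reads * cached_p)


# e.g. 1200 fresh input tokens, 350 output tokens, 800 cached reads:
print(estimate_cost(1200, 350, 800, (2.50, 10.00, 1.25)))  # ≈ 0.0075
```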
@@ -408,7 +430,9 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
        if last_turn["role"] == "assistant":
            return last_turn["tokens"] * output_token_price
        if last_turn["role"] == "user":
-           return last_turn["tokens_total"] * input_token_price
+           return (last_turn["tokens_total"] * input_token_price) + (
+               last_turn["tokens_cached"] * cached_token_price
+           )
        raise ValueError(
            f"Expected last turn to have a role of 'user' or `'assistant'`, not '{last_turn['role']}'"
        )
@@ -2170,10 +2194,10 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
            return ChatMarkdownDisplay(MockMarkdownDisplay(), self)
 
        # rich does a lot to detect a notebook environment, but it doesn't
-       # detect Quarto
+       # detect Quarto, or a Positron notebook
        from rich.console import Console
 
-       is_web = Console().is_jupyter or
+       is_web = Console().is_jupyter or is_quarto() or is_positron_notebook()
 
        opts = self._echo_options
 
@@ -2222,11 +2246,27 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
    def __repr__(self):
        turns = self.get_turns(include_system_prompt=True)
        tokens = self.get_tokens()
-       cost = self.get_cost()
        tokens_asst = sum(u["tokens_total"] for u in tokens if u["role"] == "assistant")
        tokens_user = sum(u["tokens_total"] for u in tokens if u["role"] == "user")
+       tokens_cached = sum(u["tokens_cached"] for u in tokens if u["role"] == "user")
+
+       res = (
+           f"<Chat {self.provider.name}/{self.provider.model} turns={len(turns)}"
+           f" tokens={tokens_user + tokens_cached}/{tokens_asst}"
+       )
 
-
+       # Add cost info only if we can compute it
+       cost = compute_cost(
+           self.provider.name,
+           self.provider.model,
+           tokens_user,
+           tokens_asst,
+           tokens_cached,
+       )
+       if cost is not None:
+           res += f" ${round(cost, ndigits=2)}"
+
+       res += ">"
        for turn in turns:
            res += "\n" + turn.__repr__(indent=2)
        return res + "\n"
@@ -2421,3 +2461,15 @@ class ToolFailureWarning(RuntimeWarning):
 
 # By default warnings are shown once; we want to always show them.
 warnings.simplefilter("always", ToolFailureWarning)
+
+
+def is_quarto():
+    return os.getenv("QUARTO_PYTHON", None) is not None
+
+
+def is_positron_notebook():
+    try:
+        mode = get_ipython().session_mode  # noqa: F821 # type: ignore
+        return mode == "notebook"
+    except Exception:
+        return False
chatlas-0.9.0/chatlas/_anthropic.py → chatlas-0.9.2/chatlas/_provider_anthropic.py

@@ -586,7 +586,16 @@ class AnthropicProvider(
            )
        )
 
-       tokens = (completion.usage.input_tokens, completion.usage.output_tokens)
+       usage = completion.usage
+       # N.B. Currently, Anthropic doesn't cache by default and we currently do not support
+       # manual caching in chatlas. Note also that this only tracks reads, NOT writes, which
+       # have their own cost. To track that properly, we would need another caching category and per-token cost.
+
+       tokens = (
+           completion.usage.input_tokens,
+           completion.usage.output_tokens,
+           usage.cache_read_input_tokens if usage.cache_read_input_tokens else 0,
+       )
 
        tokens_log(self, tokens)
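To see the new three-element tuple in isolation, here is a runnable sketch using a stand-in for Anthropic's usage object; the field names (`input_tokens`, `output_tokens`, `cache_read_input_tokens`) come from the hunk above:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeAnthropicUsage:
    # Stand-in for the SDK's Usage object; only the fields used above.
    input_tokens: int
    output_tokens: int
    cache_read_input_tokens: Optional[int] = None


def usage_to_tokens(usage: FakeAnthropicUsage) -> tuple[int, int, int]:
    # (input, output, cached reads); cache *writes* are billed separately
    # and, as the comment in the hunk notes, are not tracked here.
    return (
        usage.input_tokens,
        usage.output_tokens,
        usage.cache_read_input_tokens or 0,
    )


print(usage_to_tokens(FakeAnthropicUsage(100, 42)))        # (100, 42, 0)
print(usage_to_tokens(FakeAnthropicUsage(100, 42, 2048)))  # (100, 42, 2048)
```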
@@ -764,7 +773,7 @@ class AnthropicBedrockProvider(AnthropicProvider):
        aws_session_token: str | None,
        max_tokens: int = 4096,
        base_url: str | None,
-       name: str = "
+       name: str = "AWS/Bedrock",
        kwargs: Optional["ChatBedrockClientArgs"] = None,
    ):
        super().__init__(name=name, model=model, max_tokens=max_tokens)
chatlas-0.9.0/chatlas/_databricks.py → chatlas-0.9.2/chatlas/_provider_databricks.py

@@ -4,12 +4,12 @@ from typing import TYPE_CHECKING, Optional
 
 from ._chat import Chat
 from ._logging import log_model_default
-from ._openai import OpenAIProvider
+from ._provider_openai import OpenAIProvider
 
 if TYPE_CHECKING:
     from databricks.sdk import WorkspaceClient
 
-    from ._openai import ChatCompletion
+    from ._provider_openai import ChatCompletion
     from .types.openai import SubmitInputArgs
 
chatlas-0.9.0/chatlas/_github.py → chatlas-0.9.2/chatlas/_provider_github.py

@@ -5,11 +5,11 @@ from typing import TYPE_CHECKING, Optional
 
 from ._chat import Chat
 from ._logging import log_model_default
-from ._openai import OpenAIProvider
+from ._provider_openai import OpenAIProvider
 from ._utils import MISSING, MISSING_TYPE, is_testing
 
 if TYPE_CHECKING:
-    from ._openai import ChatCompletion
+    from ._provider_openai import ChatCompletion
     from .types.openai import ChatClientArgs, SubmitInputArgs
 
chatlas-0.9.0/chatlas/_google.py → chatlas-0.9.2/chatlas/_provider_google.py

@@ -426,9 +426,7 @@ class GoogleProvider(
            )
        )
    elif isinstance(content, ContentToolResult):
-       if isinstance(
-           content, (ContentToolResultImage, ContentToolResultResource)
-       ):
+       if isinstance(content, (ContentToolResultImage, ContentToolResultResource)):
            raise NotImplementedError(
                "Tool results with images or resources aren't supported by Google (Gemini). "
            )
@@ -507,11 +505,13 @@ class GoogleProvider(
        )
 
        usage = message.get("usage_metadata")
-       tokens = (0, 0)
+       tokens = (0, 0, 0)
        if usage:
+           cached = usage.get("cached_content_token_count") or 0
            tokens = (
-               usage.get("prompt_token_count") or 0,
+               (usage.get("prompt_token_count") or 0) - cached,
                usage.get("candidates_token_count") or 0,
+               usage.get("cached_content_token_count") or 0,
            )
 
        tokens_log(self, tokens)
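Note the asymmetry with other providers: Gemini's `prompt_token_count` already includes cached content, so cached reads are subtracted to leave only "fresh" input tokens. The same mapping as a standalone sketch (the numbers are invented):

```python
def gemini_usage_to_tokens(usage_metadata: dict | None) -> tuple[int, int, int]:
    """Mirrors the hunk above: returns (fresh input, output, cached reads)."""
    if not usage_metadata:
        return (0, 0, 0)
    cached = usage_metadata.get("cached_content_token_count") or 0
    return (
        (usage_metadata.get("prompt_token_count") or 0) - cached,
        usage_metadata.get("candidates_token_count") or 0,
        cached,
    )


print(gemini_usage_to_tokens({
    "prompt_token_count": 900,
    "candidates_token_count": 120,
    "cached_content_token_count": 600,
}))  # (300, 120, 600)
```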
chatlas-0.9.0/chatlas/_groq.py → chatlas-0.9.2/chatlas/_provider_groq.py

@@ -5,11 +5,11 @@ from typing import TYPE_CHECKING, Optional
 
 from ._chat import Chat
 from ._logging import log_model_default
-from ._openai import OpenAIProvider
+from ._provider_openai import OpenAIProvider
 from ._utils import MISSING, MISSING_TYPE, is_testing
 
 if TYPE_CHECKING:
-    from ._openai import ChatCompletion
+    from ._provider_openai import ChatCompletion
     from .types.openai import ChatClientArgs, SubmitInputArgs
 
chatlas-0.9.0/chatlas/_ollama.py → chatlas-0.9.2/chatlas/_provider_ollama.py

@@ -7,11 +7,11 @@ from typing import TYPE_CHECKING, Optional
 import orjson
 
 from ._chat import Chat
-from ._openai import OpenAIProvider
+from ._provider_openai import OpenAIProvider
 from ._utils import MISSING_TYPE, is_testing
 
 if TYPE_CHECKING:
-    from ._openai import ChatCompletion
+    from ._provider_openai import ChatCompletion
     from .types.openai import ChatClientArgs, SubmitInputArgs
 
chatlas-0.9.0/chatlas/_openai.py → chatlas-0.9.2/chatlas/_provider_openai.py

@@ -531,6 +531,8 @@ class OpenAIProvider(
 
        if tool_calls is not None:
            for call in tool_calls:
+               if call.type != "function":
+                   continue
                func = call.function
                if func is None:
                    continue
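This guard is the #141 fix from the changelog: with recent `openai` releases (> v1.99.5), `tool_calls` can contain entries whose `type` isn't `"function"` (e.g., custom tool calls), and unconditionally reading `.function` started to error. The same filtering pattern, with stand-in objects:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeToolCall:
    # Stand-in for the SDK's tool-call union; only the fields the loop touches.
    type: str
    function: Optional[str] = None


calls = [
    FakeToolCall(type="function", function="get_weather(...)"),
    FakeToolCall(type="custom"),    # the kind of entry that tripped the old loop
    FakeToolCall(type="function"),  # function=None, also skipped
]

for call in calls:
    if call.type != "function":
        continue
    if call.function is None:
        continue
    print("dispatching:", call.function)  # only the first call survives
```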
@@ -557,14 +559,27 @@ class OpenAIProvider(
 
        usage = completion.usage
        if usage is None:
-           tokens = (0, 0)
+           tokens = (0, 0, 0)
        else:
-           tokens = (usage.prompt_tokens, usage.completion_tokens)
+           if usage.prompt_tokens_details is not None:
+               cached_tokens = (
+                   usage.prompt_tokens_details.cached_tokens
+                   if usage.prompt_tokens_details.cached_tokens
+                   else 0
+               )
+           else:
+               cached_tokens = 0
+           tokens = (
+               usage.prompt_tokens - cached_tokens,
+               usage.completion_tokens,
+               cached_tokens,
+           )
 
        # For some reason ChatGroq() includes tokens under completion.x_groq
+       # Groq does not support caching, so we set cached_tokens to 0
        if usage is None and hasattr(completion, "x_groq"):
            usage = completion.x_groq["usage"]  # type: ignore
-           tokens = usage["prompt_tokens"], usage["completion_tokens"]
+           tokens = usage["prompt_tokens"], usage["completion_tokens"], 0
 
        tokens_log(self, tokens)
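For OpenAI, cached reads arrive under `usage.prompt_tokens_details.cached_tokens` and are carved out of `prompt_tokens` so the tuple doesn't double-count them. A runnable sketch of the extraction, with `SimpleNamespace` standing in for the response objects:

```python
from types import SimpleNamespace as NS


def openai_usage_to_tokens(usage) -> tuple[int, int, int]:
    # (fresh input, output, cached input), mirroring the hunk above.
    if usage is None:
        return (0, 0, 0)
    details = getattr(usage, "prompt_tokens_details", None)
    cached = (details.cached_tokens or 0) if details is not None else 0
    return (usage.prompt_tokens - cached, usage.completion_tokens, cached)


usage = NS(
    prompt_tokens=1000,
    completion_tokens=50,
    prompt_tokens_details=NS(cached_tokens=700),
)
print(openai_usage_to_tokens(usage))  # (300, 50, 700)
print(openai_usage_to_tokens(None))   # (0, 0, 0)
```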
@@ -703,7 +718,7 @@ class OpenAIAzureProvider(OpenAIProvider):
        api_version: Optional[str] = None,
        api_key: Optional[str] = None,
        seed: int | None = None,
-       name: str = "
+       name: str = "Azure/OpenAI",
        model: Optional[str] = "UnusedValue",
        kwargs: Optional["ChatAzureClientArgs"] = None,
    ):
chatlas-0.9.0/chatlas/_perplexity.py → chatlas-0.9.2/chatlas/_provider_perplexity.py

@@ -5,11 +5,11 @@ from typing import TYPE_CHECKING, Optional
 
 from ._chat import Chat
 from ._logging import log_model_default
-from ._openai import OpenAIProvider
+from ._provider_openai import OpenAIProvider
 from ._utils import MISSING, MISSING_TYPE, is_testing
 
 if TYPE_CHECKING:
-    from ._openai import ChatCompletion
+    from ._provider_openai import ChatCompletion
     from .types.openai import ChatClientArgs, SubmitInputArgs
 
chatlas-0.9.0/chatlas/_snowflake.py → chatlas-0.9.2/chatlas/_provider_snowflake.py

@@ -537,12 +537,12 @@ class SnowflakeProvider(
                arguments=params,
            )
        )
-
+       # Snowflake does not currently appear to support caching, so we set cached tokens to 0
        usage = completion.usage
        if usage is None:
-           tokens = (0, 0)
+           tokens = (0, 0, 0)
        else:
-           tokens = (usage.prompt_tokens or 0, usage.completion_tokens or 0)
+           tokens = (usage.prompt_tokens or 0, usage.completion_tokens or 0, 0)
 
        tokens_log(self, tokens)
 
{chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_tokens.py

@@ -2,7 +2,6 @@ from __future__ import annotations
 
 import copy
 import importlib.resources as resources
-import warnings
 from threading import Lock
 from typing import TYPE_CHECKING
 
@@ -24,6 +23,7 @@ class TokenUsage(TypedDict):
     model: str
     input: int
     output: int
+    cached_input: int
     cost: float | None
 
 
@@ -33,11 +33,16 @@ class ThreadSafeTokenCounter:
        self._tokens: dict[str, TokenUsage] = {}
 
    def log_tokens(
-       self, name: str, model: str, input_tokens: int, output_tokens: int
+       self,
+       name: str,
+       model: str,
+       input_tokens: int,
+       output_tokens: int,
+       cached_tokens: int,
    ) -> None:
        logger.info(
            f"Provider '{name}' generated a response of {output_tokens} tokens "
-           f"from an input of {input_tokens} tokens."
+           f"from an input of {input_tokens} tokens and {cached_tokens} cached input tokens."
        )
 
        with self._lock:
@@ -47,12 +52,18 @@ class ThreadSafeTokenCounter:
                "model": model,
                "input": input_tokens,
                "output": output_tokens,
-               "cost": compute_price(name, model, input_tokens, output_tokens),
+               "cached_input": cached_tokens,
+               "cost": compute_cost(
+                   name, model, input_tokens, output_tokens, cached_tokens
+               ),
            }
        else:
            self._tokens[name]["input"] += input_tokens
            self._tokens[name]["output"] += output_tokens
-           price = compute_price(name, model, input_tokens, output_tokens)
+           self._tokens[name]["cached_input"] += cached_tokens
+           price = compute_cost(
+               name, model, input_tokens, output_tokens, cached_tokens
+           )
            if price is not None:
                cost = self._tokens[name]["cost"]
                if cost is None:
@@ -72,11 +83,13 @@ class ThreadSafeTokenCounter:
 _token_counter = ThreadSafeTokenCounter()
 
 
-def tokens_log(provider: "Provider", tokens: tuple[int, int]) -> None:
+def tokens_log(provider: "Provider", tokens: tuple[int, int, int]) -> None:
    """
    Log token usage for a provider in a thread-safe manner.
    """
-   _token_counter.log_tokens(provider.name, provider.model, tokens[0], tokens[1])
+   _token_counter.log_tokens(
+       provider.name, provider.model, tokens[0], tokens[1], tokens[2]
+   )
 
 
 def tokens_reset() -> None:
@@ -122,7 +135,7 @@ def get_token_pricing(name: str, model: str) -> TokenPrice | None:
    -------
    TokenPrice | None
    """
-   result = next(
+   return next(
        (
            item
            for item in pricing_list
@@ -130,17 +143,10 @@ def get_token_pricing(name: str, model: str) -> TokenPrice | None:
        ),
        None,
    )
-   if result is None:
-       warnings.warn(
-           f"Token pricing for the provider '{name}' and model '{model}' you selected is not available. "
-           "Please check the provider's documentation."
-       )
-
-   return result
 
 
-def compute_price(
-   name: str, model: str, input_tokens: int, output_tokens: int
+def compute_cost(
+   name: str, model: str, input_tokens: int, output_tokens: int, cached_tokens: int = 0
 ) -> float | None:
    """
    Compute the cost of a turn.
@@ -155,7 +161,8 @@ def compute_price(
        return None
    input_price = input_tokens * (price["input"] / 1e6)
    output_price = output_tokens * (price["output"] / 1e6)
-   return input_price + output_price
+   cached_price = cached_tokens * (price["cached_input"] / 1e6)
+   return input_price + output_price + cached_price
 
 
 def token_usage() -> list[TokenUsage] | None:
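Downstream of these changes, the cumulative usage returned by `chatlas.token_usage()` gains a `cached_input` field, and `cost` is now filled in quietly only when pricing is known (the old warning path is gone, per #132). A sketch of inspecting it, assuming credentials for at least one provider are configured:

```python
import chatlas
from chatlas import ChatOpenAI

chat = ChatOpenAI()
chat.chat("Say hello.")

for usage in chatlas.token_usage() or []:
    # `cached_input` is new in 0.9.2; `cost` is None when pricing is unknown.
    print(
        usage["model"],
        usage["input"],
        usage["output"],
        usage["cached_input"],
        usage["cost"],
    )
```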
{chatlas-0.9.0 → chatlas-0.9.2}/chatlas/_turn.py

@@ -55,7 +55,7 @@ class Turn(BaseModel, Generic[CompletionT]):
    contents
        A list of [](`~chatlas.types.Content`) objects.
    tokens
-       A numeric vector of length 2 representing the number of input and output
+       A numeric vector of length 3 representing the number of input, output, and cached
        tokens (respectively) used in this turn. Currently only recorded for
        assistant turns.
    finish_reason
@@ -69,7 +69,7 @@ class Turn(BaseModel, Generic[CompletionT]):
 
    role: Literal["user", "assistant", "system"]
    contents: list[ContentUnion] = Field(default_factory=list)
-   tokens: Optional[tuple[int, int]] = None
+   tokens: Optional[tuple[int, int, int]] = None
    finish_reason: Optional[str] = None
    completion: Optional[CompletionT] = Field(default=None, exclude=True)
 
@@ -80,7 +80,7 @@ class Turn(BaseModel, Generic[CompletionT]):
        role: Literal["user", "assistant", "system"],
        contents: str | Sequence[Content | str],
        *,
-       tokens: Optional[tuple[int, int]] = None,
+       tokens: Optional[tuple[int, int, int]] = None,
        finish_reason: Optional[str] = None,
        completion: Optional[CompletionT] = None,
        **kwargs,
@@ -134,4 +134,3 @@ def user_turn(*args: Content | str) -> Turn:
        raise ValueError("Must supply at least one input.")
 
    return Turn("user", args)
-
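One practical consequence of the `Turn` change: code that constructs turns with explicit token counts must now pass a 3-tuple. A minimal example of the new shape (the counts are invented):

```python
from chatlas import Turn

# tokens = (input, output, cached); the cached slot is the 0.9.2 addition.
turn = Turn(
    "assistant",
    "Paris is the capital of France.",
    tokens=(24, 9, 0),
)
print(turn.tokens)  # (24, 9, 0)
```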