lm-deluge 0.0.89__tar.gz → 0.0.91__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lm_deluge-0.0.89/src/lm_deluge.egg-info → lm_deluge-0.0.91}/PKG-INFO +12 -12
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/README.md +8 -8
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/pyproject.toml +6 -7
- lm_deluge-0.0.91/src/lm_deluge/__init__.py +16 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/anthropic.py +29 -7
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/base.py +38 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/bedrock.py +29 -3
- lm_deluge-0.0.89/src/lm_deluge/request_context.py → lm_deluge-0.0.91/src/lm_deluge/api_requests/context.py +4 -4
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/gemini.py +30 -14
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/mistral.py +1 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/openai.py +34 -5
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/batches.py +19 -49
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/cache.py +1 -1
- lm_deluge-0.0.91/src/lm_deluge/cli.py +672 -0
- lm_deluge-0.0.89/src/lm_deluge/client.py → lm_deluge-0.0.91/src/lm_deluge/client/__init__.py +42 -13
- lm_deluge-0.0.91/src/lm_deluge/config.py +23 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/embed.py +2 -6
- lm_deluge-0.0.91/src/lm_deluge/models/__init__.py +267 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/anthropic.py +32 -24
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/bedrock.py +9 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/cerebras.py +2 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/cohere.py +2 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/google.py +13 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/grok.py +4 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/groq.py +2 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/meta.py +2 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/minimax.py +9 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/openai.py +24 -1
- lm_deluge-0.0.91/src/lm_deluge/models/openrouter.py +296 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/together.py +3 -0
- lm_deluge-0.0.91/src/lm_deluge/models/zai.py +50 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/extract.py +4 -5
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/__init__.py +1 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/docs/samples.py +19 -10
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
- lm_deluge-0.0.91/src/lm_deluge/prompt/__init__.py +45 -0
- lm_deluge-0.0.89/src/lm_deluge/prompt.py → lm_deluge-0.0.91/src/lm_deluge/prompt/conversation.py +165 -869
- {lm_deluge-0.0.89/src/lm_deluge → lm_deluge-0.0.91/src/lm_deluge/prompt}/image.py +0 -10
- lm_deluge-0.0.91/src/lm_deluge/prompt/message.py +571 -0
- lm_deluge-0.0.91/src/lm_deluge/prompt/serialization.py +21 -0
- lm_deluge-0.0.91/src/lm_deluge/prompt/signatures.py +77 -0
- lm_deluge-0.0.91/src/lm_deluge/prompt/text.py +47 -0
- lm_deluge-0.0.91/src/lm_deluge/prompt/thinking.py +55 -0
- lm_deluge-0.0.91/src/lm_deluge/prompt/tool_calls.py +245 -0
- lm_deluge-0.0.91/src/lm_deluge/server/__init__.py +24 -0
- lm_deluge-0.0.91/src/lm_deluge/server/__main__.py +144 -0
- lm_deluge-0.0.91/src/lm_deluge/server/adapters.py +369 -0
- lm_deluge-0.0.91/src/lm_deluge/server/app.py +388 -0
- lm_deluge-0.0.91/src/lm_deluge/server/auth.py +71 -0
- lm_deluge-0.0.91/src/lm_deluge/server/model_policy.py +215 -0
- lm_deluge-0.0.91/src/lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge-0.0.91/src/lm_deluge/server/models_openai.py +175 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/__init__.py +78 -19
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
- lm_deluge-0.0.91/src/lm_deluge/tool/builtin/anthropic/bash.py +0 -0
- lm_deluge-0.0.91/src/lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/actions.py +26 -26
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/batch.py +1 -2
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/kernel.py +1 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/filesystem.py +2 -2
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/memory.py +3 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/otc/executor.py +3 -3
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/random.py +30 -54
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/rlm/__init__.py +2 -2
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/rlm/executor.py +1 -1
- lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +829 -0
- lm_deluge-0.0.91/src/lm_deluge/tool/prefab/skills.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/subagents.py +1 -1
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/logprobs.py +4 -4
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/schema.py +6 -6
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/validation.py +14 -9
- {lm_deluge-0.0.89 → lm_deluge-0.0.91/src/lm_deluge.egg-info}/PKG-INFO +12 -12
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/SOURCES.txt +30 -7
- lm_deluge-0.0.91/src/lm_deluge.egg-info/entry_points.txt +3 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/requires.txt +4 -4
- lm_deluge-0.0.89/src/lm_deluge/__init__.py +0 -40
- lm_deluge-0.0.89/src/lm_deluge/cli.py +0 -300
- lm_deluge-0.0.89/src/lm_deluge/config.py +0 -45
- lm_deluge-0.0.89/src/lm_deluge/mock_openai.py +0 -643
- lm_deluge-0.0.89/src/lm_deluge/models/__init__.py +0 -158
- lm_deluge-0.0.89/src/lm_deluge/models/openrouter.py +0 -142
- lm_deluge-0.0.89/src/lm_deluge/models/zai.py +0 -1
- lm_deluge-0.0.89/src/lm_deluge/tool/prefab/sandbox.py +0 -1621
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/LICENSE +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/setup.cfg +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/response.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/arcee.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/deepseek.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/fireworks.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/kimi.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/mistral.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/__init__.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/classify.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/core.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/optimizer.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/proposer.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/util.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/locate.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/ocr.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/score.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/translate.py +0 -0
- {lm_deluge-0.0.89/src/lm_deluge → lm_deluge-0.0.91/src/lm_deluge/prompt}/file.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/rerank.py +0 -0
- /lm_deluge-0.0.89/src/lm_deluge/tool/builtin/anthropic/bash.py → /lm_deluge-0.0.91/src/lm_deluge/skills/anthropic.py +0 -0
- /lm_deluge-0.0.89/src/lm_deluge/tool/builtin/anthropic/computer_use.py → /lm_deluge-0.0.91/src/lm_deluge/skills/compat.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/anthropic/editor.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/base.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/gemini.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/openai.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/__init__.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/base.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/converters.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/trycua.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/__init__.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/batch_tool.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/docs.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/email.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/full_text_search/tantivy_index.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/otc/__init__.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/otc/parse.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/rlm/parse.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/sheets.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/todos.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/tool_search.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/web_search.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/harmony.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/warnings.py +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/top_level.txt +0 -0
{lm_deluge-0.0.89/src/lm_deluge.egg-info → lm_deluge-0.0.91}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.
+Version: 0.0.91
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -9,7 +9,6 @@ License-File: LICENSE
 Requires-Dist: python-dotenv
 Requires-Dist: json5
 Requires-Dist: PyYAML
-Requires-Dist: pandas
 Requires-Dist: aiohttp
 Requires-Dist: tiktoken
 Requires-Dist: xxhash
@@ -23,8 +22,6 @@ Requires-Dist: pdf2image
 Requires-Dist: pillow
 Requires-Dist: fastmcp>=2.4
 Requires-Dist: rich
-Provides-Extra: openai
-Requires-Dist: openai>=1.0.0; extra == "openai"
 Provides-Extra: aws
 Requires-Dist: boto3>=1.28.0; extra == "aws"
 Provides-Extra: docker
@@ -36,6 +33,9 @@ Provides-Extra: sandbox
 Requires-Dist: modal>=0.64.0; extra == "sandbox"
 Requires-Dist: daytona-sdk>=0.1.4; extra == "sandbox"
 Requires-Dist: docker>=7.0.0; extra == "sandbox"
+Provides-Extra: server
+Requires-Dist: fastapi>=0.100.0; extra == "server"
+Requires-Dist: uvicorn>=0.20.0; extra == "server"
 Dynamic: license-file

 # lm-deluge
@@ -48,9 +48,9 @@ Dynamic: license-file
 - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
 - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
 - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
-- **Computer Use** – We support
-- **Caching** –
-- **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our
+- **Computer Use** – We support computer use for all major providers, and have pre-fabricated tools to integrate with Kernel, TryCUA, and more.
+- **Local & Remote Caching** – Use Anthropic caching more easily with common patterns (system-only, tools-only, last N messages, etc.) Use client-side caching to save completions to avoid repeated LLM calls to process the same input.
+- **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our `LLMClient` or with the `openai` and `anthropic` packages.
 - **Sync and async APIs** – Use the client from sync or async code.

 **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!
@@ -145,7 +145,7 @@ Constructing conversations to pass to models is notoriously annoying. Each provi
 ```python
 from lm_deluge import Message, Conversation

-prompt = Conversation.system("You are a helpful assistant.").add(
+prompt = Conversation().system("You are a helpful assistant.").add(
 Message.user("What's in this image?").add_image("tests/image.jpg")
 )

@@ -166,7 +166,7 @@ from lm_deluge import LLMClient, Conversation

 # Simple file upload
 client = LLMClient("gpt-4.1-mini")
-conversation = Conversation.user(
+conversation = Conversation().user(
 "Please summarize this document",
 file="path/to/document.pdf"
 )
@@ -175,7 +175,7 @@ resps = client.process_prompts_sync([conversation])
 # You can also create File objects for more control
 from lm_deluge import File
 file = File("path/to/report.pdf", filename="Q4_Report.pdf")
-conversation = Conversation.user("Analyze this financial report")
+conversation = Conversation().user("Analyze this financial report")
 conversation.messages[0].parts.append(file)
 ```

@@ -245,7 +245,7 @@ for tool_call in resps[0].tool_calls:
 import asyncio

 async def main():
-conv = Conversation.user("List the files in the current directory")
+conv = Conversation().user("List the files in the current directory")
 conv, resp = await client.run_agent_loop(conv, tools=tools)
 print(resp.content.completion)

@@ -261,7 +261,7 @@ from lm_deluge import LLMClient, Conversation, Message

 # Create a conversation with system message
 conv = (
-Conversation.system("You are an expert Python developer with deep knowledge of async programming.")
+Conversation().system("You are an expert Python developer with deep knowledge of async programming.")
 .add(Message.user("How do I use asyncio.gather?"))
 )

{lm_deluge-0.0.89 → lm_deluge-0.0.91}/README.md

@@ -8,9 +8,9 @@
 - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
 - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
 - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
-- **Computer Use** – We support
-- **Caching** –
-- **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our
+- **Computer Use** – We support computer use for all major providers, and have pre-fabricated tools to integrate with Kernel, TryCUA, and more.
+- **Local & Remote Caching** – Use Anthropic caching more easily with common patterns (system-only, tools-only, last N messages, etc.) Use client-side caching to save completions to avoid repeated LLM calls to process the same input.
+- **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our `LLMClient` or with the `openai` and `anthropic` packages.
 - **Sync and async APIs** – Use the client from sync or async code.

 **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!
@@ -105,7 +105,7 @@ Constructing conversations to pass to models is notoriously annoying. Each provi
 ```python
 from lm_deluge import Message, Conversation

-prompt = Conversation.system("You are a helpful assistant.").add(
+prompt = Conversation().system("You are a helpful assistant.").add(
 Message.user("What's in this image?").add_image("tests/image.jpg")
 )

@@ -126,7 +126,7 @@ from lm_deluge import LLMClient, Conversation

 # Simple file upload
 client = LLMClient("gpt-4.1-mini")
-conversation = Conversation.user(
+conversation = Conversation().user(
 "Please summarize this document",
 file="path/to/document.pdf"
 )
@@ -135,7 +135,7 @@ resps = client.process_prompts_sync([conversation])
 # You can also create File objects for more control
 from lm_deluge import File
 file = File("path/to/report.pdf", filename="Q4_Report.pdf")
-conversation = Conversation.user("Analyze this financial report")
+conversation = Conversation().user("Analyze this financial report")
 conversation.messages[0].parts.append(file)
 ```

@@ -205,7 +205,7 @@ for tool_call in resps[0].tool_calls:
 import asyncio

 async def main():
-conv = Conversation.user("List the files in the current directory")
+conv = Conversation().user("List the files in the current directory")
 conv, resp = await client.run_agent_loop(conv, tools=tools)
 print(resp.content.completion)

@@ -221,7 +221,7 @@ from lm_deluge import LLMClient, Conversation, Message

 # Create a conversation with system message
 conv = (
-Conversation.system("You are an expert Python developer with deep knowledge of async programming.")
+Conversation().system("You are an expert Python developer with deep knowledge of async programming.")
 .add(Message.user("How do I use asyncio.gather?"))
 )

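Readers skimming the README changes above: every example now starts from an empty `Conversation()` and chains `.system()` / `.user()` / `.add()` instead of calling the old `Conversation.system(...)` and `Conversation.user(...)` classmethods. A minimal sketch assembled only from the updated snippets shown in this diff (the model id and file paths are the README's own placeholders):

```python
from lm_deluge import LLMClient, Conversation, Message

# Builder-style construction used in the 0.0.91 docs:
# start from Conversation(), then chain .system()/.user()/.add().
prompt = Conversation().system("You are a helpful assistant.").add(
    Message.user("What's in this image?").add_image("tests/image.jpg")
)

# File prompts follow the same pattern.
client = LLMClient("gpt-4.1-mini")
conversation = Conversation().user(
    "Please summarize this document",
    file="path/to/document.pdf",
)
resps = client.process_prompts_sync([conversation])
```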
{lm_deluge-0.0.89 → lm_deluge-0.0.91}/pyproject.toml

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

 [project]
 name = "lm_deluge"
-version = "0.0.
+version = "0.0.91"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
@@ -15,7 +15,6 @@ dependencies = [
 "python-dotenv",
 "json5",
 "PyYAML",
-"pandas",
 "aiohttp",
 "tiktoken",
 "xxhash",
@@ -28,16 +27,16 @@ dependencies = [
 "pdf2image",
 "pillow",
 "fastmcp>=2.4",
-"rich"
-# "textual>=0.58.0"
+"rich"
 ]

 [project.optional-dependencies]
-openai = ["openai>=1.0.0"]
 aws = ["boto3>=1.28.0"]
 docker = ["docker>=7.0.0"]
 full_text_search = ["tantivy>=0.21.0", "lenlp>=0.1.0"]
 sandbox = ["modal>=0.64.0", "daytona-sdk>=0.1.4", "docker>=7.0.0"]
+server = ["fastapi>=0.100.0", "uvicorn>=0.20.0"]

-
-
+[project.scripts]
+deluge = "lm_deluge.cli:main"
+deluge-server = "lm_deluge.server.__main__:main"
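Besides dropping the `pandas` dependency and the `openai` extra, the pyproject changes add a `server` extra (FastAPI + uvicorn) and register two console scripts. Under the `[project.scripts]` syntax each script name maps to a `module:function` target, so installing 0.0.91 is equivalent to exposing the two calls sketched below (illustrative only; the functions' behavior is not shown in this diff):

```python
# What the new console scripts resolve to, per the "module.path:function"
# entry-point strings above. Entry-point callables are invoked with no arguments.
from lm_deluge.cli import main as deluge_main              # the `deluge` script
from lm_deluge.server.__main__ import main as server_main  # the `deluge-server` script

if __name__ == "__main__":
    deluge_main()  # or server_main()
```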
lm_deluge-0.0.91/src/lm_deluge/__init__.py

@@ -0,0 +1,16 @@
+from .client import APIResponse, LLMClient, SamplingParams
+from .prompt import Conversation, Message, File
+from .tool import Tool, MCPServer
+
+# dotenv.load_dotenv() - don't do this, fucks with other packages
+
+__all__ = [
+"LLMClient",
+"SamplingParams",
+"APIResponse",
+"Conversation",
+"Message",
+"Tool",
+"MCPServer",
+"File",
+]
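This brand-new top-level `__init__.py` keeps the public API importable from the package root even though `client.py`, `prompt.py`, and the model registry have been split into subpackages (see the renamed files in the listing above). For example:

```python
# Every name in __all__ is re-exported at the package root.
from lm_deluge import (
    APIResponse,
    Conversation,
    File,
    LLMClient,
    MCPServer,
    Message,
    SamplingParams,
    Tool,
)
```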
{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/anthropic.py

@@ -6,10 +6,11 @@ from aiohttp import ClientResponse
 from lm_deluge.prompt import (
 Message,
 Text,
+ThoughtSignature,
 Thinking,
 ToolCall,
 )
-from lm_deluge.
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
 from lm_deluge.util.schema import (
@@ -102,7 +103,9 @@ def _build_anthropic_request(
 if "top_p" in request_json:
 request_json["top_p"] = max(request_json["top_p"], 0.95)
 request_json["temperature"] = 1.0
-request_json["max_tokens"]
+max_tokens = request_json["max_tokens"]
+assert isinstance(max_tokens, int)
+request_json["max_tokens"] = max_tokens + budget
 else:
 request_json["thinking"] = {"type": "disabled"}
 if "kimi" in model.id and "thinking" in model.id:
@@ -250,8 +253,28 @@ class AnthropicRequest(APIRequestBase):
 if item["type"] == "text":
 parts.append(Text(item["text"]))
 elif item["type"] == "thinking":
-
-
+thinking_content = item.get("thinking", "")
+thinking = thinking_content
+signature = item.get("signature")
+parts.append(
+Thinking(
+thinking_content,
+raw_payload=item,
+thought_signature=ThoughtSignature(
+signature,
+provider="anthropic",
+)
+if signature is not None
+else None,
+)
+)
+elif item["type"] == "redacted_thinking":
+parts.append(
+Thinking(
+item.get("data", ""),
+raw_payload=item,
+)
+)
 elif item["type"] == "tool_use":
 parts.append(
 ToolCall(
@@ -265,9 +288,8 @@ class AnthropicRequest(APIRequestBase):
 usage = Usage.from_anthropic_usage(data["usage"])
 except Exception as e:
 is_error = True
-
-
-)
+response_text = await http_response.text()
+error_message = f"Error calling .json() on response w/ status {status_code}: {e}. Response: {response_text[:500]}"
 elif mimetype and "json" in mimetype.lower():
 is_error = True  # expected status is 200, otherwise it's an error
 data = await http_response.json()
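The hunks above teach the Anthropic response parser about extended-thinking blocks: a `thinking` block keeps its text and wraps the optional `signature` in the new `ThoughtSignature` type, while a `redacted_thinking` block stores its opaque `data` as the thinking content. A standalone sketch of the block shapes the new branches read, using plain dicts instead of the library types (the payload values are placeholders):

```python
# Content-block shapes handled by the new parsing branches; the field names
# mirror exactly what the diff reads via item.get(...).
thinking_block = {
    "type": "thinking",
    "thinking": "Compare the two totals before answering...",
    "signature": "<opaque signature string>",  # may be absent
}
redacted_block = {
    "type": "redacted_thinking",
    "data": "<encrypted payload, no readable text>",
}

for item in (thinking_block, redacted_block):
    if item["type"] == "thinking":
        text = item.get("thinking", "")
        signature = item.get("signature")  # wrapped as ThoughtSignature(..., provider="anthropic")
    elif item["type"] == "redacted_thinking":
        text = item.get("data", "")        # stored as the Thinking part's content
```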
{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/base.py

@@ -1,4 +1,6 @@
 import asyncio
+import json
+import os
 import time
 import traceback
 from abc import ABC, abstractmethod
@@ -8,7 +10,7 @@ from aiohttp import ClientResponse

 from ..errors import raise_if_modal_exception
 from ..models.openai import OPENAI_MODELS
-from ..
+from ..api_requests.context import RequestContext
 from .response import APIResponse


@@ -73,6 +75,24 @@ class APIRequestBase(ABC):

 # Start with base headers, then overlay filtered extra headers (extra takes precedence)
 merged = dict(base_headers)
+if "anthropic-beta" in merged and "anthropic-beta" in filtered_extra:
+combined = []
+seen = set()
+for (
+raw
+) in f"{merged['anthropic-beta']},{filtered_extra['anthropic-beta']}".split(
+","
+):
+token = raw.strip()
+if token and token not in seen:
+seen.add(token)
+combined.append(token)
+merged["anthropic-beta"] = ",".join(combined)
+filtered_extra = {
+key: value
+for key, value in filtered_extra.items()
+if key != "anthropic-beta"
+}
 merged.update(filtered_extra)

 # Filter out None values from final merged headers
@@ -189,6 +209,23 @@ class APIRequestBase(ABC):
 await self.build_request()
 assert self.context.status_tracker

+if os.getenv("DELUGE_PROXY_LOG_PROVIDER_REQUESTS", "").strip().lower() in {
+"1",
+"true",
+"yes",
+"on",
+}:
+print("DELUGE_PROXY_PROVIDER_REQUEST")
+print(f"URL: {self.url}")
+print("Headers:")
+print(self.request_header)
+if self.request_json is not None:
+print("JSON:")
+try:
+print(json.dumps(self.request_json, indent=2))
+except Exception:
+print(self.request_json)
+
 if (
 self.context.background
 and self.context.use_responses_api
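Two additions in base.py above: when the base headers and the per-request extra headers both carry an `anthropic-beta` value, the beta flags are now merged into one comma-separated, de-duplicated header instead of the extra value silently replacing the base one; and setting `DELUGE_PROXY_LOG_PROVIDER_REQUESTS` to `1`/`true`/`yes`/`on` prints the outgoing provider URL, headers, and JSON body before the call. A small standalone sketch of the merge semantics (plain dicts with placeholder flag names, not the class's actual attributes):

```python
# De-duplicating merge of the anthropic-beta header, mirroring the diff's loop.
base_headers = {"anthropic-beta": "flag-a,flag-b"}
extra_headers = {"anthropic-beta": "flag-b,flag-c"}

merged = dict(base_headers)
combined, seen = [], set()
for raw in f"{merged['anthropic-beta']},{extra_headers['anthropic-beta']}".split(","):
    token = raw.strip()
    if token and token not in seen:
        seen.add(token)
        combined.append(token)
merged["anthropic-beta"] = ",".join(combined)

print(merged["anthropic-beta"])  # flag-a,flag-b,flag-c
```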
{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/bedrock.py

@@ -16,10 +16,11 @@ except ImportError:
 from lm_deluge.prompt import (
 Message,
 Text,
+ThoughtSignature,
 Thinking,
 ToolCall,
 )
-from lm_deluge.
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage

@@ -262,6 +263,11 @@ class BedrockRequest(APIRequestBase):
 # Create a fake requests.PreparedRequest object for AWS4Auth to sign
 import requests

+assert self.url is not None, "URL must be set after build_request"
+assert (
+self.request_header is not None
+), "Headers must be set after build_request"
+
 fake_request = requests.Request(
 method="POST",
 url=self.url,
@@ -363,8 +369,28 @@ class BedrockRequest(APIRequestBase):
 if item["type"] == "text":
 parts.append(Text(item["text"]))
 elif item["type"] == "thinking":
-
-
+thinking_content = item.get("thinking", "")
+thinking = thinking_content
+signature = item.get("signature")
+parts.append(
+Thinking(
+thinking_content,
+raw_payload=item,
+thought_signature=ThoughtSignature(
+signature,
+provider="anthropic",
+)
+if signature is not None
+else None,
+)
+)
+elif item["type"] == "redacted_thinking":
+parts.append(
+Thinking(
+item.get("data", ""),
+raw_payload=item,
+)
+)
 elif item["type"] == "tool_use":
 parts.append(
 ToolCall(
lm_deluge-0.0.89/src/lm_deluge/request_context.py → lm_deluge-0.0.91/src/lm_deluge/api_requests/context.py

@@ -2,9 +2,9 @@ from dataclasses import dataclass, field
 from functools import cached_property
 from typing import Any, Callable, Sequence, TYPE_CHECKING

-from
-from
-from
+from ..config import SamplingParams
+from ..prompt import CachePattern, Conversation
+from ..tracker import StatusTracker

 if TYPE_CHECKING:
 from pydantic import BaseModel
@@ -83,4 +83,4 @@ class RequestContext:
 # Update with any overrides
 current_values.update(overrides)

-return RequestContext(**current_values)
+return RequestContext(**current_values)  # type: ignore[arg-type]
{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/gemini.py

@@ -1,15 +1,16 @@
 import json
 import os
+from typing import Any

 from aiohttp import ClientResponse

-from lm_deluge.
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import Tool
 from lm_deluge.warnings import maybe_warn

 from ..config import SamplingParams
 from ..models import APIModel
-from ..prompt import Conversation, Message, Text, Thinking, ToolCall
+from ..prompt import Conversation, Message, Text, ThoughtSignature, Thinking, ToolCall
 from ..usage import Usage
 from .base import APIRequestBase, APIResponse

@@ -37,13 +38,14 @@ async def _build_gemini_request(
 part_type="function call",
 )

-
+generation_config: dict[str, Any] = {
+"temperature": sampling_params.temperature,
+"topP": sampling_params.top_p,
+"maxOutputTokens": sampling_params.max_new_tokens,
+}
+request_json: dict[str, Any] = {
 "contents": messages,
-"generationConfig":
-"temperature": sampling_params.temperature,
-"topP": sampling_params.top_p,
-"maxOutputTokens": sampling_params.max_new_tokens,
-},
+"generationConfig": generation_config,
 }

 # Add system instruction if present
@@ -83,7 +85,7 @@ async def _build_gemini_request(
 }
 effort = level_map[effort_key]
 thinking_config = {"thinkingLevel": effort}
-
+generation_config["thinkingConfig"] = thinking_config

 elif model.reasoning_model:
 if (
@@ -126,7 +128,7 @@ async def _build_gemini_request(
 # no thoughts head empty
 thinking_config = {"includeThoughts": False, "thinkingBudget": 0}

-
+generation_config["thinkingConfig"] = thinking_config

 else:
 if sampling_params.reasoning_effort:
@@ -171,14 +173,14 @@ async def _build_gemini_request(

 # Handle JSON mode
 if sampling_params.json_mode and model.supports_json:
-
+generation_config["responseMimeType"] = "application/json"

 # Handle media_resolution for Gemini 3 (requires v1alpha)
 if sampling_params.media_resolution is not None:
 is_gemini_3 = "gemini-3" in model.name.lower()
 if is_gemini_3:
 # Add global media resolution to generationConfig
-
+generation_config["mediaResolution"] = {
 "level": sampling_params.media_resolution
 }
 else:
@@ -260,10 +262,20 @@ class GeminiRequest(APIRequestBase):
 if "content" in candidate and "parts" in candidate["content"]:
 for part in candidate["content"]["parts"]:
 # Extract thought signature if present
-
+raw_sig = part.get("thoughtSignature")
+thought_sig = (
+ThoughtSignature(raw_sig, provider="gemini")
+if raw_sig is not None
+else None
+)

 if "text" in part:
-parts.append(
+parts.append(
+Text(
+part["text"],
+thought_signature=thought_sig,
+)
+)
 elif "thought" in part:
 # Thought with optional signature
 parts.append(
@@ -286,6 +298,10 @@ class GeminiRequest(APIRequestBase):
 thought_signature=thought_sig,
 )
 )
+elif thought_sig:
+parts.append(
+Text("", thought_signature=thought_sig)
+)

 content = Message("assistant", parts)

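The gemini.py refactor above builds a single `generation_config` dict up front so the later branches (thinking config, JSON mode, Gemini 3 media resolution) can extend it before it is attached to the request as `generationConfig`. Roughly, the dict ends up shaped like this; the keys come straight from the diff, while the concrete values below are placeholders:

```python
# Approximate shape of the consolidated generationConfig after the refactor.
generation_config = {
    "temperature": 0.7,       # sampling_params.temperature
    "topP": 0.95,             # sampling_params.top_p
    "maxOutputTokens": 1024,  # sampling_params.max_new_tokens
    # only for reasoning models; either {"thinkingLevel": ...} or
    # {"includeThoughts": False, "thinkingBudget": 0}:
    "thinkingConfig": {"includeThoughts": False, "thinkingBudget": 0},
    # only when json_mode is set and the model supports it:
    "responseMimeType": "application/json",
    # only for Gemini 3 models when media_resolution is given:
    "mediaResolution": {"level": "<sampling_params.media_resolution>"},
}

request_json = {"contents": [], "generationConfig": generation_config}
```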
{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/mistral.py

@@ -7,7 +7,7 @@ from lm_deluge.warnings import maybe_warn

 from ..models import APIModel
 from ..prompt import Message
-from ..
+from ..api_requests.context import RequestContext
 from ..usage import Usage
 from .base import APIRequestBase, APIResponse

{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/openai.py

@@ -7,7 +7,7 @@ from typing import Sequence
 import aiohttp
 from aiohttp import ClientResponse

-from lm_deluge.
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.util.schema import (
 prepare_output_schema,
@@ -22,6 +22,24 @@ from ..usage import Usage
 from .base import APIRequestBase, APIResponse


+def _message_contents_to_string(messages: list[dict]):
+messages = messages.copy()
+
+for msg in messages:
+content = msg.get("content")
+assert content
+if isinstance(content, list):
+new_content = ""
+for part in content:
+assert "text" in part, "Invalid text part: " + str(part)
+new_content += part["text"]
+new_content += "\n"
+
+msg["content"] = new_content.strip()
+
+return messages
+
+
 async def _build_oa_chat_request(
 model: APIModel,
 context: RequestContext,
@@ -55,6 +73,12 @@ async def _build_oa_chat_request(
 request_json["service_tier"] = context.service_tier
 else:
 request_json["service_tier"] = context.service_tier
+# if tinker, for now hack to mush into 1 string
+if "tinker" in model.name:
+messages = request_json["messages"]
+assert isinstance(messages, list)
+request_json["messages"] = _message_contents_to_string(messages)
+
 # set max_tokens or max_completion_tokens dep. on provider
 if "cohere" in model.api_base:
 request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -217,7 +241,7 @@ class OpenAIRequest(APIRequestBase):
 parts.append(Text(message["content"]))

 # Add tool calls if present
-if "tool_calls" in message:
+if "tool_calls" in message and message["tool_calls"] is not None:
 for tool_call in message["tool_calls"]:
 parts.append(
 ToolCall(
@@ -238,9 +262,9 @@ class OpenAIRequest(APIRequestBase):
 and "logprobs" in data["choices"][0]
 ):
 logprobs = data["choices"][0]["logprobs"]["content"]
-except Exception:
+except Exception as e:
 is_error = True
-error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response."
+error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response: {data}. Error: {e}"
 elif mimetype and "json" in mimetype.lower():
 is_error = True  # expected status is 200, otherwise it's an error
 data = await http_response.json()
@@ -655,7 +679,12 @@ async def stream_chat(
 request_header.update(filtered_extra)

 context = SimpleNamespace(
-prompt=prompt,
+prompt=prompt,
+tools=tools,
+sampling_params=sampling_params,
+service_tier=None,
+output_schema=None,
+model_name=model_name,
 )

 request_json = await _build_oa_chat_request(model, context)  # type: ignore
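Finally, openai.py gains `_message_contents_to_string`, used as a stop-gap when the model name contains "tinker": any message whose `content` is a list of text parts is flattened into one newline-joined string before the request goes out. A before/after sketch with plain dicts (the text values are placeholders):

```python
# Input: OpenAI-style message whose content is a list of text parts.
before = {
    "role": "user",
    "content": [
        {"type": "text", "text": "First paragraph."},
        {"type": "text", "text": "Second paragraph."},
    ],
}

# After _message_contents_to_string: the parts are joined with newlines
# and the trailing newline is stripped.
after = {
    "role": "user",
    "content": "First paragraph.\nSecond paragraph.",
}
```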