lm-deluge 0.0.89__tar.gz → 0.0.91__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. {lm_deluge-0.0.89/src/lm_deluge.egg-info → lm_deluge-0.0.91}/PKG-INFO +12 -12
  2. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/README.md +8 -8
  3. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/pyproject.toml +6 -7
  4. lm_deluge-0.0.91/src/lm_deluge/__init__.py +16 -0
  5. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/anthropic.py +29 -7
  6. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/base.py +38 -1
  7. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/bedrock.py +29 -3
  8. lm_deluge-0.0.89/src/lm_deluge/request_context.py → lm_deluge-0.0.91/src/lm_deluge/api_requests/context.py +4 -4
  9. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/gemini.py +30 -14
  10. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/mistral.py +1 -1
  11. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/openai.py +34 -5
  12. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/batches.py +19 -49
  13. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/cache.py +1 -1
  14. lm_deluge-0.0.91/src/lm_deluge/cli.py +672 -0
  15. lm_deluge-0.0.89/src/lm_deluge/client.py → lm_deluge-0.0.91/src/lm_deluge/client/__init__.py +42 -13
  16. lm_deluge-0.0.91/src/lm_deluge/config.py +23 -0
  17. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/embed.py +2 -6
  18. lm_deluge-0.0.91/src/lm_deluge/models/__init__.py +267 -0
  19. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/anthropic.py +32 -24
  20. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/bedrock.py +9 -0
  21. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/cerebras.py +2 -0
  22. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/cohere.py +2 -0
  23. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/google.py +13 -0
  24. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/grok.py +4 -0
  25. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/groq.py +2 -0
  26. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/meta.py +2 -0
  27. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/minimax.py +9 -1
  28. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/openai.py +24 -1
  29. lm_deluge-0.0.91/src/lm_deluge/models/openrouter.py +296 -0
  30. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/together.py +3 -0
  31. lm_deluge-0.0.91/src/lm_deluge/models/zai.py +50 -0
  32. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/extract.py +4 -5
  33. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/__init__.py +1 -1
  34. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/docs/samples.py +19 -10
  35. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
  36. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
  37. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
  38. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
  39. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
  40. lm_deluge-0.0.91/src/lm_deluge/prompt/__init__.py +45 -0
  41. lm_deluge-0.0.89/src/lm_deluge/prompt.py → lm_deluge-0.0.91/src/lm_deluge/prompt/conversation.py +165 -869
  42. {lm_deluge-0.0.89/src/lm_deluge → lm_deluge-0.0.91/src/lm_deluge/prompt}/image.py +0 -10
  43. lm_deluge-0.0.91/src/lm_deluge/prompt/message.py +571 -0
  44. lm_deluge-0.0.91/src/lm_deluge/prompt/serialization.py +21 -0
  45. lm_deluge-0.0.91/src/lm_deluge/prompt/signatures.py +77 -0
  46. lm_deluge-0.0.91/src/lm_deluge/prompt/text.py +47 -0
  47. lm_deluge-0.0.91/src/lm_deluge/prompt/thinking.py +55 -0
  48. lm_deluge-0.0.91/src/lm_deluge/prompt/tool_calls.py +245 -0
  49. lm_deluge-0.0.91/src/lm_deluge/server/__init__.py +24 -0
  50. lm_deluge-0.0.91/src/lm_deluge/server/__main__.py +144 -0
  51. lm_deluge-0.0.91/src/lm_deluge/server/adapters.py +369 -0
  52. lm_deluge-0.0.91/src/lm_deluge/server/app.py +388 -0
  53. lm_deluge-0.0.91/src/lm_deluge/server/auth.py +71 -0
  54. lm_deluge-0.0.91/src/lm_deluge/server/model_policy.py +215 -0
  55. lm_deluge-0.0.91/src/lm_deluge/server/models_anthropic.py +172 -0
  56. lm_deluge-0.0.91/src/lm_deluge/server/models_openai.py +175 -0
  57. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/__init__.py +78 -19
  58. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
  59. lm_deluge-0.0.91/src/lm_deluge/tool/builtin/anthropic/bash.py +0 -0
  60. lm_deluge-0.0.91/src/lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
  61. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/actions.py +26 -26
  62. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/batch.py +1 -2
  63. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/kernel.py +1 -1
  64. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/filesystem.py +2 -2
  65. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
  66. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/memory.py +3 -1
  67. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/otc/executor.py +3 -3
  68. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/random.py +30 -54
  69. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/rlm/__init__.py +2 -2
  70. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/rlm/executor.py +1 -1
  71. lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  72. lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  73. lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  74. lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  75. lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  76. lm_deluge-0.0.91/src/lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +829 -0
  77. lm_deluge-0.0.91/src/lm_deluge/tool/prefab/skills.py +0 -0
  78. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/subagents.py +1 -1
  79. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/logprobs.py +4 -4
  80. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/schema.py +6 -6
  81. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/validation.py +14 -9
  82. {lm_deluge-0.0.89 → lm_deluge-0.0.91/src/lm_deluge.egg-info}/PKG-INFO +12 -12
  83. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/SOURCES.txt +30 -7
  84. lm_deluge-0.0.91/src/lm_deluge.egg-info/entry_points.txt +3 -0
  85. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/requires.txt +4 -4
  86. lm_deluge-0.0.89/src/lm_deluge/__init__.py +0 -40
  87. lm_deluge-0.0.89/src/lm_deluge/cli.py +0 -300
  88. lm_deluge-0.0.89/src/lm_deluge/config.py +0 -45
  89. lm_deluge-0.0.89/src/lm_deluge/mock_openai.py +0 -643
  90. lm_deluge-0.0.89/src/lm_deluge/models/__init__.py +0 -158
  91. lm_deluge-0.0.89/src/lm_deluge/models/openrouter.py +0 -142
  92. lm_deluge-0.0.89/src/lm_deluge/models/zai.py +0 -1
  93. lm_deluge-0.0.89/src/lm_deluge/tool/prefab/sandbox.py +0 -1621
  94. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/LICENSE +0 -0
  95. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/setup.cfg +0 -0
  96. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/__init__.py +0 -0
  97. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
  98. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/common.py +0 -0
  99. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  100. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  101. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  102. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  103. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  104. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/response.py +0 -0
  105. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/errors.py +0 -0
  106. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/arcee.py +0 -0
  107. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/deepseek.py +0 -0
  108. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/fireworks.py +0 -0
  109. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/kimi.py +0 -0
  110. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/models/mistral.py +0 -0
  111. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/__init__.py +0 -0
  112. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/classify.py +0 -0
  113. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/core.py +0 -0
  114. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/optimizer.py +0 -0
  115. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/proposer.py +0 -0
  116. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/util.py +0 -0
  117. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/locate.py +0 -0
  118. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/ocr.py +0 -0
  119. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/score.py +0 -0
  120. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/translate.py +0 -0
  121. {lm_deluge-0.0.89/src/lm_deluge → lm_deluge-0.0.91/src/lm_deluge/prompt}/file.py +0 -0
  122. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/rerank.py +0 -0
  123. /lm_deluge-0.0.89/src/lm_deluge/tool/builtin/anthropic/bash.py → /lm_deluge-0.0.91/src/lm_deluge/skills/anthropic.py +0 -0
  124. /lm_deluge-0.0.89/src/lm_deluge/tool/builtin/anthropic/computer_use.py → /lm_deluge-0.0.91/src/lm_deluge/skills/compat.py +0 -0
  125. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/anthropic/editor.py +0 -0
  126. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/base.py +0 -0
  127. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/gemini.py +0 -0
  128. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/openai.py +0 -0
  129. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/__init__.py +0 -0
  130. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/base.py +0 -0
  131. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/converters.py +0 -0
  132. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/trycua.py +0 -0
  133. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/__init__.py +0 -0
  134. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/batch_tool.py +0 -0
  135. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/docs.py +0 -0
  136. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/email.py +0 -0
  137. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/full_text_search/tantivy_index.py +0 -0
  138. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/otc/__init__.py +0 -0
  139. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/otc/parse.py +0 -0
  140. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/rlm/parse.py +0 -0
  141. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/sheets.py +0 -0
  142. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/todos.py +0 -0
  143. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/tool_search.py +0 -0
  144. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/web_search.py +0 -0
  145. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/tracker.py +0 -0
  146. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/usage.py +0 -0
  147. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/harmony.py +0 -0
  148. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/json.py +0 -0
  149. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/spatial.py +0 -0
  150. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/util/xml.py +0 -0
  151. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/warnings.py +0 -0
  152. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  153. {lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/top_level.txt +0 -0
{lm_deluge-0.0.89/src/lm_deluge.egg-info → lm_deluge-0.0.91}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.89
+ Version: 0.0.91
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
@@ -9,7 +9,6 @@ License-File: LICENSE
  Requires-Dist: python-dotenv
  Requires-Dist: json5
  Requires-Dist: PyYAML
- Requires-Dist: pandas
  Requires-Dist: aiohttp
  Requires-Dist: tiktoken
  Requires-Dist: xxhash
@@ -23,8 +22,6 @@ Requires-Dist: pdf2image
  Requires-Dist: pillow
  Requires-Dist: fastmcp>=2.4
  Requires-Dist: rich
- Provides-Extra: openai
- Requires-Dist: openai>=1.0.0; extra == "openai"
  Provides-Extra: aws
  Requires-Dist: boto3>=1.28.0; extra == "aws"
  Provides-Extra: docker
@@ -36,6 +33,9 @@ Provides-Extra: sandbox
  Requires-Dist: modal>=0.64.0; extra == "sandbox"
  Requires-Dist: daytona-sdk>=0.1.4; extra == "sandbox"
  Requires-Dist: docker>=7.0.0; extra == "sandbox"
+ Provides-Extra: server
+ Requires-Dist: fastapi>=0.100.0; extra == "server"
+ Requires-Dist: uvicorn>=0.20.0; extra == "server"
  Dynamic: license-file

  # lm-deluge
@@ -48,9 +48,9 @@ Dynamic: license-file
  - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
  - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
  - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
- - **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
- - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
- - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
+ - **Computer Use** – We support computer use for all major providers, and have pre-fabricated tools to integrate with Kernel, TryCUA, and more.
+ - **Local & Remote Caching** – Use Anthropic caching more easily with common patterns (system-only, tools-only, last N messages, etc.) Use client-side caching to save completions to avoid repeated LLM calls to process the same input.
+ - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our `LLMClient` or with the `openai` and `anthropic` packages.
  - **Sync and async APIs** – Use the client from sync or async code.

  **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!
@@ -145,7 +145,7 @@ Constructing conversations to pass to models is notoriously annoying. Each provi
  ```python
  from lm_deluge import Message, Conversation

- prompt = Conversation.system("You are a helpful assistant.").add(
+ prompt = Conversation().system("You are a helpful assistant.").add(
  Message.user("What's in this image?").add_image("tests/image.jpg")
  )

@@ -166,7 +166,7 @@ from lm_deluge import LLMClient, Conversation

  # Simple file upload
  client = LLMClient("gpt-4.1-mini")
- conversation = Conversation.user(
+ conversation = Conversation().user(
  "Please summarize this document",
  file="path/to/document.pdf"
  )
@@ -175,7 +175,7 @@ resps = client.process_prompts_sync([conversation])
  # You can also create File objects for more control
  from lm_deluge import File
  file = File("path/to/report.pdf", filename="Q4_Report.pdf")
- conversation = Conversation.user("Analyze this financial report")
+ conversation = Conversation().user("Analyze this financial report")
  conversation.messages[0].parts.append(file)
  ```

@@ -245,7 +245,7 @@ for tool_call in resps[0].tool_calls:
  import asyncio

  async def main():
- conv = Conversation.user("List the files in the current directory")
+ conv = Conversation().user("List the files in the current directory")
  conv, resp = await client.run_agent_loop(conv, tools=tools)
  print(resp.content.completion)

@@ -261,7 +261,7 @@ from lm_deluge import LLMClient, Conversation, Message

  # Create a conversation with system message
  conv = (
- Conversation.system("You are an expert Python developer with deep knowledge of async programming.")
+ Conversation().system("You are an expert Python developer with deep knowledge of async programming.")
  .add(Message.user("How do I use asyncio.gather?"))
  )

{lm_deluge-0.0.89 → lm_deluge-0.0.91}/README.md

@@ -8,9 +8,9 @@
  - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
  - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
  - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
- - **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
- - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
- - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
+ - **Computer Use** – We support computer use for all major providers, and have pre-fabricated tools to integrate with Kernel, TryCUA, and more.
+ - **Local & Remote Caching** – Use Anthropic caching more easily with common patterns (system-only, tools-only, last N messages, etc.) Use client-side caching to save completions to avoid repeated LLM calls to process the same input.
+ - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our `LLMClient` or with the `openai` and `anthropic` packages.
  - **Sync and async APIs** – Use the client from sync or async code.

  **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!
@@ -105,7 +105,7 @@ Constructing conversations to pass to models is notoriously annoying. Each provi
  ```python
  from lm_deluge import Message, Conversation

- prompt = Conversation.system("You are a helpful assistant.").add(
+ prompt = Conversation().system("You are a helpful assistant.").add(
  Message.user("What's in this image?").add_image("tests/image.jpg")
  )

@@ -126,7 +126,7 @@ from lm_deluge import LLMClient, Conversation

  # Simple file upload
  client = LLMClient("gpt-4.1-mini")
- conversation = Conversation.user(
+ conversation = Conversation().user(
  "Please summarize this document",
  file="path/to/document.pdf"
  )
@@ -135,7 +135,7 @@ resps = client.process_prompts_sync([conversation])
  # You can also create File objects for more control
  from lm_deluge import File
  file = File("path/to/report.pdf", filename="Q4_Report.pdf")
- conversation = Conversation.user("Analyze this financial report")
+ conversation = Conversation().user("Analyze this financial report")
  conversation.messages[0].parts.append(file)
  ```

@@ -205,7 +205,7 @@ for tool_call in resps[0].tool_calls:
  import asyncio

  async def main():
- conv = Conversation.user("List the files in the current directory")
+ conv = Conversation().user("List the files in the current directory")
  conv, resp = await client.run_agent_loop(conv, tools=tools)
  print(resp.content.completion)

@@ -221,7 +221,7 @@ from lm_deluge import LLMClient, Conversation, Message

  # Create a conversation with system message
  conv = (
- Conversation.system("You are an expert Python developer with deep knowledge of async programming.")
+ Conversation().system("You are an expert Python developer with deep knowledge of async programming.")
  .add(Message.user("How do I use asyncio.gather?"))
  )

{lm_deluge-0.0.89 → lm_deluge-0.0.91}/pyproject.toml

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

  [project]
  name = "lm_deluge"
- version = "0.0.89"
+ version = "0.0.91"
  authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
  description = "Python utility for using LLM API models."
  readme = "README.md"
@@ -15,7 +15,6 @@ dependencies = [
  "python-dotenv",
  "json5",
  "PyYAML",
- "pandas",
  "aiohttp",
  "tiktoken",
  "xxhash",
@@ -28,16 +27,16 @@ dependencies = [
  "pdf2image",
  "pillow",
  "fastmcp>=2.4",
- "rich",
- # "textual>=0.58.0"
+ "rich"
  ]

  [project.optional-dependencies]
- openai = ["openai>=1.0.0"]
  aws = ["boto3>=1.28.0"]
  docker = ["docker>=7.0.0"]
  full_text_search = ["tantivy>=0.21.0", "lenlp>=0.1.0"]
  sandbox = ["modal>=0.64.0", "daytona-sdk>=0.1.4", "docker>=7.0.0"]
+ server = ["fastapi>=0.100.0", "uvicorn>=0.20.0"]

- # [project.scripts]
- # deluge = "lm_deluge.cli:main"
+ [project.scripts]
+ deluge = "lm_deluge.cli:main"
+ deluge-server = "lm_deluge.server.__main__:main"
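
The `[project.scripts]` table above promotes the previously commented-out CLI entry point to a real console script and adds a second one for the new server, while the `server` extra pulls in FastAPI and uvicorn. A quick way to confirm what an installed wheel registers (a sketch; it assumes lm_deluge 0.0.91 is installed in the current environment):

```python
from importlib.metadata import entry_points

# Console scripts exposed by installed distributions; with 0.0.91 installed,
# this should list `deluge` and `deluge-server` pointing into lm_deluge.
for ep in entry_points(group="console_scripts"):
    if ep.value.startswith("lm_deluge"):
        print(f"{ep.name} -> {ep.value}")
```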

lm_deluge-0.0.91/src/lm_deluge/__init__.py

@@ -0,0 +1,16 @@
+ from .client import APIResponse, LLMClient, SamplingParams
+ from .prompt import Conversation, Message, File
+ from .tool import Tool, MCPServer
+
+ # dotenv.load_dotenv() - don't do this, fucks with other packages
+
+ __all__ = [
+ "LLMClient",
+ "SamplingParams",
+ "APIResponse",
+ "Conversation",
+ "Message",
+ "Tool",
+ "MCPServer",
+ "File",
+ ]
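
The new top-level `__init__.py` re-exports the package's public entry points so everything can be imported from `lm_deluge` directly. A minimal usage sketch assembled from the README examples elsewhere in this diff (model name and image path are illustrative):

```python
from lm_deluge import Conversation, LLMClient, Message

# Build a prompt with the fluent constructors re-exported above.
prompt = Conversation().system("You are a helpful assistant.").add(
    Message.user("What's in this image?").add_image("tests/image.jpg")
)

# Run it through a client; the model name is copied from the README examples.
client = LLMClient("gpt-4.1-mini")
resps = client.process_prompts_sync([prompt])
```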

{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/anthropic.py

@@ -6,10 +6,11 @@ from aiohttp import ClientResponse
  from lm_deluge.prompt import (
  Message,
  Text,
+ ThoughtSignature,
  Thinking,
  ToolCall,
  )
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.usage import Usage
  from lm_deluge.util.schema import (
@@ -102,7 +103,9 @@ def _build_anthropic_request(
  if "top_p" in request_json:
  request_json["top_p"] = max(request_json["top_p"], 0.95)
  request_json["temperature"] = 1.0
- request_json["max_tokens"] += budget
+ max_tokens = request_json["max_tokens"]
+ assert isinstance(max_tokens, int)
+ request_json["max_tokens"] = max_tokens + budget
  else:
  request_json["thinking"] = {"type": "disabled"}
  if "kimi" in model.id and "thinking" in model.id:
@@ -250,8 +253,28 @@ class AnthropicRequest(APIRequestBase):
  if item["type"] == "text":
  parts.append(Text(item["text"]))
  elif item["type"] == "thinking":
- thinking = item["thinking"]
- parts.append(Thinking(item["thinking"]))
+ thinking_content = item.get("thinking", "")
+ thinking = thinking_content
+ signature = item.get("signature")
+ parts.append(
+ Thinking(
+ thinking_content,
+ raw_payload=item,
+ thought_signature=ThoughtSignature(
+ signature,
+ provider="anthropic",
+ )
+ if signature is not None
+ else None,
+ )
+ )
+ elif item["type"] == "redacted_thinking":
+ parts.append(
+ Thinking(
+ item.get("data", ""),
+ raw_payload=item,
+ )
+ )
  elif item["type"] == "tool_use":
  parts.append(
  ToolCall(
@@ -265,9 +288,8 @@
  usage = Usage.from_anthropic_usage(data["usage"])
  except Exception as e:
  is_error = True
- error_message = (
- f"Error calling .json() on response w/ status {status_code}: {e}"
- )
+ response_text = await http_response.text()
+ error_message = f"Error calling .json() on response w/ status {status_code}: {e}. Response: {response_text[:500]}"
  elif mimetype and "json" in mimetype.lower():
  is_error = True # expected status is 200, otherwise it's an error
  data = await http_response.json()
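
For reference, the new `thinking` and `redacted_thinking` branches above consume assistant content blocks shaped roughly like the following (field names are the ones read in the diff; the values are placeholders, not real API output):

```python
# Visible thinking block: readable text plus an opaque signature string.
thinking_block = {
    "type": "thinking",
    "thinking": "Step-by-step reasoning...",
    "signature": "<opaque signature>",
}

# Redacted thinking block: no readable text, only an encrypted payload in `data`.
redacted_block = {
    "type": "redacted_thinking",
    "data": "<encrypted payload>",
}
```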

{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/base.py

@@ -1,4 +1,6 @@
  import asyncio
+ import json
+ import os
  import time
  import traceback
  from abc import ABC, abstractmethod
@@ -8,7 +10,7 @@ from aiohttp import ClientResponse

  from ..errors import raise_if_modal_exception
  from ..models.openai import OPENAI_MODELS
- from ..request_context import RequestContext
+ from ..api_requests.context import RequestContext
  from .response import APIResponse


@@ -73,6 +75,24 @@ class APIRequestBase(ABC):

  # Start with base headers, then overlay filtered extra headers (extra takes precedence)
  merged = dict(base_headers)
+ if "anthropic-beta" in merged and "anthropic-beta" in filtered_extra:
+ combined = []
+ seen = set()
+ for (
+ raw
+ ) in f"{merged['anthropic-beta']},{filtered_extra['anthropic-beta']}".split(
+ ","
+ ):
+ token = raw.strip()
+ if token and token not in seen:
+ seen.add(token)
+ combined.append(token)
+ merged["anthropic-beta"] = ",".join(combined)
+ filtered_extra = {
+ key: value
+ for key, value in filtered_extra.items()
+ if key != "anthropic-beta"
+ }
  merged.update(filtered_extra)

  # Filter out None values from final merged headers
@@ -189,6 +209,23 @@ class APIRequestBase(ABC):
  await self.build_request()
  assert self.context.status_tracker

+ if os.getenv("DELUGE_PROXY_LOG_PROVIDER_REQUESTS", "").strip().lower() in {
+ "1",
+ "true",
+ "yes",
+ "on",
+ }:
+ print("DELUGE_PROXY_PROVIDER_REQUEST")
+ print(f"URL: {self.url}")
+ print("Headers:")
+ print(self.request_header)
+ if self.request_json is not None:
+ print("JSON:")
+ try:
+ print(json.dumps(self.request_json, indent=2))
+ except Exception:
+ print(self.request_json)
+
  if (
  self.context.background
  and self.context.use_responses_api
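
The first hunk above merges duplicate `anthropic-beta` headers by deduplicating the comma-separated values instead of letting the extra header clobber the base one; the second adds opt-in provider-request logging gated on the `DELUGE_PROXY_LOG_PROVIDER_REQUESTS` environment variable (any of `1`/`true`/`yes`/`on`). The merge logic as a standalone sketch (the function name is illustrative, not part of the package API):

```python
def merge_comma_separated(base: str, extra: str) -> str:
    """Combine two comma-separated header values, dropping duplicates but keeping order."""
    seen: set[str] = set()
    combined: list[str] = []
    for raw in f"{base},{extra}".split(","):
        token = raw.strip()
        if token and token not in seen:
            seen.add(token)
            combined.append(token)
    return ",".join(combined)


print(merge_comma_separated("beta-a,beta-b", "beta-b,beta-c"))  # beta-a,beta-b,beta-c
```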

{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/bedrock.py

@@ -16,10 +16,11 @@ except ImportError:
  from lm_deluge.prompt import (
  Message,
  Text,
+ ThoughtSignature,
  Thinking,
  ToolCall,
  )
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.usage import Usage

@@ -262,6 +263,11 @@ class BedrockRequest(APIRequestBase):
  # Create a fake requests.PreparedRequest object for AWS4Auth to sign
  import requests

+ assert self.url is not None, "URL must be set after build_request"
+ assert (
+ self.request_header is not None
+ ), "Headers must be set after build_request"
+
  fake_request = requests.Request(
  method="POST",
  url=self.url,
@@ -363,8 +369,28 @@
  if item["type"] == "text":
  parts.append(Text(item["text"]))
  elif item["type"] == "thinking":
- thinking = item["thinking"]
- parts.append(Thinking(item["thinking"]))
+ thinking_content = item.get("thinking", "")
+ thinking = thinking_content
+ signature = item.get("signature")
+ parts.append(
+ Thinking(
+ thinking_content,
+ raw_payload=item,
+ thought_signature=ThoughtSignature(
+ signature,
+ provider="anthropic",
+ )
+ if signature is not None
+ else None,
+ )
+ )
+ elif item["type"] == "redacted_thinking":
+ parts.append(
+ Thinking(
+ item.get("data", ""),
+ raw_payload=item,
+ )
+ )
  elif item["type"] == "tool_use":
  parts.append(
  ToolCall(

lm_deluge-0.0.89/src/lm_deluge/request_context.py → lm_deluge-0.0.91/src/lm_deluge/api_requests/context.py

@@ -2,9 +2,9 @@ from dataclasses import dataclass, field
  from functools import cached_property
  from typing import Any, Callable, Sequence, TYPE_CHECKING

- from .config import SamplingParams
- from .prompt import CachePattern, Conversation
- from .tracker import StatusTracker
+ from ..config import SamplingParams
+ from ..prompt import CachePattern, Conversation
+ from ..tracker import StatusTracker

  if TYPE_CHECKING:
  from pydantic import BaseModel
@@ -83,4 +83,4 @@ class RequestContext:
  # Update with any overrides
  current_values.update(overrides)

- return RequestContext(**current_values)
+ return RequestContext(**current_values) # type: ignore[arg-type]

{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/gemini.py

@@ -1,15 +1,16 @@
  import json
  import os
+ from typing import Any

  from aiohttp import ClientResponse

- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import Tool
  from lm_deluge.warnings import maybe_warn

  from ..config import SamplingParams
  from ..models import APIModel
- from ..prompt import Conversation, Message, Text, Thinking, ToolCall
+ from ..prompt import Conversation, Message, Text, ThoughtSignature, Thinking, ToolCall
  from ..usage import Usage
  from .base import APIRequestBase, APIResponse

@@ -37,13 +38,14 @@ async def _build_gemini_request(
  part_type="function call",
  )

- request_json = {
+ generation_config: dict[str, Any] = {
+ "temperature": sampling_params.temperature,
+ "topP": sampling_params.top_p,
+ "maxOutputTokens": sampling_params.max_new_tokens,
+ }
+ request_json: dict[str, Any] = {
  "contents": messages,
- "generationConfig": {
- "temperature": sampling_params.temperature,
- "topP": sampling_params.top_p,
- "maxOutputTokens": sampling_params.max_new_tokens,
- },
+ "generationConfig": generation_config,
  }

  # Add system instruction if present
@@ -83,7 +85,7 @@
  }
  effort = level_map[effort_key]
  thinking_config = {"thinkingLevel": effort}
- request_json["generationConfig"]["thinkingConfig"] = thinking_config
+ generation_config["thinkingConfig"] = thinking_config

  elif model.reasoning_model:
  if (
@@ -126,7 +128,7 @@
  # no thoughts head empty
  thinking_config = {"includeThoughts": False, "thinkingBudget": 0}

- request_json["generationConfig"]["thinkingConfig"] = thinking_config
+ generation_config["thinkingConfig"] = thinking_config

  else:
  if sampling_params.reasoning_effort:
@@ -171,14 +173,14 @@

  # Handle JSON mode
  if sampling_params.json_mode and model.supports_json:
- request_json["generationConfig"]["responseMimeType"] = "application/json"
+ generation_config["responseMimeType"] = "application/json"

  # Handle media_resolution for Gemini 3 (requires v1alpha)
  if sampling_params.media_resolution is not None:
  is_gemini_3 = "gemini-3" in model.name.lower()
  if is_gemini_3:
  # Add global media resolution to generationConfig
- request_json["generationConfig"]["mediaResolution"] = {
+ generation_config["mediaResolution"] = {
  "level": sampling_params.media_resolution
  }
  else:
@@ -260,10 +262,20 @@ class GeminiRequest(APIRequestBase):
  if "content" in candidate and "parts" in candidate["content"]:
  for part in candidate["content"]["parts"]:
  # Extract thought signature if present
- thought_sig = part.get("thoughtSignature")
+ raw_sig = part.get("thoughtSignature")
+ thought_sig = (
+ ThoughtSignature(raw_sig, provider="gemini")
+ if raw_sig is not None
+ else None
+ )

  if "text" in part:
- parts.append(Text(part["text"]))
+ parts.append(
+ Text(
+ part["text"],
+ thought_signature=thought_sig,
+ )
+ )
  elif "thought" in part:
  # Thought with optional signature
  parts.append(
@@ -286,6 +298,10 @@
  thought_signature=thought_sig,
  )
  )
+ elif thought_sig:
+ parts.append(
+ Text("", thought_signature=thought_sig)
+ )


  content = Message("assistant", parts)

{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/mistral.py

@@ -7,7 +7,7 @@ from lm_deluge.warnings import maybe_warn

  from ..models import APIModel
  from ..prompt import Message
- from ..request_context import RequestContext
+ from ..api_requests.context import RequestContext
  from ..usage import Usage
  from .base import APIRequestBase, APIResponse

{lm_deluge-0.0.89 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/openai.py

@@ -7,7 +7,7 @@ from typing import Sequence
  import aiohttp
  from aiohttp import ClientResponse

- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.util.schema import (
  prepare_output_schema,
@@ -22,6 +22,24 @@ from ..usage import Usage
  from .base import APIRequestBase, APIResponse


+ def _message_contents_to_string(messages: list[dict]):
+ messages = messages.copy()
+
+ for msg in messages:
+ content = msg.get("content")
+ assert content
+ if isinstance(content, list):
+ new_content = ""
+ for part in content:
+ assert "text" in part, "Invalid text part: " + str(part)
+ new_content += part["text"]
+ new_content += "\n"
+
+ msg["content"] = new_content.strip()
+
+ return messages
+
+
  async def _build_oa_chat_request(
  model: APIModel,
  context: RequestContext,
@@ -55,6 +73,12 @@
  request_json["service_tier"] = context.service_tier
  else:
  request_json["service_tier"] = context.service_tier
+ # if tinker, for now hack to mush into 1 string
+ if "tinker" in model.name:
+ messages = request_json["messages"]
+ assert isinstance(messages, list)
+ request_json["messages"] = _message_contents_to_string(messages)
+
  # set max_tokens or max_completion_tokens dep. on provider
  if "cohere" in model.api_base:
  request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -217,7 +241,7 @@ class OpenAIRequest(APIRequestBase):
  parts.append(Text(message["content"]))

  # Add tool calls if present
- if "tool_calls" in message:
+ if "tool_calls" in message and message["tool_calls"] is not None:
  for tool_call in message["tool_calls"]:
  parts.append(
  ToolCall(
@@ -238,9 +262,9 @@
  and "logprobs" in data["choices"][0]
  ):
  logprobs = data["choices"][0]["logprobs"]["content"]
- except Exception:
+ except Exception as e:
  is_error = True
- error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response."
+ error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response: {data}. Error: {e}"
  elif mimetype and "json" in mimetype.lower():
  is_error = True # expected status is 200, otherwise it's an error
  data = await http_response.json()
@@ -655,7 +679,12 @@ async def stream_chat(
  request_header.update(filtered_extra)

  context = SimpleNamespace(
- prompt=prompt, tools=tools, sampling_params=sampling_params
+ prompt=prompt,
+ tools=tools,
+ sampling_params=sampling_params,
+ service_tier=None,
+ output_schema=None,
+ model_name=model_name,
  )

  request_json = await _build_oa_chat_request(model, context) # type: ignore
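
A hedged sketch of what the new `_message_contents_to_string` helper does to chat messages before a "tinker" request is sent: list-valued contents are concatenated into a single newline-joined string, while string contents pass through untouched (input values are illustrative):

```python
# Private helper added in this release; its definition is in the diff above.
from lm_deluge.api_requests.openai import _message_contents_to_string

messages = [
    {"role": "user", "content": [{"type": "text", "text": "Hello"}, {"type": "text", "text": "world"}]},
    {"role": "assistant", "content": "Hi!"},
]

flattened = _message_contents_to_string(messages)
# -> [{"role": "user", "content": "Hello\nworld"}, {"role": "assistant", "content": "Hi!"}]
```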