lm-deluge 0.0.90.tar.gz → 0.0.91.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. {lm_deluge-0.0.90/src/lm_deluge.egg-info → lm_deluge-0.0.91}/PKG-INFO +9 -10
  2. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/README.md +8 -8
  3. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/pyproject.toml +5 -6
  4. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/__init__.py +3 -3
  5. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/anthropic.py +4 -2
  6. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/base.py +1 -1
  7. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/bedrock.py +6 -1
  8. lm_deluge-0.0.90/src/lm_deluge/request_context.py → lm_deluge-0.0.91/src/lm_deluge/api_requests/context.py +4 -4
  9. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/gemini.py +13 -11
  10. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/mistral.py +1 -1
  11. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/openai.py +4 -2
  12. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/batches.py +4 -4
  13. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/cache.py +1 -1
  14. lm_deluge-0.0.91/src/lm_deluge/cli.py +672 -0
  15. lm_deluge-0.0.90/src/lm_deluge/client.py → lm_deluge-0.0.91/src/lm_deluge/client/__init__.py +15 -12
  16. lm_deluge-0.0.91/src/lm_deluge/config.py +23 -0
  17. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/embed.py +2 -6
  18. lm_deluge-0.0.91/src/lm_deluge/models/__init__.py +267 -0
  19. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/anthropic.py +20 -12
  20. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/bedrock.py +9 -0
  21. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/cerebras.py +2 -0
  22. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/cohere.py +2 -0
  23. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/google.py +13 -0
  24. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/grok.py +4 -0
  25. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/groq.py +2 -0
  26. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/meta.py +2 -0
  27. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/openai.py +24 -1
  28. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/openrouter.py +107 -1
  29. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/together.py +3 -0
  30. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/extract.py +4 -5
  31. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/__init__.py +1 -1
  32. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
  33. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
  34. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
  35. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
  36. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
  37. lm_deluge-0.0.91/src/lm_deluge/prompt/__init__.py +45 -0
  38. lm_deluge-0.0.90/src/lm_deluge/prompt.py → lm_deluge-0.0.91/src/lm_deluge/prompt/conversation.py +45 -1014
  39. {lm_deluge-0.0.90/src/lm_deluge → lm_deluge-0.0.91/src/lm_deluge/prompt}/image.py +0 -10
  40. lm_deluge-0.0.91/src/lm_deluge/prompt/message.py +571 -0
  41. lm_deluge-0.0.91/src/lm_deluge/prompt/serialization.py +21 -0
  42. lm_deluge-0.0.91/src/lm_deluge/prompt/signatures.py +77 -0
  43. lm_deluge-0.0.91/src/lm_deluge/prompt/text.py +47 -0
  44. lm_deluge-0.0.91/src/lm_deluge/prompt/thinking.py +55 -0
  45. lm_deluge-0.0.91/src/lm_deluge/prompt/tool_calls.py +245 -0
  46. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/server/app.py +1 -1
  47. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/__init__.py +65 -18
  48. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
  49. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/actions.py +26 -26
  50. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/batch.py +1 -2
  51. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/kernel.py +1 -1
  52. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/filesystem.py +2 -2
  53. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
  54. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/memory.py +3 -1
  55. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/otc/executor.py +3 -3
  56. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/random.py +30 -54
  57. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/rlm/__init__.py +2 -2
  58. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/rlm/executor.py +1 -1
  59. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +2 -2
  60. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +9 -7
  61. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/subagents.py +1 -1
  62. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/util/logprobs.py +4 -4
  63. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/util/schema.py +6 -6
  64. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/util/validation.py +14 -9
  65. {lm_deluge-0.0.90 → lm_deluge-0.0.91/src/lm_deluge.egg-info}/PKG-INFO +9 -10
  66. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/SOURCES.txt +13 -5
  67. lm_deluge-0.0.91/src/lm_deluge.egg-info/entry_points.txt +3 -0
  68. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/requires.txt +0 -1
  69. lm_deluge-0.0.90/src/lm_deluge/cli.py +0 -300
  70. lm_deluge-0.0.90/src/lm_deluge/config.py +0 -45
  71. lm_deluge-0.0.90/src/lm_deluge/models/__init__.py +0 -160
  72. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/LICENSE +0 -0
  73. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/setup.cfg +0 -0
  74. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/__init__.py +0 -0
  75. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
  76. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/common.py +0 -0
  77. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  78. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  79. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  80. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  81. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  82. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/response.py +0 -0
  83. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/errors.py +0 -0
  84. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/arcee.py +0 -0
  85. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/deepseek.py +0 -0
  86. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/fireworks.py +0 -0
  87. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/kimi.py +0 -0
  88. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/minimax.py +0 -0
  89. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/mistral.py +0 -0
  90. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/models/zai.py +0 -0
  91. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/__init__.py +0 -0
  92. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/classify.py +0 -0
  93. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/core.py +0 -0
  94. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/docs/samples.py +0 -0
  95. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/optimizer.py +0 -0
  96. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/proposer.py +0 -0
  97. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/gepa/util.py +0 -0
  98. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/locate.py +0 -0
  99. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/ocr.py +0 -0
  100. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/score.py +0 -0
  101. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/pipelines/translate.py +0 -0
  102. {lm_deluge-0.0.90/src/lm_deluge → lm_deluge-0.0.91/src/lm_deluge/prompt}/file.py +0 -0
  103. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/rerank.py +0 -0
  104. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/server/__init__.py +0 -0
  105. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/server/__main__.py +0 -0
  106. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/server/adapters.py +0 -0
  107. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/server/auth.py +0 -0
  108. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/server/model_policy.py +0 -0
  109. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/server/models_anthropic.py +0 -0
  110. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/server/models_openai.py +0 -0
  111. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/skills/anthropic.py +0 -0
  112. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/skills/compat.py +0 -0
  113. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/anthropic/bash.py +0 -0
  114. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
  115. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/anthropic/editor.py +0 -0
  116. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/base.py +0 -0
  117. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/gemini.py +0 -0
  118. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/builtin/openai.py +0 -0
  119. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/__init__.py +0 -0
  120. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/base.py +0 -0
  121. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/converters.py +0 -0
  122. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/cua/trycua.py +0 -0
  123. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/__init__.py +0 -0
  124. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/batch_tool.py +0 -0
  125. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/docs.py +0 -0
  126. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/email.py +0 -0
  127. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/full_text_search/tantivy_index.py +0 -0
  128. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/otc/__init__.py +0 -0
  129. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/otc/parse.py +0 -0
  130. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/rlm/parse.py +0 -0
  131. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/sandbox/__init__.py +0 -0
  132. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/sandbox/docker_sandbox.py +0 -0
  133. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +0 -0
  134. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/sandbox/modal_sandbox.py +0 -0
  135. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/sheets.py +0 -0
  136. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/skills.py +0 -0
  137. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/todos.py +0 -0
  138. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/tool_search.py +0 -0
  139. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tool/prefab/web_search.py +0 -0
  140. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/tracker.py +0 -0
  141. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/usage.py +0 -0
  142. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/util/harmony.py +0 -0
  143. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/util/json.py +0 -0
  144. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/util/spatial.py +0 -0
  145. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/util/xml.py +0 -0
  146. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/warnings.py +0 -0
  147. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  148. {lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge.egg-info/top_level.txt +0 -0
{lm_deluge-0.0.90/src/lm_deluge.egg-info → lm_deluge-0.0.91}/PKG-INFO +9 -10

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.90
+ Version: 0.0.91
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
@@ -9,7 +9,6 @@ License-File: LICENSE
  Requires-Dist: python-dotenv
  Requires-Dist: json5
  Requires-Dist: PyYAML
- Requires-Dist: pandas
  Requires-Dist: aiohttp
  Requires-Dist: tiktoken
  Requires-Dist: xxhash
@@ -49,9 +48,9 @@ Dynamic: license-file
  - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
  - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
  - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
- - **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
- - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
- - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
+ - **Computer Use** – We support computer use for all major providers, and have pre-fabricated tools to integrate with Kernel, TryCUA, and more.
+ - **Local & Remote Caching** – Use Anthropic caching more easily with common patterns (system-only, tools-only, last N messages, etc.) Use client-side caching to save completions to avoid repeated LLM calls to process the same input.
+ - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our `LLMClient` or with the `openai` and `anthropic` packages.
  - **Sync and async APIs** – Use the client from sync or async code.

  **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!
@@ -146,7 +145,7 @@ Constructing conversations to pass to models is notoriously annoying. Each provi
  ```python
  from lm_deluge import Message, Conversation

- prompt = Conversation.system("You are a helpful assistant.").add(
+ prompt = Conversation().system("You are a helpful assistant.").add(
      Message.user("What's in this image?").add_image("tests/image.jpg")
  )

@@ -167,7 +166,7 @@ from lm_deluge import LLMClient, Conversation

  # Simple file upload
  client = LLMClient("gpt-4.1-mini")
- conversation = Conversation.user(
+ conversation = Conversation().user(
      "Please summarize this document",
      file="path/to/document.pdf"
  )
@@ -176,7 +175,7 @@ resps = client.process_prompts_sync([conversation])
  # You can also create File objects for more control
  from lm_deluge import File
  file = File("path/to/report.pdf", filename="Q4_Report.pdf")
- conversation = Conversation.user("Analyze this financial report")
+ conversation = Conversation().user("Analyze this financial report")
  conversation.messages[0].parts.append(file)
  ```

@@ -246,7 +245,7 @@ for tool_call in resps[0].tool_calls:
  import asyncio

  async def main():
-     conv = Conversation.user("List the files in the current directory")
+     conv = Conversation().user("List the files in the current directory")
      conv, resp = await client.run_agent_loop(conv, tools=tools)
      print(resp.content.completion)

@@ -262,7 +261,7 @@ from lm_deluge import LLMClient, Conversation, Message

  # Create a conversation with system message
  conv = (
-     Conversation.system("You are an expert Python developer with deep knowledge of async programming.")
+     Conversation().system("You are an expert Python developer with deep knowledge of async programming.")
      .add(Message.user("How do I use asyncio.gather?"))
  )

{lm_deluge-0.0.90 → lm_deluge-0.0.91}/README.md +8 -8

@@ -8,9 +8,9 @@
  - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
  - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
  - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
- - **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
- - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
- - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
+ - **Computer Use** – We support computer use for all major providers, and have pre-fabricated tools to integrate with Kernel, TryCUA, and more.
+ - **Local & Remote Caching** – Use Anthropic caching more easily with common patterns (system-only, tools-only, last N messages, etc.) Use client-side caching to save completions to avoid repeated LLM calls to process the same input.
+ - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our `LLMClient` or with the `openai` and `anthropic` packages.
  - **Sync and async APIs** – Use the client from sync or async code.

  **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!
@@ -105,7 +105,7 @@ Constructing conversations to pass to models is notoriously annoying. Each provi
  ```python
  from lm_deluge import Message, Conversation

- prompt = Conversation.system("You are a helpful assistant.").add(
+ prompt = Conversation().system("You are a helpful assistant.").add(
      Message.user("What's in this image?").add_image("tests/image.jpg")
  )

@@ -126,7 +126,7 @@ from lm_deluge import LLMClient, Conversation

  # Simple file upload
  client = LLMClient("gpt-4.1-mini")
- conversation = Conversation.user(
+ conversation = Conversation().user(
      "Please summarize this document",
      file="path/to/document.pdf"
  )
@@ -135,7 +135,7 @@ resps = client.process_prompts_sync([conversation])
  # You can also create File objects for more control
  from lm_deluge import File
  file = File("path/to/report.pdf", filename="Q4_Report.pdf")
- conversation = Conversation.user("Analyze this financial report")
+ conversation = Conversation().user("Analyze this financial report")
  conversation.messages[0].parts.append(file)
  ```

@@ -205,7 +205,7 @@ for tool_call in resps[0].tool_calls:
  import asyncio

  async def main():
-     conv = Conversation.user("List the files in the current directory")
+     conv = Conversation().user("List the files in the current directory")
      conv, resp = await client.run_agent_loop(conv, tools=tools)
      print(resp.content.completion)

@@ -221,7 +221,7 @@ from lm_deluge import LLMClient, Conversation, Message

  # Create a conversation with system message
  conv = (
-     Conversation.system("You are an expert Python developer with deep knowledge of async programming.")
+     Conversation().system("You are an expert Python developer with deep knowledge of async programming.")
      .add(Message.user("How do I use asyncio.gather?"))
  )

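Both the PKG-INFO long description and README.md pick up the same documentation changes: the Computer Use and Caching bullets are rewritten, and every example switches from classmethod-style builders (`Conversation.system(...)`, `Conversation.user(...)`) to instance-style construction (`Conversation().system(...)`, `Conversation().user(...)`). A minimal before/after sketch of the documented pattern, using the same image path as the README examples:

```python
from lm_deluge import Conversation, Message

# 0.0.90 docs: classmethod constructors
# prompt = Conversation.system("You are a helpful assistant.").add(
#     Message.user("What's in this image?").add_image("tests/image.jpg")
# )

# 0.0.91 docs: build an instance first, then chain
prompt = Conversation().system("You are a helpful assistant.").add(
    Message.user("What's in this image?").add_image("tests/image.jpg")
)
```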
{lm_deluge-0.0.90 → lm_deluge-0.0.91}/pyproject.toml +5 -6

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

  [project]
  name = "lm_deluge"
- version = "0.0.90"
+ version = "0.0.91"
  authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
  description = "Python utility for using LLM API models."
  readme = "README.md"
@@ -15,7 +15,6 @@ dependencies = [
      "python-dotenv",
      "json5",
      "PyYAML",
-     "pandas",
      "aiohttp",
      "tiktoken",
      "xxhash",
@@ -28,8 +27,7 @@ dependencies = [
      "pdf2image",
      "pillow",
      "fastmcp>=2.4",
-     "rich",
-     # "textual>=0.58.0"
+     "rich"
  ]

  [project.optional-dependencies]
@@ -39,5 +37,6 @@ full_text_search = ["tantivy>=0.21.0", "lenlp>=0.1.0"]
  sandbox = ["modal>=0.64.0", "daytona-sdk>=0.1.4", "docker>=7.0.0"]
  server = ["fastapi>=0.100.0", "uvicorn>=0.20.0"]

- # [project.scripts]
- # deluge = "lm_deluge.cli:main"
+ [project.scripts]
+ deluge = "lm_deluge.cli:main"
+ deluge-server = "lm_deluge.server.__main__:main"
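The previously commented-out `[project.scripts]` table is now active, so installing 0.0.91 puts `deluge` and `deluge-server` executables on the PATH. A sketch of what the `deluge` entry point resolves to under standard console-script semantics; argument handling lives inside `lm_deluge/cli.py` and is not shown here:

```python
# Rough equivalent of the installed `deluge` command.
# Console-script entry points are invoked with no arguments, so main()
# is expected to read sys.argv itself.
from lm_deluge.cli import main

if __name__ == "__main__":
    main()
```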
{lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/__init__.py +3 -3

@@ -1,7 +1,6 @@
  from .client import APIResponse, LLMClient, SamplingParams
- from .file import File
- from .prompt import Conversation, Message
- from .tool import Tool
+ from .prompt import Conversation, Message, File
+ from .tool import Tool, MCPServer

  # dotenv.load_dotenv() - don't do this, fucks with other packages

@@ -12,5 +11,6 @@ __all__ = [
      "Conversation",
      "Message",
      "Tool",
+     "MCPServer",
      "File",
  ]
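With `File` now re-exported from the new `prompt` subpackage and `MCPServer` added to `__all__`, the package root in 0.0.91 exposes the following names (taken from the import lines and `__all__` shown above):

```python
# Public imports available from the package root in 0.0.91.
from lm_deluge import (
    APIResponse,     # from .client
    Conversation,    # from .prompt (now a subpackage, not a single module)
    File,            # moved out of the old top-level .file module
    LLMClient,       # from .client
    MCPServer,       # newly exported alongside Tool
    Message,         # from .prompt
    SamplingParams,  # from .client
    Tool,            # from .tool
)
```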
{lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/anthropic.py +4 -2

@@ -10,7 +10,7 @@ from lm_deluge.prompt import (
      Thinking,
      ToolCall,
  )
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.usage import Usage
  from lm_deluge.util.schema import (
@@ -103,7 +103,9 @@ def _build_anthropic_request(
          if "top_p" in request_json:
              request_json["top_p"] = max(request_json["top_p"], 0.95)
          request_json["temperature"] = 1.0
-         request_json["max_tokens"] += budget
+         max_tokens = request_json["max_tokens"]
+         assert isinstance(max_tokens, int)
+         request_json["max_tokens"] = max_tokens + budget
      else:
          request_json["thinking"] = {"type": "disabled"}
      if "kimi" in model.id and "thinking" in model.id:
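The `max_tokens` change in `_build_anthropic_request` swaps an in-place `+=` for a read, an `isinstance` assertion, and a write-back. Since `request_json` values are loosely typed, the assertion narrows the value to `int` before the arithmetic; the same assert-before-use style appears in the `bedrock.py` and `openai.py` hunks below. A generic sketch of the pattern (the dict shape here is illustrative, not the library's actual request schema):

```python
from typing import Any

def add_thinking_budget(request_json: dict[str, Any], budget: int) -> None:
    # Read once, narrow the type for the checker, then write back.
    max_tokens = request_json["max_tokens"]
    assert isinstance(max_tokens, int)
    request_json["max_tokens"] = max_tokens + budget

request = {"max_tokens": 1024}
add_thinking_budget(request, budget=2048)
assert request["max_tokens"] == 3072
```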
{lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/base.py +1 -1

@@ -10,7 +10,7 @@ from aiohttp import ClientResponse

  from ..errors import raise_if_modal_exception
  from ..models.openai import OPENAI_MODELS
- from ..request_context import RequestContext
+ from ..api_requests.context import RequestContext
  from .response import APIResponse


{lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/bedrock.py +6 -1

@@ -20,7 +20,7 @@ from lm_deluge.prompt import (
      Thinking,
      ToolCall,
  )
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.usage import Usage

@@ -263,6 +263,11 @@ class BedrockRequest(APIRequestBase):
          # Create a fake requests.PreparedRequest object for AWS4Auth to sign
          import requests

+         assert self.url is not None, "URL must be set after build_request"
+         assert (
+             self.request_header is not None
+         ), "Headers must be set after build_request"
+
          fake_request = requests.Request(
              method="POST",
              url=self.url,
lm_deluge-0.0.90/src/lm_deluge/request_context.py → lm_deluge-0.0.91/src/lm_deluge/api_requests/context.py +4 -4

@@ -2,9 +2,9 @@ from dataclasses import dataclass, field
  from functools import cached_property
  from typing import Any, Callable, Sequence, TYPE_CHECKING

- from .config import SamplingParams
- from .prompt import CachePattern, Conversation
- from .tracker import StatusTracker
+ from ..config import SamplingParams
+ from ..prompt import CachePattern, Conversation
+ from ..tracker import StatusTracker

  if TYPE_CHECKING:
      from pydantic import BaseModel
@@ -83,4 +83,4 @@ class RequestContext:
          # Update with any overrides
          current_values.update(overrides)

-         return RequestContext(**current_values)
+         return RequestContext(**current_values)  # type: ignore[arg-type]
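`request_context.py` is relocated into the `api_requests` package, so its own relative imports move up a level (`..config`, `..prompt`, `..tracker`) and every caller in this release switches to the new module path. Downstream code that imported the class directly only needs the path updated (a sketch; apart from the added `type: ignore[arg-type]`, the class itself is unchanged in this diff):

```python
# 0.0.90
# from lm_deluge.request_context import RequestContext

# 0.0.91
from lm_deluge.api_requests.context import RequestContext
```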
{lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/gemini.py +13 -11

@@ -1,9 +1,10 @@
  import json
  import os
+ from typing import Any

  from aiohttp import ClientResponse

- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import Tool
  from lm_deluge.warnings import maybe_warn

@@ -37,13 +38,14 @@ async def _build_gemini_request(
              part_type="function call",
          )

-     request_json = {
+     generation_config: dict[str, Any] = {
+         "temperature": sampling_params.temperature,
+         "topP": sampling_params.top_p,
+         "maxOutputTokens": sampling_params.max_new_tokens,
+     }
+     request_json: dict[str, Any] = {
          "contents": messages,
-         "generationConfig": {
-             "temperature": sampling_params.temperature,
-             "topP": sampling_params.top_p,
-             "maxOutputTokens": sampling_params.max_new_tokens,
-         },
+         "generationConfig": generation_config,
      }

      # Add system instruction if present
@@ -83,7 +85,7 @@ async def _build_gemini_request(
              }
              effort = level_map[effort_key]
              thinking_config = {"thinkingLevel": effort}
-             request_json["generationConfig"]["thinkingConfig"] = thinking_config
+             generation_config["thinkingConfig"] = thinking_config

      elif model.reasoning_model:
          if (
@@ -126,7 +128,7 @@ async def _build_gemini_request(
              # no thoughts head empty
              thinking_config = {"includeThoughts": False, "thinkingBudget": 0}

-         request_json["generationConfig"]["thinkingConfig"] = thinking_config
+         generation_config["thinkingConfig"] = thinking_config

      else:
          if sampling_params.reasoning_effort:
@@ -171,14 +173,14 @@ async def _build_gemini_request(

      # Handle JSON mode
      if sampling_params.json_mode and model.supports_json:
-         request_json["generationConfig"]["responseMimeType"] = "application/json"
+         generation_config["responseMimeType"] = "application/json"

      # Handle media_resolution for Gemini 3 (requires v1alpha)
      if sampling_params.media_resolution is not None:
          is_gemini_3 = "gemini-3" in model.name.lower()
          if is_gemini_3:
              # Add global media resolution to generationConfig
-             request_json["generationConfig"]["mediaResolution"] = {
+             generation_config["mediaResolution"] = {
                  "level": sampling_params.media_resolution
              }
          else:
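The Gemini builder now creates an explicitly typed `generation_config` dict up front and attaches it to the request under `"generationConfig"`, so the later branches (thinking config, JSON mode, media resolution) mutate the local dict instead of indexing back through `request_json["generationConfig"][...]`. A condensed sketch of the structure this produces (placeholder parameters, not the full builder):

```python
from typing import Any

def build_request(messages: list[dict[str, Any]], temperature: float,
                  top_p: float, max_new_tokens: int,
                  json_mode: bool = False) -> dict[str, Any]:
    # Build the generation config once, with an explicit type.
    generation_config: dict[str, Any] = {
        "temperature": temperature,
        "topP": top_p,
        "maxOutputTokens": max_new_tokens,
    }
    request_json: dict[str, Any] = {
        "contents": messages,
        "generationConfig": generation_config,
    }
    # Later branches mutate the local reference; request_json holds the
    # same dict object, so the result matches the old nested writes.
    if json_mode:
        generation_config["responseMimeType"] = "application/json"
    return request_json
```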
{lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/mistral.py +1 -1

@@ -7,7 +7,7 @@ from lm_deluge.warnings import maybe_warn

  from ..models import APIModel
  from ..prompt import Message
- from ..request_context import RequestContext
+ from ..api_requests.context import RequestContext
  from ..usage import Usage
  from .base import APIRequestBase, APIResponse

{lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/api_requests/openai.py +4 -2

@@ -7,7 +7,7 @@ from typing import Sequence
  import aiohttp
  from aiohttp import ClientResponse

- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.util.schema import (
      prepare_output_schema,
@@ -75,7 +75,9 @@ async def _build_oa_chat_request(
          request_json["service_tier"] = context.service_tier
      # if tinker, for now hack to mush into 1 string
      if "tinker" in model.name:
-         request_json["messages"] = _message_contents_to_string(request_json["messages"])
+         messages = request_json["messages"]
+         assert isinstance(messages, list)
+         request_json["messages"] = _message_contents_to_string(messages)

      # set max_tokens or max_completion_tokens dep. on provider
      if "cohere" in model.api_base:
{lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/batches.py +4 -4

@@ -3,7 +3,7 @@ import json
  import os
  import tempfile
  import time
- from typing import Literal, Sequence, cast
+ from typing import Any, Literal, Sequence, cast

  import aiohttp
  from rich.console import Console
@@ -22,7 +22,7 @@ from lm_deluge.prompt import (
      Prompt,
      prompts_to_conversations,
  )
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext


  def _create_batch_status_display(
@@ -480,7 +480,7 @@ async def _wait_for_anthropic_batch_completion_async(

      # Event to signal when to stop the display updater
      stop_display_event = asyncio.Event()
-     current_status = {"status": "processing", "counts": None}
+     current_status: dict[str, Any] = {"status": "processing", "counts": None}

      async def display_updater():
          """Update display independently of polling."""
@@ -632,7 +632,7 @@ async def _wait_for_openai_batch_completion_async(

      # Event to signal when to stop the display updater
      stop_display_event = asyncio.Event()
-     current_status = {"status": "pending", "counts": None}
+     current_status: dict[str, Any] = {"status": "pending", "counts": None}

      async def display_updater():
          """Update display independently of polling."""
{lm_deluge-0.0.90 → lm_deluge-0.0.91}/src/lm_deluge/cache.py +1 -1

@@ -8,7 +8,7 @@ from .api_requests.base import APIResponse
  try:
      import plyvel  # type: ignore
  except ImportError:
-     plyvel = None
+     plyvel: Any = None
      print("Warning: plyvel not installed, cannot use LevelDB.")

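The `cache.py` change annotates the `plyvel` fallback as `Any`, so assigning `None` when the optional LevelDB backend is missing satisfies the type checker. A sketch of the general optional-dependency pattern this follows (the guard function is illustrative, not part of the library):

```python
from typing import Any

try:
    import plyvel  # type: ignore  # optional LevelDB backend
except ImportError:
    plyvel: Any = None  # Any lets the None sentinel type-check
    print("Warning: plyvel not installed, cannot use LevelDB.")

def leveldb_available() -> bool:
    # Callers check the sentinel before touching the optional backend.
    return plyvel is not None
```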