lm-deluge 0.0.90__tar.gz → 0.0.92__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. {lm_deluge-0.0.90/src/lm_deluge.egg-info → lm_deluge-0.0.92}/PKG-INFO +9 -10
  2. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/README.md +8 -8
  3. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/pyproject.toml +5 -6
  4. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/__init__.py +3 -3
  5. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/anthropic.py +4 -2
  6. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/base.py +1 -1
  7. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/bedrock.py +6 -1
  8. lm_deluge-0.0.92/src/lm_deluge/api_requests/bedrock_nova.py +299 -0
  9. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/common.py +2 -0
  10. lm_deluge-0.0.90/src/lm_deluge/request_context.py → lm_deluge-0.0.92/src/lm_deluge/api_requests/context.py +4 -4
  11. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/gemini.py +13 -11
  12. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/mistral.py +1 -1
  13. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/openai.py +13 -5
  14. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/batches.py +4 -4
  15. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/cache.py +1 -1
  16. lm_deluge-0.0.92/src/lm_deluge/cli.py +672 -0
  17. lm_deluge-0.0.90/src/lm_deluge/client.py → lm_deluge-0.0.92/src/lm_deluge/client/__init__.py +15 -12
  18. lm_deluge-0.0.92/src/lm_deluge/config.py +23 -0
  19. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/embed.py +2 -6
  20. lm_deluge-0.0.92/src/lm_deluge/models/__init__.py +269 -0
  21. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/anthropic.py +20 -12
  22. lm_deluge-0.0.92/src/lm_deluge/models/azure.py +269 -0
  23. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/bedrock.py +48 -0
  24. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/cerebras.py +2 -0
  25. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/cohere.py +2 -0
  26. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/google.py +13 -0
  27. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/grok.py +4 -0
  28. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/groq.py +2 -0
  29. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/meta.py +2 -0
  30. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/openai.py +24 -1
  31. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/openrouter.py +107 -1
  32. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/together.py +3 -0
  33. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/extract.py +4 -5
  34. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/__init__.py +1 -1
  35. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
  36. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
  37. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
  38. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
  39. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
  40. lm_deluge-0.0.92/src/lm_deluge/prompt/__init__.py +45 -0
  41. lm_deluge-0.0.90/src/lm_deluge/prompt.py → lm_deluge-0.0.92/src/lm_deluge/prompt/conversation.py +107 -1014
  42. {lm_deluge-0.0.90/src/lm_deluge → lm_deluge-0.0.92/src/lm_deluge/prompt}/file.py +4 -0
  43. {lm_deluge-0.0.90/src/lm_deluge → lm_deluge-0.0.92/src/lm_deluge/prompt}/image.py +20 -10
  44. lm_deluge-0.0.92/src/lm_deluge/prompt/message.py +583 -0
  45. lm_deluge-0.0.92/src/lm_deluge/prompt/serialization.py +21 -0
  46. lm_deluge-0.0.92/src/lm_deluge/prompt/signatures.py +77 -0
  47. lm_deluge-0.0.92/src/lm_deluge/prompt/text.py +50 -0
  48. lm_deluge-0.0.92/src/lm_deluge/prompt/thinking.py +58 -0
  49. lm_deluge-0.0.92/src/lm_deluge/prompt/tool_calls.py +278 -0
  50. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/server/app.py +1 -1
  51. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/__init__.py +65 -18
  52. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
  53. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/cua/actions.py +26 -26
  54. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/cua/batch.py +1 -2
  55. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/cua/kernel.py +1 -1
  56. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/filesystem.py +2 -2
  57. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
  58. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/memory.py +3 -1
  59. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/otc/executor.py +3 -3
  60. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/random.py +30 -54
  61. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/rlm/__init__.py +2 -2
  62. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/rlm/executor.py +1 -1
  63. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +2 -2
  64. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +9 -7
  65. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/subagents.py +1 -1
  66. lm_deluge-0.0.92/src/lm_deluge/tool/prefab/web_search.py +1020 -0
  67. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/util/logprobs.py +4 -4
  68. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/util/schema.py +6 -6
  69. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/util/validation.py +14 -9
  70. {lm_deluge-0.0.90 → lm_deluge-0.0.92/src/lm_deluge.egg-info}/PKG-INFO +9 -10
  71. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge.egg-info/SOURCES.txt +15 -5
  72. lm_deluge-0.0.92/src/lm_deluge.egg-info/entry_points.txt +3 -0
  73. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge.egg-info/requires.txt +0 -1
  74. lm_deluge-0.0.90/src/lm_deluge/cli.py +0 -300
  75. lm_deluge-0.0.90/src/lm_deluge/config.py +0 -45
  76. lm_deluge-0.0.90/src/lm_deluge/models/__init__.py +0 -160
  77. lm_deluge-0.0.90/src/lm_deluge/tool/prefab/web_search.py +0 -199
  78. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/LICENSE +0 -0
  79. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/setup.cfg +0 -0
  80. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/__init__.py +0 -0
  81. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/chat_reasoning.py +0 -0
  82. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  83. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  84. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  85. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  86. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  87. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/api_requests/response.py +0 -0
  88. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/errors.py +0 -0
  89. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/arcee.py +0 -0
  90. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/deepseek.py +0 -0
  91. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/fireworks.py +0 -0
  92. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/kimi.py +0 -0
  93. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/minimax.py +0 -0
  94. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/mistral.py +0 -0
  95. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/models/zai.py +0 -0
  96. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/__init__.py +0 -0
  97. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/classify.py +0 -0
  98. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/core.py +0 -0
  99. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/docs/samples.py +0 -0
  100. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/optimizer.py +0 -0
  101. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/proposer.py +0 -0
  102. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/gepa/util.py +0 -0
  103. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/locate.py +0 -0
  104. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/ocr.py +0 -0
  105. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/score.py +0 -0
  106. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/pipelines/translate.py +0 -0
  107. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/rerank.py +0 -0
  108. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/server/__init__.py +0 -0
  109. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/server/__main__.py +0 -0
  110. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/server/adapters.py +0 -0
  111. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/server/auth.py +0 -0
  112. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/server/model_policy.py +0 -0
  113. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/server/models_anthropic.py +0 -0
  114. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/server/models_openai.py +0 -0
  115. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/skills/anthropic.py +0 -0
  116. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/skills/compat.py +0 -0
  117. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/builtin/anthropic/bash.py +0 -0
  118. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
  119. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/builtin/anthropic/editor.py +0 -0
  120. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/builtin/base.py +0 -0
  121. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/builtin/gemini.py +0 -0
  122. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/builtin/openai.py +0 -0
  123. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/cua/__init__.py +0 -0
  124. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/cua/base.py +0 -0
  125. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/cua/converters.py +0 -0
  126. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/cua/trycua.py +0 -0
  127. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/__init__.py +0 -0
  128. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/batch_tool.py +0 -0
  129. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/docs.py +0 -0
  130. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/email.py +0 -0
  131. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/full_text_search/tantivy_index.py +0 -0
  132. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/otc/__init__.py +0 -0
  133. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/otc/parse.py +0 -0
  134. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/rlm/parse.py +0 -0
  135. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/sandbox/__init__.py +0 -0
  136. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/sandbox/docker_sandbox.py +0 -0
  137. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +0 -0
  138. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/sandbox/modal_sandbox.py +0 -0
  139. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/sheets.py +0 -0
  140. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/skills.py +0 -0
  141. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/todos.py +0 -0
  142. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tool/prefab/tool_search.py +0 -0
  143. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/tracker.py +0 -0
  144. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/usage.py +0 -0
  145. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/util/harmony.py +0 -0
  146. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/util/json.py +0 -0
  147. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/util/spatial.py +0 -0
  148. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/util/xml.py +0 -0
  149. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge/warnings.py +0 -0
  150. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  151. {lm_deluge-0.0.90 → lm_deluge-0.0.92}/src/lm_deluge.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.90
+ Version: 0.0.92
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
@@ -9,7 +9,6 @@ License-File: LICENSE
  Requires-Dist: python-dotenv
  Requires-Dist: json5
  Requires-Dist: PyYAML
- Requires-Dist: pandas
  Requires-Dist: aiohttp
  Requires-Dist: tiktoken
  Requires-Dist: xxhash
@@ -49,9 +48,9 @@ Dynamic: license-file
  - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
  - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
  - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
- - **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
- - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
- - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
+ - **Computer Use** – We support computer use for all major providers, and have pre-fabricated tools to integrate with Kernel, TryCUA, and more.
+ - **Local & Remote Caching** – Use Anthropic caching more easily with common patterns (system-only, tools-only, last N messages, etc.) Use client-side caching to save completions to avoid repeated LLM calls to process the same input.
+ - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our `LLMClient` or with the `openai` and `anthropic` packages.
  - **Sync and async APIs** – Use the client from sync or async code.

  **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!
@@ -146,7 +145,7 @@ Constructing conversations to pass to models is notoriously annoying. Each provi
  ```python
  from lm_deluge import Message, Conversation

- prompt = Conversation.system("You are a helpful assistant.").add(
+ prompt = Conversation().system("You are a helpful assistant.").add(
  Message.user("What's in this image?").add_image("tests/image.jpg")
  )

@@ -167,7 +166,7 @@ from lm_deluge import LLMClient, Conversation

  # Simple file upload
  client = LLMClient("gpt-4.1-mini")
- conversation = Conversation.user(
+ conversation = Conversation().user(
  "Please summarize this document",
  file="path/to/document.pdf"
  )
@@ -176,7 +175,7 @@ resps = client.process_prompts_sync([conversation])
  # You can also create File objects for more control
  from lm_deluge import File
  file = File("path/to/report.pdf", filename="Q4_Report.pdf")
- conversation = Conversation.user("Analyze this financial report")
+ conversation = Conversation().user("Analyze this financial report")
  conversation.messages[0].parts.append(file)
  ```

@@ -246,7 +245,7 @@ for tool_call in resps[0].tool_calls:
  import asyncio

  async def main():
- conv = Conversation.user("List the files in the current directory")
+ conv = Conversation().user("List the files in the current directory")
  conv, resp = await client.run_agent_loop(conv, tools=tools)
  print(resp.content.completion)

@@ -262,7 +261,7 @@ from lm_deluge import LLMClient, Conversation, Message

  # Create a conversation with system message
  conv = (
- Conversation.system("You are an expert Python developer with deep knowledge of async programming.")
+ Conversation().system("You are an expert Python developer with deep knowledge of async programming.")
  .add(Message.user("How do I use asyncio.gather?"))
  )

@@ -8,9 +8,9 @@
  - **Spray across models/providers** – Configure a client with multiple models from any provider(s), and sampling weights. The client samples a model for each request.
  - **Tool Use** – Unified API for defining tools for all providers, and creating tools automatically from python functions.
  - **MCP Support** – Instantiate a `Tool` from a local or remote MCP server so that any LLM can use it, whether or not that provider natively supports MCP.
- - **Computer Use** – We support Claude Computer Use via the computer_use argument to process_prompts_sync/async. It works with Anthropic's API; Bedrock's API is broken right now and rejects the tool definitions, but in principle this will work there too when Bedrock gets their sh*t together.
- - **Caching** – Save completions in a local or distributed cache to avoid repeated LLM calls to process the same input.
- - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our client or with the `openai` and `anthropic` packages.
+ - **Computer Use** – We support computer use for all major providers, and have pre-fabricated tools to integrate with Kernel, TryCUA, and more.
+ - **Local & Remote Caching** – Use Anthropic caching more easily with common patterns (system-only, tools-only, last N messages, etc.) Use client-side caching to save completions to avoid repeated LLM calls to process the same input.
+ - **Convenient message constructor** – No more looking up how to build an Anthropic messages list with images. Our `Conversation` and `Message` classes work great with our `LLMClient` or with the `openai` and `anthropic` packages.
  - **Sync and async APIs** – Use the client from sync or async code.

  **STREAMING IS NOT IN SCOPE.** There are plenty of packages that let you stream chat completions across providers. The sole purpose of this package is to do very fast batch inference using APIs. Sorry!
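The rewritten "Convenient message constructor" bullet above refers to the builder-style API that the README examples in the following hunks switch to: `Conversation()` is instantiated first, then `.system()` / `.user()` calls are chained. A minimal sketch assembled only from snippets that appear elsewhere in this diff (model name and image path included):

```python
from lm_deluge import LLMClient, Message, Conversation

# 0.0.92 builder style: instantiate Conversation() first, then chain calls.
prompt = Conversation().system("You are a helpful assistant.").add(
    Message.user("What's in this image?").add_image("tests/image.jpg")
)

client = LLMClient("gpt-4.1-mini")
resps = client.process_prompts_sync([prompt])
```

The only change relative to 0.0.90 is the leading `Conversation()` call replacing the old `Conversation.system(...)` classmethod style; the rest of the chain is unchanged.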
@@ -105,7 +105,7 @@ Constructing conversations to pass to models is notoriously annoying. Each provi
  ```python
  from lm_deluge import Message, Conversation

- prompt = Conversation.system("You are a helpful assistant.").add(
+ prompt = Conversation().system("You are a helpful assistant.").add(
  Message.user("What's in this image?").add_image("tests/image.jpg")
  )

@@ -126,7 +126,7 @@ from lm_deluge import LLMClient, Conversation

  # Simple file upload
  client = LLMClient("gpt-4.1-mini")
- conversation = Conversation.user(
+ conversation = Conversation().user(
  "Please summarize this document",
  file="path/to/document.pdf"
  )
@@ -135,7 +135,7 @@ resps = client.process_prompts_sync([conversation])
  # You can also create File objects for more control
  from lm_deluge import File
  file = File("path/to/report.pdf", filename="Q4_Report.pdf")
- conversation = Conversation.user("Analyze this financial report")
+ conversation = Conversation().user("Analyze this financial report")
  conversation.messages[0].parts.append(file)
  ```

@@ -205,7 +205,7 @@ for tool_call in resps[0].tool_calls:
  import asyncio

  async def main():
- conv = Conversation.user("List the files in the current directory")
+ conv = Conversation().user("List the files in the current directory")
  conv, resp = await client.run_agent_loop(conv, tools=tools)
  print(resp.content.completion)

@@ -221,7 +221,7 @@ from lm_deluge import LLMClient, Conversation, Message

  # Create a conversation with system message
  conv = (
- Conversation.system("You are an expert Python developer with deep knowledge of async programming.")
+ Conversation().system("You are an expert Python developer with deep knowledge of async programming.")
  .add(Message.user("How do I use asyncio.gather?"))
  )

@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

  [project]
  name = "lm_deluge"
- version = "0.0.90"
+ version = "0.0.92"
  authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
  description = "Python utility for using LLM API models."
  readme = "README.md"
@@ -15,7 +15,6 @@ dependencies = [
  "python-dotenv",
  "json5",
  "PyYAML",
- "pandas",
  "aiohttp",
  "tiktoken",
  "xxhash",
@@ -28,8 +27,7 @@ dependencies = [
  "pdf2image",
  "pillow",
  "fastmcp>=2.4",
- "rich",
- # "textual>=0.58.0"
+ "rich"
  ]

  [project.optional-dependencies]
@@ -39,5 +37,6 @@ full_text_search = ["tantivy>=0.21.0", "lenlp>=0.1.0"]
  sandbox = ["modal>=0.64.0", "daytona-sdk>=0.1.4", "docker>=7.0.0"]
  server = ["fastapi>=0.100.0", "uvicorn>=0.20.0"]

- # [project.scripts]
- # deluge = "lm_deluge.cli:main"
+ [project.scripts]
+ deluge = "lm_deluge.cli:main"
+ deluge-server = "lm_deluge.server.__main__:main"
@@ -1,7 +1,6 @@
  from .client import APIResponse, LLMClient, SamplingParams
- from .file import File
- from .prompt import Conversation, Message
- from .tool import Tool
+ from .prompt import Conversation, Message, File
+ from .tool import Tool, MCPServer

  # dotenv.load_dotenv() - don't do this, fucks with other packages

@@ -12,5 +11,6 @@ __all__ = [
  "Conversation",
  "Message",
  "Tool",
+ "MCPServer",
  "File",
  ]
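As a quick orientation for the new export surface: `File` now comes from the `lm_deluge.prompt` subpackage rather than `lm_deluge.file`, and `MCPServer` is exported at the top level for the first time. A hedged sketch of imports that should work after this change, using only names visible in the hunk above and in the README examples in this diff:

```python
from lm_deluge import (
    APIResponse,
    Conversation,
    File,
    LLMClient,
    MCPServer,   # newly re-exported in 0.0.92
    Message,
    SamplingParams,
    Tool,
)

# File still resolves from the package root even though it moved into
# the new lm_deluge.prompt subpackage.
conversation = Conversation().user("Analyze this financial report")
conversation.messages[0].parts.append(
    File("path/to/report.pdf", filename="Q4_Report.pdf")
)
```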
@@ -10,7 +10,7 @@ from lm_deluge.prompt import (
  Thinking,
  ToolCall,
  )
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.usage import Usage
  from lm_deluge.util.schema import (
@@ -103,7 +103,9 @@ def _build_anthropic_request(
  if "top_p" in request_json:
  request_json["top_p"] = max(request_json["top_p"], 0.95)
  request_json["temperature"] = 1.0
- request_json["max_tokens"] += budget
+ max_tokens = request_json["max_tokens"]
+ assert isinstance(max_tokens, int)
+ request_json["max_tokens"] = max_tokens + budget
  else:
  request_json["thinking"] = {"type": "disabled"}
  if "kimi" in model.id and "thinking" in model.id:
@@ -10,7 +10,7 @@ from aiohttp import ClientResponse

  from ..errors import raise_if_modal_exception
  from ..models.openai import OPENAI_MODELS
- from ..request_context import RequestContext
+ from ..api_requests.context import RequestContext
  from .response import APIResponse

@@ -20,7 +20,7 @@ from lm_deluge.prompt import (
  Thinking,
  ToolCall,
  )
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.usage import Usage

@@ -263,6 +263,11 @@ class BedrockRequest(APIRequestBase):
  # Create a fake requests.PreparedRequest object for AWS4Auth to sign
  import requests

+ assert self.url is not None, "URL must be set after build_request"
+ assert (
+ self.request_header is not None
+ ), "Headers must be set after build_request"
+
  fake_request = requests.Request(
  method="POST",
  url=self.url,
@@ -0,0 +1,299 @@
+ import asyncio
+ import json
+ import os
+
+ from aiohttp import ClientResponse
+
+ try:
+ from requests_aws4auth import AWS4Auth
+ except ImportError:
+ raise ImportError(
+ "aws4auth is required for bedrock support. Install with: pip install requests-aws4auth"
+ )
+
+ from lm_deluge.prompt import Message, Text, ToolCall
+ from lm_deluge.api_requests.context import RequestContext
+ from lm_deluge.tool import MCPServer, Tool
+ from lm_deluge.usage import Usage
+
+ from ..models import APIModel
+ from .base import APIRequestBase, APIResponse
+
+
+ def _convert_tool_to_nova(tool: Tool) -> dict:
+ """Convert a Tool to Nova toolSpec format."""
+ return {
+ "toolSpec": {
+ "name": tool.name,
+ "description": tool.description,
+ "inputSchema": {
+ "json": {
+ "type": "object",
+ "properties": tool.parameters,
+ "required": tool.required or [],
+ }
+ },
+ }
+ }
+
+
+ async def _build_nova_request(
+ model: APIModel,
+ context: RequestContext,
+ ):
+ """Build request for Amazon Nova models on Bedrock."""
+ prompt = context.prompt
+ tools = context.tools
+ sampling_params = context.sampling_params
+ cache_pattern = context.cache
+
+ # Handle AWS auth
+ access_key = os.getenv("AWS_ACCESS_KEY_ID")
+ secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
+ session_token = os.getenv("AWS_SESSION_TOKEN")
+
+ if not access_key or not secret_key:
+ raise ValueError(
+ "AWS credentials not found. Please set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables."
+ )
+
+ # Use us-west-2 for cross-region inference models
+ region = "us-west-2"
+
+ # Construct the endpoint URL
+ service = "bedrock"
+ url = f"https://bedrock-runtime.{region}.amazonaws.com/model/{model.name}/invoke"
+
+ # Prepare headers
+ auth = AWS4Auth(
+ access_key,
+ secret_key,
+ region,
+ service,
+ session_token=session_token,
+ )
+
+ base_headers = {
+ "Content-Type": "application/json",
+ }
+
+ # Convert conversation to Nova format with optional caching
+ system_list, messages = prompt.to_nova(cache_pattern=cache_pattern)
+
+ # Build request body
+ request_json = {
+ "schemaVersion": "messages-v1",
+ "messages": messages,
+ "inferenceConfig": {
+ "maxTokens": sampling_params.max_new_tokens,
+ "temperature": sampling_params.temperature,
+ "topP": sampling_params.top_p,
+ },
+ }
+
+ # Add system prompt if present
+ if system_list:
+ request_json["system"] = system_list
+
+ # Add tools if present
+ if tools:
+ tool_definitions = []
+ for tool in tools:
+ if isinstance(tool, Tool):
+ tool_definitions.append(_convert_tool_to_nova(tool))
+ elif isinstance(tool, MCPServer):
+ # Convert MCP server to individual tools
+ individual_tools = await tool.to_tools()
+ for individual_tool in individual_tools:
+ tool_definitions.append(_convert_tool_to_nova(individual_tool))
+
+ if tool_definitions:
+ request_json["toolConfig"] = {"tools": tool_definitions}
+
+ return request_json, base_headers, auth, url, region
+
+
+ class BedrockNovaRequest(APIRequestBase):
+ """Request handler for Amazon Nova models on Bedrock."""
+
+ def __init__(self, context: RequestContext):
+ super().__init__(context=context)
+ self.model = APIModel.from_registry(self.context.model_name)
+ self.region = None
+
+ async def build_request(self):
+ (
+ self.request_json,
+ base_headers,
+ self.auth,
+ self.url,
+ self.region,
+ ) = await _build_nova_request(self.model, self.context)
+
+ self.request_header = self.merge_headers(
+ base_headers, exclude_patterns=["anthropic", "openai", "gemini", "mistral"]
+ )
+
+ async def execute_once(self) -> APIResponse:
+ """Override execute_once to handle AWS4Auth signing."""
+ await self.build_request()
+ import aiohttp
+
+ assert self.context.status_tracker
+
+ self.context.status_tracker.total_requests += 1
+ timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
+
+ # Prepare the request data
+ payload = json.dumps(self.request_json, separators=(",", ":")).encode("utf-8")
+
+ # Create a fake requests.PreparedRequest object for AWS4Auth to sign
+ import requests
+
+ assert self.url is not None, "URL must be set after build_request"
+ assert (
+ self.request_header is not None
+ ), "Headers must be set after build_request"
+
+ fake_request = requests.Request(
+ method="POST",
+ url=self.url,
+ data=payload,
+ headers=self.request_header.copy(),
+ )
+
+ prepared_request = fake_request.prepare()
+ signed_request = self.auth(prepared_request)
+ signed_headers = dict(signed_request.headers)
+
+ try:
+ async with aiohttp.ClientSession(timeout=timeout) as session:
+ async with session.post(
+ url=self.url,
+ headers=signed_headers,
+ data=payload,
+ ) as http_response:
+ response: APIResponse = await self.handle_response(http_response)
+ return response
+
+ except asyncio.TimeoutError:
+ return APIResponse(
+ id=self.context.task_id,
+ model_internal=self.context.model_name,
+ prompt=self.context.prompt,
+ sampling_params=self.context.sampling_params,
+ status_code=None,
+ is_error=True,
+ error_message="Request timed out (terminated by client).",
+ content=None,
+ usage=None,
+ )
+
+ except Exception as e:
+ from ..errors import raise_if_modal_exception
+
+ raise_if_modal_exception(e)
+ return APIResponse(
+ id=self.context.task_id,
+ model_internal=self.context.model_name,
+ prompt=self.context.prompt,
+ sampling_params=self.context.sampling_params,
+ status_code=None,
+ is_error=True,
+ error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+ content=None,
+ usage=None,
+ )
+
+ async def handle_response(self, http_response: ClientResponse) -> APIResponse:
+ is_error = False
+ error_message = None
+ content = None
+ usage = None
+ finish_reason = None
+ status_code = http_response.status
+ mimetype = http_response.headers.get("Content-Type", None)
+ data = None
+ assert self.context.status_tracker
+
+ if status_code >= 200 and status_code < 300:
+ try:
+ data = await http_response.json()
+
+ # Parse Nova response format
+ # Nova returns: {"output": {"message": {"role": "assistant", "content": [...]}}, "usage": {...}, "stopReason": "..."}
+ output = data.get("output", {})
+ message = output.get("message", {})
+ response_content = message.get("content", [])
+ finish_reason = data.get("stopReason")
+
+ parts = []
+ for item in response_content:
+ if "text" in item:
+ parts.append(Text(item["text"]))
+ elif "toolUse" in item:
+ tool_use = item["toolUse"]
+ parts.append(
+ ToolCall(
+ id=tool_use["toolUseId"],
+ name=tool_use["name"],
+ arguments=tool_use.get("input", {}),
+ )
+ )
+
+ content = Message("assistant", parts)
+
+ # Parse usage including cache tokens
+ # Note: Nova uses "cacheReadInputTokenCount" and "cacheWriteInputTokenCount"
+ raw_usage = data.get("usage", {})
+ usage = Usage(
+ input_tokens=raw_usage.get("inputTokens", 0),
+ output_tokens=raw_usage.get("outputTokens", 0),
+ cache_read_tokens=raw_usage.get("cacheReadInputTokenCount", 0),
+ cache_write_tokens=raw_usage.get("cacheWriteInputTokenCount", 0),
+ )
+
+ except Exception as e:
+ is_error = True
+ error_message = (
+ f"Error calling .json() on response w/ status {status_code}: {e}"
+ )
+ elif mimetype and "json" in mimetype.lower():
+ is_error = True
+ data = await http_response.json()
+ error_message = json.dumps(data)
+ else:
+ is_error = True
+ text = await http_response.text()
+ error_message = text
+
+ # Handle special kinds of errors
+ retry_with_different_model = status_code in [529, 429, 400, 401, 403, 413]
+ if is_error and error_message is not None:
+ if (
+ "rate limit" in error_message.lower()
+ or "throttling" in error_message.lower()
+ or status_code == 429
+ ):
+ error_message += " (Rate limit error, triggering cooldown.)"
+ self.context.status_tracker.rate_limit_exceeded()
+ if "context length" in error_message or "too long" in error_message:
+ error_message += " (Context length exceeded, set retries to 0.)"
+ self.context.attempts_left = 0
+ retry_with_different_model = True
+
+ return APIResponse(
+ id=self.context.task_id,
+ status_code=status_code,
+ is_error=is_error,
+ error_message=error_message,
+ prompt=self.context.prompt,
+ content=content,
+ model_internal=self.context.model_name,
+ region=self.region,
+ sampling_params=self.context.sampling_params,
+ usage=usage,
+ raw_response=data,
+ finish_reason=finish_reason,
+ retry_with_different_model=retry_with_different_model,
+ )
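For context on the new module: `_build_nova_request` above assembles a body for Bedrock's `/model/{id}/invoke` endpoint using the `messages-v1` schema. A rough illustration of the resulting payload follows; the keys mirror the function above, the message and system shapes are assumed from the Bedrock Nova schema (the `prompt.to_nova(...)` converter itself is not shown in this diff), the values are placeholders, and `get_weather` is a hypothetical tool.

```python
# Illustrative only; keys mirror _build_nova_request, values are placeholders.
request_json = {
    "schemaVersion": "messages-v1",
    "messages": [
        # assumed shape produced by prompt.to_nova(); not shown in this diff
        {"role": "user", "content": [{"text": "Summarize this document."}]},
    ],
    "inferenceConfig": {
        "maxTokens": 1024,   # sampling_params.max_new_tokens
        "temperature": 0.7,  # sampling_params.temperature
        "topP": 0.9,         # sampling_params.top_p
    },
    # only included when the conversation carries a system prompt
    "system": [{"text": "You are a helpful assistant."}],
    # only included when Tool / MCPServer objects are passed
    "toolConfig": {
        "tools": [
            {
                "toolSpec": {
                    "name": "get_weather",
                    "description": "Look up the weather for a city.",
                    "inputSchema": {
                        "json": {
                            "type": "object",
                            "properties": {"city": {"type": "string"}},
                            "required": ["city"],
                        }
                    },
                }
            }
        ]
    },
}
```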
@@ -2,6 +2,7 @@ from .openai import OpenAIRequest, OpenAIResponsesRequest
  from .anthropic import AnthropicRequest
  from .mistral import MistralRequest
  from .bedrock import BedrockRequest
+ from .bedrock_nova import BedrockNovaRequest
  from .gemini import GeminiRequest

  CLASSES = {
@@ -10,5 +11,6 @@ CLASSES = {
  "anthropic": AnthropicRequest,
  "mistral": MistralRequest,
  "bedrock": BedrockRequest,
+ "bedrock-nova": BedrockNovaRequest,
  "gemini": GeminiRequest,
  }
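The two added lines above register the new handler under the `"bedrock-nova"` key. How the registry is consumed is not visible in this diff, so the dispatch helper below is a hypothetical sketch; only `CLASSES`, `BedrockNovaRequest(context)`, and `execute_once()` are taken from the code shown here.

```python
from lm_deluge.api_requests.common import CLASSES

async def dispatch(context, api_name: str):
    """Hypothetical helper: look up a request class by API name and run it."""
    request_cls = CLASSES[api_name]       # e.g. CLASSES["bedrock-nova"] -> BedrockNovaRequest
    request = request_cls(context)        # each handler is built from a RequestContext
    return await request.execute_once()   # returns an APIResponse
```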
@@ -2,9 +2,9 @@ from dataclasses import dataclass, field
  from functools import cached_property
  from typing import Any, Callable, Sequence, TYPE_CHECKING

- from .config import SamplingParams
- from .prompt import CachePattern, Conversation
- from .tracker import StatusTracker
+ from ..config import SamplingParams
+ from ..prompt import CachePattern, Conversation
+ from ..tracker import StatusTracker

  if TYPE_CHECKING:
  from pydantic import BaseModel
@@ -83,4 +83,4 @@ class RequestContext:
  # Update with any overrides
  current_values.update(overrides)

- return RequestContext(**current_values)
+ return RequestContext(**current_values) # type: ignore[arg-type]
@@ -1,9 +1,10 @@
  import json
  import os
+ from typing import Any

  from aiohttp import ClientResponse

- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import Tool
  from lm_deluge.warnings import maybe_warn

@@ -37,13 +38,14 @@ async def _build_gemini_request(
  part_type="function call",
  )

- request_json = {
+ generation_config: dict[str, Any] = {
+ "temperature": sampling_params.temperature,
+ "topP": sampling_params.top_p,
+ "maxOutputTokens": sampling_params.max_new_tokens,
+ }
+ request_json: dict[str, Any] = {
  "contents": messages,
- "generationConfig": {
- "temperature": sampling_params.temperature,
- "topP": sampling_params.top_p,
- "maxOutputTokens": sampling_params.max_new_tokens,
- },
+ "generationConfig": generation_config,
  }

  # Add system instruction if present
@@ -83,7 +85,7 @@ async def _build_gemini_request(
  }
  effort = level_map[effort_key]
  thinking_config = {"thinkingLevel": effort}
- request_json["generationConfig"]["thinkingConfig"] = thinking_config
+ generation_config["thinkingConfig"] = thinking_config

  elif model.reasoning_model:
  if (
@@ -126,7 +128,7 @@ async def _build_gemini_request(
  # no thoughts head empty
  thinking_config = {"includeThoughts": False, "thinkingBudget": 0}

- request_json["generationConfig"]["thinkingConfig"] = thinking_config
+ generation_config["thinkingConfig"] = thinking_config

  else:
  if sampling_params.reasoning_effort:
@@ -171,14 +173,14 @@ async def _build_gemini_request(

  # Handle JSON mode
  if sampling_params.json_mode and model.supports_json:
- request_json["generationConfig"]["responseMimeType"] = "application/json"
+ generation_config["responseMimeType"] = "application/json"

  # Handle media_resolution for Gemini 3 (requires v1alpha)
  if sampling_params.media_resolution is not None:
  is_gemini_3 = "gemini-3" in model.name.lower()
  if is_gemini_3:
  # Add global media resolution to generationConfig
- request_json["generationConfig"]["mediaResolution"] = {
+ generation_config["mediaResolution"] = {
  "level": sampling_params.media_resolution
  }
  else:
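The refactor above simply hoists the nested `generationConfig` dict into a local `generation_config` variable so later branches can mutate it directly. A hedged sketch of the keys that can end up in it, all taken from the hunks above; the concrete values are placeholders, and only one of the two `thinkingConfig` shapes would be set for a given model:

```python
from typing import Any

generation_config: dict[str, Any] = {
    "temperature": 0.7,        # sampling_params.temperature
    "topP": 0.95,              # sampling_params.top_p
    "maxOutputTokens": 2048,   # sampling_params.max_new_tokens
    # reasoning models get either {"thinkingLevel": ...} (Gemini 3 effort levels)
    # or {"includeThoughts": ..., "thinkingBudget": ...}
    "thinkingConfig": {"includeThoughts": False, "thinkingBudget": 0},
    # only when json_mode is set and the model supports JSON output
    "responseMimeType": "application/json",
    # Gemini 3 only, when sampling_params.media_resolution is provided
    "mediaResolution": {"level": "..."},  # placeholder level value
}
request_json: dict[str, Any] = {
    "contents": [],  # messages from the conversation
    "generationConfig": generation_config,
}
```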
@@ -7,7 +7,7 @@ from lm_deluge.warnings import maybe_warn

  from ..models import APIModel
  from ..prompt import Message
- from ..request_context import RequestContext
+ from ..api_requests.context import RequestContext
  from ..usage import Usage
  from .base import APIRequestBase, APIResponse
13