lm-deluge 0.0.90__py3-none-any.whl → 0.0.91__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. lm_deluge/__init__.py +3 -3
  2. lm_deluge/api_requests/anthropic.py +4 -2
  3. lm_deluge/api_requests/base.py +1 -1
  4. lm_deluge/api_requests/bedrock.py +6 -1
  5. lm_deluge/{request_context.py → api_requests/context.py} +4 -4
  6. lm_deluge/api_requests/gemini.py +13 -11
  7. lm_deluge/api_requests/mistral.py +1 -1
  8. lm_deluge/api_requests/openai.py +4 -2
  9. lm_deluge/batches.py +4 -4
  10. lm_deluge/cache.py +1 -1
  11. lm_deluge/cli.py +672 -300
  12. lm_deluge/{client.py → client/__init__.py} +15 -12
  13. lm_deluge/config.py +9 -31
  14. lm_deluge/embed.py +2 -6
  15. lm_deluge/models/__init__.py +137 -30
  16. lm_deluge/models/anthropic.py +20 -12
  17. lm_deluge/models/bedrock.py +9 -0
  18. lm_deluge/models/cerebras.py +2 -0
  19. lm_deluge/models/cohere.py +2 -0
  20. lm_deluge/models/google.py +13 -0
  21. lm_deluge/models/grok.py +4 -0
  22. lm_deluge/models/groq.py +2 -0
  23. lm_deluge/models/meta.py +2 -0
  24. lm_deluge/models/openai.py +24 -1
  25. lm_deluge/models/openrouter.py +107 -1
  26. lm_deluge/models/together.py +3 -0
  27. lm_deluge/pipelines/extract.py +4 -5
  28. lm_deluge/pipelines/gepa/__init__.py +1 -1
  29. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
  30. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
  31. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
  32. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
  33. lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
  34. lm_deluge/prompt/__init__.py +45 -0
  35. lm_deluge/{prompt.py → prompt/conversation.py} +45 -1014
  36. lm_deluge/{image.py → prompt/image.py} +0 -10
  37. lm_deluge/prompt/message.py +571 -0
  38. lm_deluge/prompt/serialization.py +21 -0
  39. lm_deluge/prompt/signatures.py +77 -0
  40. lm_deluge/prompt/text.py +47 -0
  41. lm_deluge/prompt/thinking.py +55 -0
  42. lm_deluge/prompt/tool_calls.py +245 -0
  43. lm_deluge/server/app.py +1 -1
  44. lm_deluge/tool/__init__.py +65 -18
  45. lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
  46. lm_deluge/tool/cua/actions.py +26 -26
  47. lm_deluge/tool/cua/batch.py +1 -2
  48. lm_deluge/tool/cua/kernel.py +1 -1
  49. lm_deluge/tool/prefab/filesystem.py +2 -2
  50. lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
  51. lm_deluge/tool/prefab/memory.py +3 -1
  52. lm_deluge/tool/prefab/otc/executor.py +3 -3
  53. lm_deluge/tool/prefab/random.py +30 -54
  54. lm_deluge/tool/prefab/rlm/__init__.py +2 -2
  55. lm_deluge/tool/prefab/rlm/executor.py +1 -1
  56. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +2 -2
  57. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +9 -7
  58. lm_deluge/tool/prefab/subagents.py +1 -1
  59. lm_deluge/util/logprobs.py +4 -4
  60. lm_deluge/util/schema.py +6 -6
  61. lm_deluge/util/validation.py +14 -9
  62. {lm_deluge-0.0.90.dist-info → lm_deluge-0.0.91.dist-info}/METADATA +9 -10
  63. {lm_deluge-0.0.90.dist-info → lm_deluge-0.0.91.dist-info}/RECORD +68 -60
  64. lm_deluge-0.0.91.dist-info/entry_points.txt +3 -0
  65. /lm_deluge/{file.py → prompt/file.py} +0 -0
  66. {lm_deluge-0.0.90.dist-info → lm_deluge-0.0.91.dist-info}/WHEEL +0 -0
  67. {lm_deluge-0.0.90.dist-info → lm_deluge-0.0.91.dist-info}/licenses/LICENSE +0 -0
  68. {lm_deluge-0.0.90.dist-info → lm_deluge-0.0.91.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py CHANGED
@@ -1,7 +1,6 @@
  from .client import APIResponse, LLMClient, SamplingParams
- from .file import File
- from .prompt import Conversation, Message
- from .tool import Tool
+ from .prompt import Conversation, Message, File
+ from .tool import Tool, MCPServer
 
  # dotenv.load_dotenv() - don't do this, fucks with other packages
 
@@ -12,5 +11,6 @@ __all__ = [
  "Conversation",
  "Message",
  "Tool",
+ "MCPServer",
  "File",
  ]
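The hunks above move File and MCPServer into the top-level re-exports. The snippet below is an illustrative sketch of the resulting import surface, inferred only from this diff (not from the package docs):

    # Sketch of the 0.0.91 top-level exports, based solely on the __init__.py hunks above.
    from lm_deluge import Conversation, File, MCPServer, Message, Tool

    # File now lives under lm_deluge.prompt (formerly lm_deluge.file),
    # and MCPServer is newly re-exported alongside Tool.
    from lm_deluge.prompt import File as PromptFile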
lm_deluge/api_requests/anthropic.py CHANGED
@@ -10,7 +10,7 @@ from lm_deluge.prompt import (
  Thinking,
  ToolCall,
  )
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.usage import Usage
  from lm_deluge.util.schema import (
@@ -103,7 +103,9 @@ def _build_anthropic_request(
  if "top_p" in request_json:
  request_json["top_p"] = max(request_json["top_p"], 0.95)
  request_json["temperature"] = 1.0
- request_json["max_tokens"] += budget
+ max_tokens = request_json["max_tokens"]
+ assert isinstance(max_tokens, int)
+ request_json["max_tokens"] = max_tokens + budget
  else:
  request_json["thinking"] = {"type": "disabled"}
  if "kimi" in model.id and "thinking" in model.id:
lm_deluge/api_requests/base.py CHANGED
@@ -10,7 +10,7 @@ from aiohttp import ClientResponse
 
  from ..errors import raise_if_modal_exception
  from ..models.openai import OPENAI_MODELS
- from ..request_context import RequestContext
+ from ..api_requests.context import RequestContext
  from .response import APIResponse
 
 
lm_deluge/api_requests/bedrock.py CHANGED
@@ -20,7 +20,7 @@ from lm_deluge.prompt import (
  Thinking,
  ToolCall,
  )
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.usage import Usage
 
@@ -263,6 +263,11 @@ class BedrockRequest(APIRequestBase):
  # Create a fake requests.PreparedRequest object for AWS4Auth to sign
  import requests
 
+ assert self.url is not None, "URL must be set after build_request"
+ assert (
+ self.request_header is not None
+ ), "Headers must be set after build_request"
+
  fake_request = requests.Request(
  method="POST",
  url=self.url,
lm_deluge/{request_context.py → api_requests/context.py} CHANGED
@@ -2,9 +2,9 @@ from dataclasses import dataclass, field
  from functools import cached_property
  from typing import Any, Callable, Sequence, TYPE_CHECKING
 
- from .config import SamplingParams
- from .prompt import CachePattern, Conversation
- from .tracker import StatusTracker
+ from ..config import SamplingParams
+ from ..prompt import CachePattern, Conversation
+ from ..tracker import StatusTracker
 
  if TYPE_CHECKING:
  from pydantic import BaseModel
@@ -83,4 +83,4 @@ class RequestContext:
  # Update with any overrides
  current_values.update(overrides)
 
- return RequestContext(**current_values)
+ return RequestContext(**current_values) # type: ignore[arg-type]
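The same module move shows up in most hunks in this release: lm_deluge/request_context.py is now lm_deluge/api_requests/context.py. A downstream caller importing the old path would need the corresponding one-line change (hypothetical caller shown; whether the old path remains importable in 0.0.91 is not indicated by this diff):

    # before (0.0.90):
    #   from lm_deluge.request_context import RequestContext
    # after (0.0.91):
    from lm_deluge.api_requests.context import RequestContext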
lm_deluge/api_requests/gemini.py CHANGED
@@ -1,9 +1,10 @@
  import json
  import os
+ from typing import Any
 
  from aiohttp import ClientResponse
 
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import Tool
  from lm_deluge.warnings import maybe_warn
 
@@ -37,13 +38,14 @@ async def _build_gemini_request(
  part_type="function call",
  )
 
- request_json = {
+ generation_config: dict[str, Any] = {
+ "temperature": sampling_params.temperature,
+ "topP": sampling_params.top_p,
+ "maxOutputTokens": sampling_params.max_new_tokens,
+ }
+ request_json: dict[str, Any] = {
  "contents": messages,
- "generationConfig": {
- "temperature": sampling_params.temperature,
- "topP": sampling_params.top_p,
- "maxOutputTokens": sampling_params.max_new_tokens,
- },
+ "generationConfig": generation_config,
  }
 
  # Add system instruction if present
@@ -83,7 +85,7 @@ async def _build_gemini_request(
  }
  effort = level_map[effort_key]
  thinking_config = {"thinkingLevel": effort}
- request_json["generationConfig"]["thinkingConfig"] = thinking_config
+ generation_config["thinkingConfig"] = thinking_config
 
  elif model.reasoning_model:
  if (
@@ -126,7 +128,7 @@ async def _build_gemini_request(
  # no thoughts head empty
  thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
 
- request_json["generationConfig"]["thinkingConfig"] = thinking_config
+ generation_config["thinkingConfig"] = thinking_config
 
  else:
  if sampling_params.reasoning_effort:
@@ -171,14 +173,14 @@ async def _build_gemini_request(
 
  # Handle JSON mode
  if sampling_params.json_mode and model.supports_json:
- request_json["generationConfig"]["responseMimeType"] = "application/json"
+ generation_config["responseMimeType"] = "application/json"
 
  # Handle media_resolution for Gemini 3 (requires v1alpha)
  if sampling_params.media_resolution is not None:
  is_gemini_3 = "gemini-3" in model.name.lower()
  if is_gemini_3:
  # Add global media resolution to generationConfig
- request_json["generationConfig"]["mediaResolution"] = {
+ generation_config["mediaResolution"] = {
  "level": sampling_params.media_resolution
  }
  else:
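The gemini.py hunks above are largely a typing refactor: the generationConfig sub-dict is hoisted into a named generation_config variable annotated dict[str, Any], so the later thinkingConfig, responseMimeType, and mediaResolution branches mutate it directly instead of indexing through request_json. A minimal standalone sketch of the pattern follows (simplified names and literal values, not the actual builder):

    from typing import Any

    # Build the sub-dict first, with an explicit type, then reference it from the parent.
    generation_config: dict[str, Any] = {
        "temperature": 0.7,
        "topP": 0.95,
        "maxOutputTokens": 1024,
    }
    request_json: dict[str, Any] = {"contents": [], "generationConfig": generation_config}

    # Later branches mutate the sub-dict; the parent payload sees the change because
    # both names refer to the same dict object.
    generation_config["thinkingConfig"] = {"includeThoughts": False, "thinkingBudget": 0}
    assert request_json["generationConfig"]["thinkingConfig"]["thinkingBudget"] == 0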
lm_deluge/api_requests/mistral.py CHANGED
@@ -7,7 +7,7 @@ from lm_deluge.warnings import maybe_warn
 
  from ..models import APIModel
  from ..prompt import Message
- from ..request_context import RequestContext
+ from ..api_requests.context import RequestContext
  from ..usage import Usage
  from .base import APIRequestBase, APIResponse
 
lm_deluge/api_requests/openai.py CHANGED
@@ -7,7 +7,7 @@ from typing import Sequence
  import aiohttp
  from aiohttp import ClientResponse
 
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
  from lm_deluge.tool import MCPServer, Tool
  from lm_deluge.util.schema import (
  prepare_output_schema,
@@ -75,7 +75,9 @@ async def _build_oa_chat_request(
  request_json["service_tier"] = context.service_tier
  # if tinker, for now hack to mush into 1 string
  if "tinker" in model.name:
- request_json["messages"] = _message_contents_to_string(request_json["messages"])
+ messages = request_json["messages"]
+ assert isinstance(messages, list)
+ request_json["messages"] = _message_contents_to_string(messages)
 
  # set max_tokens or max_completion_tokens dep. on provider
  if "cohere" in model.api_base:
lm_deluge/batches.py CHANGED
@@ -3,7 +3,7 @@ import json
  import os
  import tempfile
  import time
- from typing import Literal, Sequence, cast
+ from typing import Any, Literal, Sequence, cast
 
  import aiohttp
  from rich.console import Console
@@ -22,7 +22,7 @@ from lm_deluge.prompt import (
  Prompt,
  prompts_to_conversations,
  )
- from lm_deluge.request_context import RequestContext
+ from lm_deluge.api_requests.context import RequestContext
 
 
  def _create_batch_status_display(
@@ -480,7 +480,7 @@ async def _wait_for_anthropic_batch_completion_async(
 
  # Event to signal when to stop the display updater
  stop_display_event = asyncio.Event()
- current_status = {"status": "processing", "counts": None}
+ current_status: dict[str, Any] = {"status": "processing", "counts": None}
 
  async def display_updater():
  """Update display independently of polling."""
@@ -632,7 +632,7 @@ async def _wait_for_openai_batch_completion_async(
 
  # Event to signal when to stop the display updater
  stop_display_event = asyncio.Event()
- current_status = {"status": "pending", "counts": None}
+ current_status: dict[str, Any] = {"status": "pending", "counts": None}
 
  async def display_updater():
  """Update display independently of polling."""
lm_deluge/cache.py CHANGED
@@ -8,7 +8,7 @@ from .api_requests.base import APIResponse
  try:
  import plyvel # type: ignore
  except ImportError:
- plyvel = None
+ plyvel: Any = None
  print("Warning: plyvel not installed, cannot use LevelDB.")
 
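The plyvel annotation above lets type checkers accept the None fallback when the optional LevelDB dependency is missing. A minimal sketch of that optional-import pattern, assuming only what the hunk shows (the helper at the end is an illustrative usage, not code from cache.py):

    from typing import Any

    try:
        import plyvel  # type: ignore
    except ImportError:
        plyvel: Any = None
        print("Warning: plyvel not installed, cannot use LevelDB.")

    def leveldb_available() -> bool:
        # Callers can gate LevelDB-backed caching on this check.
        return plyvel is not None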