lm-deluge 0.0.56__py3-none-any.whl → 0.0.69__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. lm_deluge/__init__.py +12 -1
  2. lm_deluge/api_requests/anthropic.py +12 -1
  3. lm_deluge/api_requests/base.py +87 -5
  4. lm_deluge/api_requests/bedrock.py +3 -4
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +7 -6
  7. lm_deluge/api_requests/mistral.py +8 -9
  8. lm_deluge/api_requests/openai.py +179 -124
  9. lm_deluge/batches.py +25 -9
  10. lm_deluge/client.py +280 -67
  11. lm_deluge/config.py +1 -1
  12. lm_deluge/file.py +382 -13
  13. lm_deluge/mock_openai.py +482 -0
  14. lm_deluge/models/__init__.py +12 -8
  15. lm_deluge/models/anthropic.py +12 -20
  16. lm_deluge/models/bedrock.py +0 -14
  17. lm_deluge/models/cohere.py +0 -16
  18. lm_deluge/models/google.py +0 -20
  19. lm_deluge/models/grok.py +48 -4
  20. lm_deluge/models/groq.py +2 -2
  21. lm_deluge/models/kimi.py +34 -0
  22. lm_deluge/models/meta.py +0 -8
  23. lm_deluge/models/minimax.py +10 -0
  24. lm_deluge/models/openai.py +28 -34
  25. lm_deluge/models/openrouter.py +64 -1
  26. lm_deluge/models/together.py +0 -16
  27. lm_deluge/prompt.py +138 -29
  28. lm_deluge/request_context.py +9 -11
  29. lm_deluge/tool.py +395 -19
  30. lm_deluge/tracker.py +11 -5
  31. lm_deluge/warnings.py +46 -0
  32. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/METADATA +3 -1
  33. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/RECORD +36 -33
  34. lm_deluge/agent.py +0 -0
  35. lm_deluge/gemini_limits.py +0 -65
  36. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/WHEEL +0 -0
  37. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/licenses/LICENSE +0 -0
  38. {lm_deluge-0.0.56.dist-info → lm_deluge-0.0.69.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py CHANGED
@@ -1,7 +1,14 @@
 from .client import APIResponse, LLMClient, SamplingParams
 from .file import File
 from .prompt import Conversation, Message
-from .tool import Tool
+from .tool import Tool, ToolParams
+
+try:
+    from .mock_openai import MockAsyncOpenAI  # noqa
+
+    _has_openai = True
+except ImportError:
+    _has_openai = False
 
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
@@ -12,5 +19,9 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
+    "ToolParams",
     "File",
 ]
+
+if _has_openai:
+    __all__.append("MockAsyncOpenAI")
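A small consumer-side sketch of the new exports, assuming only what the `__init__.py` above guarantees: `ToolParams` is always importable, while `MockAsyncOpenAI` appears in `__all__` only when the optional `openai` dependency is installed.

# Sketch only; relies on the conditional export shown above.
import lm_deluge
from lm_deluge import Tool, ToolParams  # always available

if "MockAsyncOpenAI" in lm_deluge.__all__:
    from lm_deluge import MockAsyncOpenAI  # requires the `openai` package
else:
    MockAsyncOpenAI = None  # optional dependency not installed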
lm_deluge/api_requests/anthropic.py CHANGED
@@ -42,6 +42,14 @@ def _build_anthropic_request(
         "content-type": "application/json",
     }
 
+    # Check if any messages contain uploaded files (file_id)
+    # If so, add the files-api beta header
+    for msg in prompt.messages:
+        for file in msg.files:
+            if file.is_remote and file.remote_provider == "anthropic":
+                _add_beta(base_headers, "files-api-2025-04-14")
+                break
+
     request_json = {
         "model": model.name,
         "messages": messages,
@@ -72,7 +80,7 @@ def _build_anthropic_request(
         request_json["system"] = system_message
 
     # handle temp + top_p for opus 4.1/sonnet 4.5
-    if model.name in ["claude-sonnet-4-5-20250929", "claude-opus-4-1-20250805"]:
+    if "4-1" in model.name or "4-5" in model.name:
         if "temperature" in request_json and "top_p" in request_json:
             request_json.pop("top_p")
 
@@ -82,6 +90,9 @@ def _build_anthropic_request(
     for tool in tools:
         if isinstance(tool, Tool):
            tool_definitions.append(tool.dump_for("anthropic"))
+        elif isinstance(tool, dict) and "url" in tool:
+            _add_beta(base_headers, "mcp-client-2025-04-04")
+            mcp_servers.append(tool)
        elif isinstance(tool, dict):
            tool_definitions.append(tool)
    # add betas if needed
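The `_add_beta` helper is called above but its definition sits outside this diff. A minimal sketch of what such a helper presumably does, assuming it only appends to the comma-separated `anthropic-beta` header without duplicating entries:

# Hypothetical helper; the real _add_beta in this module may differ.
def _add_beta(headers: dict[str, str], beta: str) -> None:
    # Anthropic takes beta flags as a comma-separated "anthropic-beta"
    # header; append the flag only if it is not already present.
    existing = [b for b in headers.get("anthropic-beta", "").split(",") if b]
    if beta not in existing:
        existing.append(beta)
    headers["anthropic-beta"] = ",".join(existing)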
lm_deluge/api_requests/base.py CHANGED
@@ -1,4 +1,5 @@
 import asyncio
+import time
 import traceback
 from abc import ABC, abstractmethod
 
@@ -6,6 +7,7 @@ import aiohttp
 from aiohttp import ClientResponse
 
 from ..errors import raise_if_modal_exception
+from ..models.openai import OPENAI_MODELS
 from ..request_context import RequestContext
 from .response import APIResponse
 
@@ -82,15 +84,95 @@ class APIRequestBase(ABC):
         if self.context.status_tracker:
             self.context.status_tracker.task_succeeded(self.context.task_id)
 
+    async def _execute_once_background_mode(self) -> APIResponse:
+        """
+        ONLY for OpenAI responses API. Implement the
+        start -> poll -> result style of request.
+        """
+        assert self.context.status_tracker, "no status tracker"
+        start_time = time.time()
+        async with aiohttp.ClientSession() as session:
+            last_status: str | None = None
+
+            try:
+                self.context.status_tracker.total_requests += 1
+                assert self.url is not None, "URL is not set"
+                async with session.post(
+                    url=self.url,
+                    headers=self.request_header,
+                    json=self.request_json,
+                ) as http_response:
+                    # make sure we created the Response object
+                    http_response.raise_for_status()
+                    data = await http_response.json()
+                    response_id = data["id"]
+                    last_status = data["status"]
+
+                while True:
+                    if time.time() - start_time > self.context.request_timeout:
+                        # cancel the response
+                        async with session.post(
+                            url=f"{self.url}/{response_id}/cancel",
+                            headers=self.request_header,
+                        ) as http_response:
+                            http_response.raise_for_status()
+
+                        return APIResponse(
+                            id=self.context.task_id,
+                            model_internal=self.context.model_name,
+                            prompt=self.context.prompt,
+                            sampling_params=self.context.sampling_params,
+                            status_code=None,
+                            is_error=True,
+                            error_message="Request timed out (terminated by client).",
+                            content=None,
+                            usage=None,
+                        )
+                    # poll for the response
+                    await asyncio.sleep(5.0)
+                    async with session.get(
+                        url=f"{self.url}/{response_id}",
+                        headers=self.request_header,
+                    ) as http_response:
+                        http_response.raise_for_status()
+                        data = await http_response.json()
+
+                        if data["status"] != last_status:
+                            print(
+                                f"Background req {response_id} status updated to: {data['status']}"
+                            )
+                        last_status = data["status"]
+                        if last_status not in ["queued", "in_progress"]:
+                            return await self.handle_response(http_response)
+
+            except Exception as e:
+                raise_if_modal_exception(e)
+                tb = traceback.format_exc()
+                print(tb)
+                return APIResponse(
+                    id=self.context.task_id,
+                    model_internal=self.context.model_name,
+                    prompt=self.context.prompt,
+                    sampling_params=self.context.sampling_params,
+                    status_code=None,
+                    is_error=True,
+                    error_message=f"Unexpected {type(e).__name__}: {str(e) or 'No message.'}",
+                    content=None,
+                    usage=None,
+                )
+
     async def execute_once(self) -> APIResponse:
         """Send the HTTP request once and return the parsed APIResponse."""
         await self.build_request()
         assert self.context.status_tracker
-        # try:
-        #     dumped = json.dumps(self.request_json)
-        # except Exception:
-        #     print("couldn't serialize request json")
-        #     print(self.request_json)
+
+        if (
+            self.context.background
+            and self.context.use_responses_api
+            and self.context.model_name in OPENAI_MODELS
+        ):
+            return await self._execute_once_background_mode()
+
         try:
             self.context.status_tracker.total_requests += 1
             timeout = aiohttp.ClientTimeout(total=self.context.request_timeout)
lm_deluge/api_requests/bedrock.py CHANGED
@@ -1,10 +1,11 @@
 import asyncio
 import json
 import os
-import warnings
 
 from aiohttp import ClientResponse
 
+from lm_deluge.warnings import maybe_warn
+
 try:
     from requests_aws4auth import AWS4Auth
 except ImportError:
@@ -187,9 +188,7 @@ async def _build_openai_bedrock_request(
     # Note: GPT-OSS on Bedrock doesn't support response_format parameter
     # Even though the model supports JSON, we can't use the response_format parameter
     if sampling_params.json_mode and model.supports_json:
-        warnings.warn(
-            f"JSON mode requested for {model.name} but response_format parameter not supported on Bedrock"
-        )
+        maybe_warn("WARN_JSON_MODE_UNSUPPORTED", model_name=model.name)
 
     if tools:
         request_tools = []
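These call sites rely on the new `lm_deluge/warnings.py` module (listed above with +46 lines but not shown in this diff). A rough sketch of the shape a keyed, warn-once `maybe_warn` helper could take; the message catalog and de-duplication logic here are assumptions, not the shipped code:

# Hypothetical sketch of lm_deluge/warnings.py; the shipped module may differ.
import warnings

_MESSAGES = {
    "WARN_JSON_MODE_UNSUPPORTED": (
        "JSON mode requested for {model_name} but response_format is not supported here"
    ),
    "WARN_REASONING_UNSUPPORTED": (
        "Ignoring reasoning_effort param for non-reasoning model: {model_name}"
    ),
    "WARN_LOGPROBS_UNSUPPORTED": (
        "Ignoring logprobs param for non-logprobs model: {model_name}"
    ),
    "WARN_CACHING_UNSUPPORTED": (
        "Cache parameter '{cache_param}' is not supported for {model_name}, ignoring"
    ),
}
_already_warned: set[str] = set()


def maybe_warn(code: str, **kwargs) -> None:
    # Emit each warning code at most once per process to avoid log spam.
    if code in _already_warned:
        return
    _already_warned.add(code)
    warnings.warn(_MESSAGES[code].format(**kwargs))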
lm_deluge/api_requests/chat_reasoning.py CHANGED
@@ -0,0 +1,4 @@
+# this request type is for models that add "reasoning_content"
+# on top of the openai chat completions. it's important to be separate
+# for providers that expect you to provide back the reasoning content to
+# preserve best performance.
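A hedged illustration of the message shape this comment describes: some chat-completions-style providers attach a `reasoning_content` field to assistant messages and expect it to be replayed with the conversation history. The field names below follow that convention, not any `lm_deluge` internals.

# Illustration only: a provider that extends chat completions with
# "reasoning_content" returns it on the assistant message and expects the
# full history (including that field) on the next request.
history = [
    {"role": "user", "content": "What is 17 * 24?"},
    {
        "role": "assistant",
        "content": "17 * 24 = 408.",
        "reasoning_content": "17 * 20 = 340, 17 * 4 = 68, 340 + 68 = 408.",
    },
    {"role": "user", "content": "Now divide that by 4."},
]
# Sending `history` as-is preserves the earlier reasoning for the provider.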
lm_deluge/api_requests/gemini.py CHANGED
@@ -1,11 +1,12 @@
 import json
 import os
-import warnings
 from typing import Any
+
 from aiohttp import ClientResponse
 
 from lm_deluge.request_context import RequestContext
 from lm_deluge.tool import Tool
+from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
@@ -54,9 +55,7 @@ async def _build_gemini_request(
 
     else:
         if sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {model.name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=model.name)
 
     # Add tools if provided
     if tools:
@@ -76,8 +75,10 @@ class GeminiRequest(APIRequestBase):
 
         # Warn if cache is specified for Gemini model
         if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is not supported for Gemini models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
             )
 
         self.model = APIModel.from_registry(self.context.model_name)
lm_deluge/api_requests/mistral.py CHANGED
@@ -1,9 +1,10 @@
 import json
 import os
-import warnings
 
 from aiohttp import ClientResponse
 
+from lm_deluge.warnings import maybe_warn
+
 from ..models import APIModel
 from ..prompt import Message
 from ..request_context import RequestContext
@@ -17,8 +18,10 @@ class MistralRequest(APIRequestBase):
 
         # Warn if cache is specified for non-Anthropic model
        if self.context.cache is not None:
-            warnings.warn(
-                f"Cache parameter '{self.context.cache}' is only supported for Anthropic models, ignoring for {self.context.model_name}"
+            maybe_warn(
+                "WARN_CACHING_UNSUPPORTED",
+                model_name=self.context.model_name,
+                cache_param=self.context.cache,
            )
        self.model = APIModel.from_registry(self.context.model_name)
 
@@ -38,13 +41,9 @@ class MistralRequest(APIRequestBase):
             "max_tokens": self.context.sampling_params.max_new_tokens,
         }
         if self.context.sampling_params.reasoning_effort:
-            warnings.warn(
-                f"Ignoring reasoning_effort param for non-reasoning model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_REASONING_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.logprobs:
-            warnings.warn(
-                f"Ignoring logprobs param for non-logprobs model: {self.context.model_name}"
-            )
+            maybe_warn("WARN_LOGPROBS_UNSUPPORTED", model_name=self.context.model_name)
         if self.context.sampling_params.json_mode and self.model.supports_json:
             self.request_json["response_format"] = {"type": "json_object"}