lm-deluge 0.0.89__py3-none-any.whl → 0.0.91__py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registry.
Files changed (88)
  1. lm_deluge/__init__.py +3 -27
  2. lm_deluge/api_requests/anthropic.py +29 -7
  3. lm_deluge/api_requests/base.py +38 -1
  4. lm_deluge/api_requests/bedrock.py +29 -3
  5. lm_deluge/{request_context.py → api_requests/context.py} +4 -4
  6. lm_deluge/api_requests/gemini.py +30 -14
  7. lm_deluge/api_requests/mistral.py +1 -1
  8. lm_deluge/api_requests/openai.py +34 -5
  9. lm_deluge/batches.py +19 -49
  10. lm_deluge/cache.py +1 -1
  11. lm_deluge/cli.py +672 -300
  12. lm_deluge/{client.py → client/__init__.py} +42 -13
  13. lm_deluge/config.py +9 -31
  14. lm_deluge/embed.py +2 -6
  15. lm_deluge/models/__init__.py +138 -29
  16. lm_deluge/models/anthropic.py +32 -24
  17. lm_deluge/models/bedrock.py +9 -0
  18. lm_deluge/models/cerebras.py +2 -0
  19. lm_deluge/models/cohere.py +2 -0
  20. lm_deluge/models/google.py +13 -0
  21. lm_deluge/models/grok.py +4 -0
  22. lm_deluge/models/groq.py +2 -0
  23. lm_deluge/models/meta.py +2 -0
  24. lm_deluge/models/minimax.py +9 -1
  25. lm_deluge/models/openai.py +24 -1
  26. lm_deluge/models/openrouter.py +155 -1
  27. lm_deluge/models/together.py +3 -0
  28. lm_deluge/models/zai.py +50 -1
  29. lm_deluge/pipelines/extract.py +4 -5
  30. lm_deluge/pipelines/gepa/__init__.py +1 -1
  31. lm_deluge/pipelines/gepa/docs/samples.py +19 -10
  32. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
  33. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
  34. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
  35. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
  36. lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
  37. lm_deluge/prompt/__init__.py +45 -0
  38. lm_deluge/{prompt.py → prompt/conversation.py} +165 -869
  39. lm_deluge/{image.py → prompt/image.py} +0 -10
  40. lm_deluge/prompt/message.py +571 -0
  41. lm_deluge/prompt/serialization.py +21 -0
  42. lm_deluge/prompt/signatures.py +77 -0
  43. lm_deluge/prompt/text.py +47 -0
  44. lm_deluge/prompt/thinking.py +55 -0
  45. lm_deluge/prompt/tool_calls.py +245 -0
  46. lm_deluge/server/__init__.py +24 -0
  47. lm_deluge/server/__main__.py +144 -0
  48. lm_deluge/server/adapters.py +369 -0
  49. lm_deluge/server/app.py +388 -0
  50. lm_deluge/server/auth.py +71 -0
  51. lm_deluge/server/model_policy.py +215 -0
  52. lm_deluge/server/models_anthropic.py +172 -0
  53. lm_deluge/server/models_openai.py +175 -0
  54. lm_deluge/skills/anthropic.py +0 -0
  55. lm_deluge/skills/compat.py +0 -0
  56. lm_deluge/tool/__init__.py +78 -19
  57. lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
  58. lm_deluge/tool/cua/actions.py +26 -26
  59. lm_deluge/tool/cua/batch.py +1 -2
  60. lm_deluge/tool/cua/kernel.py +1 -1
  61. lm_deluge/tool/prefab/filesystem.py +2 -2
  62. lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
  63. lm_deluge/tool/prefab/memory.py +3 -1
  64. lm_deluge/tool/prefab/otc/executor.py +3 -3
  65. lm_deluge/tool/prefab/random.py +30 -54
  66. lm_deluge/tool/prefab/rlm/__init__.py +2 -2
  67. lm_deluge/tool/prefab/rlm/executor.py +1 -1
  68. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  69. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  70. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  71. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  72. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  73. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +829 -0
  74. lm_deluge/tool/prefab/skills.py +0 -0
  75. lm_deluge/tool/prefab/subagents.py +1 -1
  76. lm_deluge/util/logprobs.py +4 -4
  77. lm_deluge/util/schema.py +6 -6
  78. lm_deluge/util/validation.py +14 -9
  79. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/METADATA +12 -12
  80. lm_deluge-0.0.91.dist-info/RECORD +140 -0
  81. lm_deluge-0.0.91.dist-info/entry_points.txt +3 -0
  82. lm_deluge/mock_openai.py +0 -643
  83. lm_deluge/tool/prefab/sandbox.py +0 -1621
  84. lm_deluge-0.0.89.dist-info/RECORD +0 -117
  85. /lm_deluge/{file.py → prompt/file.py} +0 -0
  86. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/WHEEL +0 -0
  87. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/licenses/LICENSE +0 -0
  88. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py CHANGED
@@ -1,20 +1,6 @@
 from .client import APIResponse, LLMClient, SamplingParams
-from .file import File
-from .prompt import Conversation, Message
-from .tool import Tool
-
-try:
-    from .mock_openai import (  # noqa
-        APIError,
-        APITimeoutError,
-        BadRequestError,
-        MockAsyncOpenAI,
-        RateLimitError,
-    )
-
-    _has_openai = True
-except ImportError:
-    _has_openai = False
+from .prompt import Conversation, Message, File
+from .tool import Tool, MCPServer
 
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
@@ -25,16 +11,6 @@ __all__ = [
     "Conversation",
     "Message",
     "Tool",
+    "MCPServer",
     "File",
 ]
-
-if _has_openai:
-    __all__.extend(
-        [
-            "MockAsyncOpenAI",
-            "APIError",
-            "APITimeoutError",
-            "BadRequestError",
-            "RateLimitError",
-        ]
-    )
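Net effect for consumers: `File` now comes from `lm_deluge.prompt` (re-exported at the root), `MCPServer` joins `Tool` in the public surface, and the `MockAsyncOpenAI` shims are gone along with `mock_openai.py` (removed later in this diff). A minimal sketch of the 0.0.91 import surface, using only names visible in this hunk:

```python
# Everything below is re-exported from the package root in 0.0.91.
# The MockAsyncOpenAI / APIError shims must now come from the real
# openai package if you need them; lm_deluge no longer provides them.
from lm_deluge import (
    APIResponse,
    Conversation,
    File,
    LLMClient,
    MCPServer,
    Message,
    SamplingParams,
    Tool,
)
```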
lm_deluge/api_requests/anthropic.py CHANGED
@@ -6,10 +6,11 @@ from aiohttp import ClientResponse
 from lm_deluge.prompt import (
     Message,
     Text,
+    ThoughtSignature,
     Thinking,
     ToolCall,
 )
-from lm_deluge.request_context import RequestContext
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
 from lm_deluge.util.schema import (
@@ -102,7 +103,9 @@ def _build_anthropic_request(
         if "top_p" in request_json:
             request_json["top_p"] = max(request_json["top_p"], 0.95)
         request_json["temperature"] = 1.0
-        request_json["max_tokens"] += budget
+        max_tokens = request_json["max_tokens"]
+        assert isinstance(max_tokens, int)
+        request_json["max_tokens"] = max_tokens + budget
     else:
         request_json["thinking"] = {"type": "disabled"}
         if "kimi" in model.id and "thinking" in model.id:
@@ -250,8 +253,28 @@ class AnthropicRequest(APIRequestBase):
                     if item["type"] == "text":
                         parts.append(Text(item["text"]))
                     elif item["type"] == "thinking":
-                        thinking = item["thinking"]
-                        parts.append(Thinking(item["thinking"]))
+                        thinking_content = item.get("thinking", "")
+                        thinking = thinking_content
+                        signature = item.get("signature")
+                        parts.append(
+                            Thinking(
+                                thinking_content,
+                                raw_payload=item,
+                                thought_signature=ThoughtSignature(
+                                    signature,
+                                    provider="anthropic",
+                                )
+                                if signature is not None
+                                else None,
+                            )
+                        )
+                    elif item["type"] == "redacted_thinking":
+                        parts.append(
+                            Thinking(
+                                item.get("data", ""),
+                                raw_payload=item,
+                            )
+                        )
                     elif item["type"] == "tool_use":
                         parts.append(
                             ToolCall(
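Both the Anthropic handler here and the matching bedrock.py hunk below now preserve the raw content block and its signature instead of keeping only the thinking text. A rough standalone sketch of the mapping, assuming the `Thinking`/`ThoughtSignature` constructors shown in the imports above (not the exact source):

```python
# Sketch: how an Anthropic "thinking" / "redacted_thinking" content block
# becomes a Thinking part; argument names mirror the hunk above.
from lm_deluge.prompt import Thinking, ThoughtSignature

def thinking_part_from_block(item: dict) -> Thinking:
    if item["type"] == "redacted_thinking":
        # Redacted blocks carry opaque "data" instead of readable text.
        return Thinking(item.get("data", ""), raw_payload=item)
    signature = item.get("signature")
    return Thinking(
        item.get("thinking", ""),
        raw_payload=item,  # keep the original block for lossless round-trips
        thought_signature=ThoughtSignature(signature, provider="anthropic")
        if signature is not None
        else None,
    )
```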
@@ -265,9 +288,8 @@ class AnthropicRequest(APIRequestBase):
                 usage = Usage.from_anthropic_usage(data["usage"])
             except Exception as e:
                 is_error = True
-                error_message = (
-                    f"Error calling .json() on response w/ status {status_code}: {e}"
-                )
+                response_text = await http_response.text()
+                error_message = f"Error calling .json() on response w/ status {status_code}: {e}. Response: {response_text[:500]}"
         elif mimetype and "json" in mimetype.lower():
             is_error = True  # expected status is 200, otherwise it's an error
             data = await http_response.json()
lm_deluge/api_requests/base.py CHANGED
@@ -1,4 +1,6 @@
 import asyncio
+import json
+import os
 import time
 import traceback
 from abc import ABC, abstractmethod
@@ -8,7 +10,7 @@ from aiohttp import ClientResponse
 
 from ..errors import raise_if_modal_exception
 from ..models.openai import OPENAI_MODELS
-from ..request_context import RequestContext
+from ..api_requests.context import RequestContext
 from .response import APIResponse
 
 
@@ -73,6 +75,24 @@ class APIRequestBase(ABC):
 
         # Start with base headers, then overlay filtered extra headers (extra takes precedence)
        merged = dict(base_headers)
+        if "anthropic-beta" in merged and "anthropic-beta" in filtered_extra:
+            combined = []
+            seen = set()
+            for (
+                raw
+            ) in f"{merged['anthropic-beta']},{filtered_extra['anthropic-beta']}".split(
+                ","
+            ):
+                token = raw.strip()
+                if token and token not in seen:
+                    seen.add(token)
+                    combined.append(token)
+            merged["anthropic-beta"] = ",".join(combined)
+            filtered_extra = {
+                key: value
+                for key, value in filtered_extra.items()
+                if key != "anthropic-beta"
+            }
         merged.update(filtered_extra)
 
         # Filter out None values from final merged headers
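In effect, when both the base headers and the per-request extras carry an `anthropic-beta` value, the two comma-separated lists are concatenated and de-duplicated in first-seen order rather than having one silently overwrite the other. The same logic as a small standalone helper (illustrative, not the library's API; flag values are made up):

```python
# Sketch of the anthropic-beta merge: concatenate both comma-separated
# lists, strip whitespace, and keep the first occurrence of each token.
def merge_beta_headers(base: str, extra: str) -> str:
    combined: list[str] = []
    seen: set[str] = set()
    for raw in f"{base},{extra}".split(","):
        token = raw.strip()
        if token and token not in seen:
            seen.add(token)
            combined.append(token)
    return ",".join(combined)

assert merge_beta_headers("beta-a, beta-b", "beta-b,beta-c") == "beta-a,beta-b,beta-c"
```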
@@ -189,6 +209,23 @@ class APIRequestBase(ABC):
         await self.build_request()
         assert self.context.status_tracker
 
+        if os.getenv("DELUGE_PROXY_LOG_PROVIDER_REQUESTS", "").strip().lower() in {
+            "1",
+            "true",
+            "yes",
+            "on",
+        }:
+            print("DELUGE_PROXY_PROVIDER_REQUEST")
+            print(f"URL: {self.url}")
+            print("Headers:")
+            print(self.request_header)
+            if self.request_json is not None:
+                print("JSON:")
+                try:
+                    print(json.dumps(self.request_json, indent=2))
+                except Exception:
+                    print(self.request_json)
+
         if (
             self.context.background
             and self.context.use_responses_api
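The new `DELUGE_PROXY_LOG_PROVIDER_REQUESTS` flag dumps the outgoing URL, merged headers, and JSON body to stdout before every provider call; any of `1`, `true`, `yes`, or `on` (case-insensitive, whitespace-tolerant) enables it. A hypothetical usage sketch:

```python
# Enable provider-request logging before the client sends anything.
import os

os.environ["DELUGE_PROXY_LOG_PROVIDER_REQUESTS"] = "1"  # or "true"/"yes"/"on"

# Each request now prints a DELUGE_PROXY_PROVIDER_REQUEST block with the
# target URL, merged headers, and a pretty-printed JSON payload.
```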
lm_deluge/api_requests/bedrock.py CHANGED
@@ -16,10 +16,11 @@ except ImportError:
 from lm_deluge.prompt import (
     Message,
     Text,
+    ThoughtSignature,
     Thinking,
     ToolCall,
 )
-from lm_deluge.request_context import RequestContext
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
 
@@ -262,6 +263,11 @@ class BedrockRequest(APIRequestBase):
         # Create a fake requests.PreparedRequest object for AWS4Auth to sign
         import requests
 
+        assert self.url is not None, "URL must be set after build_request"
+        assert (
+            self.request_header is not None
+        ), "Headers must be set after build_request"
+
         fake_request = requests.Request(
             method="POST",
             url=self.url,
@@ -363,8 +369,28 @@ class BedrockRequest(APIRequestBase):
                     if item["type"] == "text":
                         parts.append(Text(item["text"]))
                     elif item["type"] == "thinking":
-                        thinking = item["thinking"]
-                        parts.append(Thinking(item["thinking"]))
+                        thinking_content = item.get("thinking", "")
+                        thinking = thinking_content
+                        signature = item.get("signature")
+                        parts.append(
+                            Thinking(
+                                thinking_content,
+                                raw_payload=item,
+                                thought_signature=ThoughtSignature(
+                                    signature,
+                                    provider="anthropic",
+                                )
+                                if signature is not None
+                                else None,
+                            )
+                        )
+                    elif item["type"] == "redacted_thinking":
+                        parts.append(
+                            Thinking(
+                                item.get("data", ""),
+                                raw_payload=item,
+                            )
+                        )
                     elif item["type"] == "tool_use":
                         parts.append(
                             ToolCall(
lm_deluge/{request_context.py → api_requests/context.py} RENAMED
@@ -2,9 +2,9 @@ from dataclasses import dataclass, field
 from functools import cached_property
 from typing import Any, Callable, Sequence, TYPE_CHECKING
 
-from .config import SamplingParams
-from .prompt import CachePattern, Conversation
-from .tracker import StatusTracker
+from ..config import SamplingParams
+from ..prompt import CachePattern, Conversation
+from ..tracker import StatusTracker
 
 if TYPE_CHECKING:
     from pydantic import BaseModel
@@ -83,4 +83,4 @@ class RequestContext:
         # Update with any overrides
         current_values.update(overrides)
 
-        return RequestContext(**current_values)
+        return RequestContext(**current_values)  # type: ignore[arg-type]
lm_deluge/api_requests/gemini.py CHANGED
@@ -1,15 +1,16 @@
 import json
 import os
+from typing import Any
 
 from aiohttp import ClientResponse
 
-from lm_deluge.request_context import RequestContext
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import Tool
 from lm_deluge.warnings import maybe_warn
 
 from ..config import SamplingParams
 from ..models import APIModel
-from ..prompt import Conversation, Message, Text, Thinking, ToolCall
+from ..prompt import Conversation, Message, Text, ThoughtSignature, Thinking, ToolCall
 from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
@@ -37,13 +38,14 @@ async def _build_gemini_request(
             part_type="function call",
         )
 
-    request_json = {
+    generation_config: dict[str, Any] = {
+        "temperature": sampling_params.temperature,
+        "topP": sampling_params.top_p,
+        "maxOutputTokens": sampling_params.max_new_tokens,
+    }
+    request_json: dict[str, Any] = {
         "contents": messages,
-        "generationConfig": {
-            "temperature": sampling_params.temperature,
-            "topP": sampling_params.top_p,
-            "maxOutputTokens": sampling_params.max_new_tokens,
-        },
+        "generationConfig": generation_config,
     }
 
     # Add system instruction if present
@@ -83,7 +85,7 @@ async def _build_gemini_request(
             }
             effort = level_map[effort_key]
             thinking_config = {"thinkingLevel": effort}
-            request_json["generationConfig"]["thinkingConfig"] = thinking_config
+            generation_config["thinkingConfig"] = thinking_config
 
     elif model.reasoning_model:
         if (
@@ -126,7 +128,7 @@ async def _build_gemini_request(
             # no thoughts head empty
             thinking_config = {"includeThoughts": False, "thinkingBudget": 0}
 
-        request_json["generationConfig"]["thinkingConfig"] = thinking_config
+        generation_config["thinkingConfig"] = thinking_config
 
     else:
         if sampling_params.reasoning_effort:
@@ -171,14 +173,14 @@ async def _build_gemini_request(
 
     # Handle JSON mode
     if sampling_params.json_mode and model.supports_json:
-        request_json["generationConfig"]["responseMimeType"] = "application/json"
+        generation_config["responseMimeType"] = "application/json"
 
     # Handle media_resolution for Gemini 3 (requires v1alpha)
     if sampling_params.media_resolution is not None:
         is_gemini_3 = "gemini-3" in model.name.lower()
         if is_gemini_3:
             # Add global media resolution to generationConfig
-            request_json["generationConfig"]["mediaResolution"] = {
+            generation_config["mediaResolution"] = {
                 "level": sampling_params.media_resolution
             }
         else:
@@ -260,10 +262,20 @@ class GeminiRequest(APIRequestBase):
                 if "content" in candidate and "parts" in candidate["content"]:
                     for part in candidate["content"]["parts"]:
                         # Extract thought signature if present
-                        thought_sig = part.get("thoughtSignature")
+                        raw_sig = part.get("thoughtSignature")
+                        thought_sig = (
+                            ThoughtSignature(raw_sig, provider="gemini")
+                            if raw_sig is not None
+                            else None
+                        )
 
                         if "text" in part:
-                            parts.append(Text(part["text"]))
+                            parts.append(
+                                Text(
+                                    part["text"],
+                                    thought_signature=thought_sig,
+                                )
+                            )
                         elif "thought" in part:
                             # Thought with optional signature
                             parts.append(
@@ -286,6 +298,10 @@ class GeminiRequest(APIRequestBase):
                                     thought_signature=thought_sig,
                                 )
                             )
+                        elif thought_sig:
+                            parts.append(
+                                Text("", thought_signature=thought_sig)
+                            )
 
                 content = Message("assistant", parts)
 
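A notable edge case handled above: Gemini can return a part that carries only a `thoughtSignature`, with no text or thought content, and the parser now emits an empty `Text` part so the signature survives a round-trip. A rough sketch of the wrapping, with a made-up response part:

```python
# Sketch: wrapping a Gemini part's thoughtSignature (the sample part and
# signature value are made up; logic mirrors the hunk above).
from lm_deluge.prompt import Text, ThoughtSignature

part = {"thoughtSignature": "CpgB..."}  # signature-only part, no text

raw_sig = part.get("thoughtSignature")
thought_sig = (
    ThoughtSignature(raw_sig, provider="gemini") if raw_sig is not None else None
)

if "text" in part:
    parsed = Text(part["text"], thought_signature=thought_sig)
elif thought_sig:
    # Signature-only parts are kept as empty Text so nothing is dropped.
    parsed = Text("", thought_signature=thought_sig)
```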
lm_deluge/api_requests/mistral.py CHANGED
@@ -7,7 +7,7 @@ from lm_deluge.warnings import maybe_warn
 
 from ..models import APIModel
 from ..prompt import Message
-from ..request_context import RequestContext
+from ..api_requests.context import RequestContext
 from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
lm_deluge/api_requests/openai.py CHANGED
@@ -7,7 +7,7 @@ from typing import Sequence
 import aiohttp
 from aiohttp import ClientResponse
 
-from lm_deluge.request_context import RequestContext
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.util.schema import (
     prepare_output_schema,
@@ -22,6 +22,24 @@ from ..usage import Usage
 from .base import APIRequestBase, APIResponse
 
 
+def _message_contents_to_string(messages: list[dict]):
+    messages = messages.copy()
+
+    for msg in messages:
+        content = msg.get("content")
+        assert content
+        if isinstance(content, list):
+            new_content = ""
+            for part in content:
+                assert "text" in part, "Invalid text part: " + str(part)
+                new_content += part["text"]
+                new_content += "\n"
+
+            msg["content"] = new_content.strip()
+
+    return messages
+
+
 async def _build_oa_chat_request(
     model: APIModel,
     context: RequestContext,
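The new `_message_contents_to_string` helper flattens list-style message content into a single newline-joined string; per the comment in the next hunk, it is a stopgap applied when the model name contains "tinker". An illustrative round-trip (the helper is re-declared here so the snippet runs standalone; behavior mirrors the hunk above):

```python
# Copy of the helper above, for illustration only.
def _message_contents_to_string(messages: list[dict]):
    messages = messages.copy()
    for msg in messages:
        content = msg.get("content")
        assert content
        if isinstance(content, list):
            new_content = ""
            for part in content:
                assert "text" in part, "Invalid text part: " + str(part)
                new_content += part["text"]
                new_content += "\n"
            msg["content"] = new_content.strip()
    return messages

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "First part."},
            {"type": "text", "text": "Second part."},
        ],
    }
]
# List contents are joined with newlines; string contents are left untouched.
assert _message_contents_to_string(messages)[0]["content"] == "First part.\nSecond part."
```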
@@ -55,6 +73,12 @@ async def _build_oa_chat_request(
             request_json["service_tier"] = context.service_tier
         else:
             request_json["service_tier"] = context.service_tier
+    # if tinker, for now hack to mush into 1 string
+    if "tinker" in model.name:
+        messages = request_json["messages"]
+        assert isinstance(messages, list)
+        request_json["messages"] = _message_contents_to_string(messages)
+
     # set max_tokens or max_completion_tokens dep. on provider
     if "cohere" in model.api_base:
         request_json["max_tokens"] = sampling_params.max_new_tokens
@@ -217,7 +241,7 @@ class OpenAIRequest(APIRequestBase):
                     parts.append(Text(message["content"]))
 
                 # Add tool calls if present
-                if "tool_calls" in message:
+                if "tool_calls" in message and message["tool_calls"] is not None:
                     for tool_call in message["tool_calls"]:
                         parts.append(
                             ToolCall(
@@ -238,9 +262,9 @@ class OpenAIRequest(APIRequestBase):
                     and "logprobs" in data["choices"][0]
                 ):
                     logprobs = data["choices"][0]["logprobs"]["content"]
-            except Exception:
+            except Exception as e:
                 is_error = True
-                error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response."
+                error_message = f"Error getting 'choices' and 'usage' from {self.model.name} response: {data}. Error: {e}"
         elif mimetype and "json" in mimetype.lower():
             is_error = True  # expected status is 200, otherwise it's an error
             data = await http_response.json()
@@ -655,7 +679,12 @@ async def stream_chat(
     request_header.update(filtered_extra)
 
     context = SimpleNamespace(
-        prompt=prompt, tools=tools, sampling_params=sampling_params
+        prompt=prompt,
+        tools=tools,
+        sampling_params=sampling_params,
+        service_tier=None,
+        output_schema=None,
+        model_name=model_name,
     )
 
     request_json = await _build_oa_chat_request(model, context)  # type: ignore
lm_deluge/batches.py CHANGED
@@ -3,7 +3,7 @@ import json
 import os
 import tempfile
 import time
-from typing import Literal, Sequence, cast
+from typing import Any, Literal, Sequence, cast
 
 import aiohttp
 from rich.console import Console
@@ -22,7 +22,7 @@ from lm_deluge.prompt import (
     Prompt,
     prompts_to_conversations,
 )
-from lm_deluge.request_context import RequestContext
+from lm_deluge.api_requests.context import RequestContext
 
 
 def _create_batch_status_display(
@@ -141,31 +141,22 @@ async def submit_batch_oa(file_path: str):
     return batch_id
 
 
-async def _submit_anthropic_batch(file_path: str, headers: dict, model: str):
-    """Upload a JSONL file and create one Anthropic batch."""
+async def _submit_anthropic_batch(requests: list[dict], headers: dict, model: str):
+    """Submit batch requests to Anthropic's Message Batches API."""
 
     async with aiohttp.ClientSession() as session:
         url = f"{registry[model].api_base}/messages/batches"
-        data = aiohttp.FormData()
-        with open(file_path, "rb") as f:
-            data.add_field(
-                "file",
-                f,
-                filename=os.path.basename(file_path),
-                content_type="application/json",
-            )
-
-        async with session.post(url, data=data, headers=headers) as response:
-            if response.status != 200:
-                text = await response.text()
-                raise ValueError(f"Error creating batch: {text}")
-
-            batch_data = await response.json()
-            batch_id = batch_data["id"]
-            print(f"Anthropic batch job started successfully: id = {batch_id}")
-
-            os.remove(file_path)
-            return batch_id
+        payload = {"requests": requests}
+
+        async with session.post(url, json=payload, headers=headers) as response:
+            if response.status != 200:
+                text = await response.text()
+                raise ValueError(f"Error creating batch: {text}")
+
+            batch_data = await response.json()
+            batch_id = batch_data["id"]
+            print(f"Anthropic batch job started successfully: id = {batch_id}")
+            return batch_id
 
 
 async def create_batch_files_oa(
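`_submit_anthropic_batch` no longer writes a JSONL temp file and multipart-uploads it; the request dicts are POSTed directly as a JSON `{"requests": [...]}` payload to the Message Batches endpoint, which also removes the `os.remove` cleanup step. A hedged sketch of a direct call, where the request shape (`custom_id` plus `params`) follows Anthropic's Message Batches API; the ids, prompt, headers, and model key are illustrative, and importing the private helper like this is an assumption:

```python
# Sketch only: the `model` argument must be a key in lm_deluge's model
# registry (used to look up api_base); the value here is illustrative.
import asyncio
import os

from lm_deluge.batches import _submit_anthropic_batch

requests = [
    {
        "custom_id": "row-0",
        "params": {
            "model": "claude-sonnet-4-20250514",
            "max_tokens": 256,
            "messages": [{"role": "user", "content": "Hello!"}],
        },
    }
]
headers = {
    "x-api-key": os.environ["ANTHROPIC_API_KEY"],
    "anthropic-version": "2023-06-01",
}

batch_id = asyncio.run(
    _submit_anthropic_batch(requests, headers, model="claude-sonnet-4-20250514")
)
```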
@@ -409,20 +400,10 @@
 
         if current_batch and (would_exceed_size or would_exceed_items):
             # Submit current batch
-            def write_batch_file():
-                with tempfile.NamedTemporaryFile(
-                    mode="w+", suffix=".jsonl", delete=False
-                ) as f:
-                    for batch_request in current_batch:
-                        json.dump(batch_request, f)
-                        f.write("\n")
-                    print("wrote", len(current_batch), "items")
-                    return f.name
-
-            file_path = await asyncio.to_thread(write_batch_file)
+            print("wrote", len(current_batch), "items")
             batch_tasks.append(
                 asyncio.create_task(
-                    _submit_anthropic_batch(file_path, request_headers, model)  # type: ignore
+                    _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
                 )
             )
 
@@ -436,21 +417,10 @@
 
     # Submit final batch if it has items
     if current_batch:
-
-        def write_final_batch_file():
-            with tempfile.NamedTemporaryFile(
-                mode="w+", suffix=".jsonl", delete=False
-            ) as f:
-                for batch_request in current_batch:
-                    json.dump(batch_request, f)
-                    f.write("\n")
-                print("wrote", len(current_batch), "items")
-                return f.name
-
-        file_path = await asyncio.to_thread(write_final_batch_file)
+        print("wrote", len(current_batch), "items")
         batch_tasks.append(
             asyncio.create_task(
-                _submit_anthropic_batch(file_path, request_headers, model)  # type: ignore
+                _submit_anthropic_batch(current_batch, request_headers, model)  # type: ignore
             )
         )
 
@@ -510,7 +480,7 @@
 
     # Event to signal when to stop the display updater
     stop_display_event = asyncio.Event()
-    current_status = {"status": "processing", "counts": None}
+    current_status: dict[str, Any] = {"status": "processing", "counts": None}
 
     async def display_updater():
         """Update display independently of polling."""
@@ -662,7 +632,7 @@
 
     # Event to signal when to stop the display updater
    stop_display_event = asyncio.Event()
-    current_status = {"status": "pending", "counts": None}
+    current_status: dict[str, Any] = {"status": "pending", "counts": None}
 
     async def display_updater():
         """Update display independently of polling."""
lm_deluge/cache.py CHANGED
@@ -8,7 +8,7 @@ from .api_requests.base import APIResponse
 try:
     import plyvel  # type: ignore
 except ImportError:
-    plyvel = None
+    plyvel: Any = None
     print("Warning: plyvel not installed, cannot use LevelDB.")
 