lm-deluge 0.0.90__py3-none-any.whl → 0.0.91__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- lm_deluge/__init__.py +3 -3
- lm_deluge/api_requests/anthropic.py +4 -2
- lm_deluge/api_requests/base.py +1 -1
- lm_deluge/api_requests/bedrock.py +6 -1
- lm_deluge/{request_context.py → api_requests/context.py} +4 -4
- lm_deluge/api_requests/gemini.py +13 -11
- lm_deluge/api_requests/mistral.py +1 -1
- lm_deluge/api_requests/openai.py +4 -2
- lm_deluge/batches.py +4 -4
- lm_deluge/cache.py +1 -1
- lm_deluge/cli.py +672 -300
- lm_deluge/{client.py → client/__init__.py} +15 -12
- lm_deluge/config.py +9 -31
- lm_deluge/embed.py +2 -6
- lm_deluge/models/__init__.py +137 -30
- lm_deluge/models/anthropic.py +20 -12
- lm_deluge/models/bedrock.py +9 -0
- lm_deluge/models/cerebras.py +2 -0
- lm_deluge/models/cohere.py +2 -0
- lm_deluge/models/google.py +13 -0
- lm_deluge/models/grok.py +4 -0
- lm_deluge/models/groq.py +2 -0
- lm_deluge/models/meta.py +2 -0
- lm_deluge/models/openai.py +24 -1
- lm_deluge/models/openrouter.py +107 -1
- lm_deluge/models/together.py +3 -0
- lm_deluge/pipelines/extract.py +4 -5
- lm_deluge/pipelines/gepa/__init__.py +1 -1
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
- lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
- lm_deluge/prompt/__init__.py +45 -0
- lm_deluge/{prompt.py → prompt/conversation.py} +45 -1014
- lm_deluge/{image.py → prompt/image.py} +0 -10
- lm_deluge/prompt/message.py +571 -0
- lm_deluge/prompt/serialization.py +21 -0
- lm_deluge/prompt/signatures.py +77 -0
- lm_deluge/prompt/text.py +47 -0
- lm_deluge/prompt/thinking.py +55 -0
- lm_deluge/prompt/tool_calls.py +245 -0
- lm_deluge/server/app.py +1 -1
- lm_deluge/tool/__init__.py +65 -18
- lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
- lm_deluge/tool/cua/actions.py +26 -26
- lm_deluge/tool/cua/batch.py +1 -2
- lm_deluge/tool/cua/kernel.py +1 -1
- lm_deluge/tool/prefab/filesystem.py +2 -2
- lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
- lm_deluge/tool/prefab/memory.py +3 -1
- lm_deluge/tool/prefab/otc/executor.py +3 -3
- lm_deluge/tool/prefab/random.py +30 -54
- lm_deluge/tool/prefab/rlm/__init__.py +2 -2
- lm_deluge/tool/prefab/rlm/executor.py +1 -1
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +2 -2
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +9 -7
- lm_deluge/tool/prefab/subagents.py +1 -1
- lm_deluge/util/logprobs.py +4 -4
- lm_deluge/util/schema.py +6 -6
- lm_deluge/util/validation.py +14 -9
- {lm_deluge-0.0.90.dist-info → lm_deluge-0.0.91.dist-info}/METADATA +9 -10
- {lm_deluge-0.0.90.dist-info → lm_deluge-0.0.91.dist-info}/RECORD +68 -60
- lm_deluge-0.0.91.dist-info/entry_points.txt +3 -0
- /lm_deluge/{file.py → prompt/file.py} +0 -0
- {lm_deluge-0.0.90.dist-info → lm_deluge-0.0.91.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.90.dist-info → lm_deluge-0.0.91.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.90.dist-info → lm_deluge-0.0.91.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py
CHANGED
@@ -1,7 +1,6 @@
 from .client import APIResponse, LLMClient, SamplingParams
-from .
-from .
-from .tool import Tool
+from .prompt import Conversation, Message, File
+from .tool import Tool, MCPServer

 # dotenv.load_dotenv() - don't do this, fucks with other packages

@@ -12,5 +11,6 @@ __all__ = [
 "Conversation",
 "Message",
 "Tool",
+"MCPServer",
 "File",
 ]
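Per the hunk above, 0.0.91 re-exports MCPServer from the package root and sources Conversation, Message, and File from the new lm_deluge.prompt package. A minimal sketch of the resulting top-level import surface (assuming 0.0.91 is installed; only the re-exported names shown in the diff are used):

```python
# Top-level names visible in the __init__.py diff above; MCPServer is new in 0.0.91.
from lm_deluge import (
    APIResponse,
    Conversation,
    File,
    LLMClient,
    MCPServer,
    Message,
    SamplingParams,
    Tool,
)

# All of these should now be importable directly from the package root.
print([obj.__name__ for obj in (Conversation, Message, Tool, MCPServer, File)])
```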
lm_deluge/api_requests/anthropic.py
CHANGED
@@ -10,7 +10,7 @@ from lm_deluge.prompt import (
 Thinking,
 ToolCall,
 )
-from lm_deluge.
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage
 from lm_deluge.util.schema import (
@@ -103,7 +103,9 @@ def _build_anthropic_request(
 if "top_p" in request_json:
 request_json["top_p"] = max(request_json["top_p"], 0.95)
 request_json["temperature"] = 1.0
-request_json["max_tokens"]
+max_tokens = request_json["max_tokens"]
+assert isinstance(max_tokens, int)
+request_json["max_tokens"] = max_tokens + budget
 else:
 request_json["thinking"] = {"type": "disabled"}
 if "kimi" in model.id and "thinking" in model.id:
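The second hunk replaces a truncated in-place update with an explicit read, type assertion, and write, so the extended-thinking budget is added on top of the caller's max_tokens. A standalone sketch of that adjustment; the surrounding function and the `budget` variable are assumptions, as they are not shown in this diff:

```python
# Sketch of the max_tokens adjustment from the hunk above. `budget` is the
# extended-thinking token budget computed elsewhere in the (unshown) function.
def apply_thinking_budget(request_json: dict, budget: int) -> dict:
    request_json["temperature"] = 1.0
    max_tokens = request_json["max_tokens"]
    assert isinstance(max_tokens, int)
    # Anthropic counts thinking tokens against max_tokens, so the budget is
    # added on top of the completion budget the caller asked for.
    request_json["max_tokens"] = max_tokens + budget
    request_json["thinking"] = {"type": "enabled", "budget_tokens": budget}
    return request_json


print(apply_thinking_budget({"max_tokens": 1024}, budget=2048)["max_tokens"])  # 3072
```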
lm_deluge/api_requests/base.py
CHANGED
@@ -10,7 +10,7 @@ from aiohttp import ClientResponse

 from ..errors import raise_if_modal_exception
 from ..models.openai import OPENAI_MODELS
-from ..
+from ..api_requests.context import RequestContext
 from .response import APIResponse


lm_deluge/api_requests/bedrock.py
CHANGED
@@ -20,7 +20,7 @@ from lm_deluge.prompt import (
 Thinking,
 ToolCall,
 )
-from lm_deluge.
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.usage import Usage

@@ -263,6 +263,11 @@ class BedrockRequest(APIRequestBase):
 # Create a fake requests.PreparedRequest object for AWS4Auth to sign
 import requests

+assert self.url is not None, "URL must be set after build_request"
+assert (
+self.request_header is not None
+), "Headers must be set after build_request"
+
 fake_request = requests.Request(
 method="POST",
 url=self.url,
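The new assertions narrow Optional attributes before the request is handed to AWS4Auth for SigV4 signing. A rough sketch of that signing pattern with placeholder credentials, region, and endpoint; only the requests.Request plus AWS4Auth flow is implied by the diff above:

```python
# Placeholder credentials/region/endpoint; only the Request -> prepare -> sign
# flow is implied by the diff above.
import requests
from requests_aws4auth import AWS4Auth

auth = AWS4Auth("ACCESS_KEY_ID", "SECRET_ACCESS_KEY", "us-east-1", "bedrock")
fake_request = requests.Request(
    method="POST",
    url="https://bedrock-runtime.us-east-1.amazonaws.com/model/example/invoke",
    headers={"content-type": "application/json"},
    data=b"{}",
)
prepared = fake_request.prepare()
auth(prepared)  # AWS4Auth mutates the PreparedRequest, adding SigV4 headers
print("Authorization" in prepared.headers)  # True
```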
lm_deluge/{request_context.py → api_requests/context.py}
CHANGED
@@ -2,9 +2,9 @@ from dataclasses import dataclass, field
 from functools import cached_property
 from typing import Any, Callable, Sequence, TYPE_CHECKING

-from
-from
-from
+from ..config import SamplingParams
+from ..prompt import CachePattern, Conversation
+from ..tracker import StatusTracker

 if TYPE_CHECKING:
 from pydantic import BaseModel
@@ -83,4 +83,4 @@ class RequestContext:
 # Update with any overrides
 current_values.update(overrides)

-return RequestContext(**current_values)
+return RequestContext(**current_values) # type: ignore[arg-type]
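The closing hunk suggests a copy-style helper that rebuilds the dataclass from its current field values plus any overrides (the type: ignore silences the checker over the **-expansion). A generic sketch of that pattern with a stand-in dataclass, since the method name and RequestContext's full field list are not part of this diff:

```python
# Stand-in dataclass; the field names here are illustrative, not RequestContext's.
from dataclasses import dataclass, fields


@dataclass
class _Ctx:
    model_name: str
    attempts_left: int = 3

    def copy(self, **overrides):
        current_values = {f.name: getattr(self, f.name) for f in fields(self)}
        # Update with any overrides, then rebuild the dataclass
        current_values.update(overrides)
        return _Ctx(**current_values)


ctx = _Ctx(model_name="gpt-4.1-mini")
retry = ctx.copy(attempts_left=ctx.attempts_left - 1)
print(retry)  # _Ctx(model_name='gpt-4.1-mini', attempts_left=2)
```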
lm_deluge/api_requests/gemini.py
CHANGED
@@ -1,9 +1,10 @@
 import json
 import os
+from typing import Any

 from aiohttp import ClientResponse

-from lm_deluge.
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import Tool
 from lm_deluge.warnings import maybe_warn

@@ -37,13 +38,14 @@ async def _build_gemini_request(
 part_type="function call",
 )

-
+generation_config: dict[str, Any] = {
+"temperature": sampling_params.temperature,
+"topP": sampling_params.top_p,
+"maxOutputTokens": sampling_params.max_new_tokens,
+}
+request_json: dict[str, Any] = {
 "contents": messages,
-"generationConfig":
-"temperature": sampling_params.temperature,
-"topP": sampling_params.top_p,
-"maxOutputTokens": sampling_params.max_new_tokens,
-},
+"generationConfig": generation_config,
 }

 # Add system instruction if present
@@ -83,7 +85,7 @@ async def _build_gemini_request(
 }
 effort = level_map[effort_key]
 thinking_config = {"thinkingLevel": effort}
-
+generation_config["thinkingConfig"] = thinking_config

 elif model.reasoning_model:
 if (
@@ -126,7 +128,7 @@ async def _build_gemini_request(
 # no thoughts head empty
 thinking_config = {"includeThoughts": False, "thinkingBudget": 0}

-
+generation_config["thinkingConfig"] = thinking_config

 else:
 if sampling_params.reasoning_effort:
@@ -171,14 +173,14 @@ async def _build_gemini_request(

 # Handle JSON mode
 if sampling_params.json_mode and model.supports_json:
-
+generation_config["responseMimeType"] = "application/json"

 # Handle media_resolution for Gemini 3 (requires v1alpha)
 if sampling_params.media_resolution is not None:
 is_gemini_3 = "gemini-3" in model.name.lower()
 if is_gemini_3:
 # Add global media resolution to generationConfig
-
+generation_config["mediaResolution"] = {
 "level": sampling_params.media_resolution
 }
 else:
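The refactor assembles generationConfig as its own typed dict and then attaches thinkingConfig, responseMimeType, and mediaResolution to it rather than to nested literal keys. A rough sketch of the payload shape that results; values are illustrative, and the contents structure follows the Gemini REST API rather than anything shown in this diff:

```python
from typing import Any

# Illustrative values; keys mirror those used in the hunks above.
generation_config: dict[str, Any] = {
    "temperature": 0.7,
    "topP": 0.95,
    "maxOutputTokens": 1024,
}
generation_config["thinkingConfig"] = {"includeThoughts": False, "thinkingBudget": 0}
generation_config["responseMimeType"] = "application/json"

request_json: dict[str, Any] = {
    "contents": [{"role": "user", "parts": [{"text": "Hello"}]}],
    "generationConfig": generation_config,
}
print(request_json["generationConfig"]["responseMimeType"])
```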
lm_deluge/api_requests/mistral.py
CHANGED
@@ -7,7 +7,7 @@ from lm_deluge.warnings import maybe_warn

 from ..models import APIModel
 from ..prompt import Message
-from ..
+from ..api_requests.context import RequestContext
 from ..usage import Usage
 from .base import APIRequestBase, APIResponse

lm_deluge/api_requests/openai.py
CHANGED
@@ -7,7 +7,7 @@ from typing import Sequence
 import aiohttp
 from aiohttp import ClientResponse

-from lm_deluge.
+from lm_deluge.api_requests.context import RequestContext
 from lm_deluge.tool import MCPServer, Tool
 from lm_deluge.util.schema import (
 prepare_output_schema,
@@ -75,7 +75,9 @@ async def _build_oa_chat_request(
 request_json["service_tier"] = context.service_tier
 # if tinker, for now hack to mush into 1 string
 if "tinker" in model.name:
-
+messages = request_json["messages"]
+assert isinstance(messages, list)
+request_json["messages"] = _message_contents_to_string(messages)

 # set max_tokens or max_completion_tokens dep. on provider
 if "cohere" in model.api_base:
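The "tinker" branch now asserts that the messages value is a list before passing it to _message_contents_to_string. That helper's body is not part of this diff; the sketch below is a guess at its semantics (flattening structured content parts into plain strings), based only on its name and the surrounding comment, and is not the package's actual implementation:

```python
# Hypothetical reimplementation for illustration only; not the package's code.
def _message_contents_to_string(messages: list[dict]) -> list[dict]:
    flattened = []
    for msg in messages:
        content = msg.get("content")
        if isinstance(content, list):
            # join text parts into a single string, dropping non-text parts
            content = "".join(
                part.get("text", "") for part in content if isinstance(part, dict)
            )
        flattened.append({**msg, "content": content})
    return flattened


print(_message_contents_to_string(
    [{"role": "user", "content": [{"type": "text", "text": "hello"}]}]
))
```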
lm_deluge/batches.py
CHANGED
@@ -3,7 +3,7 @@ import json
 import os
 import tempfile
 import time
-from typing import Literal, Sequence, cast
+from typing import Any, Literal, Sequence, cast

 import aiohttp
 from rich.console import Console
@@ -22,7 +22,7 @@ from lm_deluge.prompt import (
 Prompt,
 prompts_to_conversations,
 )
-from lm_deluge.
+from lm_deluge.api_requests.context import RequestContext


 def _create_batch_status_display(
@@ -480,7 +480,7 @@ async def _wait_for_anthropic_batch_completion_async(

 # Event to signal when to stop the display updater
 stop_display_event = asyncio.Event()
-current_status = {"status": "processing", "counts": None}
+current_status: dict[str, Any] = {"status": "processing", "counts": None}

 async def display_updater():
 """Update display independently of polling."""
@@ -632,7 +632,7 @@ async def _wait_for_openai_batch_completion_async(

 # Event to signal when to stop the display updater
 stop_display_event = asyncio.Event()
-current_status = {"status": "pending", "counts": None}
+current_status: dict[str, Any] = {"status": "pending", "counts": None}

 async def display_updater():
 """Update display independently of polling."""