lm-deluge 0.0.67__py3-none-any.whl → 0.0.69__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lm_deluge/__init__.py +10 -0
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/openai.py +1 -1
- lm_deluge/client.py +9 -8
- lm_deluge/mock_openai.py +482 -0
- lm_deluge/models/__init__.py +4 -0
- lm_deluge/models/kimi.py +34 -0
- lm_deluge/models/minimax.py +10 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.69.dist-info}/METADATA +3 -1
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.69.dist-info}/RECORD +13 -9
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.69.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.69.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.69.dist-info}/top_level.txt +0 -0
lm_deluge/__init__.py CHANGED
@@ -3,6 +3,13 @@ from .file import File
 from .prompt import Conversation, Message
 from .tool import Tool, ToolParams
 
+try:
+    from .mock_openai import MockAsyncOpenAI  # noqa
+
+    _has_openai = True
+except ImportError:
+    _has_openai = False
+
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
 __all__ = [
@@ -15,3 +22,6 @@ __all__ = [
     "ToolParams",
     "File",
 ]
+
+if _has_openai:
+    __all__.append("MockAsyncOpenAI")
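The guarded import above means MockAsyncOpenAI is only exported when the new openai extra is installed. A minimal consumer-side sketch (the model name is just the one used in the mock_openai docstring below; any configured lm-deluge model would do):

    import lm_deluge

    if "MockAsyncOpenAI" in lm_deluge.__all__:
        client = lm_deluge.MockAsyncOpenAI(model="claude-sonnet-4")
    else:
        print("Install the extra first: pip install lm-deluge[openai]")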
lm_deluge/api_requests/openai.py CHANGED
@@ -381,7 +381,7 @@ class OpenAIResponsesRequest(APIRequestBase):
         output = data.get("output", [])
         if not output:
             is_error = True
-            error_message = "No output in response"
+            error_message = f"No output in response. Status: {data.get('status')}, error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
         else:
             # Process each output item
             for item in output:
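The richer error string surfaces the Responses API status and failure details instead of a bare "No output in response". A quick illustration with an invented payload (the values here are hypothetical, not taken from the library):

    data = {
        "output": [],
        "status": "incomplete",
        "error": None,
        "incomplete_details": {"reason": "max_output_tokens"},
    }
    error_message = (
        f"No output in response. Status: {data.get('status')}, "
        f"error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
    )
    # -> No output in response. Status: incomplete, error: None,
    #    incomplete details: {'reason': 'max_output_tokens'}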
lm_deluge/client.py CHANGED
@@ -702,7 +702,7 @@ class _LLMClient(BaseModel):
 
     async def start(
         self,
-        prompt:
+        prompt: Prompt,
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
         cache: CachePattern | None = None,
@@ -780,12 +780,12 @@ class _LLMClient(BaseModel):
 
     async def stream(
         self,
-        prompt:
+        prompt: Prompt,
         tools: list[Tool | dict | MCPServer] | None = None,
     ):
         model, sampling_params = self._select_model()
-
-
+        prompt = prompts_to_conversations([prompt])[0]
+        assert isinstance(prompt, Conversation)
         async for item in stream_chat(
             model, prompt, sampling_params, tools, None, self.extra_headers
         ):
@@ -799,7 +799,7 @@ class _LLMClient(BaseModel):
 
     async def run_agent_loop(
         self,
-        conversation:
+        conversation: Prompt,
         *,
         tools: list[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
@@ -812,8 +812,9 @@ class _LLMClient(BaseModel):
         instances or built‑in tool dictionaries.
         """
 
-        if isinstance(conversation,
-        conversation =
+        if not isinstance(conversation, Conversation):
+            conversation = prompts_to_conversations([conversation])[0]
+        assert isinstance(conversation, Conversation)
 
         # Expand MCPServer objects to their constituent tools for tool execution
         expanded_tools: list[Tool] = []
@@ -870,7 +871,7 @@ class _LLMClient(BaseModel):
 
     def run_agent_loop_sync(
         self,
-        conversation:
+        conversation: Prompt,
        *,
         tools: list[Tool | dict | MCPServer] | None = None,
         max_rounds: int = 5,
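With the stream() and run_agent_loop() signatures tightened to Prompt and the input normalized through prompts_to_conversations, callers can hand these methods a bare string or a prebuilt Conversation. A hedged sketch, assuming the Prompt alias accepts plain strings and an API key for the chosen model is configured:

    import asyncio

    from lm_deluge.client import LLMClient

    async def main():
        client = LLMClient("claude-sonnet-4")
        # stream() converts the string into a Conversation internally
        async for item in client.stream("Write a haiku about rate limits."):
            ...  # consume streamed items as they arrive

    asyncio.run(main())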
lm_deluge/mock_openai.py ADDED
@@ -0,0 +1,482 @@
"""
Mock OpenAI client that implements the AsyncOpenAI interface but uses lm-deluge's
LLMClient internally. This allows using any lm-deluge-supported provider through
the standard OpenAI Python SDK interface.

Example usage:
    from lm_deluge.mock_openai import MockAsyncOpenAI

    # Use Claude through OpenAI interface
    client = MockAsyncOpenAI(model="claude-sonnet-4")
    response = await client.chat.completions.create(
        model="claude-sonnet-4",  # Can override here
        messages=[{"role": "user", "content": "Hello!"}],
        temperature=0.7
    )
    print(response.choices[0].message.content)

Installation:
    pip install lm-deluge[openai]
"""

import json
import time
import uuid
from typing import Any, AsyncIterator, Literal, Union, overload

try:
    from openai.types.chat import (
        ChatCompletion,
        ChatCompletionChunk,
        ChatCompletionMessage,
        ChatCompletionMessageToolCall,
    )
    from openai.types.chat.chat_completion import Choice as CompletionChoice
    from openai.types.chat.chat_completion_chunk import (
        Choice as ChunkChoice,
        ChoiceDelta,
        ChoiceDeltaToolCall,
        ChoiceDeltaToolCallFunction,
    )
    from openai.types.chat.chat_completion_message_tool_call import Function
    from openai.types.completion_usage import CompletionUsage
except ImportError:
    raise ImportError(
        "The openai package is required to use MockAsyncOpenAI. "
        "Install it with: pip install lm-deluge[openai]"
    )

from lm_deluge.client import LLMClient
from lm_deluge.prompt import Conversation, Message, Part, Text, ToolCall, ToolResult


def _messages_to_conversation(messages: list[dict[str, Any]]) -> Conversation:
    """Convert OpenAI messages format to lm-deluge Conversation."""
    conv_messages = []

    for msg in messages:
        role = msg["role"]
        content = msg.get("content")
        tool_calls = msg.get("tool_calls")
        tool_call_id = msg.get("tool_call_id")

        parts: list[Part] = []

        # Handle regular content
        if content:
            if isinstance(content, str):
                parts.append(Text(content))
            elif isinstance(content, list):
                # Multi-part content (text, images, etc.)
                for item in content:
                    if item.get("type") == "text":
                        parts.append(Text(item["text"]))
                    # Could add image support here later

        # Handle tool calls (from assistant)
        if tool_calls:
            for tc in tool_calls:
                # Parse arguments from JSON string to dict
                args_str = tc["function"]["arguments"]
                args_dict = (
                    json.loads(args_str) if isinstance(args_str, str) else args_str
                )
                parts.append(
                    ToolCall(
                        id=tc["id"],
                        name=tc["function"]["name"],
                        arguments=args_dict,
                    )
                )

        # Handle tool results (from tool role)
        if role == "tool" and tool_call_id:
            parts.append(ToolResult(tool_call_id=tool_call_id, result=content or ""))

        conv_messages.append(Message(role=role, parts=parts))

    return Conversation(messages=conv_messages)


def _response_to_chat_completion(
    response: Any,  # APIResponse
    model: str,
    request_id: str | None = None,
) -> ChatCompletion:
    """Convert lm-deluge APIResponse to OpenAI ChatCompletion."""
    if request_id is None:
        request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"

    # Handle error responses
    if response.is_error:
        # For errors, create an empty response with error finish reason
        message = ChatCompletionMessage(
            role="assistant",
            content=response.error_message or "Error occurred",
        )
        choice = CompletionChoice(
            index=0,
            message=message,
            finish_reason="stop",  # or could use "error" but that's not standard
        )
        return ChatCompletion(
            id=request_id,
            choices=[choice],
            created=int(time.time()),
            model=model,
            object="chat.completion",
            usage=None,
        )

    # Extract content from response
    content_text = None
    tool_calls = None

    if response.content:
        # Extract text parts
        text_parts = [p.text for p in response.content.parts if isinstance(p, Text)]
        if text_parts:
            content_text = "".join(text_parts)

        # Extract tool calls
        tool_call_parts = [p for p in response.content.parts if isinstance(p, ToolCall)]
        if tool_call_parts:
            tool_calls = [
                ChatCompletionMessageToolCall(
                    id=tc.id,
                    type="function",
                    function=Function(
                        name=tc.name,
                        # Convert dict arguments to JSON string for OpenAI format
                        arguments=json.dumps(tc.arguments)
                        if isinstance(tc.arguments, dict)
                        else tc.arguments,
                    ),
                )
                for tc in tool_call_parts
            ]

    # Create message
    message = ChatCompletionMessage(
        role="assistant",
        content=content_text,
        tool_calls=tool_calls,
    )

    # Create choice
    choice = CompletionChoice(
        index=0,
        message=message,
        finish_reason=response.finish_reason or "stop",
    )

    # Create usage
    usage = None
    if response.usage:
        usage = CompletionUsage(
            prompt_tokens=response.usage.input_tokens,
            completion_tokens=response.usage.output_tokens,
            total_tokens=response.usage.input_tokens + response.usage.output_tokens,
        )

    return ChatCompletion(
        id=request_id,
        choices=[choice],
        created=int(time.time()),
        model=model,
        object="chat.completion",
        usage=usage,
    )


class _AsyncStreamWrapper:
    """Wrapper to convert lm-deluge streaming to OpenAI ChatCompletionChunk format."""

    def __init__(self, stream: AsyncIterator, model: str, request_id: str):
        self._stream = stream
        self._model = model
        self._request_id = request_id
        self._first_chunk = True

    def __aiter__(self):
        return self

    async def __anext__(self) -> ChatCompletionChunk:
        chunk = await self._stream.__anext__()

        # Create delta based on chunk content
        delta = ChoiceDelta()

        if self._first_chunk:
            delta.role = "assistant"
            self._first_chunk = False

        # Extract content from chunk
        if hasattr(chunk, "content") and chunk.content:
            if isinstance(chunk.content, str):
                delta.content = chunk.content
            elif hasattr(chunk.content, "parts"):
                # Extract text from parts
                text_parts = [
                    p.text for p in chunk.content.parts if isinstance(p, Text)
                ]
                if text_parts:
                    delta.content = "".join(text_parts)

                # Extract tool calls from parts
                tool_call_parts = [
                    p for p in chunk.content.parts if isinstance(p, ToolCall)
                ]
                if tool_call_parts:
                    delta.tool_calls = [
                        ChoiceDeltaToolCall(
                            index=i,
                            id=tc.id,
                            type="function",
                            function=ChoiceDeltaToolCallFunction(
                                name=tc.name,
                                # Convert dict arguments to JSON string for OpenAI format
                                arguments=json.dumps(tc.arguments)
                                if isinstance(tc.arguments, dict)
                                else tc.arguments,
                            ),
                        )
                        for i, tc in enumerate(tool_call_parts)
                    ]

        # Create choice
        choice = ChunkChoice(
            index=0,
            delta=delta,
            finish_reason=getattr(chunk, "finish_reason", None),
        )

        return ChatCompletionChunk(
            id=self._request_id,
            choices=[choice],
            created=int(time.time()),
            model=self._model,
            object="chat.completion.chunk",
        )


class MockCompletions:
    """Mock completions resource that implements OpenAI's completions.create interface."""

    def __init__(self, parent: "MockAsyncOpenAI"):
        self._parent = parent

    @overload
    async def create(
        self,
        *,
        messages: list[dict[str, Any]],
        model: str,
        stream: Literal[False] = False,
        **kwargs: Any,
    ) -> ChatCompletion: ...

    @overload
    async def create(
        self,
        *,
        messages: list[dict[str, Any]],
        model: str,
        stream: Literal[True],
        **kwargs: Any,
    ) -> AsyncIterator[ChatCompletionChunk]: ...

    async def create(
        self,
        *,
        messages: list[dict[str, Any]],
        model: str,
        stream: bool = False,
        temperature: float | None = None,
        max_tokens: int | None = None,
        max_completion_tokens: int | None = None,
        top_p: float | None = None,
        seed: int | None = None,
        tools: list[dict[str, Any]] | None = None,
        tool_choice: Any | None = None,
        reasoning_effort: str | None = None,
        response_format: dict[str, Any] | None = None,
        n: int | None = None,
        stop: str | list[str] | None = None,
        presence_penalty: float | None = None,
        frequency_penalty: float | None = None,
        **kwargs: Any,
    ) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
        """
        Create a chat completion using lm-deluge's LLMClient.

        Args:
            messages: List of message dictionaries with 'role' and 'content'
            model: Model identifier (can override client's default model)
            stream: Whether to stream the response
            temperature: Sampling temperature (0-2)
            max_tokens: Max tokens (deprecated, use max_completion_tokens)
            max_completion_tokens: Max completion tokens
            top_p: Nucleus sampling parameter
            seed: Random seed for deterministic sampling
            tools: List of tool definitions
            tool_choice: Tool choice strategy
            reasoning_effort: Reasoning effort for reasoning models
            response_format: Response format (e.g., {"type": "json_object"})
            **kwargs: Other parameters (mostly ignored for compatibility)

        Returns:
            ChatCompletion (non-streaming) or AsyncIterator[ChatCompletionChunk] (streaming)
        """
        # Get or create client for this model
        client = self._parent._get_or_create_client(model)

        # Convert messages to Conversation
        conversation = _messages_to_conversation(messages)

        # Build sampling params
        sampling_kwargs = {}
        if temperature is not None:
            sampling_kwargs["temperature"] = temperature
        if max_completion_tokens is not None:
            sampling_kwargs["max_new_tokens"] = max_completion_tokens
        elif max_tokens is not None:
            sampling_kwargs["max_new_tokens"] = max_tokens
        if top_p is not None:
            sampling_kwargs["top_p"] = top_p
        if seed is not None:
            sampling_kwargs["seed"] = seed
        if reasoning_effort is not None:
            sampling_kwargs["reasoning_effort"] = reasoning_effort
        if response_format and response_format.get("type") == "json_object":
            sampling_kwargs["json_mode"] = True

        # If sampling params are provided, create a new client with merged params
        if sampling_kwargs:
            # Merge with default params
            merged_params = {**self._parent._default_sampling_params, **sampling_kwargs}
            client = self._parent._create_client_with_params(model, merged_params)

        # Convert tools if provided
        lm_tools = None
        if tools:
            # For now, just pass through - lm-deluge will handle the format
            lm_tools = tools

        # Execute request
        if stream:
            # Streaming mode
            request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
            # Note: client.stream() is an async generator, not a coroutine
            # We can directly wrap it
            stream_iter = client.stream(conversation, tools=lm_tools)
            # Verify it's a generator, not a coroutine
            if hasattr(stream_iter, "__anext__"):
                return _AsyncStreamWrapper(stream_iter, model, request_id)
            else:
                # If it's a coroutine, we need to await it first
                # But this shouldn't happen with the current implementation
                raise TypeError(f"Expected async generator, got {type(stream_iter)}")
        else:
            # Non-streaming mode
            response = await client.start(conversation, tools=lm_tools)
            return _response_to_chat_completion(response, model)


class MockChat:
    """Mock chat resource that provides access to completions."""

    def __init__(self, parent: "MockAsyncOpenAI"):
        self._parent = parent
        self._completions = MockCompletions(parent)

    @property
    def completions(self) -> MockCompletions:
        """Access the completions resource."""
        return self._completions


class MockAsyncOpenAI:
    """
    Mock AsyncOpenAI client that uses lm-deluge's LLMClient internally.

    This allows using any lm-deluge-supported provider (Anthropic, Google, etc.)
    through the standard OpenAI Python SDK interface.

    Example:
        # Use Claude through OpenAI interface
        client = MockAsyncOpenAI(model="claude-sonnet-4")
        response = await client.chat.completions.create(
            model="claude-sonnet-4",
            messages=[{"role": "user", "content": "Hello!"}],
            temperature=0.7
        )

    Args:
        model: Default model to use (can be overridden in create())
        temperature: Default temperature
        max_completion_tokens: Default max completion tokens
        top_p: Default top_p
        **kwargs: Additional parameters passed to LLMClient
    """

    def __init__(
        self,
        *,
        model: str,
        temperature: float | None = None,
        max_completion_tokens: int | None = None,
        top_p: float | None = None,
        seed: int | None = None,
        **kwargs: Any,
    ):
        self._default_model = model
        self._default_sampling_params = {}

        if temperature is not None:
            self._default_sampling_params["temperature"] = temperature
        if max_completion_tokens is not None:
            self._default_sampling_params["max_new_tokens"] = max_completion_tokens
        if top_p is not None:
            self._default_sampling_params["top_p"] = top_p
        if seed is not None:
            self._default_sampling_params["seed"] = seed

        # Additional kwargs for LLMClient
        self._client_kwargs = kwargs

        # Cache of LLMClient instances by model
        self._clients: dict[str, Any] = {}

        # Create the default client
        self._clients[model] = self._create_client(model)

        # Create nested resources
        self._chat = MockChat(self)

    def _create_client(self, model: str) -> Any:
        """Create a new LLMClient for the given model."""
        return LLMClient(
            model,
            **self._default_sampling_params,
            **self._client_kwargs,
        )

    def _create_client_with_params(self, model: str, params: dict[str, Any]) -> Any:
        """Create a new LLMClient with specific sampling parameters."""
        return LLMClient(
            model,
            **params,
            **self._client_kwargs,
        )

    def _get_or_create_client(self, model: str) -> Any:
        """Get existing client or create new one for the model."""
        if model not in self._clients:
            self._clients[model] = self._create_client(model)
        return self._clients[model]

    @property
    def chat(self) -> MockChat:
        """Access the chat resource."""
        return self._chat
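Beyond the docstring's non-streaming example, the same mock client also covers streaming: create(..., stream=True) wraps lm-deluge's async generator in _AsyncStreamWrapper and yields ChatCompletionChunk objects. A sketch, assuming the openai extra is installed and credentials for the chosen model are configured:

    import asyncio

    from lm_deluge.mock_openai import MockAsyncOpenAI

    async def main():
        client = MockAsyncOpenAI(model="claude-sonnet-4", temperature=0.2)
        stream = await client.chat.completions.create(
            model="claude-sonnet-4",
            messages=[{"role": "user", "content": "Stream a limerick."}],
            stream=True,
        )
        async for chunk in stream:
            delta = chunk.choices[0].delta
            if delta.content:
                print(delta.content, end="", flush=True)

    asyncio.run(main())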
lm_deluge/models/__init__.py CHANGED
@@ -15,7 +15,9 @@ from .fireworks import FIREWORKS_MODELS
 from .google import GOOGLE_MODELS
 from .grok import XAI_MODELS
 from .groq import GROQ_MODELS
+from .kimi import KIMI_MODELS
 from .meta import META_MODELS
+from .minimax import MINIMAX_MODELS
 from .mistral import MISTRAL_MODELS
 from .openai import OPENAI_MODELS
 from .openrouter import OPENROUTER_MODELS
@@ -132,7 +134,9 @@ for model_dict in [
     FIREWORKS_MODELS,
     GOOGLE_MODELS,
     XAI_MODELS,
+    KIMI_MODELS,
     META_MODELS,
+    MINIMAX_MODELS,
     MISTRAL_MODELS,
     OPENAI_MODELS,
     OPENROUTER_MODELS,
lm_deluge/models/kimi.py ADDED
@@ -0,0 +1,34 @@
KIMI_MODELS = {
    "kimi-k2": {
        "id": "kimi-k2",
        "name": "kimi-k2-0905-preview",
        "api_base": "https://api.moonshot.ai/anthropic/v1",
        "api_key_env_var": "MOONSHOT_API_KEY",
        "supports_json": True,
        "api_spec": "anthropic",
    },
    "kimi-k2-turbo": {
        "id": "kimi-k2-turbo",
        "name": "kimi-k2-turbo-preview",
        "api_base": "https://api.moonshot.ai/anthropic/v1",
        "api_key_env_var": "MOONSHOT_API_KEY",
        "supports_json": True,
        "api_spec": "anthropic",
    },
    "kimi-k2-thinking": {
        "id": "kimi-k2-thinking",
        "name": "kimi-k2-thinking",
        "api_base": "https://api.moonshot.ai/anthropic/v1",
        "api_key_env_var": "MOONSHOT_API_KEY",
        "supports_json": True,
        "api_spec": "anthropic",
    },
    "kimi-k2-thinking-turbo": {
        "id": "kimi-k2-thinking-turbo",
        "name": "kimi-k2-thinking-turbo",
        "api_base": "https://api.moonshot.ai/anthropic/v1",
        "api_key_env_var": "MOONSHOT_API_KEY",
        "supports_json": True,
        "api_spec": "anthropic",
    },
}
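All four Moonshot entries route through the Anthropic-compatible endpoint and read MOONSHOT_API_KEY. A hedged usage sketch built from these registry fields plus the APIResponse shape handled in mock_openai.py (content.parts containing Text parts); it assumes start() accepts a plain string the way stream() does:

    import asyncio

    from lm_deluge.client import LLMClient
    from lm_deluge.prompt import Text

    async def main():
        # "kimi-k2" resolves to kimi-k2-0905-preview per the registry entry above;
        # MOONSHOT_API_KEY must be set in the environment.
        client = LLMClient("kimi-k2")
        response = await client.start("Say hello from Kimi in one line.")
        if response.content:
            print("".join(p.text for p in response.content.parts if isinstance(p, Text)))

    asyncio.run(main())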
{lm_deluge-0.0.67.dist-info → lm_deluge-0.0.69.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.67
+Version: 0.0.69
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -23,6 +23,8 @@ Requires-Dist: pdf2image
 Requires-Dist: pillow
 Requires-Dist: fastmcp>=2.4
 Requires-Dist: rich
+Provides-Extra: openai
+Requires-Dist: openai>=1.0.0; extra == "openai"
 Dynamic: license-file
 
 # lm-deluge
{lm_deluge-0.0.67.dist-info → lm_deluge-0.0.69.dist-info}/RECORD CHANGED
@@ -1,13 +1,14 @@
-lm_deluge/__init__.py,sha256=
+lm_deluge/__init__.py,sha256=bGF2eVo60StVEGjt5lgVhSoJmYBlvQTUa8DR96sNh0w,558
 lm_deluge/batches.py,sha256=Km6QM5_7BlF2qEyo4WPlhkaZkpzrLqf50AaveHXQOoY,25127
 lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
 lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
-lm_deluge/client.py,sha256=
+lm_deluge/client.py,sha256=nBKuP6buwQYNMCP9f2SOuPkfowKRijJv4-bI-STg7Iw,40824
 lm_deluge/config.py,sha256=s3wFBRD6pi0wtXMJRmQDT2vdiqSvhjUPmLehbkv41i0,943
 lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
 lm_deluge/file.py,sha256=PTmlJQ-IaYcYUFun9V0bJ1NPVP84edJrR0hvCMWFylY,19697
 lm_deluge/image.py,sha256=5AMXmn2x47yXeYNfMSMAOWcnlrOxxOel-4L8QCJwU70,8928
+lm_deluge/mock_openai.py,sha256=OelIYWGBf5vBZXJOLaz54s5gE-HPIg1kPXARnv4NoKg,16592
 lm_deluge/prompt.py,sha256=Bgszws8-3GPefiVRa-Mht4tfyfoqD_hV5MX1nrbkJn0,63465
 lm_deluge/request_context.py,sha256=cBayMFWupWhde2OjRugW3JH-Gin-WFGc6DK2Mb4Prdc,2576
 lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
@@ -19,10 +20,11 @@ lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBd
 lm_deluge/api_requests/anthropic.py,sha256=QGq3G5jJIGcoM2HdRt73GgkvZs4GOViyjYexWex05Vk,8927
 lm_deluge/api_requests/base.py,sha256=GCcydwBRx4_xAuYLvasXlyj-TgqvKAVhVvxRfJkvPbY,9471
 lm_deluge/api_requests/bedrock.py,sha256=Uppne03GcIEk1tVYzoGu7GXK2Sg94a_xvFTLDRN_phY,15412
+lm_deluge/api_requests/chat_reasoning.py,sha256=sJvstvKFqsSBUjYcwxzGt2_FH4cEp3Z6gKcBPyPjGwk,236
 lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
 lm_deluge/api_requests/gemini.py,sha256=4uD7fQl0yWyAvYkPNi3oO1InBnvYfo5_QR6k-va-2GI,7838
 lm_deluge/api_requests/mistral.py,sha256=8JZP2CDf1XZfaPcTk0WS4q-VfYYj58ptpoH8LD3MQG4,4528
-lm_deluge/api_requests/openai.py,sha256=
+lm_deluge/api_requests/openai.py,sha256=ezlGYNGHFvQGgs-xuxhDDeiEembHhVh_KqJBdRBqSlM,26038
 lm_deluge/api_requests/response.py,sha256=vG194gAH5p7ulpNy4qy5Pryfb1p3ZV21-YGoj__ru3E,7436
 lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
 lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
@@ -42,7 +44,7 @@ lm_deluge/llm_tools/locate.py,sha256=lYNbKTmy9dTvj0lEQkOQ7yrxyqsgYzjD0C_byJKI_4w
 lm_deluge/llm_tools/ocr.py,sha256=7fDlvs6uUOvbxMasvGGNJx5Fj6biM6z3lijKZaGN26k,23
 lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
 lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
-lm_deluge/models/__init__.py,sha256=
+lm_deluge/models/__init__.py,sha256=54H24K_eADbfdEH9aNORrNEXvDLZCQ4TEekeLiWljSE,4619
 lm_deluge/models/anthropic.py,sha256=5j75sB40yZzT1wwKC7Dh0f2Y2cXnp8yxHuXW63PCuns,6285
 lm_deluge/models/bedrock.py,sha256=g1PbfceSRH2lWST3ja0mUlF3oTq4e4T-si6RMe7qXgg,4888
 lm_deluge/models/cerebras.py,sha256=u2FMXJF6xMr0euDRKLKMo_NVTOcvSrrEpehbHr8sSeE,2050
@@ -52,7 +54,9 @@ lm_deluge/models/fireworks.py,sha256=yvt2Ggzye4aUqCqY74ta67Vu7FrQaLFjdFtN4P7D-dc
 lm_deluge/models/google.py,sha256=Hr2MolQoaeY85pKCGO7k7OH_1nQJdrwMgrJbfz5bI8w,5387
 lm_deluge/models/grok.py,sha256=TDzr8yfTaHbdJhwMA-Du6L-efaKFJhjTQViuVElCCHI,2566
 lm_deluge/models/groq.py,sha256=Mi5WE1xOBGoZlymD0UN6kzhH_NOmfJYU4N2l-TO0Z8Q,2552
+lm_deluge/models/kimi.py,sha256=1voigLdNO2CxpWv0KDpQPP3Wolx5WrqgAlYL9ObJFuQ,1117
 lm_deluge/models/meta.py,sha256=BBgnscL1gMcIdPbRqrlDl_q9YAYGSrkw9JkAIabXtLs,1883
+lm_deluge/models/minimax.py,sha256=rwW9gNotAYfDVtMlqmSYegN6GoZM_9DSNNZU2yPOmaU,275
 lm_deluge/models/mistral.py,sha256=x67o5gckBGmPcIGdVbS26XZAYFKBYM4tsxEAahGp8bk,4323
 lm_deluge/models/openai.py,sha256=6J4eAt6Iu5RopokyldUQzRlviFBXBqhLqpVP5tztzqI,11074
 lm_deluge/models/openrouter.py,sha256=O-Po4tmHjAqFIVU96TUL0QnK01R4e2yDN7Z4sYJ-CuE,2120
@@ -65,8 +69,8 @@ lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11
 lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
-lm_deluge-0.0.
+lm_deluge-0.0.69.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.69.dist-info/METADATA,sha256=BMFkIulQwTPBEtqViIDyY6RjaMH4hZBzC-4qTCXpGQY,13514
+lm_deluge-0.0.69.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.69.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.69.dist-info/RECORD,,
{lm_deluge-0.0.67.dist-info → lm_deluge-0.0.69.dist-info}/WHEEL
File without changes
{lm_deluge-0.0.67.dist-info → lm_deluge-0.0.69.dist-info}/licenses/LICENSE
File without changes
{lm_deluge-0.0.67.dist-info → lm_deluge-0.0.69.dist-info}/top_level.txt
File without changes