lm-deluge 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (41)
  1. lm_deluge/__init__.py +0 -24
  2. lm_deluge/api_requests/anthropic.py +25 -5
  3. lm_deluge/api_requests/base.py +37 -0
  4. lm_deluge/api_requests/bedrock.py +23 -2
  5. lm_deluge/api_requests/gemini.py +36 -10
  6. lm_deluge/api_requests/openai.py +31 -4
  7. lm_deluge/batches.py +15 -45
  8. lm_deluge/client.py +27 -1
  9. lm_deluge/models/__init__.py +2 -0
  10. lm_deluge/models/anthropic.py +12 -12
  11. lm_deluge/models/google.py +13 -0
  12. lm_deluge/models/minimax.py +9 -1
  13. lm_deluge/models/openrouter.py +48 -0
  14. lm_deluge/models/zai.py +50 -1
  15. lm_deluge/pipelines/gepa/docs/samples.py +19 -10
  16. lm_deluge/prompt.py +333 -68
  17. lm_deluge/server/__init__.py +24 -0
  18. lm_deluge/server/__main__.py +144 -0
  19. lm_deluge/server/adapters.py +369 -0
  20. lm_deluge/server/app.py +388 -0
  21. lm_deluge/server/auth.py +71 -0
  22. lm_deluge/server/model_policy.py +215 -0
  23. lm_deluge/server/models_anthropic.py +172 -0
  24. lm_deluge/server/models_openai.py +175 -0
  25. lm_deluge/skills/anthropic.py +0 -0
  26. lm_deluge/skills/compat.py +0 -0
  27. lm_deluge/tool/__init__.py +13 -1
  28. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  29. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  30. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  31. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  32. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  33. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
  34. lm_deluge/tool/prefab/skills.py +0 -0
  35. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +4 -3
  36. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/RECORD +39 -24
  37. lm_deluge/mock_openai.py +0 -643
  38. lm_deluge/tool/prefab/sandbox.py +0 -1621
  39. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
  40. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
  41. {lm_deluge-0.0.88.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
lm_deluge/mock_openai.py DELETED
@@ -1,643 +0,0 @@
- """
- Mock OpenAI client that implements the AsyncOpenAI interface but uses lm-deluge's
- LLMClient internally. This allows using any lm-deluge-supported provider through
- the standard OpenAI Python SDK interface.
-
- Example usage:
-     from lm_deluge.mock_openai import MockAsyncOpenAI
-
-     # Use Claude through OpenAI interface
-     client = MockAsyncOpenAI(model="claude-sonnet-4")
-     response = await client.chat.completions.create(
-         model="claude-sonnet-4",  # Can override here
-         messages=[{"role": "user", "content": "Hello!"}],
-         temperature=0.7
-     )
-     print(response.choices[0].message.content)
-
- Installation:
-     pip install lm-deluge[openai]
- """
-
- import json
- import time
- import uuid
- from typing import Any, AsyncIterator, Literal, Union, overload
-
- try:
-     from openai import (
-         APIError,
-         APITimeoutError,
-         BadRequestError,
-         RateLimitError,
-     )
-     from openai.types import Completion
-     from openai.types.chat import (
-         ChatCompletion,
-         ChatCompletionChunk,
-         ChatCompletionMessage,
-         ChatCompletionMessageToolCall,
-     )
-     from openai.types.chat.chat_completion import Choice as ChatCompletionChoice
-     from openai.types.chat.chat_completion_chunk import (
-         Choice as ChunkChoice,
-     )
-     from openai.types.chat.chat_completion_chunk import (
-         ChoiceDelta,
-         ChoiceDeltaToolCall,
-         ChoiceDeltaToolCallFunction,
-     )
-     from openai.types.chat.chat_completion_message_tool_call import Function
-     from openai.types.completion_choice import CompletionChoice as TextCompletionChoice
-     from openai.types.completion_usage import CompletionUsage
- except ImportError:
-     raise ImportError(
-         "The openai package is required to use MockAsyncOpenAI. "
-         "Install it with: pip install lm-deluge[openai]"
-     )
-
- # Re-export exceptions for compatibility
- __all__ = [
-     "MockAsyncOpenAI",
-     "APIError",
-     "APITimeoutError",
-     "BadRequestError",
-     "RateLimitError",
- ]
-
- from lm_deluge.client import LLMClient, _LLMClient
- from lm_deluge.prompt import CachePattern, Conversation, Message, Text, ToolCall
- from lm_deluge.tool import Tool
-
-
- def _openai_tools_to_lm_deluge(tools: list[dict[str, Any]]) -> list[Tool]:
-     """
-     Convert OpenAI tool format to lm-deluge Tool objects.
-
-     OpenAI format:
-         {
-             "type": "function",
-             "function": {
-                 "name": "get_weather",
-                 "description": "Get weather",
-                 "parameters": {
-                     "type": "object",
-                     "properties": {...},
-                     "required": [...]
-                 }
-             }
-         }
-
-     lm-deluge format:
-         Tool(
-             name="get_weather",
-             description="Get weather",
-             parameters={...properties...},
-             required=[...]
-         )
-     """
-     lm_tools = []
-     for tool in tools:
-         if tool.get("type") == "function":
-             func = tool["function"]
-             params_schema = func.get("parameters", {})
-
-             # Extract properties and required from the parameters schema
-             properties = params_schema.get("properties", {})
-             required = params_schema.get("required", [])
-
-             lm_tool = Tool(
-                 name=func["name"],
-                 description=func.get("description"),
-                 parameters=properties if properties else None,
-                 required=required,
-             )
-             lm_tools.append(lm_tool)
-
-     return lm_tools
-
-
- def _messages_to_conversation(messages: list[dict[str, Any]]) -> Conversation:
-     """Convert OpenAI messages format to lm-deluge Conversation."""
-     return Conversation.from_openai_chat(messages)
-
-
- def _response_to_chat_completion(
-     response: Any,  # APIResponse
-     model: str,
-     request_id: str | None = None,
- ) -> ChatCompletion:
-     """Convert lm-deluge APIResponse to OpenAI ChatCompletion."""
-     if request_id is None:
-         request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
-
-     # Handle error responses
-     if response.is_error:
-         # For errors, create an empty response with error finish reason
-         message = ChatCompletionMessage(
-             role="assistant",
-             content=response.error_message or "Error occurred",
-         )
-         choice = ChatCompletionChoice(
-             index=0,
-             message=message,
-             finish_reason="stop",  # or could use "error" but that's not standard
-         )
-         return ChatCompletion(
-             id=request_id,
-             choices=[choice],
-             created=int(time.time()),
-             model=model,
-             object="chat.completion",
-             usage=None,
-         )
-
-     # Extract content from response
-     content_text = None
-     tool_calls = None
-
-     if response.content:
-         # Extract text parts
-         text_parts = [p.text for p in response.content.parts if isinstance(p, Text)]
-         if text_parts:
-             content_text = "".join(text_parts)
-
-         # Extract tool calls
-         tool_call_parts = [p for p in response.content.parts if isinstance(p, ToolCall)]
-         if tool_call_parts:
-             tool_calls = [
-                 ChatCompletionMessageToolCall(
-                     id=tc.id,
-                     type="function",
-                     function=Function(
-                         name=tc.name,
-                         # Convert dict arguments to JSON string for OpenAI format
-                         arguments=json.dumps(tc.arguments)
-                         if isinstance(tc.arguments, dict)
-                         else tc.arguments,
-                     ),
-                 )
-                 for tc in tool_call_parts
-             ]
-
-     # Create message
-     message = ChatCompletionMessage(
-         role="assistant",
-         content=content_text,
-         tool_calls=tool_calls,
-     )
-
-     # Create choice
-     choice = ChatCompletionChoice(
-         index=0,
-         message=message,
-         finish_reason=response.finish_reason or "stop",
-     )
-
-     # Create usage
-     usage = None
-     if response.usage:
-         usage = CompletionUsage(
-             prompt_tokens=response.usage.input_tokens,
-             completion_tokens=response.usage.output_tokens,
-             total_tokens=response.usage.input_tokens + response.usage.output_tokens,
-         )
-
-     return ChatCompletion(
-         id=request_id,
-         choices=[choice],
-         created=int(time.time()),
-         model=model,
-         object="chat.completion",
-         usage=usage,
-     )
-
-
- class _AsyncStreamWrapper:
-     """Wrapper to convert lm-deluge streaming to OpenAI ChatCompletionChunk format."""
-
-     def __init__(self, stream: AsyncIterator, model: str, request_id: str):
-         self._stream = stream
-         self._model = model
-         self._request_id = request_id
-         self._first_chunk = True
-
-     def __aiter__(self):
-         return self
-
-     async def __anext__(self) -> ChatCompletionChunk:
-         chunk = await self._stream.__anext__()
-
-         # Create delta based on chunk content
-         delta = ChoiceDelta()
-
-         if self._first_chunk:
-             delta.role = "assistant"
-             self._first_chunk = False
-
-         # Extract content from chunk
-         if hasattr(chunk, "content") and chunk.content:
-             if isinstance(chunk.content, str):
-                 delta.content = chunk.content
-             elif hasattr(chunk.content, "parts"):
-                 # Extract text from parts
-                 text_parts = [
-                     p.text for p in chunk.content.parts if isinstance(p, Text)
-                 ]
-                 if text_parts:
-                     delta.content = "".join(text_parts)
-
-                 # Extract tool calls from parts
-                 tool_call_parts = [
-                     p for p in chunk.content.parts if isinstance(p, ToolCall)
-                 ]
-                 if tool_call_parts:
-                     delta.tool_calls = [
-                         ChoiceDeltaToolCall(
-                             index=i,
-                             id=tc.id,
-                             type="function",
-                             function=ChoiceDeltaToolCallFunction(
-                                 name=tc.name,
-                                 # Convert dict arguments to JSON string for OpenAI format
-                                 arguments=json.dumps(tc.arguments)
-                                 if isinstance(tc.arguments, dict)
-                                 else tc.arguments,
-                             ),
-                         )
-                         for i, tc in enumerate(tool_call_parts)
-                     ]
-
-         # Create choice
-         choice = ChunkChoice(
-             index=0,
-             delta=delta,
-             finish_reason=getattr(chunk, "finish_reason", None),
-         )
-
-         return ChatCompletionChunk(
-             id=self._request_id,
-             choices=[choice],
-             created=int(time.time()),
-             model=self._model,
-             object="chat.completion.chunk",
-         )
-
-
- class MockCompletions:
-     """Mock completions resource that implements OpenAI's completions.create interface."""
-
-     def __init__(self, parent: "MockAsyncOpenAI"):
-         self._parent = parent
-
-     @overload
-     async def create(
-         self,
-         *,
-         messages: list[dict[str, Any]],
-         model: str,
-         stream: Literal[False] = False,
-         **kwargs: Any,
-     ) -> ChatCompletion: ...
-
-     @overload
-     async def create(
-         self,
-         *,
-         messages: list[dict[str, Any]],
-         model: str,
-         stream: Literal[True],
-         **kwargs: Any,
-     ) -> AsyncIterator[ChatCompletionChunk]: ...
-
-     async def create(
-         self,
-         *,
-         messages: list[dict[str, Any]],
-         model: str,
-         stream: bool = False,
-         temperature: float | None = None,
-         max_tokens: int | None = None,
-         max_completion_tokens: int | None = None,
-         top_p: float | None = None,
-         seed: int | None = None,
-         tools: list[dict[str, Any]] | None = None,
-         tool_choice: Any | None = None,
-         reasoning_effort: str | None = None,
-         response_format: dict[str, Any] | None = None,
-         n: int | None = None,
-         stop: str | list[str] | None = None,
-         presence_penalty: float | None = None,
-         frequency_penalty: float | None = None,
-         **kwargs: Any,
-     ) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
-         """
-         Create a chat completion using lm-deluge's LLMClient.
-
-         Args:
-             messages: List of message dictionaries with 'role' and 'content'
-             model: Model identifier (can override client's default model)
-             stream: Whether to stream the response
-             temperature: Sampling temperature (0-2)
-             max_tokens: Max tokens (deprecated, use max_completion_tokens)
-             max_completion_tokens: Max completion tokens
-             top_p: Nucleus sampling parameter
-             seed: Random seed for deterministic sampling
-             tools: List of tool definitions
-             tool_choice: Tool choice strategy
-             reasoning_effort: Reasoning effort for reasoning models
-             response_format: Response format (e.g., {"type": "json_object"})
-             **kwargs: Other parameters (mostly ignored for compatibility)
-
-         Returns:
-             ChatCompletion (non-streaming) or AsyncIterator[ChatCompletionChunk] (streaming)
-         """
-         # Get or create client for this model
-         client: _LLMClient = self._parent._get_or_create_client(model)
-
-         # Convert messages to Conversation
-         conversation = _messages_to_conversation(messages)
-
-         # Build sampling params
-         sampling_kwargs = {}
-         if temperature is not None:
-             sampling_kwargs["temperature"] = temperature
-         if max_completion_tokens is not None:
-             sampling_kwargs["max_new_tokens"] = max_completion_tokens
-         elif max_tokens is not None:
-             sampling_kwargs["max_new_tokens"] = max_tokens
-         if top_p is not None:
-             sampling_kwargs["top_p"] = top_p
-         if seed is not None:
-             sampling_kwargs["seed"] = seed
-         if reasoning_effort is not None:
-             sampling_kwargs["reasoning_effort"] = reasoning_effort
-         if response_format and response_format.get("type") == "json_object":
-             sampling_kwargs["json_mode"] = True
-
-         # If sampling params are provided, create a new client with merged params
-         if sampling_kwargs:
-             # Merge with default params
-             merged_params = {**self._parent._default_sampling_params, **sampling_kwargs}
-             client = self._parent._create_client_with_params(model, merged_params)
-
-         # Convert tools if provided
-         lm_tools = None
-         if tools:
-             # Convert from OpenAI format to lm-deluge Tool objects
-             lm_tools = _openai_tools_to_lm_deluge(tools)
-
-         # Execute request
-         if stream:
-             raise RuntimeError("streaming not supported")
-         else:
-             # Non-streaming mode
-             response = await client.start(
-                 conversation,
-                 tools=lm_tools,  # type: ignore
-                 cache=self._parent.cache_pattern,  # type: ignore
-             )
-             return _response_to_chat_completion(response, model)
-
-
- class MockTextCompletions:
-     """Mock text completions resource for legacy completions API."""
-
-     def __init__(self, parent: "MockAsyncOpenAI"):
-         self._parent = parent
-
-     async def create(
-         self,
-         *,
-         model: str,
-         prompt: str | list[str],
-         temperature: float | None = None,
-         max_tokens: int | None = None,
-         top_p: float | None = None,
-         seed: int | None = None,
-         n: int | None = None,
-         stop: str | list[str] | None = None,
-         **kwargs: Any,
-     ) -> Completion:
-         """
-         Create a text completion using lm-deluge's LLMClient.
-
-         Args:
-             model: Model identifier
-             prompt: Text prompt or list of prompts
-             temperature: Sampling temperature
-             max_tokens: Max tokens to generate
-             top_p: Nucleus sampling parameter
-             seed: Random seed
-             n: Number of completions (currently ignored, always returns 1)
-             stop: Stop sequences
-             **kwargs: Other parameters
-
-         Returns:
-             Completion object
-         """
-         # Get or create client for this model
-         client: _LLMClient = self._parent._get_or_create_client(model)
-
-         # Handle single prompt
-         if isinstance(prompt, list):
-             # For now, just use the first prompt
-             prompt = prompt[0] if prompt else ""
-
-         # Convert prompt to Conversation
-         conversation = Conversation([Message(role="user", parts=[Text(prompt)])])
-
-         # Build sampling params
-         sampling_kwargs = {}
-         if temperature is not None:
-             sampling_kwargs["temperature"] = temperature
-         if max_tokens is not None:
-             sampling_kwargs["max_new_tokens"] = max_tokens
-         if top_p is not None:
-             sampling_kwargs["top_p"] = top_p
-         if seed is not None:
-             sampling_kwargs["seed"] = seed
-
-         # Create client with merged params if needed
-         if sampling_kwargs:
-             merged_params = {**self._parent._default_sampling_params, **sampling_kwargs}
-             client = self._parent._create_client_with_params(model, merged_params)
-
-         # Execute request
-         response = await client.start(conversation, cache=self._parent.cache_pattern)  # type: ignore
-
-         # Convert to Completion format
-         completion_text = None
-         if response.content:
-             text_parts = [p.text for p in response.content.parts if isinstance(p, Text)]
-             if text_parts:
-                 completion_text = "".join(text_parts)
-
-         # Create choice
-         choice = TextCompletionChoice(
-             index=0,
-             text=completion_text or "",
-             finish_reason=response.finish_reason or "stop",  # type: ignore
-         )
-
-         # Create usage
-         usage = None
-         if response.usage:
-             usage = CompletionUsage(
-                 prompt_tokens=response.usage.input_tokens,
-                 completion_tokens=response.usage.output_tokens,
-                 total_tokens=response.usage.input_tokens + response.usage.output_tokens,
-             )
-
-         return Completion(
-             id=f"cmpl-{uuid.uuid4().hex[:24]}",
-             choices=[choice],
-             created=int(time.time()),
-             model=model,
-             object="text_completion",
-             usage=usage,
-         )
-
-
- class MockChat:
-     """Mock chat resource that provides access to completions."""
-
-     def __init__(self, parent: "MockAsyncOpenAI"):
-         self._parent = parent
-         self._completions = MockCompletions(parent)
-
-     @property
-     def completions(self) -> MockCompletions:
-         """Access the completions resource."""
-         return self._completions
-
-
- class MockAsyncOpenAI:
-     """
-     Mock AsyncOpenAI client that uses lm-deluge's LLMClient internally.
-
-     This allows using any lm-deluge-supported provider (Anthropic, Google, etc.)
-     through the standard OpenAI Python SDK interface.
-
-     Example:
-         # Use Claude through OpenAI interface
-         client = MockAsyncOpenAI(model="claude-sonnet-4")
-         response = await client.chat.completions.create(
-             model="claude-sonnet-4",
-             messages=[{"role": "user", "content": "Hello!"}],
-             temperature=0.7
-         )
-
-     Args:
-         model: Default model to use (can be overridden in create())
-         api_key: API key (optional, for compatibility)
-         organization: Organization ID (optional, for compatibility)
-         project: Project ID (optional, for compatibility)
-         base_url: Base URL (defaults to OpenAI's URL for compatibility)
-         timeout: Request timeout (optional, for compatibility)
-         max_retries: Max retries (defaults to 2 for compatibility)
-         default_headers: Default headers (optional, for compatibility)
-         temperature: Default temperature
-         max_completion_tokens: Default max completion tokens
-         top_p: Default top_p
-         seed: Default seed for deterministic sampling
-         **kwargs: Additional parameters passed to LLMClient
-     """
-
-     def __init__(
-         self,
-         *,
-         model: str | None = None,
-         api_key: str | None = None,
-         organization: str | None = None,
-         project: str | None = None,
-         base_url: str | None = None,
-         timeout: float | None = None,
-         max_retries: int | None = None,
-         default_headers: dict[str, str] | None = None,
-         http_client: Any | None = None,
-         temperature: float | None = None,
-         max_completion_tokens: int | None = None,
-         top_p: float | None = None,
-         seed: int | None = None,
-         cache_pattern: CachePattern | None = None,
-         **kwargs: Any,
-     ):
-         # OpenAI-compatible attributes
-         self.api_key = api_key
-         self.organization = organization
-         self.project = project
-         self.base_url = base_url or "https://api.openai.com/v1"
-         self.timeout = timeout
-         self.max_retries = max_retries or 2
-         self.default_headers = default_headers
-         self.http_client = http_client
-         self.cache_pattern = cache_pattern
-
-         # Internal attributes
-         self._default_model = model or "gpt-4o-mini"
-         self._default_sampling_params = {}
-
-         if temperature is not None:
-             self._default_sampling_params["temperature"] = temperature
-         if max_completion_tokens is not None:
-             self._default_sampling_params["max_new_tokens"] = max_completion_tokens
-         if top_p is not None:
-             self._default_sampling_params["top_p"] = top_p
-         if seed is not None:
-             self._default_sampling_params["seed"] = seed
-
-         # Additional kwargs for LLMClient
-         self._client_kwargs = kwargs
-
-         # Cache of LLMClient instances by model
-         self._clients: dict[str, Any] = {}
-
-         # Create the default client
-         self._clients[self._default_model] = self._create_client(self._default_model)
-
-         # Create nested resources
-         self._chat = MockChat(self)
-         self._completions = MockTextCompletions(self)
-
-     def _create_client(self, model: str) -> Any:
-         """Create a new LLMClient for the given model."""
-         return LLMClient(
-             model,
-             **self._default_sampling_params,
-             **self._client_kwargs,
-         )
-
-     def _create_client_with_params(self, model: str, params: dict[str, Any]) -> Any:
-         """Create a new LLMClient with specific sampling parameters."""
-         return LLMClient(
-             model,
-             **params,
-             **self._client_kwargs,
-         )
-
-     def _get_or_create_client(self, model: str) -> Any:
-         """Get existing client or create new one for the model."""
-         if model not in self._clients:
-             self._clients[model] = self._create_client(model)
-         return self._clients[model]
-
-     @property
-     def chat(self) -> MockChat:
-         """Access the chat resource."""
-         return self._chat
-
-     @property
-     def completions(self) -> MockTextCompletions:
-         """Access the text completions resource."""
-         return self._completions
-
-     async def close(self) -> None:
-         """
-         Close the client and clean up resources.
-
-         This is provided for compatibility with AsyncOpenAI's close() method.
-         Currently a no-op as LLMClient instances don't need explicit cleanup.
-         """
-         # No cleanup needed for LLMClient instances
-         pass
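
For reference, the deleted module's own docstring documents the interface that disappears with this release. A minimal usage sketch against 0.0.88 (it assumes `pip install lm-deluge[openai]` and valid provider credentials; nothing below exists in 0.0.90):

import asyncio

from lm_deluge.mock_openai import MockAsyncOpenAI  # removed in 0.0.90


async def main() -> None:
    # Any lm-deluge-supported model can sit behind the OpenAI-style surface.
    client = MockAsyncOpenAI(model="claude-sonnet-4")
    response = await client.chat.completions.create(
        model="claude-sonnet-4",  # can override the client default
        messages=[{"role": "user", "content": "Hello!"}],
        temperature=0.7,
    )
    print(response.choices[0].message.content)


asyncio.run(main())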
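
The deleted _openai_tools_to_lm_deluge helper unwrapped OpenAI's {"type": "function", "function": {...}} envelope into lm_deluge.tool.Tool fields: the JSON-schema properties became `parameters` and `required` was lifted to a top-level argument. A sketch of the same conversion done by hand (the `city` property is a made-up example, not from the diff):

from lm_deluge.tool import Tool

openai_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get weather",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},  # hypothetical property
            "required": ["city"],
        },
    },
}

func = openai_tool["function"]
schema = func["parameters"]

# Mirrors the helper: properties become `parameters`, required is lifted out.
lm_tool = Tool(
    name=func["name"],
    description=func.get("description"),
    parameters=schema["properties"] or None,
    required=schema.get("required", []),
)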
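
Before dispatching to LLMClient.start, MockCompletions.create folded OpenAI-style kwargs into lm-deluge sampling parameters. A standalone sketch of that mapping, extracted from the deleted method body (the map_openai_params name is illustrative, not part of the package):

def map_openai_params(
    temperature=None,
    max_tokens=None,
    max_completion_tokens=None,
    top_p=None,
    seed=None,
    reasoning_effort=None,
    response_format=None,
) -> dict:
    params = {}
    if temperature is not None:
        params["temperature"] = temperature
    # max_completion_tokens takes precedence over the deprecated max_tokens
    if max_completion_tokens is not None:
        params["max_new_tokens"] = max_completion_tokens
    elif max_tokens is not None:
        params["max_new_tokens"] = max_tokens
    if top_p is not None:
        params["top_p"] = top_p
    if seed is not None:
        params["seed"] = seed
    if reasoning_effort is not None:
        params["reasoning_effort"] = reasoning_effort
    # {"type": "json_object"} turns into lm-deluge's json_mode flag
    if response_format and response_format.get("type") == "json_object":
        params["json_mode"] = True
    return params


assert map_openai_params(max_tokens=256, response_format={"type": "json_object"}) == {
    "max_new_tokens": 256,
    "json_mode": True,
}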