lm-deluge 0.0.68__py3-none-any.whl → 0.0.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lm_deluge/__init__.py CHANGED
@@ -3,6 +3,19 @@ from .file import File
 from .prompt import Conversation, Message
 from .tool import Tool, ToolParams
 
+try:
+    from .mock_openai import (  # noqa
+        APIError,
+        APITimeoutError,
+        BadRequestError,
+        MockAsyncOpenAI,
+        RateLimitError,
+    )
+
+    _has_openai = True
+except ImportError:
+    _has_openai = False
+
 # dotenv.load_dotenv() - don't do this, fucks with other packages
 
 __all__ = [
@@ -15,3 +28,14 @@ __all__ = [
     "ToolParams",
     "File",
 ]
+
+if _has_openai:
+    __all__.extend(
+        [
+            "MockAsyncOpenAI",
+            "APIError",
+            "APITimeoutError",
+            "BadRequestError",
+            "RateLimitError",
+        ]
+    )
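A short illustration of what the conditional export means for consumers; a minimal sketch in which the fallback assignment is illustrative and not part of the package:

# MockAsyncOpenAI is only re-exported from the package root when the optional
# openai dependency is installed (`pip install lm-deluge[openai]`), so a plain
# import probe tells downstream code which case it is in.
try:
    from lm_deluge import MockAsyncOpenAI
except ImportError:
    MockAsyncOpenAI = None  # illustrative fallback when the extra is missing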
lm_deluge/api_requests/chat_reasoning.py ADDED
@@ -0,0 +1,4 @@
+# this request type is for models that add "reasoning_content"
+# on top of the openai chat completions. it's important to be separate
+# for providers that expect you to provide back the reasoning content to
+# preserve best performance.
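For context, this is the kind of payload the comment refers to; a hypothetical sketch assuming a provider that returns a "reasoning_content" field next to "content" in its chat-completions responses (field names and placement vary by provider and are not defined in this stub):

# Hypothetical shape only: an assistant turn from a reasoning-capable provider.
assistant_turn = {
    "role": "assistant",
    "content": "The integral evaluates to pi/4.",
    "reasoning_content": "Substituting x = tan(t) simplifies the integrand ...",
}

# Such providers expect the reasoning to be echoed back with the message
# history on the next request so the model keeps its context across turns.
messages = [
    {"role": "user", "content": "Evaluate the integral."},
    assistant_turn,
    {"role": "user", "content": "Now show the steps."},
]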
lm_deluge/api_requests/openai.py CHANGED
@@ -381,7 +381,7 @@ class OpenAIResponsesRequest(APIRequestBase):
         output = data.get("output", [])
         if not output:
             is_error = True
-            error_message = "No output in response"
+            error_message = f"No output in response. Status: {data.get('status')}, error: {data.get('error')}, incomplete details: {data.get('incomplete_details')}"
         else:
             # Process each output item
             for item in output:
lm_deluge/mock_openai.py ADDED
@@ -0,0 +1,641 @@
+"""
+Mock OpenAI client that implements the AsyncOpenAI interface but uses lm-deluge's
+LLMClient internally. This allows using any lm-deluge-supported provider through
+the standard OpenAI Python SDK interface.
+
+Example usage:
+    from lm_deluge.mock_openai import MockAsyncOpenAI
+
+    # Use Claude through OpenAI interface
+    client = MockAsyncOpenAI(model="claude-sonnet-4")
+    response = await client.chat.completions.create(
+        model="claude-sonnet-4",  # Can override here
+        messages=[{"role": "user", "content": "Hello!"}],
+        temperature=0.7
+    )
+    print(response.choices[0].message.content)
+
+Installation:
+    pip install lm-deluge[openai]
+"""
+
+import json
+import time
+import uuid
+from typing import Any, AsyncIterator, Literal, Union, overload
+
+try:
+    from openai import (
+        APIError,
+        APITimeoutError,
+        BadRequestError,
+        RateLimitError,
+    )
+    from openai.types import Completion
+    from openai.types.chat import (
+        ChatCompletion,
+        ChatCompletionChunk,
+        ChatCompletionMessage,
+        ChatCompletionMessageToolCall,
+    )
+    from openai.types.chat.chat_completion import Choice as ChatCompletionChoice
+    from openai.types.chat.chat_completion_chunk import (
+        Choice as ChunkChoice,
+        ChoiceDelta,
+        ChoiceDeltaToolCall,
+        ChoiceDeltaToolCallFunction,
+    )
+    from openai.types.chat.chat_completion_message_tool_call import Function
+    from openai.types.completion_choice import CompletionChoice as TextCompletionChoice
+    from openai.types.completion_usage import CompletionUsage
+except ImportError:
+    raise ImportError(
+        "The openai package is required to use MockAsyncOpenAI. "
+        "Install it with: pip install lm-deluge[openai]"
+    )
+
+# Re-export exceptions for compatibility
+__all__ = [
+    "MockAsyncOpenAI",
+    "APIError",
+    "APITimeoutError",
+    "BadRequestError",
+    "RateLimitError",
+]
+
+from lm_deluge.client import LLMClient
+from lm_deluge.prompt import Conversation, Message, Part, Text, ToolCall, ToolResult
+
+
+def _messages_to_conversation(messages: list[dict[str, Any]]) -> Conversation:
+    """Convert OpenAI messages format to lm-deluge Conversation."""
+    conv_messages = []
+
+    for msg in messages:
+        role = msg["role"]
+        content = msg.get("content")
+        tool_calls = msg.get("tool_calls")
+        tool_call_id = msg.get("tool_call_id")
+
+        parts: list[Part] = []
+
+        # Handle regular content
+        if content:
+            if isinstance(content, str):
+                parts.append(Text(content))
+            elif isinstance(content, list):
+                # Multi-part content (text, images, etc.)
+                for item in content:
+                    if item.get("type") == "text":
+                        parts.append(Text(item["text"]))
+                    # Could add image support here later
+
+        # Handle tool calls (from assistant)
+        if tool_calls:
+            for tc in tool_calls:
+                # Parse arguments from JSON string to dict
+                args_str = tc["function"]["arguments"]
+                args_dict = (
+                    json.loads(args_str) if isinstance(args_str, str) else args_str
+                )
+                parts.append(
+                    ToolCall(
+                        id=tc["id"],
+                        name=tc["function"]["name"],
+                        arguments=args_dict,
+                    )
+                )
+
+        # Handle tool results (from tool role)
+        if role == "tool" and tool_call_id:
+            parts.append(ToolResult(tool_call_id=tool_call_id, result=content or ""))
+
+        conv_messages.append(Message(role=role, parts=parts))
+
+    return Conversation(messages=conv_messages)
+
+
+def _response_to_chat_completion(
+    response: Any,  # APIResponse
+    model: str,
+    request_id: str | None = None,
+) -> ChatCompletion:
+    """Convert lm-deluge APIResponse to OpenAI ChatCompletion."""
+    if request_id is None:
+        request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
+
+    # Handle error responses
+    if response.is_error:
+        # For errors, create an empty response with error finish reason
+        message = ChatCompletionMessage(
+            role="assistant",
+            content=response.error_message or "Error occurred",
+        )
+        choice = ChatCompletionChoice(
+            index=0,
+            message=message,
+            finish_reason="stop",  # or could use "error" but that's not standard
+        )
+        return ChatCompletion(
+            id=request_id,
+            choices=[choice],
+            created=int(time.time()),
+            model=model,
+            object="chat.completion",
+            usage=None,
+        )
+
+    # Extract content from response
+    content_text = None
+    tool_calls = None
+
+    if response.content:
+        # Extract text parts
+        text_parts = [p.text for p in response.content.parts if isinstance(p, Text)]
+        if text_parts:
+            content_text = "".join(text_parts)
+
+        # Extract tool calls
+        tool_call_parts = [p for p in response.content.parts if isinstance(p, ToolCall)]
+        if tool_call_parts:
+            tool_calls = [
+                ChatCompletionMessageToolCall(
+                    id=tc.id,
+                    type="function",
+                    function=Function(
+                        name=tc.name,
+                        # Convert dict arguments to JSON string for OpenAI format
+                        arguments=json.dumps(tc.arguments)
+                        if isinstance(tc.arguments, dict)
+                        else tc.arguments,
+                    ),
+                )
+                for tc in tool_call_parts
+            ]
+
+    # Create message
+    message = ChatCompletionMessage(
+        role="assistant",
+        content=content_text,
+        tool_calls=tool_calls,
+    )
+
+    # Create choice
+    choice = ChatCompletionChoice(
+        index=0,
+        message=message,
+        finish_reason=response.finish_reason or "stop",
+    )
+
+    # Create usage
+    usage = None
+    if response.usage:
+        usage = CompletionUsage(
+            prompt_tokens=response.usage.input_tokens,
+            completion_tokens=response.usage.output_tokens,
+            total_tokens=response.usage.input_tokens + response.usage.output_tokens,
+        )
+
+    return ChatCompletion(
+        id=request_id,
+        choices=[choice],
+        created=int(time.time()),
+        model=model,
+        object="chat.completion",
+        usage=usage,
+    )
+
+
+class _AsyncStreamWrapper:
+    """Wrapper to convert lm-deluge streaming to OpenAI ChatCompletionChunk format."""
+
+    def __init__(self, stream: AsyncIterator, model: str, request_id: str):
+        self._stream = stream
+        self._model = model
+        self._request_id = request_id
+        self._first_chunk = True
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self) -> ChatCompletionChunk:
+        chunk = await self._stream.__anext__()
+
+        # Create delta based on chunk content
+        delta = ChoiceDelta()
+
+        if self._first_chunk:
+            delta.role = "assistant"
+            self._first_chunk = False
+
+        # Extract content from chunk
+        if hasattr(chunk, "content") and chunk.content:
+            if isinstance(chunk.content, str):
+                delta.content = chunk.content
+            elif hasattr(chunk.content, "parts"):
+                # Extract text from parts
+                text_parts = [
+                    p.text for p in chunk.content.parts if isinstance(p, Text)
+                ]
+                if text_parts:
+                    delta.content = "".join(text_parts)
+
+                # Extract tool calls from parts
+                tool_call_parts = [
+                    p for p in chunk.content.parts if isinstance(p, ToolCall)
+                ]
+                if tool_call_parts:
+                    delta.tool_calls = [
+                        ChoiceDeltaToolCall(
+                            index=i,
+                            id=tc.id,
+                            type="function",
+                            function=ChoiceDeltaToolCallFunction(
+                                name=tc.name,
+                                # Convert dict arguments to JSON string for OpenAI format
+                                arguments=json.dumps(tc.arguments)
+                                if isinstance(tc.arguments, dict)
+                                else tc.arguments,
+                            ),
+                        )
+                        for i, tc in enumerate(tool_call_parts)
+                    ]
+
+        # Create choice
+        choice = ChunkChoice(
+            index=0,
+            delta=delta,
+            finish_reason=getattr(chunk, "finish_reason", None),
+        )
+
+        return ChatCompletionChunk(
+            id=self._request_id,
+            choices=[choice],
+            created=int(time.time()),
+            model=self._model,
+            object="chat.completion.chunk",
+        )
+
+
+class MockCompletions:
+    """Mock completions resource that implements OpenAI's completions.create interface."""
+
+    def __init__(self, parent: "MockAsyncOpenAI"):
+        self._parent = parent
+
+    @overload
+    async def create(
+        self,
+        *,
+        messages: list[dict[str, Any]],
+        model: str,
+        stream: Literal[False] = False,
+        **kwargs: Any,
+    ) -> ChatCompletion: ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        messages: list[dict[str, Any]],
+        model: str,
+        stream: Literal[True],
+        **kwargs: Any,
+    ) -> AsyncIterator[ChatCompletionChunk]: ...
+
+    async def create(
+        self,
+        *,
+        messages: list[dict[str, Any]],
+        model: str,
+        stream: bool = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        max_completion_tokens: int | None = None,
+        top_p: float | None = None,
+        seed: int | None = None,
+        tools: list[dict[str, Any]] | None = None,
+        tool_choice: Any | None = None,
+        reasoning_effort: str | None = None,
+        response_format: dict[str, Any] | None = None,
+        n: int | None = None,
+        stop: str | list[str] | None = None,
+        presence_penalty: float | None = None,
+        frequency_penalty: float | None = None,
+        **kwargs: Any,
+    ) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
+        """
+        Create a chat completion using lm-deluge's LLMClient.
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content'
+            model: Model identifier (can override client's default model)
+            stream: Whether to stream the response
+            temperature: Sampling temperature (0-2)
+            max_tokens: Max tokens (deprecated, use max_completion_tokens)
+            max_completion_tokens: Max completion tokens
+            top_p: Nucleus sampling parameter
+            seed: Random seed for deterministic sampling
+            tools: List of tool definitions
+            tool_choice: Tool choice strategy
+            reasoning_effort: Reasoning effort for reasoning models
+            response_format: Response format (e.g., {"type": "json_object"})
+            **kwargs: Other parameters (mostly ignored for compatibility)
+
+        Returns:
+            ChatCompletion (non-streaming) or AsyncIterator[ChatCompletionChunk] (streaming)
+        """
+        # Get or create client for this model
+        client = self._parent._get_or_create_client(model)
+
+        # Convert messages to Conversation
+        conversation = _messages_to_conversation(messages)
+
+        # Build sampling params
+        sampling_kwargs = {}
+        if temperature is not None:
+            sampling_kwargs["temperature"] = temperature
+        if max_completion_tokens is not None:
+            sampling_kwargs["max_new_tokens"] = max_completion_tokens
+        elif max_tokens is not None:
+            sampling_kwargs["max_new_tokens"] = max_tokens
+        if top_p is not None:
+            sampling_kwargs["top_p"] = top_p
+        if seed is not None:
+            sampling_kwargs["seed"] = seed
+        if reasoning_effort is not None:
+            sampling_kwargs["reasoning_effort"] = reasoning_effort
+        if response_format and response_format.get("type") == "json_object":
+            sampling_kwargs["json_mode"] = True
+
+        # If sampling params are provided, create a new client with merged params
+        if sampling_kwargs:
+            # Merge with default params
+            merged_params = {**self._parent._default_sampling_params, **sampling_kwargs}
+            client = self._parent._create_client_with_params(model, merged_params)
+
+        # Convert tools if provided
+        lm_tools = None
+        if tools:
+            # For now, just pass through - lm-deluge will handle the format
+            lm_tools = tools
+
+        # Execute request
+        if stream:
+            # Streaming mode
+            request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
+            # Note: client.stream() is an async generator, not a coroutine
+            # We can directly wrap it
+            stream_iter = client.stream(conversation, tools=lm_tools)
+            # Verify it's a generator, not a coroutine
+            if hasattr(stream_iter, "__anext__"):
+                return _AsyncStreamWrapper(stream_iter, model, request_id)
+            else:
+                # If it's a coroutine, we need to await it first
+                # But this shouldn't happen with the current implementation
+                raise TypeError(f"Expected async generator, got {type(stream_iter)}")
+        else:
+            # Non-streaming mode
+            response = await client.start(conversation, tools=lm_tools)
+            return _response_to_chat_completion(response, model)
+
+
+class MockTextCompletions:
+    """Mock text completions resource for legacy completions API."""
+
+    def __init__(self, parent: "MockAsyncOpenAI"):
+        self._parent = parent
+
+    async def create(
+        self,
+        *,
+        model: str,
+        prompt: str | list[str],
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        top_p: float | None = None,
+        seed: int | None = None,
+        n: int | None = None,
+        stop: str | list[str] | None = None,
+        **kwargs: Any,
+    ) -> Completion:
+        """
+        Create a text completion using lm-deluge's LLMClient.
+
+        Args:
+            model: Model identifier
+            prompt: Text prompt or list of prompts
+            temperature: Sampling temperature
+            max_tokens: Max tokens to generate
+            top_p: Nucleus sampling parameter
+            seed: Random seed
+            n: Number of completions (currently ignored, always returns 1)
+            stop: Stop sequences
+            **kwargs: Other parameters
+
+        Returns:
+            Completion object
+        """
+        # Get or create client for this model
+        client = self._parent._get_or_create_client(model)
+
+        # Handle single prompt
+        if isinstance(prompt, list):
+            # For now, just use the first prompt
+            prompt = prompt[0] if prompt else ""
+
+        # Convert prompt to Conversation
+        conversation = Conversation([Message(role="user", parts=[Text(prompt)])])
+
+        # Build sampling params
+        sampling_kwargs = {}
+        if temperature is not None:
+            sampling_kwargs["temperature"] = temperature
+        if max_tokens is not None:
+            sampling_kwargs["max_new_tokens"] = max_tokens
+        if top_p is not None:
+            sampling_kwargs["top_p"] = top_p
+        if seed is not None:
+            sampling_kwargs["seed"] = seed
+
+        # Create client with merged params if needed
+        if sampling_kwargs:
+            merged_params = {**self._parent._default_sampling_params, **sampling_kwargs}
+            client = self._parent._create_client_with_params(model, merged_params)
+
+        # Execute request
+        response = await client.start(conversation)
+
+        # Convert to Completion format
+        completion_text = None
+        if response.content:
+            text_parts = [p.text for p in response.content.parts if isinstance(p, Text)]
+            if text_parts:
+                completion_text = "".join(text_parts)
+
+        # Create choice
+        choice = TextCompletionChoice(
+            index=0,
+            text=completion_text or "",
+            finish_reason=response.finish_reason or "stop",
+        )
+
+        # Create usage
+        usage = None
+        if response.usage:
+            usage = CompletionUsage(
+                prompt_tokens=response.usage.input_tokens,
+                completion_tokens=response.usage.output_tokens,
+                total_tokens=response.usage.input_tokens + response.usage.output_tokens,
+            )
+
+        return Completion(
+            id=f"cmpl-{uuid.uuid4().hex[:24]}",
+            choices=[choice],
+            created=int(time.time()),
+            model=model,
+            object="text_completion",
+            usage=usage,
+        )
+
+
+class MockChat:
+    """Mock chat resource that provides access to completions."""
+
+    def __init__(self, parent: "MockAsyncOpenAI"):
+        self._parent = parent
+        self._completions = MockCompletions(parent)
+
+    @property
+    def completions(self) -> MockCompletions:
+        """Access the completions resource."""
+        return self._completions
+
+
+class MockAsyncOpenAI:
+    """
+    Mock AsyncOpenAI client that uses lm-deluge's LLMClient internally.
+
+    This allows using any lm-deluge-supported provider (Anthropic, Google, etc.)
+    through the standard OpenAI Python SDK interface.
+
+    Example:
+        # Use Claude through OpenAI interface
+        client = MockAsyncOpenAI(model="claude-sonnet-4")
+        response = await client.chat.completions.create(
+            model="claude-sonnet-4",
+            messages=[{"role": "user", "content": "Hello!"}],
+            temperature=0.7
+        )
+
+    Args:
+        model: Default model to use (can be overridden in create())
+        api_key: API key (optional, for compatibility)
+        organization: Organization ID (optional, for compatibility)
+        project: Project ID (optional, for compatibility)
+        base_url: Base URL (defaults to OpenAI's URL for compatibility)
+        timeout: Request timeout (optional, for compatibility)
+        max_retries: Max retries (defaults to 2 for compatibility)
+        default_headers: Default headers (optional, for compatibility)
+        temperature: Default temperature
+        max_completion_tokens: Default max completion tokens
+        top_p: Default top_p
+        seed: Default seed for deterministic sampling
+        **kwargs: Additional parameters passed to LLMClient
+    """

+    def __init__(
+        self,
+        *,
+        model: str | None = None,
+        api_key: str | None = None,
+        organization: str | None = None,
+        project: str | None = None,
+        base_url: str | None = None,
+        timeout: float | None = None,
+        max_retries: int | None = None,
+        default_headers: dict[str, str] | None = None,
+        http_client: Any | None = None,
+        temperature: float | None = None,
+        max_completion_tokens: int | None = None,
+        top_p: float | None = None,
+        seed: int | None = None,
+        **kwargs: Any,
+    ):
+        # OpenAI-compatible attributes
+        self.api_key = api_key
+        self.organization = organization
+        self.project = project
+        self.base_url = base_url or "https://api.openai.com/v1"
+        self.timeout = timeout
+        self.max_retries = max_retries or 2
+        self.default_headers = default_headers
+        self.http_client = http_client
+
+        # Internal attributes
+        self._default_model = model or "gpt-4o-mini"
+        self._default_sampling_params = {}
+
+        if temperature is not None:
+            self._default_sampling_params["temperature"] = temperature
+        if max_completion_tokens is not None:
+            self._default_sampling_params["max_new_tokens"] = max_completion_tokens
+        if top_p is not None:
+            self._default_sampling_params["top_p"] = top_p
+        if seed is not None:
+            self._default_sampling_params["seed"] = seed
+
+        # Additional kwargs for LLMClient
+        self._client_kwargs = kwargs
+
+        # Cache of LLMClient instances by model
+        self._clients: dict[str, Any] = {}
+
+        # Create the default client
+        self._clients[self._default_model] = self._create_client(self._default_model)
+
+        # Create nested resources
+        self._chat = MockChat(self)
+        self._completions = MockTextCompletions(self)
+
+    def _create_client(self, model: str) -> Any:
+        """Create a new LLMClient for the given model."""
+        return LLMClient(
+            model,
+            **self._default_sampling_params,
+            **self._client_kwargs,
+        )
+
+    def _create_client_with_params(self, model: str, params: dict[str, Any]) -> Any:
+        """Create a new LLMClient with specific sampling parameters."""
+        return LLMClient(
+            model,
+            **params,
+            **self._client_kwargs,
+        )
+
+    def _get_or_create_client(self, model: str) -> Any:
+        """Get existing client or create new one for the model."""
+        if model not in self._clients:
+            self._clients[model] = self._create_client(model)
+        return self._clients[model]
+
+    @property
+    def chat(self) -> MockChat:
+        """Access the chat resource."""
+        return self._chat
+
+    @property
+    def completions(self) -> MockTextCompletions:
+        """Access the text completions resource."""
+        return self._completions
+
+    async def close(self) -> None:
+        """
+        Close the client and clean up resources.
+
+        This is provided for compatibility with AsyncOpenAI's close() method.
+        Currently a no-op as LLMClient instances don't need explicit cleanup.
+        """
+        # No cleanup needed for LLMClient instances
+        pass
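Beyond the non-streaming example in the module docstring, the stream wrapper above can be exercised like this; a minimal sketch assuming a model name your environment has credentials for:

import asyncio

from lm_deluge.mock_openai import MockAsyncOpenAI


async def main():
    client = MockAsyncOpenAI(model="claude-sonnet-4")
    # With stream=True, create() returns the _AsyncStreamWrapper defined above,
    # which yields OpenAI-style ChatCompletionChunk objects.
    stream = await client.chat.completions.create(
        model="claude-sonnet-4",
        messages=[{"role": "user", "content": "Stream a two-line poem."}],
        stream=True,
    )
    async for chunk in stream:
        delta = chunk.choices[0].delta
        if delta.content:
            print(delta.content, end="", flush=True)


asyncio.run(main())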
lm_deluge/models/__init__.py CHANGED
@@ -15,7 +15,9 @@ from .fireworks import FIREWORKS_MODELS
 from .google import GOOGLE_MODELS
 from .grok import XAI_MODELS
 from .groq import GROQ_MODELS
+from .kimi import KIMI_MODELS
 from .meta import META_MODELS
+from .minimax import MINIMAX_MODELS
 from .mistral import MISTRAL_MODELS
 from .openai import OPENAI_MODELS
 from .openrouter import OPENROUTER_MODELS
@@ -132,7 +134,9 @@ for model_dict in [
     FIREWORKS_MODELS,
     GOOGLE_MODELS,
     XAI_MODELS,
+    KIMI_MODELS,
     META_MODELS,
+    MINIMAX_MODELS,
     MISTRAL_MODELS,
     OPENAI_MODELS,
     OPENROUTER_MODELS,
lm_deluge/models/kimi.py ADDED
@@ -0,0 +1,34 @@
+KIMI_MODELS = {
+    "kimi-k2": {
+        "id": "kimi-k2",
+        "name": "kimi-k2-0905-preview",
+        "api_base": "https://api.moonshot.ai/anthropic/v1",
+        "api_key_env_var": "MOONSHOT_API_KEY",
+        "supports_json": True,
+        "api_spec": "anthropic",
+    },
+    "kimi-k2-turbo": {
+        "id": "kimi-k2-turbo",
+        "name": "kimi-k2-turbo-preview",
+        "api_base": "https://api.moonshot.ai/anthropic/v1",
+        "api_key_env_var": "MOONSHOT_API_KEY",
+        "supports_json": True,
+        "api_spec": "anthropic",
+    },
+    "kimi-k2-thinking": {
+        "id": "kimi-k2-thinking",
+        "name": "kimi-k2-thinking",
+        "api_base": "https://api.moonshot.ai/anthropic/v1",
+        "api_key_env_var": "MOONSHOT_API_KEY",
+        "supports_json": True,
+        "api_spec": "anthropic",
+    },
+    "kimi-k2-thinking-turbo": {
+        "id": "kimi-k2-thinking-turbo",
+        "name": "kimi-k2-thinking-turbo",
+        "api_base": "https://api.moonshot.ai/anthropic/v1",
+        "api_key_env_var": "MOONSHOT_API_KEY",
+        "supports_json": True,
+        "api_spec": "anthropic",
+    },
+}
lm_deluge/models/minimax.py ADDED
@@ -0,0 +1,10 @@
+MINIMAX_MODELS = {
+    "minimax-m2": {
+        "id": "minimax-m2",
+        "name": "MiniMax-M2",
+        "api_base": "https://api.minimax.io/anthropic/v1",
+        "api_key_env_var": "MINIMAX_API_KEY",
+        "supports_json": False,
+        "api_spec": "anthropic",
+    }
+}
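A minimal sketch of exercising the newly registered Kimi and MiniMax entries through LLMClient, mirroring how mock_openai.py drives it (assumes MOONSHOT_API_KEY or MINIMAX_API_KEY is set; only the calls this diff itself uses are shown):

import asyncio

from lm_deluge.client import LLMClient
from lm_deluge.prompt import Conversation, Message, Text


async def main():
    client = LLMClient("kimi-k2")  # or "minimax-m2", "kimi-k2-thinking", ...
    conversation = Conversation([Message(role="user", parts=[Text("Say hello.")])])
    response = await client.start(conversation)
    if response.content:
        # Join the text parts, the same way the mock client extracts its reply.
        print("".join(p.text for p in response.content.parts if isinstance(p, Text)))


asyncio.run(main())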
{lm_deluge-0.0.68.dist-info → lm_deluge-0.0.70.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.68
+Version: 0.0.70
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10
@@ -23,6 +23,8 @@ Requires-Dist: pdf2image
 Requires-Dist: pillow
 Requires-Dist: fastmcp>=2.4
 Requires-Dist: rich
+Provides-Extra: openai
+Requires-Dist: openai>=1.0.0; extra == "openai"
 Dynamic: license-file
 
 # lm-deluge
{lm_deluge-0.0.68.dist-info → lm_deluge-0.0.70.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
-lm_deluge/__init__.py,sha256=LKKIcqQoQyDpTck6fnB7iAs75BnfNNa3Bj5Nz7KU4Hk,376
+lm_deluge/__init__.py,sha256=zF5lAitfgJ8A28IXJ5BE9OUCqGOqSnGOWn3ZIlizNyY,822
 lm_deluge/batches.py,sha256=Km6QM5_7BlF2qEyo4WPlhkaZkpzrLqf50AaveHXQOoY,25127
 lm_deluge/cache.py,sha256=xO2AIYvP3tUpTMKQjwQQYfGRJSRi6e7sMlRhLjsS-u4,4873
 lm_deluge/cli.py,sha256=Ilww5gOw3J5v0NReq_Ra4hhxU4BCIJBl1oTGxJZKedc,12065
@@ -8,6 +8,7 @@ lm_deluge/embed.py,sha256=CO-TOlC5kOTAM8lcnicoG4u4K664vCBwHF1vHa-nAGg,13382
 lm_deluge/errors.py,sha256=oHjt7YnxWbh-eXMScIzov4NvpJMo0-2r5J6Wh5DQ1tk,209
 lm_deluge/file.py,sha256=PTmlJQ-IaYcYUFun9V0bJ1NPVP84edJrR0hvCMWFylY,19697
 lm_deluge/image.py,sha256=5AMXmn2x47yXeYNfMSMAOWcnlrOxxOel-4L8QCJwU70,8928
+lm_deluge/mock_openai.py,sha256=dYZDBKgTepQ-yd5zPRYBgMRXO6TeLqiM1fDQe622Ono,22110
 lm_deluge/prompt.py,sha256=Bgszws8-3GPefiVRa-Mht4tfyfoqD_hV5MX1nrbkJn0,63465
 lm_deluge/request_context.py,sha256=cBayMFWupWhde2OjRugW3JH-Gin-WFGc6DK2Mb4Prdc,2576
 lm_deluge/rerank.py,sha256=-NBAJdHz9OB-SWWJnHzkFmeVO4wR6lFV7Vw-SxG7aVo,11457
@@ -19,10 +20,11 @@ lm_deluge/api_requests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBd
 lm_deluge/api_requests/anthropic.py,sha256=QGq3G5jJIGcoM2HdRt73GgkvZs4GOViyjYexWex05Vk,8927
 lm_deluge/api_requests/base.py,sha256=GCcydwBRx4_xAuYLvasXlyj-TgqvKAVhVvxRfJkvPbY,9471
 lm_deluge/api_requests/bedrock.py,sha256=Uppne03GcIEk1tVYzoGu7GXK2Sg94a_xvFTLDRN_phY,15412
+lm_deluge/api_requests/chat_reasoning.py,sha256=sJvstvKFqsSBUjYcwxzGt2_FH4cEp3Z6gKcBPyPjGwk,236
 lm_deluge/api_requests/common.py,sha256=BZ3vRO5TB669_UsNKugkkuFSzoLHOYJIKt4nV4sf4vc,422
 lm_deluge/api_requests/gemini.py,sha256=4uD7fQl0yWyAvYkPNi3oO1InBnvYfo5_QR6k-va-2GI,7838
 lm_deluge/api_requests/mistral.py,sha256=8JZP2CDf1XZfaPcTk0WS4q-VfYYj58ptpoH8LD3MQG4,4528
-lm_deluge/api_requests/openai.py,sha256=d1Ddf5sSutx9Ti1riwOEkeADnhYG7Y4vQm2DOhKl67I,25925
+lm_deluge/api_requests/openai.py,sha256=ezlGYNGHFvQGgs-xuxhDDeiEembHhVh_KqJBdRBqSlM,26038
 lm_deluge/api_requests/response.py,sha256=vG194gAH5p7ulpNy4qy5Pryfb1p3ZV21-YGoj__ru3E,7436
 lm_deluge/api_requests/deprecated/bedrock.py,sha256=WrcIShCoO8JCUSlFOCHxg6KQCNTZfw3TpYTvSpYk4mA,11320
 lm_deluge/api_requests/deprecated/cohere.py,sha256=KgDScD6_bWhAzOY5BHZQKSA3kurt4KGENqC4wLsGmcU,5142
@@ -42,7 +44,7 @@ lm_deluge/llm_tools/locate.py,sha256=lYNbKTmy9dTvj0lEQkOQ7yrxyqsgYzjD0C_byJKI_4w
 lm_deluge/llm_tools/ocr.py,sha256=7fDlvs6uUOvbxMasvGGNJx5Fj6biM6z3lijKZaGN26k,23
 lm_deluge/llm_tools/score.py,sha256=9oGA3-k2U5buHQXkXaEI9M4Wb5yysNhTLsPbGeghAlQ,2580
 lm_deluge/llm_tools/translate.py,sha256=iXyYvQZ8bC44FWhBk4qpdqjKM1WFF7Shq-H2PxhPgg4,1452
-lm_deluge/models/__init__.py,sha256=a2xzQNG2axdMaSzoLbzdOKBM5EVOLztvlo8E1k-brqM,4516
+lm_deluge/models/__init__.py,sha256=54H24K_eADbfdEH9aNORrNEXvDLZCQ4TEekeLiWljSE,4619
 lm_deluge/models/anthropic.py,sha256=5j75sB40yZzT1wwKC7Dh0f2Y2cXnp8yxHuXW63PCuns,6285
 lm_deluge/models/bedrock.py,sha256=g1PbfceSRH2lWST3ja0mUlF3oTq4e4T-si6RMe7qXgg,4888
 lm_deluge/models/cerebras.py,sha256=u2FMXJF6xMr0euDRKLKMo_NVTOcvSrrEpehbHr8sSeE,2050
@@ -52,7 +54,9 @@ lm_deluge/models/fireworks.py,sha256=yvt2Ggzye4aUqCqY74ta67Vu7FrQaLFjdFtN4P7D-dc
 lm_deluge/models/google.py,sha256=Hr2MolQoaeY85pKCGO7k7OH_1nQJdrwMgrJbfz5bI8w,5387
 lm_deluge/models/grok.py,sha256=TDzr8yfTaHbdJhwMA-Du6L-efaKFJhjTQViuVElCCHI,2566
 lm_deluge/models/groq.py,sha256=Mi5WE1xOBGoZlymD0UN6kzhH_NOmfJYU4N2l-TO0Z8Q,2552
+lm_deluge/models/kimi.py,sha256=1voigLdNO2CxpWv0KDpQPP3Wolx5WrqgAlYL9ObJFuQ,1117
 lm_deluge/models/meta.py,sha256=BBgnscL1gMcIdPbRqrlDl_q9YAYGSrkw9JkAIabXtLs,1883
+lm_deluge/models/minimax.py,sha256=rwW9gNotAYfDVtMlqmSYegN6GoZM_9DSNNZU2yPOmaU,275
 lm_deluge/models/mistral.py,sha256=x67o5gckBGmPcIGdVbS26XZAYFKBYM4tsxEAahGp8bk,4323
 lm_deluge/models/openai.py,sha256=6J4eAt6Iu5RopokyldUQzRlviFBXBqhLqpVP5tztzqI,11074
 lm_deluge/models/openrouter.py,sha256=O-Po4tmHjAqFIVU96TUL0QnK01R4e2yDN7Z4sYJ-CuE,2120
@@ -65,8 +69,8 @@ lm_deluge/util/logprobs.py,sha256=UkBZakOxWluaLqHrjARu7xnJ0uCHVfLGHJdnYlEcutk,11
 lm_deluge/util/spatial.py,sha256=BsF_UKhE-x0xBirc-bV1xSKZRTUhsOBdGqsMKme20C8,4099
 lm_deluge/util/validation.py,sha256=hz5dDb3ebvZrZhnaWxOxbNSVMI6nmaOODBkk0htAUhs,1575
 lm_deluge/util/xml.py,sha256=Ft4zajoYBJR3HHCt2oHwGfymGLdvp_gegVmJ-Wqk4Ck,10547
-lm_deluge-0.0.68.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
-lm_deluge-0.0.68.dist-info/METADATA,sha256=dSD-PnK2RiwWGsC_-ui4gJ6cQvcXQODAKfod6xUjoXQ,13443
-lm_deluge-0.0.68.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lm_deluge-0.0.68.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
-lm_deluge-0.0.68.dist-info/RECORD,,
+lm_deluge-0.0.70.dist-info/licenses/LICENSE,sha256=uNNXGXPCw2TC7CUs7SEBkA-Mz6QBQFWUUEWDMgEs1dU,1058
+lm_deluge-0.0.70.dist-info/METADATA,sha256=URQWK2LB1itY_viE7mv0ijJOfUolZMDRzvK-Pdzmn_o,13514
+lm_deluge-0.0.70.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lm_deluge-0.0.70.dist-info/top_level.txt,sha256=hqU-TJX93yBwpgkDtYcXyLr3t7TLSCCZ_reytJjwBaE,10
+lm_deluge-0.0.70.dist-info/RECORD,,