lm-deluge 0.0.35 → 0.0.36 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.

@@ -57,9 +57,9 @@ def _build_anthropic_request(
  # handle thinking
  if model.reasoning_model and sampling_params.reasoning_effort:
  # translate reasoning effort of low, medium, high to budget tokens
- budget = {
- "minimal": 256, "low": 1024, "medium": 4096, "high": 16384
- }.get(sampling_params.reasoning_effort)
+ budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
+ sampling_params.reasoning_effort
+ )
  request_json["thinking"] = {
  "type": "enabled",
  "budget_tokens": budget,
@@ -46,7 +46,9 @@ async def _build_gemini_request(
  else:
  thinking_config = {"includeThoughts": True}
  if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
- budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[effort]
+ budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
+ effort
+ ]
  thinking_config["thinkingBudget"] = budget
  request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
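Both hunks above (the Anthropic and Gemini request builders) are formatting-only changes: the reasoning-effort-to-token-budget mapping is unchanged, with Anthropic putting the budget in a top-level "thinking" block and Gemini (flash models only) nesting it under generationConfig.thinkingConfig. A minimal sketch of that shared mapping, not part of the diff, with the dict values and field names copied from the hunks:

    # Illustrative only: reproduces the effort -> budget lookup shown in both hunks.
    BUDGETS = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}

    def anthropic_thinking(effort: str) -> dict:
        # dict.get() returns None for unknown efforts, matching the Anthropic hunk
        return {"type": "enabled", "budget_tokens": BUDGETS.get(effort)}

    def gemini_thinking_config(effort: str) -> dict:
        # the Gemini hunk indexes the dict directly, so an unknown effort would raise KeyError
        return {"includeThoughts": True, "thinkingBudget": BUDGETS[effort]}

    assert anthropic_thinking("medium")["budget_tokens"] == 4096
    assert gemini_thinking_config("low")["thinkingBudget"] == 1024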
@@ -47,7 +47,9 @@ async def _build_oa_chat_request(
  else:
  effort = "low"
  if effort == "minimal" and "gpt-5" not in model.id:
- print("WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'.")
+ print(
+ "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
+ )
  effort = "low"
  request_json["reasoning_effort"] = effort
  else:
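Here too only the warning's line-wrapping changed; the downgrade behavior is the same. A condensed sketch of the logic in the hunk (the warning string and the "gpt-5" substring check are copied from the diff; the helper name is made up for illustration):

    def resolve_reasoning_effort(effort: str, model_id: str) -> str:
        # 'minimal' is only passed through for gpt-5 family models; otherwise fall back to 'low'
        if effort == "minimal" and "gpt-5" not in model_id:
            print("WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'.")
            return "low"
        return effort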
@@ -89,9 +89,10 @@ class APIResponse:
  + self.usage.output_tokens * api_model.output_cost / 1e6
  )
  elif self.content is not None and self.completion is not None:
- print(
- f"Warning: Completion provided without token counts for model {self.model_internal}."
- )
+ pass
+ # print(
+ # f"Warning: Completion provided without token counts for model {self.model_internal}."
+ # )
  if isinstance(self.prompt, Conversation):
  self.prompt = self.prompt.to_log() # avoid keeping images in memory
 
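This hunk only silences the "completion without token counts" warning; behavior is otherwise unchanged, and cost is still computed solely from token usage as shown in the surrounding context (input_tokens * input_cost / 1e6 + output_tokens * output_cost / 1e6, i.e. per-million-token pricing). A tiny illustration of that arithmetic with made-up numbers:

    # Hypothetical counts and per-1M-token prices, only to show the cost formula from the context lines.
    input_tokens, output_tokens = 1_200, 350
    input_cost, output_cost = 3.0, 15.0  # USD per million tokens (illustrative, not real pricing)
    cost = input_tokens * input_cost / 1e6 + output_tokens * output_cost / 1e6
    print(round(cost, 5))  # 0.00885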
lm_deluge/cli.py ADDED
@@ -0,0 +1,300 @@
+ # import argparse
+ # import asyncio
+ # import os
+ # import sys
+ # from typing import Optional
+
+ # from .client import LLMClient
+ # from .models import registry, APIModel
+ # from .prompt import Conversation, Message
+
+
+ # def _ensure_api_key_for_model(model_id: str, passed_api_key: Optional[str] = None):
+ # model: APIModel = APIModel.from_registry(model_id)
+ # env_var = model.api_key_env_var or ""
+ # if not env_var:
+ # return # Some providers (e.g., Bedrock entries) don't use a single key
+ # if os.getenv(env_var):
+ # return
+ # if passed_api_key:
+ # os.environ[env_var] = passed_api_key
+ # return
+ # # If we get here, interactive prompting should occur at the UI layer.
+ # # In non-interactive contexts, we will error before calling this without key.
+
+
+ # def run_non_interactive(model_id: str, prompt_text: str, api_key: Optional[str]):
+ # _ensure_api_key_for_model(model_id, api_key)
+ # client = LLMClient(model_names=[model_id], progress="manual")
+ # # Single round, print completion only to stdout
+ # completions = asyncio.run(
+ # client.process_prompts_async(
+ # [Conversation.user(prompt_text)],
+ # return_completions_only=True,
+ # show_progress=False,
+ # )
+ # )
+ # out = completions[0] if completions and completions[0] is not None else ""
+ # # Write raw completion to stdout with no extra decoration
+ # sys.stdout.write(out)
+ # if out and not out.endswith("\n"):
+ # sys.stdout.write("\n")
+
+
+ # # -------- Textual UI (interactive chat) --------
+ # try:
+ # from textual.app import App, ComposeResult
+ # from textual.containers import Container, Horizontal
+ # from textual.widgets import Footer, Header, Input, Static, Button, ListView, ListItem, Label
+ # from textual.widgets._rich_log import RichLog
+ # from textual.reactive import reactive
+ # TEXTUAL_AVAILABLE = True
+ # except Exception: # pragma: no cover - textual may not be installed in some dev envs
+ # TEXTUAL_AVAILABLE = False
+
+
+ # if TEXTUAL_AVAILABLE:
+ # class ModelPicker(Static):
+ # """Minimal model picker: arrows to move, Enter to select."""
+
+ # def __init__(self, preselected: Optional[str] = None):
+ # super().__init__()
+ # self.preselected = preselected
+
+ # def compose(self) -> ComposeResult: # type: ignore[override]
+ # # Keep it terminal-y: one-line hint + list. No buttons.
+ # yield Static("Pick a model (Enter)", classes="hint")
+ # list_items: list[ListItem] = []
+ # # Curated small set to avoid scrollbars
+ # preferred = [
+ # "gpt-5",
+ # "gpt-5-chat",
+ # "gpt-5-mini",
+ # "claude-4-sonnet",
+ # "gemini-2.5-pro",
+ # "gemini-2.5-flash",
+ # "gemini-2.0-flash",
+ # ]
+ # for mid in preferred:
+ # if mid in registry:
+ # list_items.append(ListItem(Label(mid)))
+ # yield ListView(*list_items, classes="model-list")
+
+ # def on_mount(self) -> None: # type: ignore[override]
+ # # Focus the list so Enter works immediately
+ # self.query_one(ListView).focus()
+
+ # def get_selected(self) -> Optional[str]:
+ # listview = self.query_one(ListView)
+ # if not listview.index is None and 0 <= listview.index < len(listview.children):
+ # label = listview.children[listview.index].query_one(Label)
+ # return label.renderable if isinstance(label.renderable, str) else str(label.renderable)
+ # return None
+
+ # def on_key(self, event): # type: ignore[override]
+ # # Select current item on Enter
+ # try:
+ # key = getattr(event, "key", None)
+ # except Exception:
+ # key = None
+ # if key == "enter":
+ # sel = self.get_selected()
+ # if sel:
+ # # Ask app to proceed with the chosen model
+ # getattr(self.app, "model_chosen", lambda *_: None)(sel) # type: ignore[attr-defined]
+
+
+ # class ApiKeyPrompt(Static):
+ # def __init__(self, env_var: str):
+ # super().__init__()
+ # self.env_var = env_var
+ # self.input = Input(password=True, placeholder=f"Enter {env_var}")
+
+ # def compose(self) -> ComposeResult: # type: ignore[override]
+ # yield Static(f"API key required: set {self.env_var}", classes="title")
+ # yield self.input
+ # yield Button("Save", id="save-key", variant="primary")
+
+ # def value(self) -> str:
+ # return self.input.value
+
+
+ # class MessagesView(RichLog):
+ # def __init__(self, **kwargs):
+ # # Terminal-like log with markup and auto-scroll
+ # super().__init__(wrap=True, markup=True, auto_scroll=True, **kwargs)
+
+ # def append_user(self, text: str):
+ # self.write(f"[bold cyan]You:[/bold cyan] {text}")
+
+ # def append_assistant(self, text: str):
+ # self.write(f"[bold magenta]Model:[/bold magenta] {text}")
+
+
+ # class ChatInput(Horizontal):
+ # def compose(self) -> ComposeResult: # type: ignore[override]
+ # self.input = Input(placeholder="Type message, Enter to send")
+ # yield self.input
+
+
+ # class DelugeApp(App):
+ # CSS = """
+ # #screen { height: 100%; }
+ # .chat { height: 1fr; padding: 0 1; }
+ # .composer { dock: bottom; height: 3; }
+ # """
+
+ # BINDINGS = [
+ # ("ctrl+c", "quit", "Quit"),
+ # ]
+
+ # model_id = reactive("")
+ # api_env_var = reactive("")
+
+ # def __init__(self, model_arg: Optional[str], api_key_arg: Optional[str]):
+ # super().__init__()
+ # self._model_arg = model_arg
+ # self._api_key_arg = api_key_arg
+ # self._conversation = Conversation.system("You are a helpful assistant.")
+ # self._client = None
+
+ # def compose(self) -> ComposeResult: # type: ignore[override]
+ # yield Header(show_clock=True)
+ # self.body = Container(id="screen")
+ # yield self.body
+ # yield Footer()
+
+ # def on_mount(self): # type: ignore[override]
+ # # Step 1: pick model if not provided
+ # if not self._model_arg:
+ # self.model_picker = ModelPicker()
+ # self.body.mount(self.model_picker)
+ # else:
+ # self.model_id = self._model_arg
+ # self._after_model_selected()
+
+ # def action_quit(self) -> None: # type: ignore[override]
+ # self.exit()
+
+ # def _after_model_selected(self):
+ # # Resolve API requirement
+ # model = APIModel.from_registry(self.model_id)
+ # self.api_env_var = model.api_key_env_var or ""
+ # if self.api_env_var and not os.getenv(self.api_env_var):
+ # if self._api_key_arg:
+ # os.environ[self.api_env_var] = self._api_key_arg
+ # self._show_chat()
+ # else:
+ # # Prompt for key
+ # self.body.remove_children()
+ # self.key_prompt = ApiKeyPrompt(self.api_env_var)
+ # self.body.mount(self.key_prompt)
+ # else:
+ # self._show_chat()
+
+ # def model_chosen(self, sel: str) -> None:
+ # """Called by ModelPicker when Enter is pressed on a selection."""
+ # self.model_id = sel
+ # self._after_model_selected()
+
+ # def _show_chat(self):
+ # self.body.remove_children()
+ # # Build UI
+ # self.messages = MessagesView(classes="chat")
+ # self.composer = ChatInput(classes="composer")
+ # self.body.mount(self.messages)
+ # self.body.mount(self.composer)
+ # # Focus input after mounting
+ # self.set_focus(self.composer.input)
+ # # Init client
+ # self._client = LLMClient(model_names=[self.model_id], progress="manual")
+ # # Update header subtitle
+ # self.query_one(Header).sub_title = f"Model: {self.model_id}"
+
+ # async def _send_and_receive(self, text: str):
+ # # Append user message
+ # self._conversation.add(Message.user(text))
+ # self.messages.append_user(text)
+ # # Call model (non-streaming for simplicity across providers)
+ # responses = await self._client.process_prompts_async(
+ # [self._conversation], return_completions_only=False, show_progress=False
+ # )
+ # resp = responses[0]
+ # if resp and resp.completion:
+ # self._conversation.add(Message.ai(resp.completion))
+ # self.messages.append_assistant(resp.completion)
+ # else:
+ # self.messages.append_assistant("<no response>")
+
+ # async def on_button_pressed(self, event): # type: ignore[override]
+ # if hasattr(event.button, "id"):
+ # if event.button.id == "save-key":
+ # key = self.key_prompt.value().strip()
+ # if self.api_env_var and key:
+ # os.environ[self.api_env_var] = key
+ # self._show_chat()
+ # elif event.button.id == "send":
+ # text = self.composer.input.value.strip()
+ # if text:
+ # self.composer.input.value = ""
+ # await self._send_and_receive(text)
+
+ # async def on_input_submitted(self, event: Input.Submitted): # type: ignore[override]
+ # if isinstance(event.input.parent, ChatInput):
+ # text = event.value.strip()
+ # if text:
+ # self.composer.input.value = ""
+ # await self._send_and_receive(text)
+
+
+ # def run_interactive(model: Optional[str], api_key: Optional[str]):
+ # if not TEXTUAL_AVAILABLE:
+ # sys.stderr.write(
+ # "Textual is not installed. Please install with `pip install textual` or reinstall lm_deluge.\n"
+ # )
+ # sys.exit(2)
+ # app = DelugeApp(model, api_key) # type: ignore[name-defined]
+ # app.run()
+
+
+ # def main():
+ # parser = argparse.ArgumentParser(prog="deluge", description="Deluge CLI")
+ # parser.add_argument("prompt", nargs="*", help="Prompt text (non-interactive -p only)")
+ # parser.add_argument("--model", dest="model", help="Model ID to use")
+ # parser.add_argument("--api-key", dest="api_key", help="API key for chosen model provider")
+ # parser.add_argument(
+ # "-p",
+ # dest="print_mode",
+ # action="store_true",
+ # help="Print single completion to stdout (non-interactive)",
+ # )
+
+ # args = parser.parse_args()
+
+ # if args.print_mode:
+ # # Determine prompt text
+ # prompt_text = " ".join(args.prompt).strip()
+ # if not prompt_text and not sys.stdin.isatty():
+ # prompt_text = sys.stdin.read()
+ # if not prompt_text:
+ # sys.stderr.write("No prompt provided. Pass text or pipe input.\n")
+ # sys.exit(2)
+
+ # # Determine model
+ # model_id = args.model or os.getenv("DELUGE_DEFAULT_MODEL") or "gpt-4o-mini"
+ # # Require API key non-interactively if provider needs it and not set
+ # env_var = APIModel.from_registry(model_id).api_key_env_var or ""
+ # if env_var and not (os.getenv(env_var) or args.api_key):
+ # sys.stderr.write(
+ # f"Missing API key. Set {env_var} or pass --api-key.\n"
+ # )
+ # sys.exit(2)
+ # run_non_interactive(model_id, prompt_text, args.api_key)
+ # return
+
+ # # Interactive Textual chat
+ # run_interactive(args.model, args.api_key)
+
+
+ # if __name__ == "__main__":
+ # main()
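Note that the entire lm_deluge/cli.py file is added commented out, so 0.0.36 does not actually install a working deluge command; this is dormant scaffolding. For orientation, the non-interactive path it sketches reduces to the library's existing public API, roughly as below (a sketch assuming the installed module paths lm_deluge.client and lm_deluge.prompt correspond to the relative imports in the file; the model id is just an example):

    # What run_non_interactive() would boil down to if the CLI were enabled.
    import asyncio
    from lm_deluge.client import LLMClient
    from lm_deluge.prompt import Conversation

    client = LLMClient(model_names=["gpt-4o-mini"], progress="manual")
    completions = asyncio.run(
        client.process_prompts_async(
            [Conversation.user("Say hello")],
            return_completions_only=True,
            show_progress=False,
        )
    )
    print(completions[0] or "")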
lm_deluge/client.py CHANGED
@@ -22,6 +22,7 @@ from .models import APIModel, registry
  from .request_context import RequestContext
  from .tracker import StatusTracker
 
+
  # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
  class _LLMClient(BaseModel):
  """
@@ -246,6 +247,7 @@ class _LLMClient(BaseModel):
  self, context: RequestContext, retry_queue: asyncio.Queue | None = None
  ) -> APIResponse:
  """Handle caching and single HTTP call for a request. Failed requests go to retry queue."""
+
  # Check cache first
  def _maybe_postprocess(response: APIResponse):
  if self.postprocess:
@@ -712,6 +714,7 @@ class _LLMClient(BaseModel):
  batch_ids, provider, poll_interval=30
  )
 
+
  # factory function -- allows positional model names,
  # keeps pydantic validation, without sacrificing IDE support
  @overload
@@ -736,7 +739,7 @@ def LLMClient(
  top_logprobs: int | None = None,
  force_local_mcp: bool = False,
  progress: Literal["rich", "tqdm", "manual"] = "rich",
- postprocess: Callable[[APIResponse], APIResponse] | None = None
+ postprocess: Callable[[APIResponse], APIResponse] | None = None,
  ) -> _LLMClient: ...
 
 
@@ -762,7 +765,7 @@ def LLMClient(
  top_logprobs: int | None = None,
  force_local_mcp: bool = False,
  progress: Literal["rich", "tqdm", "manual"] = "rich",
- postprocess: Callable[[APIResponse], APIResponse] | None = None
+ postprocess: Callable[[APIResponse], APIResponse] | None = None,
  ) -> _LLMClient: ...
 
 
@@ -787,7 +790,7 @@ def LLMClient(
  top_logprobs: int | None = None,
  force_local_mcp: bool = False,
  progress: Literal["rich", "tqdm", "manual"] = "rich",
- postprocess: Callable[[APIResponse], APIResponse] | None = None
+ postprocess: Callable[[APIResponse], APIResponse] | None = None,
  ) -> _LLMClient:
  """
  Create an LLMClient with model_names as a positional argument.
@@ -824,5 +827,5 @@ def LLMClient(
  top_logprobs=top_logprobs,
  force_local_mcp=force_local_mcp,
  progress=progress,
- postprocess=postprocess
+ postprocess=postprocess,
  )
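The client.py hunks above only add trailing commas after the postprocess parameter and a few blank lines (formatter cleanup); the LLMClient factory signature itself is unchanged. A hedged usage sketch of that parameter, based only on the annotation shown above (Callable[[APIResponse], APIResponse]) and the keyword style used in the commented-out CLI; the strip_whitespace hook, import path, and model id are illustrative:

    from lm_deluge.client import LLMClient  # assumed installed module path

    def strip_whitespace(resp):
        # receives and returns an APIResponse, per the postprocess annotation
        if resp.completion:
            resp.completion = resp.completion.strip()
        return resp

    client = LLMClient(model_names=["gpt-4o-mini"], postprocess=strip_whitespace)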