lm-deluge 0.0.34__tar.gz → 0.0.36__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lm-deluge might be problematic.

Files changed (79)
  1. {lm_deluge-0.0.34/src/lm_deluge.egg-info → lm_deluge-0.0.36}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/pyproject.toml +6 -2
  3. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/anthropic.py +1 -1
  4. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/gemini.py +4 -2
  5. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/openai.py +17 -4
  6. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/response.py +4 -3
  7. lm_deluge-0.0.36/src/lm_deluge/cli.py +300 -0
  8. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/client.py +22 -73
  9. lm_deluge-0.0.36/src/lm_deluge/models/__init__.py +144 -0
  10. lm_deluge-0.0.36/src/lm_deluge/models/anthropic.py +124 -0
  11. lm_deluge-0.0.36/src/lm_deluge/models/bedrock.py +99 -0
  12. lm_deluge-0.0.36/src/lm_deluge/models/cerebras.py +57 -0
  13. lm_deluge-0.0.36/src/lm_deluge/models/cohere.py +98 -0
  14. lm_deluge-0.0.36/src/lm_deluge/models/deepseek.py +27 -0
  15. lm_deluge-0.0.36/src/lm_deluge/models/fireworks.py +16 -0
  16. lm_deluge-0.0.36/src/lm_deluge/models/google.py +153 -0
  17. lm_deluge-0.0.36/src/lm_deluge/models/grok.py +38 -0
  18. lm_deluge-0.0.36/src/lm_deluge/models/groq.py +74 -0
  19. lm_deluge-0.0.36/src/lm_deluge/models/meta.py +65 -0
  20. lm_deluge-0.0.36/src/lm_deluge/models/mistral.py +110 -0
  21. lm_deluge-0.0.36/src/lm_deluge/models/openai.py +318 -0
  22. lm_deluge-0.0.36/src/lm_deluge/models/openrouter.py +1 -0
  23. lm_deluge-0.0.36/src/lm_deluge/models/together.py +112 -0
  24. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/prompt.py +2 -2
  25. lm_deluge-0.0.36/src/lm_deluge/util/harmony.py +47 -0
  26. {lm_deluge-0.0.34 → lm_deluge-0.0.36/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  27. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/SOURCES.txt +17 -1
  28. lm_deluge-0.0.34/src/lm_deluge/models.py +0 -1305
  29. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/LICENSE +0 -0
  30. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/README.md +0 -0
  31. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/setup.cfg +0 -0
  32. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/__init__.py +0 -0
  33. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/agent.py +0 -0
  34. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/__init__.py +0 -0
  35. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/base.py +0 -0
  36. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/bedrock.py +0 -0
  37. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/common.py +0 -0
  38. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  39. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  40. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  41. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  42. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  43. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/mistral.py +0 -0
  44. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/batches.py +0 -0
  45. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  46. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  47. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  48. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  49. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/base.py +0 -0
  50. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/openai.py +0 -0
  51. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/cache.py +0 -0
  52. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/config.py +0 -0
  53. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/embed.py +0 -0
  54. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/errors.py +0 -0
  55. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/file.py +0 -0
  56. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/gemini_limits.py +0 -0
  57. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/image.py +0 -0
  58. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/__init__.py +0 -0
  59. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/classify.py +0 -0
  60. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/extract.py +0 -0
  61. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/locate.py +0 -0
  62. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/ocr.py +0 -0
  63. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/score.py +0 -0
  64. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/translate.py +0 -0
  65. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/request_context.py +0 -0
  66. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/rerank.py +0 -0
  67. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/tool.py +0 -0
  68. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/tracker.py +0 -0
  69. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/usage.py +0 -0
  70. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/json.py +0 -0
  71. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/logprobs.py +0 -0
  72. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/spatial.py +0 -0
  73. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/validation.py +0 -0
  74. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/xml.py +0 -0
  75. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  76. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/requires.txt +0 -0
  77. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/top_level.txt +0 -0
  78. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/tests/test_builtin_tools.py +0 -0
  79. {lm_deluge-0.0.34 → lm_deluge-0.0.36}/tests/test_native_mcp_server.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lm_deluge
-Version: 0.0.34
+Version: 0.0.36
 Summary: Python utility for using LLM API models.
 Author-email: Benjamin Anderson <ben@trytaylor.ai>
 Requires-Python: >=3.10

pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.34"
+version = "0.0.36"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
@@ -28,5 +28,9 @@ dependencies = [
     "pdf2image",
     "pillow",
     "fastmcp>=2.4",
-    "rich"
+    "rich",
+    # "textual>=0.58.0"
 ]
+
+# [project.scripts]
+# deluge = "lm_deluge.cli:main"

src/lm_deluge/api_requests/anthropic.py
@@ -57,7 +57,7 @@ def _build_anthropic_request(
     # handle thinking
     if model.reasoning_model and sampling_params.reasoning_effort:
         # translate reasoning effort of low, medium, high to budget tokens
-        budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
            sampling_params.reasoning_effort
        )
        request_json["thinking"] = {

src/lm_deluge/api_requests/gemini.py
@@ -45,8 +45,10 @@ async def _build_gemini_request(
         thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
     else:
         thinking_config = {"includeThoughts": True}
-        if effort in {"low", "medium", "high"} and "flash" in model.id:
-            budget = {"low": 1024, "medium": 4096, "high": 16384}[effort]
+        if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
+            budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
+                effort
+            ]
             thinking_config["thinkingBudget"] = budget
     request_json["generationConfig"]["thinkingConfig"] = thinking_config
 

src/lm_deluge/api_requests/openai.py
@@ -42,8 +42,15 @@ async def _build_oa_chat_request(
            # Disable reasoning for Gemini models when no effort requested
            if "gemini" in model.id:
                effort = "none"
+            elif "gpt-5" in model.id:
+                effort = "minimal"
            else:
                effort = "low"
+        if effort == "minimal" and "gpt-5" not in model.id:
+            print(
+                "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
+            )
+            effort = "low"
        request_json["reasoning_effort"] = effort
    else:
        if sampling_params.reasoning_effort:
@@ -122,15 +129,21 @@ class OpenAIRequest(APIRequestBase):
            message = data["choices"][0]["message"]
            finish_reason = data["choices"][0]["finish_reason"]
 
-            # Add text content if present
-            if message.get("content"):
-                parts.append(Text(message["content"]))
-
            # Add thinking content if present (reasoning models)
            if "reasoning_content" in message:
                thinking = message["reasoning_content"]
                parts.append(Thinking(thinking))
 
+            # Together AI returns reasoning in a "reasoning"
+            # field which is not correct but whatever
+            if message.get("reasoning"):
+                thinking = message["reasoning"]
+                parts.append(Thinking(thinking))
+
+            # Add text content if present
+            if message.get("content"):
+                parts.append(Text(message["content"]))
+
            # Add tool calls if present
            if "tool_calls" in message:
                for tool_call in message["tool_calls"]:
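
Note: the reordering above makes the parser emit Thinking parts (from reasoning_content, or from Together AI's nonstandard reasoning field) before the Text part. A rough standalone sketch of the resulting order for a Together-style message (the message dict is invented and plain tuples stand in for the package's Thinking/Text classes):

# Hypothetical Together AI chat message; field names mirror the hunk above.
message = {"reasoning": "chain of thought...", "content": "final answer"}

parts = []
if "reasoning_content" in message:      # OpenAI-style reasoning field
    parts.append(("Thinking", message["reasoning_content"]))
if message.get("reasoning"):            # Together AI's nonstandard field
    parts.append(("Thinking", message["reasoning"]))
if message.get("content"):              # plain text content is now appended last
    parts.append(("Text", message["content"]))

print(parts)
# [('Thinking', 'chain of thought...'), ('Text', 'final answer')]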

src/lm_deluge/api_requests/response.py
@@ -89,9 +89,10 @@ class APIResponse:
                + self.usage.output_tokens * api_model.output_cost / 1e6
            )
        elif self.content is not None and self.completion is not None:
-            print(
-                f"Warning: Completion provided without token counts for model {self.model_internal}."
-            )
+            pass
+            # print(
+            # f"Warning: Completion provided without token counts for model {self.model_internal}."
+            # )
        if isinstance(self.prompt, Conversation):
            self.prompt = self.prompt.to_log()  # avoid keeping images in memory
 

src/lm_deluge/cli.py (new file)
@@ -0,0 +1,300 @@
+# import argparse
+# import asyncio
+# import os
+# import sys
+# from typing import Optional
+
+# from .client import LLMClient
+# from .models import registry, APIModel
+# from .prompt import Conversation, Message
+
+
+# def _ensure_api_key_for_model(model_id: str, passed_api_key: Optional[str] = None):
+# model: APIModel = APIModel.from_registry(model_id)
+# env_var = model.api_key_env_var or ""
+# if not env_var:
+# return # Some providers (e.g., Bedrock entries) don't use a single key
+# if os.getenv(env_var):
+# return
+# if passed_api_key:
+# os.environ[env_var] = passed_api_key
+# return
+# # If we get here, interactive prompting should occur at the UI layer.
+# # In non-interactive contexts, we will error before calling this without key.
+
+
+# def run_non_interactive(model_id: str, prompt_text: str, api_key: Optional[str]):
+# _ensure_api_key_for_model(model_id, api_key)
+# client = LLMClient(model_names=[model_id], progress="manual")
+# # Single round, print completion only to stdout
+# completions = asyncio.run(
+# client.process_prompts_async(
+# [Conversation.user(prompt_text)],
+# return_completions_only=True,
+# show_progress=False,
+# )
+# )
+# out = completions[0] if completions and completions[0] is not None else ""
+# # Write raw completion to stdout with no extra decoration
+# sys.stdout.write(out)
+# if out and not out.endswith("\n"):
+# sys.stdout.write("\n")
+
+
+# # -------- Textual UI (interactive chat) --------
+# try:
+# from textual.app import App, ComposeResult
+# from textual.containers import Container, Horizontal
+# from textual.widgets import Footer, Header, Input, Static, Button, ListView, ListItem, Label
+# from textual.widgets._rich_log import RichLog
+# from textual.reactive import reactive
+# TEXTUAL_AVAILABLE = True
+# except Exception: # pragma: no cover - textual may not be installed in some dev envs
+# TEXTUAL_AVAILABLE = False
+
+
+# if TEXTUAL_AVAILABLE:
+# class ModelPicker(Static):
+# """Minimal model picker: arrows to move, Enter to select."""
+
+# def __init__(self, preselected: Optional[str] = None):
+# super().__init__()
+# self.preselected = preselected
+
+# def compose(self) -> ComposeResult: # type: ignore[override]
+# # Keep it terminal-y: one-line hint + list. No buttons.
+# yield Static("Pick a model (Enter)", classes="hint")
+# list_items: list[ListItem] = []
+# # Curated small set to avoid scrollbars
+# preferred = [
+# "gpt-5",
+# "gpt-5-chat",
+# "gpt-5-mini",
+# "claude-4-sonnet",
+# "gemini-2.5-pro",
+# "gemini-2.5-flash",
+# "gemini-2.0-flash",
+# ]
+# for mid in preferred:
+# if mid in registry:
+# list_items.append(ListItem(Label(mid)))
+# yield ListView(*list_items, classes="model-list")
+
+# def on_mount(self) -> None: # type: ignore[override]
+# # Focus the list so Enter works immediately
+# self.query_one(ListView).focus()
+
+# def get_selected(self) -> Optional[str]:
+# listview = self.query_one(ListView)
+# if not listview.index is None and 0 <= listview.index < len(listview.children):
+# label = listview.children[listview.index].query_one(Label)
+# return label.renderable if isinstance(label.renderable, str) else str(label.renderable)
+# return None
+
+# def on_key(self, event): # type: ignore[override]
+# # Select current item on Enter
+# try:
+# key = getattr(event, "key", None)
+# except Exception:
+# key = None
+# if key == "enter":
+# sel = self.get_selected()
+# if sel:
+# # Ask app to proceed with the chosen model
+# getattr(self.app, "model_chosen", lambda *_: None)(sel) # type: ignore[attr-defined]
+
+
+# class ApiKeyPrompt(Static):
+# def __init__(self, env_var: str):
+# super().__init__()
+# self.env_var = env_var
+# self.input = Input(password=True, placeholder=f"Enter {env_var}")
+
+# def compose(self) -> ComposeResult: # type: ignore[override]
+# yield Static(f"API key required: set {self.env_var}", classes="title")
+# yield self.input
+# yield Button("Save", id="save-key", variant="primary")
+
+# def value(self) -> str:
+# return self.input.value
+
+
+# class MessagesView(RichLog):
+# def __init__(self, **kwargs):
+# # Terminal-like log with markup and auto-scroll
+# super().__init__(wrap=True, markup=True, auto_scroll=True, **kwargs)
+
+# def append_user(self, text: str):
+# self.write(f"[bold cyan]You:[/bold cyan] {text}")
+
+# def append_assistant(self, text: str):
+# self.write(f"[bold magenta]Model:[/bold magenta] {text}")
+
+
+# class ChatInput(Horizontal):
+# def compose(self) -> ComposeResult: # type: ignore[override]
+# self.input = Input(placeholder="Type message, Enter to send")
+# yield self.input
+
+
+# class DelugeApp(App):
+# CSS = """
+# #screen { height: 100%; }
+# .chat { height: 1fr; padding: 0 1; }
+# .composer { dock: bottom; height: 3; }
+# """
+
+# BINDINGS = [
+# ("ctrl+c", "quit", "Quit"),
+# ]
+
+# model_id = reactive("")
+# api_env_var = reactive("")
+
+# def __init__(self, model_arg: Optional[str], api_key_arg: Optional[str]):
+# super().__init__()
+# self._model_arg = model_arg
+# self._api_key_arg = api_key_arg
+# self._conversation = Conversation.system("You are a helpful assistant.")
+# self._client = None
+
+# def compose(self) -> ComposeResult: # type: ignore[override]
+# yield Header(show_clock=True)
+# self.body = Container(id="screen")
+# yield self.body
+# yield Footer()
+
+# def on_mount(self): # type: ignore[override]
+# # Step 1: pick model if not provided
+# if not self._model_arg:
+# self.model_picker = ModelPicker()
+# self.body.mount(self.model_picker)
+# else:
+# self.model_id = self._model_arg
+# self._after_model_selected()
+
+# def action_quit(self) -> None: # type: ignore[override]
+# self.exit()
+
+# def _after_model_selected(self):
+# # Resolve API requirement
+# model = APIModel.from_registry(self.model_id)
+# self.api_env_var = model.api_key_env_var or ""
+# if self.api_env_var and not os.getenv(self.api_env_var):
+# if self._api_key_arg:
+# os.environ[self.api_env_var] = self._api_key_arg
+# self._show_chat()
+# else:
+# # Prompt for key
+# self.body.remove_children()
+# self.key_prompt = ApiKeyPrompt(self.api_env_var)
+# self.body.mount(self.key_prompt)
+# else:
+# self._show_chat()
+
+# def model_chosen(self, sel: str) -> None:
+# """Called by ModelPicker when Enter is pressed on a selection."""
+# self.model_id = sel
+# self._after_model_selected()
+
+# def _show_chat(self):
+# self.body.remove_children()
+# # Build UI
+# self.messages = MessagesView(classes="chat")
+# self.composer = ChatInput(classes="composer")
+# self.body.mount(self.messages)
+# self.body.mount(self.composer)
+# # Focus input after mounting
+# self.set_focus(self.composer.input)
+# # Init client
+# self._client = LLMClient(model_names=[self.model_id], progress="manual")
+# # Update header subtitle
+# self.query_one(Header).sub_title = f"Model: {self.model_id}"
+
+# async def _send_and_receive(self, text: str):
+# # Append user message
+# self._conversation.add(Message.user(text))
+# self.messages.append_user(text)
+# # Call model (non-streaming for simplicity across providers)
+# responses = await self._client.process_prompts_async(
+# [self._conversation], return_completions_only=False, show_progress=False
+# )
+# resp = responses[0]
+# if resp and resp.completion:
+# self._conversation.add(Message.ai(resp.completion))
+# self.messages.append_assistant(resp.completion)
+# else:
+# self.messages.append_assistant("<no response>")
+
+# async def on_button_pressed(self, event): # type: ignore[override]
+# if hasattr(event.button, "id"):
+# if event.button.id == "save-key":
+# key = self.key_prompt.value().strip()
+# if self.api_env_var and key:
+# os.environ[self.api_env_var] = key
+# self._show_chat()
+# elif event.button.id == "send":
+# text = self.composer.input.value.strip()
+# if text:
+# self.composer.input.value = ""
+# await self._send_and_receive(text)
+
+# async def on_input_submitted(self, event: Input.Submitted): # type: ignore[override]
+# if isinstance(event.input.parent, ChatInput):
+# text = event.value.strip()
+# if text:
+# self.composer.input.value = ""
+# await self._send_and_receive(text)
+
+
+# def run_interactive(model: Optional[str], api_key: Optional[str]):
+# if not TEXTUAL_AVAILABLE:
+# sys.stderr.write(
+# "Textual is not installed. Please install with `pip install textual` or reinstall lm_deluge.\n"
+# )
+# sys.exit(2)
+# app = DelugeApp(model, api_key) # type: ignore[name-defined]
+# app.run()
+
+
+# def main():
+# parser = argparse.ArgumentParser(prog="deluge", description="Deluge CLI")
+# parser.add_argument("prompt", nargs="*", help="Prompt text (non-interactive -p only)")
+# parser.add_argument("--model", dest="model", help="Model ID to use")
+# parser.add_argument("--api-key", dest="api_key", help="API key for chosen model provider")
+# parser.add_argument(
+# "-p",
+# dest="print_mode",
+# action="store_true",
+# help="Print single completion to stdout (non-interactive)",
+# )
+
+# args = parser.parse_args()
+
+# if args.print_mode:
+# # Determine prompt text
+# prompt_text = " ".join(args.prompt).strip()
+# if not prompt_text and not sys.stdin.isatty():
+# prompt_text = sys.stdin.read()
+# if not prompt_text:
+# sys.stderr.write("No prompt provided. Pass text or pipe input.\n")
+# sys.exit(2)
+
+# # Determine model
+# model_id = args.model or os.getenv("DELUGE_DEFAULT_MODEL") or "gpt-4o-mini"
+# # Require API key non-interactively if provider needs it and not set
+# env_var = APIModel.from_registry(model_id).api_key_env_var or ""
+# if env_var and not (os.getenv(env_var) or args.api_key):
+# sys.stderr.write(
+# f"Missing API key. Set {env_var} or pass --api-key.\n"
+# )
+# sys.exit(2)
+# run_non_interactive(model_id, prompt_text, args.api_key)
+# return
+
+# # Interactive Textual chat
+# run_interactive(args.model, args.api_key)
+
+
+# if __name__ == "__main__":
+# main()

src/lm_deluge/client.py
@@ -1,6 +1,6 @@
 import asyncio
 import random
-from typing import Any, Literal, Self, Sequence, overload
+from typing import Any, Literal, Self, Sequence, Callable, overload
 
 import numpy as np
 import yaml
@@ -23,7 +23,6 @@ from .request_context import RequestContext
 from .tracker import StatusTracker
 
 
-# TODO: get completions as they finish, not all at once at the end.
 # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
 class _LLMClient(BaseModel):
     """
@@ -55,6 +54,9 @@ class _LLMClient(BaseModel):
     # Progress configuration
     progress: Literal["rich", "tqdm", "manual"] = "rich"
 
+    # Postprocessing - run on every APIResponse
+    postprocess: Callable[[APIResponse], APIResponse] | None = None
+
     # Internal state for async task handling
     _next_task_id: int = PrivateAttr(default=0)
     _tasks: dict[int, asyncio.Task] = PrivateAttr(default_factory=dict)
@@ -196,14 +198,6 @@ class _LLMClient(BaseModel):
        config_dict = yaml.safe_load(open(file_path))
        return cls.from_dict(config_dict)
 
-    @classmethod
-    def basic(cls, model: str | list[str], **kwargs):
-        """
-        Doesn't do anything differently now, kept for backwards compat.
-        """
-        kwargs["model_names"] = model
-        return cls(**kwargs)
-
    def _select_model(self):
        assert isinstance(self.model_weights, list)
        model_idx = np.random.choice(range(len(self.models)), p=self.model_weights)
@@ -253,14 +247,20 @@ class _LLMClient(BaseModel):
        self, context: RequestContext, retry_queue: asyncio.Queue | None = None
    ) -> APIResponse:
        """Handle caching and single HTTP call for a request. Failed requests go to retry queue."""
+
        # Check cache first
+        def _maybe_postprocess(response: APIResponse):
+            if self.postprocess:
+                return self.postprocess(response)
+            return response
+
        if self.cache:
            cached = self.cache.get(context.prompt)
            if cached:
                cached.local_cache_hit = True
                if context.status_tracker:
                    context.status_tracker.task_succeeded(context.task_id)
-                return cached
+                return _maybe_postprocess(cached)
 
        # Execute single request
        assert context.status_tracker
@@ -275,7 +275,7 @@ class _LLMClient(BaseModel):
            self.cache.put(context.prompt, response)
            # Call callback if provided
            context.maybe_callback(response, context.status_tracker)
-            return response
+            return _maybe_postprocess(response)
 
        # Handle error response - add to retry queue if available
        if retry_queue and context.attempts_left > 1:
@@ -303,7 +303,7 @@ class _LLMClient(BaseModel):
 
            # Add to retry queue for later processing
            await retry_queue.put(retry_context)
-            return response  # Return the error response for now
+            return _maybe_postprocess(response)  # Return the error response for now
 
        # No retries left or no retry queue - final failure
        context.status_tracker.task_failed(context.task_id)
@@ -316,7 +316,7 @@ class _LLMClient(BaseModel):
            error_msg += f" Message: {response.error_message}. Giving up."
        print(error_msg)
 
-        return response
+        return _maybe_postprocess(response)
 
    @overload
    async def process_prompts_async(
@@ -570,6 +570,8 @@ class _LLMClient(BaseModel):
                print(item, end="", flush=True)
            else:
                # final item
+                if self.postprocess:
+                    return self.postprocess(item)
                return item
 
    async def run_agent_loop(
@@ -713,65 +715,8 @@ class _LLMClient(BaseModel):
    )
 
 
-# def api_prompts_dry_run(
-# ids: np.ndarray | list[int],
-# prompts: list[Conversation],
-# models: str | list[str],
-# model_weights: list[float],
-# sampling_params: list[SamplingParams],
-# max_tokens_per_minute: int = 500_000,
-# max_requests_per_minute: int = 1_000,
-# ):
-# """
-# Count tokens and estimate costs for a batch of prompts.
-# """
-# results = []
-# for i, prompt in zip(ids, prompts):
-# # choose a model
-# model_idx = np.random.choice(range(len(models)), p=model_weights)
-# model = models[model_idx]
-
-# # dry run
-# input_tokens, output_tokens, min_cost, max_cost = prompt.dry_run(
-# model, sampling_params[model_idx].max_new_tokens
-# )
-# results.append(
-# {
-# "id": i,
-# "input_tokens": input_tokens,
-# "output_tokens": output_tokens,
-# "min_cost": min_cost,
-# "max_cost": max_cost,
-# }
-# )
-
-# combined_results: dict[str, Any] = {
-# "total_input_tokens": sum([r["input_tokens"] for r in results]),
-# "total_output_tokens": sum([r["output_tokens"] for r in results]),
-# "total_min_cost": sum([r["min_cost"] for r in results]),
-# "total_max_cost": sum([r["max_cost"] for r in results]),
-# }
-# minimum_time_tpm = combined_results["total_input_tokens"] / max_tokens_per_minute
-# maximum_time_tpm = (
-# combined_results["total_input_tokens"] + combined_results["total_output_tokens"]
-# ) / max_tokens_per_minute
-# minimum_time_rpm = len(prompts) / max_requests_per_minute
-
-# combined_results["minimum_time"] = max(minimum_time_tpm, minimum_time_rpm)
-# combined_results["maximum_time"] = max(maximum_time_tpm, minimum_time_rpm)
-# limiting_factor = None
-# if minimum_time_rpm > maximum_time_tpm:
-# limiting_factor = "requests"
-# elif minimum_time_rpm < minimum_time_tpm:
-# limiting_factor = "tokens"
-# else:
-# limiting_factor = "depends"
-# combined_results["limiting_factor"] = limiting_factor
-
-# return combined_results
-
-
-# Clean factory function with perfect IDE support
+# factory function -- allows positional model names,
+# keeps pydantic validation, without sacrificing IDE support
 @overload
 def LLMClient(
     model_names: str,
@@ -794,6 +739,7 @@ def LLMClient(
    top_logprobs: int | None = None,
    force_local_mcp: bool = False,
    progress: Literal["rich", "tqdm", "manual"] = "rich",
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient: ...
 
 
@@ -819,6 +765,7 @@ def LLMClient(
    top_logprobs: int | None = None,
    force_local_mcp: bool = False,
    progress: Literal["rich", "tqdm", "manual"] = "rich",
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient: ...
 
 
@@ -843,6 +790,7 @@ def LLMClient(
    top_logprobs: int | None = None,
    force_local_mcp: bool = False,
    progress: Literal["rich", "tqdm", "manual"] = "rich",
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient:
    """
    Create an LLMClient with model_names as a positional argument.
@@ -879,4 +827,5 @@ def LLMClient(
        top_logprobs=top_logprobs,
        force_local_mcp=force_local_mcp,
        progress=progress,
+        postprocess=postprocess,
    )
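
Note: taken together, the client.py changes add a postprocess hook that is applied to every APIResponse the client returns, including local cache hits, responses queued for retry, and final failures. A minimal usage sketch, assuming "gpt-4o-mini" is in the model registry (it is the CLI default above), the relevant API key is set in the environment, and APIResponse.completion is a plain mutable attribute:

# Illustrative sketch only; the strip() transform is made up, the imports follow
# the file layout shown in this diff.
import asyncio

from lm_deluge.client import LLMClient
from lm_deluge.prompt import Conversation

def strip_whitespace(resp):
    # runs on every APIResponse the client hands back
    if resp.completion:
        resp.completion = resp.completion.strip()
    return resp

client = LLMClient("gpt-4o-mini", postprocess=strip_whitespace)
responses = asyncio.run(
    client.process_prompts_async([Conversation.user("Say hello.")], show_progress=False)
)
print(responses[0].completion)

Because the hook also sees error responses and cache hits, a postprocess callable should tolerate completion being None and must return the (possibly modified) APIResponse.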