lm-deluge 0.0.35__tar.gz → 0.0.37__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lm-deluge might be problematic.

Files changed (79)
  1. {lm_deluge-0.0.35/src/lm_deluge.egg-info → lm_deluge-0.0.37}/PKG-INFO +1 -1
  2. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/pyproject.toml +6 -2
  3. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/anthropic.py +3 -3
  4. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/gemini.py +3 -1
  5. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/openai.py +3 -1
  6. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/response.py +4 -3
  7. lm_deluge-0.0.37/src/lm_deluge/cli.py +300 -0
  8. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/client.py +7 -4
  9. lm_deluge-0.0.37/src/lm_deluge/models/__init__.py +144 -0
  10. lm_deluge-0.0.37/src/lm_deluge/models/anthropic.py +124 -0
  11. lm_deluge-0.0.37/src/lm_deluge/models/bedrock.py +99 -0
  12. lm_deluge-0.0.37/src/lm_deluge/models/cerebras.py +57 -0
  13. lm_deluge-0.0.37/src/lm_deluge/models/cohere.py +98 -0
  14. lm_deluge-0.0.37/src/lm_deluge/models/deepseek.py +27 -0
  15. lm_deluge-0.0.37/src/lm_deluge/models/fireworks.py +16 -0
  16. lm_deluge-0.0.37/src/lm_deluge/models/google.py +153 -0
  17. lm_deluge-0.0.37/src/lm_deluge/models/grok.py +38 -0
  18. lm_deluge-0.0.37/src/lm_deluge/models/groq.py +74 -0
  19. lm_deluge-0.0.37/src/lm_deluge/models/meta.py +65 -0
  20. lm_deluge-0.0.37/src/lm_deluge/models/mistral.py +110 -0
  21. lm_deluge-0.0.37/src/lm_deluge/models/openai.py +318 -0
  22. lm_deluge-0.0.37/src/lm_deluge/models/openrouter.py +1 -0
  23. lm_deluge-0.0.37/src/lm_deluge/models/together.py +112 -0
  24. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/prompt.py +2 -2
  25. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/util/harmony.py +6 -4
  26. {lm_deluge-0.0.35 → lm_deluge-0.0.37/src/lm_deluge.egg-info}/PKG-INFO +1 -1
  27. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge.egg-info/SOURCES.txt +15 -0
  28. lm_deluge-0.0.35/src/lm_deluge/models/__init__.py +0 -1390
  29. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/LICENSE +0 -0
  30. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/README.md +0 -0
  31. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/setup.cfg +0 -0
  32. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/__init__.py +0 -0
  33. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/agent.py +0 -0
  34. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/__init__.py +0 -0
  35. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/base.py +0 -0
  36. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/bedrock.py +0 -0
  37. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/common.py +0 -0
  38. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
  39. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
  40. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
  41. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
  42. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
  43. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/api_requests/mistral.py +0 -0
  44. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/batches.py +0 -0
  45. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
  46. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
  47. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
  48. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
  49. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/built_in_tools/base.py +0 -0
  50. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/built_in_tools/openai.py +0 -0
  51. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/cache.py +0 -0
  52. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/config.py +0 -0
  53. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/embed.py +0 -0
  54. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/errors.py +0 -0
  55. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/file.py +0 -0
  56. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/gemini_limits.py +0 -0
  57. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/image.py +0 -0
  58. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/llm_tools/__init__.py +0 -0
  59. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/llm_tools/classify.py +0 -0
  60. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/llm_tools/extract.py +0 -0
  61. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/llm_tools/locate.py +0 -0
  62. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/llm_tools/ocr.py +0 -0
  63. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/llm_tools/score.py +0 -0
  64. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/llm_tools/translate.py +0 -0
  65. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/request_context.py +0 -0
  66. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/rerank.py +0 -0
  67. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/tool.py +0 -0
  68. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/tracker.py +0 -0
  69. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/usage.py +0 -0
  70. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/util/json.py +0 -0
  71. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/util/logprobs.py +0 -0
  72. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/util/spatial.py +0 -0
  73. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/util/validation.py +0 -0
  74. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge/util/xml.py +0 -0
  75. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
  76. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge.egg-info/requires.txt +0 -0
  77. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/src/lm_deluge.egg-info/top_level.txt +0 -0
  78. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/tests/test_builtin_tools.py +0 -0
  79. {lm_deluge-0.0.35 → lm_deluge-0.0.37}/tests/test_native_mcp_server.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lm_deluge
- Version: 0.0.35
+ Version: 0.0.37
  Summary: Python utility for using LLM API models.
  Author-email: Benjamin Anderson <ben@trytaylor.ai>
  Requires-Python: >=3.10
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]

  [project]
  name = "lm_deluge"
- version = "0.0.35"
+ version = "0.0.37"
  authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
  description = "Python utility for using LLM API models."
  readme = "README.md"
@@ -28,5 +28,9 @@ dependencies = [
  "pdf2image",
  "pillow",
  "fastmcp>=2.4",
- "rich"
+ "rich",
+ # "textual>=0.58.0"
  ]
+
+ # [project.scripts]
+ # deluge = "lm_deluge.cli:main"
@@ -57,9 +57,9 @@ def _build_anthropic_request(
  # handle thinking
  if model.reasoning_model and sampling_params.reasoning_effort:
  # translate reasoning effort of low, medium, high to budget tokens
- budget = {
- "minimal": 256, "low": 1024, "medium": 4096, "high": 16384
- }.get(sampling_params.reasoning_effort)
+ budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
+ sampling_params.reasoning_effort
+ )
  request_json["thinking"] = {
  "type": "enabled",
  "budget_tokens": budget,
@@ -46,7 +46,9 @@ async def _build_gemini_request(
  else:
  thinking_config = {"includeThoughts": True}
  if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
- budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[effort]
+ budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
+ effort
+ ]
  thinking_config["thinkingBudget"] = budget
  request_json["generationConfig"]["thinkingConfig"] = thinking_config

@@ -47,7 +47,9 @@ async def _build_oa_chat_request(
  else:
  effort = "low"
  if effort == "minimal" and "gpt-5" not in model.id:
- print("WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'.")
+ print(
+ "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
+ )
  effort = "low"
  request_json["reasoning_effort"] = effort
  else:
@@ -89,9 +89,10 @@ class APIResponse:
  + self.usage.output_tokens * api_model.output_cost / 1e6
  )
  elif self.content is not None and self.completion is not None:
- print(
- f"Warning: Completion provided without token counts for model {self.model_internal}."
- )
+ pass
+ # print(
+ # f"Warning: Completion provided without token counts for model {self.model_internal}."
+ # )
  if isinstance(self.prompt, Conversation):
  self.prompt = self.prompt.to_log() # avoid keeping images in memory

@@ -0,0 +1,300 @@
+ # import argparse
+ # import asyncio
+ # import os
+ # import sys
+ # from typing import Optional
+
+ # from .client import LLMClient
+ # from .models import registry, APIModel
+ # from .prompt import Conversation, Message
+
+
+ # def _ensure_api_key_for_model(model_id: str, passed_api_key: Optional[str] = None):
+ # model: APIModel = APIModel.from_registry(model_id)
+ # env_var = model.api_key_env_var or ""
+ # if not env_var:
+ # return # Some providers (e.g., Bedrock entries) don't use a single key
+ # if os.getenv(env_var):
+ # return
+ # if passed_api_key:
+ # os.environ[env_var] = passed_api_key
+ # return
+ # # If we get here, interactive prompting should occur at the UI layer.
+ # # In non-interactive contexts, we will error before calling this without key.
+
+
+ # def run_non_interactive(model_id: str, prompt_text: str, api_key: Optional[str]):
+ # _ensure_api_key_for_model(model_id, api_key)
+ # client = LLMClient(model_names=[model_id], progress="manual")
+ # # Single round, print completion only to stdout
+ # completions = asyncio.run(
+ # client.process_prompts_async(
+ # [Conversation.user(prompt_text)],
+ # return_completions_only=True,
+ # show_progress=False,
+ # )
+ # )
+ # out = completions[0] if completions and completions[0] is not None else ""
+ # # Write raw completion to stdout with no extra decoration
+ # sys.stdout.write(out)
+ # if out and not out.endswith("\n"):
+ # sys.stdout.write("\n")
+
+
+ # # -------- Textual UI (interactive chat) --------
+ # try:
+ # from textual.app import App, ComposeResult
+ # from textual.containers import Container, Horizontal
+ # from textual.widgets import Footer, Header, Input, Static, Button, ListView, ListItem, Label
+ # from textual.widgets._rich_log import RichLog
+ # from textual.reactive import reactive
+ # TEXTUAL_AVAILABLE = True
+ # except Exception: # pragma: no cover - textual may not be installed in some dev envs
+ # TEXTUAL_AVAILABLE = False
+
+
+ # if TEXTUAL_AVAILABLE:
+ # class ModelPicker(Static):
+ # """Minimal model picker: arrows to move, Enter to select."""
+
+ # def __init__(self, preselected: Optional[str] = None):
+ # super().__init__()
+ # self.preselected = preselected
+
+ # def compose(self) -> ComposeResult: # type: ignore[override]
+ # # Keep it terminal-y: one-line hint + list. No buttons.
+ # yield Static("Pick a model (Enter)", classes="hint")
+ # list_items: list[ListItem] = []
+ # # Curated small set to avoid scrollbars
+ # preferred = [
+ # "gpt-5",
+ # "gpt-5-chat",
+ # "gpt-5-mini",
+ # "claude-4-sonnet",
+ # "gemini-2.5-pro",
+ # "gemini-2.5-flash",
+ # "gemini-2.0-flash",
+ # ]
+ # for mid in preferred:
+ # if mid in registry:
+ # list_items.append(ListItem(Label(mid)))
+ # yield ListView(*list_items, classes="model-list")
+
+ # def on_mount(self) -> None: # type: ignore[override]
+ # # Focus the list so Enter works immediately
+ # self.query_one(ListView).focus()
+
+ # def get_selected(self) -> Optional[str]:
+ # listview = self.query_one(ListView)
+ # if not listview.index is None and 0 <= listview.index < len(listview.children):
+ # label = listview.children[listview.index].query_one(Label)
+ # return label.renderable if isinstance(label.renderable, str) else str(label.renderable)
+ # return None
+
+ # def on_key(self, event): # type: ignore[override]
+ # # Select current item on Enter
+ # try:
+ # key = getattr(event, "key", None)
+ # except Exception:
+ # key = None
+ # if key == "enter":
+ # sel = self.get_selected()
+ # if sel:
+ # # Ask app to proceed with the chosen model
+ # getattr(self.app, "model_chosen", lambda *_: None)(sel) # type: ignore[attr-defined]
+
+
+ # class ApiKeyPrompt(Static):
+ # def __init__(self, env_var: str):
+ # super().__init__()
+ # self.env_var = env_var
+ # self.input = Input(password=True, placeholder=f"Enter {env_var}")
+
+ # def compose(self) -> ComposeResult: # type: ignore[override]
+ # yield Static(f"API key required: set {self.env_var}", classes="title")
+ # yield self.input
+ # yield Button("Save", id="save-key", variant="primary")
+
+ # def value(self) -> str:
+ # return self.input.value
+
+
+ # class MessagesView(RichLog):
+ # def __init__(self, **kwargs):
+ # # Terminal-like log with markup and auto-scroll
+ # super().__init__(wrap=True, markup=True, auto_scroll=True, **kwargs)
+
+ # def append_user(self, text: str):
+ # self.write(f"[bold cyan]You:[/bold cyan] {text}")
+
+ # def append_assistant(self, text: str):
+ # self.write(f"[bold magenta]Model:[/bold magenta] {text}")
+
+
+ # class ChatInput(Horizontal):
+ # def compose(self) -> ComposeResult: # type: ignore[override]
+ # self.input = Input(placeholder="Type message, Enter to send")
+ # yield self.input
+
+
+ # class DelugeApp(App):
+ # CSS = """
+ # #screen { height: 100%; }
+ # .chat { height: 1fr; padding: 0 1; }
+ # .composer { dock: bottom; height: 3; }
+ # """
+
+ # BINDINGS = [
+ # ("ctrl+c", "quit", "Quit"),
+ # ]
+
+ # model_id = reactive("")
+ # api_env_var = reactive("")
+
+ # def __init__(self, model_arg: Optional[str], api_key_arg: Optional[str]):
+ # super().__init__()
+ # self._model_arg = model_arg
+ # self._api_key_arg = api_key_arg
+ # self._conversation = Conversation.system("You are a helpful assistant.")
+ # self._client = None
+
+ # def compose(self) -> ComposeResult: # type: ignore[override]
+ # yield Header(show_clock=True)
+ # self.body = Container(id="screen")
+ # yield self.body
+ # yield Footer()
+
+ # def on_mount(self): # type: ignore[override]
+ # # Step 1: pick model if not provided
+ # if not self._model_arg:
+ # self.model_picker = ModelPicker()
+ # self.body.mount(self.model_picker)
+ # else:
+ # self.model_id = self._model_arg
+ # self._after_model_selected()
+
+ # def action_quit(self) -> None: # type: ignore[override]
+ # self.exit()
+
+ # def _after_model_selected(self):
+ # # Resolve API requirement
+ # model = APIModel.from_registry(self.model_id)
+ # self.api_env_var = model.api_key_env_var or ""
+ # if self.api_env_var and not os.getenv(self.api_env_var):
+ # if self._api_key_arg:
+ # os.environ[self.api_env_var] = self._api_key_arg
+ # self._show_chat()
+ # else:
+ # # Prompt for key
+ # self.body.remove_children()
+ # self.key_prompt = ApiKeyPrompt(self.api_env_var)
+ # self.body.mount(self.key_prompt)
+ # else:
+ # self._show_chat()
+
+ # def model_chosen(self, sel: str) -> None:
+ # """Called by ModelPicker when Enter is pressed on a selection."""
+ # self.model_id = sel
+ # self._after_model_selected()
+
+ # def _show_chat(self):
+ # self.body.remove_children()
+ # # Build UI
+ # self.messages = MessagesView(classes="chat")
+ # self.composer = ChatInput(classes="composer")
+ # self.body.mount(self.messages)
+ # self.body.mount(self.composer)
+ # # Focus input after mounting
+ # self.set_focus(self.composer.input)
+ # # Init client
+ # self._client = LLMClient(model_names=[self.model_id], progress="manual")
+ # # Update header subtitle
+ # self.query_one(Header).sub_title = f"Model: {self.model_id}"
+
+ # async def _send_and_receive(self, text: str):
+ # # Append user message
+ # self._conversation.add(Message.user(text))
+ # self.messages.append_user(text)
+ # # Call model (non-streaming for simplicity across providers)
+ # responses = await self._client.process_prompts_async(
+ # [self._conversation], return_completions_only=False, show_progress=False
+ # )
+ # resp = responses[0]
+ # if resp and resp.completion:
+ # self._conversation.add(Message.ai(resp.completion))
+ # self.messages.append_assistant(resp.completion)
+ # else:
+ # self.messages.append_assistant("<no response>")
+
+ # async def on_button_pressed(self, event): # type: ignore[override]
+ # if hasattr(event.button, "id"):
+ # if event.button.id == "save-key":
+ # key = self.key_prompt.value().strip()
+ # if self.api_env_var and key:
+ # os.environ[self.api_env_var] = key
+ # self._show_chat()
+ # elif event.button.id == "send":
+ # text = self.composer.input.value.strip()
+ # if text:
+ # self.composer.input.value = ""
+ # await self._send_and_receive(text)
+
+ # async def on_input_submitted(self, event: Input.Submitted): # type: ignore[override]
+ # if isinstance(event.input.parent, ChatInput):
+ # text = event.value.strip()
+ # if text:
+ # self.composer.input.value = ""
+ # await self._send_and_receive(text)
+
+
+ # def run_interactive(model: Optional[str], api_key: Optional[str]):
+ # if not TEXTUAL_AVAILABLE:
+ # sys.stderr.write(
+ # "Textual is not installed. Please install with `pip install textual` or reinstall lm_deluge.\n"
+ # )
+ # sys.exit(2)
+ # app = DelugeApp(model, api_key) # type: ignore[name-defined]
+ # app.run()
+
+
+ # def main():
+ # parser = argparse.ArgumentParser(prog="deluge", description="Deluge CLI")
+ # parser.add_argument("prompt", nargs="*", help="Prompt text (non-interactive -p only)")
+ # parser.add_argument("--model", dest="model", help="Model ID to use")
+ # parser.add_argument("--api-key", dest="api_key", help="API key for chosen model provider")
+ # parser.add_argument(
+ # "-p",
+ # dest="print_mode",
+ # action="store_true",
+ # help="Print single completion to stdout (non-interactive)",
+ # )
+
+ # args = parser.parse_args()
+
+ # if args.print_mode:
+ # # Determine prompt text
+ # prompt_text = " ".join(args.prompt).strip()
+ # if not prompt_text and not sys.stdin.isatty():
+ # prompt_text = sys.stdin.read()
+ # if not prompt_text:
+ # sys.stderr.write("No prompt provided. Pass text or pipe input.\n")
+ # sys.exit(2)
+
+ # # Determine model
+ # model_id = args.model or os.getenv("DELUGE_DEFAULT_MODEL") or "gpt-4o-mini"
+ # # Require API key non-interactively if provider needs it and not set
+ # env_var = APIModel.from_registry(model_id).api_key_env_var or ""
+ # if env_var and not (os.getenv(env_var) or args.api_key):
+ # sys.stderr.write(
+ # f"Missing API key. Set {env_var} or pass --api-key.\n"
+ # )
+ # sys.exit(2)
+ # run_non_interactive(model_id, prompt_text, args.api_key)
+ # return
+
+ # # Interactive Textual chat
+ # run_interactive(args.model, args.api_key)
+
+
+ # if __name__ == "__main__":
+ # main()
@@ -22,6 +22,7 @@ from .models import APIModel, registry
  from .request_context import RequestContext
  from .tracker import StatusTracker

+
  # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
  class _LLMClient(BaseModel):
  """
@@ -246,6 +247,7 @@ class _LLMClient(BaseModel):
  self, context: RequestContext, retry_queue: asyncio.Queue | None = None
  ) -> APIResponse:
  """Handle caching and single HTTP call for a request. Failed requests go to retry queue."""
+
  # Check cache first
  def _maybe_postprocess(response: APIResponse):
  if self.postprocess:
@@ -712,6 +714,7 @@ class _LLMClient(BaseModel):
  batch_ids, provider, poll_interval=30
  )

+
  # factory function -- allows positional model names,
  # keeps pydantic validation, without sacrificing IDE support
  @overload
@@ -736,7 +739,7 @@ def LLMClient(
  top_logprobs: int | None = None,
  force_local_mcp: bool = False,
  progress: Literal["rich", "tqdm", "manual"] = "rich",
- postprocess: Callable[[APIResponse], APIResponse] | None = None
+ postprocess: Callable[[APIResponse], APIResponse] | None = None,
  ) -> _LLMClient: ...


@@ -762,7 +765,7 @@ def LLMClient(
  top_logprobs: int | None = None,
  force_local_mcp: bool = False,
  progress: Literal["rich", "tqdm", "manual"] = "rich",
- postprocess: Callable[[APIResponse], APIResponse] | None = None
+ postprocess: Callable[[APIResponse], APIResponse] | None = None,
  ) -> _LLMClient: ...


@@ -787,7 +790,7 @@ def LLMClient(
  top_logprobs: int | None = None,
  force_local_mcp: bool = False,
  progress: Literal["rich", "tqdm", "manual"] = "rich",
- postprocess: Callable[[APIResponse], APIResponse] | None = None
+ postprocess: Callable[[APIResponse], APIResponse] | None = None,
  ) -> _LLMClient:
  """
  Create an LLMClient with model_names as a positional argument.
@@ -824,5 +827,5 @@ def LLMClient(
  top_logprobs=top_logprobs,
  force_local_mcp=force_local_mcp,
  progress=progress,
- postprocess=postprocess
+ postprocess=postprocess,
  )
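
For reference, the postprocess keyword touched by the trailing-comma changes above is the hook the client passes each APIResponse through. The following is a minimal usage sketch assembled only from signatures and call patterns visible in this diff (cli.py and the LLMClient factory); the passthrough callback and the gpt-4o-mini model ID are illustrative, not part of the release.

    import asyncio

    from lm_deluge.client import LLMClient
    from lm_deluge.prompt import Conversation

    def passthrough(resp):
        # illustrative postprocess hook: takes an APIResponse, returns an APIResponse
        return resp

    client = LLMClient(
        model_names=["gpt-4o-mini"],   # same pattern as the (commented-out) cli.py
        progress="manual",
        postprocess=passthrough,
    )
    completions = asyncio.run(
        client.process_prompts_async(
            [Conversation.user("Hello!")],
            return_completions_only=True,
            show_progress=False,
        )
    )
    print(completions[0])
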
@@ -0,0 +1,144 @@
+ from __future__ import annotations
+
+ import random
+ from dataclasses import dataclass, field
+
+ from ..request_context import RequestContext
+
+ # Import and register all provider models
+ from .anthropic import ANTHROPIC_MODELS
+ from .bedrock import BEDROCK_MODELS
+ from .cerebras import CEREBRAS_MODELS
+ from .cohere import COHERE_MODELS
+ from .deepseek import DEEPSEEK_MODELS
+ from .fireworks import FIREWORKS_MODELS
+ from .google import GOOGLE_MODELS
+ from .grok import XAI_MODELS
+ from .groq import GROQ_MODELS
+ from .meta import META_MODELS
+ from .mistral import MISTRAL_MODELS
+ from .openai import OPENAI_MODELS
+ from .openrouter import OPENROUTER_MODELS
+ from .together import TOGETHER_MODELS
+
+
+ @dataclass
+ class APIModel:
+ id: str
+ name: str
+ api_base: str
+ api_key_env_var: str
+ api_spec: str
+ cached_input_cost: float | None = 0
+ input_cost: float | None = 0 # $ per million input tokens
+ output_cost: float | None = 0 # $ per million output tokens
+ supports_json: bool = False
+ supports_logprobs: bool = False
+ supports_responses: bool = False
+ reasoning_model: bool = False
+ regions: list[str] | dict[str, int] = field(default_factory=list)
+ tokens_per_minute: int | None = None
+ requests_per_minute: int | None = None
+ gpus: list[str] | None = None
+
+ @classmethod
+ def from_registry(cls, name: str):
+ if name not in registry:
+ raise ValueError(f"Model {name} not found in registry")
+ cfg = registry[name]
+ if isinstance(cfg, APIModel):
+ return cfg
+ return cls(**cfg)
+
+ def sample_region(self):
+ if isinstance(self.regions, list):
+ regions = self.regions
+ weights = [1] * len(regions)
+ elif isinstance(self.regions, dict):
+ regions = list(self.regions.keys())
+ weights = self.regions.values()
+ else:
+ raise ValueError("no regions to sample")
+ random.sample(regions, 1, counts=weights)[0]
+
+ def make_request(self, context: RequestContext): # -> "APIRequestBase"
+ from ..api_requests.common import CLASSES
+
+ api_spec = self.api_spec
+ if (
+ context.use_responses_api
+ and self.supports_responses
+ and api_spec == "openai"
+ ):
+ api_spec = "openai-responses"
+
+ request_class = CLASSES.get(api_spec, None)
+ if request_class is None:
+ raise ValueError(f"Unsupported API spec: {api_spec}")
+ return request_class(context=context)
+
+
+ registry: dict[str, APIModel] = {}
+
+
+ def register_model(
+ id: str,
+ name: str,
+ api_base: str,
+ api_key_env_var: str,
+ api_spec: str = "openai",
+ input_cost: float | None = 0, # $ per million input tokens
+ cached_input_cost: float | None = 0,
+ output_cost: float | None = 0, # $ per million output tokens
+ supports_json: bool = False,
+ supports_logprobs: bool = False,
+ supports_responses: bool = False,
+ reasoning_model: bool = False,
+ regions: list[str] | dict[str, int] = field(default_factory=list),
+ tokens_per_minute: int | None = None,
+ requests_per_minute: int | None = None,
+ ) -> APIModel:
+ """Register a model configuration and return the created APIModel."""
+ model = APIModel(
+ id=id,
+ name=name,
+ api_base=api_base,
+ api_key_env_var=api_key_env_var,
+ api_spec=api_spec,
+ cached_input_cost=cached_input_cost,
+ input_cost=input_cost,
+ output_cost=output_cost,
+ supports_json=supports_json,
+ supports_logprobs=supports_logprobs,
+ supports_responses=supports_responses,
+ reasoning_model=reasoning_model,
+ regions=regions,
+ tokens_per_minute=tokens_per_minute,
+ requests_per_minute=requests_per_minute,
+ )
+ registry[model.id] = model
+ return model
+
+
+ # Register all models from all providers
+ for model_dict in [
+ ANTHROPIC_MODELS,
+ BEDROCK_MODELS,
+ COHERE_MODELS,
+ DEEPSEEK_MODELS,
+ FIREWORKS_MODELS,
+ GOOGLE_MODELS,
+ XAI_MODELS,
+ META_MODELS,
+ MISTRAL_MODELS,
+ OPENAI_MODELS,
+ OPENROUTER_MODELS,
+ TOGETHER_MODELS,
+ GROQ_MODELS,
+ CEREBRAS_MODELS,
+ ]:
+ for cfg in model_dict.values():
+ register_model(**cfg)
+
+
+ # print("Valid models:", registry.keys())
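
The new models package shown above replaces the old monolithic registry (lm_deluge-0.0.35/src/lm_deluge/models/__init__.py, 1390 lines removed per the file list) with per-provider modules that all register into one dict at import time. A minimal lookup sketch based only on the code in this hunk; the claude-4-sonnet ID is taken from the curated list in cli.py and is illustrative:

    from lm_deluge.models import APIModel, registry

    # every provider's models are registered when the package is imported
    print(len(registry))

    model = APIModel.from_registry("claude-4-sonnet")
    print(model.api_spec, model.api_key_env_var, model.input_cost, model.output_cost)
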