lm-deluge 0.0.35__tar.gz → 0.0.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic; see the registry page for details.
- {lm_deluge-0.0.35/src/lm_deluge.egg-info → lm_deluge-0.0.36}/PKG-INFO +1 -1
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/pyproject.toml +6 -2
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/anthropic.py +3 -3
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/gemini.py +3 -1
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/openai.py +3 -1
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/response.py +4 -3
- lm_deluge-0.0.36/src/lm_deluge/cli.py +300 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/client.py +7 -4
- lm_deluge-0.0.36/src/lm_deluge/models/__init__.py +144 -0
- lm_deluge-0.0.36/src/lm_deluge/models/anthropic.py +124 -0
- lm_deluge-0.0.36/src/lm_deluge/models/bedrock.py +99 -0
- lm_deluge-0.0.36/src/lm_deluge/models/cerebras.py +57 -0
- lm_deluge-0.0.36/src/lm_deluge/models/cohere.py +98 -0
- lm_deluge-0.0.36/src/lm_deluge/models/deepseek.py +27 -0
- lm_deluge-0.0.36/src/lm_deluge/models/fireworks.py +16 -0
- lm_deluge-0.0.36/src/lm_deluge/models/google.py +153 -0
- lm_deluge-0.0.36/src/lm_deluge/models/grok.py +38 -0
- lm_deluge-0.0.36/src/lm_deluge/models/groq.py +74 -0
- lm_deluge-0.0.36/src/lm_deluge/models/meta.py +65 -0
- lm_deluge-0.0.36/src/lm_deluge/models/mistral.py +110 -0
- lm_deluge-0.0.36/src/lm_deluge/models/openai.py +318 -0
- lm_deluge-0.0.36/src/lm_deluge/models/openrouter.py +1 -0
- lm_deluge-0.0.36/src/lm_deluge/models/together.py +112 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/prompt.py +2 -2
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/harmony.py +6 -4
- {lm_deluge-0.0.35 → lm_deluge-0.0.36/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/SOURCES.txt +15 -0
- lm_deluge-0.0.35/src/lm_deluge/models/__init__.py +0 -1390
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/LICENSE +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/README.md +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/setup.cfg +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.35 → lm_deluge-0.0.36}/tests/test_native_mcp_server.py +0 -0
pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.35"
+version = "0.0.36"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
@@ -28,5 +28,9 @@ dependencies = [
     "pdf2image",
     "pillow",
     "fastmcp>=2.4",
-    "rich"
+    "rich",
+    # "textual>=0.58.0"
 ]
+
+# [project.scripts]
+# deluge = "lm_deluge.cli:main"
src/lm_deluge/api_requests/anthropic.py
@@ -57,9 +57,9 @@ def _build_anthropic_request(
     # handle thinking
     if model.reasoning_model and sampling_params.reasoning_effort:
         # translate reasoning effort of low, medium, high to budget tokens
-        budget = {
-
-
+        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
+            sampling_params.reasoning_effort
+        )
         request_json["thinking"] = {
             "type": "enabled",
             "budget_tokens": budget,
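Note: the Anthropic hunk above swaps the multi-line dict for a one-line mapping with `.get()` when translating reasoning effort into a thinking budget. A minimal, self-contained sketch of that mapping (budget values copied from the diff; the `budget_for` helper is illustrative and not part of the package):

```python
# Budget values copied from the hunk above.
BUDGETS = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}


def budget_for(effort: str) -> int | None:
    # .get() mirrors the new code path: an unrecognized effort yields None
    # instead of raising KeyError the way plain indexing would.
    return BUDGETS.get(effort)


print(budget_for("medium"))  # 4096
print(budget_for("turbo"))   # None
```

The Gemini hunk below keeps plain indexing but guards it with an explicit membership check on the same four values.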
src/lm_deluge/api_requests/gemini.py
@@ -46,7 +46,9 @@ async def _build_gemini_request(
     else:
         thinking_config = {"includeThoughts": True}
     if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
-        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
+        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
+            effort
+        ]
         thinking_config["thinkingBudget"] = budget
     request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
src/lm_deluge/api_requests/openai.py
@@ -47,7 +47,9 @@ async def _build_oa_chat_request(
         else:
             effort = "low"
         if effort == "minimal" and "gpt-5" not in model.id:
-            print(
+            print(
+                "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
+            )
             effort = "low"
         request_json["reasoning_effort"] = effort
     else:
src/lm_deluge/api_requests/response.py
@@ -89,9 +89,10 @@ class APIResponse:
                 + self.usage.output_tokens * api_model.output_cost / 1e6
             )
         elif self.content is not None and self.completion is not None:
-
-
-
+            pass
+            # print(
+            #     f"Warning: Completion provided without token counts for model {self.model_internal}."
+            # )
         if isinstance(self.prompt, Conversation):
             self.prompt = self.prompt.to_log()  # avoid keeping images in memory
 
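Note: the context lines above show the cost calculation in `APIResponse`; per the new models registry, `input_cost` and `output_cost` are expressed in dollars per million tokens, so each term is `tokens * cost / 1e6`. A tiny worked sketch with made-up prices:

```python
# Hypothetical prices in $ per million tokens (registry convention from this diff).
input_cost, output_cost = 3.0, 15.0
input_tokens, output_tokens = 1_200, 450

cost = input_tokens * input_cost / 1e6 + output_tokens * output_cost / 1e6
print(f"${cost:.6f}")  # $0.010350
```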
src/lm_deluge/cli.py (new file)
@@ -0,0 +1,300 @@
+# import argparse
+# import asyncio
+# import os
+# import sys
+# from typing import Optional
+
+# from .client import LLMClient
+# from .models import registry, APIModel
+# from .prompt import Conversation, Message
+
+
+# def _ensure_api_key_for_model(model_id: str, passed_api_key: Optional[str] = None):
+#     model: APIModel = APIModel.from_registry(model_id)
+#     env_var = model.api_key_env_var or ""
+#     if not env_var:
+#         return # Some providers (e.g., Bedrock entries) don't use a single key
+#     if os.getenv(env_var):
+#         return
+#     if passed_api_key:
+#         os.environ[env_var] = passed_api_key
+#         return
+#     # If we get here, interactive prompting should occur at the UI layer.
+#     # In non-interactive contexts, we will error before calling this without key.
+
+
+# def run_non_interactive(model_id: str, prompt_text: str, api_key: Optional[str]):
+#     _ensure_api_key_for_model(model_id, api_key)
+#     client = LLMClient(model_names=[model_id], progress="manual")
+#     # Single round, print completion only to stdout
+#     completions = asyncio.run(
+#         client.process_prompts_async(
+#             [Conversation.user(prompt_text)],
+#             return_completions_only=True,
+#             show_progress=False,
+#         )
+#     )
+#     out = completions[0] if completions and completions[0] is not None else ""
+#     # Write raw completion to stdout with no extra decoration
+#     sys.stdout.write(out)
+#     if out and not out.endswith("\n"):
+#         sys.stdout.write("\n")
+
+
+# # -------- Textual UI (interactive chat) --------
+# try:
+#     from textual.app import App, ComposeResult
+#     from textual.containers import Container, Horizontal
+#     from textual.widgets import Footer, Header, Input, Static, Button, ListView, ListItem, Label
+#     from textual.widgets._rich_log import RichLog
+#     from textual.reactive import reactive
+#     TEXTUAL_AVAILABLE = True
+# except Exception: # pragma: no cover - textual may not be installed in some dev envs
+#     TEXTUAL_AVAILABLE = False
+
+
+# if TEXTUAL_AVAILABLE:
+#     class ModelPicker(Static):
+#         """Minimal model picker: arrows to move, Enter to select."""
+
+#         def __init__(self, preselected: Optional[str] = None):
+#             super().__init__()
+#             self.preselected = preselected
+
+#         def compose(self) -> ComposeResult: # type: ignore[override]
+#             # Keep it terminal-y: one-line hint + list. No buttons.
+#             yield Static("Pick a model (Enter)", classes="hint")
+#             list_items: list[ListItem] = []
+#             # Curated small set to avoid scrollbars
+#             preferred = [
+#                 "gpt-5",
+#                 "gpt-5-chat",
+#                 "gpt-5-mini",
+#                 "claude-4-sonnet",
+#                 "gemini-2.5-pro",
+#                 "gemini-2.5-flash",
+#                 "gemini-2.0-flash",
+#             ]
+#             for mid in preferred:
+#                 if mid in registry:
+#                     list_items.append(ListItem(Label(mid)))
+#             yield ListView(*list_items, classes="model-list")
+
+#         def on_mount(self) -> None: # type: ignore[override]
+#             # Focus the list so Enter works immediately
+#             self.query_one(ListView).focus()
+
+#         def get_selected(self) -> Optional[str]:
+#             listview = self.query_one(ListView)
+#             if not listview.index is None and 0 <= listview.index < len(listview.children):
+#                 label = listview.children[listview.index].query_one(Label)
+#                 return label.renderable if isinstance(label.renderable, str) else str(label.renderable)
+#             return None
+
+#         def on_key(self, event): # type: ignore[override]
+#             # Select current item on Enter
+#             try:
+#                 key = getattr(event, "key", None)
+#             except Exception:
+#                 key = None
+#             if key == "enter":
+#                 sel = self.get_selected()
+#                 if sel:
+#                     # Ask app to proceed with the chosen model
+#                     getattr(self.app, "model_chosen", lambda *_: None)(sel) # type: ignore[attr-defined]
+
+
+#     class ApiKeyPrompt(Static):
+#         def __init__(self, env_var: str):
+#             super().__init__()
+#             self.env_var = env_var
+#             self.input = Input(password=True, placeholder=f"Enter {env_var}")
+
+#         def compose(self) -> ComposeResult: # type: ignore[override]
+#             yield Static(f"API key required: set {self.env_var}", classes="title")
+#             yield self.input
+#             yield Button("Save", id="save-key", variant="primary")
+
+#         def value(self) -> str:
+#             return self.input.value
+
+
+#     class MessagesView(RichLog):
+#         def __init__(self, **kwargs):
+#             # Terminal-like log with markup and auto-scroll
+#             super().__init__(wrap=True, markup=True, auto_scroll=True, **kwargs)
+
+#         def append_user(self, text: str):
+#             self.write(f"[bold cyan]You:[/bold cyan] {text}")
+
+#         def append_assistant(self, text: str):
+#             self.write(f"[bold magenta]Model:[/bold magenta] {text}")
+
+
+#     class ChatInput(Horizontal):
+#         def compose(self) -> ComposeResult: # type: ignore[override]
+#             self.input = Input(placeholder="Type message, Enter to send")
+#             yield self.input
+
+
+#     class DelugeApp(App):
+#         CSS = """
+#         #screen { height: 100%; }
+#         .chat { height: 1fr; padding: 0 1; }
+#         .composer { dock: bottom; height: 3; }
+#         """
+
+#         BINDINGS = [
+#             ("ctrl+c", "quit", "Quit"),
+#         ]
+
+#         model_id = reactive("")
+#         api_env_var = reactive("")
+
+#         def __init__(self, model_arg: Optional[str], api_key_arg: Optional[str]):
+#             super().__init__()
+#             self._model_arg = model_arg
+#             self._api_key_arg = api_key_arg
+#             self._conversation = Conversation.system("You are a helpful assistant.")
+#             self._client = None
+
+#         def compose(self) -> ComposeResult: # type: ignore[override]
+#             yield Header(show_clock=True)
+#             self.body = Container(id="screen")
+#             yield self.body
+#             yield Footer()
+
+#         def on_mount(self): # type: ignore[override]
+#             # Step 1: pick model if not provided
+#             if not self._model_arg:
+#                 self.model_picker = ModelPicker()
+#                 self.body.mount(self.model_picker)
+#             else:
+#                 self.model_id = self._model_arg
+#                 self._after_model_selected()
+
+#         def action_quit(self) -> None: # type: ignore[override]
+#             self.exit()
+
+#         def _after_model_selected(self):
+#             # Resolve API requirement
+#             model = APIModel.from_registry(self.model_id)
+#             self.api_env_var = model.api_key_env_var or ""
+#             if self.api_env_var and not os.getenv(self.api_env_var):
+#                 if self._api_key_arg:
+#                     os.environ[self.api_env_var] = self._api_key_arg
+#                     self._show_chat()
+#                 else:
+#                     # Prompt for key
+#                     self.body.remove_children()
+#                     self.key_prompt = ApiKeyPrompt(self.api_env_var)
+#                     self.body.mount(self.key_prompt)
+#             else:
+#                 self._show_chat()
+
+#         def model_chosen(self, sel: str) -> None:
+#             """Called by ModelPicker when Enter is pressed on a selection."""
+#             self.model_id = sel
+#             self._after_model_selected()
+
+#         def _show_chat(self):
+#             self.body.remove_children()
+#             # Build UI
+#             self.messages = MessagesView(classes="chat")
+#             self.composer = ChatInput(classes="composer")
+#             self.body.mount(self.messages)
+#             self.body.mount(self.composer)
+#             # Focus input after mounting
+#             self.set_focus(self.composer.input)
+#             # Init client
+#             self._client = LLMClient(model_names=[self.model_id], progress="manual")
+#             # Update header subtitle
+#             self.query_one(Header).sub_title = f"Model: {self.model_id}"
+
+#         async def _send_and_receive(self, text: str):
+#             # Append user message
+#             self._conversation.add(Message.user(text))
+#             self.messages.append_user(text)
+#             # Call model (non-streaming for simplicity across providers)
+#             responses = await self._client.process_prompts_async(
+#                 [self._conversation], return_completions_only=False, show_progress=False
+#             )
+#             resp = responses[0]
+#             if resp and resp.completion:
+#                 self._conversation.add(Message.ai(resp.completion))
+#                 self.messages.append_assistant(resp.completion)
+#             else:
+#                 self.messages.append_assistant("<no response>")
+
+#         async def on_button_pressed(self, event): # type: ignore[override]
+#             if hasattr(event.button, "id"):
+#                 if event.button.id == "save-key":
+#                     key = self.key_prompt.value().strip()
+#                     if self.api_env_var and key:
+#                         os.environ[self.api_env_var] = key
+#                     self._show_chat()
+#                 elif event.button.id == "send":
+#                     text = self.composer.input.value.strip()
+#                     if text:
+#                         self.composer.input.value = ""
+#                         await self._send_and_receive(text)
+
+#         async def on_input_submitted(self, event: Input.Submitted): # type: ignore[override]
+#             if isinstance(event.input.parent, ChatInput):
+#                 text = event.value.strip()
+#                 if text:
+#                     self.composer.input.value = ""
+#                     await self._send_and_receive(text)
+
+
+# def run_interactive(model: Optional[str], api_key: Optional[str]):
+#     if not TEXTUAL_AVAILABLE:
+#         sys.stderr.write(
+#             "Textual is not installed. Please install with `pip install textual` or reinstall lm_deluge.\n"
+#         )
+#         sys.exit(2)
+#     app = DelugeApp(model, api_key) # type: ignore[name-defined]
+#     app.run()
+
+
+# def main():
+#     parser = argparse.ArgumentParser(prog="deluge", description="Deluge CLI")
+#     parser.add_argument("prompt", nargs="*", help="Prompt text (non-interactive -p only)")
+#     parser.add_argument("--model", dest="model", help="Model ID to use")
+#     parser.add_argument("--api-key", dest="api_key", help="API key for chosen model provider")
+#     parser.add_argument(
+#         "-p",
+#         dest="print_mode",
+#         action="store_true",
+#         help="Print single completion to stdout (non-interactive)",
+#     )
+
+#     args = parser.parse_args()
+
+#     if args.print_mode:
+#         # Determine prompt text
+#         prompt_text = " ".join(args.prompt).strip()
+#         if not prompt_text and not sys.stdin.isatty():
+#             prompt_text = sys.stdin.read()
+#         if not prompt_text:
+#             sys.stderr.write("No prompt provided. Pass text or pipe input.\n")
+#             sys.exit(2)
+
+#         # Determine model
+#         model_id = args.model or os.getenv("DELUGE_DEFAULT_MODEL") or "gpt-4o-mini"
+#         # Require API key non-interactively if provider needs it and not set
+#         env_var = APIModel.from_registry(model_id).api_key_env_var or ""
+#         if env_var and not (os.getenv(env_var) or args.api_key):
+#             sys.stderr.write(
+#                 f"Missing API key. Set {env_var} or pass --api-key.\n"
+#             )
+#             sys.exit(2)
+#         run_non_interactive(model_id, prompt_text, args.api_key)
+#         return
+
+#     # Interactive Textual chat
+#     run_interactive(args.model, args.api_key)
+
+
+# if __name__ == "__main__":
+#     main()
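Note: all of cli.py ships commented out, and the matching `deluge` console script in pyproject.toml is commented out too, so no CLI is actually wired up in 0.0.36. For orientation only, here is a sketch of the non-interactive flow the commented `run_non_interactive` describes, written against API names that appear elsewhere in this diff (`LLMClient`, `Conversation.user`, `process_prompts_async`); the top-level import path is an assumption:

```python
import asyncio

from lm_deluge import LLMClient  # assumed export; cli.py itself uses relative imports
from lm_deluge.prompt import Conversation


def run_once(model_id: str, prompt_text: str) -> str:
    # One prompt in, one completion out, mirroring the commented-out run_non_interactive().
    client = LLMClient(model_names=[model_id], progress="manual")
    completions = asyncio.run(
        client.process_prompts_async(
            [Conversation.user(prompt_text)],
            return_completions_only=True,
            show_progress=False,
        )
    )
    return completions[0] or ""


if __name__ == "__main__":
    print(run_once("gpt-4o-mini", "Say hello in one short sentence."))
```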
src/lm_deluge/client.py
@@ -22,6 +22,7 @@ from .models import APIModel, registry
 from .request_context import RequestContext
 from .tracker import StatusTracker
 
+
 # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
 class _LLMClient(BaseModel):
     """
@@ -246,6 +247,7 @@ class _LLMClient(BaseModel):
         self, context: RequestContext, retry_queue: asyncio.Queue | None = None
     ) -> APIResponse:
         """Handle caching and single HTTP call for a request. Failed requests go to retry queue."""
+
         # Check cache first
         def _maybe_postprocess(response: APIResponse):
             if self.postprocess:
@@ -712,6 +714,7 @@ class _LLMClient(BaseModel):
             batch_ids, provider, poll_interval=30
         )
 
+
 # factory function -- allows positional model names,
 # keeps pydantic validation, without sacrificing IDE support
 @overload
@@ -736,7 +739,7 @@ def LLMClient(
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
     progress: Literal["rich", "tqdm", "manual"] = "rich",
-    postprocess: Callable[[APIResponse], APIResponse] | None = None
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient: ...
 
 
@@ -762,7 +765,7 @@ def LLMClient(
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
     progress: Literal["rich", "tqdm", "manual"] = "rich",
-    postprocess: Callable[[APIResponse], APIResponse] | None = None
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient: ...
 
 
@@ -787,7 +790,7 @@ def LLMClient(
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
     progress: Literal["rich", "tqdm", "manual"] = "rich",
-    postprocess: Callable[[APIResponse], APIResponse] | None = None
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient:
     """
     Create an LLMClient with model_names as a positional argument.
@@ -824,5 +827,5 @@ def LLMClient(
         top_logprobs=top_logprobs,
        force_local_mcp=force_local_mcp,
         progress=progress,
-        postprocess=postprocess
+        postprocess=postprocess,
     )
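Note: the client.py hunks above are mostly trailing-comma fixes, but they surface the factory signature, including `postprocess: Callable[[APIResponse], APIResponse] | None`. A hedged sketch of passing such a hook (the whitespace-stripping transform is invented for illustration):

```python
from lm_deluge import LLMClient  # assumed export
from lm_deluge.api_requests.response import APIResponse  # module path taken from this diff


def strip_whitespace(resp: APIResponse) -> APIResponse:
    # Illustrative post-processing step: trim the completion, then hand the response back.
    if resp.completion:
        resp.completion = resp.completion.strip()
    return resp


client = LLMClient("gpt-4o-mini", postprocess=strip_whitespace)
```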
src/lm_deluge/models/__init__.py (new file)
@@ -0,0 +1,144 @@
+from __future__ import annotations
+
+import random
+from dataclasses import dataclass, field
+
+from ..request_context import RequestContext
+
+# Import and register all provider models
+from .anthropic import ANTHROPIC_MODELS
+from .bedrock import BEDROCK_MODELS
+from .cerebras import CEREBRAS_MODELS
+from .cohere import COHERE_MODELS
+from .deepseek import DEEPSEEK_MODELS
+from .fireworks import FIREWORKS_MODELS
+from .google import GOOGLE_MODELS
+from .grok import XAI_MODELS
+from .groq import GROQ_MODELS
+from .meta import META_MODELS
+from .mistral import MISTRAL_MODELS
+from .openai import OPENAI_MODELS
+from .openrouter import OPENROUTER_MODELS
+from .together import TOGETHER_MODELS
+
+
+@dataclass
+class APIModel:
+    id: str
+    name: str
+    api_base: str
+    api_key_env_var: str
+    api_spec: str
+    cached_input_cost: float | None = 0
+    input_cost: float | None = 0 # $ per million input tokens
+    output_cost: float | None = 0 # $ per million output tokens
+    supports_json: bool = False
+    supports_logprobs: bool = False
+    supports_responses: bool = False
+    reasoning_model: bool = False
+    regions: list[str] | dict[str, int] = field(default_factory=list)
+    tokens_per_minute: int | None = None
+    requests_per_minute: int | None = None
+    gpus: list[str] | None = None
+
+    @classmethod
+    def from_registry(cls, name: str):
+        if name not in registry:
+            raise ValueError(f"Model {name} not found in registry")
+        cfg = registry[name]
+        if isinstance(cfg, APIModel):
+            return cfg
+        return cls(**cfg)
+
+    def sample_region(self):
+        if isinstance(self.regions, list):
+            regions = self.regions
+            weights = [1] * len(regions)
+        elif isinstance(self.regions, dict):
+            regions = list(self.regions.keys())
+            weights = self.regions.values()
+        else:
+            raise ValueError("no regions to sample")
+        random.sample(regions, 1, counts=weights)[0]
+
+    def make_request(self, context: RequestContext): # -> "APIRequestBase"
+        from ..api_requests.common import CLASSES
+
+        api_spec = self.api_spec
+        if (
+            context.use_responses_api
+            and self.supports_responses
+            and api_spec == "openai"
+        ):
+            api_spec = "openai-responses"
+
+        request_class = CLASSES.get(api_spec, None)
+        if request_class is None:
+            raise ValueError(f"Unsupported API spec: {api_spec}")
+        return request_class(context=context)
+
+
+registry: dict[str, APIModel] = {}
+
+
+def register_model(
+    id: str,
+    name: str,
+    api_base: str,
+    api_key_env_var: str,
+    api_spec: str,
+    input_cost: float | None = 0, # $ per million input tokens
+    cached_input_cost: float | None = 0,
+    output_cost: float | None = 0, # $ per million output tokens
+    supports_json: bool = False,
+    supports_logprobs: bool = False,
+    supports_responses: bool = False,
+    reasoning_model: bool = False,
+    regions: list[str] | dict[str, int] = field(default_factory=list),
+    tokens_per_minute: int | None = None,
+    requests_per_minute: int | None = None,
+) -> APIModel:
+    """Register a model configuration and return the created APIModel."""
+    model = APIModel(
+        id=id,
+        name=name,
+        api_base=api_base,
+        api_key_env_var=api_key_env_var,
+        api_spec=api_spec,
+        cached_input_cost=cached_input_cost,
+        input_cost=input_cost,
+        output_cost=output_cost,
+        supports_json=supports_json,
+        supports_logprobs=supports_logprobs,
+        supports_responses=supports_responses,
+        reasoning_model=reasoning_model,
+        regions=regions,
+        tokens_per_minute=tokens_per_minute,
+        requests_per_minute=requests_per_minute,
+    )
+    registry[model.id] = model
+    return model
+
+
+# Register all models from all providers
+for model_dict in [
+    ANTHROPIC_MODELS,
+    BEDROCK_MODELS,
+    COHERE_MODELS,
+    DEEPSEEK_MODELS,
+    FIREWORKS_MODELS,
+    GOOGLE_MODELS,
+    XAI_MODELS,
+    META_MODELS,
+    MISTRAL_MODELS,
+    OPENAI_MODELS,
+    OPENROUTER_MODELS,
+    TOGETHER_MODELS,
+    GROQ_MODELS,
+    CEREBRAS_MODELS,
+]:
+    for cfg in model_dict.values():
+        register_model(**cfg)
+
+
+# print("Valid models:", registry.keys())