lm-deluge 0.0.34__tar.gz → 0.0.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic.
- {lm_deluge-0.0.34/src/lm_deluge.egg-info → lm_deluge-0.0.36}/PKG-INFO +1 -1
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/pyproject.toml +6 -2
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/anthropic.py +1 -1
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/gemini.py +4 -2
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/openai.py +17 -4
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/response.py +4 -3
- lm_deluge-0.0.36/src/lm_deluge/cli.py +300 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/client.py +22 -73
- lm_deluge-0.0.36/src/lm_deluge/models/__init__.py +144 -0
- lm_deluge-0.0.36/src/lm_deluge/models/anthropic.py +124 -0
- lm_deluge-0.0.36/src/lm_deluge/models/bedrock.py +99 -0
- lm_deluge-0.0.36/src/lm_deluge/models/cerebras.py +57 -0
- lm_deluge-0.0.36/src/lm_deluge/models/cohere.py +98 -0
- lm_deluge-0.0.36/src/lm_deluge/models/deepseek.py +27 -0
- lm_deluge-0.0.36/src/lm_deluge/models/fireworks.py +16 -0
- lm_deluge-0.0.36/src/lm_deluge/models/google.py +153 -0
- lm_deluge-0.0.36/src/lm_deluge/models/grok.py +38 -0
- lm_deluge-0.0.36/src/lm_deluge/models/groq.py +74 -0
- lm_deluge-0.0.36/src/lm_deluge/models/meta.py +65 -0
- lm_deluge-0.0.36/src/lm_deluge/models/mistral.py +110 -0
- lm_deluge-0.0.36/src/lm_deluge/models/openai.py +318 -0
- lm_deluge-0.0.36/src/lm_deluge/models/openrouter.py +1 -0
- lm_deluge-0.0.36/src/lm_deluge/models/together.py +112 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/prompt.py +2 -2
- lm_deluge-0.0.36/src/lm_deluge/util/harmony.py +47 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36/src/lm_deluge.egg-info}/PKG-INFO +1 -1
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/SOURCES.txt +17 -1
- lm_deluge-0.0.34/src/lm_deluge/models.py +0 -1305
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/LICENSE +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/README.md +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/setup.cfg +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/__init__.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/agent.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/__init__.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/base.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/bedrock.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/common.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/bedrock.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/cohere.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/deepseek.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/mistral.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/deprecated/vertex.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/mistral.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/batches.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/__init__.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/bash.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/computer_use.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/anthropic/editor.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/base.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/built_in_tools/openai.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/cache.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/config.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/embed.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/errors.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/file.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/gemini_limits.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/image.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/__init__.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/classify.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/extract.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/locate.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/ocr.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/score.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/llm_tools/translate.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/request_context.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/rerank.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/tool.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/tracker.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/usage.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/json.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/logprobs.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/spatial.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/validation.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/util/xml.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/dependency_links.txt +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/requires.txt +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge.egg-info/top_level.txt +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/tests/test_builtin_tools.py +0 -0
- {lm_deluge-0.0.34 → lm_deluge-0.0.36}/tests/test_native_mcp_server.py +0 -0
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"]
 
 [project]
 name = "lm_deluge"
-version = "0.0.34"
+version = "0.0.36"
 authors = [{ name = "Benjamin Anderson", email = "ben@trytaylor.ai" }]
 description = "Python utility for using LLM API models."
 readme = "README.md"
@@ -28,5 +28,9 @@ dependencies = [
     "pdf2image",
     "pillow",
     "fastmcp>=2.4",
-    "rich"
+    "rich",
+    # "textual>=0.58.0"
 ]
+
+# [project.scripts]
+# deluge = "lm_deluge.cli:main"
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/anthropic.py
@@ -57,7 +57,7 @@ def _build_anthropic_request(
     # handle thinking
     if model.reasoning_model and sampling_params.reasoning_effort:
         # translate reasoning effort of low, medium, high to budget tokens
-        budget = {"low": 1024, "medium": 4096, "high": 16384}.get(
+        budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}.get(
             sampling_params.reasoning_effort
         )
         request_json["thinking"] = {
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/gemini.py
@@ -45,8 +45,10 @@ async def _build_gemini_request(
         thinking_config = {"includeThoughts": False, "thinkingBudget": budget}
     else:
         thinking_config = {"includeThoughts": True}
-        if effort in {"low", "medium", "high"} and "flash" in model.id:
-            budget = {"low": 1024, "medium": 4096, "high": 16384}[effort]
+        if effort in {"minimal", "low", "medium", "high"} and "flash" in model.id:
+            budget = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}[
+                effort
+            ]
             thinking_config["thinkingBudget"] = budget
     request_json["generationConfig"]["thinkingConfig"] = thinking_config
 
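Both request builders above share the same effort-to-budget table, with the new "minimal" level mapped to a 256-token thinking budget. Below is a minimal standalone sketch of that mapping; the helper name is illustrative and not part of lm_deluge, which keeps the dict inline in each request builder.

EFFORT_TO_BUDGET = {"minimal": 256, "low": 1024, "medium": 4096, "high": 16384}

def thinking_budget(reasoning_effort: str | None) -> int | None:
    # Mirrors the inline dicts in anthropic.py and gemini.py above.
    if reasoning_effort is None:
        return None
    return EFFORT_TO_BUDGET.get(reasoning_effort)

assert thinking_budget("minimal") == 256
assert thinking_budget("high") == 16384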
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/openai.py
@@ -42,8 +42,15 @@ async def _build_oa_chat_request(
         # Disable reasoning for Gemini models when no effort requested
         if "gemini" in model.id:
             effort = "none"
+        elif "gpt-5" in model.id:
+            effort = "minimal"
         else:
             effort = "low"
+        if effort == "minimal" and "gpt-5" not in model.id:
+            print(
+                "WARNING: 'minimal' reasoning effort only allowed for gpt-5. setting to 'low'."
+            )
+            effort = "low"
         request_json["reasoning_effort"] = effort
     else:
         if sampling_params.reasoning_effort:
@@ -122,15 +129,21 @@ class OpenAIRequest(APIRequestBase):
             message = data["choices"][0]["message"]
             finish_reason = data["choices"][0]["finish_reason"]
 
-            # Add text content if present
-            if message.get("content"):
-                parts.append(Text(message["content"]))
-
             # Add thinking content if present (reasoning models)
             if "reasoning_content" in message:
                 thinking = message["reasoning_content"]
                 parts.append(Thinking(thinking))
 
+            # Together AI returns reasoning in a "reasoning"
+            # field which is not correct but whatever
+            if message.get("reasoning"):
+                thinking = message["reasoning"]
+                parts.append(Thinking(thinking))
+
+            # Add text content if present
+            if message.get("content"):
+                parts.append(Text(message["content"]))
+
             # Add tool calls if present
             if "tool_calls" in message:
                 for tool_call in message["tool_calls"]:
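The response-parsing change above moves text extraction after the reasoning checks, so any Thinking part (from reasoning_content, or from Together AI's nonstandard reasoning field) is emitted before the Text part. A self-contained sketch of that ordering, using stand-in dataclasses rather than lm_deluge's own Text/Thinking types:

from dataclasses import dataclass

@dataclass
class Thinking:  # stand-in for lm_deluge's Thinking part
    content: str

@dataclass
class Text:  # stand-in for lm_deluge's Text part
    content: str

def parse_parts(message: dict) -> list:
    parts = []
    if "reasoning_content" in message:  # standard reasoning field
        parts.append(Thinking(message["reasoning_content"]))
    if message.get("reasoning"):  # Together AI's nonstandard field
        parts.append(Thinking(message["reasoning"]))
    if message.get("content"):  # plain text now comes last
        parts.append(Text(message["content"]))
    return parts

parts = parse_parts({"reasoning": "think step by step...", "content": "42"})
assert isinstance(parts[0], Thinking) and isinstance(parts[1], Text)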
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/api_requests/response.py
@@ -89,9 +89,10 @@ class APIResponse:
                 + self.usage.output_tokens * api_model.output_cost / 1e6
             )
         elif self.content is not None and self.completion is not None:
-            print(
-                f"Warning: Completion provided without token counts for model {self.model_internal}."
-            )
+            pass
+            # print(
+            #     f"Warning: Completion provided without token counts for model {self.model_internal}."
+            # )
         if isinstance(self.prompt, Conversation):
             self.prompt = self.prompt.to_log()  # avoid keeping images in memory
 
lm_deluge-0.0.36/src/lm_deluge/cli.py (new file)
@@ -0,0 +1,300 @@
+# import argparse
+# import asyncio
+# import os
+# import sys
+# from typing import Optional
+
+# from .client import LLMClient
+# from .models import registry, APIModel
+# from .prompt import Conversation, Message
+
+
+# def _ensure_api_key_for_model(model_id: str, passed_api_key: Optional[str] = None):
+#     model: APIModel = APIModel.from_registry(model_id)
+#     env_var = model.api_key_env_var or ""
+#     if not env_var:
+#         return  # Some providers (e.g., Bedrock entries) don't use a single key
+#     if os.getenv(env_var):
+#         return
+#     if passed_api_key:
+#         os.environ[env_var] = passed_api_key
+#         return
+#     # If we get here, interactive prompting should occur at the UI layer.
+#     # In non-interactive contexts, we will error before calling this without key.
+
+
+# def run_non_interactive(model_id: str, prompt_text: str, api_key: Optional[str]):
+#     _ensure_api_key_for_model(model_id, api_key)
+#     client = LLMClient(model_names=[model_id], progress="manual")
+#     # Single round, print completion only to stdout
+#     completions = asyncio.run(
+#         client.process_prompts_async(
+#             [Conversation.user(prompt_text)],
+#             return_completions_only=True,
+#             show_progress=False,
+#         )
+#     )
+#     out = completions[0] if completions and completions[0] is not None else ""
+#     # Write raw completion to stdout with no extra decoration
+#     sys.stdout.write(out)
+#     if out and not out.endswith("\n"):
+#         sys.stdout.write("\n")
+
+
+# # -------- Textual UI (interactive chat) --------
+# try:
+#     from textual.app import App, ComposeResult
+#     from textual.containers import Container, Horizontal
+#     from textual.widgets import Footer, Header, Input, Static, Button, ListView, ListItem, Label
+#     from textual.widgets._rich_log import RichLog
+#     from textual.reactive import reactive
+#     TEXTUAL_AVAILABLE = True
+# except Exception:  # pragma: no cover - textual may not be installed in some dev envs
+#     TEXTUAL_AVAILABLE = False
+
+
+# if TEXTUAL_AVAILABLE:
+#     class ModelPicker(Static):
+#         """Minimal model picker: arrows to move, Enter to select."""
+
+#         def __init__(self, preselected: Optional[str] = None):
+#             super().__init__()
+#             self.preselected = preselected
+
+#         def compose(self) -> ComposeResult:  # type: ignore[override]
+#             # Keep it terminal-y: one-line hint + list. No buttons.
+#             yield Static("Pick a model (Enter)", classes="hint")
+#             list_items: list[ListItem] = []
+#             # Curated small set to avoid scrollbars
+#             preferred = [
+#                 "gpt-5",
+#                 "gpt-5-chat",
+#                 "gpt-5-mini",
+#                 "claude-4-sonnet",
+#                 "gemini-2.5-pro",
+#                 "gemini-2.5-flash",
+#                 "gemini-2.0-flash",
+#             ]
+#             for mid in preferred:
+#                 if mid in registry:
+#                     list_items.append(ListItem(Label(mid)))
+#             yield ListView(*list_items, classes="model-list")
+
+#         def on_mount(self) -> None:  # type: ignore[override]
+#             # Focus the list so Enter works immediately
+#             self.query_one(ListView).focus()
+
+#         def get_selected(self) -> Optional[str]:
+#             listview = self.query_one(ListView)
+#             if not listview.index is None and 0 <= listview.index < len(listview.children):
+#                 label = listview.children[listview.index].query_one(Label)
+#                 return label.renderable if isinstance(label.renderable, str) else str(label.renderable)
+#             return None
+
+#         def on_key(self, event):  # type: ignore[override]
+#             # Select current item on Enter
+#             try:
+#                 key = getattr(event, "key", None)
+#             except Exception:
+#                 key = None
+#             if key == "enter":
+#                 sel = self.get_selected()
+#                 if sel:
+#                     # Ask app to proceed with the chosen model
+#                     getattr(self.app, "model_chosen", lambda *_: None)(sel)  # type: ignore[attr-defined]
+
+
+#     class ApiKeyPrompt(Static):
+#         def __init__(self, env_var: str):
+#             super().__init__()
+#             self.env_var = env_var
+#             self.input = Input(password=True, placeholder=f"Enter {env_var}")
+
+#         def compose(self) -> ComposeResult:  # type: ignore[override]
+#             yield Static(f"API key required: set {self.env_var}", classes="title")
+#             yield self.input
+#             yield Button("Save", id="save-key", variant="primary")
+
+#         def value(self) -> str:
+#             return self.input.value
+
+
+#     class MessagesView(RichLog):
+#         def __init__(self, **kwargs):
+#             # Terminal-like log with markup and auto-scroll
+#             super().__init__(wrap=True, markup=True, auto_scroll=True, **kwargs)
+
+#         def append_user(self, text: str):
+#             self.write(f"[bold cyan]You:[/bold cyan] {text}")
+
+#         def append_assistant(self, text: str):
+#             self.write(f"[bold magenta]Model:[/bold magenta] {text}")
+
+
+#     class ChatInput(Horizontal):
+#         def compose(self) -> ComposeResult:  # type: ignore[override]
+#             self.input = Input(placeholder="Type message, Enter to send")
+#             yield self.input
+
+
+#     class DelugeApp(App):
+#         CSS = """
+#         #screen { height: 100%; }
+#         .chat { height: 1fr; padding: 0 1; }
+#         .composer { dock: bottom; height: 3; }
+#         """
+
+#         BINDINGS = [
+#             ("ctrl+c", "quit", "Quit"),
+#         ]
+
+#         model_id = reactive("")
+#         api_env_var = reactive("")
+
+#         def __init__(self, model_arg: Optional[str], api_key_arg: Optional[str]):
+#             super().__init__()
+#             self._model_arg = model_arg
+#             self._api_key_arg = api_key_arg
+#             self._conversation = Conversation.system("You are a helpful assistant.")
+#             self._client = None
+
+#         def compose(self) -> ComposeResult:  # type: ignore[override]
+#             yield Header(show_clock=True)
+#             self.body = Container(id="screen")
+#             yield self.body
+#             yield Footer()
+
+#         def on_mount(self):  # type: ignore[override]
+#             # Step 1: pick model if not provided
+#             if not self._model_arg:
+#                 self.model_picker = ModelPicker()
+#                 self.body.mount(self.model_picker)
+#             else:
+#                 self.model_id = self._model_arg
+#                 self._after_model_selected()
+
+#         def action_quit(self) -> None:  # type: ignore[override]
+#             self.exit()
+
+#         def _after_model_selected(self):
+#             # Resolve API requirement
+#             model = APIModel.from_registry(self.model_id)
+#             self.api_env_var = model.api_key_env_var or ""
+#             if self.api_env_var and not os.getenv(self.api_env_var):
+#                 if self._api_key_arg:
+#                     os.environ[self.api_env_var] = self._api_key_arg
+#                     self._show_chat()
+#                 else:
+#                     # Prompt for key
+#                     self.body.remove_children()
+#                     self.key_prompt = ApiKeyPrompt(self.api_env_var)
+#                     self.body.mount(self.key_prompt)
+#             else:
+#                 self._show_chat()
+
+#         def model_chosen(self, sel: str) -> None:
+#             """Called by ModelPicker when Enter is pressed on a selection."""
+#             self.model_id = sel
+#             self._after_model_selected()
+
+#         def _show_chat(self):
+#             self.body.remove_children()
+#             # Build UI
+#             self.messages = MessagesView(classes="chat")
+#             self.composer = ChatInput(classes="composer")
+#             self.body.mount(self.messages)
+#             self.body.mount(self.composer)
+#             # Focus input after mounting
+#             self.set_focus(self.composer.input)
+#             # Init client
+#             self._client = LLMClient(model_names=[self.model_id], progress="manual")
+#             # Update header subtitle
+#             self.query_one(Header).sub_title = f"Model: {self.model_id}"
+
+#         async def _send_and_receive(self, text: str):
+#             # Append user message
+#             self._conversation.add(Message.user(text))
+#             self.messages.append_user(text)
+#             # Call model (non-streaming for simplicity across providers)
+#             responses = await self._client.process_prompts_async(
+#                 [self._conversation], return_completions_only=False, show_progress=False
+#             )
+#             resp = responses[0]
+#             if resp and resp.completion:
+#                 self._conversation.add(Message.ai(resp.completion))
+#                 self.messages.append_assistant(resp.completion)
+#             else:
+#                 self.messages.append_assistant("<no response>")
+
+#         async def on_button_pressed(self, event):  # type: ignore[override]
+#             if hasattr(event.button, "id"):
+#                 if event.button.id == "save-key":
+#                     key = self.key_prompt.value().strip()
+#                     if self.api_env_var and key:
+#                         os.environ[self.api_env_var] = key
+#                         self._show_chat()
+#                 elif event.button.id == "send":
+#                     text = self.composer.input.value.strip()
+#                     if text:
+#                         self.composer.input.value = ""
+#                         await self._send_and_receive(text)
+
+#         async def on_input_submitted(self, event: Input.Submitted):  # type: ignore[override]
+#             if isinstance(event.input.parent, ChatInput):
+#                 text = event.value.strip()
+#                 if text:
+#                     self.composer.input.value = ""
+#                     await self._send_and_receive(text)
+
+
+# def run_interactive(model: Optional[str], api_key: Optional[str]):
+#     if not TEXTUAL_AVAILABLE:
+#         sys.stderr.write(
+#             "Textual is not installed. Please install with `pip install textual` or reinstall lm_deluge.\n"
+#         )
+#         sys.exit(2)
+#     app = DelugeApp(model, api_key)  # type: ignore[name-defined]
+#     app.run()
+
+
+# def main():
+#     parser = argparse.ArgumentParser(prog="deluge", description="Deluge CLI")
+#     parser.add_argument("prompt", nargs="*", help="Prompt text (non-interactive -p only)")
+#     parser.add_argument("--model", dest="model", help="Model ID to use")
+#     parser.add_argument("--api-key", dest="api_key", help="API key for chosen model provider")
+#     parser.add_argument(
+#         "-p",
+#         dest="print_mode",
+#         action="store_true",
+#         help="Print single completion to stdout (non-interactive)",
+#     )
+
+#     args = parser.parse_args()
+
+#     if args.print_mode:
+#         # Determine prompt text
+#         prompt_text = " ".join(args.prompt).strip()
+#         if not prompt_text and not sys.stdin.isatty():
+#             prompt_text = sys.stdin.read()
+#         if not prompt_text:
+#             sys.stderr.write("No prompt provided. Pass text or pipe input.\n")
+#             sys.exit(2)
+
+#         # Determine model
+#         model_id = args.model or os.getenv("DELUGE_DEFAULT_MODEL") or "gpt-4o-mini"
+#         # Require API key non-interactively if provider needs it and not set
+#         env_var = APIModel.from_registry(model_id).api_key_env_var or ""
+#         if env_var and not (os.getenv(env_var) or args.api_key):
+#             sys.stderr.write(
+#                 f"Missing API key. Set {env_var} or pass --api-key.\n"
+#             )
+#             sys.exit(2)
+#         run_non_interactive(model_id, prompt_text, args.api_key)
+#         return
+
+#     # Interactive Textual chat
+#     run_interactive(args.model, args.api_key)
+
+
+# if __name__ == "__main__":
+#     main()
{lm_deluge-0.0.34 → lm_deluge-0.0.36}/src/lm_deluge/client.py
@@ -1,6 +1,6 @@
 import asyncio
 import random
-from typing import Any, Literal, Self, Sequence, overload
+from typing import Any, Literal, Self, Sequence, Callable, overload
 
 import numpy as np
 import yaml
@@ -23,7 +23,6 @@ from .request_context import RequestContext
 from .tracker import StatusTracker
 
 
-# TODO: get completions as they finish, not all at once at the end.
 # TODO: add optional max_input_tokens to client so we can reject long prompts to prevent abuse
 class _LLMClient(BaseModel):
     """
@@ -55,6 +54,9 @@ class _LLMClient(BaseModel):
     # Progress configuration
     progress: Literal["rich", "tqdm", "manual"] = "rich"
 
+    # Postprocessing - run on every APIResponse
+    postprocess: Callable[[APIResponse], APIResponse] | None = None
+
     # Internal state for async task handling
     _next_task_id: int = PrivateAttr(default=0)
     _tasks: dict[int, asyncio.Task] = PrivateAttr(default_factory=dict)
@@ -196,14 +198,6 @@ class _LLMClient(BaseModel):
         config_dict = yaml.safe_load(open(file_path))
         return cls.from_dict(config_dict)
 
-    @classmethod
-    def basic(cls, model: str | list[str], **kwargs):
-        """
-        Doesn't do anything differently now, kept for backwards compat.
-        """
-        kwargs["model_names"] = model
-        return cls(**kwargs)
-
     def _select_model(self):
         assert isinstance(self.model_weights, list)
         model_idx = np.random.choice(range(len(self.models)), p=self.model_weights)
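The basic() classmethod removed above was only a backwards-compatibility shim; the module-level LLMClient factory (see the overloads near the end of this diff) already accepts model names positionally. A likely migration, assuming the factory behaves as those overloads suggest; the model id is just an example:

from lm_deluge.client import LLMClient

# 0.0.34: client = LLMClient.basic("gpt-4o-mini")
# 0.0.36: pass the model name(s) positionally to the factory instead
client = LLMClient("gpt-4o-mini")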
@@ -253,14 +247,20 @@
         self, context: RequestContext, retry_queue: asyncio.Queue | None = None
     ) -> APIResponse:
         """Handle caching and single HTTP call for a request. Failed requests go to retry queue."""
+
         # Check cache first
+        def _maybe_postprocess(response: APIResponse):
+            if self.postprocess:
+                return self.postprocess(response)
+            return response
+
         if self.cache:
             cached = self.cache.get(context.prompt)
             if cached:
                 cached.local_cache_hit = True
                 if context.status_tracker:
                     context.status_tracker.task_succeeded(context.task_id)
-                return cached
+                return _maybe_postprocess(cached)
 
         # Execute single request
         assert context.status_tracker
@@ -275,7 +275,7 @@
                 self.cache.put(context.prompt, response)
             # Call callback if provided
             context.maybe_callback(response, context.status_tracker)
-            return response
+            return _maybe_postprocess(response)
 
         # Handle error response - add to retry queue if available
         if retry_queue and context.attempts_left > 1:
@@ -303,7 +303,7 @@
 
             # Add to retry queue for later processing
             await retry_queue.put(retry_context)
-            return response  # Return the error response for now
+            return _maybe_postprocess(response)  # Return the error response for now
 
         # No retries left or no retry queue - final failure
         context.status_tracker.task_failed(context.task_id)
@@ -316,7 +316,7 @@
             error_msg += f" Message: {response.error_message}. Giving up."
         print(error_msg)
 
-        return response
+        return _maybe_postprocess(response)
 
     @overload
     async def process_prompts_async(
@@ -570,6 +570,8 @@
                 print(item, end="", flush=True)
             else:
                 # final item
+                if self.postprocess:
+                    return self.postprocess(item)
                 return item
 
     async def run_agent_loop(
@@ -713,65 +715,8 @@
     )
 
 
-#
-#
-#     prompts: list[Conversation],
-#     models: str | list[str],
-#     model_weights: list[float],
-#     sampling_params: list[SamplingParams],
-#     max_tokens_per_minute: int = 500_000,
-#     max_requests_per_minute: int = 1_000,
-# ):
-#     """
-#     Count tokens and estimate costs for a batch of prompts.
-#     """
-#     results = []
-#     for i, prompt in zip(ids, prompts):
-#         # choose a model
-#         model_idx = np.random.choice(range(len(models)), p=model_weights)
-#         model = models[model_idx]
-
-#         # dry run
-#         input_tokens, output_tokens, min_cost, max_cost = prompt.dry_run(
-#             model, sampling_params[model_idx].max_new_tokens
-#         )
-#         results.append(
-#             {
-#                 "id": i,
-#                 "input_tokens": input_tokens,
-#                 "output_tokens": output_tokens,
-#                 "min_cost": min_cost,
-#                 "max_cost": max_cost,
-#             }
-#         )
-
-#     combined_results: dict[str, Any] = {
-#         "total_input_tokens": sum([r["input_tokens"] for r in results]),
-#         "total_output_tokens": sum([r["output_tokens"] for r in results]),
-#         "total_min_cost": sum([r["min_cost"] for r in results]),
-#         "total_max_cost": sum([r["max_cost"] for r in results]),
-#     }
-#     minimum_time_tpm = combined_results["total_input_tokens"] / max_tokens_per_minute
-#     maximum_time_tpm = (
-#         combined_results["total_input_tokens"] + combined_results["total_output_tokens"]
-#     ) / max_tokens_per_minute
-#     minimum_time_rpm = len(prompts) / max_requests_per_minute
-
-#     combined_results["minimum_time"] = max(minimum_time_tpm, minimum_time_rpm)
-#     combined_results["maximum_time"] = max(maximum_time_tpm, minimum_time_rpm)
-#     limiting_factor = None
-#     if minimum_time_rpm > maximum_time_tpm:
-#         limiting_factor = "requests"
-#     elif minimum_time_rpm < minimum_time_tpm:
-#         limiting_factor = "tokens"
-#     else:
-#         limiting_factor = "depends"
-#     combined_results["limiting_factor"] = limiting_factor
-
-#     return combined_results
-
-
-# Clean factory function with perfect IDE support
+# factory function -- allows positional model names,
+# keeps pydantic validation, without sacrificing IDE support
 @overload
 def LLMClient(
     model_names: str,
@@ -794,6 +739,7 @@
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
    progress: Literal["rich", "tqdm", "manual"] = "rich",
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient: ...
 
 
@@ -819,6 +765,7 @@
     top_logprobs: int | None = None,
     force_local_mcp: bool = False,
     progress: Literal["rich", "tqdm", "manual"] = "rich",
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient: ...
 
 
@@ -843,6 +790,7 @@
     top_logprobs: int | None = None,
    force_local_mcp: bool = False,
     progress: Literal["rich", "tqdm", "manual"] = "rich",
+    postprocess: Callable[[APIResponse], APIResponse] | None = None,
 ) -> _LLMClient:
     """
     Create an LLMClient with model_names as a positional argument.
@@ -879,4 +827,5 @@
         top_logprobs=top_logprobs,
         force_local_mcp=force_local_mcp,
         progress=progress,
+        postprocess=postprocess,
     )
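Taken together, the client.py changes thread the new postprocess hook through every return path (cache hits, successes, retried errors, final failures, and the final streamed item), so one callable can inspect or normalize every APIResponse. A rough usage sketch based on the signatures shown in the hunks above; the model id and the log_usage helper are illustrative:

import asyncio

from lm_deluge.client import LLMClient
from lm_deluge.prompt import Conversation

def log_usage(response):
    # Runs on every APIResponse the client hands back, per the patched return paths.
    print(response.model_internal, response.usage)
    return response

client = LLMClient("gpt-4o-mini", postprocess=log_usage)
responses = asyncio.run(
    client.process_prompts_async([Conversation.user("Say hi.")], show_progress=False)
)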