lm-deluge 0.0.89-py3-none-any.whl → 0.0.91-py3-none-any.whl

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (88)
  1. lm_deluge/__init__.py +3 -27
  2. lm_deluge/api_requests/anthropic.py +29 -7
  3. lm_deluge/api_requests/base.py +38 -1
  4. lm_deluge/api_requests/bedrock.py +29 -3
  5. lm_deluge/{request_context.py → api_requests/context.py} +4 -4
  6. lm_deluge/api_requests/gemini.py +30 -14
  7. lm_deluge/api_requests/mistral.py +1 -1
  8. lm_deluge/api_requests/openai.py +34 -5
  9. lm_deluge/batches.py +19 -49
  10. lm_deluge/cache.py +1 -1
  11. lm_deluge/cli.py +672 -300
  12. lm_deluge/{client.py → client/__init__.py} +42 -13
  13. lm_deluge/config.py +9 -31
  14. lm_deluge/embed.py +2 -6
  15. lm_deluge/models/__init__.py +138 -29
  16. lm_deluge/models/anthropic.py +32 -24
  17. lm_deluge/models/bedrock.py +9 -0
  18. lm_deluge/models/cerebras.py +2 -0
  19. lm_deluge/models/cohere.py +2 -0
  20. lm_deluge/models/google.py +13 -0
  21. lm_deluge/models/grok.py +4 -0
  22. lm_deluge/models/groq.py +2 -0
  23. lm_deluge/models/meta.py +2 -0
  24. lm_deluge/models/minimax.py +9 -1
  25. lm_deluge/models/openai.py +24 -1
  26. lm_deluge/models/openrouter.py +155 -1
  27. lm_deluge/models/together.py +3 -0
  28. lm_deluge/models/zai.py +50 -1
  29. lm_deluge/pipelines/extract.py +4 -5
  30. lm_deluge/pipelines/gepa/__init__.py +1 -1
  31. lm_deluge/pipelines/gepa/docs/samples.py +19 -10
  32. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
  33. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
  34. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
  35. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
  36. lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
  37. lm_deluge/prompt/__init__.py +45 -0
  38. lm_deluge/{prompt.py → prompt/conversation.py} +165 -869
  39. lm_deluge/{image.py → prompt/image.py} +0 -10
  40. lm_deluge/prompt/message.py +571 -0
  41. lm_deluge/prompt/serialization.py +21 -0
  42. lm_deluge/prompt/signatures.py +77 -0
  43. lm_deluge/prompt/text.py +47 -0
  44. lm_deluge/prompt/thinking.py +55 -0
  45. lm_deluge/prompt/tool_calls.py +245 -0
  46. lm_deluge/server/__init__.py +24 -0
  47. lm_deluge/server/__main__.py +144 -0
  48. lm_deluge/server/adapters.py +369 -0
  49. lm_deluge/server/app.py +388 -0
  50. lm_deluge/server/auth.py +71 -0
  51. lm_deluge/server/model_policy.py +215 -0
  52. lm_deluge/server/models_anthropic.py +172 -0
  53. lm_deluge/server/models_openai.py +175 -0
  54. lm_deluge/skills/anthropic.py +0 -0
  55. lm_deluge/skills/compat.py +0 -0
  56. lm_deluge/tool/__init__.py +78 -19
  57. lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
  58. lm_deluge/tool/cua/actions.py +26 -26
  59. lm_deluge/tool/cua/batch.py +1 -2
  60. lm_deluge/tool/cua/kernel.py +1 -1
  61. lm_deluge/tool/prefab/filesystem.py +2 -2
  62. lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
  63. lm_deluge/tool/prefab/memory.py +3 -1
  64. lm_deluge/tool/prefab/otc/executor.py +3 -3
  65. lm_deluge/tool/prefab/random.py +30 -54
  66. lm_deluge/tool/prefab/rlm/__init__.py +2 -2
  67. lm_deluge/tool/prefab/rlm/executor.py +1 -1
  68. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  69. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  70. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  71. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  72. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  73. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +829 -0
  74. lm_deluge/tool/prefab/skills.py +0 -0
  75. lm_deluge/tool/prefab/subagents.py +1 -1
  76. lm_deluge/util/logprobs.py +4 -4
  77. lm_deluge/util/schema.py +6 -6
  78. lm_deluge/util/validation.py +14 -9
  79. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/METADATA +12 -12
  80. lm_deluge-0.0.91.dist-info/RECORD +140 -0
  81. lm_deluge-0.0.91.dist-info/entry_points.txt +3 -0
  82. lm_deluge/mock_openai.py +0 -643
  83. lm_deluge/tool/prefab/sandbox.py +0 -1621
  84. lm_deluge-0.0.89.dist-info/RECORD +0 -117
  85. /lm_deluge/{file.py → prompt/file.py} +0 -0
  86. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/WHEEL +0 -0
  87. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/licenses/LICENSE +0 -0
  88. {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/top_level.txt +0 -0
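Beyond the rewritten CLI, the most visible structural changes are the split of lm_deluge/prompt.py into a prompt/ package (conversation.py, message.py, text.py, thinking.py, tool_calls.py, image.py, file.py, serialization.py, signatures.py), the move of client.py to client/__init__.py, and the replacement of the monolithic tool/prefab/sandbox.py with per-backend modules (daytona, docker, fargate, modal, seatbelt). Judging from the imports used by the new cli.py below, the familiar top-level names still resolve; the following import sketch is inferred from this diff, not from the package's documentation:

    # Paths implied by the 0.0.91 layout (inferred from the new cli.py, not from docs)
    from lm_deluge.client import LLMClient             # client.py -> client/__init__.py
    from lm_deluge.prompt import Conversation          # prompt.py -> prompt/ package
    from lm_deluge.prompt.text import Text             # new prompt/text.py
    from lm_deluge.prompt.tool_calls import ToolCall   # new prompt/tool_calls.py
    from lm_deluge.prompt.thinking import Thinking     # new prompt/thinking.py
    from lm_deluge.tool.prefab.sandbox import DockerSandbox, SeatbeltSandbox

    # Builder-style construction, as used by cmd_run below
    conv = Conversation().system("You are terse.").user("What is 2+2?")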
lm_deluge/cli.py CHANGED
@@ -1,300 +1,672 @@
1
- # import argparse
2
- # import asyncio
3
- # import os
4
- # import sys
5
- # from typing import Optional
6
-
7
- # from .client import LLMClient
8
- # from .models import registry, APIModel
9
- # from .prompt import Conversation, Message
10
-
11
-
12
- # def _ensure_api_key_for_model(model_id: str, passed_api_key: Optional[str] = None):
13
- # model: APIModel = APIModel.from_registry(model_id)
14
- # env_var = model.api_key_env_var or ""
15
- # if not env_var:
16
- # return # Some providers (e.g., Bedrock entries) don't use a single key
17
- # if os.getenv(env_var):
18
- # return
19
- # if passed_api_key:
20
- # os.environ[env_var] = passed_api_key
21
- # return
22
- # # If we get here, interactive prompting should occur at the UI layer.
23
- # # In non-interactive contexts, we will error before calling this without key.
24
-
25
-
26
- # def run_non_interactive(model_id: str, prompt_text: str, api_key: Optional[str]):
27
- # _ensure_api_key_for_model(model_id, api_key)
28
- # client = LLMClient(model_names=[model_id], progress="manual")
29
- # # Single round, print completion only to stdout
30
- # completions = asyncio.run(
31
- # client.process_prompts_async(
32
- # [Conversation.user(prompt_text)],
33
- # return_completions_only=True,
34
- # show_progress=False,
35
- # )
36
- # )
37
- # out = completions[0] if completions and completions[0] is not None else ""
38
- # # Write raw completion to stdout with no extra decoration
39
- # sys.stdout.write(out)
40
- # if out and not out.endswith("\n"):
41
- # sys.stdout.write("\n")
42
-
43
-
44
- # # -------- Textual UI (interactive chat) --------
45
- # try:
46
- # from textual.app import App, ComposeResult
47
- # from textual.containers import Container, Horizontal
48
- # from textual.widgets import Footer, Header, Input, Static, Button, ListView, ListItem, Label
49
- # from textual.widgets._rich_log import RichLog
50
- # from textual.reactive import reactive
51
- # TEXTUAL_AVAILABLE = True
52
- # except Exception: # pragma: no cover - textual may not be installed in some dev envs
53
- # TEXTUAL_AVAILABLE = False
54
-
55
-
56
- # if TEXTUAL_AVAILABLE:
57
- # class ModelPicker(Static):
58
- # """Minimal model picker: arrows to move, Enter to select."""
59
-
60
- # def __init__(self, preselected: Optional[str] = None):
61
- # super().__init__()
62
- # self.preselected = preselected
63
-
64
- # def compose(self) -> ComposeResult: # type: ignore[override]
65
- # # Keep it terminal-y: one-line hint + list. No buttons.
66
- # yield Static("Pick a model (Enter)", classes="hint")
67
- # list_items: list[ListItem] = []
68
- # # Curated small set to avoid scrollbars
69
- # preferred = [
70
- # "gpt-5",
71
- # "gpt-5-chat",
72
- # "gpt-5-mini",
73
- # "claude-4-sonnet",
74
- # "gemini-2.5-pro",
75
- # "gemini-2.5-flash",
76
- # "gemini-2.0-flash",
77
- # ]
78
- # for mid in preferred:
79
- # if mid in registry:
80
- # list_items.append(ListItem(Label(mid)))
81
- # yield ListView(*list_items, classes="model-list")
82
-
83
- # def on_mount(self) -> None: # type: ignore[override]
84
- # # Focus the list so Enter works immediately
85
- # self.query_one(ListView).focus()
86
-
87
- # def get_selected(self) -> Optional[str]:
88
- # listview = self.query_one(ListView)
89
- # if not listview.index is None and 0 <= listview.index < len(listview.children):
90
- # label = listview.children[listview.index].query_one(Label)
91
- # return label.renderable if isinstance(label.renderable, str) else str(label.renderable)
92
- # return None
93
-
94
- # def on_key(self, event): # type: ignore[override]
95
- # # Select current item on Enter
96
- # try:
97
- # key = getattr(event, "key", None)
98
- # except Exception:
99
- # key = None
100
- # if key == "enter":
101
- # sel = self.get_selected()
102
- # if sel:
103
- # # Ask app to proceed with the chosen model
104
- # getattr(self.app, "model_chosen", lambda *_: None)(sel) # type: ignore[attr-defined]
105
-
106
-
107
- # class ApiKeyPrompt(Static):
108
- # def __init__(self, env_var: str):
109
- # super().__init__()
110
- # self.env_var = env_var
111
- # self.input = Input(password=True, placeholder=f"Enter {env_var}")
112
-
113
- # def compose(self) -> ComposeResult: # type: ignore[override]
114
- # yield Static(f"API key required: set {self.env_var}", classes="title")
115
- # yield self.input
116
- # yield Button("Save", id="save-key", variant="primary")
117
-
118
- # def value(self) -> str:
119
- # return self.input.value
120
-
121
-
122
- # class MessagesView(RichLog):
123
- # def __init__(self, **kwargs):
124
- # # Terminal-like log with markup and auto-scroll
125
- # super().__init__(wrap=True, markup=True, auto_scroll=True, **kwargs)
126
-
127
- # def append_user(self, text: str):
128
- # self.write(f"[bold cyan]You:[/bold cyan] {text}")
129
-
130
- # def append_assistant(self, text: str):
131
- # self.write(f"[bold magenta]Model:[/bold magenta] {text}")
132
-
133
-
134
- # class ChatInput(Horizontal):
135
- # def compose(self) -> ComposeResult: # type: ignore[override]
136
- # self.input = Input(placeholder="Type message, Enter to send")
137
- # yield self.input
138
-
139
-
140
- # class DelugeApp(App):
141
- # CSS = """
142
- # #screen { height: 100%; }
143
- # .chat { height: 1fr; padding: 0 1; }
144
- # .composer { dock: bottom; height: 3; }
145
- # """
146
-
147
- # BINDINGS = [
148
- # ("ctrl+c", "quit", "Quit"),
149
- # ]
150
-
151
- # model_id = reactive("")
152
- # api_env_var = reactive("")
153
-
154
- # def __init__(self, model_arg: Optional[str], api_key_arg: Optional[str]):
155
- # super().__init__()
156
- # self._model_arg = model_arg
157
- # self._api_key_arg = api_key_arg
158
- # self._conversation = Conversation.system("You are a helpful assistant.")
159
- # self._client = None
160
-
161
- # def compose(self) -> ComposeResult: # type: ignore[override]
162
- # yield Header(show_clock=True)
163
- # self.body = Container(id="screen")
164
- # yield self.body
165
- # yield Footer()
166
-
167
- # def on_mount(self): # type: ignore[override]
168
- # # Step 1: pick model if not provided
169
- # if not self._model_arg:
170
- # self.model_picker = ModelPicker()
171
- # self.body.mount(self.model_picker)
172
- # else:
173
- # self.model_id = self._model_arg
174
- # self._after_model_selected()
175
-
176
- # def action_quit(self) -> None: # type: ignore[override]
177
- # self.exit()
178
-
179
- # def _after_model_selected(self):
180
- # # Resolve API requirement
181
- # model = APIModel.from_registry(self.model_id)
182
- # self.api_env_var = model.api_key_env_var or ""
183
- # if self.api_env_var and not os.getenv(self.api_env_var):
184
- # if self._api_key_arg:
185
- # os.environ[self.api_env_var] = self._api_key_arg
186
- # self._show_chat()
187
- # else:
188
- # # Prompt for key
189
- # self.body.remove_children()
190
- # self.key_prompt = ApiKeyPrompt(self.api_env_var)
191
- # self.body.mount(self.key_prompt)
192
- # else:
193
- # self._show_chat()
194
-
195
- # def model_chosen(self, sel: str) -> None:
196
- # """Called by ModelPicker when Enter is pressed on a selection."""
197
- # self.model_id = sel
198
- # self._after_model_selected()
199
-
200
- # def _show_chat(self):
201
- # self.body.remove_children()
202
- # # Build UI
203
- # self.messages = MessagesView(classes="chat")
204
- # self.composer = ChatInput(classes="composer")
205
- # self.body.mount(self.messages)
206
- # self.body.mount(self.composer)
207
- # # Focus input after mounting
208
- # self.set_focus(self.composer.input)
209
- # # Init client
210
- # self._client = LLMClient(model_names=[self.model_id], progress="manual")
211
- # # Update header subtitle
212
- # self.query_one(Header).sub_title = f"Model: {self.model_id}"
213
-
214
- # async def _send_and_receive(self, text: str):
215
- # # Append user message
216
- # self._conversation.add(Message.user(text))
217
- # self.messages.append_user(text)
218
- # # Call model (non-streaming for simplicity across providers)
219
- # responses = await self._client.process_prompts_async(
220
- # [self._conversation], return_completions_only=False, show_progress=False
221
- # )
222
- # resp = responses[0]
223
- # if resp and resp.completion:
224
- # self._conversation.add(Message.ai(resp.completion))
225
- # self.messages.append_assistant(resp.completion)
226
- # else:
227
- # self.messages.append_assistant("<no response>")
228
-
229
- # async def on_button_pressed(self, event): # type: ignore[override]
230
- # if hasattr(event.button, "id"):
231
- # if event.button.id == "save-key":
232
- # key = self.key_prompt.value().strip()
233
- # if self.api_env_var and key:
234
- # os.environ[self.api_env_var] = key
235
- # self._show_chat()
236
- # elif event.button.id == "send":
237
- # text = self.composer.input.value.strip()
238
- # if text:
239
- # self.composer.input.value = ""
240
- # await self._send_and_receive(text)
241
-
242
- # async def on_input_submitted(self, event: Input.Submitted): # type: ignore[override]
243
- # if isinstance(event.input.parent, ChatInput):
244
- # text = event.value.strip()
245
- # if text:
246
- # self.composer.input.value = ""
247
- # await self._send_and_receive(text)
248
-
249
-
250
- # def run_interactive(model: Optional[str], api_key: Optional[str]):
251
- # if not TEXTUAL_AVAILABLE:
252
- # sys.stderr.write(
253
- # "Textual is not installed. Please install with `pip install textual` or reinstall lm_deluge.\n"
254
- # )
255
- # sys.exit(2)
256
- # app = DelugeApp(model, api_key) # type: ignore[name-defined]
257
- # app.run()
258
-
259
-
260
- # def main():
261
- # parser = argparse.ArgumentParser(prog="deluge", description="Deluge CLI")
262
- # parser.add_argument("prompt", nargs="*", help="Prompt text (non-interactive -p only)")
263
- # parser.add_argument("--model", dest="model", help="Model ID to use")
264
- # parser.add_argument("--api-key", dest="api_key", help="API key for chosen model provider")
265
- # parser.add_argument(
266
- # "-p",
267
- # dest="print_mode",
268
- # action="store_true",
269
- # help="Print single completion to stdout (non-interactive)",
270
- # )
271
-
272
- # args = parser.parse_args()
273
-
274
- # if args.print_mode:
275
- # # Determine prompt text
276
- # prompt_text = " ".join(args.prompt).strip()
277
- # if not prompt_text and not sys.stdin.isatty():
278
- # prompt_text = sys.stdin.read()
279
- # if not prompt_text:
280
- # sys.stderr.write("No prompt provided. Pass text or pipe input.\n")
281
- # sys.exit(2)
282
-
283
- # # Determine model
284
- # model_id = args.model or os.getenv("DELUGE_DEFAULT_MODEL") or "gpt-4o-mini"
285
- # # Require API key non-interactively if provider needs it and not set
286
- # env_var = APIModel.from_registry(model_id).api_key_env_var or ""
287
- # if env_var and not (os.getenv(env_var) or args.api_key):
288
- # sys.stderr.write(
289
- # f"Missing API key. Set {env_var} or pass --api-key.\n"
290
- # )
291
- # sys.exit(2)
292
- # run_non_interactive(model_id, prompt_text, args.api_key)
293
- # return
294
-
295
- # # Interactive Textual chat
296
- # run_interactive(args.model, args.api_key)
297
-
298
-
299
- # if __name__ == "__main__":
300
- # main()
1
+ """
2
+ LM-Deluge CLI
3
+
4
+ Usage:
5
+ deluge list [--provider PROVIDER] [--name NAME] [--json] ...
6
+ deluge run MODEL [--input INPUT | --file FILE] [--max-tokens N] [--temperature T] ...
7
+ deluge agent MODEL [--mcp-config FILE] [--prefab TOOLS] [--input INPUT] ...
8
+
9
+ Examples:
10
+ deluge list
11
+ deluge list --provider anthropic --reasoning
12
+ deluge list --name claude --json
13
+ deluge run claude-3.5-haiku -i "What is 2+2?"
14
+ echo "Hello" | deluge run gpt-4.1-mini
15
+ deluge run claude-4-sonnet --file prompt.txt --max-tokens 4096
16
+ deluge agent claude-3.5-haiku --mcp-config mcp.json -i "Search for AI news"
17
+ deluge agent claude-4-sonnet --prefab todo,memory -i "Create a task list"
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import asyncio
24
+ import json
25
+ import sys
26
+ from typing import Any
27
+
28
+ from .models import find_models, APIModel
29
+ from .client import LLMClient
30
+ from .prompt import Conversation
31
+
32
+
33
+ def _model_to_dict(model: APIModel) -> dict[str, Any]:
34
+ """Convert APIModel to a JSON-serializable dict."""
35
+ return {
36
+ "id": model.id,
37
+ "name": model.name,
38
+ "provider": model.provider,
39
+ "api_spec": model.api_spec,
40
+ "input_cost": model.input_cost,
41
+ "output_cost": model.output_cost,
42
+ "supports_json": model.supports_json,
43
+ "supports_images": model.supports_images,
44
+ "supports_logprobs": model.supports_logprobs,
45
+ "reasoning_model": model.reasoning_model,
46
+ }
47
+
48
+
49
+ def cmd_list(args: argparse.Namespace) -> int:
50
+ """List models matching the given criteria."""
51
+ # Convert boolean flags: only pass True if set, None otherwise
52
+ models = find_models(
53
+ provider=args.provider,
54
+ supports_json=True if args.json_mode else None,
55
+ supports_images=True if args.images else None,
56
+ supports_logprobs=True if args.logprobs else None,
57
+ reasoning_model=True if args.reasoning else None,
58
+ min_input_cost=args.min_input_cost,
59
+ max_input_cost=args.max_input_cost,
60
+ min_output_cost=args.min_output_cost,
61
+ max_output_cost=args.max_output_cost,
62
+ name_contains=args.name,
63
+ sort_by=args.sort,
64
+ limit=args.limit,
65
+ )
66
+
67
+ if args.json:
68
+ output = [_model_to_dict(m) for m in models]
69
+ print(json.dumps(output, indent=2))
70
+ else:
71
+ if not models:
72
+ print("No models found matching criteria.", file=sys.stderr)
73
+ return 0
74
+
75
+ # Calculate column widths
76
+ id_width = max(len(m.id) for m in models)
77
+ provider_width = max(len(m.provider) for m in models)
78
+
79
+ # Header
80
+ print(
81
+ f"{'MODEL':<{id_width}} {'PROVIDER':<{provider_width}} {'INPUT $/M':>10} {'OUTPUT $/M':>10} FLAGS"
82
+ )
83
+ print("-" * (id_width + provider_width + 40))
84
+
85
+ for m in models:
86
+ flags = []
87
+ if m.supports_json:
88
+ flags.append("json")
89
+ if m.supports_images:
90
+ flags.append("img")
91
+ if m.supports_logprobs:
92
+ flags.append("logp")
93
+ if m.reasoning_model:
94
+ flags.append("reason")
95
+
96
+ input_cost = f"${m.input_cost:.2f}" if m.input_cost is not None else "N/A"
97
+ output_cost = (
98
+ f"${m.output_cost:.2f}" if m.output_cost is not None else "N/A"
99
+ )
100
+
101
+ print(
102
+ f"{m.id:<{id_width}} {m.provider:<{provider_width}} {input_cost:>10} {output_cost:>10} {','.join(flags)}"
103
+ )
104
+
105
+ print(f"\nTotal: {len(models)} models")
106
+
107
+ return 0
108
+
109
+
110
+ def cmd_run(args: argparse.Namespace) -> int:
111
+ """Run a model on input and output JSON to stdout."""
112
+ # Determine input text
113
+ if args.input:
114
+ prompt_text = args.input
115
+ elif args.file:
116
+ try:
117
+ with open(args.file, "r") as f:
118
+ prompt_text = f.read()
119
+ except FileNotFoundError:
120
+ print(
121
+ json.dumps({"error": f"File not found: {args.file}"}), file=sys.stdout
122
+ )
123
+ return 1
124
+ except Exception as e:
125
+ print(json.dumps({"error": f"Failed to read file: {e}"}), file=sys.stdout)
126
+ return 1
127
+ elif not sys.stdin.isatty():
128
+ prompt_text = sys.stdin.read()
129
+ else:
130
+ print(
131
+ json.dumps(
132
+ {"error": "No input provided. Use --input, --file, or pipe to stdin."}
133
+ ),
134
+ file=sys.stdout,
135
+ )
136
+ return 1
137
+
138
+ if not prompt_text.strip():
139
+ print(json.dumps({"error": "Empty input provided."}), file=sys.stdout)
140
+ return 1
141
+
142
+ # Build conversation
143
+ image = args.image if hasattr(args, "image") else None
144
+ if args.system:
145
+ conv = Conversation().system(args.system).user(prompt_text, image=image)
146
+ else:
147
+ conv = Conversation().user(prompt_text, image=image)
148
+
149
+ # Build client params
150
+ client_kwargs: dict[str, Any] = {
151
+ "model_names": args.model,
152
+ "max_new_tokens": args.max_tokens,
153
+ }
154
+ if args.temperature is not None:
155
+ client_kwargs["temperature"] = args.temperature
156
+
157
+ try:
158
+ client = LLMClient(**client_kwargs)
159
+ client.open(show_progress=False)
160
+ response = asyncio.run(client.start(conv))
161
+ except ValueError as e:
162
+ print(json.dumps({"error": str(e)}), file=sys.stdout)
163
+ return 1
164
+ except Exception as e:
165
+ print(json.dumps({"error": f"Request failed: {e}"}), file=sys.stdout)
166
+ return 1
167
+
168
+ # Build output
169
+ output: dict[str, Any] = {
170
+ "model": args.model,
171
+ "completion": response.completion if response.completion else None,
172
+ "is_error": response.is_error,
173
+ }
174
+
175
+ if response.is_error:
176
+ output["error_message"] = response.error_message
177
+
178
+ if response.usage:
179
+ output["usage"] = {
180
+ "input_tokens": response.usage.input_tokens,
181
+ "output_tokens": response.usage.output_tokens,
182
+ }
183
+
184
+ if response.cost is not None:
185
+ output["cost"] = response.cost
186
+
187
+ if args.verbose and response.finish_reason:
188
+ output["finish_reason"] = response.finish_reason
189
+
190
+ print(json.dumps(output, indent=2 if args.pretty else None))
191
+ return 0 if not response.is_error else 1
192
+
193
+
194
+ def _print_json(obj: dict[str, Any]) -> None:
195
+ """Print JSON and flush immediately for streaming."""
196
+ print(json.dumps(obj), flush=True)
197
+
198
+
199
+ def cmd_agent(args: argparse.Namespace) -> int:
200
+ """Run an agent loop with tools and output JSON blocks for each content piece."""
201
+ from .tool import Tool, MCPServer
202
+ from .prompt.text import Text
203
+ from .prompt.tool_calls import ToolCall
204
+ from .prompt.thinking import Thinking
205
+
206
+ # Determine input text
207
+ if args.input:
208
+ prompt_text = args.input
209
+ elif args.file:
210
+ try:
211
+ with open(args.file, "r") as f:
212
+ prompt_text = f.read()
213
+ except FileNotFoundError:
214
+ _print_json({"type": "error", "error": f"File not found: {args.file}"})
215
+ return 1
216
+ except Exception as e:
217
+ _print_json({"type": "error", "error": f"Failed to read file: {e}"})
218
+ return 1
219
+ elif not sys.stdin.isatty():
220
+ prompt_text = sys.stdin.read()
221
+ else:
222
+ _print_json(
223
+ {
224
+ "type": "error",
225
+ "error": "No input provided. Use --input, --file, or pipe to stdin.",
226
+ }
227
+ )
228
+ return 1
229
+
230
+ if not prompt_text.strip():
231
+ _print_json({"type": "error", "error": "Empty input provided."})
232
+ return 1
233
+
234
+ def print_message_parts(msg_role: str, parts: list) -> None:
235
+ """Print JSON for each part of a message."""
236
+ for part in parts:
237
+ if isinstance(part, Text):
238
+ _print_json({"type": "text", "role": msg_role, "content": part.text})
239
+ elif isinstance(part, ToolCall):
240
+ _print_json(
241
+ {
242
+ "type": "tool_call",
243
+ "id": part.id,
244
+ "name": part.name,
245
+ "arguments": part.arguments,
246
+ }
247
+ )
248
+ elif isinstance(part, Thinking):
249
+ _print_json({"type": "thinking", "content": part.content})
250
+
251
+ async def run_agent() -> int:
252
+ tools: list[Any] = []
253
+ tool_map: dict[str, Tool] = {}
254
+
255
+ # Load MCP tools from config
256
+ if args.mcp_config:
257
+ try:
258
+ import json5
259
+
260
+ with open(args.mcp_config, "r") as f:
261
+ mcp_config = json5.load(f)
262
+ # URL-based servers -> MCPServer objects (provider-native)
263
+ mcp_servers = MCPServer.from_mcp_config(mcp_config)
264
+ tools.extend(mcp_servers)
265
+ # Expand MCP servers to tools for local execution
266
+ for server in mcp_servers:
267
+ server_tools = await server.to_tools()
268
+ for t in server_tools:
269
+ tool_map[t.name] = t
270
+ # Command-based servers -> Tool objects (local execution)
271
+ cmd_tools = await Tool.from_mcp_config(mcp_config)
272
+ tools.extend(cmd_tools)
273
+ for t in cmd_tools:
274
+ tool_map[t.name] = t
275
+ except FileNotFoundError:
276
+ _print_json(
277
+ {
278
+ "type": "error",
279
+ "error": f"MCP config not found: {args.mcp_config}",
280
+ }
281
+ )
282
+ return 1
283
+ except Exception as e:
284
+ _print_json(
285
+ {"type": "error", "error": f"Failed to load MCP config: {e}"}
286
+ )
287
+ return 1
288
+
289
+ # Load prefab tools
290
+ if args.prefab:
291
+ prefab_names = [p.strip() for p in args.prefab.split(",")]
292
+ for name in prefab_names:
293
+ try:
294
+ prefab_tools: list[Tool] = []
295
+ if name == "todo":
296
+ from .tool.prefab import TodoManager
297
+
298
+ prefab_tools = TodoManager().get_tools()
299
+ elif name == "memory":
300
+ from .tool.prefab.memory import MemoryManager
301
+
302
+ prefab_tools = MemoryManager().get_tools()
303
+ elif name == "filesystem":
304
+ from .tool.prefab import FilesystemManager
305
+
306
+ prefab_tools = FilesystemManager().get_tools()
307
+ elif name == "sandbox":
308
+ import platform
309
+
310
+ if platform.system() == "Darwin":
311
+ from .tool.prefab.sandbox import SeatbeltSandbox
312
+
313
+ sandbox = SeatbeltSandbox()
314
+ await sandbox.__aenter__()
315
+ prefab_tools = sandbox.get_tools()
316
+ else:
317
+ from .tool.prefab.sandbox import DockerSandbox
318
+
319
+ sandbox = DockerSandbox()
320
+ await sandbox.__aenter__()
321
+ prefab_tools = sandbox.get_tools()
322
+ else:
323
+ _print_json(
324
+ {
325
+ "type": "error",
326
+ "error": f"Unknown prefab tool: {name}. Available: todo, memory, filesystem, sandbox",
327
+ }
328
+ )
329
+ return 1
330
+ tools.extend(prefab_tools)
331
+ for t in prefab_tools:
332
+ tool_map[t.name] = t
333
+ except ImportError as e:
334
+ _print_json(
335
+ {
336
+ "type": "error",
337
+ "error": f"Failed to load prefab '{name}': {e}",
338
+ }
339
+ )
340
+ return 1
341
+
342
+ # Build conversation
343
+ image = args.image if hasattr(args, "image") else None
344
+ if args.system:
345
+ conv = Conversation().system(args.system).user(prompt_text, image=image)
346
+ else:
347
+ conv = Conversation().user(prompt_text, image=image)
348
+
349
+ # Print initial user message
350
+ _print_json({"type": "text", "role": "user", "content": prompt_text})
351
+
352
+ # Build client
353
+ client_kwargs: dict[str, Any] = {
354
+ "model_names": args.model,
355
+ "max_new_tokens": args.max_tokens,
356
+ }
357
+ if args.temperature is not None:
358
+ client_kwargs["temperature"] = args.temperature
359
+
360
+ try:
361
+ client = LLMClient(**client_kwargs)
362
+ client.open(show_progress=False)
363
+
364
+ # Manual agent loop with streaming output
365
+ total_usage = {"input_tokens": 0, "output_tokens": 0}
366
+ total_cost = 0.0
367
+ last_response = None
368
+ round_num = 0
369
+
370
+ for round_num in range(args.max_rounds):
371
+ # Get model response
372
+ response = await client.start(conv, tools=tools)
373
+ last_response = response
374
+
375
+ if response.is_error:
376
+ _print_json({"type": "error", "error": response.error_message})
377
+ break
378
+
379
+ # Track usage
380
+ if response.usage:
381
+ total_usage["input_tokens"] += response.usage.input_tokens or 0
382
+ total_usage["output_tokens"] += response.usage.output_tokens or 0
383
+ if response.cost:
384
+ total_cost += response.cost
385
+
386
+ # Print assistant response parts
387
+ if response.content:
388
+ print_message_parts("assistant", response.content.parts)
389
+
390
+ # Check for tool calls
391
+ tool_calls = response.content.tool_calls
392
+ if not tool_calls:
393
+ # No tool calls, we're done
394
+ break
395
+
396
+ # Add assistant message to conversation
397
+ conv = conv.add(response.content)
398
+
399
+ # Execute tool calls and print results
400
+ for call in tool_calls:
401
+ tool_obj = tool_map.get(call.name)
402
+ if tool_obj:
403
+ try:
404
+ result = await tool_obj.acall(**call.arguments)
405
+ result_str = (
406
+ result
407
+ if isinstance(result, str)
408
+ else json.dumps(result)
409
+ )
410
+ except Exception as e:
411
+ result_str = f"Error: {e}"
412
+ else:
413
+ result_str = f"Error: Unknown tool '{call.name}'"
414
+
415
+ _print_json(
416
+ {
417
+ "type": "tool_result",
418
+ "tool_call_id": call.id,
419
+ "name": call.name,
420
+ "result": result_str,
421
+ }
422
+ )
423
+
424
+ # Add tool result to conversation
425
+ conv = conv.with_tool_result(call.id, result_str)
426
+ else:
427
+ # No content, we're done
428
+ break
429
+
430
+ # Final summary
431
+ done_output: dict[str, Any] = {"type": "done", "rounds": round_num + 1}
432
+ if total_usage["input_tokens"] or total_usage["output_tokens"]:
433
+ done_output["usage"] = total_usage
434
+ if total_cost > 0:
435
+ done_output["cost"] = total_cost
436
+ if last_response and last_response.is_error:
437
+ done_output["error"] = last_response.error_message
438
+ _print_json(done_output)
439
+
440
+ return 0 if (last_response and not last_response.is_error) else 1
441
+
442
+ except ValueError as e:
443
+ _print_json({"type": "error", "error": str(e)})
444
+ return 1
445
+ except Exception as e:
446
+ _print_json({"type": "error", "error": f"Agent loop failed: {e}"})
447
+ return 1
448
+
449
+ return asyncio.run(run_agent())
450
+
451
+
452
+ def main():
453
+ parser = argparse.ArgumentParser(
454
+ prog="deluge",
455
+ description="LM-Deluge CLI - Run and manage LLM models",
456
+ )
457
+ subparsers = parser.add_subparsers(dest="command", help="Available commands")
458
+
459
+ # ---- list command ----
460
+ list_parser = subparsers.add_parser(
461
+ "list",
462
+ help="List available models",
463
+ description="List and filter available models in the registry",
464
+ )
465
+ list_parser.add_argument(
466
+ "--provider",
467
+ type=str,
468
+ help="Filter by provider/api_spec (e.g., openai, anthropic, google)",
469
+ )
470
+ list_parser.add_argument(
471
+ "--name",
472
+ type=str,
473
+ help="Filter by substring in model ID (case-insensitive)",
474
+ )
475
+ list_parser.add_argument(
476
+ "--json-mode",
477
+ action="store_true",
478
+ dest="json_mode",
479
+ help="Only show models that support JSON mode",
480
+ )
481
+ list_parser.add_argument(
482
+ "--images",
483
+ action="store_true",
484
+ help="Only show models that support image inputs",
485
+ )
486
+ list_parser.add_argument(
487
+ "--logprobs",
488
+ action="store_true",
489
+ help="Only show models that support logprobs",
490
+ )
491
+ list_parser.add_argument(
492
+ "--reasoning",
493
+ action="store_true",
494
+ help="Only show reasoning models",
495
+ )
496
+ list_parser.add_argument(
497
+ "--min-input-cost",
498
+ type=float,
499
+ help="Minimum input cost ($ per million tokens)",
500
+ )
501
+ list_parser.add_argument(
502
+ "--max-input-cost",
503
+ type=float,
504
+ help="Maximum input cost ($ per million tokens)",
505
+ )
506
+ list_parser.add_argument(
507
+ "--min-output-cost",
508
+ type=float,
509
+ help="Minimum output cost ($ per million tokens)",
510
+ )
511
+ list_parser.add_argument(
512
+ "--max-output-cost",
513
+ type=float,
514
+ help="Maximum output cost ($ per million tokens)",
515
+ )
516
+ list_parser.add_argument(
517
+ "--sort",
518
+ type=str,
519
+ choices=["input_cost", "output_cost", "-input_cost", "-output_cost"],
520
+ help="Sort by cost (prefix with - for descending)",
521
+ )
522
+ list_parser.add_argument(
523
+ "--limit",
524
+ type=int,
525
+ help="Maximum number of results",
526
+ )
527
+ list_parser.add_argument(
528
+ "--json",
529
+ action="store_true",
530
+ help="Output as JSON",
531
+ )
532
+ list_parser.set_defaults(func=cmd_list)
533
+
534
+ # ---- run command ----
535
+ run_parser = subparsers.add_parser(
536
+ "run",
537
+ help="Run a model on input",
538
+ description="Run a model on input and output JSON to stdout",
539
+ )
540
+ run_parser.add_argument(
541
+ "model",
542
+ type=str,
543
+ help="Model ID to use (e.g., claude-3.5-haiku, gpt-4.1-mini)",
544
+ )
545
+ input_group = run_parser.add_mutually_exclusive_group()
546
+ input_group.add_argument(
547
+ "--input",
548
+ "-i",
549
+ type=str,
550
+ help="Input text (inline)",
551
+ )
552
+ input_group.add_argument(
553
+ "--file",
554
+ "-f",
555
+ type=str,
556
+ help="Read input from file",
557
+ )
558
+ run_parser.add_argument(
559
+ "--system",
560
+ "-s",
561
+ type=str,
562
+ help="System prompt",
563
+ )
564
+ run_parser.add_argument(
565
+ "--image",
566
+ type=str,
567
+ help="Path to image file to include with the prompt",
568
+ )
569
+ run_parser.add_argument(
570
+ "--max-tokens",
571
+ "-m",
572
+ type=int,
573
+ default=1024,
574
+ help="Maximum tokens to generate (default: 1024)",
575
+ )
576
+ run_parser.add_argument(
577
+ "--temperature",
578
+ "-t",
579
+ type=float,
580
+ help="Sampling temperature",
581
+ )
582
+ run_parser.add_argument(
583
+ "--pretty",
584
+ "-p",
585
+ action="store_true",
586
+ help="Pretty-print JSON output",
587
+ )
588
+ run_parser.add_argument(
589
+ "--verbose",
590
+ "-v",
591
+ action="store_true",
592
+ help="Include additional response metadata",
593
+ )
594
+ run_parser.set_defaults(func=cmd_run)
595
+
596
+ # ---- agent command ----
597
+ agent_parser = subparsers.add_parser(
598
+ "agent",
599
+ help="Run an agent loop with tools",
600
+ description="Run an agent loop with MCP servers and/or prefab tools",
601
+ )
602
+ agent_parser.add_argument(
603
+ "model",
604
+ type=str,
605
+ help="Model ID to use (e.g., claude-3.5-haiku, gpt-4.1-mini)",
606
+ )
607
+ agent_input_group = agent_parser.add_mutually_exclusive_group()
608
+ agent_input_group.add_argument(
609
+ "--input",
610
+ "-i",
611
+ type=str,
612
+ help="Input text (inline)",
613
+ )
614
+ agent_input_group.add_argument(
615
+ "--file",
616
+ "-f",
617
+ type=str,
618
+ help="Read input from file",
619
+ )
620
+ agent_parser.add_argument(
621
+ "--system",
622
+ "-s",
623
+ type=str,
624
+ help="System prompt",
625
+ )
626
+ agent_parser.add_argument(
627
+ "--image",
628
+ type=str,
629
+ help="Path to image file to include with the prompt",
630
+ )
631
+ agent_parser.add_argument(
632
+ "--mcp-config",
633
+ type=str,
634
+ help="Path to MCP config file (Claude Desktop format JSON)",
635
+ )
636
+ agent_parser.add_argument(
637
+ "--prefab",
638
+ type=str,
639
+ help="Comma-separated prefab tools: todo,memory,filesystem,sandbox",
640
+ )
641
+ agent_parser.add_argument(
642
+ "--max-rounds",
643
+ type=int,
644
+ default=10,
645
+ help="Maximum agent loop iterations (default: 10)",
646
+ )
647
+ agent_parser.add_argument(
648
+ "--max-tokens",
649
+ "-m",
650
+ type=int,
651
+ default=4096,
652
+ help="Maximum tokens to generate per response (default: 4096)",
653
+ )
654
+ agent_parser.add_argument(
655
+ "--temperature",
656
+ "-t",
657
+ type=float,
658
+ help="Sampling temperature",
659
+ )
660
+ agent_parser.set_defaults(func=cmd_agent)
661
+
662
+ args = parser.parse_args()
663
+
664
+ if not args.command:
665
+ parser.print_help()
666
+ return 0
667
+
668
+ return args.func(args)
669
+
670
+
671
+ if __name__ == "__main__":
672
+ sys.exit(main())
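The wheel also gains an entry_points.txt, so the rewritten CLI is presumably installed as a `deluge` console script (the argparse prog name above). A minimal sketch of driving the `run` subcommand from Python and parsing its JSON output, assuming `deluge` is on PATH and the provider's API key is already set in the environment:

    import json
    import subprocess

    # Invoke the new `deluge run` subcommand; cmd_run prints a single JSON
    # object ("model", "completion", "is_error", plus optional "usage"/"cost").
    proc = subprocess.run(
        ["deluge", "run", "claude-3.5-haiku", "-i", "What is 2+2?"],
        capture_output=True,
        text=True,
    )
    result = json.loads(proc.stdout)
    if result.get("is_error"):
        print("error:", result.get("error_message"))
    else:
        print(result["completion"])

The `agent` subcommand instead streams one JSON object per line ("text", "thinking", "tool_call", "tool_result", "error", and a final "done" record carrying usage and cost), so its output can be consumed line by line with json.loads.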