lm-deluge 0.0.89__py3-none-any.whl → 0.0.91__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lm_deluge/__init__.py +3 -27
- lm_deluge/api_requests/anthropic.py +29 -7
- lm_deluge/api_requests/base.py +38 -1
- lm_deluge/api_requests/bedrock.py +29 -3
- lm_deluge/{request_context.py → api_requests/context.py} +4 -4
- lm_deluge/api_requests/gemini.py +30 -14
- lm_deluge/api_requests/mistral.py +1 -1
- lm_deluge/api_requests/openai.py +34 -5
- lm_deluge/batches.py +19 -49
- lm_deluge/cache.py +1 -1
- lm_deluge/cli.py +672 -300
- lm_deluge/{client.py → client/__init__.py} +42 -13
- lm_deluge/config.py +9 -31
- lm_deluge/embed.py +2 -6
- lm_deluge/models/__init__.py +138 -29
- lm_deluge/models/anthropic.py +32 -24
- lm_deluge/models/bedrock.py +9 -0
- lm_deluge/models/cerebras.py +2 -0
- lm_deluge/models/cohere.py +2 -0
- lm_deluge/models/google.py +13 -0
- lm_deluge/models/grok.py +4 -0
- lm_deluge/models/groq.py +2 -0
- lm_deluge/models/meta.py +2 -0
- lm_deluge/models/minimax.py +9 -1
- lm_deluge/models/openai.py +24 -1
- lm_deluge/models/openrouter.py +155 -1
- lm_deluge/models/together.py +3 -0
- lm_deluge/models/zai.py +50 -1
- lm_deluge/pipelines/extract.py +4 -5
- lm_deluge/pipelines/gepa/__init__.py +1 -1
- lm_deluge/pipelines/gepa/docs/samples.py +19 -10
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +1 -1
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +1 -1
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +1 -1
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +1 -1
- lm_deluge/pipelines/gepa/examples/simple_qa.py +1 -1
- lm_deluge/prompt/__init__.py +45 -0
- lm_deluge/{prompt.py → prompt/conversation.py} +165 -869
- lm_deluge/{image.py → prompt/image.py} +0 -10
- lm_deluge/prompt/message.py +571 -0
- lm_deluge/prompt/serialization.py +21 -0
- lm_deluge/prompt/signatures.py +77 -0
- lm_deluge/prompt/text.py +47 -0
- lm_deluge/prompt/thinking.py +55 -0
- lm_deluge/prompt/tool_calls.py +245 -0
- lm_deluge/server/__init__.py +24 -0
- lm_deluge/server/__main__.py +144 -0
- lm_deluge/server/adapters.py +369 -0
- lm_deluge/server/app.py +388 -0
- lm_deluge/server/auth.py +71 -0
- lm_deluge/server/model_policy.py +215 -0
- lm_deluge/server/models_anthropic.py +172 -0
- lm_deluge/server/models_openai.py +175 -0
- lm_deluge/skills/anthropic.py +0 -0
- lm_deluge/skills/compat.py +0 -0
- lm_deluge/tool/__init__.py +78 -19
- lm_deluge/tool/builtin/anthropic/__init__.py +1 -1
- lm_deluge/tool/cua/actions.py +26 -26
- lm_deluge/tool/cua/batch.py +1 -2
- lm_deluge/tool/cua/kernel.py +1 -1
- lm_deluge/tool/prefab/filesystem.py +2 -2
- lm_deluge/tool/prefab/full_text_search/__init__.py +3 -2
- lm_deluge/tool/prefab/memory.py +3 -1
- lm_deluge/tool/prefab/otc/executor.py +3 -3
- lm_deluge/tool/prefab/random.py +30 -54
- lm_deluge/tool/prefab/rlm/__init__.py +2 -2
- lm_deluge/tool/prefab/rlm/executor.py +1 -1
- lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
- lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
- lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
- lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
- lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
- lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +829 -0
- lm_deluge/tool/prefab/skills.py +0 -0
- lm_deluge/tool/prefab/subagents.py +1 -1
- lm_deluge/util/logprobs.py +4 -4
- lm_deluge/util/schema.py +6 -6
- lm_deluge/util/validation.py +14 -9
- {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/METADATA +12 -12
- lm_deluge-0.0.91.dist-info/RECORD +140 -0
- lm_deluge-0.0.91.dist-info/entry_points.txt +3 -0
- lm_deluge/mock_openai.py +0 -643
- lm_deluge/tool/prefab/sandbox.py +0 -1621
- lm_deluge-0.0.89.dist-info/RECORD +0 -117
- /lm_deluge/{file.py → prompt/file.py} +0 -0
- {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.89.dist-info → lm_deluge-0.0.91.dist-info}/top_level.txt +0 -0
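Several modules were reorganized into packages in 0.0.91 (client.py → client/, prompt.py → the prompt/ package, the single tool/prefab/sandbox.py → one module per backend under tool/prefab/sandbox/). The import sketch below is inferred from the new cli.py shown further down, not from separate documentation, so treat the exact paths as assumptions:

    # Sketch of the reorganized import paths, inferred from lm_deluge/cli.py (below)
    from lm_deluge.client import LLMClient        # client.py is now the client/ package
    from lm_deluge.prompt import Conversation     # prompt.py split into the prompt/ package
    from lm_deluge.prompt.tool_calls import ToolCall
    from lm_deluge.tool.prefab.sandbox import DockerSandbox, SeatbeltSandbox  # per-backend sandbox modules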
lm_deluge/cli.py
CHANGED
@@ -1,300 +1,672 @@
[The previous 300-line cli.py is removed by this hunk; its old contents are not rendered in this view. The 672 added lines follow.]
"""
LM-Deluge CLI

Usage:
    deluge list [--provider PROVIDER] [--name NAME] [--json] ...
    deluge run MODEL [--input INPUT | --file FILE] [--max-tokens N] [--temperature T] ...
    deluge agent MODEL [--mcp-config FILE] [--prefab TOOLS] [--input INPUT] ...

Examples:
    deluge list
    deluge list --provider anthropic --reasoning
    deluge list --name claude --json
    deluge run claude-3.5-haiku -i "What is 2+2?"
    echo "Hello" | deluge run gpt-4.1-mini
    deluge run claude-4-sonnet --file prompt.txt --max-tokens 4096
    deluge agent claude-3.5-haiku --mcp-config mcp.json -i "Search for AI news"
    deluge agent claude-4-sonnet --prefab todo,memory -i "Create a task list"
"""

from __future__ import annotations

import argparse
import asyncio
import json
import sys
from typing import Any

from .models import find_models, APIModel
from .client import LLMClient
from .prompt import Conversation


def _model_to_dict(model: APIModel) -> dict[str, Any]:
    """Convert APIModel to a JSON-serializable dict."""
    return {
        "id": model.id,
        "name": model.name,
        "provider": model.provider,
        "api_spec": model.api_spec,
        "input_cost": model.input_cost,
        "output_cost": model.output_cost,
        "supports_json": model.supports_json,
        "supports_images": model.supports_images,
        "supports_logprobs": model.supports_logprobs,
        "reasoning_model": model.reasoning_model,
    }


def cmd_list(args: argparse.Namespace) -> int:
    """List models matching the given criteria."""
    # Convert boolean flags: only pass True if set, None otherwise
    models = find_models(
        provider=args.provider,
        supports_json=True if args.json_mode else None,
        supports_images=True if args.images else None,
        supports_logprobs=True if args.logprobs else None,
        reasoning_model=True if args.reasoning else None,
        min_input_cost=args.min_input_cost,
        max_input_cost=args.max_input_cost,
        min_output_cost=args.min_output_cost,
        max_output_cost=args.max_output_cost,
        name_contains=args.name,
        sort_by=args.sort,
        limit=args.limit,
    )

    if args.json:
        output = [_model_to_dict(m) for m in models]
        print(json.dumps(output, indent=2))
    else:
        if not models:
            print("No models found matching criteria.", file=sys.stderr)
            return 0

        # Calculate column widths
        id_width = max(len(m.id) for m in models)
        provider_width = max(len(m.provider) for m in models)

        # Header
        print(
            f"{'MODEL':<{id_width}} {'PROVIDER':<{provider_width}} {'INPUT $/M':>10} {'OUTPUT $/M':>10} FLAGS"
        )
        print("-" * (id_width + provider_width + 40))

        for m in models:
            flags = []
            if m.supports_json:
                flags.append("json")
            if m.supports_images:
                flags.append("img")
            if m.supports_logprobs:
                flags.append("logp")
            if m.reasoning_model:
                flags.append("reason")

            input_cost = f"${m.input_cost:.2f}" if m.input_cost is not None else "N/A"
            output_cost = (
                f"${m.output_cost:.2f}" if m.output_cost is not None else "N/A"
            )

            print(
                f"{m.id:<{id_width}} {m.provider:<{provider_width}} {input_cost:>10} {output_cost:>10} {','.join(flags)}"
            )

        print(f"\nTotal: {len(models)} models")

    return 0


def cmd_run(args: argparse.Namespace) -> int:
    """Run a model on input and output JSON to stdout."""
    # Determine input text
    if args.input:
        prompt_text = args.input
    elif args.file:
        try:
            with open(args.file, "r") as f:
                prompt_text = f.read()
        except FileNotFoundError:
            print(
                json.dumps({"error": f"File not found: {args.file}"}), file=sys.stdout
            )
            return 1
        except Exception as e:
            print(json.dumps({"error": f"Failed to read file: {e}"}), file=sys.stdout)
            return 1
    elif not sys.stdin.isatty():
        prompt_text = sys.stdin.read()
    else:
        print(
            json.dumps(
                {"error": "No input provided. Use --input, --file, or pipe to stdin."}
            ),
            file=sys.stdout,
        )
        return 1

    if not prompt_text.strip():
        print(json.dumps({"error": "Empty input provided."}), file=sys.stdout)
        return 1

    # Build conversation
    image = args.image if hasattr(args, "image") else None
    if args.system:
        conv = Conversation().system(args.system).user(prompt_text, image=image)
    else:
        conv = Conversation().user(prompt_text, image=image)

    # Build client params
    client_kwargs: dict[str, Any] = {
        "model_names": args.model,
        "max_new_tokens": args.max_tokens,
    }
    if args.temperature is not None:
        client_kwargs["temperature"] = args.temperature

    try:
        client = LLMClient(**client_kwargs)
        client.open(show_progress=False)
        response = asyncio.run(client.start(conv))
    except ValueError as e:
        print(json.dumps({"error": str(e)}), file=sys.stdout)
        return 1
    except Exception as e:
        print(json.dumps({"error": f"Request failed: {e}"}), file=sys.stdout)
        return 1

    # Build output
    output: dict[str, Any] = {
        "model": args.model,
        "completion": response.completion if response.completion else None,
        "is_error": response.is_error,
    }

    if response.is_error:
        output["error_message"] = response.error_message

    if response.usage:
        output["usage"] = {
            "input_tokens": response.usage.input_tokens,
            "output_tokens": response.usage.output_tokens,
        }

    if response.cost is not None:
        output["cost"] = response.cost

    if args.verbose and response.finish_reason:
        output["finish_reason"] = response.finish_reason

    print(json.dumps(output, indent=2 if args.pretty else None))
    return 0 if not response.is_error else 1


def _print_json(obj: dict[str, Any]) -> None:
    """Print JSON and flush immediately for streaming."""
    print(json.dumps(obj), flush=True)


def cmd_agent(args: argparse.Namespace) -> int:
    """Run an agent loop with tools and output JSON blocks for each content piece."""
    from .tool import Tool, MCPServer
    from .prompt.text import Text
    from .prompt.tool_calls import ToolCall
    from .prompt.thinking import Thinking

    # Determine input text
    if args.input:
        prompt_text = args.input
    elif args.file:
        try:
            with open(args.file, "r") as f:
                prompt_text = f.read()
        except FileNotFoundError:
            _print_json({"type": "error", "error": f"File not found: {args.file}"})
            return 1
        except Exception as e:
            _print_json({"type": "error", "error": f"Failed to read file: {e}"})
            return 1
    elif not sys.stdin.isatty():
        prompt_text = sys.stdin.read()
    else:
        _print_json(
            {
                "type": "error",
                "error": "No input provided. Use --input, --file, or pipe to stdin.",
            }
        )
        return 1

    if not prompt_text.strip():
        _print_json({"type": "error", "error": "Empty input provided."})
        return 1

    def print_message_parts(msg_role: str, parts: list) -> None:
        """Print JSON for each part of a message."""
        for part in parts:
            if isinstance(part, Text):
                _print_json({"type": "text", "role": msg_role, "content": part.text})
            elif isinstance(part, ToolCall):
                _print_json(
                    {
                        "type": "tool_call",
                        "id": part.id,
                        "name": part.name,
                        "arguments": part.arguments,
                    }
                )
            elif isinstance(part, Thinking):
                _print_json({"type": "thinking", "content": part.content})

    async def run_agent() -> int:
        tools: list[Any] = []
        tool_map: dict[str, Tool] = {}

        # Load MCP tools from config
        if args.mcp_config:
            try:
                import json5

                with open(args.mcp_config, "r") as f:
                    mcp_config = json5.load(f)
                # URL-based servers -> MCPServer objects (provider-native)
                mcp_servers = MCPServer.from_mcp_config(mcp_config)
                tools.extend(mcp_servers)
                # Expand MCP servers to tools for local execution
                for server in mcp_servers:
                    server_tools = await server.to_tools()
                    for t in server_tools:
                        tool_map[t.name] = t
                # Command-based servers -> Tool objects (local execution)
                cmd_tools = await Tool.from_mcp_config(mcp_config)
                tools.extend(cmd_tools)
                for t in cmd_tools:
                    tool_map[t.name] = t
            except FileNotFoundError:
                _print_json(
                    {
                        "type": "error",
                        "error": f"MCP config not found: {args.mcp_config}",
                    }
                )
                return 1
            except Exception as e:
                _print_json(
                    {"type": "error", "error": f"Failed to load MCP config: {e}"}
                )
                return 1

        # Load prefab tools
        if args.prefab:
            prefab_names = [p.strip() for p in args.prefab.split(",")]
            for name in prefab_names:
                try:
                    prefab_tools: list[Tool] = []
                    if name == "todo":
                        from .tool.prefab import TodoManager

                        prefab_tools = TodoManager().get_tools()
                    elif name == "memory":
                        from .tool.prefab.memory import MemoryManager

                        prefab_tools = MemoryManager().get_tools()
                    elif name == "filesystem":
                        from .tool.prefab import FilesystemManager

                        prefab_tools = FilesystemManager().get_tools()
                    elif name == "sandbox":
                        import platform

                        if platform.system() == "Darwin":
                            from .tool.prefab.sandbox import SeatbeltSandbox

                            sandbox = SeatbeltSandbox()
                            await sandbox.__aenter__()
                            prefab_tools = sandbox.get_tools()
                        else:
                            from .tool.prefab.sandbox import DockerSandbox

                            sandbox = DockerSandbox()
                            await sandbox.__aenter__()
                            prefab_tools = sandbox.get_tools()
                    else:
                        _print_json(
                            {
                                "type": "error",
                                "error": f"Unknown prefab tool: {name}. Available: todo, memory, filesystem, sandbox",
                            }
                        )
                        return 1
                    tools.extend(prefab_tools)
                    for t in prefab_tools:
                        tool_map[t.name] = t
                except ImportError as e:
                    _print_json(
                        {
                            "type": "error",
                            "error": f"Failed to load prefab '{name}': {e}",
                        }
                    )
                    return 1

        # Build conversation
        image = args.image if hasattr(args, "image") else None
        if args.system:
            conv = Conversation().system(args.system).user(prompt_text, image=image)
        else:
            conv = Conversation().user(prompt_text, image=image)

        # Print initial user message
        _print_json({"type": "text", "role": "user", "content": prompt_text})

        # Build client
        client_kwargs: dict[str, Any] = {
            "model_names": args.model,
            "max_new_tokens": args.max_tokens,
        }
        if args.temperature is not None:
            client_kwargs["temperature"] = args.temperature

        try:
            client = LLMClient(**client_kwargs)
            client.open(show_progress=False)

            # Manual agent loop with streaming output
            total_usage = {"input_tokens": 0, "output_tokens": 0}
            total_cost = 0.0
            last_response = None
            round_num = 0

            for round_num in range(args.max_rounds):
                # Get model response
                response = await client.start(conv, tools=tools)
                last_response = response

                if response.is_error:
                    _print_json({"type": "error", "error": response.error_message})
                    break

                # Track usage
                if response.usage:
                    total_usage["input_tokens"] += response.usage.input_tokens or 0
                    total_usage["output_tokens"] += response.usage.output_tokens or 0
                if response.cost:
                    total_cost += response.cost

                # Print assistant response parts
                if response.content:
                    print_message_parts("assistant", response.content.parts)

                    # Check for tool calls
                    tool_calls = response.content.tool_calls
                    if not tool_calls:
                        # No tool calls, we're done
                        break

                    # Add assistant message to conversation
                    conv = conv.add(response.content)

                    # Execute tool calls and print results
                    for call in tool_calls:
                        tool_obj = tool_map.get(call.name)
                        if tool_obj:
                            try:
                                result = await tool_obj.acall(**call.arguments)
                                result_str = (
                                    result
                                    if isinstance(result, str)
                                    else json.dumps(result)
                                )
                            except Exception as e:
                                result_str = f"Error: {e}"
                        else:
                            result_str = f"Error: Unknown tool '{call.name}'"

                        _print_json(
                            {
                                "type": "tool_result",
                                "tool_call_id": call.id,
                                "name": call.name,
                                "result": result_str,
                            }
                        )

                        # Add tool result to conversation
                        conv = conv.with_tool_result(call.id, result_str)
                else:
                    # No content, we're done
                    break

            # Final summary
            done_output: dict[str, Any] = {"type": "done", "rounds": round_num + 1}
            if total_usage["input_tokens"] or total_usage["output_tokens"]:
                done_output["usage"] = total_usage
            if total_cost > 0:
                done_output["cost"] = total_cost
            if last_response and last_response.is_error:
                done_output["error"] = last_response.error_message
            _print_json(done_output)

            return 0 if (last_response and not last_response.is_error) else 1

        except ValueError as e:
            _print_json({"type": "error", "error": str(e)})
            return 1
        except Exception as e:
            _print_json({"type": "error", "error": f"Agent loop failed: {e}"})
            return 1

    return asyncio.run(run_agent())


def main():
    parser = argparse.ArgumentParser(
        prog="deluge",
        description="LM-Deluge CLI - Run and manage LLM models",
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # ---- list command ----
    list_parser = subparsers.add_parser(
        "list",
        help="List available models",
        description="List and filter available models in the registry",
    )
    list_parser.add_argument(
        "--provider",
        type=str,
        help="Filter by provider/api_spec (e.g., openai, anthropic, google)",
    )
    list_parser.add_argument(
        "--name",
        type=str,
        help="Filter by substring in model ID (case-insensitive)",
    )
    list_parser.add_argument(
        "--json-mode",
        action="store_true",
        dest="json_mode",
        help="Only show models that support JSON mode",
    )
    list_parser.add_argument(
        "--images",
        action="store_true",
        help="Only show models that support image inputs",
    )
    list_parser.add_argument(
        "--logprobs",
        action="store_true",
        help="Only show models that support logprobs",
    )
    list_parser.add_argument(
        "--reasoning",
        action="store_true",
        help="Only show reasoning models",
    )
    list_parser.add_argument(
        "--min-input-cost",
        type=float,
        help="Minimum input cost ($ per million tokens)",
    )
    list_parser.add_argument(
        "--max-input-cost",
        type=float,
        help="Maximum input cost ($ per million tokens)",
    )
    list_parser.add_argument(
        "--min-output-cost",
        type=float,
        help="Minimum output cost ($ per million tokens)",
    )
    list_parser.add_argument(
        "--max-output-cost",
        type=float,
        help="Maximum output cost ($ per million tokens)",
    )
    list_parser.add_argument(
        "--sort",
        type=str,
        choices=["input_cost", "output_cost", "-input_cost", "-output_cost"],
        help="Sort by cost (prefix with - for descending)",
    )
    list_parser.add_argument(
        "--limit",
        type=int,
        help="Maximum number of results",
    )
    list_parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON",
    )
    list_parser.set_defaults(func=cmd_list)

    # ---- run command ----
    run_parser = subparsers.add_parser(
        "run",
        help="Run a model on input",
        description="Run a model on input and output JSON to stdout",
    )
    run_parser.add_argument(
        "model",
        type=str,
        help="Model ID to use (e.g., claude-3.5-haiku, gpt-4.1-mini)",
    )
    input_group = run_parser.add_mutually_exclusive_group()
    input_group.add_argument(
        "--input",
        "-i",
        type=str,
        help="Input text (inline)",
    )
    input_group.add_argument(
        "--file",
        "-f",
        type=str,
        help="Read input from file",
    )
    run_parser.add_argument(
        "--system",
        "-s",
        type=str,
        help="System prompt",
    )
    run_parser.add_argument(
        "--image",
        type=str,
        help="Path to image file to include with the prompt",
    )
    run_parser.add_argument(
        "--max-tokens",
        "-m",
        type=int,
        default=1024,
        help="Maximum tokens to generate (default: 1024)",
    )
    run_parser.add_argument(
        "--temperature",
        "-t",
        type=float,
        help="Sampling temperature",
    )
    run_parser.add_argument(
        "--pretty",
        "-p",
        action="store_true",
        help="Pretty-print JSON output",
    )
    run_parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Include additional response metadata",
    )
    run_parser.set_defaults(func=cmd_run)

    # ---- agent command ----
    agent_parser = subparsers.add_parser(
        "agent",
        help="Run an agent loop with tools",
        description="Run an agent loop with MCP servers and/or prefab tools",
    )
    agent_parser.add_argument(
        "model",
        type=str,
        help="Model ID to use (e.g., claude-3.5-haiku, gpt-4.1-mini)",
    )
    agent_input_group = agent_parser.add_mutually_exclusive_group()
    agent_input_group.add_argument(
        "--input",
        "-i",
        type=str,
        help="Input text (inline)",
    )
    agent_input_group.add_argument(
        "--file",
        "-f",
        type=str,
        help="Read input from file",
    )
    agent_parser.add_argument(
        "--system",
        "-s",
        type=str,
        help="System prompt",
    )
    agent_parser.add_argument(
        "--image",
        type=str,
        help="Path to image file to include with the prompt",
    )
    agent_parser.add_argument(
        "--mcp-config",
        type=str,
        help="Path to MCP config file (Claude Desktop format JSON)",
    )
    agent_parser.add_argument(
        "--prefab",
        type=str,
        help="Comma-separated prefab tools: todo,memory,filesystem,sandbox",
    )
    agent_parser.add_argument(
        "--max-rounds",
        type=int,
        default=10,
        help="Maximum agent loop iterations (default: 10)",
    )
    agent_parser.add_argument(
        "--max-tokens",
        "-m",
        type=int,
        default=4096,
        help="Maximum tokens to generate per response (default: 4096)",
    )
    agent_parser.add_argument(
        "--temperature",
        "-t",
        type=float,
        help="Sampling temperature",
    )
    agent_parser.set_defaults(func=cmd_agent)

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return 0

    return args.func(args)


if __name__ == "__main__":
    sys.exit(main())
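For illustration, the rough shape of the JSON that the new commands emit, following the output dictionaries built in cmd_run and cmd_agent above; the token counts, cost, and tool names are placeholder values, not real output:

    $ deluge run claude-3.5-haiku -i "What is 2+2?" --pretty
    {
      "model": "claude-3.5-haiku",
      "completion": "2 + 2 = 4",
      "is_error": false,
      "usage": {"input_tokens": 14, "output_tokens": 9},
      "cost": 5.2e-05
    }

    $ deluge agent claude-4-sonnet --prefab todo -i "Create a task list"
    {"type": "text", "role": "user", "content": "Create a task list"}
    {"type": "tool_call", "id": "...", "name": "...", "arguments": {"...": "..."}}
    {"type": "tool_result", "tool_call_id": "...", "name": "...", "result": "..."}
    {"type": "text", "role": "assistant", "content": "Created the task list."}
    {"type": "done", "rounds": 2, "usage": {"input_tokens": 812, "output_tokens": 105}, "cost": 0.0011}

The agent command streams one JSON object per line (flushed immediately via _print_json), so downstream tools can consume events as they arrive.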