@orchagent/cli 0.3.49 → 0.3.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/agent-keys.js +84 -0
- package/dist/commands/index.js +2 -0
- package/dist/commands/publish.js +70 -6
- package/dist/commands/run.js +779 -122
- package/dist/lib/api.js +17 -0
- package/dist/lib/errors.js +1 -0
- package/dist/lib/sse.js +41 -0
- package/package.json +3 -2
- package/src/resources/agent_runner.py +791 -0
|
@@ -0,0 +1,791 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Agent runner — standalone script for local and sandbox execution.
|
|
4
|
+
|
|
5
|
+
Implements a tool-use loop: the LLM receives the author's prompt as the system
|
|
6
|
+
message and the caller's input as the user message, then iterates with tools
|
|
7
|
+
until it calls submit_result or reaches max_turns.
|
|
8
|
+
|
|
9
|
+
Built-in tools: bash, read_file, write_file, list_files, submit_result
|
|
10
|
+
Custom tools: command wrappers defined by the agent author in custom_tools.json
|
|
11
|
+
|
|
12
|
+
Supports multiple LLM providers: anthropic, openai, gemini.
|
|
13
|
+
Set LLM_PROVIDER env var to select (default: anthropic).
|
|
14
|
+
|
|
15
|
+
When LOCAL_MODE=1 is set, adapts platform context for local execution
|
|
16
|
+
(no sandbox references, uses actual working directory).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import re
|
|
23
|
+
import subprocess
|
|
24
|
+
import sys
|
|
25
|
+
import threading
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Tool definitions (canonical format — Anthropic-style)
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
# Tools the model can always call, declared in the canonical (Anthropic-style)
# tool-schema format: name / description / input_schema. Providers with a
# different wire format translate these in their convert_tools() method.
BUILTIN_TOOLS = [
    {
        "name": "bash",
        "description": "Run a shell command and return stdout + stderr. Use for installing packages, running tests, compiling code, and other system operations. Commands time out after 120 seconds.",
        "input_schema": {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "The shell command to execute",
                }
            },
            "required": ["command"],
        },
    },
    {
        "name": "read_file",
        "description": "Read the contents of a file. Returns the full file content as a string.",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Absolute or relative path to the file",
                }
            },
            "required": ["path"],
        },
    },
    {
        "name": "write_file",
        "description": "Write content to a file. Creates the file and any parent directories if they don't exist. Overwrites existing content.",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Absolute or relative path to the file",
                },
                "content": {
                    "type": "string",
                    "description": "The content to write to the file",
                },
            },
            "required": ["path", "content"],
        },
    },
    {
        # Note: list_files declares no "required" keys — both params default.
        "name": "list_files",
        "description": "List files and directories at the given path.",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Directory path to list (default: current directory)",
                    "default": ".",
                },
                "recursive": {
                    "type": "boolean",
                    "description": "If true, list files recursively",
                    "default": False,
                },
            },
        },
    },
]
|
|
98
|
+
|
|
99
|
+
BASH_TIMEOUT = 120  # seconds per command; enforced by execute_bash and quoted in the bash tool description
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def error_exit(msg):
    """Report a fatal error as a JSON object on stdout and exit with status 1.

    The caller of this script parses stdout as JSON, so errors must be
    emitted in the same machine-readable channel as results.
    """
    sys.stdout.write(json.dumps({"error": msg}) + "\n")
    sys.exit(1)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def build_platform_context(output_schema, custom_tools_config):
    """Build platform context prepended to the author's prompt.

    This eliminates the need for authors to explain sandbox mechanics
    (tools, file locations, submit_result usage) in their prompt.md.
    The author's prompt can focus purely on domain expertise.

    Args:
        output_schema: JSON schema dict for the agent's result, or None.
        custom_tools_config: list of author-defined tool config dicts
            (each with at least "name", optionally "description"/"command").

    Returns:
        A markdown string ending in a "---" separator, ready to be
        concatenated directly in front of the author's prompt.
    """
    # LOCAL_MODE=1 switches the wording from sandbox paths to the real cwd.
    is_local = os.environ.get("LOCAL_MODE") == "1"

    lines = []
    lines.append("[PLATFORM CONTEXT — auto-injected by orchagent]")
    lines.append("")
    lines.append("## Environment")
    if is_local:
        lines.append("You are running locally. Working directory: %s" % os.getcwd())
    else:
        lines.append("You are running inside an isolated sandbox. Working directory: /home/user")
        lines.append("Uploaded files (if any): /tmp/uploads/")
    lines.append("")
    lines.append("## Tools")
    lines.append("- **bash**: Run shell commands (120s timeout per command)")
    lines.append("- **read_file**: Read a file's contents")
    lines.append("- **write_file**: Create or overwrite a file (parent dirs created automatically)")
    lines.append("- **list_files**: List directory contents")

    # Advertise author-defined custom tools alongside the built-ins; fall
    # back to the raw command string when no description was given.
    if custom_tools_config:
        for ct in custom_tools_config:
            desc = ct.get("description", ct.get("command", ""))
            lines.append("- **%s**: %s" % (ct["name"], desc))

    # Check for skills
    skills_path = "/home/user/orchagent/skills/manifest.json"
    if is_local:
        skills_path = os.path.join(os.getcwd(), "orchagent", "skills", "manifest.json")
    if os.path.exists(skills_path):
        try:
            with open(skills_path, "r") as f:
                skills = json.load(f)
            if skills:
                lines.append("")
                lines.append("## Skills")
                lines.append("Reference material is available:")
                for skill in skills:
                    lines.append("- %s — %s" % (skill.get("name", ""), skill.get("description", "")))
        except Exception:
            # Skills are optional reference material; a malformed manifest
            # must not prevent the agent from running at all.
            pass

    lines.append("")
    lines.append("## Submitting Results")
    if output_schema:
        schema_str = json.dumps(output_schema, indent=2)
        lines.append("When done, call **submit_result** with output matching this schema:")
        lines.append("```json")
        lines.append(schema_str)
        lines.append("```")
    else:
        lines.append("When done, call **submit_result** with a JSON object containing your result.")

    lines.append("")
    lines.append("[END PLATFORM CONTEXT]")
    lines.append("")
    lines.append("---")
    lines.append("")
    return "\n".join(lines)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def build_submit_result_tool(output_schema):
    """Build the submit_result tool definition.

    When the agent declares an output schema, submit_result's input schema
    is exactly that schema; otherwise a generic single-string "result"
    object is used so the model always has something valid to submit.
    """
    fallback_schema = {
        "type": "object",
        "properties": {
            "result": {
                "type": "string",
                "description": "The final result to return",
            }
        },
    }
    return {
        "name": "submit_result",
        "description": "Submit the final result. Call this when you have completed the task. The input must match the agent's output schema.",
        "input_schema": output_schema if output_schema else fallback_schema,
    }
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def build_custom_tools(custom_tools_config):
    """Convert author-defined custom tool configs to canonical tool format.

    Each config entry needs "name" and "command"; "description" and
    "input_schema" are optional. Tools without a schema get an empty
    object schema (a no-parameter tool).
    """
    canonical = []
    for cfg in custom_tools_config:
        schema = cfg.get("input_schema") or {"type": "object", "properties": {}}
        canonical.append({
            "name": cfg["name"],
            "description": cfg.get("description", "Run: " + cfg["command"]),
            "input_schema": schema,
        })
    return canonical
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# ---------------------------------------------------------------------------
|
|
212
|
+
# Structured event emission for real-time streaming
|
|
213
|
+
# ---------------------------------------------------------------------------
|
|
214
|
+
|
|
215
|
+
def emit_event(event_type, **kwargs):
    """Write one structured event line to stderr for the gateway to capture.

    The "@@ORCHAGENT_EVENT:" prefix lets the gateway distinguish machine
    events from ordinary stderr noise.
    """
    # "type" must serialize first, matching the gateway's expected layout.
    record = {"type": event_type}
    record.update(kwargs)
    sys.stderr.write("@@ORCHAGENT_EVENT:" + json.dumps(record) + "\n")
    sys.stderr.flush()
|
|
219
|
+
|
|
220
|
+
def _brief_args(tool_name, args):
|
|
221
|
+
"""Short safe summary of tool args for streaming display."""
|
|
222
|
+
if tool_name == "bash":
|
|
223
|
+
cmd = args.get("command", "")
|
|
224
|
+
return cmd[:120] + ("..." if len(cmd) > 120 else "")
|
|
225
|
+
if tool_name == "read_file":
|
|
226
|
+
return args.get("path", "")[:100]
|
|
227
|
+
if tool_name == "write_file":
|
|
228
|
+
return "%s (%d chars)" % (args.get("path", "")[:80], len(args.get("content", "")))
|
|
229
|
+
if tool_name == "list_files":
|
|
230
|
+
return args.get("path", ".")
|
|
231
|
+
if tool_name == "submit_result":
|
|
232
|
+
return ""
|
|
233
|
+
try:
|
|
234
|
+
s = json.dumps(args)
|
|
235
|
+
return s[:100] + ("..." if len(s) > 100 else "")
|
|
236
|
+
except Exception:
|
|
237
|
+
return "..."
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# ---------------------------------------------------------------------------
|
|
241
|
+
# Verbose logging for local mode
|
|
242
|
+
# ---------------------------------------------------------------------------
|
|
243
|
+
|
|
244
|
+
_VERBOSE = False  # set from the --verbose flag in main(); gates verbose_log output
|
|
245
|
+
|
|
246
|
+
def verbose_log(tool_name, tool_input):
    """Log one tool call to stderr in human-readable form when --verbose is set."""
    if not _VERBOSE:
        return
    if tool_name == "bash":
        cmd = tool_input.get("command", "")
        if len(cmd) > 100:
            cmd = cmd[:100] + "..."
        line = " - bash: %s" % cmd
    elif tool_name in ("read_file", "write_file"):
        line = " - %s: %s" % (tool_name, tool_input.get("path", ""))
    elif tool_name == "list_files":
        line = " - list_files: %s" % tool_input.get("path", ".")
    elif tool_name == "submit_result":
        line = " - submit_result"
    else:
        line = " - %s" % tool_name
    print(line, file=sys.stderr, flush=True)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
# ---------------------------------------------------------------------------
|
|
267
|
+
# Tool execution
|
|
268
|
+
# ---------------------------------------------------------------------------
|
|
269
|
+
|
|
270
|
+
def execute_bash(command):
    """Run *command* under bash and return its combined stdout/stderr text.

    Non-zero exit codes are appended as a trailing "[exit code: N]" line;
    timeouts and launch failures are reported as "[ERROR] ..." strings
    rather than raised, so the result can always be fed back to the model.
    """
    try:
        proc = subprocess.run(
            ["bash", "-c", command],
            capture_output=True,
            text=True,
            timeout=BASH_TIMEOUT,
        )
    except subprocess.TimeoutExpired:
        return "[ERROR] Command timed out after %d seconds" % BASH_TIMEOUT
    except Exception as exc:
        return "[ERROR] %s" % exc

    pieces = []
    if proc.stdout:
        pieces.append(proc.stdout)
    if proc.stderr:
        pieces.append("STDERR:\n" + proc.stderr)
    output = "\n".join(pieces)
    if proc.returncode != 0:
        output += "\n[exit code: %d]" % proc.returncode
    return output or "(no output)"
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def execute_read_file(path):
    """Return the full text of *path*, or an "[ERROR] ..." string on failure."""
    try:
        with open(path, "r") as handle:
            return handle.read()
    except FileNotFoundError:
        return "[ERROR] File not found: " + path
    except Exception as exc:
        # Permission errors, decode errors, etc. — surface them to the model.
        return "[ERROR] %s" % exc
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def execute_write_file(path, content):
    """Write *content* to *path*, creating parent directories as needed.

    Returns a human-readable success message or an "[ERROR] ..." string
    on failure (never raises), so the result can be fed back to the model.

    Fix: the original reported len(content) as "bytes", which is the
    character count and wrong for non-ASCII text. The file is now written
    as UTF-8 explicitly (independent of the platform locale) and the
    message reports the actual encoded byte count.
    """
    try:
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
        return "Successfully wrote %d bytes to %s" % (len(content.encode("utf-8")), path)
    except Exception as e:
        return "[ERROR] %s" % e
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def execute_list_files(path=".", recursive=False):
    """List directory entries at *path*.

    Non-recursive mode returns one name per line (directories suffixed
    with "/"). Recursive mode returns sorted relative file paths, skipping
    dot-files and dot-directories. Errors come back as "[ERROR] ..." text.
    """
    try:
        if not recursive:
            listed = []
            for name in sorted(os.listdir(path)):
                marker = "/" if os.path.isdir(os.path.join(path, name)) else ""
                listed.append(name + marker)
            return "\n".join(listed) or "(empty directory)"
        found = []
        for root, dirnames, filenames in os.walk(path):
            # Prune hidden directories in place so os.walk skips them.
            dirnames[:] = [d for d in dirnames if not d.startswith(".")]
            found.extend(
                os.path.relpath(os.path.join(root, fname), path)
                for fname in filenames
                if not fname.startswith(".")
            )
        return "\n".join(sorted(found)) or "(empty directory)"
    except FileNotFoundError:
        return "[ERROR] Directory not found: " + path
    except Exception as exc:
        return "[ERROR] %s" % exc
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def execute_custom_tool(command_template, params):
    """Execute a custom tool by substituting params into the command template.

    Params are also dumped to /tmp/__tool_input.json as a side channel for
    tools that prefer structured input. Fix: that write was unconditional,
    so any system without a writable /tmp (e.g. Windows in LOCAL_MODE)
    crashed the whole tool call — it is now best-effort.

    NOTE(security): the single-quote escaping on substituted values is only
    effective when the template wraps each {{placeholder}} in single quotes;
    templates are author-controlled, not end-user input.
    """
    try:
        with open("/tmp/__tool_input.json", "w") as f:
            json.dump(params, f)
    except OSError:
        pass  # optional side channel; inline substitution still works

    command = command_template
    for key, value in params.items():
        # Escape embedded single quotes for templates that quote the slot.
        safe_value = str(value).replace("'", "'\\''")
        command = command.replace("{{" + key + "}}", safe_value)
    # Blank out any placeholders the model did not supply a value for.
    command = re.sub(r"\{\{\w+\}\}", "", command)
    return execute_bash(command)
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def dispatch_tool(tool_name, tool_input, custom_tools_config):
    """Route one tool call to its implementation.

    Returns (result_text, is_submit). is_submit is True only for
    submit_result, which signals the agent loop to finish.
    """
    if tool_name == "submit_result":
        # The submitted payload itself (as JSON text) is the final result.
        return json.dumps(tool_input), True
    if tool_name == "bash":
        return execute_bash(tool_input.get("command", "")), False
    if tool_name == "read_file":
        return execute_read_file(tool_input.get("path", "")), False
    if tool_name == "write_file":
        path = tool_input.get("path", "")
        content = tool_input.get("content", "")
        return execute_write_file(path, content), False
    if tool_name == "list_files":
        path = tool_input.get("path", ".")
        recursive = tool_input.get("recursive", False)
        return execute_list_files(path, recursive), False
    # Fall back to author-defined command-wrapper tools.
    for ct in custom_tools_config:
        if ct["name"] == tool_name:
            return execute_custom_tool(ct["command"], tool_input), False
    return "[ERROR] Unknown tool: " + tool_name, False
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
# ---------------------------------------------------------------------------
|
|
384
|
+
# Provider abstraction
|
|
385
|
+
# ---------------------------------------------------------------------------
|
|
386
|
+
|
|
387
|
+
class AnthropicProvider:
    """Provider adapter for the Anthropic Messages API.

    The script's canonical tool format *is* the Anthropic format, so
    convert_tools is a pass-through. All adapters share the same informal
    interface: import_sdk, convert_tools, call, has_tool_use, extract_text,
    extract_tool_calls, extract_usage, append_turn.
    """
    name = "anthropic"

    def import_sdk(self):
        # Deferred import: only the selected provider's SDK needs to be
        # installed in the sandbox.
        import anthropic
        key = os.environ.get("ANTHROPIC_API_KEY")
        if not key:
            error_exit("ANTHROPIC_API_KEY not set")
        self.client = anthropic.Anthropic(api_key=key)
        self.model = os.environ.get("LLM_MODEL", "claude-sonnet-4-5-20250929")

    def convert_tools(self, tools):
        return tools  # Already in canonical (Anthropic) format

    def call(self, system, messages, tools):
        # One non-streaming completion per agent turn.
        return self.client.messages.create(
            model=self.model, max_tokens=16384,
            system=system, tools=tools, messages=messages)

    def has_tool_use(self, r):
        return any(b.type == "tool_use" for b in r.content)

    def extract_text(self, r):
        # Join all text blocks; tool_use blocks are ignored here.
        return "\n".join(b.text for b in r.content if b.type == "text")

    def extract_tool_calls(self, r):
        # Yields (call_id, tool_name, input_dict) for each tool_use block.
        for b in r.content:
            if b.type == "tool_use":
                yield b.id, b.name, b.input

    def extract_usage(self, r):
        # Normalized token accounting; zeros when the SDK omits usage.
        u = getattr(r, "usage", None)
        if u:
            return {"input_tokens": u.input_tokens, "output_tokens": u.output_tokens}
        return {"input_tokens": 0, "output_tokens": 0}

    def append_turn(self, messages, response, tool_results):
        # Record the assistant turn, then all tool results as tool_result
        # blocks inside a single user message (Messages API convention).
        messages.append({"role": "assistant", "content": response.content})
        results = []
        for call_id, name, text, is_err in tool_results:
            r = {"type": "tool_result", "tool_use_id": call_id, "content": text}
            if is_err:
                r["is_error"] = True
            results.append(r)
        messages.append({"role": "user", "content": results})
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
class OpenAIProvider:
    """Provider adapter for the OpenAI Chat Completions API.

    Canonical (Anthropic-style) tools are wrapped into OpenAI's
    function-calling envelope, and tool results are sent back as
    individual role="tool" messages.
    """
    name = "openai"

    def import_sdk(self):
        # Deferred import: only the selected provider's SDK must be installed.
        import openai
        key = os.environ.get("OPENAI_API_KEY")
        if not key:
            error_exit("OPENAI_API_KEY not set")
        self.client = openai.OpenAI(api_key=key)
        self.model = os.environ.get("LLM_MODEL", "gpt-4o")

    def convert_tools(self, tools):
        """Wrap canonical tools into OpenAI function-calling format."""
        converted = []
        for t in tools:
            converted.append({
                "type": "function",
                "function": {
                    "name": t["name"],
                    "description": t.get("description", ""),
                    "parameters": t.get("input_schema", {"type": "object", "properties": {}}),
                },
            })
        return converted

    def call(self, system, messages, tools):
        # OpenAI carries the system prompt as a leading message, not a
        # separate parameter.
        # NOTE(review): newer OpenAI models expect max_completion_tokens
        # instead of max_tokens — confirm against the models in use.
        oai_messages = [{"role": "system", "content": system}] + messages
        return self.client.chat.completions.create(
            model=self.model, max_tokens=16384,
            tools=tools, messages=oai_messages)

    def has_tool_use(self, r):
        return bool(r.choices[0].message.tool_calls)

    def extract_text(self, r):
        return r.choices[0].message.content or ""

    def extract_tool_calls(self, r):
        # Arguments arrive as a JSON string; decode to a dict for dispatch.
        for tc in r.choices[0].message.tool_calls:
            yield tc.id, tc.function.name, json.loads(tc.function.arguments)

    def extract_usage(self, r):
        # Normalized token accounting; zeros when the SDK omits usage.
        u = getattr(r, "usage", None)
        if u:
            return {"input_tokens": getattr(u, "prompt_tokens", 0) or 0, "output_tokens": getattr(u, "completion_tokens", 0) or 0}
        return {"input_tokens": 0, "output_tokens": 0}

    def append_turn(self, messages, response, tool_results):
        msg = response.choices[0].message
        # Build assistant message dict with tool_calls
        asst = {"role": "assistant", "content": msg.content or ""}
        if msg.tool_calls:
            asst["tool_calls"] = [
                {
                    "id": tc.id,
                    "type": "function",
                    "function": {"name": tc.function.name, "arguments": tc.function.arguments},
                }
                for tc in msg.tool_calls
            ]
        messages.append(asst)
        # Each tool result is a separate message for OpenAI
        for call_id, name, text, is_err in tool_results:
            messages.append({
                "role": "tool",
                "tool_call_id": call_id,
                "content": text,
            })
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
class GeminiProvider:
    """Provider adapter for the Google Gen AI SDK (google-genai).

    Gemini needs the most translation of the three providers: JSON schemas
    must be sanitized, dict-shaped messages converted to Content objects,
    and tool results wrapped as function_response parts.
    """
    name = "gemini"

    def _sanitize_schema(self, schema):
        """Recursively strip keys Gemini doesn't support."""
        if not isinstance(schema, dict):
            return schema
        # Work on a copy; callers may share the schema dict.
        schema = dict(schema)
        for key in ("$schema", "additionalProperties", "examples", "default", "title"):
            schema.pop(key, None)
        schema_type = (schema.get("type") or "").lower()
        if schema_type == "object":
            props = schema.get("properties")
            if not props or not isinstance(props, dict) or len(props) == 0:
                # Presumably Gemini rejects object schemas with no
                # properties, hence the degradation to a string parameter
                # — TODO confirm against current API behavior.
                schema["type"] = "STRING"
                schema.pop("properties", None)
                schema.pop("required", None)
            else:
                sanitized = {}
                for k, v in props.items():
                    cleaned = self._sanitize_schema(v)
                    if cleaned is not None:
                        sanitized[k] = cleaned
                schema["properties"] = sanitized
        elif schema_type == "array":
            items = schema.get("items")
            if isinstance(items, dict):
                schema["items"] = self._sanitize_schema(items)
        return schema

    def import_sdk(self):
        # Deferred import: only the selected provider's SDK must be installed.
        from google import genai
        key = os.environ.get("GEMINI_API_KEY")
        if not key:
            error_exit("GEMINI_API_KEY not set")
        self.client = genai.Client(api_key=key)
        self.model = os.environ.get("LLM_MODEL", "gemini-2.5-pro")
        # Grab the google.genai.types submodule for Content/Part builders.
        self._genai_types = __import__("google.genai", fromlist=["types"]).types

    def convert_tools(self, tools):
        """Convert canonical tools to Gemini function declarations."""
        types = self._genai_types
        declarations = []
        for t in tools:
            schema = t.get("input_schema", {"type": "object", "properties": {}})
            sanitized = self._sanitize_schema(schema)
            declarations.append(types.FunctionDeclaration(
                name=t["name"],
                description=t.get("description", ""),
                parameters=sanitized,
            ))
        # Gemini takes one Tool object bundling all declarations.
        return [types.Tool(function_declarations=declarations)]

    def call(self, system, messages, tools):
        types = self._genai_types
        # Convert messages to Gemini Content format
        contents = []
        for msg in messages:
            role = msg["role"] if isinstance(msg, dict) else getattr(msg, "role", "user")
            # If msg is already a genai Content object, pass through
            if hasattr(msg, "parts"):
                contents.append(msg)
                continue
            # Gemini only knows "user" and "model" roles.
            gemini_role = "user" if role == "user" else "model"
            content = msg.get("content", "") if isinstance(msg, dict) else ""
            if isinstance(content, str):
                contents.append(types.Content(
                    role=gemini_role,
                    parts=[types.Part.from_text(text=content)],
                ))
            elif isinstance(content, list):
                parts = []
                for item in content:
                    if isinstance(item, dict) and "function_response" in item:
                        fr = item["function_response"]
                        parts.append(types.Part.from_function_response(
                            name=fr["name"],
                            response=fr["response"],
                        ))
                    else:
                        parts.append(types.Part.from_text(text=str(item)))
                contents.append(types.Content(role=gemini_role, parts=parts))
        config = types.GenerateContentConfig(
            system_instruction=system,
            tools=tools,
            max_output_tokens=16384,
        )
        return self.client.models.generate_content(
            model=self.model, contents=contents, config=config)

    def has_tool_use(self, r):
        # Guard: responses blocked by safety filters may have no candidates.
        if not r.candidates or not r.candidates[0].content:
            return False
        return any(p.function_call for p in r.candidates[0].content.parts)

    def extract_text(self, r):
        if not r.candidates or not r.candidates[0].content:
            return ""
        parts = []
        for p in r.candidates[0].content.parts:
            if p.text:
                parts.append(p.text)
        return "\n".join(parts)

    def extract_tool_calls(self, r):
        # Gemini supplies no call ids; the part index stands in as one.
        for i, p in enumerate(r.candidates[0].content.parts):
            if p.function_call:
                yield str(i), p.function_call.name, dict(p.function_call.args)

    def extract_usage(self, r):
        # Normalized token accounting; zeros when metadata is absent.
        u = getattr(r, "usage_metadata", None)
        if u:
            return {"input_tokens": getattr(u, "prompt_token_count", 0) or 0, "output_tokens": getattr(u, "candidates_token_count", 0) or 0}
        return {"input_tokens": 0, "output_tokens": 0}

    def append_turn(self, messages, response, tool_results):
        types = self._genai_types
        # Append the model's response as a Content object
        model_parts = []
        for p in response.candidates[0].content.parts:
            if p.function_call:
                model_parts.append(types.Part.from_function_call(
                    name=p.function_call.name,
                    args=dict(p.function_call.args),
                ))
            elif p.text:
                model_parts.append(types.Part.from_text(text=p.text))
        messages.append(types.Content(role="model", parts=model_parts))
        # Append function responses as user message
        fr_parts = []
        for call_id, name, text, is_err in tool_results:
            # Prefer the tool output parsed as JSON; fall back to wrapping
            # the raw text.
            try:
                resp_data = json.loads(text)
            except (json.JSONDecodeError, TypeError):
                resp_data = {"output": text}
            if is_err:
                resp_data = {"error": text}
            # Gemini requires response to be a dict
            if not isinstance(resp_data, dict):
                resp_data = {"output": resp_data}
            fr_parts.append(types.Part.from_function_response(
                name=name, response=resp_data,
            ))
        messages.append(types.Content(role="user", parts=fr_parts))
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
# Registry mapping LLM_PROVIDER env-var values to adapter classes.
PROVIDERS = {
    "anthropic": AnthropicProvider,
    "openai": OpenAIProvider,
    "gemini": GeminiProvider,
}
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
class Heartbeat:
    """Context manager printing periodic "." markers to stderr.

    Keeps the E2B connection alive while a long-running LLM call is in
    flight and nothing else is being written.
    """

    def __init__(self, interval=15):
        # Seconds between markers.
        self.interval = interval
        self._stop = threading.Event()
        self._thread = None

    def _run(self):
        # Event.wait doubles as both the sleep and the stop signal: it
        # returns True (ending the loop) as soon as __exit__ sets the event.
        while not self._stop.wait(self.interval):
            sys.stderr.write(".")
            sys.stderr.flush()

    def __enter__(self):
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()
        return self

    def __exit__(self, *exc_info):
        self._stop.set()
        if self._thread:
            self._thread.join(timeout=2)
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
# ---------------------------------------------------------------------------
|
|
679
|
+
# Main agent loop
|
|
680
|
+
# ---------------------------------------------------------------------------
|
|
681
|
+
|
|
682
|
+
def main():
    """Run the agent loop: load config, pick a provider, iterate with tools.

    Reads prompt.md, input.json, and optionally output_schema.json /
    custom_tools.json from the current directory. The final result (or a
    fatal error) is printed to stdout as a single JSON object; all
    progress/event output goes to stderr.
    """
    global _VERBOSE

    parser = argparse.ArgumentParser()
    parser.add_argument("--max-turns", type=int, default=25)
    parser.add_argument("--verbose", action="store_true", help="Log tool calls to stderr")
    args = parser.parse_args()

    _VERBOSE = args.verbose

    # Author's domain prompt (system message body).
    with open("prompt.md", "r") as f:
        author_prompt = f.read()

    # Caller-supplied input (becomes the first user message).
    with open("input.json", "r") as f:
        input_data = json.load(f)

    # Both schema and custom tools are optional.
    output_schema = None
    if os.path.exists("output_schema.json"):
        with open("output_schema.json", "r") as f:
            output_schema = json.load(f)

    custom_tools_config = []
    if os.path.exists("custom_tools.json"):
        with open("custom_tools.json", "r") as f:
            custom_tools_config = json.load(f)

    # Prepend platform context so authors don't need to explain sandbox mechanics
    system_prompt = build_platform_context(output_schema, custom_tools_config) + author_prompt

    # Build canonical tool list
    canonical_tools = list(BUILTIN_TOOLS)
    canonical_tools.append(build_submit_result_tool(output_schema))
    canonical_tools.extend(build_custom_tools(custom_tools_config))

    # Select and initialize provider
    provider_name = os.environ.get("LLM_PROVIDER", "anthropic")
    if provider_name not in PROVIDERS:
        error_exit("Unsupported LLM_PROVIDER: %s. Supported: %s" % (provider_name, ", ".join(PROVIDERS)))

    provider = PROVIDERS[provider_name]()
    try:
        provider.import_sdk()
    except ImportError as e:
        error_exit("Failed to import SDK for %s: %s" % (provider_name, e))
    except Exception as e:
        error_exit("Failed to initialize %s provider: %s" % (provider_name, e))

    tools = provider.convert_tools(canonical_tools)

    # The caller's input is serialized as the opening user message.
    messages = [{"role": "user", "content": json.dumps(input_data, indent=2)}]
    total_usage = {"input_tokens": 0, "output_tokens": 0}

    for turn in range(args.max_turns):
        emit_event("turn_start", turn=turn + 1, max_turns=args.max_turns)
        if _VERBOSE:
            print("[agent] Turn %d/%d" % (turn + 1, args.max_turns), file=sys.stderr, flush=True)

        # Heartbeat keeps the connection alive while the LLM call blocks.
        with Heartbeat(interval=15):
            try:
                response = provider.call(system_prompt, messages, tools)
            except Exception as e:
                emit_event("error", message=str(e)[:200], usage=total_usage)
                error_exit("LLM API error (%s): %s" % (provider_name, e))

        turn_usage = provider.extract_usage(response)
        total_usage["input_tokens"] += turn_usage["input_tokens"]
        total_usage["output_tokens"] += turn_usage["output_tokens"]

        # No tool calls: treat the plain-text reply as the final answer.
        if not provider.has_tool_use(response):
            emit_event("done", usage=total_usage)
            final_text = provider.extract_text(response)
            try:
                result = json.loads(final_text)
                print(json.dumps(result))
            except json.JSONDecodeError:
                # Not JSON — wrap the raw text so stdout stays one JSON object.
                print(json.dumps({"result": final_text}))
            sys.exit(0)

        tool_results = []
        for call_id, name, input_args in provider.extract_tool_calls(response):
            verbose_log(name, input_args)
            emit_event("tool_call", turn=turn + 1, tool=name, args_brief=_brief_args(name, input_args))
            result_text, is_submit = dispatch_tool(name, input_args, custom_tools_config)
            emit_event("tool_result", turn=turn + 1, tool=name, status="error" if result_text.startswith("[ERROR]") else "ok")

            # submit_result short-circuits the loop: print and exit; any
            # remaining tool calls in this response are intentionally dropped.
            if is_submit:
                emit_event("done", usage=total_usage)
                try:
                    result = json.loads(result_text)
                except json.JSONDecodeError:
                    result = {"result": result_text}
                print(json.dumps(result))
                sys.exit(0)

            is_error = result_text.startswith("[ERROR]")
            tool_results.append((call_id, name, result_text, is_error))

        provider.append_turn(messages, response, tool_results)
        num_calls = len(tool_results)
        if _VERBOSE:
            print("[agent] Turn %d/%d completed (%d tool calls)" % (turn + 1, args.max_turns, num_calls), file=sys.stderr, flush=True)
        else:
            print("[agent] Turn %d/%d completed (%d tool calls)" % (turn + 1, args.max_turns, num_calls), file=sys.stderr)

    # Loop exhausted without submit_result: report failure.
    emit_event("error", message="max turns reached", usage=total_usage)
    error_exit("Agent reached maximum turns (%d) without submitting a result" % args.max_turns)
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
# Script entry point (also importable without side effects for testing).
if __name__ == "__main__":
    main()
|