openrouter-agent-cli 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openrouter_agent_cli/__init__.py +2 -0
- openrouter_agent_cli/__main__.py +6 -0
- openrouter_agent_cli/cli.py +748 -0
- openrouter_agent_cli-0.1.2.dist-info/METADATA +270 -0
- openrouter_agent_cli-0.1.2.dist-info/RECORD +9 -0
- openrouter_agent_cli-0.1.2.dist-info/WHEEL +5 -0
- openrouter_agent_cli-0.1.2.dist-info/entry_points.txt +2 -0
- openrouter_agent_cli-0.1.2.dist-info/licenses/LICENSE +22 -0
- openrouter_agent_cli-0.1.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,748 @@
|
|
|
1
|
+
"""Standalone OpenRouter agent CLI with basic actions and context management."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import asyncio
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import signal
|
|
11
|
+
import sys
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import httpx
|
|
17
|
+
|
|
18
|
+
# Chat-completions endpoint every model request is POSTed to.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# Default to a free-tier model so first-run usage does not consume paid credits.
DEFAULT_MODEL = "arcee-ai/trinity-large-preview:free"
# Session name used when none is supplied (or the supplied one sanitizes to "").
DEFAULT_SESSION_ID = "default"
# Maximum model/tool iterations spent on a single user message.
DEFAULT_MAX_TURNS = 24
# Non-system message count above which history is compacted.
DEFAULT_MAX_HISTORY_MESSAGES = 60
# Default run_bash timeout, in seconds.
DEFAULT_COMMAND_TIMEOUT = 30
# Number of most recent non-system messages kept verbatim during compaction.
CONTEXT_KEEP_TAIL = 10

DEFAULT_SYSTEM_PROMPT = """You are a pragmatic coding assistant in a terminal.
Use tools only when needed. Explain outputs clearly and stay concise.
When unsure, ask for clarification before destructive operations."""

# Tool schema sent with each request when tools are enabled. The only tool is
# `run_bash`, which takes a required `command` and an optional `timeout_seconds`.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "run_bash",
            "description": (
                "Run a shell command in the current working directory and return stdout/stderr."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "command": {
                        "type": "string",
                        "description": "Shell command to execute.",
                    },
                    "timeout_seconds": {
                        "type": "integer",
                        "description": "Execution timeout in seconds (1-600).",
                        "default": DEFAULT_COMMAND_TIMEOUT,
                    },
                },
                "required": ["command"],
            },
        },
    }
]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _sanitize_session_id(session_id: str) -> str:
|
|
60
|
+
safe = re.sub(r"[^a-zA-Z0-9._-]", "_", session_id)
|
|
61
|
+
return safe or DEFAULT_SESSION_ID
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _truncate(text: str, limit: int) -> str:
|
|
65
|
+
return text if len(text) <= limit else text[:limit] + "..."
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _decode_tool_arguments(raw_args: Any) -> dict[str, Any]:
|
|
69
|
+
if raw_args is None:
|
|
70
|
+
return {}
|
|
71
|
+
if isinstance(raw_args, dict):
|
|
72
|
+
return raw_args
|
|
73
|
+
if isinstance(raw_args, str):
|
|
74
|
+
value = raw_args.strip()
|
|
75
|
+
if not value:
|
|
76
|
+
return {}
|
|
77
|
+
try:
|
|
78
|
+
decoded = json.loads(value)
|
|
79
|
+
except json.JSONDecodeError:
|
|
80
|
+
return {}
|
|
81
|
+
return decoded if isinstance(decoded, dict) else {}
|
|
82
|
+
return {}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _message_content_as_text(message: dict[str, Any]) -> str:
|
|
86
|
+
content = message.get("content")
|
|
87
|
+
if isinstance(content, str):
|
|
88
|
+
return content
|
|
89
|
+
if content is None:
|
|
90
|
+
return ""
|
|
91
|
+
return json.dumps(content, ensure_ascii=False)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _estimate_tokens(messages: list[dict[str, Any]]) -> int:
|
|
95
|
+
chars = 0
|
|
96
|
+
for msg in messages:
|
|
97
|
+
chars += len(_message_content_as_text(msg))
|
|
98
|
+
for tc in msg.get("tool_calls") or []:
|
|
99
|
+
fn = tc.get("function", {})
|
|
100
|
+
chars += len(str(fn.get("name", "")))
|
|
101
|
+
chars += len(str(fn.get("arguments", "")))
|
|
102
|
+
return max(1, chars // 4)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass
class ToolPermissionPolicy:
    """Allow/deny rules that decide whether a tool call may run.

    Each set holds tool names; the entry "*" matches every tool.
    """

    # Tools that run without asking the user.
    allow: set[str] = field(default_factory=set)
    # Tools that are always refused; deny rules win over allow rules.
    deny: set[str] = field(default_factory=set)

    def decision(self, tool_name: str) -> str:
        """Return "deny", "allow" or "ask" for *tool_name*."""
        # Deny is listed first so it takes precedence over allow.
        for verdict, rules in (("deny", self.deny), ("allow", self.allow)):
            if "*" in rules or tool_name in rules:
                return verdict
        return "ask"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class OpenRouterAgentCLI:
|
|
119
|
+
def __init__(
    self,
    api_key: str,
    model: str,
    session_id: str,
    workdir: str,
    max_turns: int,
    max_history_messages: int,
    command_timeout: int,
    tools_enabled: bool,
    system_prompt: str,
) -> None:
    """Configure the agent and load any persisted session history.

    Args:
        api_key: OpenRouter API key sent as a bearer token.
        model: OpenRouter model ID.
        session_id: Session name; sanitized before use as a file name.
        workdir: Directory tool commands run in (stored as an absolute path).
        max_turns: Model/tool iterations per user message (at least 1).
        max_history_messages: Compaction threshold (at least 8).
        command_timeout: Default tool timeout in seconds, clamped to 1-600.
        tools_enabled: Whether tool calling is offered to the model.
        system_prompt: System message placed at the start of the history.
    """
    # Connection and model settings.
    self.api_key = api_key
    self.model = model
    self.system_prompt = system_prompt

    # Session and execution limits, clamped to sane ranges.
    self.session_id = _sanitize_session_id(session_id)
    self.workdir = os.path.abspath(workdir)
    self.max_turns = max(1, max_turns)
    self.max_history_messages = max(8, max_history_messages)
    self.command_timeout = max(1, min(command_timeout, 600))

    # Tooling and run-mode state.
    self.tools_enabled = tools_enabled
    self.non_interactive_mode = False
    self.policy = ToolPermissionPolicy()
    self.one_shot_prompt: str | None = None

    # Session storage; the directory can be overridden via the environment.
    configured_root = os.environ.get(
        "OPENROUTER_AGENT_SESSION_DIR", "~/.openrouter-agent-cli/sessions"
    )
    self.session_root = Path(configured_root).expanduser()
    self.session_root.mkdir(parents=True, exist_ok=True)
    self.messages = self._load_session()
|
|
150
|
+
|
|
151
|
+
def _log(self, message: str, *, end: str = "\n") -> None:
|
|
152
|
+
target = sys.stderr if self.non_interactive_mode else sys.stdout
|
|
153
|
+
print(message, file=target, end=end)
|
|
154
|
+
|
|
155
|
+
def _output_response(self, text: str) -> str:
|
|
156
|
+
if self.non_interactive_mode:
|
|
157
|
+
print(text)
|
|
158
|
+
else:
|
|
159
|
+
print(f"\nassistant> {text}\n")
|
|
160
|
+
return text
|
|
161
|
+
|
|
162
|
+
@property
|
|
163
|
+
def _session_path(self) -> Path:
|
|
164
|
+
return self.session_root / f"{self.session_id}.json"
|
|
165
|
+
|
|
166
|
+
def _load_session(self) -> list[dict[str, Any]]:
|
|
167
|
+
try:
|
|
168
|
+
data = json.loads(self._session_path.read_text())
|
|
169
|
+
stored = data.get("messages", [])
|
|
170
|
+
if isinstance(stored, list):
|
|
171
|
+
return [{"role": "system", "content": self.system_prompt}] + stored
|
|
172
|
+
except FileNotFoundError:
|
|
173
|
+
pass
|
|
174
|
+
except Exception as e:
|
|
175
|
+
print(f"[session] Failed to load session: {e}")
|
|
176
|
+
return [{"role": "system", "content": self.system_prompt}]
|
|
177
|
+
|
|
178
|
+
def _save_session(self):
|
|
179
|
+
non_system = [m for m in self.messages if m.get("role") != "system"]
|
|
180
|
+
if len(non_system) > self.max_history_messages:
|
|
181
|
+
non_system = non_system[-self.max_history_messages :]
|
|
182
|
+
payload = {"messages": non_system}
|
|
183
|
+
try:
|
|
184
|
+
self._session_path.write_text(json.dumps(payload))
|
|
185
|
+
except Exception as e:
|
|
186
|
+
print(f"[session] Failed to save session: {e}")
|
|
187
|
+
|
|
188
|
+
def _tool_names(self) -> list[str]:
    """Return the sorted names of all tools declared in ``TOOLS``."""
    candidates = (tool.get("function", {}).get("name") for tool in TOOLS)
    # Keep only non-empty string names.
    return sorted(name for name in candidates if isinstance(name, str) and name)
|
|
196
|
+
|
|
197
|
+
async def run(self):
    """Run the CLI: one turn for a one-shot prompt, otherwise the REPL.

    Interactive mode prints a banner, then reads lines until EOF, Ctrl-C or
    an exit command. Lines starting with "/" are slash commands; everything
    else is sent to the model.
    """
    if not self.non_interactive_mode:
        banner = (
            "OpenRouter Agent CLI",
            f"Model : {self.model}",
            f"Session : {self.session_id}",
            f"Working dir: {self.workdir}",
            "Type /help for commands. Type /exit to quit.",
        )
        for banner_line in banner:
            print(banner_line)
        print()

    async with httpx.AsyncClient(timeout=60.0) as client:
        if self.one_shot_prompt:
            await self._run_user_turn(client, self.one_shot_prompt)
            return

        while True:
            try:
                # input() blocks, so it runs in a worker thread.
                raw = await asyncio.to_thread(input, "you> ")
            except (EOFError, KeyboardInterrupt):
                print("\nExiting.")
                break

            line = raw.strip()
            if not line:
                continue
            if not line.startswith("/"):
                await self._run_user_turn(client, line)
            elif not await self._handle_command(client, line):
                break
|
|
229
|
+
|
|
230
|
+
async def _handle_command(self, client: httpx.AsyncClient, command_line: str) -> bool:
|
|
231
|
+
parts = command_line.split(maxsplit=1)
|
|
232
|
+
cmd = parts[0].lower()
|
|
233
|
+
arg = parts[1].strip() if len(parts) > 1 else ""
|
|
234
|
+
|
|
235
|
+
if cmd in ("/exit", "/quit"):
|
|
236
|
+
return False
|
|
237
|
+
|
|
238
|
+
if cmd == "/help":
|
|
239
|
+
print("Commands:")
|
|
240
|
+
print(" /help Show help")
|
|
241
|
+
print(" /exit Exit")
|
|
242
|
+
print(" /model [id] Show or set model")
|
|
243
|
+
print(" /usage Show message count + rough token estimate")
|
|
244
|
+
print(" /context [n] Show last n messages (default 8)")
|
|
245
|
+
print(" /compact Force conversation compaction")
|
|
246
|
+
print(" /clear Clear session history")
|
|
247
|
+
print(" /tools Show tools + permission policy")
|
|
248
|
+
print(" /tools on|off Enable or disable tool calling")
|
|
249
|
+
print(" /allow <tool|*> Always allow tool")
|
|
250
|
+
print(" /deny <tool|*> Always deny tool")
|
|
251
|
+
print(" /unallow <tool|*> Remove allow rule")
|
|
252
|
+
print(" /undeny <tool|*> Remove deny rule")
|
|
253
|
+
print(" /cwd [path] Show or set working directory")
|
|
254
|
+
return True
|
|
255
|
+
|
|
256
|
+
if cmd == "/model":
|
|
257
|
+
if not arg:
|
|
258
|
+
print(f"Current model: {self.model}")
|
|
259
|
+
else:
|
|
260
|
+
self.model = arg
|
|
261
|
+
print(f"Model set to: {self.model}")
|
|
262
|
+
return True
|
|
263
|
+
|
|
264
|
+
if cmd == "/usage":
|
|
265
|
+
msg_count = len([m for m in self.messages if m.get("role") != "system"])
|
|
266
|
+
token_est = _estimate_tokens(self.messages)
|
|
267
|
+
print(f"Messages (non-system): {msg_count}")
|
|
268
|
+
print(f"Estimated tokens : ~{token_est}")
|
|
269
|
+
print(f"History limit : {self.max_history_messages}")
|
|
270
|
+
return True
|
|
271
|
+
|
|
272
|
+
if cmd == "/context":
|
|
273
|
+
n = 8
|
|
274
|
+
if arg:
|
|
275
|
+
try:
|
|
276
|
+
n = max(1, int(arg))
|
|
277
|
+
except ValueError:
|
|
278
|
+
print("Usage: /context [n]")
|
|
279
|
+
return True
|
|
280
|
+
context = self.messages[-n:]
|
|
281
|
+
print(f"Last {len(context)} messages:")
|
|
282
|
+
for i, msg in enumerate(context, 1):
|
|
283
|
+
role = msg.get("role", "unknown")
|
|
284
|
+
text = _truncate(_message_content_as_text(msg).replace("\n", " "), 180)
|
|
285
|
+
print(f" {i:>2}. {role}: {text}")
|
|
286
|
+
return True
|
|
287
|
+
|
|
288
|
+
if cmd == "/compact":
|
|
289
|
+
compacted = await self._compact_history(client, force=True)
|
|
290
|
+
print("Context compacted." if compacted else "Nothing to compact.")
|
|
291
|
+
return True
|
|
292
|
+
|
|
293
|
+
if cmd == "/clear":
|
|
294
|
+
self.messages = [{"role": "system", "content": self.system_prompt}]
|
|
295
|
+
self._save_session()
|
|
296
|
+
print("Session history cleared.")
|
|
297
|
+
return True
|
|
298
|
+
|
|
299
|
+
if cmd == "/tools":
|
|
300
|
+
lowered = arg.lower()
|
|
301
|
+
if lowered in ("on", "off"):
|
|
302
|
+
self.tools_enabled = lowered == "on"
|
|
303
|
+
print(f"Tools {'enabled' if self.tools_enabled else 'disabled'}.")
|
|
304
|
+
return True
|
|
305
|
+
print(f"Tools enabled: {self.tools_enabled}")
|
|
306
|
+
print(f"Available tools: {', '.join(self._tool_names())}")
|
|
307
|
+
print(f"Allow list: {sorted(self.policy.allow) if self.policy.allow else '[]'}")
|
|
308
|
+
print(f"Deny list : {sorted(self.policy.deny) if self.policy.deny else '[]'}")
|
|
309
|
+
return True
|
|
310
|
+
|
|
311
|
+
if cmd == "/allow":
|
|
312
|
+
if not arg:
|
|
313
|
+
print("Usage: /allow <tool_name|*>")
|
|
314
|
+
return True
|
|
315
|
+
self.policy.allow.add(arg)
|
|
316
|
+
self.policy.deny.discard(arg)
|
|
317
|
+
print(f"Always allow: {arg}")
|
|
318
|
+
return True
|
|
319
|
+
|
|
320
|
+
if cmd == "/deny":
|
|
321
|
+
if not arg:
|
|
322
|
+
print("Usage: /deny <tool_name|*>")
|
|
323
|
+
return True
|
|
324
|
+
self.policy.deny.add(arg)
|
|
325
|
+
self.policy.allow.discard(arg)
|
|
326
|
+
print(f"Always deny: {arg}")
|
|
327
|
+
return True
|
|
328
|
+
|
|
329
|
+
if cmd == "/unallow":
|
|
330
|
+
if not arg:
|
|
331
|
+
print("Usage: /unallow <tool_name|*>")
|
|
332
|
+
return True
|
|
333
|
+
self.policy.allow.discard(arg)
|
|
334
|
+
print(f"Removed allow rule: {arg}")
|
|
335
|
+
return True
|
|
336
|
+
|
|
337
|
+
if cmd == "/undeny":
|
|
338
|
+
if not arg:
|
|
339
|
+
print("Usage: /undeny <tool_name|*>")
|
|
340
|
+
return True
|
|
341
|
+
self.policy.deny.discard(arg)
|
|
342
|
+
print(f"Removed deny rule: {arg}")
|
|
343
|
+
return True
|
|
344
|
+
|
|
345
|
+
if cmd == "/cwd":
|
|
346
|
+
if not arg:
|
|
347
|
+
print(f"Current working directory: {self.workdir}")
|
|
348
|
+
return True
|
|
349
|
+
candidate = os.path.abspath(os.path.expanduser(arg))
|
|
350
|
+
if not os.path.isdir(candidate):
|
|
351
|
+
print(f"Directory not found: {candidate}")
|
|
352
|
+
return True
|
|
353
|
+
self.workdir = candidate
|
|
354
|
+
print(f"Working directory set to: {self.workdir}")
|
|
355
|
+
return True
|
|
356
|
+
|
|
357
|
+
print(f"Unknown command: {cmd}. Use /help.")
|
|
358
|
+
return True
|
|
359
|
+
|
|
360
|
+
async def _call_openrouter(
    self,
    client: httpx.AsyncClient,
    messages: list[dict[str, Any]],
    tool_choice: str = "auto",
) -> dict[str, Any]:
    """POST a chat-completion request and return the decoded response.

    Args:
        client: HTTP client used for the request.
        messages: Full message list to send.
        tool_choice: "none" suppresses tool calling for this request; any
            other value offers ``TOOLS`` with automatic tool choice, unless
            tools are disabled globally.

    Returns:
        The response JSON, guaranteed to hold a non-empty ``choices`` list.

    Raises:
        httpx.HTTPStatusError: The final response was not successful.
        RuntimeError: The response carried no usable ``choices``.
    """
    body: dict[str, Any] = {
        "model": self.model,
        "messages": messages,
        "temperature": 0,
        "max_tokens": 4096,
    }
    if tool_choice == "none" or not self.tools_enabled:
        body["tool_choice"] = "none"
    else:
        body["tools"] = TOOLS
        body["tool_choice"] = "auto"

    headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": os.environ.get(
            "OPENROUTER_AGENT_REFERER", "https://github.com/local/openrouter-agent-cli"
        ),
        "X-Title": os.environ.get("OPENROUTER_AGENT_TITLE", "OpenRouter Agent CLI"),
    }

    # Transient server errors get up to two retries, after 2s and then 4s.
    transient = (500, 502, 503)
    resp = await client.post(OPENROUTER_URL, json=body, headers=headers)
    for attempt in range(1, 3):
        if resp.status_code not in transient:
            break
        await asyncio.sleep(attempt * 2)
        resp = await client.post(OPENROUTER_URL, json=body, headers=headers)
    if not resp.is_success:
        resp.raise_for_status()

    data = resp.json()
    choices = data.get("choices") if isinstance(data, dict) else None
    # An empty or missing `choices` is an error: callers index choices[0].
    if not isinstance(choices, list) or not choices:
        err = data.get("error", {}) if isinstance(data, dict) else data
        if isinstance(err, dict):
            err = err.get("message", str(data))
        raise RuntimeError(f"OpenRouter error: {err}")
    return data
|
|
404
|
+
|
|
405
|
+
async def _compact_history(self, client: httpx.AsyncClient, force: bool = False) -> bool:
    """Replace older history with a model-written summary.

    The newest ``CONTEXT_KEEP_TAIL`` non-system messages are kept verbatim
    (more when needed to keep a tool exchange intact). Everything before
    them is summarized into one assistant message.

    Args:
        client: HTTP client used for the summarization request.
        force: Compact even when the history is below ``max_history_messages``.

    Returns:
        True when the history was rewritten and saved, False when there was
        nothing to compact.
    """
    non_system = [m for m in self.messages if m.get("role") != "system"]
    if not force and len(non_system) <= self.max_history_messages:
        return False
    if len(non_system) <= CONTEXT_KEEP_TAIL + 2:
        return False

    # Tool results are only valid right after the assistant message that
    # requested them, so move the split point back until the kept tail no
    # longer starts with a `tool` message.
    split = len(non_system) - CONTEXT_KEEP_TAIL
    while split > 0 and non_system[split].get("role") == "tool":
        split -= 1
    if split == 0:
        return False
    older = non_system[:split]
    tail = non_system[split:]

    # Only the last 80 older messages, each capped at 500 chars, are summarized.
    transcript_lines = []
    for msg in older[-80:]:
        role = msg.get("role", "unknown")
        text = _truncate(_message_content_as_text(msg).replace("\n", " "), 500)
        transcript_lines.append(f"{role}: {text}")
    transcript = "\n".join(transcript_lines) or "No prior messages."

    summary_prompt = [
        {
            "role": "system",
            "content": (
                "Summarize prior conversation for continuation. "
                "Return short bullets: goals, decisions, facts, TODOs, constraints. "
                "Keep below 180 words."
            ),
        },
        {"role": "user", "content": transcript},
    ]

    summary = ""
    try:
        summary_resp = await self._call_openrouter(client, summary_prompt, tool_choice="none")
        summary_msg = summary_resp["choices"][0]["message"]
        summary = (summary_msg.get("content") or summary_msg.get("reasoning") or "").strip()
    except Exception as e:
        # Best effort: a failed summary is recorded in place of the summary text.
        summary = f"Compaction summary failed: {e}"

    if not summary:
        summary = "No significant prior context."

    summary_entry = {"role": "assistant", "content": f"[Context summary]\n{summary}"}
    self.messages = [
        {"role": "system", "content": self.system_prompt},
        summary_entry,
        *tail,
    ]
    self._save_session()
    return True
|
|
453
|
+
|
|
454
|
+
async def _confirm_tool_call(self, tool_name: str, args: dict[str, Any]) -> bool:
|
|
455
|
+
if self.non_interactive_mode:
|
|
456
|
+
self._log(f"[permission] Tool '{tool_name}' denied in non-interactive mode.")
|
|
457
|
+
return False
|
|
458
|
+
preview = _truncate(json.dumps(args, ensure_ascii=False), 220)
|
|
459
|
+
question = (
|
|
460
|
+
f"[permission] Allow tool '{tool_name}' args={preview}? "
|
|
461
|
+
"[y]es/[n]o/[a]lways allow/[d]eny always: "
|
|
462
|
+
)
|
|
463
|
+
choice = (await asyncio.to_thread(input, question)).strip().lower()
|
|
464
|
+
if choice == "a":
|
|
465
|
+
self.policy.allow.add(tool_name)
|
|
466
|
+
self.policy.deny.discard(tool_name)
|
|
467
|
+
return True
|
|
468
|
+
if choice == "d":
|
|
469
|
+
self.policy.deny.add(tool_name)
|
|
470
|
+
self.policy.allow.discard(tool_name)
|
|
471
|
+
return False
|
|
472
|
+
return choice in ("y", "yes")
|
|
473
|
+
|
|
474
|
+
async def _run_bash(self, command: str, timeout_seconds: int) -> str:
|
|
475
|
+
try:
|
|
476
|
+
proc = await asyncio.create_subprocess_shell(
|
|
477
|
+
command,
|
|
478
|
+
cwd=self.workdir,
|
|
479
|
+
stdout=asyncio.subprocess.PIPE,
|
|
480
|
+
stderr=asyncio.subprocess.PIPE,
|
|
481
|
+
)
|
|
482
|
+
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout_seconds)
|
|
483
|
+
except asyncio.TimeoutError:
|
|
484
|
+
try:
|
|
485
|
+
proc.kill()
|
|
486
|
+
except Exception:
|
|
487
|
+
pass
|
|
488
|
+
return f"Command timed out after {timeout_seconds}s."
|
|
489
|
+
except Exception as e:
|
|
490
|
+
return f"Command failed to start: {e}"
|
|
491
|
+
|
|
492
|
+
out = stdout.decode("utf-8", errors="replace").strip()
|
|
493
|
+
err = stderr.decode("utf-8", errors="replace").strip()
|
|
494
|
+
if proc.returncode == 0:
|
|
495
|
+
return out or "(command succeeded with no output)"
|
|
496
|
+
if out and err:
|
|
497
|
+
return f"exit={proc.returncode}\nstdout:\n{out}\nstderr:\n{err}"
|
|
498
|
+
if err:
|
|
499
|
+
return f"exit={proc.returncode}\nstderr:\n{err}"
|
|
500
|
+
if out:
|
|
501
|
+
return f"exit={proc.returncode}\nstdout:\n{out}"
|
|
502
|
+
return f"exit={proc.returncode} (no output)"
|
|
503
|
+
|
|
504
|
+
async def _execute_tool(self, tool_name: str, args: dict[str, Any]) -> str:
|
|
505
|
+
if not self.tools_enabled:
|
|
506
|
+
return f"Tool blocked: tools are disabled. Requested '{tool_name}'."
|
|
507
|
+
|
|
508
|
+
decision = self.policy.decision(tool_name)
|
|
509
|
+
if decision == "deny":
|
|
510
|
+
return f"Tool blocked by deny policy: {tool_name}"
|
|
511
|
+
if decision == "ask":
|
|
512
|
+
allowed = await self._confirm_tool_call(tool_name, args)
|
|
513
|
+
if not allowed:
|
|
514
|
+
return f"Tool call denied by user: {tool_name}"
|
|
515
|
+
|
|
516
|
+
if tool_name != "run_bash":
|
|
517
|
+
return f"Unknown tool: {tool_name}"
|
|
518
|
+
|
|
519
|
+
command = str(args.get("command", "")).strip()
|
|
520
|
+
if not command:
|
|
521
|
+
return "run_bash error: 'command' is required."
|
|
522
|
+
|
|
523
|
+
timeout_seconds = args.get("timeout_seconds", self.command_timeout)
|
|
524
|
+
try:
|
|
525
|
+
timeout_seconds = int(timeout_seconds)
|
|
526
|
+
except (TypeError, ValueError):
|
|
527
|
+
timeout_seconds = self.command_timeout
|
|
528
|
+
timeout_seconds = min(max(1, timeout_seconds), 600)
|
|
529
|
+
|
|
530
|
+
return await self._run_bash(command, timeout_seconds)
|
|
531
|
+
|
|
532
|
+
async def _run_user_turn(self, client: httpx.AsyncClient, user_text: str) -> str:
    """Process one user message, looping over model replies and tool calls.

    The message is appended to the history. The model is then called up to
    ``max_turns`` times, executing requested tools between calls, until it
    answers without tool calls. Repeating the same tool calls twice in a row
    triggers one final tool-free request. The session is saved on every exit.

    Args:
        client: HTTP client used for model requests.
        user_text: The user's message.

    Returns:
        The assistant's final text, or "" when the request failed or the
        turn limit was reached.
    """
    self.messages.append({"role": "user", "content": user_text})
    last_tool_signature: str | None = None
    repeated_count = 0

    for turn in range(self.max_turns):
        try:
            if await self._compact_history(client):
                self._log("[context] Auto-compacted old history.")
            response = await self._call_openrouter(client, self.messages)
        except httpx.HTTPStatusError as e:
            detail = _truncate(e.response.text, 300)
            self._log(f"[openrouter] HTTP {e.response.status_code}: {detail}")
            self._save_session()
            return ""
        except Exception as e:
            self._log(f"[openrouter] Request failed: {e}")
            self._save_session()
            return ""

        # Validate the shape before indexing, so a malformed or empty
        # response is reported like any other failed request.
        choices = response.get("choices") if isinstance(response, dict) else None
        choice = choices[0] if isinstance(choices, list) and choices else None
        message = choice.get("message") if isinstance(choice, dict) else None
        if not isinstance(message, dict):
            self._log("[openrouter] Request failed: response contained no assistant message.")
            self._save_session()
            return ""

        finish_reason = choice.get("finish_reason", "")
        tool_calls = message.get("tool_calls") or []
        self.messages.append(message)

        if not tool_calls:
            text = message.get("content") or message.get("reasoning") or ""
            if not text:
                text = f"[empty response, finish_reason={finish_reason}]"
            self._output_response(text)
            self._save_session()
            return text

        # Fingerprint this batch of tool calls to detect the model looping.
        signature = json.dumps(
            [
                {
                    "name": (tc.get("function") or {}).get("name"),
                    "args": (tc.get("function") or {}).get("arguments"),
                }
                for tc in tool_calls
            ],
            sort_keys=True,
        )
        if signature == last_tool_signature:
            repeated_count += 1
        else:
            repeated_count = 0
        last_tool_signature = signature

        if repeated_count >= 1:
            nudge = (
                "STOP. You repeated the same tool call without progress. "
                "Do not call additional tools. Reply with a concise final answer."
            )
            # Answer every pending tool call with the nudge instead of running it.
            for tc in tool_calls:
                self.messages.append(
                    {
                        "role": "tool",
                        "tool_call_id": tc.get("id", "loop"),
                        "content": nudge,
                    }
                )
            try:
                forced = await self._call_openrouter(client, self.messages, tool_choice="none")
                forced_message = forced["choices"][0]["message"]
                text = forced_message.get("content") or forced_message.get("reasoning") or ""
                self.messages.append(forced_message)
            except Exception:
                # Fall back to the canned explanation below.
                text = ""
            if not text:
                text = "I got stuck in a tool loop and could not make progress."
            self._output_response(text)
            self._save_session()
            return text

        tool_results = []
        for idx, tool_call in enumerate(tool_calls):
            fn = tool_call.get("function") or {}
            tool_name = str(fn.get("name", "")).strip()
            tool_args = _decode_tool_arguments(fn.get("arguments"))
            # Store normalized JSON arguments back on the recorded message.
            fn["arguments"] = json.dumps(tool_args, separators=(",", ":"))

            self._log(f"[tool] {tool_name}({_truncate(json.dumps(tool_args), 140)})")
            result = await self._execute_tool(tool_name, tool_args)
            preview = _truncate(result.replace("\n", " "), 220)
            self._log(f"[tool-result] {preview}")

            tool_call_id = tool_call.get("id") or f"tc-{turn + 1}-{idx + 1}"
            tool_results.append(
                {
                    "role": "tool",
                    "tool_call_id": tool_call_id,
                    # Cap the stored output to keep the context small.
                    "content": result[:8000],
                }
            )

        self.messages.extend(tool_results)

    self._log("[agent] Reached max turns for this user message.")
    self._save_session()
    return ""
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
def _load_system_prompt(path: str | None) -> str:
|
|
637
|
+
if not path:
|
|
638
|
+
return DEFAULT_SYSTEM_PROMPT
|
|
639
|
+
p = Path(path).expanduser()
|
|
640
|
+
try:
|
|
641
|
+
return p.read_text()
|
|
642
|
+
except Exception as e:
|
|
643
|
+
raise RuntimeError(f"Failed to read system prompt file {p}: {e}") from e
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the agent CLI."""
    parser = argparse.ArgumentParser(
        description="Standalone OpenRouter terminal agent with tool actions and context management."
    )
    parser.add_argument(
        "--api-key",
        default=os.environ.get("OPENROUTER_API_KEY", ""),
        help="OpenRouter API key (defaults to OPENROUTER_API_KEY env var).",
    )
    parser.add_argument(
        "--model",
        default=os.environ.get("OPENROUTER_MODEL", DEFAULT_MODEL),
        help=f"OpenRouter model ID (default: {DEFAULT_MODEL}).",
    )
    parser.add_argument(
        "--session-id",
        default=DEFAULT_SESSION_ID,
        help=f"Session ID for persisted history (default: {DEFAULT_SESSION_ID}).",
    )
    parser.add_argument(
        "--workdir",
        default=os.getcwd(),
        help="Working directory for run_bash tool (default: current directory).",
    )
    parser.add_argument(
        "--max-turns",
        type=int,
        default=DEFAULT_MAX_TURNS,
        help=f"Max model/tool iterations per user turn (default: {DEFAULT_MAX_TURNS}).",
    )
    parser.add_argument(
        "--max-history-messages",
        type=int,
        default=DEFAULT_MAX_HISTORY_MESSAGES,
        help=f"Compaction threshold in non-system messages (default: {DEFAULT_MAX_HISTORY_MESSAGES}).",
    )
    parser.add_argument(
        "--command-timeout",
        type=int,
        default=DEFAULT_COMMAND_TIMEOUT,
        help=f"Default timeout in seconds for run_bash (default: {DEFAULT_COMMAND_TIMEOUT}).",
    )
    parser.add_argument(
        "--no-tools",
        action="store_true",
        help="Disable all tool calling.",
    )
    parser.add_argument(
        "--system-prompt-file",
        help="Path to a custom system prompt file.",
    )
    parser.add_argument(
        "--prompt",
        "-p",
        help="Run a single prompt, emit the assistant reply on stdout, and exit.",
    )
    return parser


def main() -> None:
    """Parse arguments, build the agent and run it until it finishes.

    Exits with status 1 when the API key is missing or the system prompt
    file cannot be read. SIGINT/SIGTERM cancel the running agent task.
    """
    args = _build_arg_parser().parse_args()

    if not args.api_key:
        print("ERROR: missing OpenRouter API key. Set OPENROUTER_API_KEY or pass --api-key.", file=sys.stderr)
        raise SystemExit(1)

    try:
        system_prompt = _load_system_prompt(args.system_prompt_file)
    except RuntimeError as e:
        print(f"ERROR: {e}", file=sys.stderr)
        raise SystemExit(1)

    cli = OpenRouterAgentCLI(
        api_key=args.api_key,
        model=args.model,
        session_id=args.session_id,
        workdir=args.workdir,
        max_turns=args.max_turns,
        max_history_messages=args.max_history_messages,
        command_timeout=args.command_timeout,
        tools_enabled=not args.no_tools,
        system_prompt=system_prompt,
    )

    if args.prompt is not None:
        cli.one_shot_prompt = args.prompt
        cli.non_interactive_mode = True

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    agent_task = loop.create_task(cli.run())
    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            # Cancel the task instead of stopping the loop: stopping makes
            # run_until_complete() raise RuntimeError ("Event loop stopped
            # before Future completed"), while cancellation unwinds run()
            # so the HTTP client is closed.
            loop.add_signal_handler(sig, agent_task.cancel)
        except NotImplementedError:
            # add_signal_handler is not available on some platforms.
            pass

    try:
        loop.run_until_complete(agent_task)
    except (KeyboardInterrupt, asyncio.CancelledError):
        pass
    finally:
        loop.close()
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openrouter-agent-cli
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Standalone terminal agent for OpenRouter with tool actions and context management.
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: httpx>=0.27
|
|
9
|
+
Dynamic: license-file
|
|
10
|
+
|
|
11
|
+
# openrouter-agent-cli
|
|
12
|
+
|
|
13
|
+
Standalone terminal agent for OpenRouter models with:
|
|
14
|
+
- tool actions (`run_bash`)
|
|
15
|
+
- interactive permission gating (`allow` / `deny` / `ask`)
|
|
16
|
+
- session persistence
|
|
17
|
+
- context visibility and compaction
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
cd openrouter-agent-cli
|
|
23
|
+
python3 -m venv .venv
|
|
24
|
+
source .venv/bin/activate
|
|
25
|
+
pip install -e .
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Run
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
export OPENROUTER_API_KEY=sk-or-...
|
|
32
|
+
openrouter-agent
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Or without installation:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
export OPENROUTER_API_KEY=sk-or-...
|
|
39
|
+
python -m openrouter_agent_cli.cli
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Non-interactive prompt
|
|
43
|
+
|
|
44
|
+
`--prompt` (short `-p`) lets another process run the CLI with a single user message, emit only the assistant reply to `stdout`, and exit immediately. Operation logs, tool call summaries, and permission notices are written to `stderr`. Because no user is available to answer permission prompts, tool calls are automatically denied in this mode; pass `--no-tools` to disable tools entirely.
|
|
45
|
+
|
|
46
|
+
Example:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
openrouter-agent --prompt "Explain tail recursion" --no-tools
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Useful flags
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
openrouter-agent \
|
|
56
|
+
--model arcee-ai/trinity-large-preview:free \
|
|
57
|
+
--session-id my-session \
|
|
58
|
+
--workdir ~/Projects \
|
|
59
|
+
--max-turns 24 \
|
|
60
|
+
--max-history-messages 60 \
|
|
61
|
+
--command-timeout 30
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Disable tools:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
openrouter-agent --no-tools
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Slash commands
|
|
71
|
+
|
|
72
|
+
- `/help`
|
|
73
|
+
- `/exit`
|
|
74
|
+
- `/model [id]`
|
|
75
|
+
- `/usage`
|
|
76
|
+
- `/context [n]`
|
|
77
|
+
- `/compact`
|
|
78
|
+
- `/clear`
|
|
79
|
+
- `/tools`
|
|
80
|
+
- `/tools on|off`
|
|
81
|
+
- `/allow <tool|*>`
|
|
82
|
+
- `/deny <tool|*>`
|
|
83
|
+
- `/unallow <tool|*>`
|
|
84
|
+
- `/undeny <tool|*>`
|
|
85
|
+
- `/cwd [path]`
|
|
86
|
+
|
|
87
|
+
## Context management
|
|
88
|
+
|
|
89
|
+
- history is saved in `~/.openrouter-agent-cli/sessions/<session_id>.json`
|
|
90
|
+
- `/usage` shows rough token estimate
|
|
91
|
+
- `/compact` forces summarization
|
|
92
|
+
- automatic compaction triggers when non-system message count exceeds `--max-history-messages`
|
|
93
|
+
|
|
94
|
+
## Security notes
|
|
95
|
+
|
|
96
|
+
- `run_bash` executes shell commands on your machine in `--workdir`
|
|
97
|
+
- default policy is `ask` for every tool call
|
|
98
|
+
- use `/deny *` to deny every tool call (a fully tool-free session)
|
|
99
|
+
- default model is free-tier (`arcee-ai/trinity-large-preview:free`); override with `--model` or `OPENROUTER_MODEL`
|
|
100
|
+
|
|
101
|
+
## Tool schema seen by the model
|
|
102
|
+
|
|
103
|
+
When tools are enabled, each OpenRouter request includes this tool definition:
|
|
104
|
+
|
|
105
|
+
```json
|
|
106
|
+
[
|
|
107
|
+
{
|
|
108
|
+
"type": "function",
|
|
109
|
+
"function": {
|
|
110
|
+
"name": "run_bash",
|
|
111
|
+
"description": "Run a shell command in the current working directory and return stdout/stderr.",
|
|
112
|
+
"parameters": {
|
|
113
|
+
"type": "object",
|
|
114
|
+
"properties": {
|
|
115
|
+
"command": {
|
|
116
|
+
"type": "string",
|
|
117
|
+
"description": "Shell command to execute."
|
|
118
|
+
},
|
|
119
|
+
"timeout_seconds": {
|
|
120
|
+
"type": "integer",
|
|
121
|
+
"description": "Execution timeout in seconds (1-600).",
|
|
122
|
+
"default": 30
|
|
123
|
+
}
|
|
124
|
+
},
|
|
125
|
+
"required": ["command"]
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
]
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Request body shape sent to OpenRouter (simplified):
|
|
133
|
+
|
|
134
|
+
```json
|
|
135
|
+
{
|
|
136
|
+
"model": "arcee-ai/trinity-large-preview:free",
|
|
137
|
+
"messages": [...],
|
|
138
|
+
"temperature": 0,
|
|
139
|
+
"max_tokens": 4096,
|
|
140
|
+
"tools": [...],
|
|
141
|
+
"tool_choice": "auto"
|
|
142
|
+
}
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
If tools are disabled (`--no-tools` or `/tools off`), the request sets:
|
|
146
|
+
|
|
147
|
+
```json
|
|
148
|
+
{
|
|
149
|
+
"tool_choice": "none"
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## How `run_bash` is invoked
|
|
154
|
+
|
|
155
|
+
Execution flow per user turn:
|
|
156
|
+
|
|
157
|
+
1. Model returns `tool_calls` in assistant message.
|
|
158
|
+
2. CLI decodes `function.arguments` JSON into a dict.
|
|
159
|
+
3. Permission policy is applied:
|
|
160
|
+
- `deny` list blocks immediately.
|
|
161
|
+
- `allow` list runs immediately.
|
|
162
|
+
- otherwise prompt user (`y/n/a/d`).
|
|
163
|
+
4. For `run_bash`, CLI executes:
|
|
164
|
+
- `asyncio.create_subprocess_shell(command, cwd=<workdir>, stdout=PIPE, stderr=PIPE)`
|
|
165
|
+
- waits with `asyncio.wait_for(..., timeout_seconds)`
|
|
166
|
+
- kills process on timeout
|
|
167
|
+
5. CLI formats stdout/stderr/exit code to text and appends a tool result message:
|
|
168
|
+
- role: `tool`
|
|
169
|
+
- tool_call_id: model-provided id
|
|
170
|
+
- content: command output (capped to 8000 chars before being sent back to model)
|
|
171
|
+
|
|
172
|
+
Example tool call from model:
|
|
173
|
+
|
|
174
|
+
```json
|
|
175
|
+
{
|
|
176
|
+
"id": "call_123",
|
|
177
|
+
"type": "function",
|
|
178
|
+
"function": {
|
|
179
|
+
"name": "run_bash",
|
|
180
|
+
"arguments": "{\"command\":\"ls -la\",\"timeout_seconds\":30}"
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Example tool result message added by CLI:
|
|
186
|
+
|
|
187
|
+
```json
|
|
188
|
+
{
|
|
189
|
+
"role": "tool",
|
|
190
|
+
"tool_call_id": "call_123",
|
|
191
|
+
"content": "total 64\n-rw-r--r-- ..."
|
|
192
|
+
}
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
Note: despite the name `run_bash`, execution uses `asyncio.create_subprocess_shell`, which runs the command through the system default shell (`/bin/sh` on POSIX), not an explicit `bash` binary, unless the command itself invokes `bash`.
|
|
196
|
+
|
|
197
|
+
## Prompt A/B testing
|
|
198
|
+
|
|
199
|
+
This repo includes a small harness for comparing system prompts:
|
|
200
|
+
|
|
201
|
+
- script: `scripts/ab_test_system_prompts.py`
|
|
202
|
+
- prompt variants:
|
|
203
|
+
- `prompts/system_prompt_control.md`
|
|
204
|
+
- `prompts/system_prompt_agentic_v1.md`
|
|
205
|
+
- sample tasks: `ab_tests/tasks_sample.txt`
|
|
206
|
+
|
|
207
|
+
Run prompt-only comparison (no tools):
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
export OPENROUTER_API_KEY=sk-or-...
|
|
211
|
+
python scripts/ab_test_system_prompts.py \
|
|
212
|
+
--tool-mode none \
|
|
213
|
+
--model arcee-ai/trinity-large-preview:free
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Run with tool execution enabled (use cautiously):
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
export OPENROUTER_API_KEY=sk-or-...
|
|
220
|
+
python scripts/ab_test_system_prompts.py \
|
|
221
|
+
--tool-mode execute \
|
|
222
|
+
--workdir "$(pwd)" \
|
|
223
|
+
--model arcee-ai/trinity-large-preview:free
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
Artifacts are written to `ab_tests/results/<timestamp>/`:
|
|
227
|
+
|
|
228
|
+
- `results.json` full transcripts and metadata
|
|
229
|
+
- `summary.csv` flat comparison table
|
|
230
|
+
- `summary.md` quick markdown summary
|
|
231
|
+
|
|
232
|
+
Run a harder repeated suite (2 prompts x 6 tasks x 3 repeats):
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
export OPENROUTER_API_KEY=sk-or-...
|
|
236
|
+
python scripts/ab_test_system_prompts.py \
|
|
237
|
+
--tool-mode execute \
|
|
238
|
+
--tasks-file ab_tests/tasks_hard_suite_v1.txt \
|
|
239
|
+
--repeats 3 \
|
|
240
|
+
--max-turns 3 \
|
|
241
|
+
--max-tokens 1000 \
|
|
242
|
+
--request-timeout 40 \
|
|
243
|
+
--command-timeout 20 \
|
|
244
|
+
--workdir "$(pwd)" \
|
|
245
|
+
--model arcee-ai/trinity-large-preview:free \
|
|
246
|
+
--output-dir ab_tests/results/hard_suite_v1_r3
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
Evaluate quality and groundedness from a run:
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
export OPENROUTER_API_KEY=sk-or-...
|
|
253
|
+
python scripts/evaluate_ab_results.py \
|
|
254
|
+
--results ab_tests/results/hard_suite_v1_r3/results.json \
|
|
255
|
+
--judge-model arcee-ai/trinity-large-preview:free \
|
|
256
|
+
--output-dir ab_tests/results/hard_suite_v1_r3/eval
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
Evaluator artifacts:
|
|
260
|
+
|
|
261
|
+
- `evaluation.json` per-case raw evaluation details
|
|
262
|
+
- `evaluation.csv` tabular scores
|
|
263
|
+
- `leaderboard.md` aggregated per-prompt ranking
|
|
264
|
+
|
|
265
|
+
## Findings and release docs
|
|
266
|
+
|
|
267
|
+
- benchmark findings: `docs/AB_FINDINGS_2026-02-21.md`
|
|
268
|
+
- public release checklist: `docs/PUBLIC_RELEASE_CHECKLIST.md`
|
|
269
|
+
- security policy: `SECURITY.md`
|
|
270
|
+
- env template: `.env.example`
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
openrouter_agent_cli/__init__.py,sha256=PsriEUjyjk6pcHgN5chqGD0l61CcjZ4w61CpCNqxCQc,37
|
|
2
|
+
openrouter_agent_cli/__main__.py,sha256=14FfnaF7zY550dRxcOV5XinMWDf5fRBJZkgTBoqeVj8,63
|
|
3
|
+
openrouter_agent_cli/cli.py,sha256=dLoszQK_CBIYZCJ-haGS74LO4NB84iM4pCWAL4cDGXU,27262
|
|
4
|
+
openrouter_agent_cli-0.1.2.dist-info/licenses/LICENSE,sha256=dQVLagjhAIyGtgRauZrdLI-lcZh4gJUamSnxHuVqfWQ,1057
|
|
5
|
+
openrouter_agent_cli-0.1.2.dist-info/METADATA,sha256=dN8t-VLEIJj8i9fuZK-TSipufJihUgVmYQgLPnvg5DU,6675
|
|
6
|
+
openrouter_agent_cli-0.1.2.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
|
|
7
|
+
openrouter_agent_cli-0.1.2.dist-info/entry_points.txt,sha256=ARllOLyXdEO2W-QblB3grNQIyFMS0RIFJtDGUGY1S3c,67
|
|
8
|
+
openrouter_agent_cli-0.1.2.dist-info/top_level.txt,sha256=bRAMCPYtV3NPW7V5Im8QEK_RHIRqhRLL8u4m1aoTQOY,21
|
|
9
|
+
openrouter_agent_cli-0.1.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
openrouter_agent_cli
|