aru-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aru/__init__.py +1 -0
- aru/agents/__init__.py +0 -0
- aru/agents/base.py +188 -0
- aru/agents/executor.py +32 -0
- aru/agents/planner.py +85 -0
- aru/cli.py +1993 -0
- aru/config.py +237 -0
- aru/context.py +287 -0
- aru/providers.py +433 -0
- aru/tools/__init__.py +0 -0
- aru/tools/ast_tools.py +422 -0
- aru/tools/codebase.py +1328 -0
- aru/tools/gitignore.py +109 -0
- aru/tools/mcp_client.py +156 -0
- aru/tools/ranker.py +220 -0
- aru/tools/tasklist.py +183 -0
- aru_code-0.1.0.dist-info/METADATA +385 -0
- aru_code-0.1.0.dist-info/RECORD +22 -0
- aru_code-0.1.0.dist-info/WHEEL +5 -0
- aru_code-0.1.0.dist-info/entry_points.txt +2 -0
- aru_code-0.1.0.dist-info/licenses/LICENSE +21 -0
- aru_code-0.1.0.dist-info/top_level.txt +1 -0
aru/tools/codebase.py
ADDED
|
@@ -0,0 +1,1328 @@
|
|
|
1
|
+
"""Custom tools for codebase exploration and manipulation."""
|
|
2
|
+
|
|
3
|
+
import fnmatch
|
|
4
|
+
import html.parser
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import shlex
|
|
8
|
+
import subprocess
|
|
9
|
+
import sys
|
|
10
|
+
import threading
|
|
11
|
+
import textwrap
|
|
12
|
+
|
|
13
|
+
from aru.tools.gitignore import is_ignored, walk_filtered
|
|
14
|
+
|
|
15
|
+
import httpx
|
|
16
|
+
|
|
17
|
+
from rich.console import Console, Group
|
|
18
|
+
from rich.panel import Panel
|
|
19
|
+
from rich.syntax import Syntax
|
|
20
|
+
from rich.text import Text
|
|
21
|
+
|
|
22
|
+
# ── Module-level session state ────────────────────────────────────
# Console used for all tool output; replaced through set_console().
_console = Console()

# Active Rich Live instance and StreamingDisplay (None until registered).
_live = None
_display = None

# Permission handling: global bypass flag, prompt serialization lock,
# action names auto-approved via "allow all", and user glob patterns
# loaded from aru.json.
_skip_permissions = False
_permission_lock = threading.Lock()
_allowed_actions: set[str] = set()
_permission_rules: list[str] = []

# Current model for sub-agents.
_model_id: str = "claude-sonnet-4-5-20250929"

# Callback used to invalidate the context cache after file writes.
_on_file_mutation = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def set_on_file_mutation(callback):
    """Register the hook that runs after a file write/edit/bash operation.

    Args:
        callback: Zero-argument callable stored in the module-level
            ``_on_file_mutation`` slot; a falsy value disables the hook.
    """
    global _on_file_mutation
    _on_file_mutation = callback
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _notify_file_mutation():
    """Drop cached read results, then run the registered mutation hook (if any)."""
    _read_cache.clear()
    hook = _on_file_mutation
    if not hook:
        return
    hook()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def set_skip_permissions(value: bool):
    """Turn the global permission bypass on or off.

    Args:
        value: When true, ``_ask_permission`` approves every action
            without prompting.
    """
    global _skip_permissions
    _skip_permissions = value
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_skip_permissions() -> bool:
    """Return the current value of the global permission-bypass flag."""
    return _skip_permissions
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Small model reference for sub-agents; changed via set_small_model_ref().
_small_model_ref: str = "anthropic/claude-haiku-4-5"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def set_model_id(model_id: str):
    """Record the model id currently in use for sub-agents.

    Args:
        model_id: Identifier stored in the module-level ``_model_id``.
    """
    global _model_id
    _model_id = model_id
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def set_small_model_ref(model_ref: str):
    """Replace the small/fast model reference used by sub-agents.

    Args:
        model_ref: Reference stored in the module-level ``_small_model_ref``.
    """
    global _small_model_ref
    _small_model_ref = model_ref
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _get_small_model_ref() -> str:
    """Return the small model reference currently configured for sub-agents."""
    return _small_model_ref
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def set_live(live):
    """Register the active Live instance so permission prompts can pause it.

    Args:
        live: The Live object to stop/start around prompts, or ``None``
            when no live display is active.
    """
    global _live
    _live = live
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def set_display(display):
    """Register the active StreamingDisplay so prompts can flush it first.

    Args:
        display: Object whose ``flush()`` is called before a permission
            prompt is shown, or ``None`` when there is none.
    """
    global _display
    _display = display
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def set_console(console: Console):
    """Use the caller's console for tool output instead of the module default.

    Sharing the main console instance avoids conflicts with the Live display.

    Args:
        console: Console instance stored in the module-level ``_console``.
    """
    global _console
    _console = console
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def set_permission_rules(rules: list[str]):
    """Install the user-defined permission rules (glob patterns) from aru.json.

    Args:
        rules: Patterns to store. They are copied into a new list, so later
            changes to the caller's list do not affect the stored rules.
    """
    global _permission_rules
    _permission_rules = [*rules]
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _format_diff(old_string: str, new_string: str) -> Group:
    """Render a before/after pair as a colored diff.

    Args:
        old_string: Text being removed; each of its lines is shown with a
            "- " prefix on a red background. Falsy values add nothing.
        new_string: Text being added; each of its lines is shown with a
            "+ " prefix in white on green. Falsy values add nothing.

    Returns:
        A Group holding one Text per line, deletions first, then additions.
    """
    removed = old_string.splitlines() if old_string else []
    added = new_string.splitlines() if new_string else []

    rendered = [Text.assemble(("- " + text, "on red")) for text in removed]
    rendered.extend(
        Text.assemble(("+ " + text, "white on green")) for text in added
    )
    return Group(*rendered)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def reset_allowed_actions():
    """Forget every action auto-approved via "allow all".

    Call between conversations if needed; the set is emptied in place.
    """
    _allowed_actions.clear()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _ask_permission(action: str, details: str | Text | Group) -> bool:
    """Ask user permission before executing a dangerous action.

    Uses a lock to serialize prompts when multiple tools run in parallel.
    Supports 'a' (allow all) to auto-approve all future calls of the same action type.

    Args:
        action: Action label; shown as the panel title and used as the key
            for "allow all" approvals.
        details: Renderable shown inside the prompt panel.

    Returns:
        True when the action is approved (bypass flag set, previously
        allowed, or the user answered yes / allow all); False otherwise,
        including when input ends or the prompt is interrupted.
    """
    if _skip_permissions or action in _allowed_actions:
        return True

    with _permission_lock:
        # Re-check after acquiring lock (another thread may have allowed it)
        if action in _allowed_actions:
            return True

        # Snapshot the Live instance so the one we stop is the one we restart,
        # even if set_live() is called while the prompt is open.
        live = _live

        # Pause Live and flush already-streamed content so it doesn't re-render
        if live:
            live.stop()
        try:
            if _display:
                _display.flush()

            _console.print()
            _console.print(Panel(
                details,
                title=f"[bold yellow]{action}[/bold yellow]",
                border_style="yellow",
                expand=False,
            ))
            try:
                answer = _console.input(
                    "[bold yellow]Allow? (y)es / (a)llow all / (n)o:[/bold yellow] "
                ).strip().lower()
                if answer in ("a", "allow all", "all"):
                    _allowed_actions.add(action)
                    allowed = True
                else:
                    allowed = answer in ("y", "yes", "s", "sim")
            except (EOFError, KeyboardInterrupt):
                # No usable answer: treat as a refusal.
                allowed = False
        finally:
            # Resume Live display even if rendering the prompt failed, so an
            # unexpected error cannot leave the display permanently stopped.
            if live:
                live.start()
                live._live_render._shape = None  # prevent overwriting static Panel

        return allowed
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# Upper bound applied to a single read_file result (~15K tokens).
# A max_size of 0 still honours this limit for every chunk it returns.
_READ_HARD_CAP = 60_000  # bytes

# Per-session cache of reads already served, so the same file+range is not
# re-read multiple times.
#   key:   (resolved_path, start_line, end_line, max_size)
#   value: short metadata description of what was returned
_read_cache: dict[tuple, str] = {}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def clear_read_cache():
    """Empty the read cache in place.

    Call after file mutations so later reads are not answered with stale
    "[cached]" notices.
    """
    _read_cache.clear()
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def read_file(file_path: str, start_line: int = 0, end_line: int = 0, max_size: int = 15_000) -> str:
    """Read file contents. Returns chunked output for large files.

    Args:
        file_path: Path to the file (absolute or relative).
        start_line: First line (1-indexed, inclusive). 0 = beginning.
        end_line: Last line (1-indexed, inclusive). 0 = end.
        max_size: Max bytes before truncation. Default 15KB.
            Set to 0 to read the full file in chunks — each chunk up to ~60KB.
            The first chunk includes a continuation hint so you can call again
            with start_line to get the next chunk.

    Returns:
        Numbered file content ("   N | text"), possibly followed by a
        continuation hint or an outline of remaining definitions; a
        "[cached] ..." notice for a repeated range read; or a string
        starting with "Error" when the file cannot be read.
    """
    try:
        resolved = os.path.abspath(file_path)
        cache_key = (resolved, start_line, end_line, max_size)
        range_requested = start_line > 0 or end_line > 0
        # Only cache specific range reads — full-file reads may have been compressed
        # out of context, so blocking them causes the agent to get stuck
        if range_requested and cache_key in _read_cache:
            lines_info = _read_cache[cache_key]
            return (
                f"[cached] Already read ({lines_info})."
                f" Use the content from your earlier call."
            )

        # Check if file exists and get size
        file_size = os.path.getsize(file_path)

        full_read = max_size == 0
        effective_limit = _READ_HARD_CAP if full_read else max_size

        # Detect binary files by checking for null bytes in the first 1KB
        with open(file_path, "rb") as f:
            sample = f.read(1024)
        if b"\x00" in sample:
            return f"Error: Binary file detected ({file_size} bytes): {file_path}"

        # Read with line range support
        with open(file_path, "r", encoding="utf-8") as f:
            lines = f.readlines()

        total_lines = len(lines)

        if range_requested:
            # Line range mode (1-indexed, inclusive)
            s = max(start_line, 1) - 1  # Convert to 0-indexed
            e = end_line if end_line > 0 else total_lines
            e = min(e, total_lines)

            selected = lines[s:e]

            # Apply chunk limit based on accumulated line lengths
            accumulated = []
            char_count = 0
            for i, line in enumerate(selected):
                char_count += len(line)
                # Always emit at least one line: if the first requested line
                # alone exceeds the limit, returning nothing would make the
                # continuation hint point at the same start_line forever.
                if char_count > effective_limit and accumulated:
                    break
                accumulated.append(f"{s + i + 1:4d} | {line}")

            lines_returned = len(accumulated)
            actual_end = s + lines_returned
            header = f"[Lines {s + 1}-{actual_end} of {total_lines}]\n"
            result = header + "".join(accumulated)

            if lines_returned < len(selected):
                next_start = actual_end + 1
                result += (
                    f"\n\n[CHUNK] Returned {lines_returned} of {e - s} requested lines."
                    f" Call read_file(\"{file_path}\", start_line={next_start}, end_line={e})"
                    f" to continue."
                )
            _read_cache[cache_key] = f"{lines_returned} lines returned"
            return result

        # Full file mode — check if it fits in one chunk
        if file_size <= effective_limit:
            numbered = [f"{i + 1:4d} | {line}" for i, line in enumerate(lines)]
            output = "".join(numbered)
            result = output if full_read else _truncate_output(output)
            _read_cache[cache_key] = f"{total_lines} lines"
            return result

        # File exceeds limit — return first chunk + outline of the rest
        # First chunk: up to effective_limit, but never fewer than one line
        accumulated = []
        char_count = 0
        for i, line in enumerate(lines):
            char_count += len(line)
            if char_count > effective_limit and accumulated:
                break
            accumulated.append(f"{i + 1:4d} | {line}")
            if char_count > effective_limit:
                break

        lines_shown = len(accumulated)
        first_chunk = "".join(accumulated)

        # Outline of remaining content (definitions after the first chunk)
        import re as _re
        toc_entries = []
        toc_pattern = _re.compile(r"^(\s*)(def |class |async def )(\w+)")
        for li in range(lines_shown, total_lines):
            m = toc_pattern.match(lines[li])
            if m:
                indent = len(m.group(1))
                prefix = " " if indent > 0 else ""
                toc_entries.append(f"{prefix}{m.group(2).strip()} {m.group(3)} (line {li + 1})")

        outline = "\n".join(toc_entries) if toc_entries else "(no more definitions)"
        result = (
            f"{first_chunk}\n\n"
            f"[Showing lines 1-{lines_shown} of {total_lines} ({file_size:,} bytes)]\n\n"
            f"[Remaining definitions]\n{outline}\n\n"
            f"To read more: read_file(\"{file_path}\", start_line={lines_shown + 1}, end_line=N)"
        )
        _read_cache[cache_key] = f"{lines_shown}/{total_lines} lines + outline"
        return result
    except FileNotFoundError:
        return f"Error: File not found: {file_path}"
    except Exception as e:
        return f"Error reading file: {e}"
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
# Cut-off for read_file_smart: content at or below this length is handed back
# unchanged, since a model call is not worth it for so little text.
_SMART_READ_THRESHOLD = 3_000  # chars (~750 tokens)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
async def read_file_smart(file_path: str, query: str) -> str:
    """Read a file and answer a specific question about it — returns a concise answer, NOT raw content.

    Use this instead of read_file when you only need a specific piece of information
    about a file (e.g., "does this file have tests for X?", "what does function Y do?",
    "which classes are exported?"). This is much cheaper on tokens than reading the full file.

    Use plain read_file only when you need to see the actual code/content.

    Args:
        file_path: Path to the file to read.
        query: The specific question you want answered about this file.

    Returns:
        "[<file_path>] <answer>" from the small model for large files; the raw
        read_file output for small files, on read errors, or when the model
        call fails or returns nothing.
    """
    # Read raw content first (reuse existing read_file logic)
    raw = read_file(file_path, max_size=20_000)

    # read_file reports failures with two different prefixes ("Error: ..." and
    # "Error reading file: ..."); check both so an error message is never sent
    # to the model as if it were file content.
    if raw.startswith(("Error:", "Error reading file:")):
        return raw

    # Strip line number prefixes for the model (cleaner input).
    # Lines have format "  42 | content" — the separator sits in the first 8 chars.
    clean_lines = [
        line.split(" | ", 1)[1] if " | " in line[:8] else line
        for line in raw.splitlines()
    ]
    clean = "\n".join(clean_lines)

    # Small file — just return raw content (model call not worth it)
    if len(clean) <= _SMART_READ_THRESHOLD:
        return raw

    # Large file — call small model to answer the query
    from agno.agent import Agent
    from aru.providers import create_model

    small_ref = _get_small_model_ref()
    prompt = (
        f"Answer this question about the file `{file_path}`:\n\n"
        f"**Question:** {query}\n\n"
        f"**File content:**\n```\n{clean[:15_000]}\n```\n\n"
        f"Give a concise, direct answer. If code is relevant, quote only the essential snippet."
    )

    try:
        summarizer = Agent(
            name="FileReader",
            model=create_model(small_ref, max_tokens=512),
            instructions="You answer specific questions about source code files. Be concise and direct.",
            markdown=False,
        )
        result = await summarizer.arun(prompt, stream=False)
        answer = result.content.strip() if result and result.content else ""
        if not answer:
            return raw  # fallback
        return f"[{file_path}] {answer}"
    except Exception:
        # Deliberate best-effort: any failure of the model call degrades to
        # returning the raw content instead of surfacing an error.
        return raw
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def write_file(file_path: str, content: str) -> str:
    """Write content to a file, creating parent directories if needed.

    Args:
        file_path: Path to the file to write.
        content: The content to write to the file.

    Returns:
        A success message, a "Permission denied" message when the user
        declines, or an "Error writing file" message on failure.
    """
    # Show at most the first 500 characters in the permission prompt.
    snippet = content[:500]
    if len(content) > 500:
        snippet += "..."

    prompt_body = Group(
        Text(file_path, style="bold"),
        Text(),
        _format_diff("", snippet),
    )
    if not _ask_permission("Write File", prompt_body):
        return f"Permission denied: write to {file_path}"

    try:
        parent = os.path.dirname(file_path) or "."
        os.makedirs(parent, exist_ok=True)
        with open(file_path, "w", encoding="utf-8") as handle:
            handle.write(content)
        _notify_file_mutation()
        return f"Successfully wrote to {file_path}"
    except Exception as exc:
        return f"Error writing file: {exc}"
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def write_files(file_list: list[dict]) -> str:
    """Write multiple files at once. Use this instead of multiple write_file calls when creating
    or updating several files that don't depend on each other (e.g. scaffolding a project).

    Each entry in the list must have 'path' and 'content' keys.

    Args:
        file_list: List of dicts with 'path' (file path) and 'content' (file content) keys.
            Example: [{"path": "src/main.py", "content": "print('hello')"}, {"path": "src/utils.py", "content": "..."}]
    """
    # Build one combined prompt: a headline, then a short preview per file.
    prompt_items = [Text(f"Write {len(file_list)} files:", style="bold"), Text()]
    for item in file_list:
        shown_path = item.get("path", "<missing>")
        body = item.get("content", "")
        snippet = body[:300]
        if len(body) > 300:
            snippet += "..."
        prompt_items.extend(
            [Text(shown_path, style="bold dim"), _format_diff("", snippet), Text()]
        )
    if not _ask_permission("Write Files", Group(*prompt_items)):
        return f"Permission denied: batch write of {len(file_list)} files"

    written = []
    failures = []
    for item in file_list:
        target = item.get("path", "")
        body = item.get("content", "")
        if not target:
            failures.append("Error: missing 'path' in entry")
            continue
        try:
            os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
            with open(target, "w", encoding="utf-8") as handle:
                handle.write(body)
            written.append(target)
        except Exception as exc:
            failures.append(f"Error writing {target}: {exc}")

    report = []
    if written:
        # At least one file changed on disk, so caches must be invalidated.
        _notify_file_mutation()
        report.append(f"Successfully wrote {len(written)} files: {', '.join(written)}")
    if failures:
        report.append("\n".join(failures))
    return "\n".join(report) or "No files to write."
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def edit_file(file_path: str, old_string: str, new_string: str) -> str:
    """Replace an exact string in a file. The old_string must appear exactly once.

    Args:
        file_path: Path to the file to edit.
        old_string: The exact text to find and replace. Must be unique in the file.
        new_string: The replacement text.

    Returns:
        A success message, a "Permission denied" message when the user
        declines, or an "Error" message (missing file, old_string absent or
        not unique, or any other failure).
    """
    prompt_body = Group(
        Text(file_path, style="bold"),
        Text(),
        _format_diff(old_string, new_string),
    )
    if not _ask_permission("Edit File", prompt_body):
        return f"Permission denied: edit {file_path}"

    try:
        with open(file_path, "r", encoding="utf-8") as src:
            original = src.read()

        occurrences = original.count(old_string)
        if occurrences > 1:
            return f"Error: old_string found {occurrences} times in {file_path}. Must be unique."
        if occurrences == 0:
            return f"Error: old_string not found in {file_path}"

        updated = original.replace(old_string, new_string, 1)
        with open(file_path, "w", encoding="utf-8") as dst:
            dst.write(updated)
        _notify_file_mutation()
        return f"Successfully edited {file_path}"
    except FileNotFoundError:
        return f"Error: File not found: {file_path}"
    except Exception as exc:
        return f"Error editing file: {exc}"
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def edit_files(edits: list[dict]) -> str:
    """Apply multiple find-and-replace edits across files in a single call. Use this instead of
    multiple edit_file calls when making independent edits to different files (or multiple edits
    to the same file, applied in order).

    Each entry must have 'path', 'old_string', and 'new_string' keys.

    Args:
        edits: List of dicts with 'path' (file path), 'old_string' (text to find), and 'new_string' (replacement).
            Example: [{"path": "src/main.py", "old_string": "foo", "new_string": "bar"}]

    Returns:
        A summary of the edits that were applied and written to disk,
        followed by one line per failed entry; a "Permission denied"
        message when the user declines; or "No edits to apply.".
    """
    parts = [Text(f"Apply {len(edits)} edits:", style="bold"), Text()]
    for e in edits:
        p = e.get("path", "<missing>")
        old = e.get("old_string", "")
        new = e.get("new_string", "")
        parts.append(Text(p, style="bold dim"))
        parts.append(_format_diff(old, new))
        parts.append(Text())
    if not _ask_permission("Edit Files", Group(*parts)):
        return f"Permission denied: batch edit of {len(edits)} files"

    applied: list[str] = []  # one path per edit applied in memory, in order
    errors = []
    # Cache file contents to support multiple edits to the same file
    cache: dict[str, str] = {}

    for entry in edits:
        path = entry.get("path", "")
        old = entry.get("old_string", "")
        new = entry.get("new_string", "")
        if not path or not old:
            errors.append("Error: missing 'path' or 'old_string' in entry")
            continue
        try:
            if path not in cache:
                with open(path, "r", encoding="utf-8") as f:
                    cache[path] = f.read()

            content = cache[path]
            count = content.count(old)
            if count == 0:
                errors.append(f"{path}: old_string not found")
                continue
            if count > 1:
                errors.append(f"{path}: old_string found {count} times, must be unique")
                continue

            cache[path] = content.replace(old, new, 1)
            applied.append(path)
        except FileNotFoundError:
            errors.append(f"{path}: file not found")
        except Exception as e:
            errors.append(f"{path}: {e}")

    # Flush only files that actually received an edit. Files that were merely
    # read (every edit for them failed) are left untouched on disk.
    failed_writes: set[str] = set()
    for path in dict.fromkeys(applied):
        try:
            with open(path, "w", encoding="utf-8") as f:
                f.write(cache[path])
        except Exception as e:
            failed_writes.add(path)
            errors.append(f"Error writing {path}: {e}")

    # Report only edits whose file was successfully written back.
    persisted = [path for path in applied if path not in failed_writes]

    parts = []
    if persisted:
        _notify_file_mutation()
        unique = list(dict.fromkeys(persisted))  # preserve order, dedupe
        parts.append(f"Successfully applied {len(persisted)} edits across {len(unique)} files: {', '.join(unique)}")
    if errors:
        parts.append("\n".join(errors))
    return "\n".join(parts) or "No edits to apply."
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def glob_search(pattern: str, directory: str = ".") -> str:
    """Find files matching a glob pattern recursively.

    Args:
        pattern: Glob pattern to match (e.g. '**/*.py', 'src/**/*.ts').
        directory: Directory to search in. Defaults to current directory.

    Returns:
        Matching paths relative to ``directory``, sorted, one per line (at
        most 100, followed by a count of the remainder), or a
        "No files matched" message.
    """
    # For patterns like **/*.py the part after "**/" is also tried against
    # the bare filename, because fnmatch requires a path separator before the
    # file part and would otherwise miss root-level files.
    bare_suffix = pattern[3:] if pattern.startswith('**/') else None

    hits = []
    for root, _subdirs, filenames in walk_filtered(directory):
        for name in filenames:
            relative = os.path.relpath(os.path.join(root, name), directory)
            # Normalize to forward slashes for consistent fnmatch behaviour on Windows
            posix_relative = relative.replace('\\', '/')
            if (
                fnmatch.fnmatch(posix_relative, pattern)
                or (bare_suffix is not None and fnmatch.fnmatch(name, bare_suffix))
                or fnmatch.fnmatch(name, pattern)
            ):
                hits.append(relative)

    if not hits:
        return f"No files matched pattern: {pattern}"

    hits.sort()
    if len(hits) <= 100:
        return "\n".join(hits)
    return "\n".join(hits[:100]) + f"\n... and {len(hits) - 100} more matches (use a more specific pattern to narrow results)"
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def grep_search(pattern: str, directory: str = ".", file_glob: str = "", context_lines: int = 10) -> str:
    """Search for a regex pattern in file contents.

    Args:
        pattern: Regular expression pattern to search for.
        directory: Directory to search in. Defaults to current directory.
        file_glob: Optional glob to filter which files to search (e.g. '*.py').
        context_lines: Lines of context before and after each match (like grep -C). Default 10.
            Use 0 for file-level matches only. Use 30+ for full function bodies.

    Returns:
        Matching lines as "path:line: text" (or context blocks separated by
        "---", with ">" marking matching lines), optionally followed by a
        "[Match summary]" section; or a message when the pattern is invalid
        or nothing matched.
    """
    import re

    try:
        regex = re.compile(pattern)
    except re.error as e:
        return f"Invalid regex pattern: {e}"

    results = []
    match_count = 0
    files_with_matches: dict[str, list[int]] = {}  # rel_path -> list of match line numbers
    MAX_MATCHES = 20 if context_lines > 0 else 50
    stopped_early = False

    for root, dirs, files in walk_filtered(directory):
        for filename in files:
            if file_glob and not fnmatch.fnmatch(filename, file_glob):
                continue
            filepath = os.path.join(root, filename)
            rel_path = os.path.relpath(filepath, directory)
            try:
                with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                    lines = f.readlines()
            except OSError:
                # Unreadable file (PermissionError is an OSError): skip it.
                continue

            if context_lines > 0:
                # Collect match line indices
                match_indices = [i for i, line in enumerate(lines) if regex.search(line)]
                if not match_indices:
                    continue

                files_with_matches[rel_path] = [i + 1 for i in match_indices]

                # Only emit context blocks if we haven't exceeded the limit
                if match_count < MAX_MATCHES:
                    match_set = set(match_indices)
                    blocks = []
                    current_block: list[str] = []
                    # Highest line index emitted so far. Windows arrive in
                    # ascending order, so everything up to it is already shown.
                    last_shown = -1
                    for mi in match_indices:
                        start = max(0, mi - context_lines)
                        end = min(len(lines), mi + context_lines + 1)
                        # Merge overlapping context windows by skipping lines
                        # already emitted.
                        for li in range(max(start, last_shown + 1), end):
                            if current_block and li > last_shown + 1:
                                blocks.append(current_block)
                                current_block = []
                            # Mark every matching line, including one that
                            # falls inside an earlier match's context window.
                            marker = ">" if li in match_set else " "
                            current_block.append(f"{rel_path}:{li + 1}:{marker} {lines[li].rstrip()}")
                            last_shown = li
                    if current_block:
                        blocks.append(current_block)

                    for block in blocks:
                        results.extend(block)
                        results.append("---")
                # Count every match recorded in the summary, whether or not
                # its context block was emitted.
                match_count += len(match_indices)
            else:
                for i, line in enumerate(lines, 1):
                    if regex.search(line):
                        results.append(f"{rel_path}:{i}: {line.rstrip()}")
                        match_count += 1
                        files_with_matches.setdefault(rel_path, []).append(i)

        # Checked once per directory, so the remaining files of the current
        # directory still contribute to the match summary.
        if match_count >= MAX_MATCHES:
            stopped_early = True
            break

    if not results:
        return f"No matches found for pattern: {pattern}"

    # Trim trailing separator
    if results[-1] == "---":
        results.pop()

    if match_count > MAX_MATCHES and context_lines == 0:
        output = "\n".join(results[:MAX_MATCHES])
    else:
        output = "\n".join(results)

    # Append file summary so the model knows where ALL matches are
    if len(files_with_matches) > 1 or stopped_early:
        summary_lines = ["\n[Match summary]"]
        for fpath, line_nums in files_with_matches.items():
            nums = ", ".join(str(n) for n in line_nums[:10])
            extra = f" +{len(line_nums) - 10} more" if len(line_nums) > 10 else ""
            summary_lines.append(f" {fpath}: lines {nums}{extra}")
        if stopped_early:
            summary_lines.append(f" ... search stopped at {match_count} matches. Use file_glob or a more specific pattern.")
        output += "\n".join(summary_lines)

    return _truncate_output(output)
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
def list_directory(directory: str = ".") -> str:
    """List files and directories in the given path.

    Args:
        directory: Directory to list. Defaults to current directory.

    Returns:
        One line per non-ignored entry, sorted by name: directories as
        "📁 name/", files as "📄 name (N bytes)". Returns "Empty directory"
        when nothing is listed, or an "Error" message when the directory
        cannot be listed.
    """
    try:
        abs_dir = os.path.abspath(directory)
        entries = os.listdir(abs_dir)
    except FileNotFoundError:
        return f"Error: Directory not found: {directory}"
    except Exception as e:
        return f"Error listing directory: {e}"

    try:
        result = []
        for entry in sorted(entries):
            if is_ignored(entry, abs_dir):
                continue
            full_path = os.path.join(abs_dir, entry)
            if os.path.isdir(full_path):
                result.append(f"📁 {entry}/")
                continue
            try:
                size = os.path.getsize(full_path)
            except OSError:
                # E.g. a dangling symlink or a file removed mid-listing. Keep
                # the entry rather than failing the whole listing (previously
                # this surfaced as "Directory not found" for the directory).
                result.append(f"📄 {entry} (size unavailable)")
                continue
            result.append(f"📄 {entry} ({size} bytes)")
        return "\n".join(result) if result else "Empty directory"
    except Exception as e:
        return f"Error listing directory: {e}"
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def get_project_tree(root_dir: str, max_depth: int = 3, max_files_per_dir: int = 30) -> str:
    """Build a plain-text directory tree of ``root_dir`` using the gitignore-aware walker.

    Args:
        root_dir: Directory to render; resolved to an absolute path.
        max_depth: Directories deeper than this (root = 0) are not listed and
            their subdirectory list is cleared.
        max_files_per_dir: Files listed per directory before a
            "... (N more files)" line replaces the rest.

    Returns:
        The tree as newline-joined text, cut to 15000 characters with a
        truncation notice when longer; an empty string if ``root_dir`` does
        not exist.
    """
    import os
    from aru.tools.gitignore import walk_filtered

    root_dir = os.path.abspath(root_dir)
    if not os.path.exists(root_dir):
        return ""

    tree = []
    for current, subdirs, filenames in walk_filtered(root_dir):
        relative = os.path.relpath(current, root_dir)

        if relative == ".":
            # The root itself is printed without indentation.
            depth = 0
            tree.append(os.path.basename(root_dir) + "/")
        else:
            depth = relative.count(os.sep) + 1
            if depth > max_depth:
                subdirs.clear()  # Stop descending
                continue
            tree.append(" " * depth + f"{os.path.basename(current)}/")

        # Files are listed one level deeper than their directory.
        file_indent = " " * (depth + 1)
        for position, name in enumerate(sorted(filenames)):
            if position >= max_files_per_dir:
                tree.append(f"{file_indent}... ({len(filenames) - max_files_per_dir} more files)")
                break
            tree.append(f"{file_indent}{name}")

    rendered = "\n".join(tree)
    if len(rendered) <= 15000:
        return rendered
    return rendered[:15000] + "\n... [Tree truncated due to size]"
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
|
|
757
|
+
import atexit
|
|
758
|
+
|
|
759
|
+
# ── Process tracking ──────────────────────────────────────────────
|
|
760
|
+
# Keep references to long-running background processes so we can kill
|
|
761
|
+
# them when the main aru process exits (avoid zombie / ghost processes).
|
|
762
|
+
_tracked_processes: list[subprocess.Popen] = []
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def _register_process(process: subprocess.Popen):
|
|
766
|
+
"""Track a background process for cleanup on exit."""
|
|
767
|
+
_tracked_processes.append(process)
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
@atexit.register
def _cleanup_processes():
    """Exit hook: kill every tracked background process that has not exited."""
    still_running = (proc for proc in _tracked_processes if proc.poll() is None)
    for proc in still_running:
        _kill_process_tree(proc)
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
# Substrings that mark a command as a server / long-running process.
BACKGROUND_PATTERNS = (
    # Python web servers and frameworks
    "uvicorn",
    "gunicorn",
    "flask run",
    "django",
    "manage.py runserver",
    # Node / front-end dev servers
    "npm start",
    "npm run dev",
    "npx ",
    "next dev",
    "next start",
    "vite",
    "webpack serve",
    "ng serve",
    "node server",
    "nodemon",
    # Containers
    "docker compose up",
    "docker-compose up",
    # Workers and data stores
    "celery worker",
    "celery beat",
    "redis-server",
    "mongod",
    "postgres",
    # App runners
    "streamlit run",
    "gradio",
    # Static file servers
    "http-server",
    "live-server",
    "serve ",
)
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def _kill_process_tree(process: subprocess.Popen):
|
|
794
|
+
"""Kill a process and all its children. On Windows, process.kill() only
|
|
795
|
+
kills the shell wrapper — child processes (e.g. npm → node) keep running.
|
|
796
|
+
Use taskkill /T to kill the entire tree."""
|
|
797
|
+
pid = process.pid
|
|
798
|
+
try:
|
|
799
|
+
if sys.platform == "win32":
|
|
800
|
+
subprocess.run(
|
|
801
|
+
["taskkill", "/F", "/T", "/PID", str(pid)],
|
|
802
|
+
stdout=subprocess.DEVNULL,
|
|
803
|
+
stderr=subprocess.DEVNULL,
|
|
804
|
+
)
|
|
805
|
+
else:
|
|
806
|
+
import signal
|
|
807
|
+
os.killpg(os.getpgid(pid), signal.SIGKILL)
|
|
808
|
+
except Exception:
|
|
809
|
+
# Fallback to basic kill
|
|
810
|
+
try:
|
|
811
|
+
process.kill()
|
|
812
|
+
except Exception:
|
|
813
|
+
pass
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
_MAX_OUTPUT_CHARS = 10_000
_TRUNCATE_KEEP = 3_000  # chars to keep from start and end


def _truncate_output(text: str) -> str:
    """Shorten long tool output to save tokens.

    Thin wrapper: the actual policy lives in ``aru.context.truncate_output``,
    imported at call time.
    """
    from aru.context import truncate_output as _shared_truncate

    return _shared_truncate(text)
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
def _is_long_running(command: str) -> bool:
|
|
827
|
+
"""Detect commands that start servers or long-running processes."""
|
|
828
|
+
cmd = command.strip()
|
|
829
|
+
# Explicit background indicator
|
|
830
|
+
if cmd.endswith("&"):
|
|
831
|
+
return True
|
|
832
|
+
return any(pattern in cmd for pattern in BACKGROUND_PATTERNS)
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
def run_command(command: str, timeout: int = 60, working_directory: str = "") -> str:
    """Execute a shell command and return output.

    Commands recognised by ``_is_long_running`` are started in the background
    and only their startup output is reported; everything else runs to
    completion or until ``timeout``.

    Args:
        command: The command to execute.
        timeout: Max seconds. Default 60.
        working_directory: Directory to run in. Default: cwd.

    Returns:
        The command output (or a status / error message) as text. Errors are
        reported in the returned string rather than raised.
    """
    cwd = working_directory or os.getcwd()
    if _is_long_running(command):
        return _run_in_background(command, cwd)
    return _run_in_foreground(command, timeout, cwd)


def _run_in_background(command: str, cwd: str, startup_seconds: int = 5) -> str:
    """Start *command* detached and report what it prints during startup."""
    try:
        bg_kwargs: dict = dict(
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding="utf-8",
            errors="replace",
            cwd=cwd,
        )
        if sys.platform != "win32":
            bg_kwargs["start_new_session"] = True
        process = subprocess.Popen(command, **bg_kwargs)

        captured: list[str] = []
        capture_done = threading.Event()

        def _drain():
            # Keep reading until EOF even after the startup window: if nobody
            # empties the pipe, the child blocks as soon as the OS pipe buffer
            # fills up. Lines arriving after the window are discarded.
            try:
                for line in process.stdout:
                    if not capture_done.is_set():
                        captured.append(line.rstrip())
            except Exception:
                pass  # pipe closed underneath us; nothing left to read

        reader_thread = threading.Thread(target=_drain, daemon=True)
        reader_thread.start()

        # Wait for startup output, but return as soon as the process dies.
        try:
            exit_code = process.wait(timeout=startup_seconds)
        except subprocess.TimeoutExpired:
            exit_code = None
        if exit_code is not None:
            reader_thread.join(timeout=1)  # pick up the final output
        capture_done.set()

        lines = list(captured)
        output = "\n".join(lines) if lines else "(no output yet)"

        if exit_code is not None:
            # Process already finished (likely an error)
            return f"Process exited immediately (code {exit_code}):\n{output}"

        # Track so it gets killed when aru exits
        _register_process(process)

        return (
            f"Process running in background (PID {process.pid}).\n"
            f"Initial output ({startup_seconds}s):\n{output}"
        )
    except Exception as e:
        return f"Error starting background process: {e}"


def _run_in_foreground(command: str, timeout: int, cwd: str) -> str:
    """Run *command* to completion (or *timeout*) and format its output."""
    try:
        popen_kwargs = dict(
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            encoding="utf-8",
            errors="replace",
            cwd=cwd,
        )
        if sys.platform == "win32":
            popen_kwargs["creationflags"] = subprocess.CREATE_NEW_PROCESS_GROUP
        else:
            # Start in a new process group so _kill_process_tree (os.killpg)
            # does not accidentally kill the parent process when timing out.
            popen_kwargs["start_new_session"] = True

        process = subprocess.Popen(command, **popen_kwargs)
        stdout, stderr = process.communicate(timeout=timeout)

        parts = []
        if stdout:
            parts.append(_truncate_output(stdout))
        if stderr:
            parts.append(f"STDERR:\n{_truncate_output(stderr)}")
        if process.returncode != 0:
            parts.append(f"Exit code: {process.returncode}")

        return "\n".join(parts).strip() or "(no output)"
    except subprocess.TimeoutExpired:
        # Kill the entire process tree, not just the shell wrapper
        _kill_process_tree(process)
        try:
            stdout, stderr = process.communicate(timeout=5)
        except Exception:
            stdout, stderr = "", ""
        partial = ((stdout or "") + (stderr or "")).strip()
        msg = f"Error: Command timed out after {timeout} seconds."
        if partial:
            tail = "\n".join(partial.splitlines()[-20:])
            msg += f"\nLast output:\n{tail}"
        msg += "\nHint: if this is a server/long-running process, it will be detected and run in background automatically."
        return msg
    except Exception as e:
        return f"Error running command: {e}"
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
# Command prefixes that are auto-approved without a permission prompt.
SAFE_COMMAND_PREFIXES = (
    # Looking at files and directories
    "ls", "dir", "find", "tree",
    "cat", "head", "tail", "less", "more", "wc",
    "file", "stat", "du", "df",
    # Searching
    "grep", "rg", "ag", "ack",
    # Git inspection
    "git status", "git log", "git diff", "git show",
    "git branch", "git tag", "git remote", "git stash list",
    "git blame", "git shortlog",
    # Environment, navigation and system information
    "cd", "echo", "pwd", "whoami", "which", "where", "type",
    "env", "printenv",
    "uname", "hostname", "ps", "top", "free", "uptime",
    # Toolchain version checks
    "python --version", "python3 --version",
    "node --version", "npm --version",
    "cargo --version", "go version",
    "java --version", "uv --version",
    # Text filters (usually the tail end of a pipe)
    "sort", "uniq", "cut", "tr", "awk", "sed -n", "jq",
)
|
|
971
|
+
|
|
972
|
+
|
|
973
|
+
def _shell_split(command: str, separators: tuple[str, ...]) -> list[str] | None:
|
|
974
|
+
"""Split command by shell operators, respecting quotes.
|
|
975
|
+
|
|
976
|
+
Returns list of parts if any separator found, None otherwise.
|
|
977
|
+
"""
|
|
978
|
+
parts = []
|
|
979
|
+
current = []
|
|
980
|
+
in_single = False
|
|
981
|
+
in_double = False
|
|
982
|
+
i = 0
|
|
983
|
+
chars = command
|
|
984
|
+
while i < len(chars):
|
|
985
|
+
c = chars[i]
|
|
986
|
+
if c == "'" and not in_double:
|
|
987
|
+
in_single = not in_single
|
|
988
|
+
current.append(c)
|
|
989
|
+
elif c == '"' and not in_single:
|
|
990
|
+
in_double = not in_double
|
|
991
|
+
current.append(c)
|
|
992
|
+
elif not in_single and not in_double:
|
|
993
|
+
matched = False
|
|
994
|
+
for sep in separators:
|
|
995
|
+
if chars[i:i+len(sep)] == sep:
|
|
996
|
+
parts.append("".join(current).strip())
|
|
997
|
+
current = []
|
|
998
|
+
i += len(sep)
|
|
999
|
+
matched = True
|
|
1000
|
+
break
|
|
1001
|
+
if matched:
|
|
1002
|
+
continue
|
|
1003
|
+
current.append(c)
|
|
1004
|
+
else:
|
|
1005
|
+
current.append(c)
|
|
1006
|
+
i += 1
|
|
1007
|
+
if parts: # at least one separator was found
|
|
1008
|
+
parts.append("".join(current).strip())
|
|
1009
|
+
return [p for p in parts if p]
|
|
1010
|
+
return None
|
|
1011
|
+
|
|
1012
|
+
|
|
1013
|
+
def _is_safe_command(command: str) -> bool:
|
|
1014
|
+
"""Check if a command is read-only and safe to run without permission."""
|
|
1015
|
+
cmd = command.strip()
|
|
1016
|
+
# Handle chained commands (&&, ;): safe only if ALL parts are safe
|
|
1017
|
+
parts = _shell_split(cmd, ("&&", ";"))
|
|
1018
|
+
if parts:
|
|
1019
|
+
return all(_is_safe_command(p) for p in parts)
|
|
1020
|
+
# Handle piped commands: safe only if ALL parts are safe
|
|
1021
|
+
parts = _shell_split(cmd, ("|",))
|
|
1022
|
+
if parts:
|
|
1023
|
+
return all(_is_safe_command(p) for p in parts)
|
|
1024
|
+
if any(cmd == prefix or cmd.startswith(prefix + " ") for prefix in SAFE_COMMAND_PREFIXES):
|
|
1025
|
+
return True
|
|
1026
|
+
return any(fnmatch.fnmatch(cmd, rule) for rule in _permission_rules)
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
def bash(command: str, timeout: int = 60, working_directory: str = "") -> str:
    """Execute a shell command (tests, git, install, build, etc).

    Commands that ``_is_safe_command`` does not approve are shown to the user
    and run only after ``_ask_permission`` grants them.

    Args:
        command: The command to execute.
        timeout: Max seconds to wait. Default 60.
        working_directory: Directory to run in. Default: cwd.

    Returns:
        The output of ``run_command``, or a "Permission denied" message.
    """
    cwd = working_directory or os.getcwd()
    needs_approval = not _is_safe_command(command)
    if needs_approval:
        preview = Group(
            Syntax(command, "bash", theme="monokai"),
            Text(f"cwd: {cwd}", style="dim"),
        )
        approved = _ask_permission("Bash Command", preview)
        if not approved:
            return f"Permission denied: {command}"
    output = run_command(command, timeout=timeout, working_directory=working_directory)
    # Any shell command may have touched files, so signal a mutation every time.
    _notify_file_mutation()
    return output
|
|
1049
|
+
|
|
1050
|
+
|
|
1051
|
+
class _HTMLToText(html.parser.HTMLParser):
|
|
1052
|
+
"""Minimal HTML-to-text converter — no external dependencies."""
|
|
1053
|
+
|
|
1054
|
+
SKIP_TAGS = {"script", "style", "svg", "noscript", "head"}
|
|
1055
|
+
BLOCK_TAGS = {"p", "div", "br", "h1", "h2", "h3", "h4", "h5", "h6",
|
|
1056
|
+
"li", "tr", "blockquote", "pre", "section", "article", "header", "footer"}
|
|
1057
|
+
|
|
1058
|
+
def __init__(self):
|
|
1059
|
+
super().__init__()
|
|
1060
|
+
self._pieces: list[str] = []
|
|
1061
|
+
self._skip_depth = 0
|
|
1062
|
+
|
|
1063
|
+
def handle_starttag(self, tag, attrs):
|
|
1064
|
+
if tag in self.SKIP_TAGS:
|
|
1065
|
+
self._skip_depth += 1
|
|
1066
|
+
elif tag in self.BLOCK_TAGS and not self._skip_depth:
|
|
1067
|
+
self._pieces.append("\n")
|
|
1068
|
+
|
|
1069
|
+
def handle_endtag(self, tag):
|
|
1070
|
+
if tag in self.SKIP_TAGS:
|
|
1071
|
+
self._skip_depth = max(0, self._skip_depth - 1)
|
|
1072
|
+
elif tag in self.BLOCK_TAGS and not self._skip_depth:
|
|
1073
|
+
self._pieces.append("\n")
|
|
1074
|
+
|
|
1075
|
+
def handle_data(self, data):
|
|
1076
|
+
if not self._skip_depth:
|
|
1077
|
+
self._pieces.append(data)
|
|
1078
|
+
|
|
1079
|
+
def get_text(self) -> str:
|
|
1080
|
+
raw = "".join(self._pieces)
|
|
1081
|
+
# Collapse whitespace within lines, preserve line breaks
|
|
1082
|
+
lines = [" ".join(line.split()) for line in raw.splitlines()]
|
|
1083
|
+
# Collapse multiple blank lines
|
|
1084
|
+
text = re.sub(r"\n{3,}", "\n\n", "\n".join(lines))
|
|
1085
|
+
return text.strip()
|
|
1086
|
+
|
|
1087
|
+
|
|
1088
|
+
def _html_to_text(html_content: str) -> str:
|
|
1089
|
+
parser = _HTMLToText()
|
|
1090
|
+
parser.feed(html_content)
|
|
1091
|
+
return parser.get_text()
|
|
1092
|
+
|
|
1093
|
+
|
|
1094
|
+
def web_search(query: str, max_results: int = 5) -> str:
|
|
1095
|
+
"""Search the web for information.
|
|
1096
|
+
|
|
1097
|
+
Args:
|
|
1098
|
+
query: The search query.
|
|
1099
|
+
max_results: Max results to return (default 5).
|
|
1100
|
+
"""
|
|
1101
|
+
import re as _re
|
|
1102
|
+
import urllib.parse
|
|
1103
|
+
|
|
1104
|
+
encoded = urllib.parse.quote_plus(query)
|
|
1105
|
+
url = f"https://html.duckduckgo.com/html/?q={encoded}"
|
|
1106
|
+
|
|
1107
|
+
try:
|
|
1108
|
+
with httpx.Client(follow_redirects=True, timeout=15) as client:
|
|
1109
|
+
resp = client.get(url, headers={
|
|
1110
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
|
1111
|
+
})
|
|
1112
|
+
resp.raise_for_status()
|
|
1113
|
+
except httpx.RequestError as e:
|
|
1114
|
+
return f"Search error: {e}"
|
|
1115
|
+
|
|
1116
|
+
html = resp.text
|
|
1117
|
+
results = []
|
|
1118
|
+
|
|
1119
|
+
# Parse DuckDuckGo HTML results
|
|
1120
|
+
blocks = _re.findall(
|
|
1121
|
+
r'<a[^>]+class="result__a"[^>]*href="([^"]*)"[^>]*>(.*?)</a>.*?'
|
|
1122
|
+
r'<a[^>]+class="result__snippet"[^>]*>(.*?)</a>',
|
|
1123
|
+
html, _re.DOTALL,
|
|
1124
|
+
)
|
|
1125
|
+
|
|
1126
|
+
for i, (link, title, snippet) in enumerate(blocks[:max_results], 1):
|
|
1127
|
+
# Clean HTML tags
|
|
1128
|
+
title_clean = _re.sub(r"<[^>]+>", "", title).strip()
|
|
1129
|
+
snippet_clean = _re.sub(r"<[^>]+>", "", snippet).strip()
|
|
1130
|
+
# DuckDuckGo wraps URLs in a redirect — extract the actual URL
|
|
1131
|
+
actual_url = link
|
|
1132
|
+
ud_match = _re.search(r"uddg=([^&]+)", link)
|
|
1133
|
+
if ud_match:
|
|
1134
|
+
actual_url = urllib.parse.unquote(ud_match.group(1))
|
|
1135
|
+
results.append(f"{i}. {title_clean}\n {actual_url}\n {snippet_clean}")
|
|
1136
|
+
|
|
1137
|
+
if not results:
|
|
1138
|
+
return f"No results found for: {query}"
|
|
1139
|
+
return "\n\n".join(results)
|
|
1140
|
+
|
|
1141
|
+
|
|
1142
|
+
def web_fetch(url: str, max_chars: int = 8000) -> str:
    """Fetch a URL and return content as text.

    HTML responses are reduced to plain text; JSON and anything else is
    returned as received.

    Args:
        url: The URL to fetch.
        max_chars: Max characters to return (default 8000).

    Returns:
        The (possibly truncated) content, or an "HTTP error" / "Request error"
        message when the fetch fails.
    """
    try:
        with httpx.Client(follow_redirects=True, timeout=30) as client:
            resp = client.get(url, headers={
                "User-Agent": "Mozilla/5.0 (compatible; aru-agent/0.1)",
                "Accept": "text/html,application/json,text/plain,*/*",
            })
            resp.raise_for_status()
    except httpx.HTTPStatusError as e:
        return f"HTTP error {e.response.status_code}: {e.response.reason_phrase}"
    except (httpx.RequestError, httpx.InvalidURL) as e:
        # InvalidURL is raised for malformed URLs and is not a RequestError.
        return f"Request error: {e}"

    # Media types are case-insensitive, so compare in lowercase.
    content_type = resp.headers.get("content-type", "").lower()
    body = resp.text

    if "html" in content_type and "json" not in content_type:
        text = _html_to_text(body)
    else:
        # JSON, plain text or other — return as-is (already readable)
        text = body

    if len(text) > max_chars:
        text = text[:max_chars] + f"\n\n... [truncated at {max_chars} chars]"
    return _truncate_output(text)
|
|
1176
|
+
|
|
1177
|
+
|
|
1178
|
+
_SUBAGENT_COUNTER = 0
|
|
1179
|
+
_SUBAGENT_COUNTER_LOCK = threading.Lock()
|
|
1180
|
+
|
|
1181
|
+
|
|
1182
|
+
def _next_subagent_id() -> int:
|
|
1183
|
+
global _SUBAGENT_COUNTER
|
|
1184
|
+
with _SUBAGENT_COUNTER_LOCK:
|
|
1185
|
+
_SUBAGENT_COUNTER += 1
|
|
1186
|
+
return _SUBAGENT_COUNTER
|
|
1187
|
+
|
|
1188
|
+
|
|
1189
|
+
# Import new tools
|
|
1190
|
+
from aru.tools.ast_tools import code_structure, find_dependencies
|
|
1191
|
+
from aru.tools.ranker import rank_files
|
|
1192
|
+
|
|
1193
|
+
# Tools available to sub-agents (no delegate_task to prevent infinite nesting)
|
|
1194
|
+
_SUBAGENT_TOOLS = [
    # file access
    read_file, write_file, write_files, edit_file, edit_files,
    # search and navigation
    glob_search, grep_search, list_directory,
    # shell and web
    bash, web_search, web_fetch,
    # code analysis
    code_structure, find_dependencies, rank_files,
]
|
|
1210
|
+
|
|
1211
|
+
|
|
1212
|
+
async def delegate_task(task: str, context: str = "") -> str:
    """Delegate a task to a sub-agent that runs autonomously. Multiple calls run concurrently.
    Use for independent research or subtasks to keep your own context clean.

    Args:
        task: What the sub-agent should do.
        context: Optional extra context (file paths, constraints).

    Returns:
        The sub-agent's answer prefixed with its ``[SubAgent-N]`` tag, or a
        tagged message when it produced no output or raised an error.
    """
    from agno.agent import Agent
    from aru.providers import create_model

    agent_id = _next_subagent_id()
    cwd = os.getcwd()

    # Model reference for sub-agents, as chosen by _get_small_model_ref().
    small_model_ref = _get_small_model_ref()

    instructions = f"""\
You are a sub-agent (#{agent_id}) working on a specific task. Be focused and concise.
Complete the task and return a clear summary of what you did or found.
The current working directory is: {cwd}
Do not create documentation files unless explicitly asked.
"""
    if context:
        instructions += f"\nAdditional context:\n{context}\n"

    # Sub-agents get _SUBAGENT_TOOLS, which does not include delegate_task.
    worker = Agent(
        name=f"SubAgent-{agent_id}",
        model=create_model(small_model_ref, max_tokens=4096),
        tools=_SUBAGENT_TOOLS,
        instructions=instructions,
        markdown=True,
    )

    try:
        outcome = await worker.arun(task, stream=False)
    except Exception as e:
        return f"[SubAgent-{agent_id}] Error: {e}"
    try:
        if outcome and outcome.content:
            return _truncate_output(f"[SubAgent-{agent_id}] {outcome.content}")
    except Exception as e:
        return f"[SubAgent-{agent_id}] Error: {e}"
    return f"[SubAgent-{agent_id}] Task completed but no output was returned."
|
|
1254
|
+
|
|
1255
|
+
|
|
1256
|
+
# All tools as a list for easy import
|
|
1257
|
+
ALL_TOOLS = [
    # file access
    read_file, read_file_smart, write_file, write_files, edit_file, edit_files,
    # search and navigation
    glob_search, grep_search, list_directory,
    # shell, web and delegation
    bash, web_search, web_fetch, delegate_task,
    # code analysis
    code_structure, find_dependencies, rank_files,
]
|
|
1275
|
+
|
|
1276
|
+
# Task list tools for executor subtask tracking
|
|
1277
|
+
from aru.tools.tasklist import create_task_list, update_task
|
|
1278
|
+
|
|
1279
|
+
# Executor tools — full write/execute capability, no discovery overhead
|
|
1280
|
+
EXECUTOR_TOOLS = [
    # subtask tracking
    create_task_list, update_task,
    # file access
    read_file, read_file_smart, write_file, write_files, edit_file, edit_files,
    # search and navigation
    glob_search, grep_search, list_directory,
    # shell, web and delegation
    bash, web_search, web_fetch, delegate_task,
    # code analysis
    code_structure,
]
|
|
1298
|
+
|
|
1299
|
+
# General-purpose tools — everything except niche analysis tools
|
|
1300
|
+
GENERAL_TOOLS = [
    # file access
    read_file, read_file_smart, write_file, write_files, edit_file, edit_files,
    # search and navigation
    glob_search, grep_search, list_directory,
    # shell, web and delegation
    bash, web_search, web_fetch, delegate_task,
]
|
|
1315
|
+
|
|
1316
|
+
async def load_mcp_tools():
    """Start the configured MCP servers and add their tools to every tool list.

    Each tool returned by ``init_mcp`` is appended to ``ALL_TOOLS``,
    ``EXECUTOR_TOOLS`` and ``GENERAL_TOOLS``. Failures are printed to the
    console instead of being raised.
    """
    from aru.tools.mcp_client import init_mcp

    try:
        discovered = await init_mcp()
        if not discovered:
            return
        _console.print(f"[dim]Loaded {len(discovered)} tools from MCP servers.[/dim]")
        for tool in discovered:
            for registry in (ALL_TOOLS, EXECUTOR_TOOLS, GENERAL_TOOLS):
                registry.append(tool)
    except Exception as e:
        _console.print(f"[dim]Failed to load MCP tools: {e}[/dim]")
|