aru-code 0.14.1__tar.gz → 0.16.0__tar.gz

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (58)
  1. {aru_code-0.14.1 → aru_code-0.16.0}/PKG-INFO +7 -12
  2. {aru_code-0.14.1 → aru_code-0.16.0}/README.md +6 -11
  3. aru_code-0.16.0/aru/__init__.py +1 -0
  4. {aru_code-0.14.1 → aru_code-0.16.0}/aru/agent_factory.py +8 -18
  5. {aru_code-0.14.1 → aru_code-0.16.0}/aru/agents/base.py +5 -10
  6. aru_code-0.16.0/aru/cache_patch.py +133 -0
  7. {aru_code-0.14.1 → aru_code-0.16.0}/aru/cli.py +21 -4
  8. {aru_code-0.14.1 → aru_code-0.16.0}/aru/completers.py +29 -19
  9. {aru_code-0.14.1 → aru_code-0.16.0}/aru/context.py +113 -37
  10. {aru_code-0.14.1 → aru_code-0.16.0}/aru/display.py +0 -13
  11. {aru_code-0.14.1 → aru_code-0.16.0}/aru/runner.py +1 -1
  12. {aru_code-0.14.1 → aru_code-0.16.0}/aru/session.py +2 -0
  13. {aru_code-0.14.1 → aru_code-0.16.0}/aru/tools/ast_tools.py +0 -124
  14. {aru_code-0.14.1 → aru_code-0.16.0}/aru/tools/codebase.py +16 -30
  15. {aru_code-0.14.1 → aru_code-0.16.0}/aru/tools/ranker.py +1 -1
  16. {aru_code-0.14.1 → aru_code-0.16.0}/aru_code.egg-info/PKG-INFO +7 -12
  17. {aru_code-0.14.1 → aru_code-0.16.0}/aru_code.egg-info/SOURCES.txt +1 -1
  18. {aru_code-0.14.1 → aru_code-0.16.0}/pyproject.toml +1 -1
  19. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_cli.py +22 -17
  20. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_cli_advanced.py +0 -10
  21. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_cli_completers.py +28 -36
  22. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_cli_new.py +0 -5
  23. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_codebase.py +4 -150
  24. aru_code-0.14.1/aru/__init__.py +0 -1
  25. aru_code-0.14.1/tests/test_ast_tools.py +0 -762
  26. {aru_code-0.14.1 → aru_code-0.16.0}/LICENSE +0 -0
  27. {aru_code-0.14.1 → aru_code-0.16.0}/aru/agents/__init__.py +0 -0
  28. {aru_code-0.14.1 → aru_code-0.16.0}/aru/agents/executor.py +0 -0
  29. {aru_code-0.14.1 → aru_code-0.16.0}/aru/agents/planner.py +0 -0
  30. {aru_code-0.14.1 → aru_code-0.16.0}/aru/commands.py +0 -0
  31. {aru_code-0.14.1 → aru_code-0.16.0}/aru/config.py +0 -0
  32. {aru_code-0.14.1 → aru_code-0.16.0}/aru/permissions.py +0 -0
  33. {aru_code-0.14.1 → aru_code-0.16.0}/aru/providers.py +0 -0
  34. {aru_code-0.14.1 → aru_code-0.16.0}/aru/runtime.py +0 -0
  35. {aru_code-0.14.1 → aru_code-0.16.0}/aru/tools/__init__.py +0 -0
  36. {aru_code-0.14.1 → aru_code-0.16.0}/aru/tools/gitignore.py +0 -0
  37. {aru_code-0.14.1 → aru_code-0.16.0}/aru/tools/mcp_client.py +0 -0
  38. {aru_code-0.14.1 → aru_code-0.16.0}/aru/tools/tasklist.py +0 -0
  39. {aru_code-0.14.1 → aru_code-0.16.0}/aru_code.egg-info/dependency_links.txt +0 -0
  40. {aru_code-0.14.1 → aru_code-0.16.0}/aru_code.egg-info/entry_points.txt +0 -0
  41. {aru_code-0.14.1 → aru_code-0.16.0}/aru_code.egg-info/requires.txt +0 -0
  42. {aru_code-0.14.1 → aru_code-0.16.0}/aru_code.egg-info/top_level.txt +0 -0
  43. {aru_code-0.14.1 → aru_code-0.16.0}/setup.cfg +0 -0
  44. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_agents_base.py +0 -0
  45. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_cli_base.py +0 -0
  46. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_cli_run_cli.py +0 -0
  47. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_cli_session.py +0 -0
  48. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_cli_shell.py +0 -0
  49. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_config.py +0 -0
  50. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_context.py +0 -0
  51. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_executor.py +0 -0
  52. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_gitignore.py +0 -0
  53. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_main.py +0 -0
  54. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_mcp_client.py +0 -0
  55. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_permissions.py +0 -0
  56. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_planner.py +0 -0
  57. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_providers.py +0 -0
  58. {aru_code-0.14.1 → aru_code-0.16.0}/tests/test_ranker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aru-code
3
- Version: 0.14.1
3
+ Version: 0.16.0
4
4
  Summary: A Claude Code clone built with Agno agents
5
5
  Author-email: Estevao <estevaofon@gmail.com>
6
6
  License-Expression: MIT
@@ -56,7 +56,7 @@ An intelligent coding assistant for the terminal, powered by LLMs and [Agno](htt
56
56
  - **Multi-Agent Architecture** — Specialized agents for planning, execution, and conversation
57
57
  - **Interactive CLI** — Streaming responses, multi-line paste, session management
58
58
  - **Image Support** — Attach images via `@` mentions for multimodal analysis (Claude, GPT-4o, Gemini)
59
- - **16 Integrated Tools** — File operations, code search, shell, web search, task delegation
59
+ - **11 Integrated Tools** — File operations, code search, shell, web search, task delegation
60
60
  - **Task Planning** — Break down complex tasks into steps with automatic execution
61
61
  - **Multi-Provider** — Anthropic, OpenAI, Ollama, Groq, OpenRouter, DeepSeek, and others via custom configuration
62
62
  - **Custom Commands, Skills, and Agents** — Extend aru via the `.agents/` directory
@@ -369,7 +369,7 @@ Custom agents are Markdown files with YAML frontmatter stored in `.agents/agents
369
369
  name: Code Reviewer
370
370
  description: Review code for quality, bugs, and best practices
371
371
  model: anthropic/claude-sonnet-4-5
372
- tools: read_file, grep_search, glob_search, code_structure
372
+ tools: read_file, grep_search, glob_search
373
373
  max_turns: 15
374
374
  mode: primary
375
375
  ---
@@ -479,19 +479,14 @@ Aru can load tools from MCP servers. Configure in `.aru/mcp_config.json`:
479
479
 
480
480
  ### File Operations
481
481
  - `read_file` — Reads files with line range support and binary detection
482
- - `read_file_smart` — Smart file reading focused on relevant snippets for the query
483
- - `write_file` / `write_files` — Writes single or batch files
484
- - `edit_file` / `edit_files` — Find-replace edits across multiple files
482
+ - `read_file_smart` — Answers specific questions about a file without returning raw content
483
+ - `write_file` — Writes content to files, creating directories as needed
484
+ - `edit_file` — Find-and-replace edits on files
485
485
 
486
486
  ### Search & Discovery
487
487
  - `glob_search` — Find files by pattern (respects .gitignore)
488
488
  - `grep_search` — Content search with regex and file filtering
489
489
  - `list_directory` — Directory listing with gitignore filtering
490
- - `rank_files` — Multi-factor file relevance ranking (name, structure, recency)
491
-
492
- ### Code Analysis
493
- - `code_structure` — Extracts classes, functions, imports via tree-sitter AST
494
- - `find_dependencies` — Analyzes import relationships between files
495
490
 
496
491
  ### Shell & Web
497
492
  - `bash` — Executes shell commands with permission gates
@@ -521,7 +516,7 @@ aru-code/
521
516
  │ │ ├── planner.py # Planning agent
522
517
  │ │ └── executor.py # Execution agent
523
518
  │ └── tools/
524
- │ ├── codebase.py # 16 core tools
519
+ │ ├── codebase.py # 11 core tools
525
520
  │ ├── ast_tools.py # Tree-sitter code analysis
526
521
  │ ├── ranker.py # File relevance ranking
527
522
  │ ├── mcp_client.py # MCP client
@@ -9,7 +9,7 @@ An intelligent coding assistant for the terminal, powered by LLMs and [Agno](htt
9
9
  - **Multi-Agent Architecture** — Specialized agents for planning, execution, and conversation
10
10
  - **Interactive CLI** — Streaming responses, multi-line paste, session management
11
11
  - **Image Support** — Attach images via `@` mentions for multimodal analysis (Claude, GPT-4o, Gemini)
12
- - **16 Integrated Tools** — File operations, code search, shell, web search, task delegation
12
+ - **11 Integrated Tools** — File operations, code search, shell, web search, task delegation
13
13
  - **Task Planning** — Break down complex tasks into steps with automatic execution
14
14
  - **Multi-Provider** — Anthropic, OpenAI, Ollama, Groq, OpenRouter, DeepSeek, and others via custom configuration
15
15
  - **Custom Commands, Skills, and Agents** — Extend aru via the `.agents/` directory
@@ -322,7 +322,7 @@ Custom agents are Markdown files with YAML frontmatter stored in `.agents/agents
322
322
  name: Code Reviewer
323
323
  description: Review code for quality, bugs, and best practices
324
324
  model: anthropic/claude-sonnet-4-5
325
- tools: read_file, grep_search, glob_search, code_structure
325
+ tools: read_file, grep_search, glob_search
326
326
  max_turns: 15
327
327
  mode: primary
328
328
  ---
@@ -432,19 +432,14 @@ Aru can load tools from MCP servers. Configure in `.aru/mcp_config.json`:
432
432
 
433
433
  ### File Operations
434
434
  - `read_file` — Reads files with line range support and binary detection
435
- - `read_file_smart` — Smart file reading focused on relevant snippets for the query
436
- - `write_file` / `write_files` — Writes single or batch files
437
- - `edit_file` / `edit_files` — Find-replace edits across multiple files
435
+ - `read_file_smart` — Answers specific questions about a file without returning raw content
436
+ - `write_file` — Writes content to files, creating directories as needed
437
+ - `edit_file` — Find-and-replace edits on files
438
438
 
439
439
  ### Search & Discovery
440
440
  - `glob_search` — Find files by pattern (respects .gitignore)
441
441
  - `grep_search` — Content search with regex and file filtering
442
442
  - `list_directory` — Directory listing with gitignore filtering
443
- - `rank_files` — Multi-factor file relevance ranking (name, structure, recency)
444
-
445
- ### Code Analysis
446
- - `code_structure` — Extracts classes, functions, imports via tree-sitter AST
447
- - `find_dependencies` — Analyzes import relationships between files
448
443
 
449
444
  ### Shell & Web
450
445
  - `bash` — Executes shell commands with permission gates
@@ -474,7 +469,7 @@ aru-code/
474
469
  │ │ ├── planner.py # Planning agent
475
470
  │ │ └── executor.py # Execution agent
476
471
  │ └── tools/
477
- │ ├── codebase.py # 16 core tools
472
+ │ ├── codebase.py # 11 core tools
478
473
  │ ├── ast_tools.py # Tree-sitter code analysis
479
474
  │ ├── ranker.py # File relevance ranking
480
475
  │ ├── mcp_client.py # MCP client
@@ -0,0 +1 @@
1
+ __version__ = "0.16.0"
@@ -21,12 +21,16 @@ def create_general_agent(
21
21
  in the system prompt. Placed in instructions so it's cacheable.
22
22
  """
23
23
  from agno.agent import Agent
24
- from agno.compression.manager import CompressionManager
25
24
 
26
25
  from aru.tools.codebase import GENERAL_TOOLS
27
- from aru.runtime import get_ctx
26
+ tools = GENERAL_TOOLS
28
27
 
29
- extra = config.get_extra_instructions() if config else ""
28
+ # Only include AGENTS.md/project instructions on first turn to save ~1.6K tokens/turn
29
+ if config and not session.extra_instructions_sent:
30
+ extra = config.get_extra_instructions()
31
+ session.extra_instructions_sent = True
32
+ else:
33
+ extra = ""
30
34
  if env_context:
31
35
  extra = f"{extra}\n\n{env_context}" if extra else env_context
32
36
  model_ref = model_override or session.model_ref
@@ -34,15 +38,9 @@ def create_general_agent(
34
38
  return Agent(
35
39
  name="Aru",
36
40
  model=create_model(model_ref, max_tokens=8192),
37
- tools=GENERAL_TOOLS,
41
+ tools=tools,
38
42
  instructions=_build_instructions("general", extra),
39
43
  markdown=True,
40
- compress_tool_results=True,
41
- compression_manager=CompressionManager(
42
- model=create_model(get_ctx().small_model_ref, max_tokens=1024),
43
- compress_tool_results=True,
44
- compress_tool_results_limit=25,
45
- ),
46
44
  tool_call_limit=20,
47
45
  )
48
46
 
@@ -52,10 +50,8 @@ def create_custom_agent_instance(agent_def: CustomAgent, session: Session,
52
50
  env_context: str = ""):
53
51
  """Create an Agno Agent from a CustomAgent definition."""
54
52
  from agno.agent import Agent
55
- from agno.compression.manager import CompressionManager
56
53
  from aru.agents.base import BASE_INSTRUCTIONS
57
54
  from aru.tools.codebase import resolve_tools
58
- from aru.runtime import get_ctx
59
55
 
60
56
  model_ref = agent_def.model or session.model_ref
61
57
  tools = resolve_tools(agent_def.tools)
@@ -74,11 +70,5 @@ def create_custom_agent_instance(agent_def: CustomAgent, session: Session,
74
70
  tools=tools,
75
71
  instructions=instructions,
76
72
  markdown=True,
77
- compress_tool_results=True,
78
- compression_manager=CompressionManager(
79
- model=create_model(get_ctx().small_model_ref, max_tokens=1024),
80
- compress_tool_results=True,
81
- compress_tool_results_limit=25,
82
- ),
83
73
  tool_call_limit=agent_def.max_turns or 20,
84
74
  )
@@ -35,7 +35,7 @@ PLANNER_ROLE = """\
35
35
  You are a software architect agent. Your job is to analyze codebases and create concise implementation plans.
36
36
 
37
37
  IMPORTANT: You are a READ-ONLY agent. You have NO tools to create, write, or edit files, or run shell commands. \
38
- Do NOT attempt to use write_file, edit_file, bash, run_command, or any write/exec tool — they do not exist in your toolkit. \
38
+ Do NOT attempt to use write_file, edit_file, bash, or any write/exec tool — they do not exist in your toolkit. \
39
39
  To assess test coverage, read source files and test files directly — do NOT try to run pytest or any command. \
40
40
  Your sole output is the implementation plan. The executor agent will carry out the actual changes.
41
41
 
@@ -112,15 +112,12 @@ When all subtasks are done, STOP. Do not add extra actions beyond the task list.
112
112
  ## Subtask granularity — CRITICAL
113
113
  Each subtask should touch at most **3-4 files**. If the step involves many files, \
114
114
  split into subtasks grouped by concern (e.g. "Create model files", "Create route files", \
115
- "Update config and main"). Batch independent file writes using `write_files` or `edit_files` \
116
- to minimize tool calls. Batch independent file writes using `write_files` or `edit_files` to minimize tool calls.
115
+ "Update config and main").
117
116
 
118
117
  ## Guidelines
119
118
  - Read files before editing them
120
119
  - Use edit_file for targeted changes (preferred over rewriting entire files)
121
120
  - Use write_file only for new files or complete rewrites
122
- - When creating or updating multiple independent files, use write_files to batch them
123
- - When making independent edits across files, use edit_files to batch them
124
121
  - Run existing tests after changes when applicable
125
122
  - **When adding or modifying unit tests, ALWAYS run them to verify they pass before finishing.**
126
123
  - Keep changes minimal and focused on the task
@@ -139,7 +136,7 @@ Use `context_lines=30` for full function bodies.
139
136
 
140
137
  **NEVER read the same file twice.** If you already have the file content in context, use it.
141
138
 
142
- **NEVER use bash/run_command to read files.** Always use `read_file` or `grep_search`.
139
+ **NEVER use bash to read files.** Always use `read_file` or `grep_search`.
143
140
 
144
141
  **Batch independent tool calls**: emit ALL independent tool calls in a single response.
145
142
 
@@ -181,7 +178,7 @@ Every tool call accumulates its result in your context window. Use the minimum n
181
178
 
182
179
  **NEVER read the same file twice.** Check if you already have the content in context.
183
180
 
184
- **NEVER use bash/run_command to read files.** Always use `read_file` or `grep_search`.
181
+ **NEVER use bash to read files.** Always use `read_file` or `grep_search`.
185
182
 
186
183
  **Batch independent tool calls**: emit ALL independent tool calls in a single response.
187
184
 
@@ -189,9 +186,7 @@ Every tool call accumulates its result in your context window. Use the minimum n
189
186
 
190
187
  **When adding or modifying unit tests, ALWAYS run them to verify they pass before finishing.**
191
188
 
192
- Use delegate_task to split work into independent subtasks for parallel execution.
193
- When creating or updating multiple independent files, use write_files to batch them.
194
- When making independent edits across files, use edit_files to batch them.\
189
+ Use delegate_task to split work into independent subtasks for parallel execution.\
195
190
  """
196
191
 
197
192
 
@@ -0,0 +1,133 @@
1
+ """Monkey-patch Agno's model layer to reduce token consumption.
2
+
3
+ Two optimizations:
4
+
5
+ 1. **Tool result pruning** (ALL providers): After each tool execution, old tool
6
+ results in the message list are truncated to a short summary. This prevents
7
+ O(n²) token growth where each API call re-sends all previous tool results.
8
+
9
+ 2. **Cache breakpoints** (Anthropic only): Marks the last 2 messages with
10
+ cache_control for Anthropic's prompt caching.
11
+
12
+ These patches intercept Agno's internal loop so they work transparently
13
+ regardless of which provider is used.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ # Max chars to keep from old tool results
19
+ _TOOL_RESULT_KEEP_CHARS = 200
20
+ # Number of recent tool results to keep in full
21
+ _KEEP_RECENT_RESULTS = 1
22
+
23
+
24
+ def _prune_tool_messages(messages):
25
+ """Truncate old tool result content in the message list.
26
+
27
+ Keeps only the last N tool results in full. Older ones are truncated
28
+ to a short preview. This runs BEFORE each API call, so accumulated
29
+ tool results don't bloat the context on every re-send.
30
+ """
31
+ # Find all tool message indices
32
+ tool_indices = [
33
+ i for i, msg in enumerate(messages)
34
+ if getattr(msg, "role", None) == "tool"
35
+ ]
36
+
37
+ if len(tool_indices) <= _KEEP_RECENT_RESULTS:
38
+ return
39
+
40
+ # Prune all except the last N
41
+ for idx in tool_indices[:-_KEEP_RECENT_RESULTS]:
42
+ msg = messages[idx]
43
+ content = getattr(msg, "content", None)
44
+ if content is None:
45
+ continue
46
+
47
+ content_str = str(content)
48
+ if len(content_str) <= _TOOL_RESULT_KEEP_CHARS:
49
+ continue
50
+
51
+ truncated = content_str[:_TOOL_RESULT_KEEP_CHARS] + "\n[...truncated]"
52
+ try:
53
+ msg.content = truncated
54
+ if hasattr(msg, "compressed_content"):
55
+ msg.compressed_content = None
56
+ except (AttributeError, TypeError):
57
+ pass
58
+
59
+
60
+ def apply_cache_patch():
61
+ """Apply all patches to reduce Agno's token consumption."""
62
+ _patch_tool_result_pruning()
63
+ _patch_claude_cache_breakpoints()
64
+
65
+
66
+ def _patch_tool_result_pruning():
67
+ """Patch format_function_call_results to prune old tool results.
68
+
69
+ This is called after each tool execution, right before the next API call.
70
+ Works for ALL providers (Claude, OpenAI, Qwen, etc.) since it patches
71
+ the base Model class.
72
+ """
73
+ from agno.models.base import Model
74
+
75
+ _original_format_results = Model.format_function_call_results
76
+
77
+ def _patched_format_results(self, messages, function_call_results, **kwargs):
78
+ # First: prune old tool results already in messages
79
+ _prune_tool_messages(messages)
80
+ # Then: add new results normally
81
+ return _original_format_results(self, messages, function_call_results, **kwargs)
82
+
83
+ Model.format_function_call_results = _patched_format_results
84
+
85
+
86
+ def _patch_claude_cache_breakpoints():
87
+ """Patch Claude's format_messages to add cache breakpoints.
88
+
89
+ Marks the last 2 messages with cache_control for Anthropic's prompt
90
+ caching. Non-Anthropic providers ignore these fields.
91
+ """
92
+ try:
93
+ import agno.utils.models.claude as claude_utils
94
+ except ImportError:
95
+ return
96
+
97
+ _original_format = claude_utils.format_messages
98
+
99
+ def _patched_format_messages(messages, compress_tool_results=False):
100
+ chat_messages, system_message = _original_format(
101
+ messages, compress_tool_results=compress_tool_results
102
+ )
103
+
104
+ if not chat_messages:
105
+ return chat_messages, system_message
106
+
107
+ # Add cache_control to last 2 messages
108
+ cache_marker = {"type": "ephemeral"}
109
+ marked = 0
110
+ for msg in reversed(chat_messages):
111
+ if marked >= 2:
112
+ break
113
+ content = msg.get("content")
114
+ if isinstance(content, list) and content:
115
+ last_item = content[-1]
116
+ if isinstance(last_item, dict):
117
+ last_item["cache_control"] = cache_marker
118
+ marked += 1
119
+ elif hasattr(last_item, "type"):
120
+ try:
121
+ as_dict = last_item.model_dump() if hasattr(last_item, "model_dump") else dict(last_item)
122
+ as_dict["cache_control"] = cache_marker
123
+ content[-1] = as_dict
124
+ marked += 1
125
+ except Exception:
126
+ pass
127
+ elif isinstance(content, str):
128
+ msg["content"] = [{"type": "text", "text": content, "cache_control": cache_marker}]
129
+ marked += 1
130
+
131
+ return chat_messages, system_message
132
+
133
+ claude_utils.format_messages = _patched_format_messages
@@ -50,6 +50,7 @@ from aru.display import ( # noqa: F401
50
50
  from aru.completers import ( # noqa: F401
51
51
  AruCompleter,
52
52
  FileMentionCompleter,
53
+ MentionResult,
53
54
  PasteState,
54
55
  SlashCommandCompleter,
55
56
  TIPS,
@@ -110,6 +111,11 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
110
111
  from aru.permissions import parse_permission_config, reset_session as perm_reset_session
111
112
  from aru.tools.codebase import cleanup_processes
112
113
 
114
+ # Inject cache breakpoints into Agno's Claude API calls — reduces token
115
+ # consumption by ~40% on multi-tool-call interactions via prompt caching.
116
+ from aru.cache_patch import apply_cache_patch
117
+ apply_cache_patch()
118
+
113
119
  ctx = init_ctx(console=console, skip_permissions=skip_permissions)
114
120
 
115
121
  store = SessionStore()
@@ -253,16 +259,19 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
253
259
 
254
260
  # Resolve @file mentions (skip known agent names)
255
261
  _agent_names = set(config.custom_agents.keys()) if config.custom_agents else set()
256
- resolved, injected, attached_images = _resolve_mentions(user_input, os.getcwd(), _agent_names)
257
- if injected > 0:
262
+ mention_result = _resolve_mentions(user_input, os.getcwd(), _agent_names)
263
+ attached_images = mention_result.images
264
+ # File contents go into history as separate prunable messages (not inline)
265
+ mention_file_msgs = mention_result.file_messages
266
+ if mention_result.count > 0:
258
267
  parts = []
259
- text_count = injected - len(attached_images)
268
+ text_count = mention_result.count - len(attached_images)
260
269
  if text_count > 0:
261
270
  parts.append(f"{text_count} file(s)")
262
271
  if attached_images:
263
272
  parts.append(f"{len(attached_images)} image(s)")
264
273
  console.print(f"[dim]Attached {', '.join(parts)} from @ mentions[/dim]")
265
- user_input = resolved
274
+ user_input = mention_result.text
266
275
 
267
276
  if paste_state.pasted_content and user_text:
268
277
  console.print(
@@ -276,6 +285,14 @@ async def run_cli(skip_permissions: bool = False, resume_id: str | None = None):
276
285
  if not user_input:
277
286
  continue
278
287
 
288
+ # Inject @file contents as prunable history entries BEFORE the user message.
289
+ # These look like simulated read_file tool calls and can be pruned/compacted
290
+ # normally, unlike inline content which bloats the user message permanently.
291
+ if mention_file_msgs:
292
+ for msg in mention_file_msgs:
293
+ session.add_message(msg["role"], msg["content"])
294
+ mention_file_msgs = [] # consumed
295
+
279
296
  # Reset "allow all" approvals for each new user message
280
297
  perm_reset_session()
281
298
 
@@ -4,6 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  import os
6
6
  import re
7
+ from dataclasses import dataclass
7
8
 
8
9
  from prompt_toolkit import PromptSession
9
10
  from prompt_toolkit.completion import Completer, Completion
@@ -18,24 +19,36 @@ from aru.commands import SLASH_COMMANDS
18
19
  from aru.config import AgentConfig
19
20
 
20
21
  _MENTION_RE = re.compile(r'(?<!\S)@([a-zA-Z0-9_./\\:-]+)')
21
- _MENTION_MAX_SIZE = 30_000 # bytes, same limit as read_file
22
+ _MENTION_MAX_SIZE = 10_000 # bytes smaller to protect context (model uses read_file for large files)
22
23
  _IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}
23
24
  _IMAGE_MAX_SIZE = 20 * 1024 * 1024 # 20MB
24
25
 
25
26
 
26
- def _resolve_mentions(text: str, cwd: str, agent_names: set[str] | None = None) -> tuple[str, int, list[Image]]:
27
- """Resolve @file mentions by appending file contents to the message.
27
+ @dataclass
28
+ class MentionResult:
29
+ """Result of resolving @file mentions."""
30
+ text: str # User text (without file contents)
31
+ file_messages: list[dict[str, str]] # Simulated tool-call pairs for history
32
+ images: list[Image]
33
+ count: int # Total attached (files + images)
28
34
 
29
- Image files (png, jpg, etc.) are returned as Image objects instead of text.
35
+
36
+ def _resolve_mentions(text: str, cwd: str, agent_names: set[str] | None = None) -> MentionResult:
37
+ """Resolve @file mentions as simulated read_file tool calls.
38
+
39
+ Instead of inlining file contents into the user message (which bloats
40
+ history and can't be pruned), we return separate assistant+tool_result
41
+ message pairs that the session can prune/compact like normal tool outputs.
42
+
43
+ Image files are returned as Image objects.
30
44
  Skips @mentions that match known agent names.
31
- Returns (resolved_text, number_of_files_attached, images).
32
45
  """
33
46
  agent_names = agent_names or set()
34
47
  matches = list(_MENTION_RE.finditer(text))
35
48
  if not matches:
36
- return text, 0, []
49
+ return MentionResult(text=text, file_messages=[], images=[], count=0)
37
50
 
38
- appendix_parts = []
51
+ file_messages: list[dict[str, str]] = []
39
52
  images: list[Image] = []
40
53
  seen = set()
41
54
  for m in matches:
@@ -64,21 +77,18 @@ def _resolve_mentions(text: str, cwd: str, agent_names: set[str] | None = None)
64
77
  size = os.path.getsize(abs_path)
65
78
  with open(abs_path, "r", encoding="utf-8", errors="replace") as f:
66
79
  content = f.read(_MENTION_MAX_SIZE)
67
- if size > _MENTION_MAX_SIZE:
68
- appendix_parts.append(
69
- f"\n\n---\nContents of {rel_path} (truncated to {_MENTION_MAX_SIZE // 1000}KB):\n```\n{content}\n```"
70
- )
71
- else:
72
- appendix_parts.append(
73
- f"\n\n---\nContents of {rel_path}:\n```\n{content}\n```"
74
- )
80
+ truncated = size > _MENTION_MAX_SIZE
81
+ label = f"[read_file: {rel_path}]"
82
+ if truncated:
83
+ label += f" (truncated to {_MENTION_MAX_SIZE // 1000}KB of {size // 1000}KB — use read_file for the rest)"
84
+ # Simulated tool call pair — can be pruned like normal tool outputs
85
+ file_messages.append({"role": "assistant", "content": label})
86
+ file_messages.append({"role": "user", "content": content})
75
87
  except OSError:
76
88
  continue
77
89
 
78
- attached = len(appendix_parts) + len(images)
79
- if appendix_parts:
80
- return text + "".join(appendix_parts), attached, images
81
- return text, attached, images
90
+ count = len(file_messages) // 2 + len(images)
91
+ return MentionResult(text=text, file_messages=file_messages, images=images, count=count)
82
92
 
83
93
 
84
94
  def _extract_agent_mention(