nanocoderagent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nanocoder/__init__.py +10 -0
- nanocoder/__main__.py +3 -0
- nanocoder/agent.py +122 -0
- nanocoder/cli.py +214 -0
- nanocoder/config.py +32 -0
- nanocoder/context.py +196 -0
- nanocoder/llm.py +156 -0
- nanocoder/prompt.py +33 -0
- nanocoder/session.py +68 -0
- nanocoder/tools/__init__.py +27 -0
- nanocoder/tools/agent.py +58 -0
- nanocoder/tools/base.py +27 -0
- nanocoder/tools/bash.py +115 -0
- nanocoder/tools/edit.py +85 -0
- nanocoder/tools/glob_tool.py +47 -0
- nanocoder/tools/grep.py +78 -0
- nanocoder/tools/read.py +53 -0
- nanocoder/tools/write.py +36 -0
- nanocoderagent-0.1.0.dist-info/METADATA +194 -0
- nanocoderagent-0.1.0.dist-info/RECORD +23 -0
- nanocoderagent-0.1.0.dist-info/WHEEL +4 -0
- nanocoderagent-0.1.0.dist-info/entry_points.txt +2 -0
- nanocoderagent-0.1.0.dist-info/licenses/LICENSE +21 -0
nanocoder/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""NanoCoder - Minimal AI coding agent inspired by Claude Code's architecture."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from nanocoder.agent import Agent
|
|
6
|
+
from nanocoder.llm import LLM
|
|
7
|
+
from nanocoder.config import Config
|
|
8
|
+
from nanocoder.tools import ALL_TOOLS
|
|
9
|
+
|
|
10
|
+
__all__ = ["Agent", "LLM", "Config", "ALL_TOOLS", "__version__"]
|
nanocoder/__main__.py
ADDED
nanocoder/agent.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Core agent loop.
|
|
2
|
+
|
|
3
|
+
This is the heart of NanoCoder. The pattern is simple:
|
|
4
|
+
|
|
5
|
+
user message -> LLM (with tools) -> tool calls? -> execute -> loop
|
|
6
|
+
-> text reply? -> return to user
|
|
7
|
+
|
|
8
|
+
It keeps looping until the LLM responds with plain text (no tool calls),
|
|
9
|
+
which means it's done working and ready to report back.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import concurrent.futures
|
|
13
|
+
from .llm import LLM
|
|
14
|
+
from .tools import ALL_TOOLS, get_tool
|
|
15
|
+
from .tools.base import Tool
|
|
16
|
+
from .tools.agent import AgentTool
|
|
17
|
+
from .prompt import system_prompt
|
|
18
|
+
from .context import ContextManager
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Agent:
    """The core agentic loop: LLM turns interleaved with tool execution.

    Holds the running conversation (``self.messages``), the active tool set,
    and a ContextManager that compresses history when it grows too large.
    One call to :meth:`chat` may span many LLM/tool rounds; the loop ends
    when the model replies with plain text (no tool calls).
    """

    def __init__(
        self,
        llm: LLM,
        tools: list[Tool] | None = None,
        max_context_tokens: int = 128_000,
        max_rounds: int = 50,
    ):
        # max_rounds bounds the LLM -> tool -> LLM cycles per user message so
        # a misbehaving model cannot loop forever.
        self.llm = llm
        self.tools = tools if tools is not None else ALL_TOOLS
        self.messages: list[dict] = []
        self.context = ContextManager(max_tokens=max_context_tokens)
        self.max_rounds = max_rounds
        # The system prompt is derived from the tool set once, up front.
        self._system = system_prompt(self.tools)

        # wire up sub-agent capability
        # (AgentTool instances get a back-reference to this agent;
        # presumably used to spawn sub-agents — see tools/agent.py)
        for t in self.tools:
            if isinstance(t, AgentTool):
                t._parent_agent = self

    def _full_messages(self) -> list[dict]:
        # The system prompt is prepended fresh on every request rather than
        # stored in self.messages, so compression never touches it.
        return [{"role": "system", "content": self._system}] + self.messages

    def _tool_schemas(self) -> list[dict]:
        # JSON schemas advertised to the LLM for function calling.
        return [t.schema() for t in self.tools]

    def chat(self, user_input: str, on_token=None, on_tool=None) -> str:
        """Process one user message. May involve multiple LLM/tool rounds.

        on_token: optional callback(str) invoked per streamed token.
        on_tool: optional callback(name, arguments) invoked before each tool runs.
        Returns the model's final text reply, or a sentinel string if
        max_rounds is exhausted.
        """
        self.messages.append({"role": "user", "content": user_input})
        self.context.maybe_compress(self.messages, self.llm)

        for _ in range(self.max_rounds):
            resp = self.llm.chat(
                messages=self._full_messages(),
                tools=self._tool_schemas(),
                on_token=on_token,
            )

            # no tool calls -> LLM is done, return text
            if not resp.tool_calls:
                self.messages.append(resp.message)
                return resp.content

            # tool calls -> execute (parallel when multiple, like Claude Code's
            # StreamingToolExecutor which runs independent tools concurrently)
            # The assistant message (carrying its tool_calls) is recorded
            # BEFORE the tool results, so each "tool" message below can
            # reference it via tool_call_id.
            self.messages.append(resp.message)

            if len(resp.tool_calls) == 1:
                tc = resp.tool_calls[0]
                if on_tool:
                    on_tool(tc.name, tc.arguments)
                result = self._exec_tool(tc)
                self.messages.append({
                    "role": "tool",
                    "tool_call_id": tc.id,
                    "content": result,
                })
            else:
                # parallel execution for multiple tool calls
                results = self._exec_tools_parallel(resp.tool_calls, on_tool)
                for tc, result in zip(resp.tool_calls, results):
                    self.messages.append({
                        "role": "tool",
                        "tool_call_id": tc.id,
                        "content": result,
                    })

            # compress if tool outputs are big
            self.context.maybe_compress(self.messages, self.llm)

        # NOTE(review): when max_rounds is exhausted no assistant message is
        # appended, so self.messages ends with tool results; confirm the next
        # request is still accepted by the backend in that state.
        return "(reached maximum tool-call rounds)"

    def _exec_tool(self, tc) -> str:
        """Execute a single tool call, returning the result string.

        Errors are returned as strings (never raised) so the model can see
        and react to them in the next round.
        """
        # NOTE(review): dispatch goes through the global registry (get_tool),
        # not self.tools — a custom/restricted tool list passed to __init__
        # affects the schemas and prompt but not execution. Confirm intended.
        tool = get_tool(tc.name)
        if tool is None:
            return f"Error: unknown tool '{tc.name}'"
        try:
            return tool.execute(**tc.arguments)
        except TypeError as e:
            # typically the model supplied wrong/missing keyword arguments
            return f"Error: bad arguments for {tc.name}: {e}"
        except Exception as e:
            return f"Error executing {tc.name}: {e}"

    def _exec_tools_parallel(self, tool_calls, on_tool=None) -> list[str]:
        """Run multiple tool calls concurrently using threads.

        This is inspired by Claude Code's StreamingToolExecutor which starts
        executing tools while the model is still generating. We simplify to:
        when the model returns N tool calls at once, run them in parallel.

        Results are returned in the same order as tool_calls (futures are
        collected in submission order, not completion order).
        """
        # announce all tools up front, before any of them start running
        for tc in tool_calls:
            if on_tool:
                on_tool(tc.name, tc.arguments)

        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
            futures = [pool.submit(self._exec_tool, tc) for tc in tool_calls]
            return [f.result() for f in futures]

    def reset(self):
        """Clear conversation history."""
        self.messages.clear()
|
nanocoder/cli.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Interactive REPL - the user-facing terminal interface."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
import os
|
|
5
|
+
import argparse
|
|
6
|
+
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.markdown import Markdown
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
from prompt_toolkit import prompt as pt_prompt
|
|
11
|
+
from prompt_toolkit.history import FileHistory
|
|
12
|
+
|
|
13
|
+
from .agent import Agent
|
|
14
|
+
from .llm import LLM
|
|
15
|
+
from .config import Config
|
|
16
|
+
from .session import save_session, load_session, list_sessions
|
|
17
|
+
from . import __version__
|
|
18
|
+
|
|
19
|
+
console = Console()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _parse_args():
    """Define and parse the nanocoder command-line interface."""
    parser = argparse.ArgumentParser(
        prog="nanocoder",
        description="Minimal AI coding agent. Works with any OpenAI-compatible LLM.",
    )
    # (flags, keyword options) pairs, registered in display order
    option_specs = [
        (("-m", "--model"), dict(help="Model name (default: $NANOCODER_MODEL or gpt-4o)")),
        (("--base-url",), dict(help="API base URL (default: $OPENAI_BASE_URL)")),
        (("--api-key",), dict(help="API key (default: $OPENAI_API_KEY)")),
        (("-p", "--prompt"), dict(help="One-shot prompt (non-interactive mode)")),
        (("-r", "--resume"), dict(metavar="ID", help="Resume a saved session")),
        (("-v", "--version"), dict(action="version", version=f"%(prog)s {__version__}")),
    ]
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    return parser.parse_args()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def main():
    """CLI entry point: resolve settings, build the LLM + agent, dispatch.

    Setting precedence is CLI flags > environment variables > defaults.
    Exits with status 1 when no API key is available or a requested
    session cannot be found.
    """
    args = _parse_args()
    config = Config.from_env()

    # CLI args override env vars
    if args.model:
        config.model = args.model
    if args.base_url:
        config.base_url = args.base_url
    if args.api_key:
        config.api_key = args.api_key

    if not config.api_key:
        console.print("[red bold]No API key found.[/]")
        console.print(
            "Set one of: OPENAI_API_KEY, DEEPSEEK_API_KEY, or NANOCODER_API_KEY\n"
            "\nExamples:\n"
            " # OpenAI\n"
            " export OPENAI_API_KEY=sk-...\n"
            "\n"
            " # DeepSeek\n"
            " export OPENAI_API_KEY=sk-... OPENAI_BASE_URL=https://api.deepseek.com\n"
            "\n"
            " # Ollama (local)\n"
            " export OPENAI_API_KEY=ollama OPENAI_BASE_URL=http://localhost:11434/v1 NANOCODER_MODEL=qwen2.5-coder\n"
        )
        sys.exit(1)

    llm = LLM(
        model=config.model,
        api_key=config.api_key,
        base_url=config.base_url,
        temperature=config.temperature,
        max_tokens=config.max_tokens,
    )
    agent = Agent(llm=llm, max_context_tokens=config.max_context_tokens)

    # resume saved session
    if args.resume:
        loaded = load_session(args.resume)
        if loaded:
            agent.messages, saved_model = loaded
            # Restore the model the session was saved with, unless the user
            # explicitly chose one via -m/--model (CLI always wins).
            # (Previously the saved model was unpacked but silently ignored.)
            if saved_model and not args.model:
                config.model = saved_model
                llm.model = saved_model
            console.print(f"[green]Resumed session: {args.resume}[/green]")
        else:
            console.print(f"[red]Session '{args.resume}' not found.[/red]")
            sys.exit(1)

    # one-shot mode
    if args.prompt:
        _run_once(agent, args.prompt)
        return

    # interactive REPL
    _repl(agent, config)
|
|
91
|
+
|
|
92
|
+
def _run_once(agent: Agent, prompt: str):
    """Non-interactive mode: stream one answer to stdout, then return."""

    def stream_token(tok):
        # raw streaming straight to stdout, no markdown rendering
        print(tok, end="", flush=True)

    def announce_tool(name, kwargs):
        console.print(f"\n[dim]> {name}({_brief(kwargs)})[/dim]")

    agent.chat(prompt, on_token=stream_token, on_tool=announce_tool)
    print()
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _repl(agent: Agent, config: Config):
    """Interactive read-eval-print loop.

    Handles slash commands locally; everything else is forwarded to the
    agent. Streaming output goes straight to stdout; non-streamed replies
    (those produced after tool calls) are rendered as Markdown.
    """
    console.print(Panel(
        f"[bold]NanoCoder[/bold] v{__version__}\n"
        f"Model: [cyan]{config.model}[/cyan]"
        + (f" Base: [dim]{config.base_url}[/dim]" if config.base_url else "")
        + "\nType [bold]/help[/bold] for commands, [bold]Ctrl+C[/bold] to cancel, [bold]quit[/bold] to exit.",
        border_style="blue",
    ))

    # persistent readline-style history shared across runs
    hist_path = os.path.expanduser("~/.nanocoder_history")
    history = FileHistory(hist_path)

    while True:
        try:
            user_input = pt_prompt("You > ", history=history).strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl+D / Ctrl+C at the prompt exits the REPL
            console.print("\nBye!")
            break

        if not user_input:
            continue

        # built-in commands
        if user_input.lower() in ("quit", "exit", "/quit", "/exit"):
            break
        if user_input == "/help":
            _show_help()
            continue
        if user_input == "/reset":
            agent.reset()
            console.print("[yellow]Conversation reset.[/yellow]")
            continue
        if user_input == "/tokens":
            # cumulative usage counters maintained by the LLM wrapper
            p = agent.llm.total_prompt_tokens
            c = agent.llm.total_completion_tokens
            console.print(f"Tokens used this session: [cyan]{p}[/cyan] prompt + [cyan]{c}[/cyan] completion = [bold]{p+c}[/bold] total")
            continue
        if user_input.startswith("/model "):
            # hot-swap the model; conversation history is kept as-is
            new_model = user_input[7:].strip()
            if new_model:
                agent.llm.model = new_model
                config.model = new_model
                console.print(f"Switched to [cyan]{new_model}[/cyan]")
            continue
        if user_input == "/compact":
            # force a compression pass and report before/after sizes
            from .context import estimate_tokens
            before = estimate_tokens(agent.messages)
            compressed = agent.context.maybe_compress(agent.messages, agent.llm)
            after = estimate_tokens(agent.messages)
            if compressed:
                console.print(f"[green]Compressed: {before} → {after} tokens ({len(agent.messages)} messages)[/green]")
            else:
                console.print(f"[dim]Nothing to compress ({before} tokens, {len(agent.messages)} messages)[/dim]")
            continue
        if user_input == "/save":
            sid = save_session(agent.messages, config.model)
            console.print(f"[green]Session saved: {sid}[/green]")
            console.print(f"Resume with: nanocoder -r {sid}")
            continue
        if user_input == "/sessions":
            sessions = list_sessions()
            if not sessions:
                console.print("[dim]No saved sessions.[/dim]")
            else:
                for s in sessions:
                    console.print(f" [cyan]{s['id']}[/cyan] ({s['model']}, {s['saved_at']}) {s['preview']}")
            continue

        # call the agent
        # streamed collects tokens so we know whether the final reply was
        # already printed incrementally (and should not be re-rendered)
        streamed: list[str] = []

        def on_token(tok):
            streamed.append(tok)
            print(tok, end="", flush=True)

        def on_tool(name, kwargs):
            console.print(f"\n[dim]> {name}({_brief(kwargs)})[/dim]")

        try:
            response = agent.chat(user_input, on_token=on_token, on_tool=on_tool)
            if streamed:
                print()  # newline after streamed tokens
            else:
                # response wasn't streamed (came after tool calls)
                console.print(Markdown(response))
        except KeyboardInterrupt:
            # NOTE(review): interrupting mid-chat may leave agent.messages
            # ending with an assistant tool_calls turn that has no tool
            # results — confirm the next request still succeeds after this.
            console.print("\n[yellow]Interrupted.[/yellow]")
        except Exception as e:
            console.print(f"\n[red]Error: {e}[/red]")
+
|
|
195
|
+
|
|
196
|
+
def _show_help():
    """Render the slash-command reference panel."""
    body = (
        "[bold]Commands:[/bold]\n"
        " /help Show this help\n"
        " /reset Clear conversation history\n"
        " /model <name> Switch model mid-conversation\n"
        " /tokens Show token usage\n"
        " /compact Compress conversation context\n"
        " /save Save session to disk\n"
        " /sessions List saved sessions\n"
        " quit Exit NanoCoder"
    )
    console.print(Panel(body, title="NanoCoder Help", border_style="dim"))
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _brief(kwargs: dict, maxlen: int = 80) -> str:
|
|
213
|
+
s = ", ".join(f"{k}={repr(v)[:40]}" for k, v in kwargs.items())
|
|
214
|
+
return s[:maxlen] + ("..." if len(s) > maxlen else "")
|
nanocoder/config.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Configuration - env vars and defaults."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
|
|
8
|
+
class Config:
|
|
9
|
+
model: str = "gpt-4o"
|
|
10
|
+
api_key: str = ""
|
|
11
|
+
base_url: str | None = None
|
|
12
|
+
max_tokens: int = 4096
|
|
13
|
+
temperature: float = 0.0
|
|
14
|
+
max_context_tokens: int = 128_000
|
|
15
|
+
|
|
16
|
+
@classmethod
|
|
17
|
+
def from_env(cls) -> "Config":
|
|
18
|
+
# pick up common env vars automatically
|
|
19
|
+
api_key = (
|
|
20
|
+
os.getenv("NANOCODER_API_KEY")
|
|
21
|
+
or os.getenv("OPENAI_API_KEY")
|
|
22
|
+
or os.getenv("DEEPSEEK_API_KEY")
|
|
23
|
+
or ""
|
|
24
|
+
)
|
|
25
|
+
return cls(
|
|
26
|
+
model=os.getenv("NANOCODER_MODEL", "gpt-4o"),
|
|
27
|
+
api_key=api_key,
|
|
28
|
+
base_url=os.getenv("OPENAI_BASE_URL") or os.getenv("NANOCODER_BASE_URL"),
|
|
29
|
+
max_tokens=int(os.getenv("NANOCODER_MAX_TOKENS", "4096")),
|
|
30
|
+
temperature=float(os.getenv("NANOCODER_TEMPERATURE", "0")),
|
|
31
|
+
max_context_tokens=int(os.getenv("NANOCODER_MAX_CONTEXT", "128000")),
|
|
32
|
+
)
|
nanocoder/context.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Multi-layer context compression.
|
|
2
|
+
|
|
3
|
+
Claude Code uses a 4-layer strategy:
|
|
4
|
+
1. HISTORY_SNIP - trim old tool outputs to a one-line summary
|
|
5
|
+
2. Microcompact - LLM-powered summary of old turns (cached)
|
|
6
|
+
3. CONTEXT_COLLAPSE - aggressive compression when nearing hard limit
|
|
7
|
+
4. Autocompact - periodic background compaction
|
|
8
|
+
|
|
9
|
+
NanoCoder implements the same idea in 3 layers:
|
|
10
|
+
Layer 1 (tool_snip) - replace verbose tool results with truncated versions
|
|
11
|
+
Layer 2 (summarize) - LLM-powered summary of old conversation
|
|
12
|
+
Layer 3 (hard_collapse) - last resort: drop everything except summary + recent
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
from typing import TYPE_CHECKING
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from .llm import LLM
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _approx_tokens(text: str) -> int:
|
|
23
|
+
"""Rough token count. ~3.5 chars/token for mixed en/zh content."""
|
|
24
|
+
return len(text) // 3
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def estimate_tokens(messages: list[dict]) -> int:
    """Approximate the total token footprint of a message list.

    Counts each message's text content plus the stringified tool_calls
    payload, when present.
    """
    total = 0
    for msg in messages:
        content = msg.get("content")
        if content:
            total += _approx_tokens(content)
        tool_calls = msg.get("tool_calls")
        if tool_calls:
            total += _approx_tokens(str(tool_calls))
    return total
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ContextManager:
    """Applies the three compression layers to a message list, in place.

    Thresholds are fractions of the token budget: above 50% tool outputs
    are snipped, above 70% old turns are summarized, above 90% the history
    is hard-collapsed to a summary plus the most recent messages.
    """

    def __init__(self, max_tokens: int = 128_000):
        self.max_tokens = max_tokens
        # layer thresholds (fraction of max_tokens)
        self._snip_at = int(max_tokens * 0.50)       # 50% -> snip tool outputs
        self._summarize_at = int(max_tokens * 0.70)  # 70% -> LLM summarize
        self._collapse_at = int(max_tokens * 0.90)   # 90% -> hard collapse

    def maybe_compress(self, messages: list[dict], llm: LLM | None = None) -> bool:
        """Apply compression layers as needed. Returns True if any compression happened.

        Mutates *messages* in place. Cheaper layers run first, and the token
        estimate is refreshed after each so later layers only fire if the
        history is still over budget. *llm* is optional; without it,
        summaries fall back to regex-based extraction.
        """
        current = estimate_tokens(messages)
        compressed = False

        # Layer 1: snip verbose tool outputs
        if current > self._snip_at:
            if self._snip_tool_outputs(messages):
                compressed = True
                current = estimate_tokens(messages)

        # Layer 2: LLM-powered summarization of old turns
        if current > self._summarize_at and len(messages) > 10:
            if self._summarize_old(messages, llm, keep_recent=8):
                compressed = True
                current = estimate_tokens(messages)

        # Layer 3: hard collapse - last resort
        if current > self._collapse_at and len(messages) > 4:
            self._hard_collapse(messages, llm)
            compressed = True

        return compressed

    @staticmethod
    def _snip_tool_outputs(messages: list[dict]) -> bool:
        """Layer 1: Truncate tool results over 1500 chars to their first/last lines.

        This mirrors Claude Code's HISTORY_SNIP which replaces old tool outputs
        with a one-line summary to reclaim context space. Returns True if any
        message was modified.
        """
        changed = False
        for m in messages:
            if m.get("role") != "tool":
                continue
            content = m.get("content", "")
            if len(content) <= 1500:
                continue
            lines = content.splitlines()
            if len(lines) <= 6:
                # long but few lines (e.g. one huge line): leave intact rather
                # than produce an overlapping first-3/last-3 snippet
                continue
            # keep first 3 + last 3 lines
            snipped = (
                "\n".join(lines[:3])
                + f"\n... ({len(lines)} lines, snipped to save context) ...\n"
                + "\n".join(lines[-3:])
            )
            m["content"] = snipped
            changed = True
        return changed

    def _summarize_old(self, messages: list[dict], llm: LLM | None,
                       keep_recent: int = 8) -> bool:
        """Layer 2: Summarize old conversation, keep recent messages intact.

        Replaces everything before the last *keep_recent* messages with a
        synthetic user/assistant exchange carrying the summary.
        """
        if len(messages) <= keep_recent:
            return False

        old = messages[:-keep_recent]
        tail = messages[-keep_recent:]

        summary = self._get_summary(old, llm)

        messages.clear()
        messages.append({
            "role": "user",
            "content": f"[Context compressed - conversation summary]\n{summary}",
        })
        messages.append({
            "role": "assistant",
            "content": "Got it, I have the context from our earlier conversation.",
        })
        messages.extend(tail)
        return True

    def _hard_collapse(self, messages: list[dict], llm: LLM | None):
        """Layer 3: Emergency compression. Keep only last 4 messages + summary."""
        tail = messages[-4:] if len(messages) > 4 else messages[-2:]
        # Explicit split index: the previous `messages[:-len(tail)]` slice
        # evaluated to `[:0]` (empty) whenever tail was empty, so the
        # summary was computed over nothing.
        head = messages[:len(messages) - len(tail)]
        summary = self._get_summary(head, llm)

        messages.clear()
        messages.append({
            "role": "user",
            "content": f"[Hard context reset]\n{summary}",
        })
        messages.append({
            "role": "assistant",
            "content": "Context restored. Continuing from where we left off.",
        })
        messages.extend(tail)

    def _get_summary(self, messages: list[dict], llm: LLM | None) -> str:
        """Generate a summary via the LLM, falling back to regex extraction.

        Any LLM failure (network, auth, ...) silently degrades to the
        extraction fallback — compression must never crash the agent loop.
        """
        flat = self._flatten(messages)

        if llm:
            try:
                resp = llm.chat(
                    messages=[
                        {
                            "role": "system",
                            "content": (
                                "Compress this conversation into a brief summary. "
                                "Preserve: file paths edited, key decisions made, "
                                "errors encountered, current task state. "
                                "Drop: verbose command output, code listings, "
                                "redundant back-and-forth."
                            ),
                        },
                        # cap the flattened transcript so the summary request
                        # itself cannot blow the context window
                        {"role": "user", "content": flat[:15000]},
                    ],
                )
                return resp.content
            except Exception:
                pass

        # fallback: extract key lines
        return self._extract_key_info(messages)

    @staticmethod
    def _flatten(messages: list[dict]) -> str:
        """Render messages as '[role] text' lines, each clipped to 400 chars."""
        parts = []
        for m in messages:
            role = m.get("role", "?")
            text = m.get("content", "") or ""
            if text:
                parts.append(f"[{role}] {text[:400]}")
        return "\n".join(parts)

    @staticmethod
    def _extract_key_info(messages: list[dict]) -> str:
        """Fallback: extract file paths and error lines without an LLM."""
        import re
        files_seen = set()
        errors = []

        for m in messages:
            text = m.get("content", "") or ""
            # extract path-like tokens with a short extension
            for match in re.finditer(r'[\w./\-]+\.\w{1,5}', text):
                files_seen.add(match.group())
            # extract error lines (case-insensitive; the previous extra
            # `or 'Error' in line` check was redundant after .lower())
            for line in text.splitlines():
                if 'error' in line.lower():
                    errors.append(line.strip()[:150])

        parts = []
        if files_seen:
            parts.append(f"Files touched: {', '.join(sorted(files_seen)[:20])}")
        if errors:
            parts.append(f"Errors seen: {'; '.join(errors[:5])}")
        return "\n".join(parts) or "(no extractable context)"
|