nanocoderagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nanocoder/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ """NanoCoder - Minimal AI coding agent inspired by Claude Code's architecture."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from nanocoder.agent import Agent
6
+ from nanocoder.llm import LLM
7
+ from nanocoder.config import Config
8
+ from nanocoder.tools import ALL_TOOLS
9
+
10
+ __all__ = ["Agent", "LLM", "Config", "ALL_TOOLS", "__version__"]
nanocoder/__main__.py ADDED
@@ -0,0 +1,3 @@
1
+ from nanocoder.cli import main
2
+
3
+ main()
nanocoder/agent.py ADDED
@@ -0,0 +1,122 @@
1
+ """Core agent loop.
2
+
3
+ This is the heart of NanoCoder. The pattern is simple:
4
+
5
+ user message -> LLM (with tools) -> tool calls? -> execute -> loop
6
+ -> text reply? -> return to user
7
+
8
+ It keeps looping until the LLM responds with plain text (no tool calls),
9
+ which means it's done working and ready to report back.
10
+ """
11
+
12
+ import concurrent.futures
13
+ from .llm import LLM
14
+ from .tools import ALL_TOOLS, get_tool
15
+ from .tools.base import Tool
16
+ from .tools.agent import AgentTool
17
+ from .prompt import system_prompt
18
+ from .context import ContextManager
19
+
20
+
21
class Agent:
    """Drives the core agent loop.

    One `chat()` call may span several rounds: the LLM either answers in
    plain text (done) or requests tool calls, which are executed and fed
    back before the next round.
    """

    def __init__(
        self,
        llm: LLM,
        tools: list[Tool] | None = None,
        max_context_tokens: int = 128_000,
        max_rounds: int = 50,
    ):
        self.llm = llm
        self.tools = ALL_TOOLS if tools is None else tools
        self.messages: list[dict] = []
        self.context = ContextManager(max_tokens=max_context_tokens)
        self.max_rounds = max_rounds
        self._system = system_prompt(self.tools)

        # Give any AgentTool a back-reference so it can spawn sub-agents.
        for tool in self.tools:
            if isinstance(tool, AgentTool):
                tool._parent_agent = self

    def _full_messages(self) -> list[dict]:
        """System prompt followed by the running conversation."""
        system_msg = {"role": "system", "content": self._system}
        return [system_msg, *self.messages]

    def _tool_schemas(self) -> list[dict]:
        """JSON schemas for every available tool."""
        return [tool.schema() for tool in self.tools]

    def chat(self, user_input: str, on_token=None, on_tool=None) -> str:
        """Process one user message. May involve multiple LLM/tool rounds."""
        self.messages.append({"role": "user", "content": user_input})
        self.context.maybe_compress(self.messages, self.llm)

        for _round in range(self.max_rounds):
            resp = self.llm.chat(
                messages=self._full_messages(),
                tools=self._tool_schemas(),
                on_token=on_token,
            )
            self.messages.append(resp.message)

            # A plain-text reply (no tool calls) means the model is finished.
            if not resp.tool_calls:
                return resp.content

            calls = resp.tool_calls
            if len(calls) == 1:
                only = calls[0]
                if on_tool:
                    on_tool(only.name, only.arguments)
                outputs = [self._exec_tool(only)]
            else:
                # Several independent tool calls at once -> run concurrently,
                # mirroring Claude Code's StreamingToolExecutor.
                outputs = self._exec_tools_parallel(calls, on_tool)

            for call, output in zip(calls, outputs):
                self.messages.append({
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": output,
                })

            # Tool output can be huge; compress before the next round.
            self.context.maybe_compress(self.messages, self.llm)

        return "(reached maximum tool-call rounds)"

    def _exec_tool(self, tc) -> str:
        """Run one tool call; always return a string (errors included)."""
        tool = get_tool(tc.name)
        if tool is None:
            return f"Error: unknown tool '{tc.name}'"
        try:
            return tool.execute(**tc.arguments)
        except TypeError as e:
            # Model supplied arguments that don't match the tool signature.
            return f"Error: bad arguments for {tc.name}: {e}"
        except Exception as e:
            return f"Error executing {tc.name}: {e}"

    def _exec_tools_parallel(self, tool_calls, on_tool=None) -> list[str]:
        """Execute several tool calls concurrently on a small thread pool.

        Results come back in the same order as `tool_calls`. All on_tool
        notifications fire before any execution starts.
        """
        if on_tool:
            for tc in tool_calls:
                on_tool(tc.name, tc.arguments)

        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
            pending = [pool.submit(self._exec_tool, tc) for tc in tool_calls]
            return [fut.result() for fut in pending]

    def reset(self):
        """Clear conversation history (in place, preserving list identity)."""
        self.messages.clear()
nanocoder/cli.py ADDED
@@ -0,0 +1,214 @@
1
+ """Interactive REPL - the user-facing terminal interface."""
2
+
3
+ import sys
4
+ import os
5
+ import argparse
6
+
7
+ from rich.console import Console
8
+ from rich.markdown import Markdown
9
+ from rich.panel import Panel
10
+ from prompt_toolkit import prompt as pt_prompt
11
+ from prompt_toolkit.history import FileHistory
12
+
13
+ from .agent import Agent
14
+ from .llm import LLM
15
+ from .config import Config
16
+ from .session import save_session, load_session, list_sessions
17
+ from . import __version__
18
+
19
+ console = Console()
20
+
21
+
22
def _parse_args():
    """Define and evaluate the command-line interface."""
    parser = argparse.ArgumentParser(
        prog="nanocoder",
        description="Minimal AI coding agent. Works with any OpenAI-compatible LLM.",
    )
    parser.add_argument("-m", "--model", help="Model name (default: $NANOCODER_MODEL or gpt-4o)")
    parser.add_argument("--base-url", help="API base URL (default: $OPENAI_BASE_URL)")
    parser.add_argument("--api-key", help="API key (default: $OPENAI_API_KEY)")
    parser.add_argument("-p", "--prompt", help="One-shot prompt (non-interactive mode)")
    parser.add_argument("-r", "--resume", metavar="ID", help="Resume a saved session")
    parser.add_argument("-v", "--version", action="version", version=f"%(prog)s {__version__}")
    return parser.parse_args()
34
+
35
+
36
def main():
    """CLI entry point: resolve config, build the agent, dispatch to a mode."""
    args = _parse_args()
    config = Config.from_env()

    # Command-line flags beat environment variables.
    for attr in ("model", "base_url", "api_key"):
        override = getattr(args, attr)
        if override:
            setattr(config, attr, override)

    if not config.api_key:
        console.print("[red bold]No API key found.[/]")
        console.print(
            "Set one of: OPENAI_API_KEY, DEEPSEEK_API_KEY, or NANOCODER_API_KEY\n"
            "\nExamples:\n"
            " # OpenAI\n"
            " export OPENAI_API_KEY=sk-...\n"
            "\n"
            " # DeepSeek\n"
            " export OPENAI_API_KEY=sk-... OPENAI_BASE_URL=https://api.deepseek.com\n"
            "\n"
            " # Ollama (local)\n"
            " export OPENAI_API_KEY=ollama OPENAI_BASE_URL=http://localhost:11434/v1 NANOCODER_MODEL=qwen2.5-coder\n"
        )
        sys.exit(1)

    llm = LLM(
        model=config.model,
        api_key=config.api_key,
        base_url=config.base_url,
        temperature=config.temperature,
        max_tokens=config.max_tokens,
    )
    agent = Agent(llm=llm, max_context_tokens=config.max_context_tokens)

    # Optionally restore a previously saved conversation.
    if args.resume:
        restored = load_session(args.resume)
        if not restored:
            console.print(f"[red]Session '{args.resume}' not found.[/red]")
            sys.exit(1)
        agent.messages, _saved_model = restored
        console.print(f"[green]Resumed session: {args.resume}[/green]")

    if args.prompt:
        # One-shot mode: answer a single prompt, then exit.
        _run_once(agent, args.prompt)
        return

    # Default: interactive REPL.
    _repl(agent, config)
90
+
91
+
92
def _run_once(agent: Agent, prompt: str):
    """Non-interactive: run one prompt and exit."""
    agent.chat(
        prompt,
        # Stream model tokens straight to stdout.
        on_token=lambda tok: print(tok, end="", flush=True),
        # Announce each tool invocation on its own dimmed line.
        on_tool=lambda name, kwargs: console.print(f"\n[dim]> {name}({_brief(kwargs)})[/dim]"),
    )
    print()
102
+
103
+
104
def _repl(agent: Agent, config: Config):
    """Interactive read-eval-print loop."""
    console.print(Panel(
        f"[bold]NanoCoder[/bold] v{__version__}\n"
        f"Model: [cyan]{config.model}[/cyan]"
        + (f" Base: [dim]{config.base_url}[/dim]" if config.base_url else "")
        + "\nType [bold]/help[/bold] for commands, [bold]Ctrl+C[/bold] to cancel, [bold]quit[/bold] to exit.",
        border_style="blue",
    ))

    # Persistent input history shared across runs.
    history = FileHistory(os.path.expanduser("~/.nanocoder_history"))

    while True:
        try:
            line = pt_prompt("You > ", history=history).strip()
        except (EOFError, KeyboardInterrupt):
            console.print("\nBye!")
            return

        if not line:
            continue

        # --- built-in commands -------------------------------------------
        if line.lower() in ("quit", "exit", "/quit", "/exit"):
            return
        if line == "/help":
            _show_help()
            continue
        if line == "/reset":
            agent.reset()
            console.print("[yellow]Conversation reset.[/yellow]")
            continue
        if line == "/tokens":
            p = agent.llm.total_prompt_tokens
            c = agent.llm.total_completion_tokens
            console.print(f"Tokens used this session: [cyan]{p}[/cyan] prompt + [cyan]{c}[/cyan] completion = [bold]{p+c}[/bold] total")
            continue
        if line.startswith("/model "):
            new_model = line.removeprefix("/model ").strip()
            if new_model:
                agent.llm.model = new_model
                config.model = new_model
                console.print(f"Switched to [cyan]{new_model}[/cyan]")
            continue
        if line == "/compact":
            from .context import estimate_tokens
            before = estimate_tokens(agent.messages)
            did_compress = agent.context.maybe_compress(agent.messages, agent.llm)
            after = estimate_tokens(agent.messages)
            if did_compress:
                console.print(f"[green]Compressed: {before} → {after} tokens ({len(agent.messages)} messages)[/green]")
            else:
                console.print(f"[dim]Nothing to compress ({before} tokens, {len(agent.messages)} messages)[/dim]")
            continue
        if line == "/save":
            sid = save_session(agent.messages, config.model)
            console.print(f"[green]Session saved: {sid}[/green]")
            console.print(f"Resume with: nanocoder -r {sid}")
            continue
        if line == "/sessions":
            saved = list_sessions()
            if not saved:
                console.print("[dim]No saved sessions.[/dim]")
            else:
                for s in saved:
                    console.print(f" [cyan]{s['id']}[/cyan] ({s['model']}, {s['saved_at']}) {s['preview']}")
            continue

        # --- regular input: hand it to the agent -------------------------
        streamed: list[str] = []

        def on_token(tok):
            streamed.append(tok)
            print(tok, end="", flush=True)

        def on_tool(name, kwargs):
            console.print(f"\n[dim]> {name}({_brief(kwargs)})[/dim]")

        try:
            reply = agent.chat(line, on_token=on_token, on_tool=on_tool)
            if streamed:
                print()  # terminate the streamed line
            else:
                # Nothing was streamed (the answer came after tool calls),
                # so render the final reply as markdown.
                console.print(Markdown(reply))
        except KeyboardInterrupt:
            console.print("\n[yellow]Interrupted.[/yellow]")
        except Exception as e:
            console.print(f"\n[red]Error: {e}[/red]")
194
+
195
+
196
def _show_help():
    """Render the command reference panel."""
    body = (
        "[bold]Commands:[/bold]\n"
        " /help Show this help\n"
        " /reset Clear conversation history\n"
        " /model <name> Switch model mid-conversation\n"
        " /tokens Show token usage\n"
        " /compact Compress conversation context\n"
        " /save Save session to disk\n"
        " /sessions List saved sessions\n"
        " quit Exit NanoCoder"
    )
    console.print(Panel(body, title="NanoCoder Help", border_style="dim"))
210
+
211
+
212
+ def _brief(kwargs: dict, maxlen: int = 80) -> str:
213
+ s = ", ".join(f"{k}={repr(v)[:40]}" for k, v in kwargs.items())
214
+ return s[:maxlen] + ("..." if len(s) > maxlen else "")
nanocoder/config.py ADDED
@@ -0,0 +1,32 @@
1
+ """Configuration - env vars and defaults."""
2
+
3
+ import os
4
+ from dataclasses import dataclass
5
+
6
+
7
+ @dataclass
8
+ class Config:
9
+ model: str = "gpt-4o"
10
+ api_key: str = ""
11
+ base_url: str | None = None
12
+ max_tokens: int = 4096
13
+ temperature: float = 0.0
14
+ max_context_tokens: int = 128_000
15
+
16
+ @classmethod
17
+ def from_env(cls) -> "Config":
18
+ # pick up common env vars automatically
19
+ api_key = (
20
+ os.getenv("NANOCODER_API_KEY")
21
+ or os.getenv("OPENAI_API_KEY")
22
+ or os.getenv("DEEPSEEK_API_KEY")
23
+ or ""
24
+ )
25
+ return cls(
26
+ model=os.getenv("NANOCODER_MODEL", "gpt-4o"),
27
+ api_key=api_key,
28
+ base_url=os.getenv("OPENAI_BASE_URL") or os.getenv("NANOCODER_BASE_URL"),
29
+ max_tokens=int(os.getenv("NANOCODER_MAX_TOKENS", "4096")),
30
+ temperature=float(os.getenv("NANOCODER_TEMPERATURE", "0")),
31
+ max_context_tokens=int(os.getenv("NANOCODER_MAX_CONTEXT", "128000")),
32
+ )
nanocoder/context.py ADDED
@@ -0,0 +1,196 @@
1
+ """Multi-layer context compression.
2
+
3
+ Claude Code uses a 4-layer strategy:
4
+ 1. HISTORY_SNIP - trim old tool outputs to a one-line summary
5
+ 2. Microcompact - LLM-powered summary of old turns (cached)
6
+ 3. CONTEXT_COLLAPSE - aggressive compression when nearing hard limit
7
+ 4. Autocompact - periodic background compaction
8
+
9
+ NanoCoder implements the same idea in 3 layers:
10
+ Layer 1 (tool_snip) - replace verbose tool results with truncated versions
11
+ Layer 2 (summarize) - LLM-powered summary of old conversation
12
+ Layer 3 (hard_collapse) - last resort: drop everything except summary + recent
13
+ """
14
+
15
+ from __future__ import annotations
16
+ from typing import TYPE_CHECKING
17
+
18
+ if TYPE_CHECKING:
19
+ from .llm import LLM
20
+
21
+
22
+ def _approx_tokens(text: str) -> int:
23
+ """Rough token count. ~3.5 chars/token for mixed en/zh content."""
24
+ return len(text) // 3
25
+
26
+
27
+ def estimate_tokens(messages: list[dict]) -> int:
28
+ total = 0
29
+ for m in messages:
30
+ if m.get("content"):
31
+ total += _approx_tokens(m["content"])
32
+ if m.get("tool_calls"):
33
+ total += _approx_tokens(str(m["tool_calls"]))
34
+ return total
35
+
36
+
37
class ContextManager:
    """Layered context compression for a chat message list.

    Three layers, cheapest first (mirroring Claude Code's strategy):
      1. _snip_tool_outputs - truncate verbose tool results in place.
      2. _summarize_old     - replace old turns with an LLM-written summary.
      3. _hard_collapse     - emergency: keep only a summary plus the tail.
    """

    def __init__(self, max_tokens: int = 128_000):
        self.max_tokens = max_tokens
        # Layer thresholds, as fractions of the context budget.
        self._snip_at = int(max_tokens * 0.50)       # 50% -> snip tool outputs
        self._summarize_at = int(max_tokens * 0.70)  # 70% -> LLM summarize
        self._collapse_at = int(max_tokens * 0.90)   # 90% -> hard collapse

    def maybe_compress(self, messages: list[dict], llm: LLM | None = None) -> bool:
        """Apply compression layers as needed, mutating `messages` in place.

        Returns True if any compression happened.
        """
        current = estimate_tokens(messages)
        compressed = False

        # Layer 1: snip verbose tool outputs (cheap, no LLM call).
        if current > self._snip_at:
            if self._snip_tool_outputs(messages):
                compressed = True
                current = estimate_tokens(messages)

        # Layer 2: LLM-powered summarization of old turns.
        if current > self._summarize_at and len(messages) > 10:
            if self._summarize_old(messages, llm, keep_recent=8):
                compressed = True
                current = estimate_tokens(messages)

        # Layer 3: hard collapse - last resort.
        if current > self._collapse_at and len(messages) > 4:
            self._hard_collapse(messages, llm)
            compressed = True

        return compressed

    @staticmethod
    def _snip_tool_outputs(messages: list[dict]) -> bool:
        """Layer 1: truncate tool results over 1500 chars to first/last 3 lines.

        Mirrors Claude Code's HISTORY_SNIP. Returns True if any message was
        modified (in place). Short results and results of <= 6 lines are
        left untouched.
        """
        changed = False
        for m in messages:
            if m.get("role") != "tool":
                continue
            content = m.get("content", "")
            if len(content) <= 1500:
                continue
            lines = content.splitlines()
            if len(lines) <= 6:
                continue
            # keep first 3 + last 3 lines
            snipped = (
                "\n".join(lines[:3])
                + f"\n... ({len(lines)} lines, snipped to save context) ...\n"
                + "\n".join(lines[-3:])
            )
            m["content"] = snipped
            changed = True
        return changed

    def _summarize_old(self, messages: list[dict], llm: LLM | None,
                       keep_recent: int = 8) -> bool:
        """Layer 2: summarize old conversation, keeping recent messages intact.

        Replaces everything before the last `keep_recent` messages with a
        two-message summary exchange. Returns False when there is nothing
        old enough to summarize.
        """
        if len(messages) <= keep_recent:
            return False

        old = messages[:-keep_recent]
        tail = messages[-keep_recent:]

        summary = self._get_summary(old, llm)

        messages.clear()
        messages.append({
            "role": "user",
            "content": f"[Context compressed - conversation summary]\n{summary}",
        })
        messages.append({
            "role": "assistant",
            "content": "Got it, I have the context from our earlier conversation.",
        })
        messages.extend(tail)
        return True

    def _hard_collapse(self, messages: list[dict], llm: LLM | None):
        """Layer 3: emergency compression. Keep only the last few messages + summary."""
        # BUGFIX: guard the empty case. With no messages, `tail` was empty and
        # `messages[:-len(tail)]` became `messages[:0]`, then two synthetic
        # messages were fabricated out of nothing.
        if not messages:
            return
        tail = messages[-4:] if len(messages) > 4 else messages[-2:]
        summary = self._get_summary(messages[:-len(tail)], llm)

        messages.clear()
        messages.append({
            "role": "user",
            "content": f"[Hard context reset]\n{summary}",
        })
        messages.append({
            "role": "assistant",
            "content": "Context restored. Continuing from where we left off.",
        })
        messages.extend(tail)

    def _get_summary(self, messages: list[dict], llm: LLM | None) -> str:
        """Generate a summary via the LLM, falling back to plain extraction.

        Any LLM failure is swallowed deliberately: compression is best-effort
        and must never crash the agent loop.
        """
        flat = self._flatten(messages)

        if llm:
            try:
                resp = llm.chat(
                    messages=[
                        {
                            "role": "system",
                            "content": (
                                "Compress this conversation into a brief summary. "
                                "Preserve: file paths edited, key decisions made, "
                                "errors encountered, current task state. "
                                "Drop: verbose command output, code listings, "
                                "redundant back-and-forth."
                            ),
                        },
                        {"role": "user", "content": flat[:15000]},
                    ],
                )
                return resp.content
            except Exception:
                pass

        # fallback: extract key lines without an LLM
        return self._extract_key_info(messages)

    @staticmethod
    def _flatten(messages: list[dict]) -> str:
        """Render messages as '[role] text' lines, each capped at 400 chars."""
        parts = []
        for m in messages:
            role = m.get("role", "?")
            text = m.get("content", "") or ""
            if text:
                parts.append(f"[{role}] {text[:400]}")
        return "\n".join(parts)

    @staticmethod
    def _extract_key_info(messages: list[dict]) -> str:
        """Fallback summary without an LLM: extract file paths and error lines."""
        import re
        files_seen = set()
        errors = []
        # (removed a dead `decisions` list that was never populated or read)

        for m in messages:
            text = m.get("content", "") or ""
            # anything that looks like a file path with an extension
            for match in re.finditer(r'[\w./\-]+\.\w{1,5}', text):
                files_seen.add(match.group())
            # error lines; the case-insensitive test subsumes the old
            # redundant `or 'Error' in line` check
            for line in text.splitlines():
                if 'error' in line.lower():
                    errors.append(line.strip()[:150])

        parts = []
        if files_seen:
            parts.append(f"Files touched: {', '.join(sorted(files_seen)[:20])}")
        if errors:
            parts.append(f"Errors seen: {'; '.join(errors[:5])}")
        return "\n".join(parts) or "(no extractable context)"