2b-agent 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/PKG-INFO +25 -10
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/README.md +24 -9
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/__init__.py +1 -1
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/orchestrator.py +11 -4
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/providers/anthropic.py +11 -2
- 2b_agent-0.2.4/src/two_b/subagents.py +110 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/toolspec.py +27 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/update.py +36 -7
- 2b_agent-0.2.4/tests/test_delegate_wiring.py +32 -0
- 2b_agent-0.2.4/tests/test_prompt_cache.py +16 -0
- 2b_agent-0.2.4/tests/test_subagents.py +78 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/tests/test_update.py +29 -9
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/.github/workflows/release.yml +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/.gitignore +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/LICENSE +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/NOTICE +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/install.sh +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/pyproject.toml +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/app_tui.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/banner.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/cli.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/commands.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/config.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/conversation.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/diagnostics.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/doctor.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/lsp.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/mcp_client.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/planparse.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/prompt.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/providers/__init__.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/providers/base.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/providers/google.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/providers/ollama.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/providers/openai_compat.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/rawkey.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/registry.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/repomap.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/session.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/symbols.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/theme.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/tools.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/tui.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/src/two_b/uninstall.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/tests/spike_ctrl_b.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/tests/test_default_model.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/tests/test_diagnostics.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/tests/test_doctor.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/tests/test_edit_file.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/tests/test_lsp.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/tests/test_mcp_resolver.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/tests/test_search_semantics.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/tests/test_uninstall.py +0 -0
- {2b_agent-0.2.2 → 2b_agent-0.2.4}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: 2b-agent
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: A local-first coding agent that keeps small local models focused instead of hallucinating.
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -106,6 +106,14 @@ model has to understand.
|
|
|
106
106
|
- **Runs things — split by model.** Local models get `run_git` (git only, never a raw shell — no
|
|
107
107
|
chaining/injection); cloud models get a full `run_command` shell (tests, build, git). Read-only git
|
|
108
108
|
runs freely; anything that mutates is confirmation-gated and refused in plan mode.
|
|
109
|
+
- **Delegates read-only exploration (cloud).** On the cloud path the model can `delegate` one or
|
|
110
|
+
more investigations to run in parallel, each in its own isolated context, and get back short
|
|
111
|
+
findings — so a big search-and-read never bloats the main conversation. Each sub-agent can only
|
|
112
|
+
`list_files`, `read_file`, and `search_files`; local models keep their frozen five tools
|
|
113
|
+
untouched, and delegation is cloud-only for now.
|
|
114
|
+
- **Cheaper multi-turn cloud sessions.** Anthropic requests mark the system prompt and tool
|
|
115
|
+
definitions as cacheable, so a long conversation pays full price for that stable prefix once
|
|
116
|
+
instead of on every turn.
|
|
109
117
|
- **MCP tools, curated.** Pull in tools from MCP servers (dart, mempalace, …) — but **per tool**, not
|
|
110
118
|
wholesale, because flooding a small model with tools is exactly what breaks it. You enable a server
|
|
111
119
|
and pick which of its tools the model sees (`/mcp`); local models are capped to a few so their
|
|
@@ -157,10 +165,11 @@ Already have Ollama and some models? It skips what you already have — it lists
|
|
|
157
165
|
models, offers to just use them (pulling nothing), and marks anything in the menu you've already
|
|
158
166
|
got. Your existing setup is left untouched.
|
|
159
167
|
|
|
160
|
-
Prefer to do it by hand?
|
|
168
|
+
Prefer to do it by hand? Install the published package from
|
|
169
|
+
[PyPI](https://pypi.org/project/2b-agent/):
|
|
161
170
|
|
|
162
171
|
```bash
|
|
163
|
-
|
|
172
|
+
pip install 2b-agent # latest release from PyPI
|
|
164
173
|
ollama pull qwen3.5:9b # my default — a good balance on an 18 GB machine
|
|
165
174
|
```
|
|
166
175
|
|
|
@@ -188,17 +197,23 @@ Then just type what you want done. Type `/` to see the commands.
|
|
|
188
197
|
|
|
189
198
|
### Updating
|
|
190
199
|
|
|
191
|
-
|
|
200
|
+
One command, whatever you installed with — it detects the method and runs the right upgrade:
|
|
192
201
|
|
|
193
202
|
```bash
|
|
194
|
-
2b --update
|
|
203
|
+
2b --update
|
|
195
204
|
```
|
|
196
205
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
206
|
+
That resolves to `uv tool upgrade 2b-agent` (the `curl … | sh` installer / `uv`),
|
|
207
|
+
`pipx upgrade 2b-agent` (pipx), or `pip install -U 2b-agent` (pip). You can of course
|
|
208
|
+
run the matching command yourself — e.g. **if you installed with pip**:
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
pip install -U 2b-agent
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
2B also checks for a newer release in the background (at most once a day, never blocking
|
|
215
|
+
startup) and prints a one-line notice on the next launch when one is available — set
|
|
216
|
+
`TWOB_NO_UPDATE_CHECK=1` to turn that off. Releases are tagged `vMAJOR.MINOR.PATCH`.
|
|
202
217
|
|
|
203
218
|
### Providers
|
|
204
219
|
|
|
@@ -92,6 +92,14 @@ model has to understand.
|
|
|
92
92
|
- **Runs things — split by model.** Local models get `run_git` (git only, never a raw shell — no
|
|
93
93
|
chaining/injection); cloud models get a full `run_command` shell (tests, build, git). Read-only git
|
|
94
94
|
runs freely; anything that mutates is confirmation-gated and refused in plan mode.
|
|
95
|
+
- **Delegates read-only exploration (cloud).** On the cloud path the model can `delegate` one or
|
|
96
|
+
more investigations to run in parallel, each in its own isolated context, and get back short
|
|
97
|
+
findings — so a big search-and-read never bloats the main conversation. Each sub-agent can only
|
|
98
|
+
`list_files`, `read_file`, and `search_files`; local models keep their frozen five tools
|
|
99
|
+
untouched, and delegation is cloud-only for now.
|
|
100
|
+
- **Cheaper multi-turn cloud sessions.** Anthropic requests mark the system prompt and tool
|
|
101
|
+
definitions as cacheable, so a long conversation pays full price for that stable prefix once
|
|
102
|
+
instead of on every turn.
|
|
95
103
|
- **MCP tools, curated.** Pull in tools from MCP servers (dart, mempalace, …) — but **per tool**, not
|
|
96
104
|
wholesale, because flooding a small model with tools is exactly what breaks it. You enable a server
|
|
97
105
|
and pick which of its tools the model sees (`/mcp`); local models are capped to a few so their
|
|
@@ -143,10 +151,11 @@ Already have Ollama and some models? It skips what you already have — it lists
|
|
|
143
151
|
models, offers to just use them (pulling nothing), and marks anything in the menu you've already
|
|
144
152
|
got. Your existing setup is left untouched.
|
|
145
153
|
|
|
146
|
-
Prefer to do it by hand?
|
|
154
|
+
Prefer to do it by hand? Install the published package from
|
|
155
|
+
[PyPI](https://pypi.org/project/2b-agent/):
|
|
147
156
|
|
|
148
157
|
```bash
|
|
149
|
-
|
|
158
|
+
pip install 2b-agent # latest release from PyPI
|
|
150
159
|
ollama pull qwen3.5:9b # my default — a good balance on an 18 GB machine
|
|
151
160
|
```
|
|
152
161
|
|
|
@@ -174,17 +183,23 @@ Then just type what you want done. Type `/` to see the commands.
|
|
|
174
183
|
|
|
175
184
|
### Updating
|
|
176
185
|
|
|
177
|
-
|
|
186
|
+
One command, whatever you installed with — it detects the method and runs the right upgrade:
|
|
178
187
|
|
|
179
188
|
```bash
|
|
180
|
-
2b --update
|
|
189
|
+
2b --update
|
|
181
190
|
```
|
|
182
191
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
192
|
+
That resolves to `uv tool upgrade 2b-agent` (the `curl … | sh` installer / `uv`),
|
|
193
|
+
`pipx upgrade 2b-agent` (pipx), or `pip install -U 2b-agent` (pip). You can of course
|
|
194
|
+
run the matching command yourself — e.g. **if you installed with pip**:
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
pip install -U 2b-agent
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
2B also checks for a newer release in the background (at most once a day, never blocking
|
|
201
|
+
startup) and prints a one-line notice on the next launch when one is available — set
|
|
202
|
+
`TWOB_NO_UPDATE_CHECK=1` to turn that off. Releases are tagged `vMAJOR.MINOR.PATCH`.
|
|
188
203
|
|
|
189
204
|
### Providers
|
|
190
205
|
|
|
@@ -33,7 +33,7 @@ from . import diagnostics, mcp_client, planparse, registry, tools
|
|
|
33
33
|
from .conversation import Conversation, Message, Role, ToolResult
|
|
34
34
|
from .providers.base import ProviderError
|
|
35
35
|
from .session import PendingConfirmation, Session, Task, TaskState
|
|
36
|
-
from .toolspec import TOOL_SPECS, specs_for
|
|
36
|
+
from .toolspec import TOOL_SPECS, specs_for, DELEGATE_SPEC
|
|
37
37
|
|
|
38
38
|
MAX_TURNS = 40 # generous budget for real multi-step tasks
|
|
39
39
|
DEFAULT_MODEL = "qwen3.5:9b"
|
|
@@ -158,11 +158,13 @@ def _project_context() -> str:
|
|
|
158
158
|
|
|
159
159
|
def _active_specs(is_local: bool):
|
|
160
160
|
"""Base file tools + the model's exec tool + curated MCP tools. Local models
|
|
161
|
-
get a small MCP cap so a big enabled set can't flood their tool list.
|
|
161
|
+
get a small MCP cap so a big enabled set can't flood their tool list.
|
|
162
|
+
delegate (fan-out to sub-agents) is exposed to cloud models only."""
|
|
162
163
|
mcp = mcp_client.manager.tool_specs()
|
|
163
164
|
if is_local:
|
|
164
165
|
mcp = mcp[:MCP_LOCAL_CAP]
|
|
165
|
-
|
|
166
|
+
base = specs_for(is_local) + mcp
|
|
167
|
+
return base if is_local else base + (DELEGATE_SPEC,)
|
|
166
168
|
|
|
167
169
|
|
|
168
170
|
def context_budget(provider, model: str) -> int:
|
|
@@ -512,7 +514,12 @@ def run_task(session: Session, task: Task, on_event: Callable[[AgentEvent], None
|
|
|
512
514
|
task.status_line = _STATUS.get(tc.name, "Working")
|
|
513
515
|
shown = {k: (v if k != "content" else f"<{len(v)} chars>") for k, v in tc.arguments.items()}
|
|
514
516
|
on_event(AgentEvent(EventType.TOOL_CALL_START, task.id, {"name": tc.name, "shown": shown}))
|
|
515
|
-
|
|
517
|
+
if tc.name == "delegate" and not is_local:
|
|
518
|
+
from . import subagents
|
|
519
|
+
result = subagents.delegate(tc.arguments.get("tasks", []), provider, model,
|
|
520
|
+
read_cap=read_cap, on_event=on_event, cancel=task.cancel_flag)
|
|
521
|
+
else:
|
|
522
|
+
result = _dispatch_tool(session, task, tc.name, tc.arguments, read_cap)
|
|
516
523
|
on_event(AgentEvent(EventType.TOOL_CALL_RESULT, task.id, {"name": tc.name, "result": result}))
|
|
517
524
|
results.append(ToolResult(tool_call_id=tc.id, content=result))
|
|
518
525
|
conv.append(Message.results(results))
|
|
@@ -55,11 +55,20 @@ class AnthropicProvider:
|
|
|
55
55
|
return out
|
|
56
56
|
|
|
57
57
|
def send(self, conversation: Conversation, model: str, tools: tuple[ToolSpec, ...]) -> ProviderResponse:
|
|
58
|
+
# Prompt caching (GA — no beta header needed): mark the stable prefix
|
|
59
|
+
# (system prompt, last tool definition) with cache_control so repeated
|
|
60
|
+
# requests reuse Anthropic's cache instead of paying full price every
|
|
61
|
+
# turn. OpenAI-compatible providers cache automatically server-side —
|
|
62
|
+
# no payload change needed there.
|
|
63
|
+
tools_json = to_anthropic(tools)
|
|
64
|
+
if tools_json:
|
|
65
|
+
tools_json[-1] = {**tools_json[-1], "cache_control": {"type": "ephemeral"}}
|
|
58
66
|
payload = {
|
|
59
67
|
"model": model,
|
|
60
68
|
"max_tokens": 4096,
|
|
61
|
-
"system": conversation.system_prompt,
|
|
62
|
-
|
|
69
|
+
"system": [{"type": "text", "text": conversation.system_prompt,
|
|
70
|
+
"cache_control": {"type": "ephemeral"}}],
|
|
71
|
+
"tools": tools_json,
|
|
63
72
|
"messages": self._messages(conversation),
|
|
64
73
|
}
|
|
65
74
|
raw = post_json(API_URL, payload, headers=self._headers(), provider=self.name)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Cloud-path subagents: parallel, isolated read-only explorers behind the `delegate`
|
|
2
|
+
tool. Each runs in its own Conversation with only the read tools and returns a distilled
|
|
3
|
+
findings report — heavy file reading happens here and never enters the parent context."""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
import concurrent.futures
|
|
6
|
+
import threading
|
|
7
|
+
from . import tools
|
|
8
|
+
from .conversation import Conversation, Message, ToolResult
|
|
9
|
+
|
|
10
|
+
EXPLORER_PROMPT = (
|
|
11
|
+
"You are a read-only exploration agent. Investigate the goal using list_files, "
|
|
12
|
+
"read_file, and search_files, then STOP and reply with a concise findings report: "
|
|
13
|
+
"what you found, the concrete file:line references, and anything the caller needs. "
|
|
14
|
+
"You cannot edit, write, or run commands. Keep the report short — it is folded back "
|
|
15
|
+
"into another agent's context, so summarize; do not paste large file bodies."
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
def _read_dispatch(name: str, args: dict, read_cap: int | None) -> str:
|
|
19
|
+
if name == "list_files":
|
|
20
|
+
return tools.do_list_files(args.get("path", "."), max_chars=read_cap)
|
|
21
|
+
if name == "read_file":
|
|
22
|
+
return tools.do_read_file(args["path"], max_chars=read_cap)
|
|
23
|
+
if name == "search_files":
|
|
24
|
+
return tools.do_search_files(args["query"], args.get("path", "."))
|
|
25
|
+
return f"error: '{name}' is not available to an explorer (read-only)"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def run_explorer(goal, provider, model, read_cap=None, max_turns=8, cancel=None):
    """Run one isolated read-only exploration and return its findings text.

    A fresh ``Conversation`` is seeded with ``EXPLORER_PROMPT`` and ``goal``;
    the provider is then called repeatedly with only the read-only tool specs
    until it answers without tool calls.

    Args:
        goal: Instruction for the explorer, sent as the user message.
        provider: Object exposing ``stream(conv, model, specs, on_text)``.
        model: Model identifier passed through to the provider.
        read_cap: Passed to ``_read_dispatch`` for every tool call.
        max_turns: Maximum number of provider round-trips.
        cancel: Optional object with ``is_set()``; checked before each turn.

    Returns:
        The final assistant text (stripped), or a parenthesised placeholder
        when there was no text or the turn limit was reached, or
        ``"explorer cancelled"`` when ``cancel`` was set.
    """
    conv = Conversation(system_prompt=EXPLORER_PROMPT)
    conv.append(Message.user(goal))
    specs = tuple(_explorer_specs())  # read-only tool specs
    for _ in range(max_turns):
        if cancel is not None and cancel.is_set():
            return "explorer cancelled"
        # Streamed text is discarded: only the final report leaves this function.
        resp = provider.stream(conv, model, specs, lambda _c: None)
        msg = resp.message
        conv.append(msg)
        if not msg.tool_calls:
            return (msg.text or "").strip() or "(explorer produced no findings)"
        results = []
        for tc in msg.tool_calls:
            try:
                content = _read_dispatch(tc.name, tc.arguments, read_cap)
            except Exception as e:
                # One bad call (e.g. a missing required argument) is handed
                # back to the model as a tool error so the exploration can
                # continue instead of being thrown away.
                content = f"error: {tc.name} failed: {str(e)[:200]}"
            results.append(ToolResult(tool_call_id=tc.id, content=content))
        conv.append(Message.results(results))
    return "(explorer hit its turn limit without a final report)"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _explorer_specs():
    """Return the entries of ``TOOL_SPECS`` an explorer may use, as a list.

    Only the specs named ``list_files``, ``read_file`` and ``search_files``
    are kept, in the order they appear in ``TOOL_SPECS``.
    """
    from .toolspec import TOOL_SPECS

    read_only = ("list_files", "read_file", "search_files")
    selected = []
    for spec in TOOL_SPECS:
        if spec.name in read_only:
            selected.append(spec)
    return selected
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class _AnyEvent:
|
|
54
|
+
"""Read-only OR of several threading.Events: is_set() is True if any is set.
|
|
55
|
+
Lets an explorer honor both the parent task's cancel (esc) and delegate's own
|
|
56
|
+
batch-timeout signal, while delegate only ever sets its OWN event."""
|
|
57
|
+
def __init__(self, *events):
|
|
58
|
+
self._events = [e for e in events if e is not None]
|
|
59
|
+
def is_set(self) -> bool:
|
|
60
|
+
return any(e.is_set() for e in self._events)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
MAX_PARALLEL = 4
|
|
64
|
+
DELEGATE_TIMEOUT = 180 # seconds, wall-clock budget for the whole batch
|
|
65
|
+
_MAX_SECTION = 4000
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def delegate(tasks, provider, model, read_cap=None, on_event=None, cancel=None) -> str:
    """Run a batch of sub-tasks on a thread pool and return one text digest.

    Args:
        tasks: Iterable of task dicts; entries that are not dicts or have no
            truthy ``goal`` are dropped. ``role`` defaults to ``"explore"``.
        provider, model, read_cap: Passed through to ``run_explorer``.
        on_event: Accepted for the caller's signature; not used here.
        cancel: Optional parent cancel event. It is only read, never set.

    Returns:
        A digest with one ``### [i] role: goal`` section per accepted task,
        in input order, or an ``error:`` string when no task was accepted.
    """
    accepted = []
    for entry in tasks or []:
        if isinstance(entry, dict) and entry.get("goal"):
            accepted.append(entry)
    if not accepted:
        return "error: delegate needs at least one {role, goal} task"

    own_cancel = threading.Event()
    either = _AnyEvent(cancel, own_cancel)

    def _run(entry):
        role = entry.get("role") or "explore"
        goal = entry["goal"]
        if role == "work":
            return role, goal, "(worker delegation is not enabled yet — Phase 2)"
        try:
            report = run_explorer(goal, provider, model, read_cap=read_cap, cancel=either)
        except Exception as exc:  # one failing subagent must not sink the batch
            report = f"(explorer error: {str(exc)[:200]})"
        return role, goal, report

    # The pool is managed by hand rather than with `with`: leaving a `with`
    # block waits for every worker, which is exactly the stall the batch
    # timeout exists to prevent. shutdown() runs once, with wait=False.
    slots: list[tuple[str, str, str] | None] = [None] * len(accepted)
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_PARALLEL)
    index_of = {}
    for position, entry in enumerate(accepted):
        index_of[pool.submit(_run, entry)] = position
    try:
        for done in concurrent.futures.as_completed(index_of, timeout=DELEGATE_TIMEOUT):
            slots[index_of[done]] = done.result()
    except concurrent.futures.TimeoutError:
        # Signal stragglers through our own event only.
        own_cancel.set()
    finally:
        pool.shutdown(wait=False, cancel_futures=True)

    digest = [f"## delegate results ({len(slots)} task(s))"]
    for number, (entry, outcome) in enumerate(zip(accepted, slots), 1):
        if outcome is not None:
            role, goal, report = outcome
        else:
            # Slot never filled: the task did not finish within the budget.
            role, goal = (entry.get("role") or "explore"), entry["goal"]
            report = "(timed out)"
        if len(report) > _MAX_SECTION:
            report = report[:_MAX_SECTION] + " …[truncated]"
        digest.append(f"\n### [{number}] {role}: {goal}\n{report}")
    return "\n".join(digest)
|
|
@@ -123,3 +123,30 @@ def to_gemini(specs: tuple[ToolSpec, ...] = TOOL_SPECS) -> list[dict]:
|
|
|
123
123
|
|
|
124
124
|
# Guarantee the local-Ollama payload is byte-identical to the proven constant.
|
|
125
125
|
assert to_openai() == tools.TOOLS, "toolspec.to_openai() drifted from tools.TOOLS"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
DELEGATE_SPEC = ToolSpec(
|
|
129
|
+
"delegate",
|
|
130
|
+
"Run one or more independent sub-tasks in parallel, each in its own isolated context. "
|
|
131
|
+
"role 'explore' investigates read-only and returns a concise findings report — use it to "
|
|
132
|
+
"locate code or understand a flow without reading everything into this conversation. "
|
|
133
|
+
"role 'work' is reserved. Returns a digest of each task's result.",
|
|
134
|
+
raw_schema={
|
|
135
|
+
"type": "object",
|
|
136
|
+
"properties": {
|
|
137
|
+
"tasks": {
|
|
138
|
+
"type": "array",
|
|
139
|
+
"items": {
|
|
140
|
+
"type": "object",
|
|
141
|
+
"properties": {
|
|
142
|
+
"role": {"type": "string", "enum": ["explore", "work"]},
|
|
143
|
+
"goal": {"type": "string", "description": "What to find/understand (explore) — a clear, self-contained instruction."},
|
|
144
|
+
},
|
|
145
|
+
"required": ["goal"],
|
|
146
|
+
},
|
|
147
|
+
"description": "Independent sub-tasks run in parallel.",
|
|
148
|
+
}
|
|
149
|
+
},
|
|
150
|
+
"required": ["tasks"],
|
|
151
|
+
},
|
|
152
|
+
)
|
|
@@ -15,6 +15,7 @@ import json
|
|
|
15
15
|
import os
|
|
16
16
|
import shutil
|
|
17
17
|
import subprocess
|
|
18
|
+
import sys
|
|
18
19
|
import threading
|
|
19
20
|
import time
|
|
20
21
|
import urllib.request
|
|
@@ -99,15 +100,43 @@ def notice(now: float | None = None) -> str | None:
|
|
|
99
100
|
return msg
|
|
100
101
|
|
|
101
102
|
|
|
103
|
+
def _kind_from(paths: str) -> str:
|
|
104
|
+
"""Classify an install from where its files live: uv tool, pipx, or plain pip."""
|
|
105
|
+
p = paths.replace(os.sep, "/").lower()
|
|
106
|
+
if "/uv/tools/" in p:
|
|
107
|
+
return "uv"
|
|
108
|
+
if "/pipx/" in p:
|
|
109
|
+
return "pipx"
|
|
110
|
+
return "pip"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _install_kind() -> str:
    """Classify the running install from ``sys.prefix`` and this file's path.

    Both locations are joined with ``|`` and handed to ``_kind_from``.
    """
    module_path = os.path.abspath(__file__)
    return _kind_from("|".join((sys.prefix, module_path)))
|
|
116
|
+
|
|
117
|
+
|
|
102
118
|
def run_upgrade(emit) -> int:
|
|
103
|
-
"""`2b --update`: upgrade
|
|
104
|
-
|
|
105
|
-
if
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
119
|
+
"""`2b --update`: upgrade using whatever installed it — `uv tool upgrade` (installer/
|
|
120
|
+
uv), `pipx upgrade` (pipx), or `pip install -U` (pip). Returns the tool's exit code
|
|
121
|
+
(1 if the needed tool isn't found). Lets the tool's own progress print to the terminal."""
|
|
122
|
+
kind = _install_kind()
|
|
123
|
+
if kind == "uv":
|
|
124
|
+
if not shutil.which("uv"):
|
|
125
|
+
emit(f"uv not found — run 'uv tool upgrade {PKG}' once it's on PATH.")
|
|
126
|
+
return 1
|
|
127
|
+
emit(f"Updating {PKG} via uv tool…")
|
|
128
|
+
cmd = ["uv", "tool", "upgrade", PKG]
|
|
129
|
+
elif kind == "pipx":
|
|
130
|
+
if not shutil.which("pipx"):
|
|
131
|
+
emit(f"pipx not found — run 'pipx upgrade {PKG}' once it's on PATH.")
|
|
132
|
+
return 1
|
|
133
|
+
emit(f"Updating {PKG} via pipx…")
|
|
134
|
+
cmd = ["pipx", "upgrade", PKG]
|
|
135
|
+
else:
|
|
136
|
+
emit(f"Updating {PKG} via pip…")
|
|
137
|
+
cmd = [sys.executable, "-m", "pip", "install", "-U", PKG]
|
|
109
138
|
try:
|
|
110
|
-
return subprocess.run(
|
|
139
|
+
return subprocess.run(cmd, timeout=600).returncode
|
|
111
140
|
except Exception as e:
|
|
112
141
|
emit(f"update failed: {e}")
|
|
113
142
|
return 1
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import unittest
|
|
4
|
+
|
|
5
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
|
|
6
|
+
|
|
7
|
+
from two_b import orchestrator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Exposure(unittest.TestCase):
    """Which tool specs ``_active_specs`` exposes to local vs. cloud models."""

    def test_local_has_no_delegate(self):
        local_names = []
        for spec in orchestrator._active_specs(is_local=True):
            local_names.append(spec.name)
        self.assertNotIn("delegate", local_names)
        expected_head = ["list_files", "read_file", "search_files", "edit_file", "write_file", "run_git"]
        self.assertEqual(local_names[:6], expected_head)

    def test_cloud_has_delegate(self):
        cloud_specs = orchestrator._active_specs(is_local=False)
        self.assertIn("delegate", [spec.name for spec in cloud_specs])

    def test_frozen_schema_still_holds(self):
        # Importing two_b.tools triggers the toolspec.py assert (to_openai() == tools.TOOLS).
        import two_b.tools as tools
        from two_b.toolspec import to_openai, TOOL_SPECS

        self.assertEqual(to_openai(), tools.TOOLS)
        base_names = [spec.name for spec in TOOL_SPECS]
        self.assertNotIn("delegate", base_names)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
if __name__ == "__main__":
|
|
32
|
+
unittest.main()
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import os, sys, unittest
|
|
2
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
|
|
3
|
+
from two_b.providers import anthropic as A
|
|
4
|
+
from two_b.conversation import Conversation
|
|
5
|
+
from two_b.toolspec import TOOL_SPECS
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Cache(unittest.TestCase):
    """Anthropic requests must carry prompt-cache markers."""

    def test_system_and_tools_cached(self):
        captured = {}

        def fake_post_json(url, payload, **kwargs):
            # Record the outgoing request, then answer with a canned reply.
            # (A `setdefault(...) or {...}` one-liner would hand the request
            # payload back as the "response", because the payload is truthy.)
            captured.setdefault("p", payload)
            return {"content": [{"type": "text", "text": "ok"}]}

        # Restore the real transport after the test so the patch cannot leak
        # into other test modules that import the same provider.
        self.addCleanup(setattr, A, "post_json", A.post_json)
        A.post_json = fake_post_json

        # Same for the API key: put back whatever was (or was not) set before.
        previous_key = os.environ.get("ANTHROPIC_API_KEY")

        def restore_key():
            if previous_key is None:
                os.environ.pop("ANTHROPIC_API_KEY", None)
            else:
                os.environ["ANTHROPIC_API_KEY"] = previous_key

        self.addCleanup(restore_key)
        os.environ["ANTHROPIC_API_KEY"] = "x"

        A.AnthropicProvider().send(Conversation(system_prompt="SYS"), "claude-sonnet-5", tuple(TOOL_SPECS))
        p = captured["p"]
        self.assertEqual(p["system"][-1]["cache_control"], {"type": "ephemeral"})
        self.assertEqual(p["tools"][-1]["cache_control"], {"type": "ephemeral"})
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import os, sys, tempfile, unittest
|
|
2
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
|
|
3
|
+
from two_b import subagents
|
|
4
|
+
|
|
5
|
+
class ReadDispatch(unittest.TestCase):
    """``_read_dispatch`` serves read tools and refuses everything else."""

    def test_write_tools_refused(self):
        self.assertIn("not available", subagents._read_dispatch("edit_file", {"path": "x"}, None))
        self.assertIn("not available", subagents._read_dispatch("run_command", {"command": "ls"}, None))

    def test_read_tools_allowed(self):
        # TemporaryDirectory removes the scratch directory when the block
        # exits; a bare mkdtemp() would leave it behind after every run.
        with tempfile.TemporaryDirectory() as d:
            with open(os.path.join(d, "a.py"), "w") as f:
                f.write("x=1\n")
            out = subagents._read_dispatch("list_files", {"path": d}, None)
        self.assertIn("a.py", out)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RunExplorer(unittest.TestCase):
    """``run_explorer`` keeps looping over tool calls until a plain-text reply."""

    def test_loops_then_returns_final_text(self):
        from two_b.conversation import Message, ToolCall
        from two_b.providers.base import ProviderResponse

        # Turn 1 asks for a tool; turn 2 delivers the final report.
        first_turn = Message.assistant(tool_calls=[ToolCall.new("search_files", {"query": "Widget"})])
        second_turn = Message.assistant(text="Widget is defined in a.py:1")
        scripted = iter([first_turn, second_turn])

        class FakeProvider:
            name = "anthropic"

            def stream(self, conv, model, tools_, on_text):
                return ProviderResponse(message=next(scripted), raw={})

        report = subagents.run_explorer("find Widget", FakeProvider(), "m")
        self.assertEqual(report, "Widget is defined in a.py:1")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Delegate(unittest.TestCase):
    """Batch behaviour of ``subagents.delegate`` with ``run_explorer`` stubbed."""

    def setUp(self):
        # Each test swaps in its own stub; remember the real function.
        self._orig_run_explorer = subagents.run_explorer

    def tearDown(self):
        subagents.run_explorer = self._orig_run_explorer

    def test_digest_has_one_section_per_task(self):
        def echo(goal, *a, **k):
            return f"found: {goal}"

        subagents.run_explorer = echo
        batch = [{"role": "explore", "goal": "A"}, {"role": "explore", "goal": "B"}]
        out = subagents.delegate(batch, provider=None, model="m")
        for fragment in ("A", "B", "found: A", "found: B"):
            self.assertIn(fragment, out)

    def test_work_role_stubbed(self):
        batch = [{"role": "work", "goal": "edit x"}]
        out = subagents.delegate(batch, provider=None, model="m")
        self.assertIn("not enabled yet", out)

    def test_batch_failure_isolation(self):
        def flaky(goal, *a, **k):
            if goal != "bad":
                return f"found: {goal}"
            raise RuntimeError("boom")

        subagents.run_explorer = flaky
        batch = [{"role": "explore", "goal": "bad"}, {"role": "explore", "goal": "good"}]
        out = subagents.delegate(batch, provider=None, model="m")
        self.assertIn("### [1] explore: bad", out)
        self.assertIn("explorer error", out)
        self.assertIn("### [2] explore: good", out)
        self.assertIn("found: good", out)

    def test_batch_timeout_does_not_touch_parent_cancel(self):
        import threading
        import time

        def slow(goal, *a, **k):
            time.sleep(0.5)  # slower than the tiny budget
            return "late"

        parent = threading.Event()
        subagents.run_explorer = slow
        saved_budget = subagents.DELEGATE_TIMEOUT
        subagents.DELEGATE_TIMEOUT = 0.05
        try:
            out = subagents.delegate([{"role": "explore", "goal": "slow"}],
                                     provider=None, model="m", cancel=parent)
        finally:
            subagents.DELEGATE_TIMEOUT = saved_budget
        self.assertFalse(parent.is_set())  # parent task must NOT be cancelled
        self.assertIn("(timed out)", out)
|
|
@@ -64,28 +64,48 @@ class Notice(unittest.TestCase):
|
|
|
64
64
|
self.assertIsNone(update.notice(now=self.now))
|
|
65
65
|
|
|
66
66
|
|
|
67
|
+
class InstallKind(unittest.TestCase):
    """Path-based install classification done by ``update._kind_from``."""

    def test_kind_from_path(self):
        expectations = (
            ("/home/u/.local/share/uv/tools/2b-agent/lib", "uv"),
            ("/home/u/.local/pipx/venvs/2b-agent", "pipx"),
            ("/usr/lib/python3.12/site-packages", "pip"),
        )
        for location, kind in expectations:
            self.assertEqual(update._kind_from(location), kind)
|
|
72
|
+
|
|
73
|
+
|
|
67
74
|
class RunUpgrade(unittest.TestCase):
|
|
68
75
|
def _patch(self, obj, attr, val):
|
|
69
76
|
orig = getattr(obj, attr)
|
|
70
77
|
setattr(obj, attr, val)
|
|
71
78
|
self.addCleanup(setattr, obj, attr, orig)
|
|
72
79
|
|
|
73
|
-
def
|
|
74
|
-
self._patch(update
|
|
75
|
-
|
|
76
|
-
code = update.run_upgrade(out.append)
|
|
77
|
-
self.assertEqual(code, 1)
|
|
78
|
-
self.assertIn("uv not found", "\n".join(out))
|
|
79
|
-
|
|
80
|
-
def test_uv_present_invokes_upgrade(self):
|
|
81
|
-
self._patch(update.shutil, "which", lambda n: "/usr/bin/uv")
|
|
80
|
+
def _capture(self, kind, which_ok=True):
|
|
81
|
+
self._patch(update, "_install_kind", lambda: kind)
|
|
82
|
+
self._patch(update.shutil, "which", lambda n: "/usr/bin/" + n if which_ok else None)
|
|
82
83
|
calls = []
|
|
83
84
|
self._patch(update.subprocess, "run",
|
|
84
85
|
lambda argv, **kw: calls.append(argv) or types.SimpleNamespace(returncode=0))
|
|
85
86
|
code = update.run_upgrade([].append)
|
|
87
|
+
return code, calls
|
|
88
|
+
|
|
89
|
+
def test_uv_install_uses_uv_tool(self):
|
|
90
|
+
code, calls = self._capture("uv")
|
|
86
91
|
self.assertEqual(code, 0)
|
|
87
92
|
self.assertIn(["uv", "tool", "upgrade", "2b-agent"], calls)
|
|
88
93
|
|
|
94
|
+
def test_pipx_install_uses_pipx(self):
|
|
95
|
+
code, calls = self._capture("pipx")
|
|
96
|
+
self.assertEqual(code, 0)
|
|
97
|
+
self.assertIn(["pipx", "upgrade", "2b-agent"], calls)
|
|
98
|
+
|
|
99
|
+
def test_pip_install_uses_pip(self):
|
|
100
|
+
code, calls = self._capture("pip")
|
|
101
|
+
self.assertEqual(code, 0)
|
|
102
|
+
self.assertEqual(calls[0][1:], ["-m", "pip", "install", "-U", "2b-agent"]) # sys.executable -m pip …
|
|
103
|
+
|
|
104
|
+
def test_uv_absent_returns_1(self):
|
|
105
|
+
code, calls = self._capture("uv", which_ok=False)
|
|
106
|
+
self.assertEqual(code, 1)
|
|
107
|
+
self.assertEqual(calls, [])
|
|
108
|
+
|
|
89
109
|
|
|
90
110
|
if __name__ == "__main__":
|
|
91
111
|
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|