@pencil-agent/nano-pencil 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. package/README.md +267 -267
  2. package/dist/build-meta.json +3 -3
  3. package/dist/core/export-html/AGENT.md +11 -11
  4. package/dist/core/export-html/template.css +971 -971
  5. package/dist/core/export-html/template.html +54 -54
  6. package/dist/core/model/custom-providers.js +1 -1
  7. package/dist/core/model-registry.js +5 -5
  8. package/dist/extensions/builtin/AGENT.md +115 -115
  9. package/dist/extensions/builtin/browser/AGENT.md +17 -17
  10. package/dist/extensions/builtin/browser/agent-workspace/agent_helpers.py +12 -12
  11. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/amazon/product-search.md +198 -198
  12. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/archive-org/scraping.md +341 -341
  13. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/arxiv/scraping.md +311 -311
  14. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/arxiv-bulk/scraping.md +333 -333
  15. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/atlas/overview.md +70 -70
  16. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/booking-com/scraping.md +578 -578
  17. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/capterra/scraping.md +440 -440
  18. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/centilebrain/generate-estimates.md +110 -110
  19. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coingecko/scraping.md +325 -325
  20. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coinmarketcap/scraping.md +463 -463
  21. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coursera/scraping.md +360 -360
  22. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/craigslist/scraping.md +390 -390
  23. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/crossref/scraping.md +568 -568
  24. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/dev-to/scraping.md +323 -323
  25. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/duckduckgo/scraping.md +349 -349
  26. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/ebay/scraping.md +435 -435
  27. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/etsy/scraping.md +506 -506
  28. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/eventbrite/scraping.md +363 -363
  29. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/expedia/automation.md +168 -168
  30. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/facebook/groups.md +236 -236
  31. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/facebook/pages.md +295 -295
  32. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/framer/editor.md +108 -108
  33. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/fred/scraping.md +493 -493
  34. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/g2/scraping.md +580 -580
  35. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/genius/scraping.md +511 -511
  36. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/github/repo-actions.md +65 -65
  37. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/github/scraping.md +184 -184
  38. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/glassdoor/scraping.md +543 -543
  39. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/gmail/compose.md +122 -122
  40. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/goodreads/scraping.md +461 -461
  41. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/gutenberg/scraping.md +383 -383
  42. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/hackernews/scraping.md +243 -243
  43. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/howlongtobeat/scraping.md +473 -473
  44. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/imdb/scraping.md +271 -271
  45. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/itch-io/scraping.md +436 -436
  46. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/job-boards/indeed-glassdoor.md +1021 -1021
  47. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/letterboxd/scraping.md +349 -349
  48. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/linkedin/invitation-manager.md +109 -109
  49. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/loom/folder-enumeration.md +170 -170
  50. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/macrotrends/scraping.md +537 -537
  51. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/medium/article-hydration.md +120 -120
  52. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/medium/scraping.md +414 -414
  53. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/metacritic/scraping.md +477 -477
  54. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/musicbrainz/scraping.md +478 -478
  55. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/nasa/scraping.md +339 -339
  56. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/news-aggregation/multi-source.md +205 -205
  57. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/open-library/scraping.md +472 -472
  58. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/openalex/scraping.md +470 -470
  59. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/openstreetmap/scraping.md +490 -490
  60. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/package-registries/npm-pypi.md +478 -478
  61. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/polymarket/scraping.md +234 -234
  62. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/producthunt/scraping.md +307 -307
  63. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/pubmed/scraping.md +421 -421
  64. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/quora/scraping.md +364 -364
  65. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/rawg/scraping.md +352 -352
  66. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/reddit/scraping.md +124 -124
  67. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/rest-countries/scraping.md +233 -233
  68. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/sec-edgar/scraping.md +361 -361
  69. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/README.md +36 -36
  70. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/embedded-apps.md +72 -72
  71. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/knowledge-base.md +109 -109
  72. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/polaris-inputs.md +137 -137
  73. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/soundcloud/scraping.md +362 -362
  74. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/spotify/scraping.md +339 -339
  75. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/stackoverflow/scraping.md +435 -435
  76. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/steam/scraping.md +575 -575
  77. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/substack/scraping.md +338 -338
  78. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/thetechgeeks/pricing.md +52 -52
  79. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/tiktok/upload.md +107 -107
  80. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/tradingview/scraping.md +309 -309
  81. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/trello/boards-and-lists.md +88 -88
  82. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/trustpilot/scraping.md +375 -375
  83. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/walmart/scraping.md +444 -444
  84. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/wayback-machine/scraping.md +306 -306
  85. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/weather/scraping.md +398 -398
  86. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/wellfound/scraping.md +596 -596
  87. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/world-bank/scraping.md +356 -356
  88. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/xiaohongshu/scraping.md +84 -84
  89. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/youtube/scraping.md +418 -418
  90. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/zillow/scraping.md +433 -433
  91. package/dist/extensions/builtin/browser/browser.md +73 -73
  92. package/dist/extensions/builtin/browser/install.md +142 -142
  93. package/dist/extensions/builtin/browser/interaction-skills/connection.md +48 -48
  94. package/dist/extensions/builtin/browser/interaction-skills/cookies.md +3 -3
  95. package/dist/extensions/builtin/browser/interaction-skills/cross-origin-iframes.md +3 -3
  96. package/dist/extensions/builtin/browser/interaction-skills/dialogs.md +64 -64
  97. package/dist/extensions/builtin/browser/interaction-skills/downloads.md +3 -3
  98. package/dist/extensions/builtin/browser/interaction-skills/drag-and-drop.md +3 -3
  99. package/dist/extensions/builtin/browser/interaction-skills/dropdowns.md +3 -3
  100. package/dist/extensions/builtin/browser/interaction-skills/iframes.md +3 -3
  101. package/dist/extensions/builtin/browser/interaction-skills/network-requests.md +3 -3
  102. package/dist/extensions/builtin/browser/interaction-skills/print-as-pdf.md +3 -3
  103. package/dist/extensions/builtin/browser/interaction-skills/profile-sync.md +90 -90
  104. package/dist/extensions/builtin/browser/interaction-skills/screenshots.md +17 -17
  105. package/dist/extensions/builtin/browser/interaction-skills/scrolling.md +3 -3
  106. package/dist/extensions/builtin/browser/interaction-skills/shadow-dom.md +3 -3
  107. package/dist/extensions/builtin/browser/interaction-skills/tabs.md +69 -69
  108. package/dist/extensions/builtin/browser/interaction-skills/uploads.md +1 -1
  109. package/dist/extensions/builtin/browser/interaction-skills/viewport.md +3 -3
  110. package/dist/extensions/builtin/browser/src/browser_harness/AGENT.md +15 -15
  111. package/dist/extensions/builtin/browser/src/browser_harness/__init__.py +8 -8
  112. package/dist/extensions/builtin/browser/src/browser_harness/_ipc.py +90 -90
  113. package/dist/extensions/builtin/browser/src/browser_harness/admin.py +722 -722
  114. package/dist/extensions/builtin/browser/src/browser_harness/daemon.py +328 -328
  115. package/dist/extensions/builtin/browser/src/browser_harness/helpers.py +396 -396
  116. package/dist/extensions/builtin/browser/src/browser_harness/run.py +103 -103
  117. package/dist/extensions/builtin/discipline/skills/brainstorming/SKILL.md +33 -33
  118. package/dist/extensions/builtin/discipline/skills/executing-plans/SKILL.md +25 -25
  119. package/dist/extensions/builtin/discipline/skills/finishing-development-branch/SKILL.md +25 -25
  120. package/dist/extensions/builtin/discipline/skills/receiving-code-review/SKILL.md +22 -22
  121. package/dist/extensions/builtin/discipline/skills/requesting-code-review/SKILL.md +31 -31
  122. package/dist/extensions/builtin/discipline/skills/systematic-debugging/SKILL.md +28 -28
  123. package/dist/extensions/builtin/discipline/skills/test-driven-development/SKILL.md +32 -32
  124. package/dist/extensions/builtin/discipline/skills/using-git-worktrees/SKILL.md +25 -25
  125. package/dist/extensions/builtin/discipline/skills/verification-before-completion/SKILL.md +27 -27
  126. package/dist/extensions/builtin/discipline/skills/writing-plans/SKILL.md +26 -26
  127. package/dist/extensions/builtin/goal/README.md +67 -67
  128. package/dist/extensions/builtin/grub/README.md +112 -112
  129. package/dist/extensions/builtin/link-world/agent-workspace/README.md +16 -16
  130. package/dist/extensions/builtin/link-world/internet-search/internet-search.md +65 -65
  131. package/dist/extensions/builtin/link-world/link-world-agent.md +82 -82
  132. package/dist/extensions/builtin/link-world/linkworld.md +313 -313
  133. package/dist/extensions/builtin/link-world/network-routing/network-routing.md +67 -67
  134. package/dist/extensions/builtin/loop/README.md +92 -92
  135. package/dist/extensions/builtin/mcp/figma-design.md +68 -68
  136. package/dist/extensions/builtin/mcp/mcp-management.md +85 -85
  137. package/dist/extensions/builtin/recap/AGENT.md +15 -15
  138. package/dist/extensions/builtin/sal/README.md +72 -72
  139. package/dist/extensions/builtin/security-audit/README.md +289 -289
  140. package/dist/extensions/builtin/team/AGENT.md +112 -112
  141. package/dist/extensions/builtin/team/TESTING.md +299 -299
  142. package/dist/extensions/builtin/token-save/README.md +56 -56
  143. package/dist/extensions/optional/AGENT.md +10 -10
  144. package/dist/modes/interactive/controllers/input-submit-controller.js +2 -2
  145. package/dist/modes/interactive/controllers/stream-render-controller.js +2 -2
  146. package/dist/modes/interactive/interactive-mode.js +19 -19
  147. package/dist/modes/interactive/theme/dark.json +85 -85
  148. package/dist/modes/interactive/theme/light.json +84 -84
  149. package/dist/modes/interactive/theme/theme-schema.json +335 -335
  150. package/dist/modes/interactive/theme/warm.json +81 -81
  151. package/dist/node_modules/@pencil-agent/ai/dist/cli.js +0 -0
  152. package/dist/node_modules/@pencil-agent/ai/dist/models.generated.js +1 -1
  153. package/docs/ACP/345/215/217/350/256/256/351/233/206/346/210/220/345/274/200/345/217/221/346/226/207/346/241/243.md +851 -0
  154. package/docs/SDK-TESTING.md +364 -0
  155. package/docs/codex-goal-command-impl.md +1055 -1055
  156. package/docs/codex-goal-vs-grub.md +500 -500
  157. package/docs/custom-provider.md +27 -27
  158. package/docs/extensions.md +27 -27
  159. package/docs/keybindings.md +27 -27
  160. package/docs/loop 重构完成总结.md +250 -250
  161. package/docs/loop 重构完成报告.md +122 -122
  162. package/docs/loop 重构方案.md +1222 -1222
  163. package/docs/loop 重构方案实现报告.md +158 -158
  164. package/docs/loop 重构方案对比分析.md +128 -128
  165. package/docs/loop 重构计划.md +320 -320
  166. package/docs/loop-usage-examples.md +214 -214
  167. package/docs/mem-core/346/212/200/346/234/257/346/226/207/346/241/243.md +593 -0
  168. package/docs/models.md +27 -27
  169. package/docs/packages.md +27 -27
  170. package/docs/pi-design-philosophy.md +457 -457
  171. package/docs/planmode.md +1987 -1987
  172. package/docs/prompt-templates.md +27 -27
  173. package/docs/providers.md +27 -27
  174. package/docs/sdk.md +27 -27
  175. package/docs/skills.md +27 -27
  176. package/docs/startup-performance-optimization.md +301 -0
  177. package/docs/themes.md +27 -27
  178. package/docs/tui.md +27 -27
  179. package/docs/认知地图.md +47 -0
  180. package/package.json +190 -190
  181. package/docs/cc-agent-design.md +0 -1297
  182. package/docs/cc-tui-design.md +0 -1333
  183. package/docs/nanoPencil-学习计划.md +0 -170
  184. package/docs/scan-report.md +0 -3820
  185. package/docs//345/257/271/346/240/207Claude-Code.md +0 -1775
  186. package/docs//351/230/277/351/207/214/345/267/264/345/267/264/350/264/242/346/212/245/345/210/206/346/236/220/344/271/246.md +0 -261
@@ -1,299 +1,299 @@
1
- # Testing the AgentTeam Extension (Phase B)
2
-
3
- > Manual & smoke-test guide for `extensions/builtin/team/`.
4
- > Owner: AgentTeam Phase B. Last updated: 2026-04-08.
5
-
6
- This document describes how to verify the Phase B AgentTeam runtime end to end. It covers persistence, the permission model, the mailbox, transcripts, and the subprocess SubAgent backend.
7
-
8
- ---
9
-
10
- ## 0. Prerequisites
11
-
12
- ```bash
13
- npm install
14
- npm run build # or run via tsx for fast iteration
15
- ```
16
-
17
- Run nanoPencil in interactive mode for the manual scenarios:
18
-
19
- ```bash
20
- npx tsx cli.ts
21
- ```
22
-
23
- All teammate state lives under:
24
-
25
- - `${NANOPENCIL_AGENT_DIR:-~/.nanopencil/agent}/teams/<id>.json` — durable state
26
- - `${NANOPENCIL_AGENT_DIR:-~/.nanopencil/agent}/teams/tasks.json` — shared task list
27
- - `${NANOPENCIL_AGENT_DIR:-~/.nanopencil/agent}/teams/mailbox.jsonl` — replayable mailbox log
28
- - `${NANOPENCIL_AGENT_DIR:-~/.nanopencil/agent}/teams/transcripts/<id>.jsonl` — per-teammate transcripts
29
-
30
- You can wipe state between runs with:
31
-
32
- ```bash
33
- rm -rf ~/.nanopencil/agent/teams
34
- ```
35
-
36
- ---
37
-
38
- ## 1. Type & lint sanity
39
-
40
- ```bash
41
- npx tsc --noEmit
42
- ```
43
-
44
- The team extension must report **zero** errors. Pre-existing `presence/index.ts` errors related to `@pencil-agent/mem-core` are unrelated and may remain until that extension is rebundled.
45
-
46
- ---
47
-
48
- ## 2. Manual smoke test — happy path
49
-
50
- In an interactive session:
51
-
52
- ```text
53
- /team # → "No teammates."
54
- /team:spawn researcher --name scout # spawns read-only researcher
55
- /team # lists scout (○ idle)
56
- /team:status scout # full record incl. cwd
57
- /team:send scout "What does core/sub-agent/ do?"
58
- /team:status scout # messages = 2
59
- /team:terminate scout # ⊗ teammate gone
60
- /team # → "No teammates."
61
- ```
62
-
63
- Pass criteria:
64
-
65
- - `scout`'s state file appears under `~/.nanopencil/agent/teams/` after spawn.
66
- - The state file is removed after `terminate`.
67
- - `~/.nanopencil/agent/teams/transcripts/<id>.jsonl` contains one `leader` line and one `teammate` line per send, then is removed on terminate.
68
-
69
- ---
70
-
71
- ## 3. Persistence across restarts
72
-
73
- ```text
74
- /team:spawn researcher --name scout
75
- /team:send scout "Summarize README.md"
76
- ```
77
-
78
- Quit nanoPencil (`/exit` or Ctrl+C). Restart with `npx tsx cli.ts`. Then:
79
-
80
- ```text
81
- /team
82
- /team:status scout
83
- ```
84
-
85
- Pass criteria:
86
-
87
- - `scout` is still listed.
88
- - The previous message history is intact.
89
- - `scout`'s status is `idle` (any in-flight `running` is downgraded to `idle` on load — see `team-runtime.ts` `load()`).
90
-
91
- ---
92
-
93
- ## 4. Worktree isolation for implementers
94
-
95
- ```text
96
- /team:spawn implementer --name builder
97
- /team:status builder
98
- ```
99
-
100
- Pass criteria:
101
-
102
- - `builder.cwd` and `worktreePath` point at a fresh git worktree under
103
- `~/.nanopencil/agent/...` (or `WorktreeManager`'s default location).
104
- - `git worktree list` from the project shows the new worktree.
105
- - Default mode is `plan` (read-only), **not** `execute`.
106
-
107
- ---
108
-
109
- ## 5. Permission model — execute escalation
110
-
111
- ```text
112
- /team:mode builder execute
113
- ```
114
-
115
- Pass criteria:
116
-
117
- - The response says **"requires approval"** and prints a request id.
118
- - `/team:approve` (no args) lists the pending request with action
119
- `mode_change_to_execute`.
120
- - `/team:approve <id>` flips `builder` to `execute` mode.
121
- - A second `/team:approve <id>` returns "not found or already resolved".
122
- - `/team:status builder` now reports `mode: execute`.
123
- - `/team:terminate builder` cancels any still-pending requests cleanly.
124
-
125
- Negative test — denial path:
126
-
127
- ```text
128
- /team:spawn implementer --name builder2
129
- /team:mode builder2 execute # pending
130
- /team:terminate builder2 # cancels the request as denied
131
- ```
132
-
133
- Pass criteria: terminate succeeds, no dangling promise warnings in stderr.
134
-
135
- ---
136
-
137
- ## 6. Mailbox observation
138
-
139
- The mailbox is currently consumed by `team-runtime` itself; programmatic observers can subscribe via `runtime.getMailbox().subscribe(...)`. To verify mailbox traffic from a test harness:
140
-
141
- ```ts
142
- import { TeamRuntime } from "./team-runtime.js";
143
- const rt = new TeamRuntime({ storageDir: "/tmp/team-test" });
144
- rt.getMailbox().subscribe((m) => console.log(m.type, m.direction));
145
- await rt.spawn({ role: "researcher", baseCwd: process.cwd() });
146
- await rt.send("researcher-1", "ping");
147
- ```
148
-
149
- Expected message sequence: `task_request` → `task_result`.
150
- After `setMode("..." , "execute")` on an implementer: `permission_request`, then on approval `permission_response` and `mode_change`.
151
-
152
- Shared task and teammate mail smoke:
153
-
154
- ```text
155
- /team:spawn researcher --name scout
156
- /team:spawn reviewer --name reviewer
157
- /team:task add Map team implementation
158
- /team:task claim T-1 scout
159
- /team:mail scout reviewer "Please review T-1 when ready"
160
- /team:task list
161
- ```
162
-
163
- Pass criteria:
164
-
165
- - `tasks.json` contains `T-1` with `status: "claimed"` and `ownerName: "scout"`.
166
- - `mailbox.jsonl` contains `task_update`, `task_claim`, and `teammate_message`.
167
- - Restarting nanoPencil preserves `/team:task list` output and the teammate mailbox context shown to each teammate.
168
- - The next `/team:send scout ...` prompt includes `Shared team tasks`, the claimed task, and recent mailbox lines targeting `scout`.
169
-
170
- ---
171
-
172
- ## 6.1 Path-scoped write access
173
-
174
- ```text
175
- /team:spawn implementer --name builder
176
- /team:mode builder execute
177
- /team:approve <request-id>
178
- ```
179
-
180
- Pass criteria:
181
-
182
- - In execute mode, `edit`, `write`, and simple bash write commands can mutate files inside `builder.cwd`.
183
- - Attempts to mutate an absolute path outside `builder.cwd` fail with `Write denied` or the team bash sandbox message.
184
- - `/team:allow-path builder ../shared-output` grants that path prefix; subsequent `edit`, `write`, and simple bash writes under the approved path succeed.
185
- - Complex shell write syntax with `&&`, pipes, command substitution, or subshells remains blocked by default.
186
-
187
- ---
188
-
189
- ## 7. Transcripts
190
-
191
- After any `/team:send`:
192
-
193
- ```bash
194
- ls ~/.nanopencil/agent/teams/transcripts/
195
- cat ~/.nanopencil/agent/teams/transcripts/<id>.jsonl
196
- ```
197
-
198
- Pass criteria:
199
-
200
- - One JSON object per line.
201
- - Each line has `timestamp`, `kind` (`leader` | `teammate` | `event`), and `content`.
202
- - Transcript file is removed when the teammate is terminated.
203
-
204
- ---
205
-
206
- ## 8. Stop in flight
207
-
208
- Spawn a teammate, send a long task, then immediately stop:
209
-
210
- ```text
211
- /team:spawn researcher --name slow
212
- /team:send slow "Read every file under core/ and summarize"
213
- # while running:
214
- /team:stop slow
215
- /team:status slow
216
- ```
217
-
218
- Pass criteria:
219
-
220
- - The send call returns within ~15 seconds with `aborted: true` or an error.
221
- - `slow` ends up in `stopped` status.
222
- - A subsequent `/team:send slow "ping"` works (status returns to `running` then `idle`).
223
-
224
- Concurrent send queue check:
225
-
226
- ```text
227
- /team:spawn researcher --name scout
228
- /team:send scout "First long read"
229
- # while running:
230
- /team:send scout "Second follow-up"
231
- ```
232
-
233
- Pass criteria:
234
-
235
- - The second send is queued instead of rejected.
236
- - Mailbox contains a `task_progress` event with `status: "queued"`.
237
- - The teammate receives and records both leader messages in order.
238
-
239
- ---
240
-
241
- ## 9. Subprocess SubAgent backend (smoke)
242
-
243
- The subprocess backend is exercised via `SubprocessSubAgentBackend` directly. Quick Node smoke from the repo root:
244
-
245
- ```bash
246
- npx tsx -e "
247
- import { SubprocessSubAgentBackend } from './core/sub-agent/subprocess-backend.js';
248
- const backend = new SubprocessSubAgentBackend();
249
- const ctrl = new AbortController();
250
- const handle = await backend.spawn({
251
- prompt: 'hello',
252
- cwd: process.cwd(),
253
- tools: [],
254
- signal: ctrl.signal,
255
- });
256
- console.log(await handle.result());
257
- "
258
- ```
259
-
260
- Pass criteria:
261
-
262
- - The script prints `{ success: true, response: '[subprocess-worker:...] received prompt of 5 chars in cwd ...' }`.
263
- - Aborting before the worker posts: replace `await handle.result()` with `ctrl.abort(); await handle.result();` — should print `{ success: false, error: 'Aborted' }` and the worker thread is terminated.
264
-
265
- > **Scope note.** The subprocess backend ships the harness (worker_threads channel, abort wiring, lifecycle) but does **not** run the full LLM agent loop inside the worker yet. Callers that need real LLM execution should keep using the in-process backend; this backend is the foundation for future crash isolation.
266
-
267
- ---
268
-
269
- ## 10. Recovery (corrupt state file)
270
-
271
- Touch a malformed state file to confirm `loadAll()` skips it gracefully:
272
-
273
- ```bash
274
- echo "not-json" > ~/.nanopencil/agent/teams/garbage.json
275
- npx tsx cli.ts
276
- /team
277
- ```
278
-
279
- Pass criteria: nanoPencil starts normally, `/team` lists only valid teammates, no crash.
280
-
281
- ---
282
-
283
- ## 11. Coverage matrix
284
-
285
- | §B item | Covered by section |
286
- |----------------------------------|--------------------|
287
- | B.1 Persistent teammates | §2, §3 |
288
- | B.2 State store independence | §3, §10 |
289
- | B.3 Mailbox protocol | §6 |
290
- | B.4 Permission model | §5 |
291
- | B.5 Worktree isolation | §4 |
292
- | B.6 Multi-backend (subprocess) | §9 |
293
- | B.7 Transcripts | §7 |
294
- | B.8 Recovery | §3, §10 |
295
- | AbortSignal closure (Phase A.2) | §8 |
296
-
297
- ---
298
-
299
- **Covenant**: When you add a new `/team:*` command or change the permission/mailbox surface, update this file in the same commit.
1
+ # Testing the AgentTeam Extension (Phase B)
2
+
3
+ > Manual & smoke-test guide for `extensions/builtin/team/`.
4
+ > Owner: AgentTeam Phase B. Last updated: 2026-04-08.
5
+
6
+ This document describes how to verify the Phase B AgentTeam runtime end to end. It covers persistence, the permission model, the mailbox, transcripts, and the subprocess SubAgent backend.
7
+
8
+ ---
9
+
10
+ ## 0. Prerequisites
11
+
12
+ ```bash
13
+ npm install
14
+ npm run build # or run via tsx for fast iteration
15
+ ```
16
+
17
+ Run nanoPencil in interactive mode for the manual scenarios:
18
+
19
+ ```bash
20
+ npx tsx cli.ts
21
+ ```
22
+
23
+ All teammate state lives under:
24
+
25
+ - `${NANOPENCIL_AGENT_DIR:-~/.nanopencil/agent}/teams/<id>.json` — durable state
26
+ - `${NANOPENCIL_AGENT_DIR:-~/.nanopencil/agent}/teams/tasks.json` — shared task list
27
+ - `${NANOPENCIL_AGENT_DIR:-~/.nanopencil/agent}/teams/mailbox.jsonl` — replayable mailbox log
28
+ - `${NANOPENCIL_AGENT_DIR:-~/.nanopencil/agent}/teams/transcripts/<id>.jsonl` — per-teammate transcripts
29
+
30
+ You can wipe state between runs with:
31
+
32
+ ```bash
33
+ rm -rf ~/.nanopencil/agent/teams
34
+ ```
35
+
36
+ ---
37
+
38
+ ## 1. Type & lint sanity
39
+
40
+ ```bash
41
+ npx tsc --noEmit
42
+ ```
43
+
44
+ The team extension must report **zero** errors. Pre-existing `presence/index.ts` errors related to `@pencil-agent/mem-core` are unrelated and may remain until that extension is rebundled.
45
+
46
+ ---
47
+
48
+ ## 2. Manual smoke test — happy path
49
+
50
+ In an interactive session:
51
+
52
+ ```text
53
+ /team # → "No teammates."
54
+ /team:spawn researcher --name scout # spawns read-only researcher
55
+ /team # lists scout (○ idle)
56
+ /team:status scout # full record incl. cwd
57
+ /team:send scout "What does core/sub-agent/ do?"
58
+ /team:status scout # messages = 2
59
+ /team:terminate scout # ⊗ teammate gone
60
+ /team # → "No teammates."
61
+ ```
62
+
63
+ Pass criteria:
64
+
65
+ - `scout`'s state file appears under `~/.nanopencil/agent/teams/` after spawn.
66
+ - The state file is removed after `terminate`.
67
+ - `~/.nanopencil/agent/teams/transcripts/<id>.jsonl` contains one `leader` line and one `teammate` line per send, then is removed on terminate.
68
+
69
+ ---
70
+
71
+ ## 3. Persistence across restarts
72
+
73
+ ```text
74
+ /team:spawn researcher --name scout
75
+ /team:send scout "Summarize README.md"
76
+ ```
77
+
78
+ Quit nanoPencil (`/exit` or Ctrl+C). Restart with `npx tsx cli.ts`. Then:
79
+
80
+ ```text
81
+ /team
82
+ /team:status scout
83
+ ```
84
+
85
+ Pass criteria:
86
+
87
+ - `scout` is still listed.
88
+ - The previous message history is intact.
89
+ - `scout`'s status is `idle` (any in-flight `running` is downgraded to `idle` on load — see `team-runtime.ts` `load()`).
90
+
91
+ ---
92
+
93
+ ## 4. Worktree isolation for implementers
94
+
95
+ ```text
96
+ /team:spawn implementer --name builder
97
+ /team:status builder
98
+ ```
99
+
100
+ Pass criteria:
101
+
102
+ - `builder.cwd` and `worktreePath` point at a fresh git worktree under
103
+ `~/.nanopencil/agent/...` (or `WorktreeManager`'s default location).
104
+ - `git worktree list` from the project shows the new worktree.
105
+ - Default mode is `plan` (read-only), **not** `execute`.
106
+
107
+ ---
108
+
109
+ ## 5. Permission model — execute escalation
110
+
111
+ ```text
112
+ /team:mode builder execute
113
+ ```
114
+
115
+ Pass criteria:
116
+
117
+ - The response says **"requires approval"** and prints a request id.
118
+ - `/team:approve` (no args) lists the pending request with action
119
+ `mode_change_to_execute`.
120
+ - `/team:approve <id>` flips `builder` to `execute` mode.
121
+ - A second `/team:approve <id>` returns "not found or already resolved".
122
+ - `/team:status builder` now reports `mode: execute`.
123
+ - `/team:terminate builder` cancels any still-pending requests cleanly.
124
+
125
+ Negative test — denial path:
126
+
127
+ ```text
128
+ /team:spawn implementer --name builder2
129
+ /team:mode builder2 execute # pending
130
+ /team:terminate builder2 # cancels the request as denied
131
+ ```
132
+
133
+ Pass criteria: terminate succeeds, no dangling promise warnings in stderr.
134
+
135
+ ---
136
+
137
+ ## 6. Mailbox observation
138
+
139
+ The mailbox is currently consumed by `team-runtime` itself; programmatic observers can subscribe via `runtime.getMailbox().subscribe(...)`. To verify mailbox traffic from a test harness:
140
+
141
+ ```ts
142
+ import { TeamRuntime } from "./team-runtime.js";
143
+ const rt = new TeamRuntime({ storageDir: "/tmp/team-test" });
144
+ rt.getMailbox().subscribe((m) => console.log(m.type, m.direction));
145
+ await rt.spawn({ role: "researcher", baseCwd: process.cwd() });
146
+ await rt.send("researcher-1", "ping");
147
+ ```
148
+
149
+ Expected message sequence: `task_request` → `task_result`.
150
+ After `setMode("...", "execute")` on an implementer: `permission_request`, then on approval `permission_response` and `mode_change`.
151
+
152
+ Shared task and teammate mail smoke test:
153
+
154
+ ```text
155
+ /team:spawn researcher --name scout
156
+ /team:spawn reviewer --name reviewer
157
+ /team:task add Map team implementation
158
+ /team:task claim T-1 scout
159
+ /team:mail scout reviewer "Please review T-1 when ready"
160
+ /team:task list
161
+ ```
162
+
163
+ Pass criteria:
164
+
165
+ - `tasks.json` contains `T-1` with `status: "claimed"` and `ownerName: "scout"`.
166
+ - `mailbox.jsonl` contains `task_update`, `task_claim`, and `teammate_message`.
167
+ - Restarting nanoPencil preserves `/team:task list` output and the teammate mailbox context shown to each teammate.
168
+ - The next `/team:send scout ...` prompt includes `Shared team tasks`, the claimed task, and recent mailbox lines targeting `scout`.
169
+
170
+ ---
171
+
172
+ ## 6.1 Path-scoped write access
173
+
174
+ ```text
175
+ /team:spawn implementer --name builder
176
+ /team:mode builder execute
177
+ /team:approve <request-id>
178
+ ```
179
+
180
+ Pass criteria:
181
+
182
+ - In execute mode, `edit`, `write`, and simple bash write commands can mutate files inside `builder.cwd`.
183
+ - Attempts to mutate an absolute path outside `builder.cwd` fail with `Write denied` or the team bash sandbox message.
184
+ - `/team:allow-path builder ../shared-output` grants that path prefix; subsequent `edit`, `write`, and simple bash writes under the approved path succeed.
185
+ - Complex shell write syntax with `&&`, pipes, command substitution, or subshells remains blocked by default.
186
+
187
+ ---
188
+
189
+ ## 7. Transcripts
190
+
191
+ After any `/team:send`:
192
+
193
+ ```bash
194
+ ls ~/.nanopencil/agent/teams/transcripts/
195
+ cat ~/.nanopencil/agent/teams/transcripts/<id>.jsonl
196
+ ```
197
+
198
+ Pass criteria:
199
+
200
+ - One JSON object per line.
201
+ - Each line has `timestamp`, `kind` (`leader` | `teammate` | `event`), and `content`.
202
+ - Transcript file is removed when the teammate is terminated.
203
+
204
+ ---
205
+
206
+ ## 8. Stop in flight
207
+
208
+ Spawn a teammate, send a long task, then immediately stop:
209
+
210
+ ```text
211
+ /team:spawn researcher --name slow
212
+ /team:send slow "Read every file under core/ and summarize"
213
+ # while running:
214
+ /team:stop slow
215
+ /team:status slow
216
+ ```
217
+
218
+ Pass criteria:
219
+
220
+ - The send call returns within ~15 seconds with `aborted: true` or an error.
221
+ - `slow` ends up in `stopped` status.
222
+ - A subsequent `/team:send slow "ping"` works (status returns to `running` then `idle`).
223
+
224
+ Concurrent send queue check:
225
+
226
+ ```text
227
+ /team:spawn researcher --name scout
228
+ /team:send scout "First long read"
229
+ # while running:
230
+ /team:send scout "Second follow-up"
231
+ ```
232
+
233
+ Pass criteria:
234
+
235
+ - The second send is queued instead of rejected.
236
+ - Mailbox contains a `task_progress` event with `status: "queued"`.
237
+ - The teammate receives and records both leader messages in order.
238
+
239
+ ---
240
+
241
+ ## 9. Subprocess SubAgent backend (smoke)
242
+
243
+ The subprocess backend is exercised via `SubprocessSubAgentBackend` directly. Quick Node smoke test from the repo root:
244
+
245
+ ```bash
246
+ npx tsx -e "
247
+ import { SubprocessSubAgentBackend } from './core/sub-agent/subprocess-backend.js';
248
+ const backend = new SubprocessSubAgentBackend();
249
+ const ctrl = new AbortController();
250
+ const handle = await backend.spawn({
251
+ prompt: 'hello',
252
+ cwd: process.cwd(),
253
+ tools: [],
254
+ signal: ctrl.signal,
255
+ });
256
+ console.log(await handle.result());
257
+ "
258
+ ```
259
+
260
+ Pass criteria:
261
+
262
+ - The script prints `{ success: true, response: '[subprocess-worker:...] received prompt of 5 chars in cwd ...' }`.
263
+ - Aborting before the worker posts: add `ctrl.abort();` on its own line immediately before `console.log(await handle.result());` — the script should print `{ success: false, error: 'Aborted' }`, and the worker thread should be terminated.
264
+
265
+ > **Scope note.** The subprocess backend ships the harness (worker_threads channel, abort wiring, lifecycle) but does **not** run the full LLM agent loop inside the worker yet. Callers that need real LLM execution should keep using the in-process backend; this backend is the foundation for future crash isolation.
266
+
267
+ ---
268
+
269
+ ## 10. Recovery (corrupt state file)
270
+
271
+ Create a malformed state file to confirm `loadAll()` skips it gracefully:
272
+
273
+ ```bash
274
+ echo "not-json" > ~/.nanopencil/agent/teams/garbage.json
275
+ npx tsx cli.ts
276
+ /team
277
+ ```
278
+
279
+ Pass criteria: nanoPencil starts normally, `/team` lists only valid teammates, no crash.
280
+
281
+ ---
282
+
283
+ ## 11. Coverage matrix
284
+
285
+ | §B item | Covered by section |
286
+ |----------------------------------|--------------------|
287
+ | B.1 Persistent teammates | §2, §3 |
288
+ | B.2 State store independence | §3, §10 |
289
+ | B.3 Mailbox protocol | §6 |
290
+ | B.4 Permission model | §5 |
291
+ | B.5 Worktree isolation | §4 |
292
+ | B.6 Multi-backend (subprocess) | §9 |
293
+ | B.7 Transcripts | §7 |
294
+ | B.8 Recovery | §3, §10 |
295
+ | AbortSignal closure (Phase A.2) | §8 |
296
+
297
+ ---
298
+
299
+ **Covenant**: When you add a new `/team:*` command or change the permission/mailbox surface, update this file in the same commit.