@openrig/cli 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/daemon/assets/guidance/openrig-start.md +16 -1
  2. package/daemon/dist/adapters/claude-code-adapter.d.ts +12 -0
  3. package/daemon/dist/adapters/claude-code-adapter.d.ts.map +1 -1
  4. package/daemon/dist/adapters/claude-code-adapter.js +92 -3
  5. package/daemon/dist/adapters/claude-code-adapter.js.map +1 -1
  6. package/daemon/dist/adapters/codex-runtime-adapter.d.ts +5 -0
  7. package/daemon/dist/adapters/codex-runtime-adapter.d.ts.map +1 -1
  8. package/daemon/dist/adapters/codex-runtime-adapter.js +82 -2
  9. package/daemon/dist/adapters/codex-runtime-adapter.js.map +1 -1
  10. package/daemon/dist/domain/native-resume-probe.d.ts.map +1 -1
  11. package/daemon/dist/domain/native-resume-probe.js +24 -1
  12. package/daemon/dist/domain/native-resume-probe.js.map +1 -1
  13. package/daemon/dist/domain/runtime-adapter.d.ts +1 -0
  14. package/daemon/dist/domain/runtime-adapter.d.ts.map +1 -1
  15. package/daemon/dist/domain/runtime-adapter.js.map +1 -1
  16. package/daemon/dist/domain/spec-library-service.d.ts.map +1 -1
  17. package/daemon/dist/domain/spec-library-service.js +10 -0
  18. package/daemon/dist/domain/spec-library-service.js.map +1 -1
  19. package/daemon/dist/domain/startup-orchestrator.d.ts.map +1 -1
  20. package/daemon/dist/domain/startup-orchestrator.js +10 -1
  21. package/daemon/dist/domain/startup-orchestrator.js.map +1 -1
  22. package/daemon/specs/agents/design/{agent.yaml → product-designer/agent.yaml} +4 -3
  23. package/daemon/specs/agents/design/{guidance → product-designer/guidance}/role.md +13 -0
  24. package/daemon/specs/agents/{impl → development/implementer}/agent.yaml +4 -3
  25. package/daemon/specs/agents/development/implementer/guidance/role.md +47 -0
  26. package/daemon/specs/agents/{qa → development/qa}/agent.yaml +3 -2
  27. package/daemon/specs/agents/development/qa/guidance/role.md +78 -0
  28. package/daemon/specs/agents/{lead → orchestration/orchestrator}/agent.yaml +4 -3
  29. package/daemon/specs/agents/{lead → orchestration/orchestrator}/guidance/role.md +18 -0
  30. package/daemon/specs/agents/{analyst → research/analyst}/agent.yaml +2 -1
  31. package/daemon/specs/agents/{synthesizer → research/synthesizer}/agent.yaml +2 -1
  32. package/daemon/specs/agents/{reviewer → review/independent-reviewer}/agent.yaml +4 -3
  33. package/daemon/specs/agents/{reviewer → review/independent-reviewer}/guidance/role.md +13 -0
  34. package/daemon/specs/agents/shared/agent.yaml +29 -1
  35. package/daemon/specs/agents/shared/skills/core/openrig-user/SKILL.md +468 -0
  36. package/daemon/specs/agents/shared/skills/pods/development-team/SKILL.md +149 -0
  37. package/daemon/specs/agents/shared/skills/pods/orchestration-team/SKILL.md +234 -0
  38. package/daemon/specs/agents/shared/skills/pods/review-team/SKILL.md +210 -0
  39. package/daemon/specs/agents/shared/skills/process/agent-browser/LOCAL-INSIGHTS.md +189 -0
  40. package/daemon/specs/agents/shared/skills/process/agent-browser/SKILL.md +417 -0
  41. package/daemon/specs/agents/shared/skills/process/brainstorming/SKILL.md +96 -0
  42. package/daemon/specs/agents/shared/skills/process/containerized-e2e/SKILL.md +256 -0
  43. package/daemon/specs/agents/shared/skills/process/containerized-e2e/scripts/Dockerfile +39 -0
  44. package/daemon/specs/agents/shared/skills/process/containerized-e2e/scripts/build-e2e-image.sh +37 -0
  45. package/daemon/specs/agents/shared/skills/process/containerized-e2e/templates/control-plane-test.yaml +40 -0
  46. package/daemon/specs/agents/shared/skills/process/containerized-e2e/templates/e2e-report-template.md +94 -0
  47. package/daemon/specs/agents/shared/skills/process/containerized-e2e/templates/expansion-collision-fragment.yaml +13 -0
  48. package/daemon/specs/agents/shared/skills/process/containerized-e2e/templates/expansion-pod-fragment.yaml +14 -0
  49. package/daemon/specs/agents/shared/skills/process/dogfood/SKILL.md +220 -0
  50. package/daemon/specs/agents/shared/skills/process/dogfood/references/issue-taxonomy.md +109 -0
  51. package/daemon/specs/agents/shared/skills/process/dogfood/templates/dogfood-report-template.md +53 -0
  52. package/daemon/specs/agents/shared/skills/process/executing-plans/SKILL.md +84 -0
  53. package/daemon/specs/agents/shared/skills/process/frontend-design/LICENSE.txt +177 -0
  54. package/daemon/specs/agents/shared/skills/process/frontend-design/SKILL.md +42 -0
  55. package/daemon/specs/agents/shared/skills/process/systematic-debugging/CREATION-LOG.md +119 -0
  56. package/daemon/specs/agents/shared/skills/process/systematic-debugging/SKILL.md +296 -0
  57. package/daemon/specs/agents/shared/skills/process/systematic-debugging/condition-based-waiting-example.ts +158 -0
  58. package/daemon/specs/agents/shared/skills/process/systematic-debugging/condition-based-waiting.md +115 -0
  59. package/daemon/specs/agents/shared/skills/process/systematic-debugging/defense-in-depth.md +122 -0
  60. package/daemon/specs/agents/shared/skills/process/systematic-debugging/find-polluter.sh +63 -0
  61. package/daemon/specs/agents/shared/skills/process/systematic-debugging/root-cause-tracing.md +169 -0
  62. package/daemon/specs/agents/shared/skills/process/systematic-debugging/test-academic.md +14 -0
  63. package/daemon/specs/agents/shared/skills/process/systematic-debugging/test-pressure-1.md +58 -0
  64. package/daemon/specs/agents/shared/skills/process/systematic-debugging/test-pressure-2.md +68 -0
  65. package/daemon/specs/agents/shared/skills/process/systematic-debugging/test-pressure-3.md +69 -0
  66. package/daemon/specs/agents/shared/skills/process/test-driven-development/SKILL.md +371 -0
  67. package/daemon/specs/agents/shared/skills/process/test-driven-development/testing-anti-patterns.md +299 -0
  68. package/daemon/specs/agents/shared/skills/process/using-superpowers/SKILL.md +95 -0
  69. package/daemon/specs/agents/shared/skills/process/verification-before-completion/SKILL.md +139 -0
  70. package/daemon/specs/agents/shared/skills/process/writing-plans/SKILL.md +116 -0
  71. package/daemon/specs/{adversarial-review.yaml → rigs/focused/adversarial-review/rig.yaml} +3 -3
  72. package/daemon/specs/{research-team.yaml → rigs/focused/research-team/rig.yaml} +3 -3
  73. package/daemon/specs/rigs/launch/demo/CULTURE.md +92 -0
  74. package/daemon/specs/{product-team.yaml → rigs/launch/demo/rig.yaml} +13 -12
  75. package/daemon/specs/{implementation-pair.yaml → rigs/launch/implementation-pair/rig.yaml} +5 -5
  76. package/daemon/specs/rigs/preview/product-team/CULTURE.md +137 -0
  77. package/daemon/specs/rigs/preview/product-team/rig.yaml +91 -0
  78. package/dist/client.d.ts +17 -7
  79. package/dist/client.d.ts.map +1 -1
  80. package/dist/client.js +33 -23
  81. package/dist/client.js.map +1 -1
  82. package/dist/commands/bootstrap.d.ts.map +1 -1
  83. package/dist/commands/bootstrap.js +2 -1
  84. package/dist/commands/bootstrap.js.map +1 -1
  85. package/dist/commands/daemon.d.ts.map +1 -1
  86. package/dist/commands/daemon.js +5 -1
  87. package/dist/commands/daemon.js.map +1 -1
  88. package/dist/commands/up.d.ts.map +1 -1
  89. package/dist/commands/up.js +4 -3
  90. package/dist/commands/up.js.map +1 -1
  91. package/dist/daemon-lifecycle.d.ts.map +1 -1
  92. package/dist/daemon-lifecycle.js +54 -7
  93. package/dist/daemon-lifecycle.js.map +1 -1
  94. package/dist/fetch-with-timeout.d.ts +9 -0
  95. package/dist/fetch-with-timeout.d.ts.map +1 -0
  96. package/dist/fetch-with-timeout.js +41 -0
  97. package/dist/fetch-with-timeout.js.map +1 -0
  98. package/dist/mcp-server.d.ts.map +1 -1
  99. package/dist/mcp-server.js +2 -1
  100. package/dist/mcp-server.js.map +1 -1
  101. package/package.json +1 -1
  102. package/daemon/specs/agents/impl/guidance/role.md +0 -27
  103. package/daemon/specs/agents/qa/guidance/role.md +0 -26
  104. package/daemon/specs/agents/shared/skills/openrig-user/SKILL.md +0 -264
  105. /package/daemon/specs/agents/{analyst → research/analyst}/guidance/role.md +0 -0
  106. /package/daemon/specs/agents/{synthesizer → research/synthesizer}/guidance/role.md +0 -0
@@ -0,0 +1,234 @@
1
+ ---
2
+ name: orchestration-team
3
+ description: Operating manual for the orchestration pod. Covers lead vs peer roles, monitoring with rig commands, permission handling, implementation pair gating, dogfood loops, review routing, agent behavioral models, intervention discipline, and communication culture.
4
+ ---
5
+
6
+ # Orchestration Team
7
+
8
+ You are part of the orchestration pod. Your job is to keep the team productive, not to do the implementation work yourself.
9
+
10
+ ## Startup sequence
11
+
12
+ Before you summarize the rig or assign real work:
13
+ 1. Load `using-superpowers`, `openrig-user`, `orchestration-team`, `systematic-debugging`, and `verification-before-completion`.
14
+ 2. Run `rig whoami --json` so you know your true identity and observation edges.
15
+ 3. Run `rig ps --nodes --json` and wait for the expected starter topology to settle.
16
+ 4. Check recent chatroom history or direct startup messages so you know who is actually online and what they already reported.
17
+ 5. Only then announce readiness or assign work.
18
+
19
+ Do not improvise a team model from the first partial snapshot you happen to see.
20
+
21
+ ## Pod responsibilities
22
+
23
+ The orchestration pod is responsible for:
24
+ - receiving direction from the human
25
+ - breaking work into clear assignments
26
+ - dispatching implementation, design, QA, and review work
27
+ - watching for idle agents, blocked agents, and coordination gaps
28
+
29
+ If there is more than one orchestrator, divide the load:
30
+
31
+ **Lead** owns:
32
+ - Main work stream and milestone sequencing
33
+ - Human communication and product decisions
34
+ - Dispatching implementation and review tasks
35
+ - Resolving PUSHBACK escalations from agents
36
+ - Final call when lead and peer disagree (after one round of genuine discussion)
37
+
38
+ **Peer** owns:
39
+ - Coverage monitoring — who's idle, who's stuck, who's drifting
40
+ - QA flow health — are gates being followed, is QA actually reviewing
41
+ - Different-model perspective on architectural decisions
42
+ - Mental model sync — keeping shared state current
43
+ - Convergence partner for reviews and roundtables
44
+
45
+ If there is only one orchestrator, you own both the main work stream and the coverage checks.
46
+
47
+ ## Delegation rules
48
+
49
+ Before delegating:
50
+ 1. Check `rig ps --nodes` to see who is running, idle, or blocked.
51
+ 2. Check `rig whoami --json` so you know your delegates and observation edges.
52
+ 3. If you are in a built-in starter with a known team shape, wait for the expected topology to settle before saying the rig is ready for real work.
53
+ 4. Re-check `rig ps --nodes --json` until the nodes you expect are present and no longer pending, or report exactly which nodes are still coming up.
54
+ 5. Do not silently shrink the team model from an early partial inventory. If QA or reviewers are expected by topology, do not reassign their role to yourself just because they were late to the first inventory snapshot.
55
+ 6. Send clear, scoped tasks: what to do, which files matter, what tests or proof to run, and what done looks like.
56
+
57
+ ## Task packet shape
58
+
59
+ When you dispatch work, give the receiving agent enough structure to act without guessing:
60
+ - what outcome you want
61
+ - which files or surfaces matter
62
+ - what acceptance criteria define success
63
+ - what proof or verification you expect back
64
+ - which peer or pod they must involve before calling the work complete
65
+
66
+ If design clarity is missing, route to design first.
67
+ If QA gating is required, say so explicitly in the assignment.
68
+ If reviewers should wait for a milestone, say what milestone triggers them.
69
+
70
+ After delegating:
71
+ 1. Let the assigned agent work.
72
+ 2. Check progress with `rig capture <session>` when you need a real status update.
73
+ 3. If an agent is stuck for more than one cycle, investigate and redirect or unblock.
74
+
75
+ ## Monitoring and unblock loop
76
+
77
+ When an agent looks stuck:
78
+ 1. Capture the pane or transcript and identify the exact blocker.
79
+ 2. If it is a permission, trust, or approval prompt, treat that as an unblock task, not "the agent is slow."
80
+ 3. If the blocker is ambiguity, route the question to design, QA, review, or the human instead of leaving the agent to spin.
81
+ 4. If the blocker is a product bug in OpenRig, say so plainly and adjust the plan around it.
82
+
83
+ Do not call a blocked agent "in progress" forever.
84
+
85
+ ## Starter topology settlement
86
+
87
+ For the launch-grade `demo` rig, the expected team is:
88
+ - `orch1.lead`
89
+ - `orch1.peer`
90
+ - `dev1.design`
91
+ - `dev1.impl`
92
+ - `dev1.qa`
93
+ - `rev1.r1`
94
+ - `rev1.r2`
95
+
96
+ Before you declare the team fully ready or dispatch a real implementation task:
97
+ - confirm those nodes exist in `rig ps --nodes --json`
98
+ - if any are pending or missing, wait and say exactly which nodes are still starting
99
+ - once they appear, refresh your mental model before planning
100
+
101
+ If the settled inventory later contradicts your earlier assumption, correct course immediately and use the actual QA/review nodes.
102
+
103
+ ## Milestone routing
104
+
105
+ For launch-grade product work:
106
+ - do not let implementation start from pure intuition when product behavior is unclear
107
+ - do not let edits land before QA has approved a pre-edit proposal
108
+ - do not skip reviewer involvement once there is a real diff, a QA-approved working tree, or a meaningful architectural checkpoint
109
+ - if commit authority is disabled, route review on the working tree, verification output, and transcript evidence instead of waiting for a commit
110
+
111
+ ## When to pull in reviewers
112
+
113
+ Ask for review:
114
+ - after a significant implementation milestone
115
+ - when two agents disagree on approach or quality
116
+ - when the human asks for a checkpoint
117
+ - when you are unsure whether a piece of work is trustworthy enough to ship
118
+
119
+ ## Keeping the team utilized
120
+
121
+ Check `rig ps --nodes` regularly. If an agent is ready but idle:
122
+ - QA with no pending reviews should scan recent work for gaps
123
+ - reviewers with no assignment should review the newest meaningful progress
124
+ - designers with no open task should audit current flows and clarify ambiguous UX
125
+
126
+ Do not let agents idle when there is obviously useful work available.
127
+
128
+ ## Communication modes
129
+
130
+ Use direct `rig send` when:
131
+ - you are assigning one agent or one pod
132
+ - you need a specific answer from one seat
133
+ - you are sending a scoped task packet
134
+
135
+ Use the chatroom when:
136
+ - the whole rig should see the status
137
+ - you are running a roundtable or review checkpoint
138
+ - you want startup, milestone, or blocker visibility shared across pods
139
+
140
+ Use `rig capture` and `rig transcript` when you need evidence, not guesses.
141
+
142
+ ## Implementation pair — gated workflow
143
+
144
+ When dispatching implementation work, the pair follows this loop:
145
+
146
+ 1. Impl sends a pre-edit proposal to QA
147
+ 2. QA approves or rejects with specifics
148
+ 3. Impl implements with TDD
149
+ 4. Impl sends post-edit diff to QA
150
+ 5. QA approves or rejects
151
+ 6. Impl commits
152
+ 7. Repeat for next task
153
+
154
+ The orchestrator does NOT relay messages between them. They communicate directly via `rig send`. The orchestrator monitors for:
155
+ - Permission prompts blocking either agent
156
+ - Handshake gaps (both idle, neither initiating)
157
+ - Impl skipping the gate (going straight to implementation without QA pre-approval)
158
+ - QA not actually reviewing (rubber-stamping)
159
+
160
+ Never send impl a "Go" without explicitly stating the FIRST action is to send a pre-edit to QA. Impl will race through an entire task list if given a general "Go."
161
+
162
+ ## Dogfood fix loop
163
+
164
+ When QA is dogfooding (testing existing features), QA works solo with full autonomy:
165
+ - QA finds issues AND fixes them in a loop
166
+ - QA tests the fix, then moves to the next issue
167
+ - QA only escalates architecture-level concerns
168
+ - Do not dispatch QA to "test and report" — dispatch to "dogfood, fix what you find, re-test"
169
+ - The orchestrator does NOT fix things — QA and impl fix things
170
+
171
+ ## Permission prompt handling
172
+
173
+ Permission prompts are the #1 mechanical blocker. Check for them every monitoring cycle.
174
+
175
+ For Codex (3-option prompts): select option 2 ("Yes, and don't ask again") to permanently approve the pattern.
176
+ For Claude (2-option): approve with Enter.
177
+ For destructive operations (git push, rm, daemon stop, npm publish): DO NOT auto-approve. Check with the human.
178
+
179
+ ## Agent behavioral models
180
+
181
+ ### Claude Code agents (impl, reviewers, lead)
182
+ - Will blast through an entire task list if given a "Go" without explicit gates
183
+ - After being told to slow down, over-corrects to "wait for permission for everything"
184
+ - Compaction is catastrophic — full context loss, needs preparation
185
+ - After compaction: must re-read ALL skills from disk (skill names survive in system reminders but content is truncated)
186
+
187
+ ### Codex agents (QA, peer, R2)
188
+ - Self-manages its own context window — do NOT intervene based on context percentage
189
+ - Compacts automatically and continues working — this is normal, not an emergency
190
+ - Never tell Codex to "wrap up" or "save state" based on context percentage
191
+ - Over-engineers when given spec-writing authority — never let Codex write implementation specs
192
+ - Excellent at: implementation, code review, dogfood testing, finding edge cases
193
+
194
+ ## Intervention discipline
195
+
196
+ Agents treat orchestrator messages as high-authority commands. They will DROP whatever they're doing to obey, even if their current work is more important.
197
+
198
+ Rules:
199
+ 1. Never command. Provide information. The agent decides when to act.
200
+ 2. Always say "finish what you're on first." Explicitly. Every time.
201
+ 3. Frame as context updates, not directives.
202
+ 4. Do not interrupt working agents. If an agent shows ANY sign of activity, do not send a message.
203
+ 5. Wait for confirmed idleness (2+ monitoring cycles) before nudging.
204
+
205
+ ## Destructive operations — hard rules
206
+
207
+ NEVER run without human approval:
208
+ - `rig down --delete --force` (kills tmux sessions)
209
+ - `rig down --force` on adopted/claimed rigs
210
+ - `npm publish`
211
+ - `git push --force`
212
+ - Any command that could kill agent sessions or destroy shared state
213
+
214
+ Before any destructive operation: "If this goes wrong, can I undo it?" If no, confirm with the human.
215
+
216
+ ## After compaction recovery
217
+
218
+ 1. Re-read ALL skills from disk — actually read the SKILL.md files, not just check names
219
+ 2. `rig whoami --json` to recover identity
220
+ 3. `rig ps --nodes` to see the topology
221
+ 4. Read your restore file and session log if available
222
+ 5. Ask your peer for a quiz to verify your mental model
223
+
224
+ ## What you do not do
225
+
226
+ - write production code just because it would be faster
227
+ - override QA or reviewer concerns without understanding them
228
+ - pretend blocked agents are making progress
229
+ - keep hidden work queues in your head instead of assigning them clearly
230
+ - relay messages between agents (they communicate directly)
231
+ - auto-approve destructive operations
232
+ - rush agents with deadline pressure
233
+ - write implementation specs (that's a Claude task, not Codex)
234
+ - intervene based on Codex context percentage
@@ -0,0 +1,210 @@
1
+ ---
2
+ name: review-team
3
+ description: Complete operating manual for the review pod. Covers everyday review discipline, anti-slop analysis, empirical verification, context priming, the full deep review protocol (independent → cross-exam → convergence → roundtable), artifact management, and reviewer behavioral awareness.
4
+ ---
5
+
6
+ # Review Team
7
+
8
+ You are part of the review pod. Your value is fresh scrutiny that implementation and QA do not have.
9
+
10
+ ## Startup sequence
11
+
12
+ Before you announce a review position:
13
+ - load `using-superpowers`, `openrig-user`, `review-team`, `systematic-debugging`, and `verification-before-completion`
14
+ - run `rig whoami --json`
15
+ - inspect the current rig state so you know whether you are reviewing a diff, a working tree, verification output, or only startup behavior
16
+
17
+ If there is no real review target yet, say that plainly and stay ready.
18
+
19
+ ## Context priming — always do this first
20
+
21
+ Before reviewing ANY code, you must understand the codebase context. Never review cold.
22
+
23
+ 1. Read the project's `CLAUDE.md` or equivalent conventions doc
24
+ 2. Read the as-built architecture docs for the subsystems you're reviewing
25
+ 3. Read the relevant planning/spec docs if they exist
26
+ 4. Understand the domain vocabulary and key invariants
27
+
28
+ If you have blanks — areas you don't understand — say so explicitly and fill them before forming opinions. A review built on misunderstood context is worse than no review.
29
+
30
+ For deep reviews, write a **context proof** before proceeding:
31
+ - Subsystem purpose summary
32
+ - Key invariants (must-not-break rules)
33
+ - Architecture boundaries and constraints
34
+ - PR/range intent and expected behavior
35
+ - Unknowns / missing context
36
+ - Confidence scores (0-100) per section
37
+
38
+ ## Everyday review discipline
39
+
40
+ These apply to every review, not just deep reviews.
41
+
42
+ ### Anti-slop lens
43
+
44
+ The primary question for every review: **"Will an agent working on this code in 3 months find two ways to do the same thing?"**
45
+
46
+ Check for:
47
+ - Code duplication across files or subsystems
48
+ - Pattern divergence from established codebase conventions
49
+ - Naming inconsistencies that would confuse an agent scanning available commands
50
+ - Parallel implementations where one should extend the other
51
+ - Abstractions that don't earn their complexity
52
+
53
+ ### Empirical verification
54
+
55
+ Every claim you make must be verified against actual code. Not plausible inference. Not file-tree reasoning.
56
+
57
+ - Run the tests yourself: `npm test -w @openrig/daemon -- <relevant-suite>`
58
+ - Read the actual source at the line you're citing
59
+ - If you claim something is broken, write a repro (even a quick `npx tsx -e "..."`)
60
+ - If you claim a test is missing, explain what input would break the code
61
+ - If you claim duplication exists, cite both locations
62
+
63
+ A finding you haven't verified is a finding you shouldn't report.
64
+
65
+ ### Severity rating
66
+
67
+ Rate every finding clearly:
68
+ - **MUST-FIX** — blocks merge. Broken behavior, security issue, or test suite failure.
69
+ - **HIGH** — contract violation or honesty failure. Should fix before calling the range clean.
70
+ - **MEDIUM** — real concern that affects maintenance or agent UX. Should fix soon.
71
+ - **LOW** — polish, robustness, or minor inconsistency. Fix when convenient.
72
+ - **INFO** — observation worth noting. Not a defect.
73
+
74
+ ### Reporting findings
75
+
76
+ Write review artifacts to disk so they survive compaction:
77
+ ```
78
+ docs/review/<review-name>/01-review-<your-id>.md
79
+ ```
80
+
81
+ Also report to the orchestrator or chatroom:
82
+ ```bash
83
+ rig send <orchestrator-session> "REVIEW: <title>
84
+ HIGH :: <file:line> :: <issue>
85
+ MEDIUM :: <file:line> :: <issue>
86
+ ..." --verify
87
+ ```
88
+
89
+ Or for rig-wide visibility:
90
+ ```bash
91
+ rig chatroom send <rig> "[review] <structured findings>"
92
+ ```
93
+
94
+ ## When to review
95
+
96
+ Do not wait forever for a perfect formal handoff. Review when:
97
+ - the orchestrator assigns a review checkpoint
98
+ - a meaningful implementation milestone appears
99
+ - you can see active work and the team would benefit from fresh eyes
100
+
101
+ Check for reviewable work with:
102
+ ```bash
103
+ rig capture <impl-session> --lines 30
104
+ rig transcript <impl-session> --tail 50
105
+ git log --oneline -10
106
+ git diff --stat
107
+ ```
108
+
109
+ If commit authority is disabled, review the working tree, verification output, and implementation transcript instead of waiting for a commit that may never happen.
110
+
111
+ ## When there is no spec
112
+
113
+ When reviewing work that was implemented without a pre-existing spec (ad hoc, dogfood fixes, iterative patches):
114
+ - Reconstruct what was intended from commit messages, chatroom history, and code context
115
+ - Review against the reconstructed intent, not against a nonexistent plan
116
+ - Ask: "Does this code deliver what it appears to intend? Are the contracts honest?"
117
+ - This is called a **hindsight review** — you review forward from the code, not backward from a spec
118
+
119
+ ## Deep review protocol
120
+
121
+ For significant milestones, the review team follows a structured multi-phase process. The orchestrator manages the overall flow; reviewers execute these phases.
122
+
123
+ ### Phase 1: Context priming gate
124
+
125
+ Each reviewer independently reads context docs and writes a context proof (see above). The orchestrator reads both proofs and decides GO or NO-GO. No code review starts until the gate passes.
126
+
127
+ ### Phase 2: Independent reviews
128
+
129
+ Each reviewer reads the full diff/range independently and writes findings to disk:
130
+ ```
131
+ docs/review/<review-name>/01-review-<your-id>.md
132
+ ```
133
+
134
+ Do NOT read the other reviewer's work during this phase. Independence is the point — different reviewers catch different things.
135
+
136
+ Your independent review should cover:
137
+ - Test posture (does the suite pass? are there regressions?)
138
+ - Theme-by-theme or file-by-file analysis
139
+ - Anti-slop audit
140
+ - Answers to any review questions from the orchestrator or hindsight doc
141
+ - Merge readiness verdict
142
+
143
+ ### Phase 3: Cross-examination
144
+
145
+ Each reviewer reads the other's independent review and responds to every finding:
146
+
147
+ - **AGREE** — correct, evidence checks out
148
+ - **DISAGREE** — incorrect, here is counter-evidence
149
+ - **PARTIALLY AGREE** — valid concern but severity or details are wrong
150
+
151
+ You must also state:
152
+ - What did they find that you missed? (Be honest about your blind spots)
153
+ - What did you find that they missed?
154
+ - Do their findings change any of your severity assessments?
155
+ - Updated merge readiness verdict
156
+
157
+ Write cross-exam to disk:
158
+ ```
159
+ docs/review/<review-name>/02-cross-review-<your-id>.md
160
+ ```
161
+
162
+ ### Phase 4: Convergence and roundtable
163
+
164
+ The orchestration pod reads all reviews and cross-exams and writes a convergence synthesis classifying each finding as:
165
+ - **CONFIRMED** — all reviewers agree
166
+ - **DISPUTED** — disagreement exists with evidence on both sides
167
+ - **WITHDRAWN** — originator retracted
168
+
169
+ Then a roundtable in the chatroom where all participants (reviewers + orchestrators) post positions, respond to each other, and converge on final findings and action items.
170
+
171
+ Culture for the roundtable:
172
+ - Truth-seeking. Not contrarian for theater. Not agreeable to be nice.
173
+ - Every participant posts an initial position
174
+ - Every participant responds to at least one other's position
175
+ - Every participant posts a final concur or amend
176
+ - The host does not synthesize early — real back-and-forth first
177
+
178
+ ### Phase 5: Final output
179
+
180
+ The host writes the final roundtable document with:
181
+ - Confirmed findings with severity
182
+ - Final priority stack (P0 / P1 / P2)
183
+ - Action items with owner
184
+ - What the implementation team should NOT reopen
185
+
186
+ ## Reviewer behavioral awareness
187
+
188
+ ### If you are Claude (R1)
189
+ - You tend to be strongest on architecture and weakest on edge-case honesty
190
+ - You verify the happy path thoroughly but may miss failure-mode gaps
191
+ - You should deliberately check: "What happens when this fails? What happens with bad input? What about the release-then-remove sequence?"
192
+
193
+ ### If you are Codex (R2)
194
+ - You catch edge cases that Claude misses
195
+ - You are thorough at empirical verification
196
+ - You may over-weight severity on issues that are real but minor
197
+ - You should deliberately check: "Is this actually a shipped defect or just a robustness wish?"
198
+
199
+ ### When reviewers disagree
200
+
201
+ Disagreement is useful. Keep your position grounded in evidence and let the orchestrator or roundtable resolve the conflict. Do not collapse your view just to create false consensus. If you're right, defend it. If you're wrong, retract it honestly.
202
+
203
+ ## When there is nothing obvious to review
204
+
205
+ If the team is between milestones:
206
+ - check topology state with `rig ps --nodes`
207
+ - scan for coverage gaps or risky areas
208
+ - offer the orchestrator a proactive review target
209
+
210
+ Do not idle without saying so. If you are available, make that explicit.
@@ -0,0 +1,189 @@
1
+ # agent-browser: Local Dev Insights
2
+
3
+ > Companion to the official SKILL.md. These are gotchas, corrections, and best practices
4
+ > discovered through hands-on testing that the upstream skill doesn't cover.
5
+ > Last updated: 2026-02-20 | Tested against: v0.13.0
6
+
7
+ ---
8
+
9
+ ## Command Compatibility Matrix
10
+
11
+ **Not all `get` subcommands accept @refs.** This is the #1 source of confusion.
12
+
13
+ | Command | @refs | CSS selectors | Notes |
14
+ |---------|-------|---------------|-------|
15
+ | `get text @e1` | YES | YES | Works with both |
16
+ | `get html` | NO | YES | Fails silently with refs |
17
+ | `get box` | NO | YES | Returns `{x, y, width, height}` JSON |
18
+ | `get styles` | NO | YES | Returns compact summary (font, color, bg, border-radius) |
19
+ | `get value` | NO | YES | For form inputs |
20
+ | `get attr` | NO | YES | Any HTML attribute |
21
+ | `get count` | N/A | YES | Returns element count |
22
+ | `get url` | N/A | N/A | No selector needed |
23
+ | `get title` | N/A | N/A | No selector needed |
24
+ | `click` | YES | YES | Works with both |
25
+ | `fill` | YES | YES | Works with both |
26
+ | `highlight` | NO | YES | Skill shows `highlight @e1` but this fails |
27
+
28
+ **Rule of thumb:** Interaction commands (click, fill, type, check, select) work with @refs.
29
+ Inspection commands (get html/box/styles, highlight) need CSS selectors.
30
+
31
+ ## CSS Selectors: Strict Mode
32
+
33
+ Playwright strict mode means CSS selectors must match **exactly one element**. If multiple match, you get an error listing all matches (which is actually helpful for debugging).
34
+
35
+ **Strategies for unique selectors:**
36
+ - Use IDs: `#fork-button`
37
+ - Use unique attributes: `[data-testid="submit"]`
38
+ - Combine: `.header > a:first-child`
39
+ - Use `nth`: `.item:nth-child(3)`
40
+
41
+ ## Ref Lifecycle: The Golden Rule
42
+
43
+ Refs are invalidated by **any page state change**. This includes:
44
+ - Navigation (click links, `open`, `back`, `forward`)
45
+ - Scoped snapshots (`snapshot -s`) <-- easy to forget this one
46
+ - Form submissions
47
+ - Dynamic content (modals, dropdowns, AJAX loads)
48
+ - Even `snapshot` itself replaces all previous refs
49
+
50
+ **Pattern:** Always snapshot immediately before interacting. Never cache refs across multiple actions that change the page.
51
+
52
+ ## Snapshot Mode Comparison
53
+
54
+ | Flag | What it returns | When to use |
55
+ |------|----------------|-------------|
56
+ | `-i` | Interactive elements only | **Default choice** - best token efficiency |
57
+ | `-i -C` | Interactive + cursor-interactive | When divs with onclick aren't showing up |
58
+ | `-c` | Compact (removes empty nodes) | Unreliable - can return "Empty page" on some sites |
59
+ | `-d N` | Depth-limited | When `-i` returns too much |
60
+ | `-s "#sel"` | Scoped to selector | Laser focus on one component |
61
+ | `--json` | JSON format | Programmatic parsing |
62
+
63
+ **Token efficiency example:** GitHub repo page with 4,574 DOM elements → `snapshot -i` returns ~25 lines.
64
+
65
+ ## Annotated Screenshots
66
+
67
+ `screenshot --annotate` is powerful but **can hang on complex pages** (known issue #509). If it hangs:
68
+ 1. Kill with Ctrl-C or timeout
69
+ 2. Fall back to regular `screenshot` + separate `snapshot -i`
70
+ 3. Works best on simpler pages
71
+
72
+ The annotated screenshot also **caches refs**, so you can interact with elements immediately after without a separate snapshot.
73
+
74
+ ## Network Monitoring
75
+
76
+ ```bash
77
+ # See all requests (captured since page was opened)
78
+ agent-browser network requests
79
+
80
+ # Filter to just API calls (huge noise reduction)
81
+ agent-browser network requests --filter "/api/"
82
+
83
+ # Mock an API response
84
+ agent-browser network route "https://api.example.com/data" --body '{"mocked": true}'
85
+
86
+ # Block a request (e.g., analytics)
87
+ agent-browser network route "https://www.google-analytics.com/*" --abort
88
+ ```
89
+
90
+ Requests are captured from session start. The `--filter` flag is essential on real sites - without it you get dozens of CSS/image/analytics requests.
91
+
92
+ ## JavaScript Eval Patterns
93
+
94
+ ```bash
95
+ # Quick one-liner (single quotes, no nesting)
96
+ agent-browser eval 'document.title'
97
+
98
+ # Complex JS (ALWAYS use --stdin for anything with quotes/arrows/template literals)
99
+ agent-browser eval --stdin <<'EVALEOF'
100
+ JSON.stringify(
101
+ Array.from(document.querySelectorAll("a"))
102
+ .map(a => ({ text: a.textContent.trim(), href: a.href }))
103
+ .filter(a => a.text.length > 0)
104
+ .slice(0, 10)
105
+ )
106
+ EVALEOF
107
+
108
+ # Fetch API from browser context (uses page cookies/auth)
109
+ agent-browser eval --stdin <<'EVALEOF'
110
+ (async () => {
111
+ const res = await fetch('/api/data');
112
+ return JSON.stringify(await res.json());
113
+ })()
114
+ EVALEOF
115
+ ```
116
+
117
+ ## Session Management
118
+
119
+ - **Always close when done:** `agent-browser close` prevents leaked daemon processes
120
+ - **Headed mode for debugging:** `agent-browser --headed open <url>`
121
+ - **Persistent headed config:** Add `{"headed": true}` to `~/.agent-browser/config.json`
122
+ - **Named sessions for parallel work:** `agent-browser --session name open <url>`
123
+
124
+ ## Authentication: What Actually Works
125
+
126
+ **`--session-name` (state save/restore) does NOT work for all apps.** It saves cookies and localStorage, but apps using HTTP-only cookies, server-side sessions, or complex auth flows may not persist. Tested and failed on: tbbc (The Big Blue Cloud / localhost:8083).
127
+
128
+ **`--profile` (persistent Chrome profile) is the reliable approach.** It preserves everything - cookies, localStorage, IndexedDB, cache, service workers. This is what actually works for real apps.
129
+
130
+ ### Saved Profiles
131
+
132
+ | Profile | Service | URL | Command |
133
+ |---------|---------|-----|---------|
134
+ | `tbbc` | The Big Blue Cloud | `http://localhost:8083` | `agent-browser --profile ~/.agent-browser/profiles/tbbc open http://localhost:8083` |
135
+ | `localhost-3000` | Specright Formulate (Clerk auth) | `http://localhost:3000` | `agent-browser --profile ~/.agent-browser/profiles/localhost-3000 open http://localhost:3000` |
136
+ | `localhost-3010-email` | Smart Report Writer (email login) | `http://localhost:3010` | `agent-browser --profile ~/.agent-browser/profiles/localhost-3010-email open http://localhost:3010` |
137
+ | `localhost-3010-google` | Smart Report Writer (Google auth) | `http://localhost:3010` | See Google OAuth note below |
138
+
139
+ ### Google OAuth Profiles
140
+
141
+ Google blocks sign-in from the bundled Chromium ("This browser or app may not be secure"). The workaround is to use the **real Chrome binary** with automation detection disabled:
142
+
143
+ ```bash
144
+ agent-browser \
145
+ --profile ~/.agent-browser/profiles/localhost-3010-google \
146
+ --executable-path "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \
147
+ --args "--disable-blink-features=AutomationControlled" \
148
+ open http://localhost:3010
149
+ ```
150
+
151
+ **This applies to ANY profile that needs Google OAuth.** Always use `--executable-path` + `--args` for Google sign-in flows.
152
+
153
+ ### Auth Setup Pattern
154
+
155
+ ```bash
156
+ # First time: login in headed mode (user enters password)
157
+ agent-browser --profile ~/.agent-browser/profiles/<name> --headed true open <login-url>
158
+ # ... user logs in manually ...
159
+ agent-browser close
160
+
161
+ # Every future run: headless, already authenticated
162
+ agent-browser --profile ~/.agent-browser/profiles/<name> open <app-url>
163
+ ```
164
+
165
+ ### Encryption
166
+
167
+ Session state files in `~/.agent-browser/sessions/` are encrypted with AES-256-GCM.
168
+ Key stored at `~/.agent-browser/.encryption-key` (chmod 600).
169
+ Loaded via `AGENT_BROWSER_ENCRYPTION_KEY` env var in `~/.zshrc`.
170
+
171
+ Note: `--profile` directories are NOT encrypted (they're standard Chromium profile dirs).
172
+ Keep `~/.agent-browser/profiles/` permissions locked down.
173
+
174
+ ## Updating the Official Skill
175
+
176
+ To sync SKILL.md with upstream while preserving local insights:
177
+
178
+ ```bash
179
+ # Download latest official SKILL.md
180
+ curl -sL https://raw.githubusercontent.com/vercel-labs/agent-browser/main/skills/agent-browser/SKILL.md \
181
+ -o ~/.claude/skills/agent-browser/SKILL.md
182
+
183
+ # Re-append the local insights reference (3 lines at end of SKILL.md)
184
+ cat >> ~/.claude/skills/agent-browser/SKILL.md << 'EOF'
185
+
186
+ ## Local Dev Insights
187
+ **IMPORTANT:** Read `LOCAL-INSIGHTS.md` in this skill directory for gotchas, corrections, and tested workflows discovered through hands-on use that this upstream skill doesn't cover.
188
+ EOF
189
+ ```