opencastle 0.32.4 → 0.32.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/README.md +13 -3
  2. package/bin/cli.mjs +2 -0
  3. package/dist/cli/bootstrap.js +1 -1
  4. package/dist/cli/bootstrap.js.map +1 -1
  5. package/dist/cli/bootstrap.test.js +16 -0
  6. package/dist/cli/bootstrap.test.js.map +1 -1
  7. package/dist/cli/init.test.js +38 -0
  8. package/dist/cli/init.test.js.map +1 -1
  9. package/dist/cli/stack-config-update.test.js +18 -0
  10. package/dist/cli/stack-config-update.test.js.map +1 -1
  11. package/dist/cli/stack-config.d.ts.map +1 -1
  12. package/dist/cli/stack-config.js +1 -0
  13. package/dist/cli/stack-config.js.map +1 -1
  14. package/dist/cli/types.d.ts +1 -1
  15. package/dist/cli/types.d.ts.map +1 -1
  16. package/dist/orchestrator/plugins/index.d.ts.map +1 -1
  17. package/dist/orchestrator/plugins/index.js +4 -0
  18. package/dist/orchestrator/plugins/index.js.map +1 -1
  19. package/dist/orchestrator/plugins/notion/config.d.ts +3 -0
  20. package/dist/orchestrator/plugins/notion/config.d.ts.map +1 -0
  21. package/dist/orchestrator/plugins/notion/config.js +46 -0
  22. package/dist/orchestrator/plugins/notion/config.js.map +1 -0
  23. package/dist/orchestrator/plugins/trello/config.d.ts +3 -0
  24. package/dist/orchestrator/plugins/trello/config.d.ts.map +1 -0
  25. package/dist/orchestrator/plugins/trello/config.js +43 -0
  26. package/dist/orchestrator/plugins/trello/config.js.map +1 -0
  27. package/dist/orchestrator/plugins/types.d.ts +1 -1
  28. package/dist/orchestrator/plugins/types.d.ts.map +1 -1
  29. package/package.json +1 -1
  30. package/src/cli/bootstrap.test.ts +21 -0
  31. package/src/cli/bootstrap.ts +1 -1
  32. package/src/cli/init.test.ts +46 -0
  33. package/src/cli/stack-config-update.test.ts +20 -0
  34. package/src/cli/stack-config.ts +1 -0
  35. package/src/cli/types.ts +1 -1
  36. package/src/dashboard/node_modules/.vite/deps/_metadata.json +6 -6
  37. package/src/orchestrator/agents/api-designer.agent.md +25 -34
  38. package/src/orchestrator/agents/architect.agent.md +40 -84
  39. package/src/orchestrator/agents/content-engineer.agent.md +29 -31
  40. package/src/orchestrator/agents/copywriter.agent.md +35 -60
  41. package/src/orchestrator/agents/data-expert.agent.md +24 -30
  42. package/src/orchestrator/agents/database-engineer.agent.md +26 -31
  43. package/src/orchestrator/agents/developer.agent.md +32 -34
  44. package/src/orchestrator/agents/devops-expert.agent.md +31 -26
  45. package/src/orchestrator/agents/documentation-writer.agent.md +29 -29
  46. package/src/orchestrator/agents/performance-expert.agent.md +36 -33
  47. package/src/orchestrator/agents/release-manager.agent.md +25 -34
  48. package/src/orchestrator/agents/researcher.agent.md +41 -95
  49. package/src/orchestrator/agents/reviewer.agent.md +24 -34
  50. package/src/orchestrator/agents/security-expert.agent.md +35 -39
  51. package/src/orchestrator/agents/seo-specialist.agent.md +25 -32
  52. package/src/orchestrator/agents/session-guard.agent.md +20 -79
  53. package/src/orchestrator/agents/team-lead.agent.md +50 -254
  54. package/src/orchestrator/agents/testing-expert.agent.md +37 -49
  55. package/src/orchestrator/agents/ui-ux-expert.agent.md +33 -39
  56. package/src/orchestrator/customizations/KNOWN-ISSUES.md +0 -1
  57. package/src/orchestrator/customizations/agents/skill-matrix.json +20 -4
  58. package/src/orchestrator/customizations/agents/skill-matrix.md +20 -0
  59. package/src/orchestrator/instructions/general.instructions.md +24 -84
  60. package/src/orchestrator/plugins/astro/SKILL.md +23 -179
  61. package/src/orchestrator/plugins/convex/SKILL.md +38 -12
  62. package/src/orchestrator/plugins/index.ts +4 -0
  63. package/src/orchestrator/plugins/netlify/SKILL.md +17 -13
  64. package/src/orchestrator/plugins/nextjs/SKILL.md +55 -261
  65. package/src/orchestrator/plugins/notion/SKILL.md +205 -0
  66. package/src/orchestrator/plugins/notion/config.ts +47 -0
  67. package/src/orchestrator/plugins/nx/SKILL.md +20 -72
  68. package/src/orchestrator/plugins/playwright/SKILL.md +5 -17
  69. package/src/orchestrator/plugins/slack/SKILL.md +28 -190
  70. package/src/orchestrator/plugins/teams/SKILL.md +10 -140
  71. package/src/orchestrator/plugins/trello/SKILL.md +151 -0
  72. package/src/orchestrator/plugins/trello/config.ts +44 -0
  73. package/src/orchestrator/plugins/types.ts +1 -1
  74. package/src/orchestrator/plugins/vitest/SKILL.md +2 -2
  75. package/src/orchestrator/prompts/bug-fix.prompt.md +25 -63
  76. package/src/orchestrator/prompts/implement-feature.prompt.md +29 -66
  77. package/src/orchestrator/prompts/quick-refinement.prompt.md +31 -66
  78. package/src/orchestrator/skills/accessibility-standards/SKILL.md +50 -105
  79. package/src/orchestrator/skills/agent-hooks/SKILL.md +60 -110
  80. package/src/orchestrator/skills/agent-memory/SKILL.md +44 -93
  81. package/src/orchestrator/skills/api-patterns/SKILL.md +20 -68
  82. package/src/orchestrator/skills/code-commenting/SKILL.md +49 -101
  83. package/src/orchestrator/skills/context-map/SKILL.md +47 -88
  84. package/src/orchestrator/skills/data-engineering/SKILL.md +27 -74
  85. package/src/orchestrator/skills/decomposition/SKILL.md +50 -98
  86. package/src/orchestrator/skills/deployment-infrastructure/SKILL.md +44 -107
  87. package/src/orchestrator/skills/documentation-standards/SKILL.md +28 -89
  88. package/src/orchestrator/skills/fast-review/SKILL.md +51 -276
  89. package/src/orchestrator/skills/frontend-design/SKILL.md +53 -163
  90. package/src/orchestrator/skills/git-workflow/SKILL.md +18 -54
  91. package/src/orchestrator/skills/memory-merger/SKILL.md +51 -88
  92. package/src/orchestrator/skills/observability-logging/SKILL.md +29 -75
  93. package/src/orchestrator/skills/orchestration-protocols/SKILL.md +58 -117
  94. package/src/orchestrator/skills/panel-majority-vote/SKILL.md +65 -140
  95. package/src/orchestrator/skills/performance-optimization/SKILL.md +21 -85
  96. package/src/orchestrator/skills/project-consistency/SKILL.md +62 -281
  97. package/src/orchestrator/skills/react-development/SKILL.md +38 -86
  98. package/src/orchestrator/skills/security-hardening/SKILL.md +40 -84
  99. package/src/orchestrator/skills/self-improvement/SKILL.md +26 -60
  100. package/src/orchestrator/skills/seo-patterns/SKILL.md +40 -105
  101. package/src/orchestrator/skills/session-checkpoints/SKILL.md +26 -68
  102. package/src/orchestrator/skills/team-lead-reference/SKILL.md +66 -206
  103. package/src/orchestrator/skills/testing-workflow/SKILL.md +42 -112
  104. package/src/orchestrator/skills/validation-gates/SKILL.md +39 -170
  105. package/src/orchestrator/snippets/base-output-contract.md +14 -0
  106. package/src/orchestrator/snippets/discovered-issues-policy.md +15 -0
  107. package/src/orchestrator/snippets/logging-mandatory.md +11 -0
  108. package/src/orchestrator/snippets/never-expose-secrets.md +22 -0
@@ -9,173 +9,114 @@ Runtime patterns for managing delegated agents. **Load at:** Execution phase (St
9
9
 
10
10
  ## Active Steering
11
11
 
12
- Monitor agent sessions during execution. Intervene early when you spot:
12
+ Intervene early when you spot:
13
13
 
14
- - **Failing tests/builds** — the agent can't resolve a dependency or breaks existing code
15
- - **Unexpected file changes** — files outside the agent's partition appear in the diff
16
- - **Scope creep** — the agent starts refactoring code you didn't ask about
17
- - **Circular behavior** — the agent retries the same failing approach without adjusting
18
- - **Intent misunderstanding** — session log shows the agent interpreted the prompt differently
14
+ | Signal | Symptom |
15
+ |--------|--------|
16
+ | Failing tests/builds | Can't resolve dependency or breaks existing code |
17
+ | Unexpected file changes | Files outside partition in diff |
18
+ | Scope creep | Refactors code not in scope |
19
+ | Circular behavior | Same failing approach retried without change |
20
+ | Intent misunderstanding | Session log shows wrong prompt interpretation |
19
21
 
20
- **When redirecting, be specific.** Explain *why* you're redirecting and *how* to proceed:
22
+ When redirecting, explain *why* and *how*:
21
23
 
22
- > "Don't modify `libs/data/src/lib/product.ts` — that file is shared across features. Instead, add the new query in `libs/data/src/lib/reviews.ts`. This keeps the change isolated."
24
+ > "Don't modify `libs/data/src/lib/product.ts` — shared across features. Add the new query in `libs/data/src/lib/reviews.ts`."
23
25
 
24
- **Timing matters.** Catching a problem 5 minutes in can save an hour. Don't wait until the agent finishes.
25
-
26
- **Background agent caveat:** The drift signals above apply only to **sub-agents** (inline) where you see results in real-time. Background agents run autonomously — you cannot inspect their intermediate state or redirect mid-execution. For background agents, steering is **post-hoc**: invest more effort in prompt specificity and file partition constraints upfront, then review thoroughly when the agent returns its output.
26
+ **Sub-agents:** Catch problems early (catching one 5 min in can save an hour). **Background agents:** Steer post-hoc — invest in prompt specificity and partition constraints upfront.
27
27
 
28
28
  ## Background Agents
29
29
 
30
- Background agents run autonomously in isolated Git worktrees. Use for well-scoped subtasks with clear acceptance criteria.
30
+ Run autonomously in isolated Git worktrees. Reserve for well-scoped tasks >5 min with clear acceptance criteria.
31
31
 
32
32
  - **Spawn:** Delegate Session → Background → Select agent → Enter prompt
33
- - **Auto-compaction:** At 95% token limit, context is automatically compressed
34
- - **Resume:** Use `--resume` for previous sessions
35
- - **Duration threshold:** Reserve for tasks expected to take >5 minutes
36
- - **No real-time monitoring:** You cannot inspect intermediate state. Drift detection happens only at completion review. Mitigate with: (a) highly specific prompts, (b) strict file partition constraints, (c) acceptance criteria checklists in the prompt
33
+ - **Auto-compaction:** At 95% token limit; use `--resume` to continue
34
+ - **No real-time monitoring:** Invest in specific prompts, strict partition constraints, and acceptance criteria checklists upfront
37
35
 
38
36
  ## Parallel Research Protocol
39
37
 
40
- When a task requires broad exploration before implementation, spawn multiple research sub-agents in parallel to gather context efficiently.
41
-
42
- ### When to Use
43
-
44
- - 3+ independent research questions need answering before implementation can begin
45
- - Broad codebase exploration across multiple libraries or domains
46
- - Multi-area analysis (e.g., "How do we handle X in the frontend, backend, and CMS?")
38
+ Spawn multiple research sub-agents in parallel when 3+ independent questions must be answered before implementation. **Use when:** 3+ independent research questions, broad codebase exploration, or multi-area analysis (frontend/backend/CMS). **Skip when:** single-file investigation, answer in one known location, sequential results, or fewer than 3 questions.
47
39
 
48
40
  ### Spawn Strategy
49
41
 
50
- - **Divide by topic/area**, not by file count — each researcher should own a coherent domain
51
- - **Max 3-5 parallel researchers** — more than 5 creates diminishing returns and token waste
52
- - **Each researcher gets a focused scope** — explicit directories, file patterns, or questions
53
- - **Use Economy/Standard tier** for research sub-agents to manage cost
54
-
55
- ### Research Sub-Agent Prompt Template
42
+ | Rule | Detail |
43
+ |------|--------|
44
+ | Divide by topic/area | Each researcher owns a coherent domain |
45
+ | Max 3–5 researchers | More creates diminishing returns and token waste |
46
+ | Focused scope per agent | Explicit dirs, file patterns, or questions |
47
+ | Economy/Standard tier | Manage cost for research sub-agents |
56
48
 
49
+ **Prompt template:**
57
50
  ```
58
51
  Research: [specific question]
59
52
  Scope: [files/directories to search]
60
- Return: A structured summary with:
61
- - Key findings (bullet list)
62
- - Relevant file paths (with line numbers)
63
- - Patterns observed
64
- - Unanswered questions
53
+ Return: key findings, relevant file paths (with line numbers), patterns, unanswered questions
65
54
  ```
66
55
 
67
56
  ### Result Merge Protocol
68
57
 
69
- After all research sub-agents return:
70
-
71
- 1. **Collect** all sub-agent results into a single context
72
- 2. **Deduplicate** findings — same file/pattern reported by multiple agents counts once
73
- 3. **Resolve conflicts** — if agents report contradictory information, trust the one with more specific evidence (exact file paths + line numbers > general observations)
74
- 4. **Synthesize** into a single context block for the next phase — distill the combined findings into a concise summary that can be included in implementation delegation prompts
75
-
76
- ### When NOT to Use
77
-
78
- - Single-file investigation — just read the file directly
79
- - When the answer is in one known location — a single sub-agent or direct read is faster
80
- - When results must be sequential (e.g., "find X, then based on X find Y")
81
- - For fewer than 3 questions — overhead of parallel coordination exceeds time saved
58
+ 1. Collect all results into single context
59
+ 2. Deduplicate (same file/pattern counts once)
60
+ 3. Resolve conflicts — specific evidence beats general observations
61
+ 4. Synthesize into concise context block for implementation prompts
82
62
 
83
63
  ## Batch Reviews
84
64
 
85
- When multiple background agents complete work simultaneously, batch similar reviews to save time:
86
-
87
- - Group reviews by domain (e.g., all UI changes together, all data changes together)
88
- - Run fast reviews in parallel for independent outputs
89
- - If multiple outputs share the same file partition boundary, review them sequentially to catch integration issues
90
- - For panel reviews, combine related artifacts into a single panel question when they share acceptance criteria
65
+ - Group by domain (UI, data); run fast reviews in parallel for independent outputs
66
+ - Review sequentially when outputs share the same partition boundary
67
+ - Combine related artifacts into one panel question when they share acceptance criteria
91
68
 
92
69
  ## Context Compaction
93
70
 
94
- Between phases, summarize prior agent output before passing it to the next agent. Never paste raw sub-agent results into a downstream prompt.
95
-
96
- **When:** Multi-phase chains where the next agent only needs outcomes, not full reasoning traces. Skip for single-phase work or when raw detail is needed (e.g., code review).
97
-
98
- **How:** After a sub-agent returns, extract only: files changed, key decisions, verification results (pass/fail), and blockers. Discard raw tool output, reasoning traces, and failed attempts.
99
-
100
- **Template for delegation prompts:**
71
+ Summarize prior phase output before passing to the next agent. **Extract:** files changed, key decisions, verification (pass/fail), blockers. **Discard:** raw tool output, reasoning traces, failed attempts.
101
72
 
73
+ **Template:**
102
74
  ```
103
75
  ### Prior Phase Output
104
76
  **Phase [N] — [Agent Name] — [Task Title]**
105
- - Files changed: [list with one-line descriptions]
106
- - Decisions: [key decisions that affect downstream work]
77
+ - Files changed: [list]
78
+ - Decisions: [key decisions affecting downstream work]
107
79
  - Verification: [lint ✅ | types ✅ | tests ✅]
108
80
  - Blockers: [none | list]
109
81
  ```
110
82
 
111
- ## Agent Health-Check Protocol
112
-
113
- Monitor delegated agents for failure signals. Intervene early rather than waiting for completion.
83
+ ## Agent Health Monitoring
114
84
 
115
85
  ### Health Signals
116
86
 
117
- | Signal | Detection | Threshold | Recovery |
118
- |--------|-----------|-----------|----------|
119
- | **Stuck** | No new terminal output or file changes | Sub-agent: 5 min / Background: 15 min | Check terminal output. If idle, nudge with clarification. If frozen, abort and re-delegate with simpler scope. |
120
- | **Looping** | Same error message repeated 3+ times | 3 consecutive identical failures | Abort immediately. Analyze the error, add context the agent is missing, re-delegate with explicit fix path. |
121
- | **Scope creep** | Files outside assigned partition appear in diff | Any file outside partition | Redirect: "Only modify files in [partition]. Revert changes to [file]." |
122
- | **Context exhaustion** | Responses become repetitive, confused, or lose earlier instructions | Visible confusion or instruction amnesia | Checkpoint immediately. End session. Resume in fresh context. |
123
- | **Permission loop** | Agent repeatedly asks for confirmation or waits for input | 2+ consecutive prompts without progress | Auto-approve if safe, or abort and re-delegate with `--dangerously-skip-permissions` flag or equivalent. |
124
-
125
- ### Health-Check Cadence
87
+ | Signal | Threshold | Recovery |
88
+ |--------|-----------|----------|
89
+ | **Stuck** — no output/changes | Sub: 5 min / BG: 15 min | Nudge; if frozen, abort + re-delegate with simpler scope |
90
+ | **Looping** — same error repeated | 3 consecutive failures | Abort; add context; re-delegate with explicit fix path |
91
+ | **Scope creep** — files outside partition | Any | Redirect: "Only modify files in [partition]. Revert [file]." |
92
+ | **Context exhaustion** — confused/repetitive | Visible instruction amnesia | Checkpoint, end session, resume in fresh context |
93
+ | **Permission loop** — waiting for input | 2+ prompts without progress | Auto-approve if safe; abort + re-delegate |
126
94
 
127
- - **Sub-agents (inline):** Monitor continuously — you see output in real-time
128
- - **Background agents:** Check terminal output after 10 minutes, then every 10 minutes
129
- - **After completion:** Always review the full diff before accepting output
95
+ **Cadence:** Sub-agents — continuous (real-time). Background agents — check at 10 min, then every 10 min. Always review full diff before accepting.
130
96
 
131
97
  ### Escalation Path
132
98
 
133
- 1. **First failure:** Re-delegate with more specific prompt + error context
134
- 2. **Second failure:** Downscope the task (split into smaller pieces) and re-delegate
135
- 3. **Third failure:** Log to Dead Letter Queue (`.opencastle/AGENT-FAILURES.md`), escalate to Architect for root cause analysis. If the failure involves a panel 3x BLOCK or unresolvable agent/reviewer conflict, create a **dispute record** in `.opencastle/DISPUTES.md` instead (see **team-lead-reference** skill § Dispute Protocol).
99
+ 1. **Failure 1:** Re-delegate with more specific prompt + error context
100
+ 2. **Failure 2:** Downscope (split into smaller pieces), re-delegate
101
+ 3. **Failure 3:** Log to `.opencastle/AGENT-FAILURES.md`; if panel BLOCK or conflict, create dispute in `.opencastle/DISPUTES.md` (see **team-lead-reference** § Dispute Protocol)
136
102
 
137
103
  ## Error Recovery Playbook
138
104
 
139
- Common failure modes and how to recover:
140
-
141
- ### Agent Stuck in Retry Loop
142
-
143
- **Symptom:** Agent retries the same failing command 3+ times without changing approach.
144
- **Recovery:** Intervene immediately. Read the error output, identify the root cause, and re-delegate with explicit fix instructions. Use the **self-improvement** skill to add a lesson.
145
-
146
- ### MCP Tool Unavailable
147
-
148
- **Symptom:** Tool calls fail with connection or timeout errors.
149
- **Recovery:** (1) Check if the MCP server is running. (2) If transient, retry once. (3) If persistent, work around: use CLI tools as alternatives. Log to DLQ if critical.
150
-
151
- ### Background Agent Produces Broken Output
152
-
153
- **Symptom:** Background agent returns, but files have lint/type/test errors.
154
- **Recovery:** (1) Review the diff to understand intent. (2) If fixable with small edits, fix inline. (3) If fundamentally wrong, discard the worktree changes and re-delegate with a more specific prompt. (4) Log to DLQ after 2 failed attempts.
155
-
156
- ### Merge Conflict from Parallel Agents
157
-
158
- **Symptom:** Two background agents modified overlapping files.
159
- **Recovery:** (1) This should never happen if file partitioning was followed. (2) Accept one agent's changes first (the one with more complex work). (3) Re-delegate the simpler changes to adapt to the new state. (4) Use the **self-improvement** skill to add a lesson about the conflict.
160
-
161
- ### Context Window Exhausted
162
-
163
- **Symptom:** Agent responses become confused, repetitive, or lose track of earlier instructions.
164
- **Recovery:** (1) Save a session checkpoint immediately. (2) End the current session. (3) Resume in a new session, loading the checkpoint. (4) Reduce parallel work in the next session.
165
-
166
- ### Test Failures After Merge
167
-
168
- **Symptom:** Tests pass individually but fail when multiple agent outputs are merged.
169
- **Recovery:** (1) Run affected tests to identify which projects break. (2) Check for import conflicts, duplicate definitions, or state pollution. (3) Delegate fix to the agent whose changes are most likely the cause.
105
+ | Failure | Symptom | Recovery |
106
+ |---------|---------|----------|
107
+ | **Retry loop** | Same command fails 3+ times | Abort; identify root cause; re-delegate with explicit fix; log lesson |
108
+ | **MCP unavailable** | Tool connection/timeout errors | Check server; retry once; fall back to CLI; log to DLQ if critical |
109
+ | **Broken BG output** | Lint/type/test errors on return | Fix inline if small; discard + re-delegate if fundamental; DLQ after 2 fails |
110
+ | **Parallel merge conflict** | Two agents modified overlapping files | Accept complex side first; re-delegate simple side to adapt; log lesson |
111
+ | **Context exhausted** | Confused/repetitive responses | Checkpoint; end session; resume with checkpoint; reduce parallel work |
112
+ | **Post-merge test failure** | Tests pass alone but fail merged | Run affected tests; check import/state conflicts; delegate fix to likely cause |
170
113
 
171
114
  ## Agent Circuit Breaker
172
115
 
173
- Track per-agent failure counts across the session (not just per-task). If the same agent keeps failing, the problem is likely systemic.
174
-
175
116
  | Threshold | Action |
176
117
  |-----------|--------|
177
- | **2 failures** | Warning — investigate: same error class? Model endpoint healthy? Prompt pattern issue? |
178
- | **3 failures** | Open circuit — stop delegating to that agent. Reassign tasks to an overlapping agent, try a different model tier, or checkpoint and escalate to the user. |
179
- | **Next session** | Half-open — circuit resets. If the agent fails again immediately, re-open and add a lesson via **self-improvement**. |
118
+ | **2 failures** | Investigate: same error class? Model healthy? Prompt pattern? |
119
+ | **3 failures** | Open circuit — stop delegating; reassign or escalate to user |
120
+ | **Next session** | Half-open — resets; re-open + add lesson if fails again |
180
121
 
181
- This is a judgment-based pattern, not a hard gate. 3 failures on similar tasks with the same error is more concerning than 3 unrelated failures.
122
+ Judgment-based, not a hard gate. 3 similar failures with the same error is more concerning than 3 unrelated failures.
@@ -3,168 +3,98 @@ name: panel-majority-vote
3
3
  description: "Run 3 isolated reviewer sub-agents against the same question and decide PASS/BLOCK by majority vote (2/3 wins). Use when deterministic verification is insufficient."
4
4
  ---
5
5
 
6
- <!-- ⚠️ This file is managed by OpenCastle. Edits will be overwritten on update. Customize in the .opencastle/ directory instead. -->
6
+ # Skill: Panel majority vote
7
7
 
8
- # Skill: Panel majority vote (3 reviewers)
8
+ ## Contract
9
9
 
10
- Use this skill when deterministic verification is unavailable and you need a panel to decide PASS/BLOCK for a single question against a declared artifact scope.
10
+ | Rule | Detail |
11
+ |------|--------|
12
+ | Scope | One run root, one panel key |
13
+ | Artifacts | Reviewers use only declared in-scope artifacts |
14
+ | Runners | Exactly 3 isolated reviewer runs |
15
+ | Verdict | Majority (2/3 wins) |
16
+ | On BLOCK | Consolidated report must include retry summary |
11
17
 
12
- ## Contract
13
- - Scope is exactly one run root and one panel key.
14
- - Reviewers must only use the declared in-scope artifacts.
15
- - Exactly 3 isolated reviewer runs.
16
- - Majority vote decides overall verdict (2/3 wins).
17
- - Consolidated panel report must include a short retry summary when BLOCK.
18
-
19
- ## Inputs
20
- - Run root: `<runRoot>`
21
- - Panel key: `<panelKey>` (a filesystem-safe identifier used to name output files)
22
- - Exact question text (single question)
23
- - Explicit in-scope artifact list (all under the same run root)
24
-
25
- Optional (defaults shown):
26
- - Panel output directory: `<panelDir>` (default: `<runRoot>/panel/`)
27
-
28
- ## Outputs (files)
29
- - (Optional) Prompt payload: `<panelDir>/<panelKey>-panel-prompt.md`
30
- - Raw reviewer outputs: `<panelDir>/<panelKey>-reviewer-outputs.md`
31
- - Consolidated report: `<panelDir>/<panelKey>.md`
32
-
33
- ## Procedure (required: run in isolation)
34
- Run this skill in an isolated subagent (using `runSubagent`) so the panel cannot accidentally consult unrelated workspace context.
35
-
36
- The isolated runner subagent must:
37
- 1. Validate scope
38
- - Ensure every in-scope artifact path is under `<runRoot>`.
39
- - Ensure the in-scope list is sufficient to answer the question.
40
-
41
- 2. Spawn exactly 3 reviewers (in parallel)
42
- - Launch 3 isolated reviewer subagents (using `runSubagent`) with the exact same prompt payload.
43
- - The prompt payload may be passed directly to the reviewer subagents (no file required).
44
- - If you want an explicit artifact of the prompt payload, optionally write it to `<panelDir>/<panelKey>-panel-prompt.md`.
45
- - Reviewer prompt must require this strict output format:
46
- 1) VERDICT: PASS | BLOCK
47
- 2) MUST-FIX:
48
- - ...
49
- 3) SHOULD-FIX:
50
- - ...
51
- 4) QUESTIONS:
52
- - ...
53
- 5) TEST IDEAS:
54
- - ...
55
- 6) CONFIDENCE: low | med | high
56
- - Reviewers must not include any other sections.
57
-
58
- 3. Persist reviewer outputs (required audit trail)
59
- - Create/overwrite `<panelDir>/<panelKey>-reviewer-outputs.md`.
60
- - Include at the top:
61
- - Run root
62
- - Panel key
63
- - Question text
64
- - In-scope artifact list
65
- - (Optional) The exact prompt payload text provided to reviewers
66
- - Then include each reviewer output verbatim, clearly separated.
67
-
68
- 4. Consolidate by majority vote (2/3 wins)
69
- - Compute:
70
- - PASS count
71
- - BLOCK count
72
- - Overall = PASS if PASS >= 2 else BLOCK
73
- - Deduplicate MUST-FIX and SHOULD-FIX items; annotate how many reviewers flagged each.
74
- - Record disagreements (items flagged by only 1 reviewer; or materially conflicting assessments).
75
- - Include determinize-next recommendations.
76
- - If Overall = BLOCK, include a short Retry summary:
77
- - top changes required before retrying
78
-
79
- 5. Write the consolidated panel report
80
- - Create `<panelDir>/<panelKey>.md` using the template in `panel-report.template.md` (in this directory).
81
-
82
- 6. Print a concise summary to chat
83
- - Overall verdict + vote tally + path to `<panelDir>/<panelKey>.md`.
84
-
85
- 7. Log the panel result **(⛔ hard gate — do NOT return the verdict or proceed until logged)**
86
- - Log the panel result using the **observability-logging** skill's panel record command. An unlogged panel is a failed panel.
87
- - Include: `panel_key`, `verdict`, `pass_count`, `block_count`, `must_fix`, `should_fix`, `reviewer_model`, `weighted`, `attempt`, `tracker_issue`, `artifacts_count`, `report_path`.
88
- - The skill's panel record command includes a verify step.
89
-
90
- Finally: ensure whatever produced the claim being verified links the consolidated panel report as verification evidence.
18
+ ## Inputs / Outputs
91
19
 
92
- ## Notes
93
- - If the panel output is BLOCK, prefer to change the underlying work and re-run the same panel question over re-wording the question.
94
- - After 3 consecutive BLOCKs on the same panel key, create a **dispute record** in `.opencastle/DISPUTES.md` instead of retrying further. The dispute packages the agent's position, all reviewer feedback, attempt history, and resolution options for human decision-making. See the **team-lead-reference** skill § Dispute Protocol for the full procedure.
20
+ **Inputs:** `<runRoot>`, `<panelKey>` (filesystem-safe), question text, artifact list. Panel dir default: `<runRoot>/panel/`.
95
21
 
96
- ## Model Selection for Reviewers
22
+ | File | Path |
23
+ |------|------|
24
+ | Prompt payload (optional) | `<panelDir>/<panelKey>-panel-prompt.md` |
25
+ | Raw reviewer outputs | `<panelDir>/<panelKey>-reviewer-outputs.md` |
26
+ | Consolidated report | `<panelDir>/<panelKey>.md` |
97
27
 
98
- Choose reviewer models based on the domain being reviewed:
99
- - **Security, architecture, complex logic** → Quality (Claude Sonnet 4.6) for all 3 reviewers
100
- - **Feature implementation, UI, queries** → Standard (Gemini 3.1 Pro) for all 3 reviewers
101
- - **Mixed-domain review** → Use Quality for at least 1 reviewer, Standard for the other 2
28
+ ## Procedure
102
29
 
103
- All 3 reviewers should use the same model to ensure comparable verdicts. Mixing models can lead to inconsistent review depth and confusing disagreements.
30
+ 1. **Validate scope** — every artifact path is under `<runRoot>`; list is sufficient to answer the question.
31
+ 2. **Spawn 3 reviewers in parallel** — identical prompt to 3 isolated subagents. Optionally write payload to `<panelDir>/<panelKey>-panel-prompt.md`. Required output sections (no others): `VERDICT: PASS | BLOCK`, `MUST-FIX:`, `SHOULD-FIX:`, `QUESTIONS:`, `TEST IDEAS:`, `CONFIDENCE: low | med | high`.
32
+ 3. **Persist outputs** — write `<panelDir>/<panelKey>-reviewer-outputs.md` with header (run root, panel key, question, artifacts) and each reviewer output verbatim, separated.
33
+ 4. **Consolidate** — count PASS/BLOCK; overall PASS if ≥ 2. Deduplicate MUST-FIX/SHOULD-FIX with reviewer counts. Record disagreements. Include determinize-next recs. If BLOCK, add retry summary.
34
+ 5. **Write report** — create `<panelDir>/<panelKey>.md` using `panel-report.template.md`.
35
+ 6. **Print summary** — overall verdict + vote tally + report path.
36
+ 7. **Log (⛔ hard gate)** — use **observability-logging** skill panel command. Fields: `panel_key`, `verdict`, `pass_count`, `block_count`, `must_fix`, `should_fix`, `reviewer_model`, `weighted`, `attempt`, `tracker_issue`, `artifacts_count`, `report_path`. Link report as verification evidence.
104
37
 
105
- ## Weighted Consensus Variant
38
+ ## Notes
39
+
40
+ - On BLOCK: change the underlying work and re-run; do not re-word the question.
41
+ - After 3 consecutive BLOCKs on the same panel key: create a dispute record per **team-lead-reference** § Dispute Protocol.
106
42
 
107
- Extends the panel system for subjective decisions where domain expertise should weight more heavily than a simple head-count.
43
+ ## Model Selection
108
44
 
109
- ### When to Use Weighted Consensus
45
+ | Domain | Model |
46
+ |--------|-------|
47
+ | Security, architecture, complex logic | Quality (Claude Sonnet 4.6) × 3 |
48
+ | Feature implementation, UI, queries | Standard (Gemini 3.1 Pro) × 3 |
49
+ | Mixed-domain | Quality × 1, Standard × 2 |
110
50
 
111
- | Decision Type | Use Simple Majority | Use Weighted Consensus |
112
- |--------------|--------------------|-----------------------|
113
- | Security vulnerability present? | ✅ | — |
114
- | Code correctness | ✅ | — |
115
- | Best UI approach for user experience | — | ✅ |
116
- | Architecture tradeoff (performance vs maintainability) | — | ✅ |
117
- | Data model design choices | — | ✅ |
118
- | Naming conventions / code style disputes | — | ✅ |
51
+ Use the same model for all 3 reviewers; the mixed-domain row (Quality × 1, Standard × 2) is the only exception.
119
52
 
120
- ### Weight Assignment Rules
53
+ ## Weighted Consensus Variant
121
54
 
122
- Each reviewer gets a weight based on 3 factors:
55
+ For subjective decisions where domain expertise should weigh more than head-count.
123
56
 
124
- | Factor | Weight Bonus | Example |
125
- |--------|-------------|---------|
126
- | **Domain expertise** | +2 | Security Expert reviewing auth code |
127
- | **Confidence level** | +1 (high) / 0 (med) / -1 (low) | Self-reported by reviewer |
128
- | **Prior success** | +1 | Agent has >80% success rate for similar reviews (from AGENT-PERFORMANCE.md) |
57
+ ### When to Use
129
58
 
130
- **Base weight:** 1 for all reviewers. Add bonuses to get final weight.
59
+ | Decision Type | Mode |
60
+ |--------------|------|
61
+ | Security vulnerability, code correctness | Simple majority |
62
+ | UI/UX, architecture tradeoffs, data model, naming | Weighted |
131
63
 
132
- **Example:**
64
+ ### Weight Assignment
133
65
 
134
- ```text
135
- Reviewer 1 (Security Expert, reviewing auth): base 1 + domain 2 + confidence 1 = weight 4
136
- Reviewer 2 (Frontend Dev, reviewing auth): base 1 + domain 0 + confidence 1 = weight 2
137
- Reviewer 3 (Architect, reviewing auth): base 1 + domain 1 + confidence 0 = weight 2
138
- ```
66
+ Base weight: 1. Add bonuses:
139
67
 
140
- ### Weighted Voting Protocol
68
+ | Factor | Bonus |
69
+ |--------|-------|
70
+ | Domain expertise (relevant to review) | +2 |
71
+ | Confidence high / med / low | +1 / 0 / -1 |
72
+ | Prior success rate >80% (AGENT-PERFORMANCE.md) | +1 |
141
73
 
142
- 1. **Assign weights** to each reviewer before spawning them (based on their role relative to the review domain)
143
- 2. **Spawn reviewers** with the same prompt as simple majority (use the existing procedure)
144
- 3. **Collect verdicts** — each reviewer submits PASS/BLOCK with confidence level
145
- 4. **Calculate weighted score:**
146
- - Sum weights of PASS reviewers → PASS score
147
- - Sum weights of BLOCK reviewers → BLOCK score
148
- - Overall = PASS if PASS score > BLOCK score, else BLOCK
149
- 5. **Tie-breaking:** If scores are equal, the reviewer with the highest individual weight breaks the tie. If weights are also equal, default to BLOCK (conservative).
74
+ Example (auth review): Security Expert, high confidence = 1 + 2 + 1 = **4**; Architect (partial domain credit, +1), med confidence = 1 + 1 + 0 = **2**.
150
75
 
151
- ### Conflict Resolution
76
+ ### Voting Protocol
152
77
 
153
- - If a low-weight reviewer BLOCKs but high-weight reviewers PASS: note the BLOCK concerns in the report but overall PASS. Include the low-weight MUST-FIX items as SHOULD-FIX instead.
154
- - If the domain expert BLOCKs but generalists PASS: overall BLOCK. Domain expertise overrides general opinion.
155
- - If all reviewers have equal weight: falls back to simple majority vote (2/3 wins).
78
+ 1. Assign weights before spawning.
79
+ 2. Spawn with same prompt; collect PASS/BLOCK + confidence.
80
+ 3. Score: sum weights by verdict; PASS if PASS score > BLOCK score.
81
+ 4. Tie: highest individual weight breaks tie; if equal, default BLOCK.
156
82
 
157
- ### Weighted Panel Report Extension
83
+ ### Conflict Resolution
84
+
85
+ | Scenario | Outcome |
86
+ |----------|---------|
87
+ | Low-weight BLOCKs, high-weight PASSes | PASS; move BLOCK's MUST-FIX → SHOULD-FIX |
88
+ | Domain expert BLOCKs, generalists PASS | BLOCK |
89
+ | All equal weight | Simple majority (2/3 wins) |
158
90
 
159
- Add these fields to the consolidated panel report template when using weighted consensus:
91
+ ### Report Extension
160
92
 
161
93
  ```markdown
162
94
  ### Weighting
163
95
  | Reviewer | Role | Domain | Confidence | Prior Success | Final Weight |
164
96
  |----------|------|--------|------------|---------------|-------------|
165
97
  | 1 | [Agent] | +X | +X | +X | X |
166
- | 2 | [Agent] | +X | +X | +X | X |
167
- | 3 | [Agent] | +X | +X | +X | X |
168
98
 
169
99
  ### Weighted Score
170
100
  - PASS: X (reviewers: 1, 3)
@@ -172,12 +102,7 @@ Add these fields to the consolidated panel report template when using weighted c
172
102
  - **Overall: PASS/BLOCK** (weighted)
173
103
  ```
174
104
 
175
- ### Integration with Existing Panel Workflow
176
-
177
- The weighted consensus variant follows the SAME procedure steps (1-6) from the main panel protocol. The only differences are:
178
- 1. Weight assignment happens in step 2 (before spawning reviewers)
179
- 2. Step 4 uses weighted calculation instead of simple count
180
- 3. The consolidated report includes the weighting table
105
+ ### Integration
181
106
 
182
- The Team Lead decides whether to use simple majority or weighted consensus when scheduling the panel review. Include the decision rationale in the delegation prompt.
107
+ Same steps 1–7 as standard panel. Differences: assign weights in step 2; use weighted calculation in step 4; add weighting table to report. Team Lead decides simple vs. weighted; include rationale in delegation prompt.
183
108
 
@@ -3,101 +3,37 @@ name: performance-optimization
3
3
  description: "Frontend and backend performance optimization patterns including rendering, asset optimization, JavaScript performance, caching, profiling, and code review checklist. Use when optimizing components, reviewing code for performance, or analyzing bundle size and Core Web Vitals."
4
4
  ---
5
5
 
6
- <!-- ⚠️ This file is managed by OpenCastle. Edits will be overwritten on update. Customize in the .opencastle/ directory instead. -->
7
-
8
6
  # Performance Optimization
9
7
 
10
- ## General Principles
11
-
12
- - **Measure first, optimize second** — profile before optimizing. Use Chrome DevTools, Lighthouse, Datadog.
13
- - **Optimize for the common case** — focus on frequently executed code paths.
14
- - **Avoid premature optimization** — write clear code first, optimize when necessary.
15
- - **Minimize resource usage** — CPU, memory, network, disk.
16
- - **Prefer simplicity** — simple algorithms are often faster and easier to optimize.
17
- - **Document performance assumptions** — comment performance-critical code.
18
- - **Automate performance testing** — integrate into CI/CD.
19
- - **Set performance budgets** — define limits for load time, memory, API latency.
20
-
21
- ## Rendering and DOM
22
-
23
- - **Memoization**: Use `React.memo`, `useMemo`, `useCallback` judiciously — only when profiling shows unnecessary re-renders. Don't pre-optimize.
24
- - Stable `key` props in lists (avoid array indices unless static).
25
- - Avoid inline styles (can trigger layout thrashing). Prefer CSS classes.
26
- - CSS transitions/animations over JavaScript for GPU-accelerated effects.
27
- - `requestIdleCallback` for deferring non-critical rendering.
28
-
29
- ## Asset Optimization
30
-
31
- - Modern image formats (WebP, AVIF). Tools: ImageOptim, Squoosh.
32
- - SVGs for icons.
33
- - Bundle and minify JS/CSS (Webpack, Rollup, esbuild). Tree-shaking.
34
- - Long-lived cache headers for static assets. Cache busting for updates.
35
- - `loading="lazy"` for images. Dynamic imports for JS.
36
- - Font subsetting. `font-display: swap`.
37
-
38
- ## JavaScript Performance
8
+ **Rule:** Measure first (`Chrome DevTools`, `Lighthouse`, `Datadog`), optimize second. Set budgets (load time, memory, API latency). Automate in CI/CD.
39
9
 
40
- - Offload heavy computation to Web Workers.
41
- - Debounce/throttle scroll, resize, input events.
42
- - Clean up event listeners, intervals, DOM references (prevent memory leaks).
43
- - Maps/Sets for lookups. TypedArrays for numeric data.
44
- - Avoid global variables.
45
- - Avoid deep object cloning unless necessary.
10
+ ## Patterns by Domain
46
11
 
47
- ## Node.js
12
+ | Domain | Key patterns |
13
+ |--------|-------------|
14
+ | **Rendering** | `React.memo`/`useMemo`/`useCallback` only after profiling; stable `key` props; CSS classes over inline styles; CSS animations (GPU); `requestIdleCallback` for non-critical work |
15
+ | **Assets** | WebP/AVIF images; SVG icons; bundle+minify+tree-shake (esbuild/Rollup); `loading="lazy"`; dynamic imports; long-lived cache headers + cache-busting; font subsetting + `font-display: swap` |
16
+ | **JS** | Web Workers for heavy computation; debounce/throttle events; clean up listeners/intervals; `Map`/`Set` for lookups; `TypedArray` for numeric data |
17
+ | **Node.js** | Async APIs only (never `readFileSync` in prod); clustering/worker threads for CPU; streams for large I/O; profile with `clinic.js` / `node --inspect` |
48
18
 
49
- - Async APIs only — never `fs.readFileSync` in production.
50
- - Clustering or worker threads for CPU-bound tasks.
51
- - Streams for large file/network processing.
52
- - Profile with `clinic.js`, `node --inspect`.
19
+ ## Debounce Example
53
20
 
54
- ## Code Review Checklist
55
-
56
- - [ ] No obvious algorithmic inefficiencies (O(n²) or worse)?
57
- - [ ] Appropriate data structures?
58
- - [ ] No unnecessary computations or repeated work?
59
- - [ ] Caching used where appropriate with correct invalidation?
60
- - [ ] Database queries optimized, indexed, no N+1?
61
- - [ ] Large payloads paginated, streamed, or chunked?
62
- - [ ] No memory leaks or unbounded resource usage?
63
- - [ ] Network requests minimized, batched, retried on failure?
64
- - [ ] Assets optimized, compressed, served efficiently?
65
- - [ ] No blocking operations in hot paths?
66
- - [ ] Logging in hot paths minimized and structured?
67
- - [ ] Performance-critical paths documented and tested?
68
- - [ ] Automated benchmarks for performance-sensitive code?
69
- - [ ] Alerts for performance regressions?
70
- - [ ] No anti-patterns (SELECT *, blocking I/O, globals)?
71
- - [ ] Memoization used judiciously — only where profiling shows benefit?
72
-
73
- ## Practical Examples
74
-
75
- ### Debouncing User Input
76
-
77
- ```javascript
78
- // BAD: API call on every keystroke
21
+ ```js
22
+ // BAD: fetch on every keystroke
79
23
  input.addEventListener('input', (e) => fetch(`/search?q=${e.target.value}`));
80
-
81
- // GOOD: Debounced
82
- let timeout;
83
- input.addEventListener('input', (e) => {
84
- clearTimeout(timeout);
85
- timeout = setTimeout(() => fetch(`/search?q=${e.target.value}`), 300);
86
- });
24
+ // GOOD: debounced 300 ms
25
+ let t; input.addEventListener('input', (e) => { clearTimeout(t); t = setTimeout(() => fetch(`/search?q=${e.target.value}`), 300); });
87
26
  ```
88
27
 
89
- ### Lazy Loading Images
90
-
91
- ```html
92
- <!-- BAD -->
93
- <img src="large-image.jpg" />
28
+ ## Review Checklist
94
29
 
95
- <!-- GOOD -->
96
- <img src="large-image.jpg" loading="lazy" />
97
- ```
30
+ - [ ] No O(n²)+ algorithms; appropriate data structures
31
+ - [ ] Caching with correct invalidation; no N+1 DB queries
32
+ - [ ] Large payloads paginated/streamed; network requests batched
33
+ - [ ] No memory leaks or blocking ops in hot paths
34
+ - [ ] Assets optimized; memoization only where profiling shows benefit
35
+ - [ ] Benchmarks for perf-sensitive code; alerts for regressions
98
36
 
99
37
  ## References
100
38
 
101
- - [Google Web Fundamentals: Performance](https://web.dev/performance/)
102
- - [MDN: Performance](https://developer.mozilla.org/en-US/docs/Web/Performance)
103
- - [Lighthouse](https://developers.google.com/web/tools/lighthouse)
39
+ - [web.dev/performance](https://web.dev/performance/) · [MDN Performance](https://developer.mozilla.org/en-US/docs/Web/Performance) · [Lighthouse](https://developers.google.com/web/tools/lighthouse)