@hopla/claude-setup 1.15.0 → 1.17.0

@@ -9,7 +9,7 @@
  {
  "name": "hopla",
  "description": "Agentic coding system: PIV loop, TDD, debugging, brainstorming, subagent execution, and team workflows",
- "version": "1.15.0",
+ "version": "1.17.0",
  "source": "./",
  "author": {
  "name": "Hopla Tools",
@@ -1,7 +1,7 @@
  {
  "name": "hopla",
  "description": "Agentic coding system for Claude Code: PIV loop (Plan → Implement → Validate), TDD, debugging, brainstorming, subagent execution, and team workflows",
- "version": "1.15.0",
+ "version": "1.17.0",
  "author": {
  "name": "Hopla Tools",
  "email": "julio@hopla.tools"
package/README.md CHANGED
@@ -233,6 +233,9 @@ After each PIV loop, run the `execution-report` skill + `/hopla:system-review` t
  | `brainstorm` | "let's brainstorm", "explore approaches" |
  | `debug` | "debug this", "find the bug", "why is this failing" |
  | `tdd` | "write tests first", "TDD", "red-green-refactor" |
+ | `refactoring` | "refactor", "clean up", "simplify", "extract", "deduplicate" |
+ | `performance` | "slow", "optimize", "bottleneck", "lento", "tarda mucho" |
+ | `migration` | "migrate", "upgrade", "switch from X to Y", "major version bump" |
  | `subagent-execution` | "use subagents", plans with 5+ tasks |
  | `parallel-dispatch` | "run in parallel", "parallelize this", independent tasks |
 
@@ -441,6 +444,7 @@ project/
  │ ├── rca/ ← Root cause analysis docs (commit)
  │ ├── execution-reports/ ← Post-implementation reports (commit)
  │ ├── system-reviews/ ← Process improvement reports (commit)
+ │ ├── audits/ ← Persistent audit reports (commit — opt-in)
  │ └── code-reviews/ ← Code review reports (don't commit — ephemeral)
  └── .claude/
  └── commands/ ← Project-specific commands (optional)
package/cli.js CHANGED
@@ -87,6 +87,21 @@ function logRemoved(label) {
  log(` ${RED}✕${RESET} ${verb}: ${label}`);
  }
 
+ // Safe parser for settings.json-style files. Returns null when the file is
+ // missing. Warns (and returns null) when the file exists but is not valid JSON
+ // — previously these failures were silently swallowed, causing cleanup and
+ // permission updates to skip with no signal to the user.
+ function parseSettingsFile(settingsPath) {
+ if (!fs.existsSync(settingsPath)) return null;
+ try {
+ return JSON.parse(fs.readFileSync(settingsPath, "utf8"));
+ } catch (err) {
+ log(` ${YELLOW}⚠${RESET} Could not parse ${settingsPath}: ${err.message}`);
+ log(` Skipping this file. Fix the JSON and re-run to apply changes.`);
+ return null;
+ }
+ }
+
  function logInstalled(label, exists) {
  const verb = DRY_RUN
  ? (exists ? "Would update" : "Would install")
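For context on the change above: a minimal sketch of the failure mode `parseSettingsFile` fixes. The file path and JSON content here are hypothetical, not taken from the package.

```js
// Hypothetical repro: a settings.json with a trailing comma is invalid JSON.
import fs from "node:fs";

fs.writeFileSync("/tmp/settings.json", '{ "permissions": { "allow": [] }, }');

try {
  JSON.parse(fs.readFileSync("/tmp/settings.json", "utf8"));
} catch (err) {
  // 1.15.0 swallowed this error (`catch { /* ignore */ }`), so cleanup and
  // permission updates skipped the file with no signal. 1.17.0 logs a warning
  // built from err.message and skips the file explicitly.
  console.warn(`Could not parse /tmp/settings.json: ${err.message}`);
}
```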
@@ -220,35 +235,34 @@ function removeLegacyFiles() {
 
  // hopla hook entries from settings.json AND settings.local.json
  for (const settingsPath of SETTINGS_FILES) {
- if (!fs.existsSync(settingsPath)) continue;
- try {
- const settings = JSON.parse(fs.readFileSync(settingsPath, "utf8"));
- let changed = false;
-
- if (settings.hooks) {
- for (const [event, matchers] of Object.entries(settings.hooks)) {
- if (!Array.isArray(matchers)) continue;
- const filtered = matchers.filter((m) => {
- if (!m.hooks || !Array.isArray(m.hooks)) return true;
- const isHopla = m.hooks.every((h) =>
- LEGACY_HOOK_COMMANDS.some((cmd) => h.command && h.command.includes(cmd))
- );
- return !isHopla;
- });
- if (filtered.length !== matchers.length) {
- settings.hooks[event] = filtered;
- if (filtered.length === 0) delete settings.hooks[event];
- changed = true;
- }
+ const settings = parseSettingsFile(settingsPath);
+ if (!settings) continue;
+
+ let changed = false;
+
+ if (settings.hooks) {
+ for (const [event, matchers] of Object.entries(settings.hooks)) {
+ if (!Array.isArray(matchers)) continue;
+ const filtered = matchers.filter((m) => {
+ if (!m.hooks || !Array.isArray(m.hooks)) return true;
+ const isHopla = m.hooks.every((h) =>
+ LEGACY_HOOK_COMMANDS.some((cmd) => h.command && h.command.includes(cmd))
+ );
+ return !isHopla;
+ });
+ if (filtered.length !== matchers.length) {
+ settings.hooks[event] = filtered;
+ if (filtered.length === 0) delete settings.hooks[event];
+ changed = true;
  }
- if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
  }
+ if (Object.keys(settings.hooks).length === 0) delete settings.hooks;
+ }
 
- if (changed) {
- safeWrite(settingsPath, JSON.stringify(settings, null, 2) + "\n");
- removed.push(`hooks from ${path.basename(settingsPath)}`);
- }
- } catch { /* ignore parse errors */ }
+ if (changed) {
+ safeWrite(settingsPath, JSON.stringify(settings, null, 2) + "\n");
+ removed.push(`hooks from ${path.basename(settingsPath)}`);
+ }
  }
 
  return removed;
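To make the filter semantics above concrete, a small standalone sketch of the matcher rule: a matcher is dropped only when every one of its hooks matches a legacy hopla command. The sample commands and the `LEGACY_HOOK_COMMANDS` value are assumptions, not the package's actual list.

```js
// Standalone sketch of the matcher filter (assumed values, not package data)
const LEGACY_HOOK_COMMANDS = ["hopla-hook.js"];

const matchers = [
  // All hooks are hopla commands, so every() is true and the matcher is removed
  { hooks: [{ command: "node ~/.claude/hooks/hopla-hook.js" }] },
  // Mixed matcher: one non-hopla hook makes every() false, so it is kept
  { hooks: [{ command: "node ~/.claude/hooks/hopla-hook.js" }, { command: "eslint ." }] },
];

const filtered = matchers.filter((m) => {
  const isHopla = m.hooks.every((h) =>
    LEGACY_HOOK_COMMANDS.some((cmd) => h.command && h.command.includes(cmd))
  );
  return !isHopla;
});

console.log(filtered.length); // 1: only the mixed matcher survives
```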
@@ -257,31 +271,27 @@ function removeLegacyFiles() {
  function removeHoplaPermissions() {
  const removed = [];
  for (const settingsPath of SETTINGS_FILES) {
- if (!fs.existsSync(settingsPath)) continue;
- try {
- const settings = JSON.parse(fs.readFileSync(settingsPath, "utf8"));
- if (!settings.permissions || !Array.isArray(settings.permissions.allow)) continue;
- const before = settings.permissions.allow.length;
- settings.permissions.allow = settings.permissions.allow.filter(
- (p) => !ALL_HOPLA_PERMISSIONS.has(p)
- );
- if (settings.permissions.allow.length !== before) {
- safeWrite(settingsPath, JSON.stringify(settings, null, 2) + "\n");
- removed.push(`permissions from ${path.basename(settingsPath)}`);
- }
- } catch { /* ignore */ }
+ const settings = parseSettingsFile(settingsPath);
+ if (!settings) continue;
+ if (!settings.permissions || !Array.isArray(settings.permissions.allow)) continue;
+ const before = settings.permissions.allow.length;
+ settings.permissions.allow = settings.permissions.allow.filter(
+ (p) => !ALL_HOPLA_PERMISSIONS.has(p)
+ );
+ if (settings.permissions.allow.length !== before) {
+ safeWrite(settingsPath, JSON.stringify(settings, null, 2) + "\n");
+ removed.push(`permissions from ${path.basename(settingsPath)}`);
+ }
  }
  return removed;
  }
 
  function detectPlugin() {
  for (const settingsPath of SETTINGS_FILES) {
- if (!fs.existsSync(settingsPath)) continue;
- try {
- const settings = JSON.parse(fs.readFileSync(settingsPath, "utf8"));
- const plugins = settings.enabledPlugins || {};
- if (Object.keys(plugins).some((key) => key.startsWith("hopla@"))) return true;
- } catch { /* ignore */ }
+ const settings = parseSettingsFile(settingsPath);
+ if (!settings) continue;
+ const plugins = settings.enabledPlugins || {};
+ if (Object.keys(plugins).some((key) => key.startsWith("hopla@"))) return true;
  }
  return false;
  }
@@ -373,11 +383,16 @@ async function uninstall() {
  async function setupPermissions() {
  const settingsPath = path.join(CLAUDE_DIR, "settings.json");
 
- let settings = { permissions: { allow: [] } };
- if (fs.existsSync(settingsPath)) {
- try {
- settings = JSON.parse(fs.readFileSync(settingsPath, "utf8"));
- } catch { /* keep defaults */ }
+ // Use parseSettingsFile so malformed JSON is reported instead of silently
+ // overwritten. When the file is missing we start from defaults.
+ let settings = parseSettingsFile(settingsPath);
+ if (!settings) {
+ if (fs.existsSync(settingsPath)) {
+ // Malformed JSON — do NOT overwrite (user needs to fix first)
+ log(` ${YELLOW}↷${RESET} Skipped permissions setup — settings.json is not valid JSON.`);
+ return;
+ }
+ settings = { permissions: { allow: [] } };
  }
  if (!settings.permissions) settings.permissions = {};
  if (!settings.permissions.allow) settings.permissions.allow = [];
@@ -98,7 +98,7 @@ Work through each task in the plan sequentially. For each task:
 
  1. **Announce** the task you are starting (e.g., "Starting Task 2: Create the filter component")
  2. **Follow the pattern** referenced in the plan — do not invent new patterns
- 3. **Check for existing implementations** — before creating new functions, constants, or utility modules, search the codebase for existing implementations that serve the same purpose. Reuse or extend rather than duplicate. DRY violations were the #1 code quality issue across 28 implementations.
+ 3. **Check for existing implementations** — before creating new functions, constants, or utility modules, search the codebase for existing implementations that serve the same purpose. Reuse or extend rather than duplicate.
  4. **Implement** only what the task specifies — nothing more
  5. **Validate** the task using the method specified in the plan's validate field
  6. **Report completion** with a brief status: what was done, what was skipped, any decision made
@@ -137,54 +137,15 @@ If the user requests changes that are NOT in the plan during execution:
  - Suggest committing the current planned work first
  - Then create a new branch or add it to the backlog
  - Say: "This looks like a separate feature. I recommend we commit the current work first, then handle this in a new branch. Should I add it to `.agents/plans/backlog/` instead?"
- 5. **Never** silently add significant unplanned work — scope creep caused the lowest alignment score (6/10) in past implementations
+ 5. **Never** silently add significant unplanned work — it mixes unreviewed changes into an otherwise reviewed plan and breaks the audit trail
 
  ## Step 5: Run Full Validation Pyramid
 
- After all tasks are complete, run the full validation sequence in order.
- **Do not skip levels. Do not proceed if a level fails.**
+ After all tasks are complete, run **Levels 1–7** from `commands/guides/validation-pyramid.md` (same repo). Do not skip levels. Do not proceed if a level fails.
 
- Use the exact commands from the plan's **Validation Checklist**. If not specified, read `CLAUDE.md` section "Development Commands" to find the correct commands.
+ Use the exact commands from the plan's **Validation Checklist**. If not specified, read `CLAUDE.md` "Development Commands" to find the correct commands.
 
- ### Level 1 — Lint & Format
- Run the project's lint and format check (e.g. `npm run lint`, `uv run ruff check .`).
- Fix any issues before continuing.
-
- ### Level 2 — Type Check
- Run the project's type checker (e.g. `npm run type-check`, `uv run mypy .`).
- Fix all type errors before continuing.
-
- ### Level 3 — Unit Tests
- Run the project's unit test suite (e.g. `npm run test`, `uv run pytest`).
- If tests fail:
- - Investigate the root cause
- - Fix the code (not the tests)
- - Re-run until all pass
-
- ### Level 4 — Integration Tests
- Run integration tests or manual verification as specified in the plan (e.g. `npm run test:e2e`, manual curl).
- Verify the feature works end-to-end.
-
- ### Level 5 — Code Review
- Run a code review on all changed files following the `code-review` skill process. This catches bugs that linting, types, and tests miss (security issues, logic errors, pattern violations).
-
- If the review finds critical or high severity issues, **fix them before proceeding**.
-
- ### Level 6 — File Drift Check
- Compare the files actually changed against the plan's task list:
-
- ```bash
- git diff --name-only
- git ls-files --others --exclude-standard
- ```
-
- Flag any files that were changed but are **not listed in any task**. These are potential scope leaks — unplanned additions that didn't get the same scrutiny as planned tasks. Report them in the completion summary so the user can review.
-
- ### Level 7 — Human Review (flag for user)
- List what the user should manually verify:
- - Specific behaviors to test in the browser or CLI
- - Edge cases to check
- - Any decisions made during implementation that the user should review
+ Level 5 triggers the `code-review` skill (not a slash command). Level 6 is the file-drift check specific to plan execution. Level 7 surfaces items for human verification.
 
  ## Step 6: Completion Report
 
@@ -0,0 +1,74 @@
+ # Validation Pyramid
+
+ Shared reference for the full validation sequence. Callers (`commands/execute.md`, `commands/validate.md`, `skills/verify/SKILL.md`, plus plans' `Validation Checklist`) pick the levels that apply to their scope.
+
+ Run levels **in order**. Do not skip a level. Do not proceed if a level fails — fix it first.
+
+ Commands below are generic examples; use the exact commands from the project's `CLAUDE.md` "Development Commands" section or the plan's checklist.
+
+ ## Level 1 — Lint & Format
+
+ Run the project's lint and format commands (e.g. `npm run lint`, `uv run ruff check .`).
+
+ If issues are found:
+
+ - Fix them automatically where the tool supports it (e.g. `--fix`)
+ - Re-run to confirm clean
+
+ ## Level 2 — Type Check
+
+ Run the project's type checker (e.g. `npm run typecheck`, `tsc --noEmit`, `uv run mypy .`).
+
+ Fix all type errors before continuing.
+
+ ## Level 3 — Unit Tests
+
+ Run the project's unit test suite (e.g. `npm run test`, `uv run pytest`).
+
+ If tests fail:
+
+ - Investigate the root cause
+ - Fix the code (not the tests, unless the test is wrong)
+ - Re-run until all pass
+
+ ## Level 4 — Integration Tests
+
+ Run integration tests if the project has them (e.g. `npm run test:e2e`, manual curl).
+
+ If not available, skip and note it in the report.
+
+ ## Level 5 — Code Review
+
+ Trigger the `code-review` skill on the changed files. This catches bugs that lint, types, and tests miss (security issues, logic errors, pattern violations).
+
+ If the review finds `critical` or `high` severity issues, **fix them before proceeding**.
+
+ ## Level 6 — File Drift Check (post-execution only)
+
+ Compare the files actually changed against the plan's task list:
+
+ ```bash
+ git diff --name-only
+ git ls-files --others --exclude-standard
+ ```
+
+ Flag any files that were changed but are **not listed in any task**. These are potential scope leaks — report them in the completion summary so the user can review.
+
+ Skip this level when validating outside of a plan (`/hopla:validate` or the `verify` skill without a plan).
+
+ ## Level 7 — Human Review
+
+ Flag for the user what they should verify manually:
+
+ - Specific behaviors to test in the browser or CLI
+ - Edge cases to check
+ - Any decisions made during implementation that the user should review
+
+ ## Which levels apply when
+
+ | Caller | Levels |
+ |---|---|
+ | `/hopla:validate` | 1–4 |
+ | `verify` skill | 1–4 + 7 |
+ | `/hopla:execute` | 1–7 |
+ | Plan's `Validation Checklist` | as specified by the plan, typically 1–5 or 1–7 |
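The Level 6 commands are easy to wire into a script. Below is a sketch of the drift check; the plan path and the bullet format it parses are illustrative assumptions, not part of the package.

```js
// Sketch of the Level 6 file-drift check (illustrative, not package code)
import { execSync } from "node:child_process";
import fs from "node:fs";

// Assumed plan format: affected files listed as lines like "- src/foo.ts"
const planned = new Set(
  fs.readFileSync(".agents/plans/current-plan.md", "utf8")
    .split("\n")
    .map((line) => line.match(/^- (\S+)$/)?.[1])
    .filter(Boolean)
);

const changed = execSync(
  "git diff --name-only && git ls-files --others --exclude-standard"
).toString().trim().split("\n").filter(Boolean);

// Changed-but-unplanned files are the potential scope leaks Level 6 flags
const drift = changed.filter((file) => !planned.has(file));
if (drift.length > 0) {
  console.warn("Changed outside the plan:", drift.join(", "));
}
```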
@@ -435,9 +435,15 @@ Create the following directories (with `.gitkeep` where needed):
  ├── rca/ <- /hopla:rca saves root cause analysis docs here (commit)
  ├── execution-reports/ <- the `execution-report` skill saves here (commit — needed for cross-session learning)
  ├── system-reviews/ <- /hopla:system-review saves here (commit — needed for feedback loop)
+ ├── audits/ <- persistent audit reports worth preserving (commit — opt-in; copy a code review here when you want to keep it)
  └── code-reviews/ <- the `code-review` skill saves here (do NOT commit — ephemeral, consumed by code-review-fix)
  ```
 
+ **Policy — `audits/` vs `code-reviews/`:**
+
+ - `code-reviews/` is **ephemeral working state**. Every run overwrites/adds files; `code-review-fix` consumes them and they become stale fast. Never commit.
+ - `audits/` is **persistent**. Move or copy a review here when it documents a finding the team should remember (security issue, architectural concern, post-mortem evidence). Commit.
+
  Add to `.gitignore` (create if it doesn't exist):
  ```
  .agents/code-reviews/
@@ -27,8 +27,9 @@ Read the following to understand the project:
  2. `README.md` — project overview and setup
  3. `package.json` or `pyproject.toml` — stack, dependencies, scripts
  4. `.agents/guides/` — if this directory exists, read any guides relevant to the feature being planned (e.g. `@.agents/guides/api-guide.md` when planning an API endpoint)
- 5. `MEMORY.md` (if it exists at project root or `~/.claude/`) — check for user preferences that affect this feature (UI patterns like modal vs inline, keyboard shortcuts, component conventions)
- 6. `.agents/execution-reports/` — if this directory exists, scan recent reports (last 3-5) for technical patterns discovered and gotchas relevant to the feature being planned. These contain real-world learnings from previous implementations that prevent re-discovering known issues.
+ 5. `.agents/specs/` — if this directory exists, scan for design specs that match the feature name. These come from the `brainstorm` skill and already document the chosen approach, files affected, edge cases, and open questions. If a matching spec exists, it is the authoritative design — the plan turns that design into tasks. If no spec exists and the feature is non-trivial, suggest running the `brainstorm` skill first.
+ 6. `MEMORY.md` (if it exists at project root or `~/.claude/`) — check for user preferences that affect this feature (UI patterns like modal vs inline, keyboard shortcuts, component conventions)
+ 7. `.agents/execution-reports/` — if this directory exists, scan recent reports (last 3-5) for technical patterns discovered and gotchas relevant to the feature being planned. These contain real-world learnings from previous implementations that prevent re-discovering known issues.
 
  Then run:
 
@@ -47,7 +48,7 @@ Investigate the areas of the codebase relevant to this feature:
  - Locate similar features already implemented to use as reference
  - Find the entry points that will need to be modified or extended
  - Identify potential conflicts or dependencies
- - **DRY check:** Before specifying new utility functions, constants, or helpers in the plan, search for existing implementations that can be reused or extended. DRY violations were the #1 code review finding across 28 implementations.
+ - **DRY check:** Before specifying new utility functions, constants, or helpers in the plan, search for existing implementations that can be reused or extended.
 
  Use the Grep tool to find relevant files (pattern: relevant keyword, case-insensitive).
 
@@ -60,7 +61,7 @@ For each existing table, API endpoint, or component the plan will modify, verify
  - **API endpoints:** Read the actual route handler. Confirm the request/response shape matches your assumptions.
  - **Components:** Read the component file. Confirm props, state, and data flow match your assumptions.
 
- Document verified assumptions in the plan's **Context References** with the exact file and line number. This prevents the #1 cause of mid-implementation redesigns: plans that assumed a field name, type, or constraint that didn't match reality.
+ Document verified assumptions in the plan's **Context References** with the exact file and line number. This prevents mid-implementation redesigns caused by plans that assumed a field name, type, or constraint that did not match reality.
 
  ### Data audit (required for features that consume existing data)
 
@@ -79,17 +80,17 @@ Based on research, define:
  - Any risks, edge cases, or gotchas to flag
  - What tests are needed
  - **Derived/computed values:** If any value is calculated from other fields, specify the exact formula including how stored values are interpreted (sign, units, semantics), AND how derived values propagate when inputs change (event system, reactivity, polling, etc.)
- - **Interaction states & edge cases:** For features involving interactive UI (forms, grids, keyboard navigation, wizards, CLI interactions), define a matrix of user interactions and their expected behavior. Cover: all keyboard shortcuts (both directions — e.g., Tab AND Shift+Tab), state transitions (empty → editing → saved → error), and boundary conditions (first item, last item, empty list, maximum items). This prevents iterative fix rounds that consumed up to 40% of session time in past implementations.
- - **API input validation:** For every API endpoint being created or modified, specify: required fields, field format constraints (e.g., "IMEI must be exactly 15 digits"), payload size limits, and what the user sees on validation failure. This was the #2 most common gap in past plans — validation was only added after code review in 4 of 7 implementations.
- - **Bidirectional data interactions:** If feature A updates data that feature B displays, does B need to react? If adding an item triggers validation, does editing trigger re-validation? Map all data mutation → side effect chains, not just keyboard navigation. Missed bidirectional interactions were a recurring planning blind spot.
+ - **Interaction states & edge cases:** For features involving interactive UI (forms, grids, keyboard navigation, wizards, CLI interactions), define a matrix of user interactions and their expected behavior. Cover: all keyboard shortcuts (both directions — e.g., Tab AND Shift+Tab), state transitions (empty → editing → saved → error), and boundary conditions (first item, last item, empty list, maximum items).
+ - **API input validation:** For every API endpoint being created or modified, specify: required fields, field format constraints (e.g., "IMEI must be exactly 15 digits"), payload size limits, and what the user sees on validation failure.
+ - **Bidirectional data interactions:** If feature A updates data that feature B displays, does B need to react? If adding an item triggers validation, does editing trigger re-validation? Map all data mutation → side effect chains, not just keyboard navigation.
  - **AI/LLM prompt tasks:** If the plan involves creating or modifying AI prompts (system prompts, prompt templates, LLM-based features), add an explicit task for testing against real data with 2-3 iteration cycles budgeted. AI prompt engineering rarely works on the first attempt.
- - **User preferences check:** Before specifying UI architecture (modal vs. inline, page vs. panel, dialog vs. drawer), verify against MEMORY.md and conversation history for established preferences. In past implementations, plans that specified modals were rejected because the user preferred inline panels — this caused rework. When no preference exists, note it as a decision point for the user to confirm.
- - **Reuse context analysis:** When a new view reuses an existing component in a different context (e.g., a list component in a "history" view vs. an "active" view), the plan MUST list what's different about the new context's requirements: different columns, different data filters, different interactions, different toolbar layout. Missed context differences caused 40%+ of unplanned work in past implementations.
+ - **User preferences check:** Before specifying UI architecture (modal vs. inline, page vs. panel, dialog vs. drawer), verify against `MEMORY.md` and conversation history for established preferences. When no preference exists, note it as a decision point for the user to confirm.
+ - **Reuse context analysis:** When a new view reuses an existing component in a different context (e.g., a list component in a "history" view vs. an "active" view), the plan MUST list what's different about the new context's requirements: different columns, different data filters, different interactions, different toolbar layout.
  - **Multi-phase plan guidance:** For features requiring 3+ phases, create an architectural plan (`backlog/NN-feature.md`) with schema, phase boundaries, and target architecture. When executing each phase, create a standalone plan (`phase-NX-description.md`) with full task-level detail following this template. The architectural plan is the spec; phase plans are the execution instructions. Each phase should have its own feature branch and PR.
- - **API surface enumeration (security/access control plans):** When the plan modifies access control, authorization, or data visibility, enumerate ALL API surfaces that serve the same data — REST endpoints, WebSocket handlers, Durable Object methods, and any other data paths. Each surface must be updated consistently. In past implementations, updating only the WebSocket path while missing the parallel REST endpoint caused a security gap that was only caught by code review. Add a task for each surface, not just the primary one.
- - **Role access matrix:** For features involving multiple user roles or multi-tenant access (admin, member, viewer, buyer, external user), define a matrix: what data does each role see? What endpoints does each role call? What filters apply per role? In past implementations, plans that didn't specify role-level access had authorization bugs discovered only during code review.
- - **External integration buffer:** If the feature integrates an external API or third-party service, budget 2x the estimated time. Document: do we have working test credentials? Is the SDK tested in our runtime (Workers, Node, edge, etc.)? Are there known deprecations or version constraints? External integrations consistently took 2-3x longer than planned in past implementations.
- - **UI iteration budget:** For features with significant UI (new pages, complex forms, interactive grids), note that UI specifications are provisional — expect 30-50% additional work for visual refinement based on user feedback. Specify what "good enough for v1" looks like vs. future polish. This prevents scope creep from being classified as plan failure.
+ - **API surface enumeration (security/access control plans):** When the plan modifies access control, authorization, or data visibility, enumerate ALL API surfaces that serve the same data — REST endpoints, WebSocket handlers, Durable Object methods, and any other data paths. Each surface must be updated consistently. Add a task for each surface, not just the primary one.
+ - **Role access matrix:** For features involving multiple user roles or multi-tenant access (admin, member, viewer, buyer, external user), define a matrix: what data does each role see? What endpoints does each role call? What filters apply per role?
+ - **External integration buffer:** If the feature integrates an external API or third-party service, budget 2x the estimated time. Document: do we have working test credentials? Is the SDK tested in our runtime (Workers, Node, edge, etc.)? Are there known deprecations or version constraints?
+ - **UI iteration budget:** For features with significant UI (new pages, complex forms, interactive grids), note that UI specifications are provisional — visual polish typically needs iteration on user feedback. Specify what "good enough for v1" looks like vs. future polish so scope creep is not classified as plan failure.
 
  ## Phase 5: Generate the Plan
 
@@ -112,7 +113,7 @@ Use this structure:
  - [Anything explicitly excluded]
 
  ## Likely Follow-ups
- [Features or changes naturally adjacent to this work that the user may request during or after execution. Historical data: 71% of sessions had scope expansion. Listing these upfront helps the executing agent handle them via the Scope Guard rather than improvising.]
+ [Features or changes naturally adjacent to this work that the user may request during or after execution. Listing these upfront helps the executing agent handle scope expansion via the Scope Guard rather than improvising.]
  - [Follow-up 1]
  - [Follow-up 2]
 
@@ -195,7 +196,7 @@ Scoring guide:
  ## Notes for Executing Agent
  [Any important context, warnings, or decisions made during planning that the executing agent needs to know]
 
- > **UI Styling Note:** UI styling specifications (colors, sizes, variants, labels, spacing) are `[provisional]` proposals. Historical data shows these change in 50%+ of implementations based on user feedback. Implement as specified but do not over-invest in pixel-perfect adherence — expect iteration.
+ > **UI Styling Note:** UI styling specifications (colors, sizes, variants, labels, spacing) are `[provisional]` proposals — expect them to change once the user sees the implementation. Implement as specified but do not over-invest in pixel-perfect adherence; plan for iteration.
  ```
 
  ---
@@ -206,7 +207,7 @@ After generating the plan, count the implementation tasks (excluding test tasks)
 
  - **3–7 tasks:** Optimal size. Proceed as-is.
  - **8–11 tasks:** Consider grouping tasks into logical phases with intermediate commit points. Add a `## Phase Boundaries` section to the plan listing where commits should happen.
- - **12+ tasks:** The plan should be split into multiple plans or phased with mandatory intermediate commits. Historical data: plans with 12+ tasks scored 6/10 alignment vs 10/10 for 3–7 task plans. Add phase boundaries and consider whether independent task groups can be separate plans.
+ - **12+ tasks:** The plan should be split into multiple plans or phased with mandatory intermediate commits. Large plans tend to drift during execution; phase boundaries give reviewers and the executing agent natural checkpoints. Consider whether independent task groups can be separate plans.
 
  ---
 
@@ -231,7 +232,7 @@ Before saving the draft, review the plan against these criteria:
  - [ ] **Plan size checked:** If >8 tasks, phase boundaries are defined with intermediate commit points. If >12 tasks, split justification is provided or phases are created.
  - [ ] **Likely follow-ups listed:** If the Out of Scope section has items, the Likely Follow-ups section is populated with naturally adjacent work the user may request
  - [ ] **API surface enumeration (if security/access plan):** All parallel API surfaces (REST, WebSocket, DO) that serve the same data are listed with a task for each
- - [ ] **N+1 query check:** For every task that writes database queries or API calls, verify: is any call inside a loop? Could it be batched? Are there duplicate existence checks before mutations? N+1 queries were found in 5 of 13 recent implementations.
+ - [ ] **N+1 query check:** For every task that writes database queries or API calls, verify: is any call inside a loop? Could it be batched? Are there duplicate existence checks before mutations?
 
  ## Phase 7: Save Draft and Enter Review Loop
 
@@ -14,36 +14,9 @@ If a `.claude/commands/validate.md` exists at the project root, use the commands
 
  ## Step 2: Run the Validation Pyramid
 
- Execute each level in order. **Do not skip levels. Do not proceed if a level fails — fix it first.**
+ Execute levels **1–4** from `commands/guides/validation-pyramid.md` (same repo). Do not skip levels. Do not proceed if a level fails — fix it first.
 
- ### Level 1 — Lint & Format
-
- Run the project's lint and format commands (e.g. `npm run lint`, `uv run ruff check .`).
-
- If issues are found:
- - Fix them automatically if the tool supports it (e.g. `--fix`)
- - Re-run to confirm clean
-
- ### Level 2 — Type Check
-
- Run the project's type checker (e.g. `npm run typecheck`, `uv run mypy .`).
-
- Fix all type errors before continuing.
-
- ### Level 3 — Unit Tests
-
- Run the project's test suite (e.g. `npm run test`, `uv run pytest`).
-
- If tests fail:
- - Investigate the root cause
- - Fix the code (not the tests, unless the test is wrong)
- - Re-run until all pass
-
- ### Level 4 — Integration Tests
-
- Run integration tests if the project has them (e.g. `npm run test:e2e`).
-
- If not available, skip and note it in the report.
+ Use the exact commands from the project's `CLAUDE.md` "Development Commands" section. If a `.claude/commands/validate.md` exists at the project root, use the commands defined there instead.
 
  ## Step 3: Summary Report
 
@@ -72,4 +45,4 @@ If anything failed and could not be fixed, list the remaining issues and suggest
  ## Next Step
 
  After validation passes, suggest:
- > "All validation levels passed. Consider running the `code-review` skill for a deeper analysis — code review catches bugs in 79% of implementations that pass automated validation (stale closures, missing input validation, route shadowing, unhandled promise rejections). Run the `code-review` skill to check, or the `git` skill (say "commit") to commit directly."
+ > "All validation levels passed. Consider triggering the `code-review` skill for a deeper analysis — it catches classes of bugs that lint/types/tests miss (stale closures, missing input validation, route shadowing, unhandled promise rejections). Say 'review the code' to trigger it, or say 'commit' to use the `git` skill directly."
@@ -13,6 +13,18 @@ function run(cmd) {
  }
  }
 
+ function excerptClaudeMd(content) {
+ const lines = content.split("\n");
+ // Prefer the first `---` separator after the opening heading and first section
+ for (let i = 5; i < Math.min(lines.length, 120); i++) {
+ if (lines[i].trim() === "---") {
+ return lines.slice(0, i).join("\n").trimEnd();
+ }
+ }
+ // No separator within a reasonable window — cap at 60 lines
+ return lines.slice(0, Math.min(lines.length, 60)).join("\n").trimEnd();
+ }
+
  function discoverSkills() {
  // Try plugin context first: ../skills/ relative to this script
  const hookDir = import.meta.dirname;
@@ -85,11 +97,11 @@ async function main() {
  }
  }
 
- // CLAUDE.md summary (first 20 lines)
+ // CLAUDE.md excerpt cut at a natural boundary, not a fixed line count
  const claudeMdPath = path.join(process.cwd(), "CLAUDE.md");
  if (fs.existsSync(claudeMdPath)) {
- const content = fs.readFileSync(claudeMdPath, "utf8").split("\n").slice(0, 20).join("\n");
- lines.push(`Project rules (CLAUDE.md excerpt):\n${content}`);
+ const excerpt = excerptClaudeMd(fs.readFileSync(claudeMdPath, "utf8"));
+ lines.push(`Project rules (CLAUDE.md excerpt):\n${excerpt}`);
  }
 
  // Auto-discover available skills
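A quick illustration of the new boundary logic, assuming `excerptClaudeMd` from the hunk above is in scope; the sample CLAUDE.md content is made up.

```js
// The first "---" at index >= 5 becomes the cut point, so the excerpt ends
// right before the separator instead of at a fixed 20 lines as in 1.15.0.
const sample = [
  "# CLAUDE.md",
  "",
  "## Development Commands",
  "- npm run lint",
  "- npm run test",
  "",
  "---", // index 6: the scan starting at i = 5 matches here
  "## Long reference section",
].join("\n");

console.log(excerptClaudeMd(sample)); // prints everything before the "---"
```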
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@hopla/claude-setup",
- "version": "1.15.0",
+ "version": "1.17.0",
  "description": "Hopla team agentic coding system for Claude Code",
  "type": "module",
  "bin": {
@@ -16,6 +16,10 @@
  "hooks/",
  ".claude-plugin/"
  ],
+ "scripts": {
+ "prepublishOnly": "node scripts/check-versions.js",
+ "check-versions": "node scripts/check-versions.js"
+ },
  "engines": {
  "node": ">=18"
  },
@@ -14,7 +14,7 @@ Read `CLAUDE.md` or `AGENTS.md` to understand project standards and patterns.
 
  If `.agents/guides/` exists, read any guides relevant to the files being reviewed (e.g. `@.agents/guides/api-guide.md` when reviewing API changes). These guides define the expected patterns for specific task types.
 
- If `.agents/guides/review-checklist.md` exists, read it and apply the project-specific checks it defines in addition to the standard checks below. Project-specific checklists cover framework gotchas and domain anti-patterns unique to the project (e.g., AG Grid stale closures, Hono route ordering).
+ If `.agents/guides/review-checklist.md` exists, read it and apply the project-specific checks it defines in addition to the standard checks. Project-specific checklists cover framework gotchas and domain anti-patterns unique to the project (e.g., grid stale closures, route ordering).
 
  ## Step 2: Identify Changed Files
 
@@ -28,42 +28,16 @@ Read each changed or new file in its entirety — not just the diff.
 
  ## Step 3: Analyze for Issues
 
- For each changed file, look for:
-
- **1. Logic Errors**
- - Off-by-one errors, incorrect conditionals
- - Missing error handling, unhandled edge cases
- - Race conditions or async issues
- - Stale closures — callbacks passed to imperative APIs (grids, charts, maps) that capture stale state instead of using refs or stable references
- - Unhandled promise rejections — `.then()` without `.catch()`, async calls without `try/catch` in non-void contexts
- - Side effects inside JSX render — mutations of arrays/objects inside `.map()` in JSX (breaks React strict mode, causes double-execution bugs)
- - Stale dependency arrays — for every new `useState`/`useRef` variable introduced in the diff, verify it appears in the dependency arrays of `useEffect`, `useCallback`, or `useMemo` that reference it. Missing deps cause stale closures — this was the #1 React bug category across 28 implementations
-
- **2. Security Issues**
- - Exposed secrets or API keys
- - SQL/command injection vulnerabilities
- - Missing input validation on API endpoints — required fields, format constraints (regex, length), payload size limits
- - Insecure data handling — raw user input in queries, responses exposing internal data or stack traces
- - XSS vulnerabilities (frontend)
- - Multi-user authorization context — for multi-tenant apps, verify each endpoint filters by the correct context (e.g., active org vs personal org, admin vs viewer). Check that middleware/auth guards match the intended audience for each route
-
- **3. Performance Problems**
- - Unnecessary re-renders (React)
- - N+1 queries — database queries or API calls inside loops (`for`, `.map`, `.forEach`), duplicate existence checks before mutations, sequential operations that could use `Promise.all()` or batch SQL. This was found in 5 of 13 recent implementations
- - Memory leaks
-
- **4. Code Quality**
- - DRY violations — before flagging, search for similar functions/constants elsewhere in the codebase; suggest extraction to a shared module if the same logic exists in multiple places
- - Poor naming or overly complex functions
- - Missing TypeScript types or `any` usage
-
- **5. Pattern Adherence**
- - Follows project conventions from CLAUDE.md
- - Consistent with existing codebase style
-
- **6. Route & Middleware Ordering**
- - Static routes defined AFTER parameterized routes (e.g., `/users/all` after `/users/:id`) causing shadowing — the parameterized route captures requests meant for the static one
- - Middleware applied in incorrect order (e.g., auth after route handler, CORS after response sent)
+ Apply the full checklist in `checklist.md` (same directory). It covers:
+
+ 1. Logic errors (stale closures, unhandled rejections, missing deps)
+ 2. Security (secrets, injection, input validation, multi-tenant auth)
+ 3. Performance (N+1, re-renders, memory leaks)
+ 4. Code quality (DRY, naming, types)
+ 5. Pattern adherence (project conventions)
+ 6. Route & middleware ordering
+
+ Read `checklist.md` before reviewing so you apply every category.
 
  ## Step 4: Verify Issues Are Real
 
@@ -73,9 +47,10 @@ Before reporting, confirm each issue is legitimate:
 
  ## Step 5: Output Report
 
- Save to `.agents/code-reviews/[descriptive-name].md`
+ Save to `.agents/code-reviews/[descriptive-name].md`.
 
  **Format for each issue:**
+
  ```
  severity: critical | high | medium | low
  file: path/to/file.ts
@@ -88,6 +63,7 @@ suggestion: [how to fix it]
  If no issues found: "Code review passed. No technical issues detected."
 
  **Rules:**
+
  - Be specific — line numbers, not vague complaints
  - Focus on real bugs, not style preferences (linting handles that)
  - Flag security issues as `critical`
@@ -96,4 +72,5 @@ If no issues found: "Code review passed. No technical issues detected."
  ## Next Step
 
  After the review, suggest:
+
  > "Code review saved to `.agents/code-reviews/[name].md`. If issues were found, run `/hopla:code-review-fix .agents/code-reviews/[name].md` to fix them. If the review passed clean, proceed to the `execution-report` skill."
@@ -0,0 +1,44 @@
+ # Code Review Checklist
+
+ Apply every category to every changed file. Severity guidance is in the parent `SKILL.md`.
+
+ ## 1. Logic Errors
+
+ - Off-by-one errors, incorrect conditionals
+ - Missing error handling, unhandled edge cases
+ - Race conditions or async issues
+ - Stale closures — callbacks passed to imperative APIs (grids, charts, maps) that capture stale state instead of using refs or stable references
+ - Unhandled promise rejections — `.then()` without `.catch()`, async calls without `try/catch` in non-void contexts
+ - Side effects inside JSX render — mutations of arrays/objects inside `.map()` in JSX (breaks React strict mode, causes double-execution bugs)
+ - Stale dependency arrays — for every new `useState`/`useRef` variable introduced in the diff, verify it appears in the dependency arrays of `useEffect`, `useCallback`, or `useMemo` that reference it. Missing deps cause stale closures and are a recurring source of React bugs.
+
+ ## 2. Security Issues
+
+ - Exposed secrets or API keys
+ - SQL/command injection vulnerabilities
+ - Missing input validation on API endpoints — required fields, format constraints (regex, length), payload size limits
+ - Insecure data handling — raw user input in queries, responses exposing internal data or stack traces
+ - XSS vulnerabilities (frontend)
+ - Multi-user authorization context — for multi-tenant apps, verify each endpoint filters by the correct context (e.g., active org vs personal org, admin vs viewer). Check that middleware/auth guards match the intended audience for each route.
+
+ ## 3. Performance Problems
+
+ - Unnecessary re-renders (React)
+ - N+1 queries — database queries or API calls inside loops (`for`, `.map`, `.forEach`), duplicate existence checks before mutations, sequential operations that could use `Promise.all()` or batch SQL
+ - Memory leaks (event listeners not detached, timers not cleared, closures holding large objects)
+
+ ## 4. Code Quality
+
+ - DRY violations — before flagging, search for similar functions/constants elsewhere in the codebase; suggest extraction to a shared module if the same logic exists in multiple places
+ - Poor naming or overly complex functions
+ - Missing TypeScript types or `any` usage
+
+ ## 5. Pattern Adherence
+
+ - Follows project conventions from `CLAUDE.md`
+ - Consistent with existing codebase style
+
+ ## 6. Route & Middleware Ordering
+
+ - Static routes defined AFTER parameterized routes (e.g., `/users/all` after `/users/:id`) causing shadowing — the parameterized route captures requests meant for the static one
+ - Middleware applied in incorrect order (e.g., auth after route handler, CORS after response sent)
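As a concrete illustration of section 6, a minimal Express-style sketch of route shadowing. This is illustrative only; Express is not a dependency of this package.

```js
import express from "express";

const app = express();

// BUG: the parameterized route is registered first, so GET /users/all is
// captured here with req.params.id === "all" and never reaches the static route.
app.get("/users/:id", (req, res) => res.json({ id: req.params.id }));
app.get("/users/all", (req, res) => res.json({ all: true })); // shadowed

// Fix: register the static route before the parameterized one.
```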
@@ -22,97 +22,30 @@ Also check for recent code reviews:
  ls -t .agents/code-reviews/ 2>/dev/null | head -5
  ```
 
- If a code review exists for this feature, note its path for the Code Review Findings section below.
+ If a code review exists for this feature, note its path for the Code Review Findings section.
 
- ## Step 2: Generate Report
+ ## Step 2: Generate the Report
 
- Save to: `.agents/execution-reports/[feature-name].md`
+ Save to: `.agents/execution-reports/[feature-name].md`.
 
- Use the following structure:
+ Use the full structure documented in `report-structure.md` (same directory). It covers:
 
- ---
-
- ### Meta Information
-
- - **Plan file:** [path to the plan that guided this implementation]
- - **Files added:** [list with paths]
- - **Files modified:** [list with paths]
- - **Lines changed:** +X -Y
-
- ### Validation Results
-
- - Syntax & Linting: ✓/✗ [details if failed]
- - Type Checking: ✓/✗ [details if failed]
- - Unit Tests: ✓/✗ [X passed, Y failed]
- - Integration Tests: ✓/✗ [X passed, Y failed]
-
- ### Code Review Findings
-
- - **Code review file:** [path to `.agents/code-reviews/[name].md`, or "Not run"]
- - **Issues found:** [count by severity: X critical, Y high, Z medium, W low]
- - **Issues fixed before this report:** [count]
- - **Key findings:** [1-2 sentence summary of the most significant issues found]
-
- ### What Went Well
-
- List specific things that worked smoothly:
- - [concrete examples]
-
- ### Challenges Encountered
-
- List specific difficulties encountered:
- - [what was difficult and why]
-
- ### Bugs Encountered
-
- Categorize each bug found during implementation:
-
- | Bug | Category | Found By | Severity |
- |-----|----------|----------|----------|
- | [description] | stale closure / validation / race condition / styling / scope mismatch / type error / route ordering / other | lint / types / tests / code review / manual testing | critical / high / medium / low |
+ - Meta information (plan file, files added/modified, lines changed)
+ - Validation results
+ - Code review findings
+ - What went well
+ - Challenges encountered
+ - Bugs encountered (with categorization table)
+ - Divergences from plan
+ - Scope assessment
+ - Skipped items
+ - Technical patterns discovered (with ready-to-paste CLAUDE.md entry)
+ - Recommendations
 
- If no bugs were encountered, write "No bugs encountered during implementation."
-
- ### Divergences from Plan
-
- For each divergence from the original plan:
-
- **[Divergence Title]**
- - **Planned:** [what the plan specified]
- - **Actual:** [what was implemented instead]
- - **Reason:** [why this divergence occurred]
- - **Type:** Better approach found | Plan assumption wrong | Security concern | Performance issue | Other
-
- ### Scope Assessment
-
- - **Planned tasks:** [number of tasks in the original plan]
- - **Executed tasks:** [number of tasks actually completed]
- - **Unplanned additions:** [count and brief description of work not in the original plan]
- - **Scope accuracy:** On target | Under-scoped (took more work than planned) | Over-scoped (simpler than expected)
-
- ### Skipped Items
-
- List anything from the plan that was not implemented:
- - [what was skipped] — Reason: [why]
-
- ### Technical Patterns Discovered
-
- New gotchas, patterns, or conventions learned during this implementation that should be documented:
-
- - **Pattern/Gotcha:** [description]
- - **Where it applies:** [what type of feature or context triggers this]
- - **Ready-to-paste CLAUDE.md entry:** [Write the EXACT text that should be added to the project's CLAUDE.md to prevent this gotcha in future features. Format it as a bullet point under the appropriate section. If it belongs in a guide instead, write the exact text for the guide. Do not write vague suggestions like "document this" — write the actual content so the system reviewer can apply it directly.]
-
- If nothing new was discovered, write "No new patterns discovered."
-
- ### Recommendations
-
- Based on this implementation, what should change for next time?
- - Plan command improvements: [suggestions]
- - Execute command improvements: [suggestions]
- - CLAUDE.md additions: [suggestions]
+ Read `report-structure.md` before writing so every section is filled correctly.
 
  ## Next Step
 
  After the report is saved, suggest:
- > "Execution report saved to `.agents/execution-reports/[name].md`. Run the `git` skill (say "commit") to commit your changes."
+
+ > "Execution report saved to `.agents/execution-reports/[name].md`. Use the `git` skill (say 'commit') to save your changes."
@@ -0,0 +1,88 @@
+ # Execution Report Structure
+
+ Fill every section. Write "Not applicable" rather than leaving a section blank — empty sections make it unclear whether the check was performed.
+
+ ## Meta Information
+
+ - **Plan file:** [path to the plan that guided this implementation]
+ - **Files added:** [list with paths]
+ - **Files modified:** [list with paths]
+ - **Lines changed:** +X −Y
+
+ ## Validation Results
+
+ - Syntax & Linting: ✓/✗ [details if failed]
+ - Type Checking: ✓/✗ [details if failed]
+ - Unit Tests: ✓/✗ [X passed, Y failed]
+ - Integration Tests: ✓/✗ [X passed, Y failed]
+
+ ## Code Review Findings
+
+ - **Code review file:** [path to `.agents/code-reviews/[name].md`, or "Not run"]
+ - **Issues found:** [count by severity: X critical, Y high, Z medium, W low]
+ - **Issues fixed before this report:** [count]
+ - **Key findings:** [1-2 sentence summary of the most significant issues found]
+
+ ## What Went Well
+
+ List specific things that worked smoothly:
+
+ - [concrete examples]
+
+ ## Challenges Encountered
+
+ List specific difficulties:
+
+ - [what was difficult and why]
+
+ ## Bugs Encountered
+
+ Categorize each bug found during implementation:
+
+ | Bug | Category | Found By | Severity |
+ |-----|----------|----------|----------|
+ | [description] | stale closure / validation / race condition / styling / scope mismatch / type error / route ordering / other | lint / types / tests / code review / manual testing | critical / high / medium / low |
+
+ If no bugs were encountered, write "No bugs encountered during implementation."
+
+ ## Divergences from Plan
+
+ For each divergence:
+
+ **[Divergence Title]**
+
+ - **Planned:** [what the plan specified]
+ - **Actual:** [what was implemented instead]
+ - **Reason:** [why this divergence occurred]
+ - **Type:** Better approach found | Plan assumption wrong | Security concern | Performance issue | Other
+
+ ## Scope Assessment
+
+ - **Planned tasks:** [number in the original plan]
+ - **Executed tasks:** [number actually completed]
+ - **Unplanned additions:** [count and brief description of work not in the original plan]
+ - **Scope accuracy:** On target | Under-scoped (took more work than planned) | Over-scoped (simpler than expected)
+
+ ## Skipped Items
+
+ List anything from the plan that was not implemented:
+
+ - [what was skipped] — Reason: [why]
+
+ ## Technical Patterns Discovered
+
+ New gotchas, patterns, or conventions learned during this implementation that should be documented:
+
+ - **Pattern/Gotcha:** [description]
+ - **Where it applies:** [what type of feature or context triggers this]
+ - **Ready-to-paste CLAUDE.md entry:** [Write the EXACT text that should be added to the project's CLAUDE.md to prevent this gotcha in future features. Format it as a bullet point under the appropriate section. If it belongs in a guide instead, write the exact text for the guide. Do not write vague suggestions like "document this" — write the actual content so the system reviewer can apply it directly.]
+
+ If nothing new was discovered, write "No new patterns discovered."
+
+ ## Recommendations
+
+ Based on this implementation, what should change for next time?
+
+ - Plan command improvements: [suggestions]
+ - Execute command improvements: [suggestions]
+ - CLAUDE.md additions: [suggestions]
@@ -0,0 +1,110 @@
1
+ ---
2
+ name: migration
3
+ description: "Phased migration workflow for upgrading dependencies, switching frameworks, or moving between systems. Use when the user says 'migrate', 'upgrade', 'switch from X to Y', 'move to', 'replace library', 'major version bump', 'deprecated', or when changing a framework/runtime/database version. Do NOT use for greenfield features or small refactors — use plan-feature or refactoring instead."
4
+ ---
5
+
6
+ > 🌐 **Language:** All user-facing output must match the user's language. Code, paths, and commands stay in English.
7
+
8
+ # Migration: Move Systems Without Breaking Them
9
+
10
+ ## Iron Rule
11
+
12
+ **Every migration needs a rollback plan before the first line changes.** If you cannot describe how to undo the migration in one sentence, you are not ready to start it.
13
+
14
+ ## Step 1: Classify the Migration
15
+
16
+ Ask the user (one question at a time):
17
+
18
+ - **Type**: dependency upgrade (major version), framework switch (e.g. Express → Hono), runtime switch (Node → Bun), data store (SQLite → Postgres), API version (v1 → v2)
19
+ - **Scope**: one module, one service, or the whole codebase?
20
+ - **Downtime tolerance**: blue/green, zero-downtime (dual-run), acceptable window?
21
+ - **Deadline driver**: deprecation, security, performance, or opportunistic?
22
+
23
+ This framing determines whether the work is a single PR or a multi-phase plan.
24
+
25
+ ## Step 2: Audit the Surface
26
+
27
+ Map **everything** that will be affected:
28
+
29
+ - Imports / usages of the old API (use `grep -r` or `rg` across the codebase)
30
+ - Public contracts that depend on current behavior (downstream callers, API consumers)
31
+ - Build / deploy steps tied to the current version
32
+ - Test suites that assume old behavior
33
+ - Documentation mentioning the old API
34
+
35
+ Write the inventory to `.agents/specs/migration-<topic>.md` with counts — "47 import sites across 12 files". Numbers help you size the work honestly.
36
+
37
+ ## Step 3: Read the Upgrade Notes
38
+
39
+ Before writing code, read the target's official migration guide / changelog end to end. Note:
40
+
41
+ - **Breaking changes** (renamed APIs, removed APIs, default-behavior flips)
42
+ - **Deprecations** (will break in N+2, not now)
43
+ - **Required minimum versions** for peer dependencies
44
+ - **Data-shape changes** that require a migration script
45
+
46
+ If the target project has no migration guide, treat it as higher risk and budget more time for exploration.
47
+
48
+ ## Step 4: Choose a Strategy
49
+
50
+ | Strategy | When to use |
51
+ |---|---|
52
+ | **Big bang** | Small codebase, low downstream coupling, clean cut possible |
53
+ | **Incremental with adapter** | Many call sites — introduce a thin wrapper that presents the old API on top of the new, migrate call sites one by one |
54
+ | **Dual-run (strangler fig)** | High-risk or zero-downtime — run both old and new side by side, shift traffic gradually |
55
+ | **Branch by abstraction** | Internal refactor + external API stays stable — hide the switch behind an interface |
56
+
57
+ Pick one and document the trade-off in the spec file.
58
+
59
+ ## Step 5: Plan the Phases
60
+
61
+ For anything non-trivial, run `/hopla:plan-feature` with `migration-<topic>` as the feature name. The plan should specify:
62
+
63
+ - **Phase boundaries** (compatibility shim in, call sites migrated, shim removed)
64
+ - **Rollback plan per phase** (revert commit? feature flag? dual-write?)
65
+ - **Validation at each phase** (test suite green, feature flags covered, canary metrics)
66
+ - **Data migration script** (if the storage layer changes) — idempotent, resumable (see the sketch after this list)
67
+
68
+ Each phase should land as its own PR.
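+
+ A minimal sketch of an idempotent, resumable data migration, assuming a hypothetical `db` client with `query`/`execute` helpers and a `migration_progress` checkpoint table (all names illustrative):
+
+ ```ts
+ import { db } from "./db"; // assumed client exposing query/execute helpers
+
+ const BATCH = 1000;
+
+ // Batched backfill: safe to re-run because each batch write is idempotent
+ // and progress is checkpointed, so a crash mid-run resumes where it stopped.
+ export async function backfillNormalizedEmail(): Promise<void> {
+   const progress = await db.query<{ last_id: number }>(
+     "SELECT last_id FROM migration_progress WHERE name = 'backfill_email'"
+   );
+   let lastId = progress[0]?.last_id ?? 0;
+
+   while (true) {
+     const batch = await db.query<{ id: number }>(
+       "SELECT id FROM users WHERE id > $1 ORDER BY id LIMIT $2",
+       [lastId, BATCH]
+     );
+     if (batch.length === 0) break;
+     lastId = batch[batch.length - 1].id;
+
+     // Idempotent: re-running this UPDATE on the same ids yields the same state.
+     await db.execute(
+       "UPDATE users SET email_normalized = lower(email) WHERE id = ANY($1)",
+       [batch.map((r) => r.id)]
+     );
+     // Checkpoint after the batch, so the next run starts here.
+     await db.execute(
+       `INSERT INTO migration_progress (name, last_id) VALUES ('backfill_email', $1)
+        ON CONFLICT (name) DO UPDATE SET last_id = $1`,
+       [lastId]
+     );
+   }
+ }
+ ```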
69
+
70
+ ## Step 6: Migrate With Guardrails
71
+
72
+ Execute phase by phase. After every phase:
73
+
74
+ - Run the full validation pyramid (`commands/guides/validation-pyramid.md`)
75
+ - Check for mixed-version pitfalls — modules importing both the old and new API in the same request
76
+ - Confirm the rollback path still works (git revert + redeploy, or feature flag off)
77
+
78
+ Never advance to the next phase if validation failed on the previous one.
79
+
80
+ ## Step 7: Remove the Old Path
81
+
82
+ Once every call site is migrated and observed green in production (where applicable):
83
+
84
+ - Delete the compatibility shim
85
+ - Remove the old dependency (`npm uninstall`, etc.)
86
+ - Remove the feature flag
87
+ - Update documentation to reference only the new path
88
+
89
+ This "cleanup" step is part of the migration. A migration left half-done with a permanent shim is worse than no migration.
90
+
91
+ ## Rules
92
+
93
+ - Never migrate on a Friday or before a public release
94
+ - Keep the rollback plan alive at every phase — if it stops working, pause
95
+ - Track breaking changes from the target's changelog in the spec, not in memory
96
+ - Data migrations must be idempotent and resumable — migrations fail mid-run
97
+ - If the migration drags past its original estimate by 2x, stop and reassess scope
98
+
99
+ ## Integration
100
+
101
+ - Use `/hopla:plan-feature` to generate the phased plan from the Step 2 inventory
102
+ - Use the `worktree` skill to keep the migration isolated from other work
103
+ - The `code-review` skill (checklist sections 2 and 5) catches dual-import patterns and pattern drift
104
+ - The `performance` skill verifies the migration did not regress hot paths
105
+
106
+ ## Next Step
107
+
108
+ Once the migration is planned:
109
+
110
+ > "Migration classified and inventoried. Saved spec to `.agents/specs/migration-<topic>.md`. Run `/hopla:plan-feature` to generate the phased implementation plan."
@@ -0,0 +1,102 @@
1
+ ---
2
+ name: performance
3
+ description: "Measured performance optimization workflow. Use when the user says 'slow', 'optimize', 'performance', 'bottleneck', 'too slow', 'high memory', 'high CPU', 'lento', 'tarda mucho', or when asking to make something faster. Do NOT use for correctness bugs or new features — use the debug or plan-feature skills instead."
4
+ ---
5
+
6
+ > 🌐 **Language:** All user-facing output must match the user's language. Code, paths, and commands stay in English.
7
+
8
+ # Performance: Measure Before You Change
9
+
10
+ ## Iron Rule
11
+
12
+ **No optimization without a measurement.** Every performance change must start with a number (latency, memory, query count) and end with a comparison. Guessing at hot paths wastes time and often makes things slower.
13
+
14
+ ## Step 1: Clarify the Symptom
15
+
16
+ Ask the user (one question at a time):
17
+
18
+ - What operation feels slow? (page load, API request, build, test run, specific query)
19
+ - How slow is it? (exact number if possible — "3 seconds", "30 MB", "10s with 100 items")
20
+ - What is "fast enough"? (target: < 500 ms p95, < 100 MB, etc.)
21
+ - Is it reproducible, or only under load?
22
+
23
+ Without a concrete target, you cannot declare the optimization done.
24
+
25
+ ## Step 2: Measure the Baseline
26
+
27
+ Pick the right tool for the symptom:
28
+
29
+ | Symptom | Measurement |
30
+ |---|---|
31
+ | Slow endpoint | `curl -w "%{time_total}"` or APM dashboard (see `guides/mcp-integration.md` for MCP options) |
32
+ | Slow DB query | `EXPLAIN ANALYZE` (Postgres), `EXPLAIN` (SQLite/MySQL) |
33
+ | Slow frontend render | Chrome DevTools Performance tab, React Profiler |
34
+ | Memory growth | `process.memoryUsage()` snapshots, heap dumps |
35
+ | Slow build/test | Time the command, compare against a clean cache |
36
+
37
+ Record the baseline with units. "3.2 s to load /dashboard with 1000 items" — not "it feels slow".
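+
+ For a repeatable number, a minimal harness sketch (assuming Node 18+, so `fetch` and `performance` are global; the URL and run count are placeholders):
+
+ ```ts
+ const URL = "http://localhost:3000/dashboard"; // placeholder endpoint
+ const RUNS = 20;
+
+ // Hit the same endpoint N times and report p95 with units.
+ async function baseline(): Promise<void> {
+   const samples: number[] = [];
+   for (let i = 0; i < RUNS; i++) {
+     const start = performance.now();
+     await fetch(URL);
+     samples.push(performance.now() - start);
+   }
+   samples.sort((a, b) => a - b);
+   const p95 = samples[Math.ceil(samples.length * 0.95) - 1]; // nearest-rank p95
+   console.log(`p95: ${p95.toFixed(1)} ms over ${RUNS} runs`);
+ }
+
+ baseline();
+ ```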
38
+
39
+ ## Step 3: Identify the Hot Path
40
+
41
+ Rank suspects by where the baseline measurement actually spends its time:
42
+
43
+ - **N+1 queries** — are there loops calling the DB or an API?
44
+ - **Missing indexes** — does `EXPLAIN ANALYZE` show a seq scan on a large table?
45
+ - **Synchronous I/O** — is there a blocking call that could be awaited in parallel (`Promise.all`)?
46
+ - **Rendering** — are components re-rendering with unchanged props? Are lists virtualized?
47
+ - **Algorithm** — is there an O(n²) that could be O(n) with a map? (see the sketch after this list)
48
+ - **Caching** — is the same computation repeated without memoization?
49
+
50
+ Do **not** guess. Use the profiler output or query plan to pick one suspect.
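+
+ As one concrete suspect, the algorithmic case often looks like this (hypothetical `User`/`Order` shapes for illustration):
+
+ ```ts
+ type User = { id: number; name: string };
+ type Order = { userId: number; total: number };
+
+ // Before (O(n²)): a linear find() scan inside the loop.
+ function attachNamesSlow(orders: Order[], users: User[]) {
+   return orders.map((o) => ({
+     ...o,
+     userName: users.find((u) => u.id === o.userId)?.name,
+   }));
+ }
+
+ // After (O(n)): build the index once, then look up in constant time.
+ function attachNamesFast(orders: Order[], users: User[]) {
+   const byId = new Map(users.map((u) => [u.id, u.name] as const));
+   return orders.map((o) => ({ ...o, userName: byId.get(o.userId) }));
+ }
+ ```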
51
+
52
+ ## Step 4: Apply One Change
53
+
54
+ Change one thing. Not three.
55
+
56
+ - Add the index
57
+ - Replace the loop with `Promise.all` (sketch below)
58
+ - Memoize the expensive selector
59
+ - Batch the API calls
60
+ - Virtualize the list
61
+
62
+ Keep the diff minimal so you can attribute the delta to this change alone.
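+
+ For example, the `Promise.all` change in isolation (assuming the calls are independent and the downstream service tolerates the concurrency; `getUser` is a placeholder fetcher):
+
+ ```ts
+ // Placeholder fetcher for illustration.
+ async function getUser(id: number): Promise<{ id: number }> {
+   const res = await fetch(`http://localhost:3000/users/${id}`);
+   return res.json();
+ }
+
+ // Before: sequential awaits; total time is the SUM of request latencies.
+ async function loadSequential(ids: number[]) {
+   const users = [];
+   for (const id of ids) {
+     users.push(await getUser(id));
+   }
+   return users;
+ }
+
+ // After: the one change. Requests run concurrently, so total time is
+ // roughly the latency of the slowest request.
+ async function loadParallel(ids: number[]) {
+   return Promise.all(ids.map((id) => getUser(id)));
+ }
+ ```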
63
+
64
+ ## Step 5: Measure Again
65
+
66
+ Re-run the exact same measurement from Step 2 under the same conditions. Report:
67
+
68
+ - Baseline: X
69
+ - After change: Y
70
+ - Delta: (X − Y) / X × 100 %
71
+ - Target: [target from Step 1]
72
+
73
+ If you did not hit the target, go back to Step 3 and pick the next suspect. If you regressed, revert and rethink.
74
+
75
+ ## Step 6: Regression Guard
76
+
77
+ Once the target is met, add a guard so future changes do not erode the win:
78
+
79
+ - A test with a timeout assertion (e.g. `expect(duration).toBeLessThan(500)`)
80
+ - A query count assertion (e.g. `expect(dbQueries).toHaveLength(1)`)
81
+ - A bundle size budget, memory budget, or frame budget if applicable
82
+
83
+ Without a guard, the win decays.
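+
+ A minimal sketch of a duration guard, assuming vitest (adapt the import to your test runner; the module name and the 500 ms budget are placeholders taken from the Step 1 target):
+
+ ```ts
+ import { describe, expect, it } from "vitest";
+ import { loadDashboard } from "./dashboard"; // hypothetical module under guard
+
+ describe("dashboard performance guard", () => {
+   it("loads 1000 items within the 500 ms budget", async () => {
+     const start = performance.now();
+     await loadDashboard({ items: 1000 });
+     const duration = performance.now() - start;
+     // Budget comes from the Step 1 target; loosen it deliberately, never silently.
+     expect(duration).toBeLessThan(500);
+   });
+ });
+ ```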
84
+
85
+ ## Rules
86
+
87
+ - One suspect at a time — never stack optimizations before measuring
88
+ - Keep the baseline in the commit message so the win is auditable
89
+ - If the fix adds significant complexity for a small win (< 10 %), consider reverting
90
+ - Do not optimize code that is not actually hot — premature optimization hurts readability
91
+
92
+ ## Integration
93
+
94
+ - Use the `code-review` skill checklist section 3 (Performance Problems) for patterns to watch for
95
+ - If the optimization requires architectural changes, stop and run `/hopla:plan-feature`
96
+ - After the change lands, the `verify` skill will require the regression guard to run fresh
97
+
98
+ ## Next Step
99
+
100
+ After the target is met and a regression guard is in place:
101
+
102
+ > "Target hit: [baseline → result]. Regression guard added. Say 'commit' to trigger the `git` skill with a `perf:` conventional commit."
@@ -0,0 +1,84 @@
1
+ ---
2
+ name: refactoring
3
+ description: "Safe refactoring workflow with behavior preservation. Use when the user says 'refactor', 'clean up', 'simplify', 'extract', 'restructure', 'deduplicate', 'rename', or when asking to improve code structure without changing behavior. Do NOT use for bug fixes, new features, or performance work — use the debug, plan-feature, or performance skills instead."
4
+ ---
5
+
6
+ > 🌐 **Language:** All user-facing output must match the user's language. Code, paths, and commands stay in English.
7
+
8
+ # Refactoring: Restructure Without Changing Behavior
9
+
10
+ ## Iron Rule
11
+
12
+ **Behavior must be identical before and after.** If a refactor changes observable behavior — output, side effects, error shape, API surface — it is not a refactor. Stop and reclassify the work as a feature change or a bug fix.
13
+
14
+ ## Step 1: Confirm the Refactor Is Worth Doing
15
+
16
+ Ask the user (one question at a time):
17
+
18
+ - What is the current pain? (duplication, unclear naming, deep nesting, coupled modules)
19
+ - What is the desired structure? (extract helper, collapse abstraction, rename, move, inline)
20
+ - Is there a test suite, and does it cover the code being refactored?
21
+
22
+ If the answers reveal a missing test covering the target, **write the test first** (pin current behavior), then refactor. Untested refactors are rewrites.
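+
+ A minimal sketch of such a characterization test, assuming vitest and a hypothetical `formatInvoice` target (pin what the code does today, quirks included):
+
+ ```ts
+ import { expect, it } from "vitest";
+ import { formatInvoice } from "./invoice"; // hypothetical refactor target
+
+ // Characterization test: assert CURRENT behavior verbatim. If the pinned
+ // output turns out to be "wrong", fixing it is a separate bug-fix commit,
+ // not part of the refactor.
+ it("pins formatInvoice output before refactoring", () => {
+   const result = formatInvoice({ id: 42, total: 19.9, currency: "EUR" });
+   expect(result).toMatchInlineSnapshot(); // vitest fills this in on first run
+ });
+ ```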
23
+
24
+ ## Step 2: Capture the Baseline
25
+
26
+ Run the project's validation commands from `CLAUDE.md` (or use `/hopla:validate`). Record:
27
+
28
+ - Lint / format — current state
29
+ - Types — current state
30
+ - Unit tests — pass/fail count
31
+ - Relevant integration tests — pass/fail
32
+
33
+ Every level must be green before starting. A refactor on top of red tests cannot prove it preserved behavior.
34
+
35
+ ## Step 3: Apply the Smallest Safe Change
36
+
37
+ Pick one refactor at a time:
38
+
39
+ - Extract function / module
40
+ - Rename (symbol, file)
41
+ - Inline (remove pointless indirection)
42
+ - Move (relocate to a better home)
43
+ - Deduplicate (merge two near-identical pieces)
44
+ - Replace conditional with polymorphism / table lookup (see the sketch after this list)
45
+ - Flatten / collapse nesting
46
+
47
+ **Do not** mix refactors. If the change wants to become a redesign, stop and suggest `/hopla:plan-feature`.
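+
+ As one worked example, the table-lookup refactor (hypothetical `shippingCost` for illustration; behavior is identical before and after):
+
+ ```ts
+ // Before: a growing if/else chain.
+ function shippingCostBefore(region: string): number {
+   if (region === "eu") return 5;
+   if (region === "us") return 8;
+   if (region === "apac") return 12;
+   return 15;
+ }
+
+ // After: the same mapping as data; adding a region no longer touches logic.
+ const SHIPPING_COST: Record<string, number> = { eu: 5, us: 8, apac: 12 };
+
+ function shippingCostAfter(region: string): number {
+   return SHIPPING_COST[region] ?? 15; // 15 is the default for unknown regions
+ }
+ ```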
48
+
49
+ ## Step 4: Re-run the Baseline
50
+
51
+ After each refactor, re-run the same validation set from Step 2. Results must match exactly:
52
+
53
+ - Same lint result (0 new warnings unless whitelisted)
54
+ - Same type result
55
+ - Same pass/fail count on tests
56
+ - Same integration result
57
+
58
+ If anything diverges, the refactor leaked behavior — revert or fix before continuing.
59
+
60
+ ## Step 5: Commit at a Clean Boundary
61
+
62
+ When the baseline is restored and the refactor is coherent, suggest a commit via the `git` skill:
63
+
64
+ > "Refactor complete — behavior preserved. Say 'commit' to save it with a `refactor:` conventional commit."
65
+
66
+ ## Rules
67
+
68
+ - One refactor per commit — easier to review, easier to revert
69
+ - Never combine refactor + feature in the same commit
70
+ - Prefer many small refactors over one large one
71
+ - If the test suite is missing, add tests FIRST, then refactor (two commits minimum)
72
+ - Preserve public API unless the user explicitly approves a breaking change
73
+
74
+ ## Integration
75
+
76
+ - Pair with the `tdd` skill when adding characterization tests before a refactor
77
+ - Use the `code-review` skill after the refactor to confirm no pattern violations were introduced
78
+ - If the refactor touches many files, consider the `worktree` skill for isolation
79
+
80
+ ## Next Step
81
+
82
+ After the refactor passes validation:
83
+
84
+ > "Refactor complete and validated. Say 'commit' to trigger the `git` skill with a `refactor:` conventional commit."
@@ -47,13 +47,7 @@ Instead, run the verification and report actual results.
47
47
 
48
48
  ## Integration with Validation Pyramid
49
49
 
50
- When completing a feature (not just a single file edit), run the full validation pyramid:
51
-
52
- 1. **Level 1**: Lint & format
53
- 2. **Level 2**: Type check
54
- 3. **Level 3**: Unit tests
55
- 4. **Level 4**: Integration tests (if applicable)
56
- 5. **Level 5**: Human review suggestion
50
+ When completing a feature (not just a single file edit), run levels **1–4 + 7** from `commands/guides/validation-pyramid.md` (Lint, Types, Unit, Integration, Human review).
57
51
 
58
52
  Reference `/hopla:validate` for project-specific validation commands.
59
53
 
@@ -72,3 +66,14 @@ When verifying completion of a plan execution (not just a standalone task):
72
66
  3. **All acceptance criteria met?** — Read the plan's acceptance criteria and verify each one has fresh evidence.
73
67
 
74
68
  These checks prevent the common pattern where implementation is "done" but divergences are silently omitted from the report.
69
+
70
+ ## Authoritative post-implementation sequence
71
+
72
+ When verification passes, follow this order to avoid redundant work and skill overlap:
73
+
74
+ 1. **`verify`** (this skill) — confirm fresh evidence for every completion claim.
75
+ 2. **`code-review`** — run technical review on changed files. Fix `critical`/`high` issues before proceeding.
76
+ 3. **`execution-report`** — document what was built, bugs found, divergences, patterns discovered.
77
+ 4. **`git`** — commit (and PR when ready).
78
+
79
+ Each step hands off to the next, so at any point you should be routing to exactly one follow-up skill. If multiple skills trigger simultaneously on a completion claim, this list is the canonical ordering.