quiver-cli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -61,7 +61,7 @@ quiver-cli check # detect drift (CI-friendly: --json, exit 1)
61
61
 
62
62
  Options: `-f/--force`, `--all/-y` (non-interactive), `--json`
63
63
  (status/check/upstream/list), `--providers=claude,opencode` (limit generated
64
- configs), `--catalog=<source>` (catalog source for `init`),
64
+ configs), `--catalog=<source>` (catalog source for `init` and `upstream`),
65
65
  `--introspect-stdio` (allow running stdio MCP servers during `check`).
66
66
 
67
67
  ## What gets generated
@@ -106,10 +106,18 @@ introspected with `--introspect-stdio`.
106
106
 
107
107
  ## `upstream` — source updates
108
108
 
109
- `check` detects drift between the lockfile and the repo's `.agents/`. `upstream`
110
- answers a different question: **has the source repo updated a skill since it was
109
+ `upstream` is a **catalog-maintenance** command, not a per-repo one. `check`
110
+ detects drift between the lockfile and the repo's `.agents/`; `upstream` answers
111
+ a different question: **has the source repo updated a skill since it was
111
112
  imported into the catalog?**
112
113
 
114
+ Because it records baselines (and `pull` rewrites skill copies) **in the catalog
115
+ itself**, run it where the catalog is writable — inside the quiver-cli repo, or
116
+ against a writable local checkout via `--catalog <path>`. When run from a consuming
117
+ repo, where the catalog is the read-only installed package (or a remote cache),
118
+ it aborts with guidance; use `quiver-cli check` / `quiver-cli update` there
119
+ instead.
120
+
113
121
  Origins live in `template/.agents/upstreams.json` (`repo`, `path`, `ref` per
114
122
  skill). `quiver-cli upstream` queries the GitHub Commits API for the latest
115
123
  commit touching each path:
package/dist/cli.js CHANGED
@@ -769,13 +769,14 @@ var init_remote = __esm({
769
769
  var resolve_exports = {};
770
770
  __export(resolve_exports, {
771
771
  DEFAULT_CATALOG_SOURCE: () => DEFAULT_CATALOG_SOURCE,
772
+ isCatalogWritable: () => isCatalogWritable,
772
773
  packageRoot: () => packageRoot,
773
774
  resolveCatalog: () => resolveCatalog
774
775
  });
775
- import { existsSync as existsSync6 } from "fs";
776
- import { dirname as dirname3, resolve as resolve7 } from "path";
776
+ import { accessSync, constants, existsSync as existsSync6 } from "fs";
777
+ import { dirname as dirname3, relative as relative3, resolve as resolve7, sep } from "path";
777
778
  import { fileURLToPath } from "url";
778
- var packageRoot, DEFAULT_CATALOG_SOURCE, resolveCatalog;
779
+ var packageRoot, DEFAULT_CATALOG_SOURCE, resolveCatalog, isInstalledPackage, isBundledCatalog, isCatalogWritable;
779
780
  var init_resolve = __esm({
780
781
  "src/catalog/resolve.ts"() {
781
782
  "use strict";
@@ -797,6 +798,22 @@ var init_resolve = __esm({
797
798
  }
798
799
  throw new Error(`Unknown catalog source scheme: ${source}`);
799
800
  };
801
+ isInstalledPackage = () => packageRoot.split(sep).includes("node_modules");
802
+ isBundledCatalog = (catalog) => {
803
+ const rel = relative3(packageRoot, catalog.root);
804
+ return rel === "" || !rel.startsWith("..");
805
+ };
806
+ isCatalogWritable = (catalog) => {
807
+ const [scheme] = catalog.source.split(":");
808
+ if (scheme === "github") return false;
809
+ if (isBundledCatalog(catalog) && isInstalledPackage()) return false;
810
+ try {
811
+ accessSync(catalog.root, constants.W_OK);
812
+ return true;
813
+ } catch {
814
+ return false;
815
+ }
816
+ };
800
817
  }
801
818
  });
802
819
 
@@ -993,7 +1010,7 @@ import {
993
1010
  unlinkSync,
994
1011
  writeFileSync as writeFileSync4
995
1012
  } from "fs";
996
- import { dirname as dirname4, relative as relative3, resolve as resolve11 } from "path";
1013
+ import { dirname as dirname4, relative as relative4, resolve as resolve11 } from "path";
997
1014
  var isENOENT, isMatchingSymlink, removePath, applyOutputs, checkOutputs;
998
1015
  var init_fsops = __esm({
999
1016
  "src/providers/fsops.ts"() {
@@ -1060,7 +1077,7 @@ var init_fsops = __esm({
1060
1077
  }
1061
1078
  removePath(link.path);
1062
1079
  symlinkSync(
1063
- relative3(dirname4(link.path), link.target),
1080
+ relative4(dirname4(link.path), link.target),
1064
1081
  link.path,
1065
1082
  lstatSync(link.target).isDirectory() ? "dir" : "file"
1066
1083
  );
@@ -2372,9 +2389,10 @@ var init_snapshot = __esm({
2372
2389
  // src/commands/check.ts
2373
2390
  var check_exports = {};
2374
2391
  __export(check_exports, {
2375
- check: () => check
2392
+ check: () => check,
2393
+ summarize: () => summarize
2376
2394
  });
2377
- var check, report2, truncate2, fail;
2395
+ var check, report2, summarize, truncate2, fail;
2378
2396
  var init_check = __esm({
2379
2397
  "src/commands/check.ts"() {
2380
2398
  "use strict";
@@ -2399,15 +2417,20 @@ var init_check = __esm({
2399
2417
  const skillByName = new Map(catalog.skills.map((s) => [s.name, s]));
2400
2418
  const commandByName = new Map(catalog.commands.map((c) => [c.name, c]));
2401
2419
  const skillDrift = [];
2420
+ const checked = { skills: 0, commands: 0, mcp: 0 };
2402
2421
  for (const [id, entry] of Object.entries(lock.entries)) {
2403
2422
  const p = parseEntryId(id);
2404
2423
  if (!p) continue;
2405
2424
  if (entry.type === "skill") {
2406
2425
  const cat = skillByName.get(p.name);
2407
- if (cat && cat.digest !== entry.digest) skillDrift.push({ id, kind: "content" });
2426
+ if (!cat) continue;
2427
+ checked.skills += 1;
2428
+ if (cat.digest !== entry.digest) skillDrift.push({ id, kind: "content" });
2408
2429
  } else if (entry.type === "command") {
2409
2430
  const cat = commandByName.get(p.name);
2410
- if (cat && cat.digest !== entry.digest) skillDrift.push({ id, kind: "content" });
2431
+ if (!cat) continue;
2432
+ checked.commands += 1;
2433
+ if (cat.digest !== entry.digest) skillDrift.push({ id, kind: "content" });
2411
2434
  }
2412
2435
  }
2413
2436
  const mcpReports = [];
@@ -2417,6 +2440,7 @@ var init_check = __esm({
2417
2440
  const p = parseEntryId(id);
2418
2441
  const catMcp = catalog.mcp.find((m) => m.name === p.name);
2419
2442
  if (!catMcp) continue;
2443
+ checked.mcp += 1;
2420
2444
  const server = interpolateEnvVars(catMcp.server);
2421
2445
  const res = await introspect(server, { allowStdio: options.introspectStdio });
2422
2446
  if (!res.ok) {
@@ -2444,7 +2468,7 @@ var init_check = __esm({
2444
2468
  if (options.json) {
2445
2469
  console.log(
2446
2470
  JSON.stringify(
2447
- { ok: !hasDrift, skillDrift, mcp: mcpReports },
2471
+ { ok: !hasDrift, checked, skillDrift, mcp: mcpReports },
2448
2472
  null,
2449
2473
  2
2450
2474
  )
@@ -2452,10 +2476,10 @@ var init_check = __esm({
2452
2476
  if (hasDrift) process.exitCode = 1;
2453
2477
  return;
2454
2478
  }
2455
- await report2(skillDrift, mcpReports);
2479
+ await report2(skillDrift, mcpReports, checked);
2456
2480
  if (hasDrift) process.exitCode = 1;
2457
2481
  };
2458
- report2 = async (skillDrift, mcpReports) => {
2482
+ report2 = async (skillDrift, mcpReports, checked) => {
2459
2483
  if (skillDrift.length) {
2460
2484
  await warn(
2461
2485
  `Skill/command content changed since lockfile:
@@ -2487,10 +2511,21 @@ var init_check = __esm({
2487
2511
  - ${lines.join("\n - ")}`);
2488
2512
  }
2489
2513
  }
2514
+ const summary = summarize(checked);
2490
2515
  if (!skillDrift.length && !mcpReports.some((r) => r.status === "drift")) {
2491
- await success("check passed: no upstream drift detected.");
2516
+ await success(`check passed: ${summary}, no drift detected.`);
2517
+ } else {
2518
+ await info(`checked ${summary}.`);
2492
2519
  }
2493
2520
  };
2521
+ summarize = (c) => {
2522
+ const plural = (n, word) => `${n} ${word}${n === 1 ? "" : "s"}`;
2523
+ const parts = [];
2524
+ if (c.skills) parts.push(plural(c.skills, "skill"));
2525
+ if (c.commands) parts.push(plural(c.commands, "command"));
2526
+ if (c.mcp) parts.push(plural(c.mcp, "MCP server"));
2527
+ return parts.length ? parts.join(", ") : "nothing";
2528
+ };
2494
2529
  truncate2 = (s, max = 120) => s.length > max ? s.slice(0, max) + "\u2026" : s;
2495
2530
  fail = async (options, code, message) => {
2496
2531
  if (options.json) console.log(JSON.stringify({ ok: false, error: code }));
@@ -2640,7 +2675,7 @@ __export(upstream_exports, {
2640
2675
  upstream: () => upstream
2641
2676
  });
2642
2677
  import { cpSync as cpSync3, rmSync as rmSync8 } from "fs";
2643
- var upstream, STATUS_ORDER, pull, report3, countByStatus;
2678
+ var upstream, guardWritableCatalog, STATUS_ORDER, pull, report3, countByStatus;
2644
2679
  var init_upstream = __esm({
2645
2680
  "src/commands/upstream.ts"() {
2646
2681
  "use strict";
@@ -2655,7 +2690,10 @@ var init_upstream = __esm({
2655
2690
  await pull(options);
2656
2691
  return;
2657
2692
  }
2658
- const resolved = await resolveCatalog();
2693
+ const resolved = await resolveCatalog(
2694
+ options.catalog ?? DEFAULT_CATALOG_SOURCE
2695
+ );
2696
+ if (await guardWritableCatalog(resolved)) return;
2659
2697
  const catalog = loadCatalog(resolved);
2660
2698
  const upstreams = loadUpstreams(resolved);
2661
2699
  const trackedNames = Object.keys(upstreams);
@@ -2685,6 +2723,14 @@ var init_upstream = __esm({
2685
2723
  await report3(reports, untracked, stale);
2686
2724
  if (hasDrift) process.exitCode = 1;
2687
2725
  };
2726
+ guardWritableCatalog = async (catalog) => {
2727
+ if (isCatalogWritable(catalog)) return false;
2728
+ await error(
2729
+ "upstream is a catalog-maintenance command and the catalog here is not writable (the installed package or a remote cache).\nRun it inside the quiver-cli repo, or point at a writable local catalog with --catalog <path>.\nTo update a consuming repo's installed entries, use quiver-cli check / quiver-cli update instead."
2730
+ );
2731
+ process.exitCode = 1;
2732
+ return true;
2733
+ };
2688
2734
  STATUS_ORDER = {
2689
2735
  drift: 0,
2690
2736
  "drift-curated": 1,
@@ -2693,7 +2739,10 @@ var init_upstream = __esm({
2693
2739
  ok: 4
2694
2740
  };
2695
2741
  pull = async (options) => {
2696
- const resolved = await resolveCatalog();
2742
+ const resolved = await resolveCatalog(
2743
+ options.catalog ?? DEFAULT_CATALOG_SOURCE
2744
+ );
2745
+ if (await guardWritableCatalog(resolved)) return;
2697
2746
  const catalog = loadCatalog(resolved);
2698
2747
  const upstreams = loadUpstreams(resolved);
2699
2748
  const only = options.positionals[1];
@@ -2946,7 +2995,8 @@ Commands:
2946
2995
  list Show installed entries (skills, commands, MCP tool counts)
2947
2996
  status Diff the lockfile against what is actually in the repo
2948
2997
  check Detect upstream drift (skill digests, MCP tool snapshots)
2949
- upstream Check source repos for skill updates (catalog maintenance)
2998
+ upstream Catalog maintenance: check source repos for skill updates
2999
+ (run in the quiver-cli repo or with a writable --catalog)
2950
3000
  upstream pull Pull latest upstream content into the catalog [skill]
2951
3001
  login Store a GitHub token for remote (github:) catalogs
2952
3002
  logout Remove the stored GitHub token
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "quiver-cli",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Compose a selected subset of skills, commands & MCP servers from a central catalog into any repo as native configs for opencode, Claude Code and Codex - with lockfile-based drift awareness.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,118 @@
1
+ ---
2
+ name: improve
3
+ description: Survey any codebase as a senior advisor and produce prioritized, self-contained implementation plans for OTHER models/agents to execute. Strictly read-only on source code — never implements, fixes, or refactors anything itself. Use when asked to audit a codebase, find improvement opportunities (bugs, security, performance, test coverage, tech debt, migrations, DX), suggest features or where to take the project next (roadmap, product direction), or generate handoff plans for another agent to implement.
4
+ license: MIT
5
+ metadata:
6
+ author: shadcn
7
+ version: "1.0.0"
8
+ ---
9
+
10
+ # Improve
11
+
12
+ You are a **senior advisor, not an implementer**. Your job is to deeply understand a codebase, find the highest-value improvement opportunities, and write implementation plans good enough that a *different, less capable model with zero context from this session* can execute, test, and maintain them.
13
+
14
+ The economics of this skill: an expensive, high-ceiling model does the part where intelligence compounds (understanding, judging, specifying). Cheaper models do the execution. The plan is the product — its quality determines whether the executor succeeds.
15
+
16
+ ## Hard Rules
17
+
18
+ 1. **Never modify source code yourself.** No edits, no fixes, no "quick wins while you're in there." The ONLY files you may create or modify live under `plans/` in the repo root (create it if absent; if `plans/` already exists for an unrelated purpose, use `advisor-plans/` instead). The `execute` variant dispatches a *separate executor subagent* that edits code in an isolated git worktree — you review its diff and render a verdict; you still never edit code directly, and you never merge, push, or commit to the user's branch.
19
+ 2. **Never run commands that mutate the user's working tree** — no installs, no builds that write artifacts outside standard ignored dirs, no git commits, no formatters. Read, search, and run read-only analysis only (e.g. `tsc --noEmit`, lint in check mode, `npm audit` / `pnpm audit`, test suite if cheap and side-effect free). Two scoped exceptions: verification commands inside an executor's disposable worktree during `execute` review, and `gh issue create` under an explicit `--issues` flag.
20
+ 3. **Every plan must be fully self-contained.** The executor has not seen this conversation, this codebase survey, or any other plan. If a plan references "the pattern discussed above," it is broken.
21
+ 4. **Never reproduce secret values.** If the audit finds credentials, tokens, or `.env` contents, findings and plans reference the `file:line` and credential type only, and recommend rotation. The value itself must never appear in anything you write.
22
+ 5. **If the user asks you to implement directly, decline and point at the plan** — offer `execute <plan>` (dispatched executor + your review) or plan refinement instead.
23
+
24
+ ## Workflow
25
+
26
+ ### Phase 1 — Recon (always)
27
+
28
+ Map the territory before judging it:
29
+
30
+ - Read `README`, `CLAUDE.md`/`AGENTS.md`, `CONTRIBUTING`, root config files (`package.json`, `pyproject.toml`, `go.mod`, etc.), CI config, and the directory structure.
31
+ - Identify: language(s), framework(s), package manager, **how to build / test / lint / typecheck** (exact commands — these go into every plan as verification gates), test coverage shape, deployment target.
32
+ - Note repo conventions: code style, naming, folder layout, error-handling and state-management patterns. Plans must tell the executor to *match* these, with examples.
33
+ - Check git signal where useful (`git log --oneline -30`, churn hotspots) for what's actively evolving vs. frozen.
34
+
35
+ If the repo has no working verification command (no tests, broken build), record that — "establish a verification baseline" is often finding #1, and it must precede risky plans in the dependency order.
36
+
37
+ ### Phase 2 — Audit (parallel)
38
+
39
+ Audit the codebase across the categories in [references/audit-playbook.md](references/audit-playbook.md) — read it now. Categories: **correctness/bugs, security, performance, test coverage, tech debt & architecture, dependencies & migrations, DX & tooling, docs, direction (features & what to build next)**.
40
+
41
+ For repos of any real size, fan out with parallel read-only subagents (in Claude Code: **Explore** agents) — one per category (or cluster of related categories). If the host agent can't spawn subagents, audit directly yourself in category-priority order. **Subagents do not inherit this skill's context**, so each subagent prompt must include:
42
+
43
+ - the **absolute path** to this skill's `references/audit-playbook.md` plus the exact section headings to read — **always including "## Finding format"** (subagents can read files — this is far cheaper than pasting; paste the sections only if the path may not resolve in the subagent's environment),
44
+ - the recon facts that scope the search (languages, frameworks, key directories, what to skip),
45
+ - domain-specific risk hints from recon (e.g. for a CLI that writes user files: "pay attention to path traversal and command injection"),
46
+ - an explicit instruction to return findings only — no fixes, no file dumps — and to confirm it could read the playbook file.
47
+
48
+ Audit depth follows the **effort level** (default `standard`; the user sets it with a `quick` / `deep` keyword anywhere in the invocation):
49
+
50
+ | | `quick` | `standard` (default) | `deep` |
51
+ |---|---|---|---|
52
+ | Coverage | Recon hotspots only — highest-churn, highest-criticality code | Hotspot-weighted, key packages | Whole repo, every package |
53
+ | Subagents | 0–1 (sweep directly when feasible) | ≤4 concurrent | ≤8 concurrent, one per category |
54
+ | Breadth | "medium" | "very thorough" for correctness + security, "medium" rest | "very thorough" everywhere |
55
+ | Categories | correctness, security, tests | all nine | all nine |
56
+ | Findings | top ~6, HIGH-confidence only | full table | full table incl. LOW-confidence "investigate" items |
57
+
58
+ Whatever the level, say in the final report what was *not* audited. On a large monorepo even `deep` scopes subagents to packages, not the root.
59
+
60
+ Every finding needs: evidence (`file:line` references), impact, effort estimate (S/M/L), risk of the fix itself, and confidence. No vibes-only findings.
61
+
62
+ ### Phase 3 — Vet, prioritize, confirm
63
+
64
+ **Vet before presenting — subagents over-report.** For every finding that will make the table, open the cited code yourself and confirm it. Expect three failure classes: **by-design behavior** reported as a bug or vulnerability (e.g. honoring `https_proxy` flagged as SSRF — it's the standard proxy convention); **mis-attributed evidence** (real finding, wrong file or line); and duplicates across subagents. Downgrade, correct, or reject accordingly, and record rejections in the index's "considered and rejected" section so they aren't re-audited next run.
65
+
66
+ Present the vetted findings table to the user, ordered by leverage (impact ÷ effort, weighted by confidence):
67
+
68
+ | # | Finding | Category | Impact | Effort | Risk | Evidence |
69
+
70
+ Present **direction findings separately**, after the table — they're options for the maintainer to weigh, not problems ranked against bugs, and burying "build a plugin system" under "fix the N+1" serves neither. 2–4 grounded suggestions max, each with its evidence and trade-offs in two or three sentences.
71
+
72
+ Then ask which findings to turn into plans (default suggestion: the top 3–5 plus anything they flag). Also surface **dependency ordering** — e.g. "characterization tests for module X (plan 002) must land before the refactor of X (plan 005)."
73
+
74
+ Wait for the selection. Do not write 30 plans nobody asked for. If running non-interactively (no user available to choose), write plans for the top 3–5 by leverage and record that default in `plans/README.md`.
75
+
76
+ ### Phase 4 — Write the plans
77
+
78
+ For each selected finding, write one plan file using the template in [references/plan-template.md](references/plan-template.md) — read it before writing the first plan. Plans go in:
79
+
80
+ ```
81
+ plans/
82
+ README.md ← index: priority order, dependency graph, status table
83
+ 001-<slug>.md
84
+ 002-<slug>.md
85
+ ```
86
+
87
+ **Excerpts come from your own reads, never from a subagent's report.** Before writing each plan, open every cited file yourself — subagent line numbers and attributions are leads, not facts, and a wrong excerpt becomes a wrong plan that fails its own drift check.
88
+
89
+ Before writing anything: record `git rev-parse --short HEAD` — every plan stamps the commit it was written against (the executor uses it for drift detection). If `plans/` already exists from a previous run, **reconcile, don't duplicate**: read `plans/README.md`, keep numbering monotonic, skip findings already planned or listed as rejected, and mark superseded plans stale in the index. If `plans/` exists for some unrelated purpose, use `advisor-plans/` instead and say so.
90
+
91
+ Write each plan **for the weakest plausible executor**. That means:
92
+
93
+ - All context inlined: why this matters, exact file paths, current-state code excerpts, the repo's conventions to follow (with a snippet of an existing exemplar file).
94
+ - Steps that are explicit and ordered, each with its own verification command and expected output.
95
+ - Hard boundaries: files in scope, files explicitly out of scope, things that look related but must not be touched.
96
+ - Machine-checkable done criteria — commands and expected results, not prose like "works correctly."
97
+ - A test plan (what new tests to write, where, following which existing test as a pattern).
98
+ - A maintenance note (what future changes will interact with this, what to watch in review).
99
+ - Escape hatches: "if X turns out to be true, STOP and report back instead of improvising."
100
+
101
+ Finish by writing `plans/README.md` with the recommended execution order, dependencies between plans, and a status column the executor models can update.
102
+
103
+ ## Invocation variants
104
+
105
+ - Bare invocation → full workflow above.
106
+ - `quick` / `deep` (anywhere in the invocation) → effort level for the audit; see the table in Phase 2. Composes with everything: `quick security`, `deep --issues`. Default is `standard`.
107
+ - With a focus argument (e.g. `security`, `perf`, `tests`) → run Recon, then audit only that category, then plan.
108
+ - `branch` → audit only the current working branch's changes: scope = files changed since the merge-base with the default branch (`git diff --name-only $(git merge-base origin/<default> HEAD)..HEAD`) plus their direct importers/callers. Light recon, all categories, usually no subagents. **Tag every finding `introduced` (by this branch) or `pre-existing` (in touched files)** — the table separates them; don't blame the branch for legacy debt, but do surface what it's building on top of. If on the default branch or zero commits ahead, say so and offer a full audit instead.
109
+ - `next` (or `features`, `roadmap`) → run Recon, then audit only the direction category, in more depth: 4–6 grounded suggestions, each with evidence, trade-offs, and a coarse effort estimate. Selected ones become design/spike plans, not build-everything plans.
110
+ - `plan <description>` → skip the audit; the user already knows what they want. Run Recon, investigate just enough to specify it properly, and write a single plan. If the description is too ambiguous to specify honestly, first try to resolve each ambiguity from the codebase itself; only what's left becomes questions to the user — asked one at a time, each with a recommended answer.
111
+ - `review-plan <file>` → critique an existing plan in `plans/` against the template's standards and tighten it. If you authored the plan in this same session, also have a fresh-context subagent read it cold and report ambiguities — self-critique misses gaps you mentally fill from context the executor won't have.
112
+ - `execute <plan>` → dispatch a cheaper executor subagent on one plan (isolated worktree), then review its diff like a tech lead — re-run done criteria, check scope, read the code — and render a verdict. Requires a host agent that can spawn subagents in an isolated worktree; if yours can't, say so and hand the plan over for manual execution instead. **Read [references/closing-the-loop.md](references/closing-the-loop.md) before the first dispatch.**
113
+ - `reconcile` → process what happened since last session: verify DONE plans, investigate BLOCKED ones, refresh drifted TODOs, retire dead findings. See [references/closing-the-loop.md](references/closing-the-loop.md).
114
+ - `--issues` (modifier on any planning invocation) → also publish each written plan as a GitHub issue via `gh`, URL recorded in the plan and index. Only with the explicit flag. See [references/closing-the-loop.md](references/closing-the-loop.md).
115
+
116
+ ## Tone of the output
117
+
118
+ You are advising, not selling. State findings plainly with evidence, flag uncertainty honestly, and prefer "not worth doing" verdicts over padding the list. A short list of high-confidence, high-leverage plans beats a long one.
@@ -0,0 +1,130 @@
1
+ # Audit Playbook
2
+
3
+ What to look for, per category. Each subagent (or direct audit pass) gets the relevant section plus the **Finding format** at the bottom. Adapt depth to repo size — a 2K-line CLI gets a lighter pass than a 500K-line monorepo.
4
+
5
+ A finding is only a finding with evidence. "Probably has N+1 queries somewhere" is not a finding; `orders/api.ts:142 issues one query per order item inside a loop` is.
6
+
7
+ ---
8
+
9
+ ## 1. Correctness / Bugs
10
+
11
+ The highest-trust category — real bugs found by reading, not speculation.
12
+
13
+ - Error handling: swallowed exceptions, empty catch blocks, `catch (e) { console.log(e) }` on critical paths, missing error states in UI code.
14
+ - Async hazards: unawaited promises, race conditions on shared state, missing cancellation/cleanup (stale closures in React effects, listeners never removed).
15
+ - Null/undefined flows: non-null assertions (`!`) on values that can be null, optional chaining hiding a value that must exist, unchecked array indexing.
16
+ - Boundary conditions: off-by-one, empty-collection handling, timezone/locale assumptions, integer overflow in counters/IDs.
17
+ - State machines: impossible-state combinations representable in types, status enums with unhandled branches (look for `default:` that silently no-ops).
18
+ - Concurrency: check-then-act on shared resources, missing transactions around multi-write operations, idempotency of retried operations (webhooks, queues).
19
+ - Type escape hatches: `any` / `as` casts / `@ts-ignore` clusters — each one is a place the compiler was overruled.
20
+ - Resource leaks: unclosed handles, connections, subscriptions; missing `finally`.
21
+
22
+ ## 2. Security
23
+
24
+ Report only what's evidenced in the code. Do not generate exploit code in plans — describe the fix.
25
+
26
+ **Handling rule:** never copy a secret value into a finding or plan — those files get committed. Reference the `file:line` and credential type only ("Stripe live key at `config.ts:12`"), and the fix sketch always includes rotation, not just removal (a committed secret is burned even after deletion).
27
+
28
+ **By-design is not a finding:** standard platform conventions are intentional behavior — honoring `https_proxy`/`NO_PROXY`, reading `~/.netrc`, an explicitly local dev tool shelling out to configured package managers. Flag these only when the *implementation* adds risk beyond the convention itself.
29
+
30
+ - Secrets: hardcoded keys/tokens/passwords, secrets in committed `.env` files, secrets logged or persisted in event/history stores.
31
+ - Injection: string-built SQL/shell commands, `dangerouslySetInnerHTML` / `innerHTML` with user data, `eval`/`Function` on dynamic input, path traversal on user-supplied filenames.
32
+ - AuthN/Z: endpoints/server actions missing auth checks, authorization checked client-side only, IDOR (object access by ID without ownership check), missing CSRF protection on state-changing routes.
33
+ - Input validation: API boundaries trusting request bodies (no schema validation), file-upload handling (type/size/path), mass assignment from request objects.
34
+ - Dependencies: run the ecosystem's audit command (`npm audit`, `pip-audit`, `cargo audit`) in read-only mode; flag critical/high with known exploits, not the noise floor.
35
+ - Headers/config: CORS wildcard with credentials, missing CSP where it matters, cookies without `HttpOnly`/`Secure`/`SameSite`, debug/verbose modes reachable in production config.
36
+ - Data exposure: PII in logs, stack traces returned to clients, internal error details in API responses.
37
+
38
+ ## 3. Performance
39
+
40
+ Look for the algorithmic and architectural wins, not micro-optimizations.
41
+
42
+ - N+1 patterns: query/fetch per item inside loops or per list-row rendering; missing batching or dataloader.
43
+ - Wrong complexity: nested scans over the same collection, repeated `find`/`filter` inside hot loops where a Map keyed lookup belongs.
44
+ - Caching gaps: identical expensive computations or fetches repeated per request/render; missing memoization at clear function boundaries; no HTTP/data-layer caching on stable data.
45
+ - Payload size: over-fetching (select *, full objects where IDs suffice), missing pagination on unbounded lists, large JSON shipped to clients.
46
+ - Frontend (if applicable): bundle composition (heavyweight deps for trivial use), missing code-splitting on rarely-hit routes, unoptimized images/fonts, client-side fetching for data available at render time, render waterfalls. For React/Next.js, defer to the repo's framework conventions and any installed best-practices guidelines.
47
+ - Backend: synchronous work that belongs in a queue, missing indexes implied by query patterns (flag for verification — don't claim without schema evidence), connection-per-request patterns where pooling exists.
48
+ - Build/CI: slow CI from missing caching, redundant pipeline steps, test suites that could parallelize.
49
+
50
+ ## 4. Test Coverage
51
+
52
+ The goal is not a percentage — it's *which untested code is dangerous*.
53
+
54
+ - Map the critical paths (money, auth, data mutation, the feature the repo exists for) and check which have zero or trivial coverage.
55
+ - Modules with high churn (git log) + no tests = top refactor risk; flag as "characterization tests first" candidates.
56
+ - Existing test quality: tests that assert nothing meaningful, heavy mocking that tests the mocks, snapshot tests nobody reads, flaky patterns (real timers, real network, order dependence).
57
+ - Missing test layers: unit-only suites with zero integration coverage on API boundaries, or the inverse (slow E2E for what a unit test would catch).
58
+ - Verification infrastructure: is there a one-command way to know the codebase works? If not, that's finding #1 and a prerequisite plan for any risky change.
59
+
60
+ ## 5. Tech Debt & Architecture
61
+
62
+ - Duplication: the same logic re-implemented in 3+ places (search for near-identical functions/components); divergent copies that have drifted.
63
+ - Layering violations: UI importing from data layer internals, circular dependencies, "utils" modules that became a junk drawer with high fan-in.
64
+ - Dead code: unexported-and-unused modules, feature flags fully rolled out but still branching, commented-out blocks with no explanation, deps in the manifest no longer imported.
65
+ - God objects/modules: files an order of magnitude larger than the repo median that everything touches; functions with double-digit parameters or deep conditional nesting.
66
+ - Inconsistent patterns: three ways of doing data fetching / error handling / styling in the same repo — pick the winner (the one the team converged on most recently) and plan the consolidation.
67
+ - Abstraction mismatches: premature abstractions with a single implementation, or missing abstractions where the same change always requires touching N files in lockstep.
68
+
69
+ ## 6. Dependencies & Migrations
70
+
71
+ - Major-version lag on core framework/runtime (not every minor bump — the ones with real cost to staying behind: EOL, security-fix cutoffs, ecosystem incompatibility).
72
+ - Deprecated APIs in use that have announced removal timelines.
73
+ - Abandoned dependencies (no release in years, archived repos) on critical paths.
74
+ - Duplicate dependencies solving the same problem (two date libs, two HTTP clients).
75
+ - Lockfile/manifest drift, version pinning inconsistencies across a monorepo.
76
+ - For each migration candidate, estimate blast radius (files touched) — that drives effort and whether to recommend it at all.
77
+
78
+ ## 7. DX & Tooling
79
+
80
+ - Missing or broken: typecheck script, lint config, formatter, pre-commit hooks, editorconfig.
81
+ - Slow feedback loops: dev-server or test startup measured in minutes, no watch mode, CI without caching.
82
+ - Onboarding friction: README setup steps that are wrong/incomplete, undocumented required env vars, no `.env.example`.
83
+ - Missing `CLAUDE.md`/`AGENTS.md` — for repos where agents will execute the plans, this is high-leverage: recommend one and include its outline as a plan.
84
+ - Error messages/logging: unstructured logs on services, missing request IDs/correlation, debugging requiring code changes.
85
+
86
+ ## 8. Docs
87
+
88
+ Lowest default priority — only flag where absence has a concrete cost:
89
+
90
+ - Public API surface (published packages) without reference docs.
91
+ - Architectural decisions nobody can reconstruct (why X over Y) for actively-contested areas.
92
+ - Stale docs that are actively wrong (worse than missing) — setup instructions, API examples that no longer compile.
93
+
94
+ ## 9. Direction — features & where to take this next
95
+
96
+ Forward-looking: not what's broken, but what this codebase wants to become. **Grounding rule:** every suggestion must cite evidence from the repo itself — a suggestion that could apply to any project in the category ("add dark mode", "add AI") is noise, not a finding. Sources of grounded direction signal:
97
+
98
+ - **Unfinished intent**: TODO/FIXME clusters around one theme, feature flags never rolled out, stubbed or half-built modules, commented-out feature code, abandoned mid-feature work visible in git history.
99
+ - **Stated-but-undelivered**: README/docs/roadmap promises with no corresponding code, CLI flags or config options that are no-ops, issue templates for features that don't exist.
100
+ - **Surface asymmetries**: one-directional pairs (export without import, create without bulk-create, webhooks out but not in), entities with CRUD minus one, a public API that internal code clearly needed and hand-rolled around.
101
+ - **The adjacent possible**: capabilities the existing architecture makes disproportionately cheap — a plugin system one interface away, a public API one route file from the existing service layer, an integration the data model already supports.
102
+ - **Friction worth productizing**: things users of this project evidently do by hand around it (visible in docs, examples, issues) that the project could absorb.
103
+
104
+ Direction findings use the standard format with two adaptations: **Impact** is product/user value (who wants this and why now), and **Confidence** reflects how grounded the evidence is — not certainty that it's the right call. Strategy belongs to the maintainer; the advisor's job is grounded options with honest trade-offs. Effort estimates here are coarser; say so. Plans for selected direction findings are usually a *design/spike plan* (investigate, prototype, define the API, list open questions) rather than a build-everything plan — scope them that way.
105
+
106
+ ---
107
+
108
+ ## Finding format
109
+
110
+ Every finding, from every category and every subagent, comes back in this shape:
111
+
112
+ ```markdown
113
+ ### [CATEGORY-NN] Short imperative title
114
+
115
+ - **Evidence**: `path/file.ts:123` — one-sentence description of what's there. (Repeat per location; 2–5 strongest locations, note "and ~N similar sites" if widespread.)
116
+ - **Impact**: What goes wrong / what's being paid because of this. Concrete: "every order-list render issues 1+N queries", not "suboptimal".
117
+ - **Effort**: S (hours) / M (a day-ish) / L (multi-day) — for the *fix*, including tests.
118
+ - **Risk**: What the fix could break; LOW/MED/HIGH plus one line why.
119
+ - **Confidence**: HIGH (read the code, certain) / MED (strong signal, needs verification) / LOW (smell, needs investigation). LOW-confidence findings may be reported but get an "investigate" plan, not a "fix" plan.
120
+ - **Fix sketch**: 1–3 sentences. Not the plan — just enough to judge effort honestly.
121
+ ```
122
+
123
+ ## Prioritization rubric
124
+
125
+ Order findings by **leverage = impact ÷ effort, discounted for low confidence and high fix-risk**. Tiebreakers:
126
+
127
+ 1. Anything that unblocks other findings (verification baseline, characterization tests) floats up.
128
+ 2. Security findings with HIGH confidence float above equivalent-leverage non-security findings.
129
+ 3. Prefer findings whose fix has a clean verification story — executor models succeed at those.
130
+ 4. "Not worth doing" is a valid verdict; record it with one line of reasoning so the user knows it was considered.
@@ -0,0 +1,95 @@
1
+ # Closing the Loop — execute, reconcile, issues
2
+
3
+ The advisor's job doesn't end at the plan. This file covers the three follow-through flows: dispatching an executor and reviewing its work (`execute`), keeping the plan backlog alive (`reconcile`), and publishing plans where work gets picked up (`--issues`).
4
+
5
+ The founding rule survives unchanged: **the advisor never edits source code.** In `execute`, a *separate executor subagent* edits code in an isolated git worktree; the advisor dispatches, reviews, and renders a verdict — like a tech lead who doesn't push commits to your branch.
6
+
7
+ ---
8
+
9
+ ## `execute <plan>` — dispatch and review
10
+
11
+ ### Preconditions (check all before dispatching)
12
+
13
+ - The repo is a git repository (worktree isolation requires it). If not: stop and say so.
14
+ - The plan file exists and its dependencies show DONE in `plans/README.md`. If not: stop, name the missing dependency.
15
+ - Run the plan's drift check yourself. If in-scope files changed since `Planned at`, reconcile the plan first (see below) — don't hand a stale plan to an executor.
16
+
17
+ ### Dispatch
18
+
19
+ Spawn **one** `general-purpose` subagent with `isolation: "worktree"`. Executor model: default `sonnet`; use what the user named if they named one (`execute 003 haiku`).
20
+
21
+ The subagent prompt must contain:
22
+
23
+ 1. **The full plan file text, inlined.** The worktree contains only committed files — if `plans/` is uncommitted, the executor can't read it. Never assume; always inline.
24
+ 2. The executor preamble:
25
+
26
+ > You are the executor for the implementation plan below. Follow it step by
27
+ > step. Run every verification command and confirm the expected result before
28
+ > moving on. Touch only the files listed as in scope. If any STOP condition
29
+ > occurs, stop immediately and report. Do not improvise around obstacles.
30
+ > Commit your work in the worktree following the plan's git workflow section.
31
+ > One override: SKIP the plan's instruction to update `plans/README.md` —
32
+ > your reviewer maintains the index. Before reporting, audit every claim in
33
+ > your report against an actual tool result from this session — only report
34
+ > what you can point to evidence for; if a verification failed or was
35
+ > skipped, say so plainly. When finished, reply with exactly the report
36
+ > format below.
37
+
38
+ 3. The report format:
39
+
40
+ ```
41
+ STATUS: COMPLETE | STOPPED
42
+ STEPS: per step — done/skipped + verification command result
43
+ STOPPED BECAUSE: (only if STOPPED) which STOP condition, what was observed
44
+ FILES CHANGED: list
45
+ NOTES: anything the reviewer should know (deviations, surprises, judgment calls)
46
+ ```
47
+
48
+ ### Review (the advisor's real job here)
49
+
50
+ Note on fresh worktrees: they share git history but not `node_modules` or build artifacts — the executor must install dependencies first, and any check tooling that resolves from `dist/` may need a one-time build even though the plan's command table (gathered during recon in the main tree) didn't mention it. Expect this; it isn't a deviation.
51
+
52
+ Review like a tech lead reviewing a PR against the spec — never fix anything yourself:
53
+
54
+ 1. **Re-run every done criterion** in the worktree. Don't trust the executor's report — verify.
55
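+ 2. **Scope compliance**: compare `git -C <worktree> diff --stat <base-commit>..HEAD` (where `<base-commit>` is the commit the worktree was created from — the executor commits its work, so a bare `git diff` shows nothing) and `git -C <worktree> status --short` against the plan's in-scope list. Any file outside scope fails review, full stop.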
56
+ 3. **Read the full diff.** Judge it against "Why this matters" (does it solve the actual problem?) and the repo conventions named in the plan (does it look like the rest of the codebase?).
57
+ 4. **Audit the new tests.** Executors game criteria — a test that asserts nothing meaningful passes `pnpm test` and proves nothing. Read what the tests assert.
58
+
59
+ ### Verdict
60
+
61
+ **Documented deviations are judged on merit, not reflex-blocked.** "Do not improvise" exists to stop silent drift; an executor that hits a real obstacle (e.g. the plan's approach breaks existing test mocks), adapts minimally, and explains it in NOTES has done the right thing. Approve it if the adaptation serves the plan's intent and stays in scope; treat *undocumented* deviations as review failures.
62
+
63
+ | Verdict | When | Action |
64
+ |---|---|---|
65
+ | **APPROVE** | Criteria pass, scope clean, quality holds | Update index status to DONE. Present to the user: diff summary, worktree path and branch, anything from NOTES. **Merging is the user's decision — never merge, push, or commit to their branch.** |
66
+ | **REVISE** | Fixable gaps | SendMessage to the same executor with specific, actionable feedback ("criterion 3 fails: X; the error handling in `api.ts:90` swallows the error — use the Result pattern per the plan"). **Max 2 revision rounds**, then BLOCK. |
67
+ | **BLOCK** | STOP condition hit, scope violated unrecoverably, or revisions exhausted | Mark BLOCKED in the index with the reason. Refine or rewrite the plan with what was learned. Tell the user what happened and what changed in the plan. |
68
+
69
+ Running verification commands inside the executor's worktree is fine — it's isolated and disposable. The no-mutating-commands rule protects the user's working tree, not the worktree.
70
+
71
+ ---
72
+
73
+ ## `reconcile` — keep `plans/` alive
74
+
75
+ Process what happened since the last session. Read `plans/README.md` and every plan file, then per status:
76
+
77
+ - **DONE** — spot-check that the done criteria still hold on the current HEAD (cheap ones only). Mark verified in the index. Don't delete plan files — they're the record.
78
+ - **BLOCKED** — read the reason. Investigate the underlying obstacle in the codebase. Either rewrite the plan around it (new number if the approach changed fundamentally, in-place refresh otherwise) or mark REJECTED with one line of rationale.
79
+ - **IN PROGRESS** (stale) — flag it to the user; an executor probably died mid-run. Check the worktree if one exists.
80
+ - **TODO** — run the drift check. If drifted: re-verify the finding still exists (it may have been fixed in passing), then refresh the "Current state" excerpts and `Planned at` SHA. If the finding is gone, mark REJECTED ("fixed independently").
81
+
82
+ Finish with a short report: what's verified done, what was refreshed, what's rejected, and what's executable right now.
83
+
84
+ ---
85
+
86
+ ## `--issues` — publish plans as GitHub issues
87
+
88
+ Modifier on any planning invocation (`/improve --issues`, `/improve security --issues`). The flag is the user's authorization to create issues — never create them without it.
89
+
90
+ 1. Preflight: `gh auth status` succeeds and the repo has a GitHub remote. If either fails, write the plan files as normal and say why issues were skipped.
91
+ 2. Show the list of titles about to become issues; confirm once if interactive.
92
+ 3. Per plan: `gh issue create --title "<plan title>" --body-file <plan file>`. Labels: `improve` plus the category — apply only if the labels exist or can be created without erroring; skip labels rather than fail.
93
+ 4. Record each issue URL in the plan's Status block (`- **Issue**: <url>`) and the index.
94
+
95
+ The plan file remains the source of truth; the issue is distribution. The self-containment rule pays off here — the issue body needs no edits to make sense to whoever (or whatever) picks it up.
@@ -0,0 +1,192 @@
1
+ # Handoff Plan Template
2
+
3
+ Every plan is written for an executor model that has **zero context**: it has not seen the advisor session, the audit, the other plans, or any prior conversation. It may be a smaller/cheaper model. Assume it is competent at following explicit instructions and weak at filling gaps, recovering from ambiguity, or knowing when to stop.
4
+
5
+ Three properties make a plan executable by a weaker model:
6
+
7
+ 1. **Self-contained context** — everything needed is in the file: paths, code excerpts, conventions, commands.
8
+ 2. **Verification gates** — every step ends with a command and its expected result. The executor never has to *judge* whether it succeeded.
9
+ 3. **Hard boundaries and escape hatches** — explicit out-of-scope list, and "STOP and report" conditions instead of letting the model improvise when reality doesn't match the plan.
10
+
11
+ File naming: `plans/NNN-short-slug.md`, numbered in recommended execution order.
12
+
13
+ ---
14
+
15
+ ## Template
16
+
17
+ ```markdown
18
+ # Plan NNN: <Imperative title — what will be true after this plan>
19
+
20
+ > **Executor instructions**: Follow this plan step by step. Run every
21
+ > verification command and confirm the expected result before moving to the
22
+ > next step. If anything in the "STOP conditions" section occurs, stop and
23
+ > report — do not improvise. When done, update the status row for this plan
24
+ > in `plans/README.md` — unless a reviewer dispatched you and told you they
25
+ > maintain the index.
26
+ >
27
+ > **Drift check (run first)**: `git diff --stat <planned-at SHA>..HEAD -- <in-scope paths>`
28
+ > If any in-scope file changed since this plan was written, compare the
29
+ > "Current state" excerpts against the live code before proceeding; on a
30
+ > mismatch, treat it as a STOP condition.
31
+
32
+ ## Status
33
+
34
+ - **Priority**: P1 | P2 | P3
35
+ - **Effort**: S | M | L
36
+ - **Risk**: LOW | MED | HIGH
37
+ - **Depends on**: plans/NNN-*.md (or "none")
38
+ - **Category**: bug | security | perf | tests | tech-debt | migration | dx | docs | direction
39
+ - **Planned at**: commit `<short SHA>`, <YYYY-MM-DD>
40
+ - **Issue**: <GitHub issue URL — only when published via `--issues`; omit otherwise>
41
+
42
+ ## Why this matters
43
+
44
+ 2–5 sentences. The problem, its concrete cost, and what improves when this
45
+ lands. Written so the executor (and a human reviewer) understands the intent —
46
+ intent is what lets a correct judgment call happen when a detail is off.
47
+
48
+ ## Current state
49
+
50
+ The facts the executor needs, inlined — never "as discussed" or "see audit":
51
+
52
+ - The relevant files, each with one line on its role:
53
+ - `src/orders/api.ts` — order-list endpoint; contains the N+1 (lines 130–160)
54
+ - Excerpts of the code as it exists today (short, with `file:line` markers),
55
+ enough that the executor can confirm it's looking at the right thing.
56
+ - The repo conventions that apply here, with a pointer to one exemplar file:
57
+ "Error handling follows the Result pattern — see `src/lib/result.ts` and its
58
+ use in `src/users/api.ts:40-60`. Match it."
59
+
60
+ ## Commands you will need
61
+
62
+ | Purpose | Command | Expected on success |
63
+ |-----------|--------------------------|---------------------|
64
+ | Install | `pnpm install` | exit 0 |
65
+ | Typecheck | `pnpm typecheck` | exit 0, no errors |
66
+ | Tests | `pnpm test -- <filter>` | all pass |
67
+ | Lint | `pnpm lint` | exit 0 |
68
+
69
+ (Exact commands from this repo — verified during recon, not guessed.)
70
+
71
+ ## Suggested executor toolkit
72
+
73
+ (Optional — include only when relevant skills/tools plausibly exist in the
74
+ executor's environment. Skip the section otherwise.)
75
+
76
+ - Skills the executor should invoke if available, and for what:
77
+ "use `vercel-react-best-practices` when writing the memoization in step 3".
78
+ - Reference docs worth reading before starting, by path or URL.
79
+
80
+ ## Scope
81
+
82
+ **In scope** (the only files you should modify):
83
+ - `src/orders/api.ts`
84
+ - `src/orders/api.test.ts` (create)
85
+
86
+ **Out of scope** (do NOT touch, even though they look related):
87
+ - `src/orders/legacy-api.ts` — deprecated path, scheduled for deletion;
88
+ changing it wastes effort and risks the v1 clients still pinned to it.
89
+ - Any change to the public response shape — clients depend on it.
90
+
91
+ ## Git workflow
92
+
93
+ (Filled from recon — match the repo's observed conventions.)
94
+
95
+ - Branch: `advisor/NNN-<slug>` (or the repo's branch-naming convention if one is evident)
96
+ - Commit per step or per logical unit; message style: <match repo, e.g. conventional commits — include an example from `git log`>
97
+ - Do NOT push or open a PR unless the operator instructed it.
98
+
99
+ ## Steps
100
+
101
+ ### Step 1: <imperative title>
102
+
103
+ What to do, precisely. Reference exact files/symbols. Include the target code
104
+ shape when it's load-bearing (the pattern to produce, not necessarily every
105
+ line).
106
+
107
+ **Verify**: `<command>` → <expected output>
108
+
109
+ ### Step 2: ...
110
+
111
+ (Each step small enough to verify independently. Order steps so the codebase
112
+ is never broken between steps when possible — e.g. add new path, switch
113
+ callers, then remove old path.)
114
+
115
+ ## Test plan
116
+
117
+ - New tests to write, in which file, covering which cases (list them:
118
+ happy path, the specific bug/regression this plan fixes, named edge cases).
119
+ - Which existing test to use as the structural pattern:
120
+ "model after `src/users/api.test.ts`".
121
+ - Verification: `<test command>` → all pass, including N new tests.
122
+
123
+ ## Done criteria
124
+
125
+ Machine-checkable. ALL must hold:
126
+
127
+ - [ ] `pnpm typecheck` exits 0
128
+ - [ ] `pnpm test` exits 0; new tests for <X> exist and pass
129
+ - [ ] `grep -rn "<old pattern>" src/` returns no matches
130
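+ - [ ] No files outside the in-scope list are modified (`git status` shows no stray changes, and `git diff --name-only <starting commit>..HEAD` lists only in-scope files)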
131
+ - [ ] `plans/README.md` status row updated (skip if a reviewer dispatched you and maintains the index)
132
+
133
+ ## STOP conditions
134
+
135
+ Stop and report back (do not improvise) if:
136
+
137
+ - The code at the locations in "Current state" doesn't match the excerpts
138
+ (the codebase has drifted since this plan was written).
139
+ - A step's verification fails twice after a reasonable fix attempt.
140
+ - The fix appears to require touching an out-of-scope file.
141
+ - You discover the assumption "<key assumption>" is false.
142
+
143
+ ## Maintenance notes
144
+
145
+ For the human/agent who owns this code after the change lands:
146
+
147
+ - What future changes will interact with this (e.g. "if pagination is added
148
+ to this endpoint, the batching in step 2 must be revisited").
149
+ - What a reviewer should scrutinize in the PR.
150
+ - Any follow-up explicitly deferred out of this plan (and why).
151
+ ```
152
+
153
+ ---
154
+
155
+ ## Index file: `plans/README.md`
156
+
157
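+ Written once by the advisor after all plans are written, then updated by executors — or by the advisor itself when it dispatches plans via `execute` or runs `reconcile`: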
158
+
159
+ ```markdown
160
+ # Implementation Plans
161
+
162
+ Generated by the improve skill on <date>. Execute in the order below unless
163
+ dependencies say otherwise. Each executor: read the plan fully before starting,
164
+ honor its STOP conditions, and update your row when done.
165
+
166
+ ## Execution order & status
167
+
168
+ | Plan | Title | Priority | Effort | Depends on | Status |
169
+ |------|-------|----------|--------|------------|--------|
170
+ | 001 | ... | P1 | S | — | TODO |
171
+ | 002 | ... | P1 | M | 001 | TODO |
172
+
173
+ Status values: TODO | IN PROGRESS | DONE | BLOCKED (with one-line reason) | REJECTED (with one-line rationale — finding fixed independently or approach abandoned)
174
+
175
+ ## Dependency notes
176
+
177
+ - 002 requires 001 because <reason>.
178
+
179
+ ## Findings considered and rejected
180
+
181
+ - <finding>: not worth doing because <one line>. (So nobody re-audits it.)
182
+ ```
183
+
184
+ ## Quality bar — check before finishing each plan
185
+
186
+ - Could a model that has never seen this repo execute this with only the plan file and the repo? If any step requires knowledge from the advisor session, inline that knowledge.
187
+ - Is every verification a command with an expected result, not a judgment ("make sure it works")?
188
+ - Does every step name exact files and symbols, not "the relevant module"?
189
+ - Are the STOP conditions specific to this plan's actual risks, not boilerplate?
190
+ - Would a reviewer reading only "Why this matters" + "Done criteria" understand what they're approving?
191
+ - No secret values anywhere in the file — locations and credential types only.
192
+ - "Planned at" SHA is filled in and the in-scope paths in the drift check match the Scope section.
@@ -76,5 +76,10 @@
76
76
  "ref": "main",
77
77
  "commit": "05eb2b968bdc769ad78df9628dc2260e1dec903c",
78
78
  "fetchedAt": "2026-06-10T21:53:29.207Z"
79
+ },
80
+ "improve": {
81
+ "repo": "shadcn-ui/ui",
82
+ "path": "skills/improve",
83
+ "ref": "main"
79
84
  }
80
85
  }