autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude-plugin/marketplace.json +22 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/LICENSE +21 -0
  4. package/Makefile +21 -0
  5. package/README.md +140 -0
  6. package/SECURITY.md +28 -0
  7. package/agents/bash-expert.md +113 -0
  8. package/agents/dependency-auditor.md +138 -0
  9. package/agents/integration-tester.md +120 -0
  10. package/agents/lesson-scanner.md +149 -0
  11. package/agents/python-expert.md +179 -0
  12. package/agents/service-monitor.md +141 -0
  13. package/agents/shell-expert.md +147 -0
  14. package/benchmarks/runner.sh +147 -0
  15. package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
  16. package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
  17. package/benchmarks/tasks/02-refactor-module/task.md +8 -0
  18. package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
  19. package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
  20. package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
  21. package/bin/act.js +238 -0
  22. package/commands/autocode.md +6 -0
  23. package/commands/cancel-ralph.md +18 -0
  24. package/commands/code-factory.md +53 -0
  25. package/commands/create-prd.md +55 -0
  26. package/commands/ralph-loop.md +18 -0
  27. package/commands/run-plan.md +117 -0
  28. package/commands/submit-lesson.md +122 -0
  29. package/docs/ARCHITECTURE.md +630 -0
  30. package/docs/CONTRIBUTING.md +125 -0
  31. package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
  32. package/docs/lessons/0002-async-def-without-await.md +28 -0
  33. package/docs/lessons/0003-create-task-without-callback.md +28 -0
  34. package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
  35. package/docs/lessons/0005-sqlite-without-closing.md +33 -0
  36. package/docs/lessons/0006-venv-pip-path.md +27 -0
  37. package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
  38. package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
  39. package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
  40. package/docs/lessons/0010-local-outside-function-bash.md +33 -0
  41. package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
  42. package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
  43. package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
  44. package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
  45. package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
  46. package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
  47. package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
  48. package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
  49. package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
  50. package/docs/lessons/0020-persist-state-incrementally.md +44 -0
  51. package/docs/lessons/0021-dual-axis-testing.md +48 -0
  52. package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
  53. package/docs/lessons/0023-static-analysis-spiral.md +51 -0
  54. package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
  55. package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
  56. package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
  57. package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
  58. package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
  59. package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
  60. package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
  61. package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
  62. package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
  63. package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
  64. package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
  65. package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
  66. package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
  67. package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
  68. package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
  69. package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
  70. package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
  71. package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
  72. package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
  73. package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
  74. package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
  75. package/docs/lessons/0045-iterative-design-improvement.md +33 -0
  76. package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
  77. package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
  78. package/docs/lessons/0048-integration-wiring-batch.md +40 -0
  79. package/docs/lessons/0049-ab-verification.md +41 -0
  80. package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
  81. package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
  82. package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
  83. package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
  84. package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
  85. package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
  86. package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
  87. package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
  88. package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
  89. package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
  90. package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
  91. package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
  92. package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
  93. package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
  94. package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
  95. package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
  96. package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
  97. package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
  98. package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
  99. package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
  100. package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
  101. package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
  102. package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
  103. package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
  104. package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
  105. package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
  106. package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
  107. package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
  108. package/docs/lessons/0078-static-review-without-live-test.md +30 -0
  109. package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
  110. package/docs/lessons/FRAMEWORK.md +161 -0
  111. package/docs/lessons/SUMMARY.md +201 -0
  112. package/docs/lessons/TEMPLATE.md +85 -0
  113. package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
  114. package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
  115. package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
  116. package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
  117. package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
  118. package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
  119. package/docs/plans/2026-02-21-mab-research-report.md +406 -0
  120. package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
  121. package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
  122. package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
  123. package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
  124. package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
  125. package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
  126. package/docs/plans/2026-02-22-mab-run-design.md +462 -0
  127. package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
  128. package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
  129. package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
  130. package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
  131. package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
  132. package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
  133. package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
  134. package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
  135. package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
  136. package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
  137. package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
  138. package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
  139. package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
  140. package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
  141. package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
  142. package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
  143. package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
  144. package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
  145. package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
  146. package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
  147. package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
  148. package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
  149. package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
  150. package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
  151. package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
  152. package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
  153. package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
  154. package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
  155. package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
  156. package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
  157. package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
  158. package/docs/plans/2026-02-24-headless-module-split.md +443 -0
  159. package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
  160. package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
  161. package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
  162. package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
  163. package/docs/plans/audit-findings.md +186 -0
  164. package/docs/telegram-notification-format.md +98 -0
  165. package/examples/example-plan.md +51 -0
  166. package/examples/example-prd.json +72 -0
  167. package/examples/example-roadmap.md +33 -0
  168. package/examples/quickstart-plan.md +63 -0
  169. package/hooks/hooks.json +26 -0
  170. package/hooks/setup-symlinks.sh +48 -0
  171. package/hooks/stop-hook.sh +135 -0
  172. package/package.json +47 -0
  173. package/policies/bash.md +71 -0
  174. package/policies/python.md +71 -0
  175. package/policies/testing.md +61 -0
  176. package/policies/universal.md +60 -0
  177. package/scripts/analyze-report.sh +97 -0
  178. package/scripts/architecture-map.sh +145 -0
  179. package/scripts/auto-compound.sh +273 -0
  180. package/scripts/batch-audit.sh +42 -0
  181. package/scripts/batch-test.sh +101 -0
  182. package/scripts/entropy-audit.sh +221 -0
  183. package/scripts/failure-digest.sh +51 -0
  184. package/scripts/generate-ast-rules.sh +96 -0
  185. package/scripts/init.sh +112 -0
  186. package/scripts/lesson-check.sh +428 -0
  187. package/scripts/lib/common.sh +61 -0
  188. package/scripts/lib/cost-tracking.sh +153 -0
  189. package/scripts/lib/ollama.sh +60 -0
  190. package/scripts/lib/progress-writer.sh +128 -0
  191. package/scripts/lib/run-plan-context.sh +215 -0
  192. package/scripts/lib/run-plan-echo-back.sh +231 -0
  193. package/scripts/lib/run-plan-headless.sh +396 -0
  194. package/scripts/lib/run-plan-notify.sh +57 -0
  195. package/scripts/lib/run-plan-parser.sh +81 -0
  196. package/scripts/lib/run-plan-prompt.sh +215 -0
  197. package/scripts/lib/run-plan-quality-gate.sh +132 -0
  198. package/scripts/lib/run-plan-routing.sh +315 -0
  199. package/scripts/lib/run-plan-sampling.sh +170 -0
  200. package/scripts/lib/run-plan-scoring.sh +146 -0
  201. package/scripts/lib/run-plan-state.sh +142 -0
  202. package/scripts/lib/run-plan-team.sh +199 -0
  203. package/scripts/lib/telegram.sh +54 -0
  204. package/scripts/lib/thompson-sampling.sh +176 -0
  205. package/scripts/license-check.sh +74 -0
  206. package/scripts/mab-run.sh +575 -0
  207. package/scripts/module-size-check.sh +146 -0
  208. package/scripts/patterns/async-no-await.yml +5 -0
  209. package/scripts/patterns/bare-except.yml +6 -0
  210. package/scripts/patterns/empty-catch.yml +6 -0
  211. package/scripts/patterns/hardcoded-localhost.yml +9 -0
  212. package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
  213. package/scripts/pipeline-status.sh +197 -0
  214. package/scripts/policy-check.sh +226 -0
  215. package/scripts/prior-art-search.sh +133 -0
  216. package/scripts/promote-mab-lessons.sh +126 -0
  217. package/scripts/prompts/agent-a-superpowers.md +29 -0
  218. package/scripts/prompts/agent-b-ralph.md +29 -0
  219. package/scripts/prompts/judge-agent.md +61 -0
  220. package/scripts/prompts/planner-agent.md +44 -0
  221. package/scripts/pull-community-lessons.sh +90 -0
  222. package/scripts/quality-gate.sh +266 -0
  223. package/scripts/research-gate.sh +90 -0
  224. package/scripts/run-plan.sh +329 -0
  225. package/scripts/scope-infer.sh +159 -0
  226. package/scripts/setup-ralph-loop.sh +155 -0
  227. package/scripts/telemetry.sh +230 -0
  228. package/scripts/tests/run-all-tests.sh +52 -0
  229. package/scripts/tests/test-act-cli.sh +46 -0
  230. package/scripts/tests/test-agents-md.sh +87 -0
  231. package/scripts/tests/test-analyze-report.sh +114 -0
  232. package/scripts/tests/test-architecture-map.sh +89 -0
  233. package/scripts/tests/test-auto-compound.sh +169 -0
  234. package/scripts/tests/test-batch-test.sh +65 -0
  235. package/scripts/tests/test-benchmark-runner.sh +25 -0
  236. package/scripts/tests/test-common.sh +168 -0
  237. package/scripts/tests/test-cost-tracking.sh +158 -0
  238. package/scripts/tests/test-echo-back.sh +180 -0
  239. package/scripts/tests/test-entropy-audit.sh +146 -0
  240. package/scripts/tests/test-failure-digest.sh +66 -0
  241. package/scripts/tests/test-generate-ast-rules.sh +145 -0
  242. package/scripts/tests/test-helpers.sh +82 -0
  243. package/scripts/tests/test-init.sh +47 -0
  244. package/scripts/tests/test-lesson-check.sh +278 -0
  245. package/scripts/tests/test-lesson-local.sh +55 -0
  246. package/scripts/tests/test-license-check.sh +109 -0
  247. package/scripts/tests/test-mab-run.sh +182 -0
  248. package/scripts/tests/test-ollama-lib.sh +49 -0
  249. package/scripts/tests/test-ollama.sh +60 -0
  250. package/scripts/tests/test-pipeline-status.sh +198 -0
  251. package/scripts/tests/test-policy-check.sh +124 -0
  252. package/scripts/tests/test-prior-art-search.sh +96 -0
  253. package/scripts/tests/test-progress-writer.sh +140 -0
  254. package/scripts/tests/test-promote-mab-lessons.sh +110 -0
  255. package/scripts/tests/test-pull-community-lessons.sh +149 -0
  256. package/scripts/tests/test-quality-gate.sh +241 -0
  257. package/scripts/tests/test-research-gate.sh +132 -0
  258. package/scripts/tests/test-run-plan-cli.sh +86 -0
  259. package/scripts/tests/test-run-plan-context.sh +305 -0
  260. package/scripts/tests/test-run-plan-e2e.sh +153 -0
  261. package/scripts/tests/test-run-plan-headless.sh +424 -0
  262. package/scripts/tests/test-run-plan-notify.sh +124 -0
  263. package/scripts/tests/test-run-plan-parser.sh +217 -0
  264. package/scripts/tests/test-run-plan-prompt.sh +254 -0
  265. package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
  266. package/scripts/tests/test-run-plan-routing.sh +178 -0
  267. package/scripts/tests/test-run-plan-scoring.sh +148 -0
  268. package/scripts/tests/test-run-plan-state.sh +261 -0
  269. package/scripts/tests/test-run-plan-team.sh +157 -0
  270. package/scripts/tests/test-scope-infer.sh +150 -0
  271. package/scripts/tests/test-setup-ralph-loop.sh +63 -0
  272. package/scripts/tests/test-telegram-env.sh +38 -0
  273. package/scripts/tests/test-telegram.sh +121 -0
  274. package/scripts/tests/test-telemetry.sh +46 -0
  275. package/scripts/tests/test-thompson-sampling.sh +139 -0
  276. package/scripts/tests/test-validate-all.sh +60 -0
  277. package/scripts/tests/test-validate-commands.sh +89 -0
  278. package/scripts/tests/test-validate-hooks.sh +98 -0
  279. package/scripts/tests/test-validate-lessons.sh +150 -0
  280. package/scripts/tests/test-validate-plan-quality.sh +235 -0
  281. package/scripts/tests/test-validate-plans.sh +187 -0
  282. package/scripts/tests/test-validate-plugin.sh +106 -0
  283. package/scripts/tests/test-validate-prd.sh +184 -0
  284. package/scripts/tests/test-validate-skills.sh +134 -0
  285. package/scripts/validate-all.sh +57 -0
  286. package/scripts/validate-commands.sh +67 -0
  287. package/scripts/validate-hooks.sh +89 -0
  288. package/scripts/validate-lessons.sh +98 -0
  289. package/scripts/validate-plan-quality.sh +369 -0
  290. package/scripts/validate-plans.sh +120 -0
  291. package/scripts/validate-plugin.sh +86 -0
  292. package/scripts/validate-policies.sh +42 -0
  293. package/scripts/validate-prd.sh +118 -0
  294. package/scripts/validate-skills.sh +96 -0
  295. package/skills/autocode/SKILL.md +285 -0
  296. package/skills/autocode/ab-verification.md +51 -0
  297. package/skills/autocode/code-quality-standards.md +37 -0
  298. package/skills/autocode/competitive-mode.md +364 -0
  299. package/skills/brainstorming/SKILL.md +97 -0
  300. package/skills/capture-lesson/SKILL.md +187 -0
  301. package/skills/check-lessons/SKILL.md +116 -0
  302. package/skills/dispatching-parallel-agents/SKILL.md +110 -0
  303. package/skills/executing-plans/SKILL.md +85 -0
  304. package/skills/finishing-a-development-branch/SKILL.md +201 -0
  305. package/skills/receiving-code-review/SKILL.md +72 -0
  306. package/skills/requesting-code-review/SKILL.md +59 -0
  307. package/skills/requesting-code-review/code-reviewer.md +82 -0
  308. package/skills/research/SKILL.md +145 -0
  309. package/skills/roadmap/SKILL.md +115 -0
  310. package/skills/subagent-driven-development/SKILL.md +98 -0
  311. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
  312. package/skills/subagent-driven-development/implementer-prompt.md +73 -0
  313. package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
  314. package/skills/systematic-debugging/SKILL.md +134 -0
  315. package/skills/systematic-debugging/condition-based-waiting.md +64 -0
  316. package/skills/systematic-debugging/defense-in-depth.md +32 -0
  317. package/skills/systematic-debugging/root-cause-tracing.md +55 -0
  318. package/skills/test-driven-development/SKILL.md +167 -0
  319. package/skills/using-git-worktrees/SKILL.md +219 -0
  320. package/skills/using-superpowers/SKILL.md +54 -0
  321. package/skills/verification-before-completion/SKILL.md +140 -0
  322. package/skills/verify/SKILL.md +82 -0
  323. package/skills/writing-plans/SKILL.md +128 -0
  324. package/skills/writing-skills/SKILL.md +93 -0
@@ -0,0 +1,578 @@
1
+ # Agent Suite + Tooling Integration Implementation Plan
2
+
3
+ > **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
4
+
5
+ **Goal:** Create 6 new agents, improve 8 existing agents, and integrate shellcheck/shfmt/shellharden/semgrep into the Makefile and CI pipeline.
6
+
7
+ **Architecture:** Agents are markdown files in `~/.claude/agents/` with YAML frontmatter. Tooling integrates via a new `make lint` target wired into `make ci`. No code compilation — all files are markdown or config.
8
+
9
+ **Tech Stack:** Markdown (agents), Bash (Makefile), YAML (CI), shellcheck/shfmt/shellharden/semgrep (linting)
10
+
11
+ **Design doc:** `docs/plans/2026-02-23-agent-suite-design.md`
12
+ **Research:** 7 docs at `docs/plans/2026-02-23-research-*.md`
13
+
14
+ ---
15
+
16
+ ## Batch 1: Linting Tooling
17
+
18
+ Install tools, create configs, wire into Makefile and CI.
19
+
20
+ ### Task 1: Install shfmt, shellharden, and semgrep
21
+
22
+ **Step 1: Install tools**
23
+
24
+ Run: `brew install shfmt shellharden`
25
+ Expected: Both install successfully
26
+
27
+ **Step 2: Install semgrep**
28
+
29
+ Run: `pip3 install semgrep`
30
+ Expected: semgrep installs successfully
31
+
32
+ **Step 3: Verify all 4 tools available**
33
+
34
+ Run: `which shellcheck shfmt shellharden semgrep`
35
+ Expected: All 4 paths printed
36
+
37
+ ### Task 2: Create .shellcheckrc
38
+
39
+ **Files:**
40
+ - Create: `~/Documents/projects/autonomous-coding-toolkit/.shellcheckrc`
41
+
42
+ **Step 1: Write the config file**
43
+
44
+ ```
45
+ # Enable all optional checks
46
+ enable=all
47
+ # Follow sourced files
48
+ external-sources=true
49
+ ```
50
+
51
+ **Step 2: Verify shellcheck picks it up**
52
+
53
+ Run: `cd ~/Documents/projects/autonomous-coding-toolkit && shellcheck scripts/quality-gate.sh 2>&1 | head -5`
54
+ Expected: ShellCheck runs (may show warnings — that's fine, confirms the config is loaded)
55
+
56
+ ### Task 3: Update Makefile with lint target
57
+
58
+ **Files:**
59
+ - Modify: `~/Documents/projects/autonomous-coding-toolkit/Makefile`
60
+
61
+ **Step 1: Replace Makefile contents**
62
+
63
+ ```makefile
64
+ .PHONY: test validate lint ci
65
+
66
+ lint:
67
+ @echo "=== ShellCheck ==="
68
+ @shellcheck scripts/*.sh scripts/lib/*.sh 2>&1 || true
69
+ @echo "=== shfmt ==="
70
+ @shfmt -d -i 2 -ci scripts/*.sh scripts/lib/*.sh 2>&1 || true
71
+ @echo "=== Shellharden ==="
72
+ @shellharden --check scripts/*.sh scripts/lib/*.sh 2>&1 || true
73
+ @echo "=== Semgrep ==="
74
+ @semgrep --config "p/bash" --quiet scripts/ 2>&1 || true
75
+ @echo "=== Lint Complete ==="
76
+
77
+ test:
78
+ @bash scripts/tests/run-all-tests.sh
79
+
80
+ validate:
81
+ @bash scripts/validate-all.sh
82
+
83
+ ci: lint validate test
84
+ @echo "CI: ALL PASSED"
85
+ ```
86
+
87
+ Note: `|| true` on each linter prevents one tool's warnings from blocking the rest. Individual tools exit non-zero on warnings, which would stop `make` otherwise. This is advisory-mode — future work can make specific tools blocking.
88
+
89
+ **Step 2: Verify lint target works**
90
+
91
+ Run: `cd ~/Documents/projects/autonomous-coding-toolkit && make lint 2>&1 | tail -10`
92
+ Expected: All 4 sections run, "Lint Complete" at end
93
+
94
+ **Step 3: Verify CI still passes**
95
+
96
+ Run: `cd ~/Documents/projects/autonomous-coding-toolkit && make ci 2>&1 | tail -5`
97
+ Expected: "CI: ALL PASSED"
98
+
99
+ ### Task 4: Update CI workflow
100
+
101
+ **Files:**
102
+ - Modify: `~/Documents/projects/autonomous-coding-toolkit/.github/workflows/ci.yml`
103
+
104
+ **Step 1: Update ci.yml**
105
+
106
+ ```yaml
107
+ name: CI
108
+ on:
109
+ push:
110
+ branches: [main]
111
+ pull_request:
112
+ branches: [main]
113
+
114
+ jobs:
115
+ ci:
116
+ runs-on: ubuntu-latest
117
+ steps:
118
+ - uses: actions/checkout@v4
119
+ - name: Install dependencies
120
+ run: |
121
+ sudo apt-get install -y jq shellcheck
122
+ sudo snap install shfmt
123
+ pip install semgrep
124
+ - name: Run CI
125
+ run: make ci
126
+ ```
127
+
128
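+ Note: shellharden is not readily available on Ubuntu CI runners and is advisory-only, so it is not installed in CI. `make ci` still invokes it through the `lint` target, but the `|| true` on that line means the missing binary only prints a "command not found" message and does not fail the build. It runs for real locally via `make lint`.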
129
+
130
+ **Step 2: Commit tooling changes**
131
+
132
+ Run:
133
+ ```bash
134
+ cd ~/Documents/projects/autonomous-coding-toolkit
135
+ git add .shellcheckrc Makefile .github/workflows/ci.yml
136
+ git commit -m "feat: add lint target with shellcheck, shfmt, shellharden, semgrep"
137
+ ```
138
+
139
+ ---
140
+
141
+ ## Batch 2: New Agents (bash-expert, shell-expert, python-expert)
142
+
143
+ ### Task 5: Create bash-expert agent
144
+
145
+ **Files:**
146
+ - Create: `~/.claude/agents/bash-expert.md`
147
+
148
+ **Step 1: Write the agent file**
149
+
150
+ Use the recommended structure from `docs/plans/2026-02-23-research-bash-expert-agent.md` Section "Recommended Agent Structure". The agent must include:
151
+
152
+ - Frontmatter: `name: bash-expert`, `model: sonnet`, `tools: Read, Grep, Glob, Bash`, `maxTurns: 30`
153
+ - Description with explicit trigger contexts: `.sh` files, CI pipeline shell steps, hook scripts, systemd ExecStart, Makefile targets
154
+ - Role statement referencing Google Shell Style Guide, BashPitfalls, ShellCheck wiki
155
+ - Scan workflow (6 steps): read → grep P1 patterns → grep P2 patterns → check tooling config → run shellcheck → check scope
156
+ - Priority 1 blocking patterns (9 grep targets from research synthesis table): unquoted vars in command args, `eval` on variables, `|| true` masking errors, `cd` without error handling, missing `set -euo pipefail`, `for f in $(ls`, `local var=$(cmd)` masking exit code, `2>&1 >>` ordering, same-file pipeline
157
+ - Priority 2 quality patterns (10 targets): `#!/bin/bash` vs `#!/usr/bin/env bash`, `grep -P`, `ls` for file existence, backtick substitution, missing `--help`, no EXIT trap for temp files, `echo` where `printf` safer, `[ ]` instead of `[[ ]]`, hardcoded `/tmp/` instead of `mktemp`, `$*` instead of `$@`
158
+ - Output format: BLOCKING / QUALITY / STYLE / TOOLING table with file:line references
159
+ - Generation mode rules: `set -Eeuo pipefail`, `IFS=$'\n\t'`, `SCRIPT_DIR` detection, `err()`/`die()` functions, `trap EXIT` for cleanup, `main()` pattern, `--help` via `usage()`, `local` for function vars
160
+ - 100-line scope gate: flag scripts over 100 lines with complex control flow as Python rewrite candidates
161
+ - Hallucination guard: "Report only what Read/Grep/Bash output directly confirms"
162
+
163
+ **Step 2: Verify the file loads**
164
+
165
+ Run: `head -10 ~/.claude/agents/bash-expert.md`
166
+ Expected: YAML frontmatter with name, description, tools, model
167
+
168
+ ### Task 6: Create shell-expert agent
169
+
170
+ **Files:**
171
+ - Create: `~/.claude/agents/shell-expert.md`
172
+
173
+ **Step 1: Write the agent file**
174
+
175
+ Use the recommended structure from `docs/plans/2026-02-23-research-shell-expert-agent.md` Section 9. The agent must include:
176
+
177
+ - Frontmatter: `name: shell-expert`, `model: sonnet`, `tools: Read, Grep, Glob, Bash`, `maxTurns: 30`
178
+ - Description: diagnosing systemd failures, PATH/env issues, package management, permissions, environment config. NOT script writing.
179
+ - Relationship to infra-auditor: monitoring vs investigation
180
+ - Five diagnostic domains with ordered checklists:
181
+ 1. Service Lifecycle: `systemctl --user show <svc> -p ActiveState,SubState,NRestarts,Result,ExecMainStartTimestamp --value`. Triage by Result code. Debug sequence: status → journalctl → manual repro → disable Restart= to expose errors. `systemd-analyze verify` for syntax lint.
182
+ 2. Environment & PATH: `which` → `type -a` → PATH listing → EnvironmentFile quoting check. Detect nvm/pyenv shims. Systemd EnvironmentFile does NOT strip shell quotes. Tilde/`$HOME` don't expand in ExecStart.
183
+ 3. Hardening Audit: `systemd-analyze security <svc>` → exposure score → top-5 directives. Categories: privilege escalation, filesystem, namespace, kernel, syscall, network.
184
+ 4. Package Management: `apt-get check` → `dpkg --configure -a` if broken → held packages → security updates → orphaned packages.
185
+ 5. Permissions: `~/.env` mode (600), SUID/SGID audit, world-writable scan, service user ownership.
186
+ - Key rule: "Use `systemctl show` properties, NEVER parse `systemctl status` text output"
187
+ - Output format: CRITICAL / WARNING / INFO sections with root cause + fix command + verification command per finding
188
+ - Hallucination guard: "Only recommend fixes you have confirmed through command output"
189
+
190
+ **Step 2: Verify the file loads**
191
+
192
+ Run: `head -10 ~/.claude/agents/shell-expert.md`
193
+ Expected: YAML frontmatter with name, description, tools, model
194
+
195
+ ### Task 7: Create python-expert agent
196
+
197
+ **Files:**
198
+ - Create: `~/.claude/agents/python-expert.md`
199
+
200
+ **Step 1: Write the agent file**
201
+
202
+ Use the recommended structure from `docs/plans/2026-02-23-research-python-expert-agent.md` Section 8. The agent must include:
203
+
204
+ - Frontmatter: `name: python-expert`, `model: sonnet`, `tools: Read, Grep, Glob, Bash`, `maxTurns: 30`
205
+ - Description: reviewing/writing Python code with focus on async discipline, resource lifecycle, type safety. Specific to HA/Telegram/Notion/Ollama ecosystem.
206
+ - Scan groups (extending lesson-scanner numbering):
207
+ - Scan 7: WebSocket send without `try/except ConnectionClosed` (Lesson #34). Pattern: `await.*\.(send|recv)\(` inside `async def` without surrounding try/except.
208
+ - Scan 8: Blocking SQLite in async context (Lesson #33). Pattern: `sqlite3\.connect\(` inside `async def`. Also: `aiosqlite\.connect\(` outside `async with`.
209
+ - Scan 9: Type boundary violations. Pattern: functions accepting MQTT/payload/state/update/event params without Pydantic BaseModel validation.
210
+ - Scan 10: Dangling create_task (Lesson #43). Pattern: `create_task(` without storing ref AND without `add_done_callback`.
211
+ - Ruff config recommendation for Python projects (the exact toml from research doc)
212
+ - Key patterns: ASYNC210/230/251 rules, RUF006, `pickle.loads`/`eval`/`exec`/`subprocess(shell=True)` security flags
213
+ - HA subscriber pattern: `self._unsub = subscribe(...)`, call in `shutdown()`
214
+ - Mode B note: for full architectural review, use `model: opus` and add class structure analysis
215
+ - Hallucination guard: "Report only what Grep/Read confirms with file:line evidence"
216
+
217
+ **Step 2: Verify the file loads**
218
+
219
+ Run: `head -10 ~/.claude/agents/python-expert.md`
220
+ Expected: YAML frontmatter with name, description, tools, model
221
+
222
+ **Step 3: Commit batch 2**
223
+
224
+ Run:
225
+ ```bash
226
+ git add ~/.claude/agents/bash-expert.md ~/.claude/agents/shell-expert.md ~/.claude/agents/python-expert.md
227
+ git commit -m "feat: add bash-expert, shell-expert, python-expert agents"
228
+ ```
229
+
230
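+ Note: `~/.claude/agents/` is outside the repo, so the `git add` above will fail when run from the toolkit repo and these files won't be committed there. In that case, skip the commit and just verify the files exist. If you want them tracked, copy them to the toolkit's `agents/` directory and commit those copies instead.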
231
+
232
+ ---
233
+
234
+ ## Batch 3: New Agents (integration-tester, dependency-auditor, service-monitor)
235
+
236
+ ### Task 8: Create integration-tester agent
237
+
238
+ **Files:**
239
+ - Create: `~/.claude/agents/integration-tester.md`
240
+
241
+ **Step 1: Write the agent file**
242
+
243
+ Use the recommended structure from `docs/plans/2026-02-23-research-integration-tester-agent.md` "Recommended Agent Structure". The agent must include:
244
+
245
+ - Frontmatter: `name: integration-tester`, `model: opus`, `tools: Read, Grep, Glob, Bash`, `maxTurns: 40`
246
+ - Description: verifying data flows across service seams, catching Cluster B bugs
247
+ - Five operating principles: black box only, evidence-based assertions, one probe per seam, fail fast with cause, no side effects
248
+ - Four probe strategies: `freshness_and_schema`, `sentinel_injection`, `db_row_trace`, `env_audit` — each with explicit numbered steps
249
+ - Seam registry (7 seams from design doc): HA logbook, Intelligence, Hub cache, Notion replica, Capture DB, Ollama queue, Shared env
250
+ - Output format: summary table (seam_id, status, latency) + per-seam evidence + action items
251
+ - Results written to `/tmp/integration-tester-results/`
252
+ - Key rule: "Never read service source code to infer behavior. Only check external observables."
253
+ - Hallucination guard: "Every PASS and FAIL must include quoted command output as evidence"
254
+
255
+ **Step 2: Verify**
256
+
257
+ Run: `head -10 ~/.claude/agents/integration-tester.md`
258
+ Expected: YAML frontmatter
259
+
260
+ ### Task 9: Create dependency-auditor agent
261
+
262
+ **Files:**
263
+ - Create: `~/.claude/agents/dependency-auditor.md`
264
+
265
+ **Step 1: Write the agent file**
266
+
267
+ Use the recommended structure from `docs/plans/2026-02-23-research-dependency-auditor-agent.md` Section 8. The agent must include:
268
+
269
+ - Frontmatter: `name: dependency-auditor`, `model: haiku`, `tools: Read, Grep, Glob, Bash`, `maxTurns: 25`
270
+ - Description: scans 8 project repos for CVEs, outdated packages, license compliance. Read-only.
271
+ - Step 0: Tool availability check (`which pip-audit osv-scanner trivy npm npx`)
272
+ - Step 1: Repo detection (scan `~/Documents/projects/` for requirements.txt, pyproject.toml, package.json, Dockerfile)
273
+ - Step 2: CVE scanning per repo (`pip-audit -f json`, `npm audit --json`, `trivy fs --format json`)
274
+ - Step 3: Cross-language CVE aggregation (`osv-scanner scan --recursive`)
275
+ - Step 4: Outdated package detection (`pip list --outdated --format json`, `npx npm-check-updates --jsonUpgraded`)
276
+ - Step 5: License compliance (`pip-licenses --format json`, `npx license-checker --json`). Allowlist: MIT, Apache-2.0, BSD-2/3-Clause, ISC, PSF, CC0, Public Domain, Unlicense.
277
+ - Step 6: Report format (CRITICAL-HIGH / MEDIUM / Outdated / License tables)
278
+ - Key rule: "This agent is read-only. NEVER run pip install, npm audit fix, or modify any file."
279
+ - Hallucination guard: "Only report CVEs that appear in tool JSON output"
280
+
281
+ **Step 2: Verify**
282
+
283
+ Run: `head -10 ~/.claude/agents/dependency-auditor.md`
284
+ Expected: YAML frontmatter
285
+
286
+ ### Task 10: Create service-monitor agent
287
+
288
+ **Files:**
289
+ - Create: `~/.claude/agents/service-monitor.md`
290
+
291
+ **Step 1: Write the agent file**
292
+
293
+ Use the recommended structure from `docs/plans/2026-02-23-research-service-monitor-agent.md` "Recommended Agent Structure". The agent must include:
294
+
295
+ - Frontmatter: `name: service-monitor`, `model: sonnet`, `tools: Read, Grep, Glob, Bash`, `maxTurns: 50`, `memory: user`
296
+ - Description: audits 12 user systemd services and 21 timers for failures, restart loops, silent errors, resource anomalies, known failure patterns
297
+ - Six inspection phases:
298
+ 1. Service state sweep: `systemctl --user show <svc> -p ActiveState,SubState,NRestarts,Result,ExecMainStartTimestamp --value` for all 12 services
299
+ 2. Timer health check: `LastTriggerUSec` via `systemctl --user show` compared against expected intervals
300
+ 3. Per-service log analysis: `journalctl --user -u <svc> --since "24 hours ago" -q` — error rates, zero-entry detection (Cluster A)
301
+ 4. Resource anomaly: memory usage vs MemoryMax, load average
302
+ 5. Known failure patterns: Telegram 409 (`grep "409"`), MQTT disconnect loop (`grep -i "disconnect\|reconnect"`), OOM kills (`Result == oom-kill`)
303
+ 6. Baseline comparison: read memory for previous NRestarts/error counts, flag >2x deviation
304
+ - State taxonomy: OK / RECOVERED / RESTARTING / FAILED / ANOMALY (Cluster A)
305
+ - Timer stale threshold: 2x expected interval
306
+ - Known limitations: NRestarts is cumulative (must combine with ActiveEnterTimestamp), LastTriggerUSec=0 means never fired, `--user` required for all user services
307
+ - Output format: CRITICAL / WARNING / ANOMALY / TIMER ISSUES / OK sections
308
+ - Memory update: persist new baselines after each run
309
+ - Hallucination guard: "Report only command output you have actually executed"
310
+
311
+ **Step 2: Verify**
312
+
313
+ Run: `head -10 ~/.claude/agents/service-monitor.md`
314
+ Expected: YAML frontmatter
315
+
316
+ **Step 3: Verify all 6 new agents exist**
317
+
318
+ Run: `ls ~/.claude/agents/*.md | wc -l`
319
+ Expected: 14 (8 existing + 6 new)
320
+
321
+ ---
322
+
323
+ ## Batch 4: Existing Agent Improvements (P0 — Correctness)
324
+
325
+ ### Task 11: Fix security-reviewer
326
+
327
+ **Files:**
328
+ - Modify: `~/.claude/agents/security-reviewer.md`
329
+
330
+ **Step 1: Update frontmatter**
331
+
332
+ Change tools from `Read, Grep, Glob, Bash` to `Read, Grep, Glob` (remove Bash). Add `model: sonnet`, `maxTurns: 25`, `memory: project`.
333
+
334
+ Update description to: `"Reviews code for security vulnerabilities and sensitive data exposure. Use proactively after any code changes that touch authentication, data handling, file I/O, subprocess calls, or network requests."`
335
+
336
+ **Step 2: Add Python/bash attack categories**
337
+
338
+ After existing categories, add:
339
+
340
+ - **Python-specific:** `pickle.loads()`, `eval()`, `exec()`, `subprocess` with `shell=True`, `yaml.load()` without `Loader=SafeLoader`, `os.system()`, `input()` in Python 2 context
341
+ - **Cryptography:** `hashlib.md5`, `hashlib.sha1` for security purposes, `random.random()` in security-sensitive context, hardcoded salts/IVs
342
+ - **Shell-specific:** `eval` on variables, unquoted command substitution in arguments, `curl | bash` patterns
343
+
344
+ **Step 3: Add hallucination guard**
345
+
346
+ Add at end: "CRITICAL: Report ONLY findings grounded in specific file:line evidence from Read/Grep output. If a grep returns no matches for a category, record it as CLEAN — do not infer vulnerabilities. Zero grep results = zero findings for that category."
347
+
348
+ **Step 4: Add CLEAN section to output format**
349
+
350
+ ```
351
+ CLEAN (no findings):
352
+ - [list of categories with zero grep matches]
353
+ ```
354
+
355
+ ### Task 12: Fix infra-auditor
356
+
357
+ **Files:**
358
+ - Modify: `~/.claude/agents/infra-auditor.md`
359
+
360
+ **Step 1: Update frontmatter**
361
+
362
+ Add `model: haiku`, `maxTurns: 15`.
363
+
364
+ **Step 2: Fix sync freshness math**
365
+
366
+ Replace the sync freshness section with:
367
+
368
+ ```markdown
369
+ ## Sync freshness
370
+
371
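+ - Notion sync: compute the file age in seconds with `echo $(( $(date +%s) - $(stat -c '%Y' ~/Documents/notion/.sync-metadata.json 2>/dev/null || echo 0) ))` — warn if the age is > 43200 (12 hours). A missing file falls back to mtime 0, which yields a very large age and also triggers the warning.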
372
+ - Telegram brief log: check `journalctl --user -u telegram-brief.timer --since "26 hours ago" -q | wc -l` — warn if 0 (missed daily run)
373
+ ```
374
+
375
+ **Step 3: Add timer audit**
376
+
377
+ Add new section:
378
+
379
+ ```markdown
380
+ ## Timer freshness
381
+
382
+ Run: `systemctl --user list-timers --no-pager`
383
+ Check that all timers show a "NEXT" time in the future. Any timer with "n/a" for NEXT or LAST is stale.
384
+ ```
385
+
386
+ **Step 4: Add hallucination guard**
387
+
388
+ Add at end: "Report only output from commands you actually executed. Do not infer service state."
389
+
390
+ ### Task 13: Fix lesson-scanner description
391
+
392
+ **Files:**
393
+ - Modify: `~/.claude/agents/lesson-scanner.md`
394
+
395
+ **Step 1: Update frontmatter**
396
+
397
+ Change description count from "53 lessons" to "66 lessons". Add `model: sonnet`, `maxTurns: 25`.
398
+
399
+ **Step 2: Commit P0 fixes**
400
+
401
+ Run:
402
+ ```bash
403
+ cd ~/Documents/projects/autonomous-coding-toolkit
404
+ # Note: agent files are in ~/.claude/agents/, not in repo
405
+ # If toolkit has a copy, update that too
406
+ ```
407
+
408
+ ---
409
+
410
+ ## Batch 5: Existing Agent Improvements (P1 — Quality)
411
+
412
+ ### Task 14: Add model and maxTurns to remaining agents
413
+
414
+ **Files:**
415
+ - Modify: `~/.claude/agents/doc-updater.md`
416
+ - Modify: `~/.claude/agents/counter.md`
417
+ - Modify: `~/.claude/agents/counter-daily.md`
418
+ - Modify: `~/.claude/agents/notion-researcher.md`
419
+ - Modify: `~/.claude/agents/notion-writer.md`
420
+
421
+ **Step 1: Update each agent's frontmatter**
422
+
423
+ | Agent | Add model | Add maxTurns |
424
+ |-------|-----------|-------------|
425
+ | doc-updater | `model: sonnet` | `maxTurns: 25` |
426
+ | counter | (already opus) | `maxTurns: 20` |
427
+ | counter-daily | (already sonnet) | `maxTurns: 5` |
428
+ | notion-researcher | `model: sonnet` | `maxTurns: 40` |
429
+ | notion-writer | `model: haiku` | `maxTurns: 20` |
430
+
431
+ ### Task 15: Fix doc-updater git diff command
432
+
433
+ **Files:**
434
+ - Modify: `~/.claude/agents/doc-updater.md`
435
+
436
+ **Step 1: Update Process section**
437
+
438
+ Change line `1. Run \`git diff HEAD~1 --name-only\`` to:
439
+
440
+ ```
441
+ 1. Run `git status --short` to see uncommitted changes AND `git diff HEAD~1 HEAD --name-only` to see files changed by the most recent commit
442
+ ```
443
+
444
+ **Step 2: Add structured output format**
445
+
446
+ Add after Rules section:
447
+
448
+ ```markdown
449
+ ## Output Summary
450
+
451
+ After making changes, report:
452
+ - Files updated: [list of files touched]
453
+ - Changes made: [1-line summary per change]
454
+ - Files checked but unchanged: [count]
455
+ - Skipped (no updates needed): [if applicable]
456
+ ```
457
+
458
+ ### Task 16: Add follow-up rule to counter-daily
459
+
460
+ **Files:**
461
+ - Modify: `~/.claude/agents/counter-daily.md`
462
+
463
+ **Step 1: Add follow-up behavior rule**
464
+
465
+ Add to the agent body: "If Justin responds to your three questions, acknowledge his answers once with a brief reflection, then stop. Do not continue into a full counter session. The daily check is three questions, not a conversation."
466
+
467
+ ---
468
+
469
+ ## Batch 6: Existing Agent Improvements (P2 — Capability)
470
+
471
+ ### Task 17: Add new scan groups to lesson-scanner
472
+
473
+ **Files:**
474
+ - Modify: `~/.claude/agents/lesson-scanner.md` (global copy)
475
+ - Modify: `~/Documents/projects/autonomous-coding-toolkit/agents/lesson-scanner.md` (toolkit copy)
476
+
477
+ **Step 1: Add Scan Group 7: Plan Quality (Lessons #60-66)**
478
+
479
+ After existing scan groups, add:
480
+
481
+ ```markdown
482
+ ## Step 3g: Plan Quality Checks (Lessons #60-66)
483
+
484
+ For each implementation plan in `docs/plans/`:
485
+ - Check batch count > 0 (plan has tasks)
486
+ - Check each batch has at least one task with a verification step
487
+ - Flag plans with > 10 batches as potentially over-scoped
488
+ - Flag tasks without explicit file paths
489
+
490
+ Report as: Nice-to-Have
491
+ ```
492
+
493
+ **Step 2: Add Scan 3f: .venv/bin/pip (Lesson #51)**
494
+
495
+ ```markdown
496
+ ## Step 3f: Venv Pip Usage (Lesson #51)
497
+
498
+ Grep for: `\.venv/bin/pip ` (NOT `.venv/bin/python -m pip`)
499
+ Flag: "Use `.venv/bin/python -m pip` instead of `.venv/bin/pip` — Homebrew PATH corruption"
500
+ Severity: Should-Fix
501
+ ```
502
+
503
+ ### Task 18: Add vector search fallback to notion-researcher
504
+
505
+ **Files:**
506
+ - Modify: `~/.claude/agents/notion-researcher.md`
507
+
508
+ **Step 1: Add zero-result handling**
509
+
510
+ Add to the agent body: "If grep/glob searches return zero results for a query, try broadening the search: (1) use partial keywords, (2) search in `~/Documents/notion/` with wider patterns, (3) try the Notion MCP search tool as a fallback. Report clearly when a topic is not found in the local replica."
511
+
512
+ ### Task 19: Add pre-flight check to notion-writer
513
+
514
+ **Files:**
515
+ - Modify: `~/.claude/agents/notion-writer.md`
516
+
517
+ **Step 1: Add pre-flight validation**
518
+
519
+ Add as first step in the agent body: "Before any API call, verify: (1) `NOTION_API_KEY` env var is set and non-empty, (2) target database/page ID is a valid UUID format (8-4-4-4-12 hex). If either fails, report the error immediately and stop — do not attempt the API call."
520
+
521
+ ---
522
+
523
+ ## Batch 7: Symlinks and Final Verification
524
+
525
+ ### Task 20: Symlink research docs to ~/Documents/research/
526
+
527
+ **Step 1: Create symlinks**
528
+
529
+ Run:
530
+ ```bash
531
+ cd ~/Documents/research
532
+ for f in ~/Documents/projects/autonomous-coding-toolkit/docs/plans/2026-02-23-research-*.md; do
533
+ ln -sf "$f" "$(basename "$f")"
534
+ done
535
+ ```
536
+
537
+ **Step 2: Verify**
538
+
539
+ Run: `ls -la ~/Documents/research/2026-02-23-research-*.md | wc -l`
540
+ Expected: 7
541
+
542
+ ### Task 21: Verify all agents load correctly
543
+
544
+ **Step 1: Check all 14 agents have valid frontmatter**
545
+
546
+ Run:
547
+ ```bash
548
+ for f in ~/.claude/agents/*.md; do
549
+ name=$(head -10 "$f" | grep "^name:" | cut -d: -f2- | tr -d ' ')
550
+ model=$(head -10 "$f" | grep "^model:" | cut -d: -f2- | tr -d ' ')
551
+ tools=$(head -10 "$f" | grep "^tools:" | cut -d: -f2-)
552
+ echo "$name | $model | $tools"
553
+ done
554
+ ```
555
+
556
+ Expected: All 14 agents print name, model, and tools. No empty fields.
557
+
558
+ ### Task 22: Run full CI
559
+
560
+ Run: `cd ~/Documents/projects/autonomous-coding-toolkit && make ci 2>&1 | tail -10`
561
+ Expected: "CI: ALL PASSED"
562
+
563
+ ### Task 23: Commit all changes
564
+
565
+ Run:
566
+ ```bash
567
+ cd ~/Documents/projects/autonomous-coding-toolkit
568
+ git add -A
569
+ git commit -m "feat: add 6 new agents, improve 8 existing agents, integrate linting tooling
570
+
571
+ - New agents: bash-expert, shell-expert, python-expert, integration-tester,
572
+ dependency-auditor, service-monitor
573
+ - Existing improvements: P0 correctness fixes (security-reviewer, infra-auditor,
574
+ lesson-scanner), P1 quality (model/maxTurns on all agents, doc-updater git diff),
575
+ P2 capability (lesson-scanner scan groups, notion fallbacks)
576
+ - Tooling: shellcheck + shfmt + shellharden + semgrep via make lint, CI updated
577
+ - 7 research docs at docs/plans/2026-02-23-research-*.md"
578
+ ```