autonomous-coding-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.claude-plugin/marketplace.json +22 -0
  2. package/.claude-plugin/plugin.json +13 -0
  3. package/LICENSE +21 -0
  4. package/Makefile +21 -0
  5. package/README.md +140 -0
  6. package/SECURITY.md +28 -0
  7. package/agents/bash-expert.md +113 -0
  8. package/agents/dependency-auditor.md +138 -0
  9. package/agents/integration-tester.md +120 -0
  10. package/agents/lesson-scanner.md +149 -0
  11. package/agents/python-expert.md +179 -0
  12. package/agents/service-monitor.md +141 -0
  13. package/agents/shell-expert.md +147 -0
  14. package/benchmarks/runner.sh +147 -0
  15. package/benchmarks/tasks/01-rest-endpoint/rubric.sh +29 -0
  16. package/benchmarks/tasks/01-rest-endpoint/task.md +17 -0
  17. package/benchmarks/tasks/02-refactor-module/task.md +8 -0
  18. package/benchmarks/tasks/03-fix-integration-bug/task.md +8 -0
  19. package/benchmarks/tasks/04-add-test-coverage/task.md +8 -0
  20. package/benchmarks/tasks/05-multi-file-feature/task.md +8 -0
  21. package/bin/act.js +238 -0
  22. package/commands/autocode.md +6 -0
  23. package/commands/cancel-ralph.md +18 -0
  24. package/commands/code-factory.md +53 -0
  25. package/commands/create-prd.md +55 -0
  26. package/commands/ralph-loop.md +18 -0
  27. package/commands/run-plan.md +117 -0
  28. package/commands/submit-lesson.md +122 -0
  29. package/docs/ARCHITECTURE.md +630 -0
  30. package/docs/CONTRIBUTING.md +125 -0
  31. package/docs/lessons/0001-bare-exception-swallowing.md +34 -0
  32. package/docs/lessons/0002-async-def-without-await.md +28 -0
  33. package/docs/lessons/0003-create-task-without-callback.md +28 -0
  34. package/docs/lessons/0004-hardcoded-test-counts.md +28 -0
  35. package/docs/lessons/0005-sqlite-without-closing.md +33 -0
  36. package/docs/lessons/0006-venv-pip-path.md +27 -0
  37. package/docs/lessons/0007-runner-state-self-rejection.md +35 -0
  38. package/docs/lessons/0008-quality-gate-blind-spot.md +33 -0
  39. package/docs/lessons/0009-parser-overcount-empty-batches.md +36 -0
  40. package/docs/lessons/0010-local-outside-function-bash.md +33 -0
  41. package/docs/lessons/0011-batch-tests-for-unimplemented-code.md +36 -0
  42. package/docs/lessons/0012-api-markdown-unescaped-chars.md +33 -0
  43. package/docs/lessons/0013-export-prefix-env-parsing.md +33 -0
  44. package/docs/lessons/0014-decorator-registry-import-side-effect.md +43 -0
  45. package/docs/lessons/0015-frontend-backend-schema-drift.md +43 -0
  46. package/docs/lessons/0016-event-driven-cold-start-seeding.md +44 -0
  47. package/docs/lessons/0017-copy-paste-logic-diverges.md +43 -0
  48. package/docs/lessons/0018-layer-passes-pipeline-broken.md +45 -0
  49. package/docs/lessons/0019-systemd-envfile-ignores-export.md +41 -0
  50. package/docs/lessons/0020-persist-state-incrementally.md +44 -0
  51. package/docs/lessons/0021-dual-axis-testing.md +48 -0
  52. package/docs/lessons/0022-jsx-factory-shadowing.md +43 -0
  53. package/docs/lessons/0023-static-analysis-spiral.md +51 -0
  54. package/docs/lessons/0024-shared-pipeline-implementation.md +55 -0
  55. package/docs/lessons/0025-defense-in-depth-all-entry-points.md +65 -0
  56. package/docs/lessons/0026-linter-no-rules-false-enforcement.md +54 -0
  57. package/docs/lessons/0027-jsx-silent-prop-drop.md +64 -0
  58. package/docs/lessons/0028-no-infrastructure-in-client-code.md +49 -0
  59. package/docs/lessons/0029-never-write-secrets-to-files.md +61 -0
  60. package/docs/lessons/0030-cache-merge-not-replace.md +62 -0
  61. package/docs/lessons/0031-verify-units-at-boundaries.md +66 -0
  62. package/docs/lessons/0032-module-lifecycle-subscribe-unsubscribe.md +89 -0
  63. package/docs/lessons/0033-async-iteration-mutable-snapshot.md +72 -0
  64. package/docs/lessons/0034-caller-missing-await-silent-discard.md +65 -0
  65. package/docs/lessons/0035-duplicate-registration-silent-overwrite.md +85 -0
  66. package/docs/lessons/0036-websocket-dirty-disconnect.md +33 -0
  67. package/docs/lessons/0037-parallel-agents-worktree-corruption.md +31 -0
  68. package/docs/lessons/0038-subscribe-no-stored-ref.md +36 -0
  69. package/docs/lessons/0039-fallback-or-default-hides-bugs.md +34 -0
  70. package/docs/lessons/0040-event-firehose-filter-first.md +36 -0
  71. package/docs/lessons/0041-ambiguous-base-dir-path-nesting.md +32 -0
  72. package/docs/lessons/0042-spec-compliance-insufficient.md +36 -0
  73. package/docs/lessons/0043-exact-count-extensible-collections.md +32 -0
  74. package/docs/lessons/0044-relative-file-deps-worktree.md +39 -0
  75. package/docs/lessons/0045-iterative-design-improvement.md +33 -0
  76. package/docs/lessons/0046-plan-assertion-math-bugs.md +38 -0
  77. package/docs/lessons/0047-pytest-single-threaded-default.md +37 -0
  78. package/docs/lessons/0048-integration-wiring-batch.md +40 -0
  79. package/docs/lessons/0049-ab-verification.md +41 -0
  80. package/docs/lessons/0050-editing-sourced-files-during-execution.md +33 -0
  81. package/docs/lessons/0051-infrastructure-fixes-cant-self-heal.md +30 -0
  82. package/docs/lessons/0052-uncommitted-changes-poison-quality-gates.md +31 -0
  83. package/docs/lessons/0053-jq-compact-flag-inconsistency.md +31 -0
  84. package/docs/lessons/0054-parser-matches-inside-code-blocks.md +30 -0
  85. package/docs/lessons/0055-agents-compensate-for-garbled-prompts.md +31 -0
  86. package/docs/lessons/0056-grep-count-exit-code-on-zero.md +42 -0
  87. package/docs/lessons/0057-new-artifacts-break-git-clean-gates.md +42 -0
  88. package/docs/lessons/0058-dead-config-keys-never-consumed.md +49 -0
  89. package/docs/lessons/0059-contract-test-shared-structures.md +53 -0
  90. package/docs/lessons/0060-set-e-silent-death-in-runners.md +53 -0
  91. package/docs/lessons/0061-context-injection-dirty-state.md +50 -0
  92. package/docs/lessons/0062-sibling-bug-neighborhood-scan.md +29 -0
  93. package/docs/lessons/0063-one-flag-two-lifetimes.md +31 -0
  94. package/docs/lessons/0064-test-passes-wrong-reason.md +31 -0
  95. package/docs/lessons/0065-pipefail-grep-count-double-output.md +39 -0
  96. package/docs/lessons/0066-local-keyword-outside-function.md +37 -0
  97. package/docs/lessons/0067-stdin-hang-non-interactive-shell.md +36 -0
  98. package/docs/lessons/0068-agent-builds-wrong-thing-correctly.md +31 -0
  99. package/docs/lessons/0069-plan-quality-dominates-execution.md +30 -0
  100. package/docs/lessons/0070-spec-echo-back-prevents-drift.md +31 -0
  101. package/docs/lessons/0071-positive-instructions-outperform-negative.md +30 -0
  102. package/docs/lessons/0072-lost-in-the-middle-context-placement.md +30 -0
  103. package/docs/lessons/0073-unscoped-lessons-cause-false-positives.md +30 -0
  104. package/docs/lessons/0074-stale-context-injection-wrong-batch.md +32 -0
  105. package/docs/lessons/0075-research-artifacts-must-persist.md +32 -0
  106. package/docs/lessons/0076-wrong-decomposition-contaminates-downstream.md +30 -0
  107. package/docs/lessons/0077-cherry-pick-merges-need-manual-resolution.md +30 -0
  108. package/docs/lessons/0078-static-review-without-live-test.md +30 -0
  109. package/docs/lessons/0079-integration-wiring-batch-required.md +32 -0
  110. package/docs/lessons/FRAMEWORK.md +161 -0
  111. package/docs/lessons/SUMMARY.md +201 -0
  112. package/docs/lessons/TEMPLATE.md +85 -0
  113. package/docs/plans/2026-02-21-code-factory-v2-design.md +204 -0
  114. package/docs/plans/2026-02-21-code-factory-v2-implementation-plan.md +2189 -0
  115. package/docs/plans/2026-02-21-code-factory-v2-phase4-design.md +537 -0
  116. package/docs/plans/2026-02-21-code-factory-v2-phase4-implementation-plan.md +2012 -0
  117. package/docs/plans/2026-02-21-hardening-pass-design.md +108 -0
  118. package/docs/plans/2026-02-21-hardening-pass-plan.md +1378 -0
  119. package/docs/plans/2026-02-21-mab-research-report.md +406 -0
  120. package/docs/plans/2026-02-21-marketplace-restructure-design.md +240 -0
  121. package/docs/plans/2026-02-21-marketplace-restructure-plan.md +832 -0
  122. package/docs/plans/2026-02-21-phase4-completion-plan.md +697 -0
  123. package/docs/plans/2026-02-21-validator-suite-design.md +148 -0
  124. package/docs/plans/2026-02-21-validator-suite-plan.md +540 -0
  125. package/docs/plans/2026-02-22-mab-research-round2.md +556 -0
  126. package/docs/plans/2026-02-22-mab-run-design.md +462 -0
  127. package/docs/plans/2026-02-22-mab-run-plan.md +2046 -0
  128. package/docs/plans/2026-02-22-operations-design-methodology-research.md +681 -0
  129. package/docs/plans/2026-02-22-research-agent-failure-taxonomy.md +532 -0
  130. package/docs/plans/2026-02-22-research-code-guideline-policies.md +886 -0
  131. package/docs/plans/2026-02-22-research-codebase-audit-refactoring.md +908 -0
  132. package/docs/plans/2026-02-22-research-coding-standards-documentation.md +541 -0
  133. package/docs/plans/2026-02-22-research-competitive-landscape.md +687 -0
  134. package/docs/plans/2026-02-22-research-comprehensive-testing.md +1076 -0
  135. package/docs/plans/2026-02-22-research-context-utilization.md +459 -0
  136. package/docs/plans/2026-02-22-research-cost-quality-tradeoff.md +548 -0
  137. package/docs/plans/2026-02-22-research-lesson-transferability.md +508 -0
  138. package/docs/plans/2026-02-22-research-multi-agent-coordination.md +312 -0
  139. package/docs/plans/2026-02-22-research-phase-integration.md +602 -0
  140. package/docs/plans/2026-02-22-research-plan-quality.md +428 -0
  141. package/docs/plans/2026-02-22-research-prompt-engineering.md +558 -0
  142. package/docs/plans/2026-02-22-research-unconventional-perspectives.md +528 -0
  143. package/docs/plans/2026-02-22-research-user-adoption.md +638 -0
  144. package/docs/plans/2026-02-22-research-verification-effectiveness.md +433 -0
  145. package/docs/plans/2026-02-23-agent-suite-design.md +299 -0
  146. package/docs/plans/2026-02-23-agent-suite-plan.md +578 -0
  147. package/docs/plans/2026-02-23-phase3-cost-infrastructure-design.md +148 -0
  148. package/docs/plans/2026-02-23-phase3-cost-infrastructure-plan.md +1062 -0
  149. package/docs/plans/2026-02-23-research-bash-expert-agent.md +543 -0
  150. package/docs/plans/2026-02-23-research-dependency-auditor-agent.md +564 -0
  151. package/docs/plans/2026-02-23-research-improving-existing-agents.md +503 -0
  152. package/docs/plans/2026-02-23-research-integration-tester-agent.md +454 -0
  153. package/docs/plans/2026-02-23-research-python-expert-agent.md +429 -0
  154. package/docs/plans/2026-02-23-research-service-monitor-agent.md +425 -0
  155. package/docs/plans/2026-02-23-research-shell-expert-agent.md +533 -0
  156. package/docs/plans/2026-02-23-roadmap-to-completion.md +530 -0
  157. package/docs/plans/2026-02-24-headless-module-split-design.md +98 -0
  158. package/docs/plans/2026-02-24-headless-module-split.md +443 -0
  159. package/docs/plans/2026-02-24-lesson-scope-metadata-design.md +228 -0
  160. package/docs/plans/2026-02-24-lesson-scope-metadata-plan.md +968 -0
  161. package/docs/plans/2026-02-24-npm-packaging-design.md +841 -0
  162. package/docs/plans/2026-02-24-npm-packaging-plan.md +1965 -0
  163. package/docs/plans/audit-findings.md +186 -0
  164. package/docs/telegram-notification-format.md +98 -0
  165. package/examples/example-plan.md +51 -0
  166. package/examples/example-prd.json +72 -0
  167. package/examples/example-roadmap.md +33 -0
  168. package/examples/quickstart-plan.md +63 -0
  169. package/hooks/hooks.json +26 -0
  170. package/hooks/setup-symlinks.sh +48 -0
  171. package/hooks/stop-hook.sh +135 -0
  172. package/package.json +47 -0
  173. package/policies/bash.md +71 -0
  174. package/policies/python.md +71 -0
  175. package/policies/testing.md +61 -0
  176. package/policies/universal.md +60 -0
  177. package/scripts/analyze-report.sh +97 -0
  178. package/scripts/architecture-map.sh +145 -0
  179. package/scripts/auto-compound.sh +273 -0
  180. package/scripts/batch-audit.sh +42 -0
  181. package/scripts/batch-test.sh +101 -0
  182. package/scripts/entropy-audit.sh +221 -0
  183. package/scripts/failure-digest.sh +51 -0
  184. package/scripts/generate-ast-rules.sh +96 -0
  185. package/scripts/init.sh +112 -0
  186. package/scripts/lesson-check.sh +428 -0
  187. package/scripts/lib/common.sh +61 -0
  188. package/scripts/lib/cost-tracking.sh +153 -0
  189. package/scripts/lib/ollama.sh +60 -0
  190. package/scripts/lib/progress-writer.sh +128 -0
  191. package/scripts/lib/run-plan-context.sh +215 -0
  192. package/scripts/lib/run-plan-echo-back.sh +231 -0
  193. package/scripts/lib/run-plan-headless.sh +396 -0
  194. package/scripts/lib/run-plan-notify.sh +57 -0
  195. package/scripts/lib/run-plan-parser.sh +81 -0
  196. package/scripts/lib/run-plan-prompt.sh +215 -0
  197. package/scripts/lib/run-plan-quality-gate.sh +132 -0
  198. package/scripts/lib/run-plan-routing.sh +315 -0
  199. package/scripts/lib/run-plan-sampling.sh +170 -0
  200. package/scripts/lib/run-plan-scoring.sh +146 -0
  201. package/scripts/lib/run-plan-state.sh +142 -0
  202. package/scripts/lib/run-plan-team.sh +199 -0
  203. package/scripts/lib/telegram.sh +54 -0
  204. package/scripts/lib/thompson-sampling.sh +176 -0
  205. package/scripts/license-check.sh +74 -0
  206. package/scripts/mab-run.sh +575 -0
  207. package/scripts/module-size-check.sh +146 -0
  208. package/scripts/patterns/async-no-await.yml +5 -0
  209. package/scripts/patterns/bare-except.yml +6 -0
  210. package/scripts/patterns/empty-catch.yml +6 -0
  211. package/scripts/patterns/hardcoded-localhost.yml +9 -0
  212. package/scripts/patterns/retry-loop-no-backoff.yml +12 -0
  213. package/scripts/pipeline-status.sh +197 -0
  214. package/scripts/policy-check.sh +226 -0
  215. package/scripts/prior-art-search.sh +133 -0
  216. package/scripts/promote-mab-lessons.sh +126 -0
  217. package/scripts/prompts/agent-a-superpowers.md +29 -0
  218. package/scripts/prompts/agent-b-ralph.md +29 -0
  219. package/scripts/prompts/judge-agent.md +61 -0
  220. package/scripts/prompts/planner-agent.md +44 -0
  221. package/scripts/pull-community-lessons.sh +90 -0
  222. package/scripts/quality-gate.sh +266 -0
  223. package/scripts/research-gate.sh +90 -0
  224. package/scripts/run-plan.sh +329 -0
  225. package/scripts/scope-infer.sh +159 -0
  226. package/scripts/setup-ralph-loop.sh +155 -0
  227. package/scripts/telemetry.sh +230 -0
  228. package/scripts/tests/run-all-tests.sh +52 -0
  229. package/scripts/tests/test-act-cli.sh +46 -0
  230. package/scripts/tests/test-agents-md.sh +87 -0
  231. package/scripts/tests/test-analyze-report.sh +114 -0
  232. package/scripts/tests/test-architecture-map.sh +89 -0
  233. package/scripts/tests/test-auto-compound.sh +169 -0
  234. package/scripts/tests/test-batch-test.sh +65 -0
  235. package/scripts/tests/test-benchmark-runner.sh +25 -0
  236. package/scripts/tests/test-common.sh +168 -0
  237. package/scripts/tests/test-cost-tracking.sh +158 -0
  238. package/scripts/tests/test-echo-back.sh +180 -0
  239. package/scripts/tests/test-entropy-audit.sh +146 -0
  240. package/scripts/tests/test-failure-digest.sh +66 -0
  241. package/scripts/tests/test-generate-ast-rules.sh +145 -0
  242. package/scripts/tests/test-helpers.sh +82 -0
  243. package/scripts/tests/test-init.sh +47 -0
  244. package/scripts/tests/test-lesson-check.sh +278 -0
  245. package/scripts/tests/test-lesson-local.sh +55 -0
  246. package/scripts/tests/test-license-check.sh +109 -0
  247. package/scripts/tests/test-mab-run.sh +182 -0
  248. package/scripts/tests/test-ollama-lib.sh +49 -0
  249. package/scripts/tests/test-ollama.sh +60 -0
  250. package/scripts/tests/test-pipeline-status.sh +198 -0
  251. package/scripts/tests/test-policy-check.sh +124 -0
  252. package/scripts/tests/test-prior-art-search.sh +96 -0
  253. package/scripts/tests/test-progress-writer.sh +140 -0
  254. package/scripts/tests/test-promote-mab-lessons.sh +110 -0
  255. package/scripts/tests/test-pull-community-lessons.sh +149 -0
  256. package/scripts/tests/test-quality-gate.sh +241 -0
  257. package/scripts/tests/test-research-gate.sh +132 -0
  258. package/scripts/tests/test-run-plan-cli.sh +86 -0
  259. package/scripts/tests/test-run-plan-context.sh +305 -0
  260. package/scripts/tests/test-run-plan-e2e.sh +153 -0
  261. package/scripts/tests/test-run-plan-headless.sh +424 -0
  262. package/scripts/tests/test-run-plan-notify.sh +124 -0
  263. package/scripts/tests/test-run-plan-parser.sh +217 -0
  264. package/scripts/tests/test-run-plan-prompt.sh +254 -0
  265. package/scripts/tests/test-run-plan-quality-gate.sh +222 -0
  266. package/scripts/tests/test-run-plan-routing.sh +178 -0
  267. package/scripts/tests/test-run-plan-scoring.sh +148 -0
  268. package/scripts/tests/test-run-plan-state.sh +261 -0
  269. package/scripts/tests/test-run-plan-team.sh +157 -0
  270. package/scripts/tests/test-scope-infer.sh +150 -0
  271. package/scripts/tests/test-setup-ralph-loop.sh +63 -0
  272. package/scripts/tests/test-telegram-env.sh +38 -0
  273. package/scripts/tests/test-telegram.sh +121 -0
  274. package/scripts/tests/test-telemetry.sh +46 -0
  275. package/scripts/tests/test-thompson-sampling.sh +139 -0
  276. package/scripts/tests/test-validate-all.sh +60 -0
  277. package/scripts/tests/test-validate-commands.sh +89 -0
  278. package/scripts/tests/test-validate-hooks.sh +98 -0
  279. package/scripts/tests/test-validate-lessons.sh +150 -0
  280. package/scripts/tests/test-validate-plan-quality.sh +235 -0
  281. package/scripts/tests/test-validate-plans.sh +187 -0
  282. package/scripts/tests/test-validate-plugin.sh +106 -0
  283. package/scripts/tests/test-validate-prd.sh +184 -0
  284. package/scripts/tests/test-validate-skills.sh +134 -0
  285. package/scripts/validate-all.sh +57 -0
  286. package/scripts/validate-commands.sh +67 -0
  287. package/scripts/validate-hooks.sh +89 -0
  288. package/scripts/validate-lessons.sh +98 -0
  289. package/scripts/validate-plan-quality.sh +369 -0
  290. package/scripts/validate-plans.sh +120 -0
  291. package/scripts/validate-plugin.sh +86 -0
  292. package/scripts/validate-policies.sh +42 -0
  293. package/scripts/validate-prd.sh +118 -0
  294. package/scripts/validate-skills.sh +96 -0
  295. package/skills/autocode/SKILL.md +285 -0
  296. package/skills/autocode/ab-verification.md +51 -0
  297. package/skills/autocode/code-quality-standards.md +37 -0
  298. package/skills/autocode/competitive-mode.md +364 -0
  299. package/skills/brainstorming/SKILL.md +97 -0
  300. package/skills/capture-lesson/SKILL.md +187 -0
  301. package/skills/check-lessons/SKILL.md +116 -0
  302. package/skills/dispatching-parallel-agents/SKILL.md +110 -0
  303. package/skills/executing-plans/SKILL.md +85 -0
  304. package/skills/finishing-a-development-branch/SKILL.md +201 -0
  305. package/skills/receiving-code-review/SKILL.md +72 -0
  306. package/skills/requesting-code-review/SKILL.md +59 -0
  307. package/skills/requesting-code-review/code-reviewer.md +82 -0
  308. package/skills/research/SKILL.md +145 -0
  309. package/skills/roadmap/SKILL.md +115 -0
  310. package/skills/subagent-driven-development/SKILL.md +98 -0
  311. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +18 -0
  312. package/skills/subagent-driven-development/implementer-prompt.md +73 -0
  313. package/skills/subagent-driven-development/spec-reviewer-prompt.md +57 -0
  314. package/skills/systematic-debugging/SKILL.md +134 -0
  315. package/skills/systematic-debugging/condition-based-waiting.md +64 -0
  316. package/skills/systematic-debugging/defense-in-depth.md +32 -0
  317. package/skills/systematic-debugging/root-cause-tracing.md +55 -0
  318. package/skills/test-driven-development/SKILL.md +167 -0
  319. package/skills/using-git-worktrees/SKILL.md +219 -0
  320. package/skills/using-superpowers/SKILL.md +54 -0
  321. package/skills/verification-before-completion/SKILL.md +140 -0
  322. package/skills/verify/SKILL.md +82 -0
  323. package/skills/writing-plans/SKILL.md +128 -0
  324. package/skills/writing-skills/SKILL.md +93 -0
@@ -0,0 +1,299 @@
1
+ # Agent Suite + Tooling Integration Design
2
+
3
+ **Date:** 2026-02-23
4
+ **Status:** Draft — awaiting user approval
5
+ **Scope:** 6 new agents, improvements to 8 existing agents, linting tooling in Makefile + CI
6
+ **Research:** 7 documents at `docs/plans/2026-02-23-research-*.md` (3,451 lines total)
7
+
8
+ ---
9
+
10
+ ## Part 1: New Agents (6)
11
+
12
+ All placed in `~/.claude/agents/` (global — cross-project).
13
+
14
+ ### 1.1 bash-expert.md
15
+
16
+ **Purpose:** Review, write, and debug bash scripts. Dual-mode: audit existing scripts for bugs + write new scripts following best practices.
17
+
18
+ **Model:** sonnet | **Tools:** Read, Grep, Glob, Bash
19
+
20
+ **Knowledge base:**
21
+ - Google Shell Style Guide (naming, structure, functions)
22
+ - ShellCheck rules (quoting SC2086, eval SC2091, glob SC2035, `ls` parsing SC2012)
23
+ - BashPitfalls wiki (61+ common mistakes)
24
+ - Shellharden quoting rules
25
+ - Toolkit lessons (bare-except #7, async-no-await #25, hardcoded-localhost)
26
+
27
+ **Scan workflow (audit mode):**
28
+ 1. Read target file(s)
29
+ 2. Run `shellcheck --enable=all --external-sources <file>` if available
30
+ 3. Grep for Priority 1 patterns: unquoted variables in command args, `eval` on variables, `|| true` masking errors, `cd` without error handling, missing `set -euo pipefail`
31
+ 4. Grep for Priority 2 patterns: `#!/bin/bash` instead of `#!/usr/bin/env bash`, `grep -P` (non-portable), `ls` for file existence, `cat` in pipelines (UUOC)
32
+ 5. Check script size (>300 lines = recommend splitting)
33
+
34
+ **Writing mode rules:**
35
+ - Always start with `set -euo pipefail`
36
+ - Quote all variable expansions
37
+ - Use arrays for file lists, never word-split strings
38
+ - `mktemp` + trap for temp files
39
+ - `printf` over `echo`
40
+ - `[[ ]]` for conditionals
41
+ - Functions for anything called twice
42
+ - `local` for function variables
43
+
44
+ **Output format:** BLOCKING / SHOULD-FIX / CLEAN table with file:line references.
45
+
46
+ ### 1.2 shell-expert.md
47
+
48
+ **Purpose:** Diagnose systemd service failures, PATH/environment issues, package management, permissions, and environment configuration. Investigation and remediation, NOT script writing.
49
+
50
+ **Model:** sonnet | **Tools:** Read, Grep, Glob, Bash
51
+
52
+ **Relationship to infra-auditor:**
53
+ - `infra-auditor` = monitoring (is everything up?)
54
+ - `shell-expert` = investigation (why did it fail, how to fix?)
55
+
56
+ **Five diagnostic domains:**
57
+
58
+ 1. **Service Lifecycle** — `systemctl --user show <svc> -p ActiveState,SubState,NRestarts,Result,ExecMainStartTimestamp --value` (never parse `status` text). Triage by Result: `exit-code` → check logs, `oom-kill` → check MemoryMax, `start-limit-hit` → needs `systemctl --user reset-failed`.
59
+ 2. **Environment & PATH** — Four-step: `which <cmd>`, `type -a <cmd>`, `echo "$PATH" | tr : '\n'`, check EnvironmentFile quoting. Detect version manager shims (nvm, pyenv).
60
+ 3. **Hardening Audit** — `systemd-analyze security <svc>` → exposure score → top-5 directives to add.
61
+ 4. **Package Management** — `apt-get check`, held packages, security updates (`apt list --upgradable`), orphaned packages.
62
+ 5. **Permissions** — `~/.env` mode check (should be 600), SUID/SGID audit, world-writable scan, service user ownership.
63
+
64
+ **Key rule:** Use `systemctl show` properties, NEVER parse `systemctl status` text output.
65
+
66
+ ### 1.3 python-expert.md
67
+
68
+ **Purpose:** Review and write Python code with focus on async discipline, resource lifecycle, type safety, and production patterns specific to the project ecosystem (HA, Telegram, Notion, Ollama).
69
+
70
+ **Model:** sonnet | **Tools:** Read, Grep, Glob, Bash
71
+
72
+ **Two modes:**
73
+
74
+ **Mode A: Extend lesson-scanner (default).** Add scan groups:
75
+ - Scan 7: WebSocket send without `try/except ConnectionClosed` (Lesson #34)
76
+ - Scan 8: `sqlite3.connect()` inside `async def` — blocking I/O (Lesson #33)
77
+ - Scan 9: External data (MQTT, HA state, Telegram updates) entering business logic without Pydantic validation
78
+ - Scan 10: `create_task()` without `add_done_callback` (Lesson #43) — RUF006 catches missing ref, not missing callback
79
+
80
+ **Mode B: Full architectural review** (when asked). Class structure analysis, cross-file subscriber lifecycle, type coverage, async flow tracing. Uses opus model.
81
+
82
+ **Ruff config to push to all Python projects:**
83
+ ```toml
84
+ [tool.ruff.lint]
85
+ select = ["E", "W", "F", "B", "ASYNC", "RUF006", "UP", "SIM"]
86
+ ```
87
+
88
+ **Key patterns from research:**
89
+ - `async def` without I/O → RUF029 (preview, enable when stable)
90
+ - ASYNC210/230/251 → blocking HTTP/file/sleep in async context
91
+ - `pickle.loads()`, `eval()`, `exec()`, `subprocess(shell=True)` → security flags
92
+ - HA subscriber pattern: store the handle (`self._unsub = subscribe(...)`) and call `self._unsub()` in `shutdown()`
93
+
94
+ ### 1.4 integration-tester.md
95
+
96
+ **Purpose:** Verify data flows correctly across service seams. Catches Cluster B bugs where each service passes its own tests but handoffs fail.
97
+
98
+ **Model:** opus | **Tools:** Read, Grep, Glob, Bash
99
+
100
+ **Operating principles:**
101
+ 1. Black box only — never read source to infer behavior; only check external observables
102
+ 2. Evidence-based assertions — every PASS/FAIL includes quoted evidence
103
+ 3. One probe per seam — failures must be unambiguously attributable
104
+ 4. Fail fast with cause — if health check fails, SKIP with reason
105
+ 5. No side effects — test artifacts go to `/tmp/integration-tester-results/`
106
+
107
+ **Seam registry (from projects/CLAUDE.md cross-project dependency table):**
108
+
109
+ | Seam | Producer | Consumer | Probe |
110
+ |------|----------|----------|-------|
111
+ | HA logbook | ha-log-sync | aria engine | File freshness < 15min, valid JSON lines |
112
+ | Intelligence | aria engine | aria hub | File exists, schema matches hub's expected fields |
113
+ | Hub cache | aria hub | — | SQLite opens, table schema matches |
114
+ | Notion replica | notion-tools | telegram-brief | File count > 0, last-modified < 6h |
115
+ | Capture DB | telegram-capture | capture-sync | Row count increasing, last insert < 1h |
116
+ | Ollama queue | queue daemon | 10 timers | Queue endpoint responds, job format valid |
117
+ | Shared env | ~/.env | all consumers | Each consumer's required vars are set and non-empty |
118
+
119
+ **Output:** Per-seam PASS/FAIL/SKIP table with evidence timestamps.
120
+
121
+ ### 1.5 dependency-auditor.md
122
+
123
+ **Purpose:** Scan all 8 repos for outdated packages, known CVEs, and license issues. Read-only — never runs install/fix commands.
124
+
125
+ **Model:** haiku | **Tools:** Read, Grep, Glob, Bash
126
+
127
+ **Three-layer tool stack:**
128
+ 1. **Per-ecosystem:** `pip-audit --format json` (Python), `npm audit --json` (Node)
129
+ 2. **Cross-language:** `osv-scanner` over all repos (unified severity)
130
+ 3. **Docker-specific:** `trivy image` for gpt-researcher container
131
+
132
+ **License compliance:** `pip-licenses --format json` per Python repo, `npx license-checker --json` for Node.
133
+
134
+ **Design rule from research:** Separate `/dep-audit` (read-only detection) from `/dep-upgrade` (state-changing fixes). This agent is audit only.
135
+
136
+ **Output:** BLOCKER (CRITICAL/HIGH CVE) / SHOULD-FIX (MEDIUM, outdated >6mo) / NICE-TO-HAVE (LOW, minor version behind) table per repo.
137
+
138
+ ### 1.6 service-monitor.md
139
+
140
+ **Purpose:** Check 12 services and 21 timers for error patterns, restart loops, and Cluster A silent failures.
141
+
142
+ **Model:** sonnet | **Tools:** Read, Grep, Glob, Bash
143
+
144
+ **Architecture:** 80% bash (deterministic checks), 20% AI (pattern reasoning on summaries).
145
+
146
+ **Per-service checks:**
147
+ - `systemctl --user show <svc> -p ActiveState,SubState,NRestarts,Result,ExecMainStartTimestamp --value`
148
+ - `NRestarts` + `ActiveEnterTimestamp` → restart frequency
149
+ - `journalctl --user -u <svc> --since "24 hours ago" -q | wc -l` → if active + 0 lines = Cluster A silent failure
150
+ - Top 20 error messages (deduplicated)
151
+
152
+ **Per-timer checks:**
153
+ - `LastTriggerUSec` via `systemctl --user show` → compare against expected interval
154
+ - Timers not fired in 2x their interval = STALE
155
+
156
+ **Known patterns to detect:**
157
+ - Telegram 409 → `grep "409"` in telegram-*.service logs
158
+ - MQTT disconnect loop → `grep -i "disconnect\|reconnect"` in aria-hub
159
+ - OOM kills → `Result == oom-kill` + `journalctl -k --since "24h ago" | grep oom`
160
+
161
+ **State taxonomy per service:** OK / RECOVERED / RESTARTING / FAILED / ANOMALY (Cluster A: active but silent)
162
+
163
+ **Output:** Service health table + timer freshness table + anomaly details.
164
+
165
+ ---
166
+
167
+ ## Part 2: Existing Agent Improvements (8)
168
+
169
+ ### P0 — Correctness (prevents wrong output)
170
+
171
+ | Agent | Change | Why |
172
+ |-------|--------|-----|
173
+ | security-reviewer | Add hallucination guard: "Only report findings grounded in specific file:line evidence" | Prevents false findings that drive unnecessary work |
174
+ | security-reviewer | Remove Bash tool → Read, Grep, Glob only | Read-only review should not have shell execution |
175
+ | security-reviewer | Add Python/bash attack categories: `pickle.loads`, `eval`, `subprocess(shell=True)`, hardcoded secrets | Currently web-focused, misses 75% of codebase |
176
+ | infra-auditor | Fix sync freshness: add `$(date +%s)` delta math to `stat -c '%Y'` | Current comparison is broken |
177
+ | lesson-scanner | Update description: "53 lessons" → "66 lessons" | Stale count |
178
+
179
+ ### P1 — Quality (prevents waste)
180
+
181
+ | Agent | Change | Why |
182
+ |-------|--------|-----|
183
+ | All 6 agents missing a `model` field | Add `model` field | Prevents wrong model routing |
184
+ | All agents | Add `maxTurns` (infra-auditor: 15, counter: 20, others: 25) | Prevents runaway execution |
185
+ | security-reviewer | Add explicit trigger phrases to description | Improves delegation accuracy |
186
+ | doc-updater | Fix git diff: `HEAD~1` → `git status --short && git diff HEAD` | Misses uncommitted changes |
187
+
188
+ ### P2 — Capability (meaningful new features)
189
+
190
+ | Agent | Change | Why |
191
+ |-------|--------|-----|
192
+ | lesson-scanner | Add Scan Group 7: Plan Quality (Lessons #60-66) | Research-derived lessons not currently scanned |
193
+ | lesson-scanner | Add Scan 3f: `.venv/bin/pip` (Lesson #51) | Hookify warns but scanner should also flag |
194
+ | counter | Add Clusters E and F to Bias Detection lens | Lesson regression check incomplete |
195
+ | notion-researcher | Add vector search fallback behavior | Zero-result behavior undefined |
196
+ | notion-writer | Add pre-flight API key check | Currently discovers missing key on first 401 |
197
+
198
+ ### P3 — Polish
199
+
200
+ | Agent | Change | Why |
201
+ |-------|--------|-----|
202
+ | doc-updater | Add structured output summary | Currently returns no summary of changes made |
203
+ | counter-daily | Add "acknowledge once and stop" rule | Prevents morphing into full counter session |
204
+ | security-reviewer | Add `memory: project` | Baseline known-safe patterns across sessions |
205
+
206
+ ### Agent Chains (future)
207
+
208
+ **Chain 1: Post-commit audit** — security-reviewer → lesson-scanner → doc-updater (single `/post-commit-audit` command)
209
+
210
+ **Chain 2: Service triage** — infra-auditor (detect) → shell-expert (investigate) → service-monitor (verify fix)
211
+
212
+ **Chain 3: Pre-release** — dependency-auditor → integration-tester → lesson-scanner
213
+
214
+ ---
215
+
216
+ ## Part 3: Linting Tooling
217
+
218
+ ### Tools to Install
219
+
220
+ | Tool | Install | Purpose |
221
+ |------|---------|---------|
222
+ | shellcheck | Already installed (0.9.0) | Static analysis |
223
+ | shfmt | `brew install shfmt` | Formatting |
224
+ | shellharden | `brew install shellharden` | Quoting hardening |
225
+ | semgrep | `pip3 install semgrep` | Security pattern matching |
226
+
227
+ ### Makefile Changes
228
+
229
+ ```makefile
230
+ .PHONY: test validate lint ci
231
+
232
+ lint:
233
+ @echo "=== ShellCheck ==="
234
+ @shellcheck scripts/*.sh scripts/lib/*.sh
235
+ @echo "=== shfmt ==="
236
+ @shfmt -d -i 2 -ci scripts/*.sh scripts/lib/*.sh
237
+ @echo "=== Shellharden ==="
238
+ @shellharden --check scripts/*.sh scripts/lib/*.sh 2>/dev/null || true
239
+ @echo "=== Semgrep ==="
240
+ @semgrep --config "p/bash" --quiet scripts/ 2>/dev/null || true
241
+
242
+ test:
243
+ @bash scripts/tests/run-all-tests.sh
244
+
245
+ validate:
246
+ @bash scripts/validate-all.sh
247
+
248
+ ci: lint validate test
249
+ @echo "CI: ALL PASSED"
250
+ ```
251
+
252
+ ### CI Changes (.github/workflows/ci.yml)
253
+
254
+ Add shellcheck, shfmt, and semgrep to the CI job:
255
+
256
+ ```yaml
257
+ - name: Install linting tools
258
+ run: |
259
+ sudo apt-get install -y jq shellcheck
260
+ GO_VERSION=1.21 && curl -sS https://webi.sh/shfmt | sh
261
+ pip install semgrep
262
+ - name: Run CI
263
+ run: make ci
264
+ ```
265
+
266
+ ### .shellcheckrc (new file in repo root)
267
+
268
+ ```
269
+ # Enable all optional checks
270
+ enable=all
271
+ # Exclude specific rules we've explicitly decided to ignore
272
+ # SC2086 — we have intentional word-splitting in quality-gate.sh (tracked as issue #5)
273
+ ```
274
+
275
+ ---
276
+
277
+ ## Implementation Order
278
+
279
+ 1. **Install tools** (shfmt, shellharden, semgrep) — 5 min
280
+ 2. **Add .shellcheckrc + Makefile lint target** — 10 min
281
+ 3. **Update CI** — 5 min
282
+ 4. **Create 6 new agents** — using research docs as source material
283
+ 5. **Apply P0 improvements to existing agents** — correctness fixes first
284
+ 6. **Apply P1-P3 improvements** — quality, capability, polish
285
+ 7. **Symlink research docs to ~/Documents/research/** — per workspace convention
286
+
287
+ ---
288
+
289
+ ## Research Sources
290
+
291
+ | Document | Lines | Path |
292
+ |----------|-------|------|
293
+ | Bash expert | 543 | `docs/plans/2026-02-23-research-bash-expert-agent.md` |
294
+ | Shell expert | 533 | `docs/plans/2026-02-23-research-shell-expert-agent.md` |
295
+ | Python expert | 429 | `docs/plans/2026-02-23-research-python-expert-agent.md` |
296
+ | Integration tester | 454 | `docs/plans/2026-02-23-research-integration-tester-agent.md` |
297
+ | Dependency auditor | 564 | `docs/plans/2026-02-23-research-dependency-auditor-agent.md` |
298
+ | Service monitor | 425 | `docs/plans/2026-02-23-research-service-monitor-agent.md` |
299
+ | Existing improvements | 503 | `docs/plans/2026-02-23-research-improving-existing-agents.md` |