feed-the-machine 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +170 -170
  3. package/bin/generate-manifest.mjs +463 -463
  4. package/bin/install.mjs +491 -491
  5. package/docs/HOOKS.md +243 -243
  6. package/docs/INBOX.md +233 -233
  7. package/ftm/SKILL.md +122 -122
  8. package/ftm-audit/SKILL.md +623 -541
  9. package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
  10. package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
  11. package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
  12. package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
  13. package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
  14. package/ftm-audit/scripts/run-knip.sh +23 -23
  15. package/ftm-audit.yml +2 -2
  16. package/ftm-brainstorm/SKILL.md +498 -498
  17. package/ftm-brainstorm/evals/evals.json +100 -100
  18. package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
  19. package/ftm-brainstorm/references/agent-prompts.md +224 -224
  20. package/ftm-brainstorm/references/plan-template.md +121 -121
  21. package/ftm-brainstorm.yml +2 -2
  22. package/ftm-browse/SKILL.md +454 -454
  23. package/ftm-browse/daemon/browser-manager.ts +206 -206
  24. package/ftm-browse/daemon/bun.lock +30 -30
  25. package/ftm-browse/daemon/cli.ts +347 -347
  26. package/ftm-browse/daemon/commands.ts +410 -410
  27. package/ftm-browse/daemon/main.ts +357 -357
  28. package/ftm-browse/daemon/package.json +17 -17
  29. package/ftm-browse/daemon/server.ts +189 -189
  30. package/ftm-browse/daemon/snapshot.ts +519 -519
  31. package/ftm-browse/daemon/tsconfig.json +22 -22
  32. package/ftm-browse.yml +4 -4
  33. package/ftm-capture/SKILL.md +370 -370
  34. package/ftm-capture.yml +4 -4
  35. package/ftm-codex-gate/SKILL.md +361 -361
  36. package/ftm-codex-gate.yml +2 -2
  37. package/ftm-config/SKILL.md +345 -345
  38. package/ftm-config.default.yml +82 -80
  39. package/ftm-config.yml +2 -2
  40. package/ftm-council/SKILL.md +416 -416
  41. package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
  42. package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
  43. package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
  44. package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
  45. package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
  46. package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
  47. package/ftm-council.yml +2 -2
  48. package/ftm-dashboard/SKILL.md +163 -163
  49. package/ftm-dashboard.yml +4 -4
  50. package/ftm-debug/SKILL.md +1037 -1037
  51. package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
  52. package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
  53. package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
  54. package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
  55. package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
  56. package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
  57. package/ftm-debug.yml +2 -2
  58. package/ftm-diagram/SKILL.md +277 -277
  59. package/ftm-diagram.yml +2 -2
  60. package/ftm-executor/SKILL.md +777 -767
  61. package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
  62. package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
  63. package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
  64. package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
  65. package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +72 -72
  66. package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
  67. package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
  68. package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
  69. package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -44
  70. package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
  71. package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
  72. package/ftm-executor/runtime/package.json +8 -8
  73. package/ftm-executor.yml +2 -2
  74. package/ftm-git/SKILL.md +441 -441
  75. package/ftm-git/evals/evals.json +26 -26
  76. package/ftm-git/evals/promptfoo.yaml +75 -75
  77. package/ftm-git/hooks/post-commit-experience.sh +92 -92
  78. package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
  79. package/ftm-git/references/protocols/REMEDIATION.md +139 -139
  80. package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
  81. package/ftm-git.yml +2 -2
  82. package/ftm-inbox/backend/adapters/_retry.py +64 -64
  83. package/ftm-inbox/backend/adapters/base.py +230 -230
  84. package/ftm-inbox/backend/adapters/freshservice.py +104 -104
  85. package/ftm-inbox/backend/adapters/gmail.py +125 -125
  86. package/ftm-inbox/backend/adapters/jira.py +136 -136
  87. package/ftm-inbox/backend/adapters/registry.py +192 -192
  88. package/ftm-inbox/backend/adapters/slack.py +110 -110
  89. package/ftm-inbox/backend/db/connection.py +54 -54
  90. package/ftm-inbox/backend/db/schema.py +78 -78
  91. package/ftm-inbox/backend/executor/__init__.py +7 -7
  92. package/ftm-inbox/backend/executor/engine.py +149 -149
  93. package/ftm-inbox/backend/executor/step_runner.py +98 -98
  94. package/ftm-inbox/backend/main.py +103 -103
  95. package/ftm-inbox/backend/models/__init__.py +1 -1
  96. package/ftm-inbox/backend/models/unified_task.py +36 -36
  97. package/ftm-inbox/backend/planner/__init__.py +6 -6
  98. package/ftm-inbox/backend/planner/generator.py +127 -127
  99. package/ftm-inbox/backend/planner/schema.py +34 -34
  100. package/ftm-inbox/backend/requirements.txt +5 -5
  101. package/ftm-inbox/backend/routes/execute.py +186 -186
  102. package/ftm-inbox/backend/routes/health.py +52 -52
  103. package/ftm-inbox/backend/routes/inbox.py +68 -68
  104. package/ftm-inbox/backend/routes/plan.py +271 -271
  105. package/ftm-inbox/bin/launchagent.mjs +91 -91
  106. package/ftm-inbox/bin/setup.mjs +188 -188
  107. package/ftm-inbox/bin/start.sh +10 -10
  108. package/ftm-inbox/bin/status.sh +17 -17
  109. package/ftm-inbox/bin/stop.sh +8 -8
  110. package/ftm-inbox/config.example.yml +55 -55
  111. package/ftm-inbox/package-lock.json +2898 -2898
  112. package/ftm-inbox/package.json +26 -26
  113. package/ftm-inbox/postcss.config.js +6 -6
  114. package/ftm-inbox/src/app.css +199 -199
  115. package/ftm-inbox/src/app.html +18 -18
  116. package/ftm-inbox/src/lib/api.ts +166 -166
  117. package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
  118. package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
  119. package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
  120. package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
  121. package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
  122. package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
  123. package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
  124. package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
  125. package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
  126. package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
  127. package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
  128. package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
  129. package/ftm-inbox/src/lib/theme.ts +47 -47
  130. package/ftm-inbox/src/routes/+layout.svelte +76 -76
  131. package/ftm-inbox/src/routes/+page.svelte +401 -401
  132. package/ftm-inbox/svelte.config.js +12 -12
  133. package/ftm-inbox/tailwind.config.ts +63 -63
  134. package/ftm-inbox/tsconfig.json +13 -13
  135. package/ftm-inbox/vite.config.ts +6 -6
  136. package/ftm-intent/SKILL.md +241 -241
  137. package/ftm-intent.yml +2 -2
  138. package/ftm-manifest.json +3794 -3794
  139. package/ftm-map/SKILL.md +291 -291
  140. package/ftm-map/scripts/db.py +712 -712
  141. package/ftm-map/scripts/index.py +415 -415
  142. package/ftm-map/scripts/parser.py +224 -224
  143. package/ftm-map/scripts/queries/go-tags.scm +20 -20
  144. package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
  145. package/ftm-map/scripts/queries/python-tags.scm +31 -31
  146. package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
  147. package/ftm-map/scripts/queries/rust-tags.scm +37 -37
  148. package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
  149. package/ftm-map/scripts/query.py +301 -301
  150. package/ftm-map/scripts/ranker.py +377 -377
  151. package/ftm-map/scripts/requirements.txt +5 -5
  152. package/ftm-map/scripts/setup-hooks.sh +27 -27
  153. package/ftm-map/scripts/setup.sh +56 -56
  154. package/ftm-map/scripts/test_db.py +364 -364
  155. package/ftm-map/scripts/test_parser.py +174 -174
  156. package/ftm-map/scripts/test_query.py +183 -183
  157. package/ftm-map/scripts/test_ranker.py +199 -199
  158. package/ftm-map/scripts/views.py +591 -591
  159. package/ftm-map.yml +2 -2
  160. package/ftm-mind/SKILL.md +1943 -1943
  161. package/ftm-mind/evals/promptfoo.yaml +142 -142
  162. package/ftm-mind/references/blackboard-schema.md +328 -328
  163. package/ftm-mind/references/complexity-guide.md +110 -110
  164. package/ftm-mind/references/event-registry.md +319 -319
  165. package/ftm-mind/references/mcp-inventory.md +296 -296
  166. package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
  167. package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
  168. package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
  169. package/ftm-mind/references/reflexion-protocol.md +249 -249
  170. package/ftm-mind/references/routing/SCENARIOS.md +22 -22
  171. package/ftm-mind/references/routing-scenarios.md +35 -35
  172. package/ftm-mind.yml +2 -2
  173. package/ftm-pause/SKILL.md +395 -395
  174. package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
  175. package/ftm-pause/references/protocols/VALIDATION.md +80 -80
  176. package/ftm-pause.yml +2 -2
  177. package/ftm-researcher/SKILL.md +275 -275
  178. package/ftm-researcher/evals/agent-diversity.yaml +17 -17
  179. package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
  180. package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
  181. package/ftm-researcher/references/adaptive-search.md +116 -116
  182. package/ftm-researcher/references/agent-prompts.md +193 -193
  183. package/ftm-researcher/references/council-integration.md +193 -193
  184. package/ftm-researcher/references/output-format.md +203 -203
  185. package/ftm-researcher/references/synthesis-pipeline.md +165 -165
  186. package/ftm-researcher/scripts/score_credibility.py +234 -234
  187. package/ftm-researcher/scripts/validate_research.py +92 -92
  188. package/ftm-researcher.yml +2 -2
  189. package/ftm-resume/SKILL.md +518 -518
  190. package/ftm-resume/references/protocols/VALIDATION.md +172 -172
  191. package/ftm-resume.yml +2 -2
  192. package/ftm-retro/SKILL.md +380 -380
  193. package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
  194. package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
  195. package/ftm-retro.yml +2 -2
  196. package/ftm-routine/SKILL.md +170 -170
  197. package/ftm-routine.yml +4 -4
  198. package/ftm-state/blackboard/capabilities.json +5 -5
  199. package/ftm-state/blackboard/capabilities.schema.json +27 -27
  200. package/ftm-state/blackboard/context.json +23 -23
  201. package/ftm-state/blackboard/experiences/index.json +9 -9
  202. package/ftm-state/blackboard/patterns.json +6 -6
  203. package/ftm-state/schemas/context.schema.json +130 -130
  204. package/ftm-state/schemas/experience-index.schema.json +77 -77
  205. package/ftm-state/schemas/experience.schema.json +78 -78
  206. package/ftm-state/schemas/patterns.schema.json +44 -44
  207. package/ftm-upgrade/SKILL.md +194 -194
  208. package/ftm-upgrade/scripts/check-version.sh +76 -76
  209. package/ftm-upgrade/scripts/upgrade.sh +143 -143
  210. package/ftm-upgrade.yml +2 -2
  211. package/ftm-verify.yml +2 -2
  212. package/ftm.yml +2 -2
  213. package/hooks/ftm-blackboard-enforcer.sh +93 -93
  214. package/hooks/ftm-discovery-reminder.sh +90 -90
  215. package/hooks/ftm-drafts-gate.sh +61 -61
  216. package/hooks/ftm-event-logger.mjs +107 -107
  217. package/hooks/ftm-map-autodetect.sh +79 -79
  218. package/hooks/ftm-pending-sync-check.sh +22 -22
  219. package/hooks/ftm-plan-gate.sh +92 -92
  220. package/hooks/ftm-post-commit-trigger.sh +57 -57
  221. package/hooks/settings-template.json +81 -81
  222. package/install.sh +363 -363
  223. package/package.json +84 -84
  224. package/uninstall.sh +25 -25
@@ -1,89 +1,89 @@
1
- # Scoring Rubrics — All 5 Dimensions
2
-
3
- Detailed rubrics for ftm-retro scoring. Each dimension is scored 0–10 with a citation to specific data. Do not estimate without evidence — if data is missing, note it and score conservatively.
4
-
5
- ---
6
-
7
- ## Dimension 1: Wave Parallelism Efficiency (0–10)
8
-
9
- Were independent tasks actually dispatched in parallel? Could more tasks have been parallelized?
10
-
11
- - **10**: Every task that could run in parallel did. No serial bottlenecks where parallelism was possible.
12
- - **7–9**: Minor serial steps that could have been parallel (e.g., final post-processing tasks run sequentially).
13
- - **4–6**: Significant parallelism opportunities missed. Tasks that had no dependencies ran serially.
14
- - **1–3**: Nearly all tasks ran serially despite having no dependencies on each other.
15
- - **0**: Everything was serial regardless of dependency structure.
16
-
17
- Evidence to cite: wave structure from PROGRESS.md, task dependency graph, agent dispatch timestamps.
18
-
19
- ---
20
-
21
- ## Dimension 2: Audit Pass Rate (0–10)
22
-
23
- What percentage of tasks passed ftm-audit on the first attempt?
24
-
25
- - **10**: 100% first-pass. No task needed a fix cycle.
26
- - **8**: 90%+ first-pass. One or two tasks needed minor fixes.
27
- - **6**: 75–89% first-pass.
28
- - **4**: 50–74% first-pass. Roughly half the tasks needed audit remediation.
29
- - **2**: Below 50% first-pass.
30
- - **0**: Every single task failed audit on the first attempt.
31
-
32
- Evidence to cite: per-task audit results (pass/fail counts, auto-fix counts, manual-fix counts).
33
-
34
- ---
35
-
36
- ## Dimension 3: Codex Gate Pass Rate (0–10)
37
-
38
- What percentage of waves passed the ftm-codex-gate on the first attempt?
39
-
40
- - **10**: All waves passed on first gate run.
41
- - **7–9**: One wave needed a fix-and-retry.
42
- - **4–6**: Multiple waves needed retries.
43
- - **1–3**: Most waves failed the gate at least once.
44
- - **0**: Every wave failed the gate.
45
-
46
- Evidence to cite: codex gate results per wave (pass/fail, failure types).
47
-
48
- ---
49
-
50
- ## Dimension 4: Retry and Fix Count (0–10)
51
-
52
- How many total review-fix cycles were needed across all tasks and waves? Lower is better.
53
-
54
- Formula: `score = max(0, 10 - (total_retries / task_count) * 5)`
55
-
56
- - **10**: Zero retries.
57
- - **8**: Fewer than 0.5 retries per task on average.
58
- - **6**: 0.5–1.0 retries per task.
59
- - **4**: 1–2 retries per task.
60
- - **2**: 2–3 retries per task.
61
- - **0**: More than 3 retries per task on average.
62
-
63
- Evidence to cite: total retries, broken down by type (audit fix, codex gate retry, manual intervention).
64
-
65
- ---
66
-
67
- ## Dimension 5: Execution Smoothness (0–10)
68
-
69
- Subjective but evidence-grounded assessment. Were there blockers, ambiguous plan steps, confusing errors, or required manual interventions?
70
-
71
- - **10**: Fully autonomous from start to finish. No blockers, no ambiguity, no manual steps.
72
- - **7–9**: Minor friction — one clarification needed, one unexpected error handled gracefully.
73
- - **4–6**: Moderate friction — multiple ambiguities, one blocker that paused execution, one manual intervention.
74
- - **1–3**: Significant friction — repeated blockers, unclear plan steps that caused wrong-direction work, multiple manual interventions.
75
- - **0**: Execution could not proceed without constant human steering.
76
-
77
- Evidence to cite: error log entries, any manual interventions recorded in PROGRESS.md, plan ambiguities encountered.
78
-
79
- ---
80
-
81
- ## Scoring Principles
82
-
83
- ### Evidence-first scoring
84
-
85
- Every score needs a citation. "Tasks passed audit" is not a citation. "12/14 tasks passed audit on first attempt; Tasks 3 and 9 each needed one auto-fix cycle" is a citation. If the data to score a dimension is genuinely unavailable, note the gap explicitly and score conservatively (assume worst case for that dimension).
86
-
87
- ### No vibes
88
-
89
- Do not write "the execution felt smooth" or "agents seemed efficient." Write "0 manual interventions were required and all errors were caught and auto-resolved by ftm-audit Phase 2." The report is read by future executions that need to calibrate behavior, not by humans looking for encouragement.
1
+ # Scoring Rubrics — All 5 Dimensions
2
+
3
+ Detailed rubrics for ftm-retro scoring. Each dimension is scored 0–10 with a citation to specific data. Do not estimate without evidence — if data is missing, note it and score conservatively.
4
+
5
+ ---
6
+
7
+ ## Dimension 1: Wave Parallelism Efficiency (0–10)
8
+
9
+ Were independent tasks actually dispatched in parallel? Could more tasks have been parallelized?
10
+
11
+ - **10**: Every task that could run in parallel did. No serial bottlenecks where parallelism was possible.
12
+ - **7–9**: Minor serial steps that could have been parallel (e.g., final post-processing tasks run sequentially).
13
+ - **4–6**: Significant parallelism opportunities missed. Tasks that had no dependencies ran serially.
14
+ - **1–3**: Nearly all tasks ran serially despite having no dependencies on each other.
15
+ - **0**: Everything was serial regardless of dependency structure.
16
+
17
+ Evidence to cite: wave structure from PROGRESS.md, task dependency graph, agent dispatch timestamps.
18
+
19
+ ---
20
+
21
+ ## Dimension 2: Audit Pass Rate (0–10)
22
+
23
+ What percentage of tasks passed ftm-audit on the first attempt?
24
+
25
+ - **10**: 100% first-pass. No task needed a fix cycle.
26
+ - **8**: 90–99% first-pass. One or two tasks needed minor fixes.
27
+ - **6**: 75–89% first-pass.
28
+ - **4**: 50–74% first-pass. Roughly half the tasks needed audit remediation.
29
+ - **2**: Below 50% first-pass.
30
+ - **0**: Every single task failed audit on the first attempt.
31
+
32
+ Evidence to cite: per-task audit results (pass/fail counts, auto-fix counts, manual-fix counts).
33
+
34
+ ---
35
+
36
+ ## Dimension 3: Codex Gate Pass Rate (0–10)
37
+
38
+ What percentage of waves passed the ftm-codex-gate on the first attempt?
39
+
40
+ - **10**: All waves passed on first gate run.
41
+ - **7–9**: One wave needed a fix-and-retry.
42
+ - **4–6**: Multiple waves needed retries.
43
+ - **1–3**: Most waves failed the gate at least once.
44
+ - **0**: Every wave failed the gate.
45
+
46
+ Evidence to cite: codex gate results per wave (pass/fail, failure types).
47
+
48
+ ---
49
+
50
+ ## Dimension 4: Retry and Fix Count (0–10)
51
+
52
+ How many total review-fix cycles were needed across all tasks and waves? Lower is better.
53
+
54
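+ Formula (starting estimate): `score = max(0, 10 - (total_retries / task_count) * 5)`. The formula falls faster than the bands below (it reaches 0 at 2 retries per task); when the two disagree, report the band score and cite the retries-per-task ratio.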
55
+
56
+ - **10**: Zero retries.
57
+ - **8**: Fewer than 0.5 retries per task on average.
58
+ - **6**: 0.5–1.0 retries per task.
59
+ - **4**: More than 1 and up to 2 retries per task.
60
+ - **2**: More than 2 and up to 3 retries per task.
61
+ - **0**: More than 3 retries per task on average.
62
+
63
+ Evidence to cite: total retries, broken down by type (audit fix, codex gate retry, manual intervention).
64
+
65
+ ---
66
+
67
+ ## Dimension 5: Execution Smoothness (0–10)
68
+
69
+ Subjective but evidence-grounded assessment. Were there blockers, ambiguous plan steps, confusing errors, or required manual interventions?
70
+
71
+ - **10**: Fully autonomous from start to finish. No blockers, no ambiguity, no manual steps.
72
+ - **7–9**: Minor friction — one clarification needed, one unexpected error handled gracefully.
73
+ - **4–6**: Moderate friction — multiple ambiguities, one blocker that paused execution, one manual intervention.
74
+ - **1–3**: Significant friction — repeated blockers, unclear plan steps that caused wrong-direction work, multiple manual interventions.
75
+ - **0**: Execution could not proceed without constant human steering.
76
+
77
+ Evidence to cite: error log entries, any manual interventions recorded in PROGRESS.md, plan ambiguities encountered.
78
+
79
+ ---
80
+
81
+ ## Scoring Principles
82
+
83
+ ### Evidence-first scoring
84
+
85
+ Every score needs a citation. "Tasks passed audit" is not a citation. "12/14 tasks passed audit on first attempt; Tasks 3 and 9 each needed one auto-fix cycle" is a citation. If the data to score a dimension is genuinely unavailable, note the gap explicitly and score conservatively (assume worst case for that dimension).
86
+
87
+ ### No vibes
88
+
89
+ Do not write "the execution felt smooth" or "agents seemed efficient." Write "0 manual interventions were required and all errors were caught and auto-resolved by ftm-audit Phase 2." The report is read by future executions that need to calibrate behavior, not by humans looking for encouragement.
@@ -1,109 +1,109 @@
1
- # Report Format — Retro Output Template
2
-
3
- This is the exact format for all retro report files saved to `~/.claude/ftm-retros/`.
4
-
5
- ---
6
-
7
- ## File Naming
8
-
9
- Save to: `~/.claude/ftm-retros/{plan-slug}-{YYYY-MM-DD}.md`
10
-
11
- ### Slug Generation
12
-
13
- Take the plan title, lowercase it, replace spaces with hyphens, strip all non-alphanumeric characters except hyphens.
14
-
15
- Examples:
16
- - "FTM Ecosystem Expansion" → `ftm-ecosystem-expansion`
17
- - "Fix Auth Bug + Rate Limiting" → `fix-auth-bug-rate-limiting`
18
- - "v2.0 API Refactor" → `v20-api-refactor`
19
-
20
- ---
21
-
22
- ## Report Template
23
-
24
- ```markdown
25
- # Retro: {Plan Title}
26
-
27
- **Date:** {YYYY-MM-DD}
28
- **Plan:** {absolute path to plan file}
29
- **Duration:** {total execution time, e.g. "47 minutes"}
30
-
31
- ## Scores
32
-
33
- | Dimension | Score | Notes |
34
- |-----------|-------|-------|
35
- | Wave Parallelism | X/10 | {1-sentence justification with data} |
36
- | Audit Pass Rate | X/10 | {N}/{total} tasks first-pass |
37
- | Codex Gate Pass Rate | X/10 | {N}/{total} waves first-pass |
38
- | Retry/Fix Count | X/10 | {total retries} across {N} tasks |
39
- | Execution Smoothness | X/10 | {1-sentence justification} |
40
-
41
- **Overall: {sum}/50**
42
-
43
- ## Raw Data
44
-
45
- - Tasks: {N}
46
- - Waves: {N}
47
- - Agents spawned: {N}
48
- - Audit findings: {N} total ({N} auto-fixed, {N} manual)
49
- - Codex gate results: Wave 1: pass | Wave 2: fail → pass | Wave 3: pass
50
- - Errors/blockers: {list any, or "none"}
51
-
52
- ## What Went Well
53
-
54
- {2–4 specific observations, each grounded in a data point or task number.}
55
-
56
- Example format:
57
- - **Task 3 (auth middleware)** completed in a single commit with zero audit findings. The agent prompt had clear acceptance criteria and a scoped file list — the agent never wandered.
58
- - **Wave 2 parallelism** was fully utilized: all 4 tasks dispatched simultaneously, cutting estimated serial time from ~32 minutes to ~9 minutes.
59
-
60
- ## What Was Slow
61
-
62
- {2–4 specific bottlenecks with timing data or retry counts where available.}
63
-
64
- Example format:
65
- - **ftm-audit Phase 1 (knip)** repeated full project analysis for each task in wave 3, even though tasks only touched 2–3 files each. Added ~40s × 5 tasks = ~3.5 minutes of unnecessary scanning.
66
- - **Task 7 needed 3 audit fix cycles** due to an import path that kept regenerating incorrectly. The agent prompt did not specify the alias configuration in tsconfig.paths.
67
-
68
- ## Proposed Improvements
69
-
70
- {3–5 specific, actionable suggestions. Each must identify: which skill to change, what to change exactly, and why it would help.}
71
-
72
- Format each as:
73
- **N. {Short title}** — {Skill to change} — {Specific change} — {Expected impact}
74
-
75
- Examples:
76
- 1. **Cache knip results within a wave** — ftm-audit — In Phase 1, check whether knip results are already cached for the current wave (via a temp file at `/tmp/ftm-knip-cache-{wave-id}.json`). Only re-run knip if the cache is missing or if the files changed by this task differ from cached scope. Expected: 3× speedup for ftm-audit on large projects with many tasks per wave.
77
- 2. **Dispatch Instrumentor and Researcher in parallel** — ftm-debug — These two agents have no shared state and currently run sequentially. Dispatch them simultaneously. Expected: ~40% reduction in ftm-debug total runtime.
78
- 3. **Add tsconfig.paths to agent context for TypeScript projects** — ftm-executor — When generating agent prompts for TypeScript tasks, include the relevant `paths` aliases from `tsconfig.json`. Expected: eliminates the import-alias regeneration loop that caused 3 retries on Task 7.
79
-
80
- ## Pattern Analysis
81
-
82
- {Only include this section if past retros exist in ~/.claude/ftm-retros/}
83
-
84
- ### Recurring Issues
85
-
86
- {List problems that appeared in 2 or more retros. Format: "Issue description — appeared in: retro-slug-1, retro-slug-2"}
87
-
88
- ### Score Trends
89
-
90
- {Compare overall scores across retros. Are they improving, declining, or stable? Cite actual numbers.}
91
-
92
- Example: Overall scores: 32/50 → 38/50 → 41/50 across the last 3 retros. Parallelism and smoothness improving; audit pass rate stuck at 6/10 for all three runs.
93
-
94
- ### Unaddressed Suggestions
95
-
96
- {List proposed improvements from past retros that have not yet been implemented. These get escalated — flag them explicitly.}
97
-
98
- Format: "**[ESCALATED]** {suggestion} — first proposed in {retro-slug-date}, appeared {N} times"
99
- ```
100
-
101
- ---
102
-
103
- ## Improvement Specificity Standard
104
-
105
- "Improve parallelism" is not an improvement proposal. "Add a dependency pre-check step to ftm-executor Phase 2 that flags tasks with no declared dependencies as parallelizable, and warn when they are dispatched serially" is an improvement proposal. Every proposed improvement must be concrete enough that a future session could implement it from the description alone without asking clarifying questions.
106
-
107
- ## Pattern Escalation Standard
108
-
109
- Recurring issues that have appeared in 3+ retros without being addressed should be flagged with `[ESCALATED - 3+ occurrences]` and moved to the top of the Proposed Improvements list. These are systemic problems, not one-off noise.
1
+ # Report Format — Retro Output Template
2
+
3
+ This is the exact format for all retro report files saved to `~/.claude/ftm-retros/`.
4
+
5
+ ---
6
+
7
+ ## File Naming
8
+
9
+ Save to: `~/.claude/ftm-retros/{plan-slug}-{YYYY-MM-DD}.md`
10
+
11
+ ### Slug Generation
12
+
13
+ Take the plan title, lowercase it, replace spaces with hyphens, strip all non-alphanumeric characters except hyphens, then collapse any run of consecutive hyphens into a single hyphen.
14
+
15
+ Examples:
16
+ - "FTM Ecosystem Expansion" → `ftm-ecosystem-expansion`
17
+ - "Fix Auth Bug + Rate Limiting" → `fix-auth-bug-rate-limiting`
18
+ - "v2.0 API Refactor" → `v20-api-refactor`
19
+
20
+ ---
21
+
22
+ ## Report Template
23
+
24
+ ```markdown
25
+ # Retro: {Plan Title}
26
+
27
+ **Date:** {YYYY-MM-DD}
28
+ **Plan:** {absolute path to plan file}
29
+ **Duration:** {total execution time, e.g. "47 minutes"}
30
+
31
+ ## Scores
32
+
33
+ | Dimension | Score | Notes |
34
+ |-----------|-------|-------|
35
+ | Wave Parallelism | X/10 | {1-sentence justification with data} |
36
+ | Audit Pass Rate | X/10 | {N}/{total} tasks first-pass |
37
+ | Codex Gate Pass Rate | X/10 | {N}/{total} waves first-pass |
38
+ | Retry/Fix Count | X/10 | {total retries} across {N} tasks |
39
+ | Execution Smoothness | X/10 | {1-sentence justification} |
40
+
41
+ **Overall: {sum}/50**
42
+
43
+ ## Raw Data
44
+
45
+ - Tasks: {N}
46
+ - Waves: {N}
47
+ - Agents spawned: {N}
48
+ - Audit findings: {N} total ({N} auto-fixed, {N} manual)
49
+ - Codex gate results: Wave 1: pass | Wave 2: fail → pass | Wave 3: pass
50
+ - Errors/blockers: {list any, or "none"}
51
+
52
+ ## What Went Well
53
+
54
+ {2–4 specific observations, each grounded in a data point or task number.}
55
+
56
+ Example format:
57
+ - **Task 3 (auth middleware)** completed in a single commit with zero audit findings. The agent prompt had clear acceptance criteria and a scoped file list — the agent never wandered.
58
+ - **Wave 2 parallelism** was fully utilized: all 4 tasks dispatched simultaneously, cutting estimated serial time from ~32 minutes to ~9 minutes.
59
+
60
+ ## What Was Slow
61
+
62
+ {2–4 specific bottlenecks with timing data or retry counts where available.}
63
+
64
+ Example format:
65
+ - **ftm-audit Phase 1 (knip)** repeated full project analysis for each task in wave 3, even though tasks only touched 2–3 files each. Added ~40s × 5 tasks = ~3.3 minutes of unnecessary scanning.
66
+ - **Task 7 needed 3 audit fix cycles** due to an import path that kept regenerating incorrectly. The agent prompt did not specify the alias configuration in tsconfig.paths.
67
+
68
+ ## Proposed Improvements
69
+
70
+ {3–5 specific, actionable suggestions. Each must identify: which skill to change, what to change exactly, and why it would help.}
71
+
72
+ Format each as:
73
+ **N. {Short title}** — {Skill to change} — {Specific change} — {Expected impact}
74
+
75
+ Examples:
76
+ 1. **Cache knip results within a wave** — ftm-audit — In Phase 1, check whether knip results are already cached for the current wave (via a temp file at `/tmp/ftm-knip-cache-{wave-id}.json`). Only re-run knip if the cache is missing or if the files changed by this task differ from cached scope. Expected: 3× speedup for ftm-audit on large projects with many tasks per wave.
77
+ 2. **Dispatch Instrumentor and Researcher in parallel** — ftm-debug — These two agents have no shared state and currently run sequentially. Dispatch them simultaneously. Expected: ~40% reduction in ftm-debug total runtime.
78
+ 3. **Add tsconfig.paths to agent context for TypeScript projects** — ftm-executor — When generating agent prompts for TypeScript tasks, include the relevant `paths` aliases from `tsconfig.json`. Expected: eliminates the import-alias regeneration loop that caused 3 retries on Task 7.
79
+
80
+ ## Pattern Analysis
81
+
82
+ {Only include this section if past retros exist in ~/.claude/ftm-retros/}
83
+
84
+ ### Recurring Issues
85
+
86
+ {List problems that appeared in 2 or more retros. Format: "Issue description — appeared in: retro-slug-1, retro-slug-2"}
87
+
88
+ ### Score Trends
89
+
90
+ {Compare overall scores across retros. Are they improving, declining, or stable? Cite actual numbers.}
91
+
92
+ Example: Overall scores: 32/50 → 38/50 → 41/50 across the last 3 retros. Parallelism and smoothness improving; audit pass rate stuck at 6/10 for all three runs.
93
+
94
+ ### Unaddressed Suggestions
95
+
96
+ {List proposed improvements from past retros that have not yet been implemented. These get escalated — flag them explicitly.}
97
+
98
+ Format: "**[ESCALATED]** {suggestion} — first proposed in {retro-slug-date}, appeared {N} times"
99
+ ```
100
+
101
+ ---
102
+
103
+ ## Improvement Specificity Standard
104
+
105
+ "Improve parallelism" is not an improvement proposal. "Add a dependency pre-check step to ftm-executor Phase 2 that flags tasks with no declared dependencies as parallelizable, and warn when they are dispatched serially" is an improvement proposal. Every proposed improvement must be concrete enough that a future session could implement it from the description alone without asking clarifying questions.
106
+
107
+ ## Pattern Escalation Standard
108
+
109
+ Recurring issues that have appeared in 3+ retros without being addressed should be flagged with `[ESCALATED - 3+ occurrences]` and moved to the top of the Proposed Improvements list. These are systemic problems, not one-off noise.
package/ftm-retro.yml CHANGED
@@ -1,2 +1,2 @@
1
- name: ftm-retro
2
- description: Post-execution self-assessment skill. Automatically triggered after ftm-executor completes a plan. Scores execution across 5 dimensions, identifies what went well and what was slow, writes structured report with improvement suggestions. Use when user says "retro", "retrospective", "how did that go", "execution review", "self-assessment", "ftm retro".
1
+ name: ftm-retro
2
+ description: Post-execution self-assessment skill. Automatically triggered after ftm-executor completes a plan. Scores execution across 5 dimensions, identifies what went well and what was slow, writes structured report with improvement suggestions. Use when user says "retro", "retrospective", "how did that go", "execution review", "self-assessment", "ftm retro".