maestro-flow 0.4.10 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. package/.agents/agents/cli-explore-agent.md +189 -0
  2. package/.agents/agents/conceptual-planning-agent.md +247 -0
  3. package/.agents/agents/impeccable-agent.md +101 -0
  4. package/.agents/agents/team-supervisor.md +145 -0
  5. package/.agents/agents/team-worker.md +239 -0
  6. package/.agents/agents/ui-design-agent.md +289 -0
  7. package/.agents/agents/workflow-analyzer.md +117 -0
  8. package/.agents/agents/workflow-codebase-mapper.md +79 -0
  9. package/.agents/agents/workflow-collab-planner.md +145 -0
  10. package/.agents/agents/workflow-debugger.md +105 -0
  11. package/.agents/agents/workflow-executor.md +134 -0
  12. package/.agents/agents/workflow-external-researcher.md +88 -0
  13. package/.agents/agents/workflow-integration-checker.md +85 -0
  14. package/.agents/agents/workflow-nyquist-auditor.md +87 -0
  15. package/.agents/agents/workflow-phase-researcher.md +87 -0
  16. package/.agents/agents/workflow-plan-checker.md +92 -0
  17. package/.agents/agents/workflow-planner.md +197 -0
  18. package/.agents/agents/workflow-project-researcher.md +76 -0
  19. package/.agents/agents/workflow-research-synthesizer.md +72 -0
  20. package/.agents/agents/workflow-reviewer.md +84 -0
  21. package/.agents/agents/workflow-roadmapper.md +83 -0
  22. package/.agents/agents/workflow-verifier.md +122 -0
  23. package/.agents/skills/codify-to-knowhow/SKILL.md +169 -0
  24. package/.agents/skills/codify-to-knowhow/phases/01-load-manifest.md +101 -0
  25. package/.agents/skills/codify-to-knowhow/phases/02-generate-knowhow.md +97 -0
  26. package/.agents/skills/codify-to-knowhow/phases/03-generate-specs.md +92 -0
  27. package/.agents/skills/codify-to-knowhow/phases/04-index-verify.md +119 -0
  28. package/.agents/skills/learn-decompose/SKILL.md +118 -0
  29. package/.agents/skills/learn-follow/SKILL.md +129 -0
  30. package/.agents/skills/learn-investigate/SKILL.md +154 -0
  31. package/.agents/skills/learn-retro/SKILL.md +159 -0
  32. package/.agents/skills/learn-second-opinion/SKILL.md +124 -0
  33. package/.agents/skills/maestro/SKILL.md +224 -0
  34. package/.agents/skills/maestro-amend/SKILL.md +165 -0
  35. package/.agents/skills/maestro-analyze/SKILL.md +135 -0
  36. package/.agents/skills/maestro-brainstorm/SKILL.md +118 -0
  37. package/.agents/skills/maestro-collab/SKILL.md +174 -0
  38. package/.agents/skills/maestro-composer/SKILL.md +181 -0
  39. package/.agents/skills/maestro-execute/SKILL.md +133 -0
  40. package/.agents/skills/maestro-fork/SKILL.md +88 -0
  41. package/.agents/skills/maestro-guard/SKILL.md +103 -0
  42. package/.agents/skills/maestro-help/SKILL.md +266 -0
  43. package/.agents/skills/maestro-help/index/catalog.json +184 -0
  44. package/.agents/skills/maestro-help/phases/01-parse-intent.md +122 -0
  45. package/.agents/skills/maestro-help/phases/02-search-present.md +181 -0
  46. package/.agents/skills/maestro-help/phases/03-workflow-guide.md +186 -0
  47. package/.agents/skills/maestro-impeccable/SKILL.md +251 -0
  48. package/.agents/skills/maestro-init/SKILL.md +80 -0
  49. package/.agents/skills/maestro-learn/SKILL.md +142 -0
  50. package/.agents/skills/maestro-merge/SKILL.md +66 -0
  51. package/.agents/skills/maestro-milestone-audit/SKILL.md +70 -0
  52. package/.agents/skills/maestro-milestone-complete/SKILL.md +77 -0
  53. package/.agents/skills/maestro-milestone-release/SKILL.md +98 -0
  54. package/.agents/skills/maestro-overlay/SKILL.md +180 -0
  55. package/.agents/skills/maestro-plan/SKILL.md +172 -0
  56. package/.agents/skills/maestro-player/SKILL.md +177 -0
  57. package/.agents/skills/maestro-quick/SKILL.md +67 -0
  58. package/.agents/skills/maestro-ralph/SKILL.md +685 -0
  59. package/.agents/skills/maestro-ralph-execute/SKILL.md +259 -0
  60. package/.agents/skills/maestro-roadmap/SKILL.md +170 -0
  61. package/.agents/skills/maestro-tools-execute/SKILL.md +119 -0
  62. package/.agents/skills/maestro-tools-register/SKILL.md +159 -0
  63. package/.agents/skills/maestro-ui-codify/SKILL.md +82 -0
  64. package/.agents/skills/maestro-update/SKILL.md +178 -0
  65. package/.agents/skills/maestro-verify/SKILL.md +111 -0
  66. package/.agents/skills/manage-codebase-rebuild/SKILL.md +77 -0
  67. package/.agents/skills/manage-codebase-refresh/SKILL.md +59 -0
  68. package/.agents/skills/manage-harvest/SKILL.md +96 -0
  69. package/.agents/skills/manage-issue/SKILL.md +75 -0
  70. package/.agents/skills/manage-issue-discover/SKILL.md +83 -0
  71. package/.agents/skills/manage-knowhow/SKILL.md +79 -0
  72. package/.agents/skills/manage-knowhow-capture/SKILL.md +81 -0
  73. package/.agents/skills/manage-learn/SKILL.md +67 -0
  74. package/.agents/skills/manage-status/SKILL.md +54 -0
  75. package/.agents/skills/manage-wiki/SKILL.md +64 -0
  76. package/.agents/skills/quality-auto-test/SKILL.md +138 -0
  77. package/.agents/skills/quality-debug/SKILL.md +122 -0
  78. package/.agents/skills/quality-refactor/SKILL.md +69 -0
  79. package/.agents/skills/quality-retrospective/SKILL.md +79 -0
  80. package/.agents/skills/quality-review/SKILL.md +130 -0
  81. package/.agents/skills/quality-sync/SKILL.md +53 -0
  82. package/.agents/skills/quality-test/SKILL.md +119 -0
  83. package/.agents/skills/security-audit/SKILL.md +157 -0
  84. package/.agents/skills/skill-iter-tune/SKILL.md +384 -0
  85. package/.agents/skills/skill-iter-tune/phases/01-setup.md +144 -0
  86. package/.agents/skills/skill-iter-tune/phases/02-execute.md +292 -0
  87. package/.agents/skills/skill-iter-tune/phases/03-evaluate.md +312 -0
  88. package/.agents/skills/skill-iter-tune/phases/04-improve.md +186 -0
  89. package/.agents/skills/skill-iter-tune/phases/05-report.md +166 -0
  90. package/.agents/skills/skill-iter-tune/specs/evaluation-criteria.md +63 -0
  91. package/.agents/skills/skill-iter-tune/templates/eval-prompt.md +134 -0
  92. package/.agents/skills/skill-iter-tune/templates/execute-prompt.md +97 -0
  93. package/.agents/skills/spec-add/SKILL.md +70 -0
  94. package/.agents/skills/spec-load/SKILL.md +73 -0
  95. package/.agents/skills/spec-remove/SKILL.md +53 -0
  96. package/.agents/skills/spec-setup/SKILL.md +50 -0
  97. package/.agents/skills/team-coordinate/SKILL.md +268 -0
  98. package/.agents/skills/team-coordinate/roles/coordinator/commands/analyze-task.md +247 -0
  99. package/.agents/skills/team-coordinate/roles/coordinator/commands/dispatch.md +131 -0
  100. package/.agents/skills/team-coordinate/roles/coordinator/commands/monitor.md +358 -0
  101. package/.agents/skills/team-coordinate/roles/coordinator/role.md +365 -0
  102. package/.agents/skills/team-coordinate/specs/knowledge-transfer.md +111 -0
  103. package/.agents/skills/team-coordinate/specs/pipelines.md +97 -0
  104. package/.agents/skills/team-coordinate/specs/quality-gates.md +112 -0
  105. package/.agents/skills/team-coordinate/specs/role-spec-template.md +198 -0
  106. package/.agents/skills/team-executor/SKILL.md +191 -0
  107. package/.agents/skills/team-executor/roles/executor/commands/monitor.md +239 -0
  108. package/.agents/skills/team-executor/roles/executor/role.md +171 -0
  109. package/.agents/skills/team-executor/specs/session-schema.md +264 -0
  110. package/.agents/skills/team-lifecycle-v4/SKILL.md +211 -0
  111. package/.agents/skills/team-lifecycle-v4/roles/analyst/role.md +96 -0
  112. package/.agents/skills/team-lifecycle-v4/roles/coordinator/commands/analyze.md +56 -0
  113. package/.agents/skills/team-lifecycle-v4/roles/coordinator/commands/dispatch.md +56 -0
  114. package/.agents/skills/team-lifecycle-v4/roles/coordinator/commands/monitor.md +206 -0
  115. package/.agents/skills/team-lifecycle-v4/roles/coordinator/role.md +130 -0
  116. package/.agents/skills/team-lifecycle-v4/roles/executor/commands/fix.md +35 -0
  117. package/.agents/skills/team-lifecycle-v4/roles/executor/commands/implement.md +62 -0
  118. package/.agents/skills/team-lifecycle-v4/roles/executor/role.md +69 -0
  119. package/.agents/skills/team-lifecycle-v4/roles/planner/role.md +87 -0
  120. package/.agents/skills/team-lifecycle-v4/roles/reviewer/commands/review-code.md +34 -0
  121. package/.agents/skills/team-lifecycle-v4/roles/reviewer/commands/review-spec.md +44 -0
  122. package/.agents/skills/team-lifecycle-v4/roles/reviewer/role.md +71 -0
  123. package/.agents/skills/team-lifecycle-v4/roles/supervisor/role.md +194 -0
  124. package/.agents/skills/team-lifecycle-v4/roles/tester/role.md +89 -0
  125. package/.agents/skills/team-lifecycle-v4/roles/writer/role.md +97 -0
  126. package/.agents/skills/team-lifecycle-v4/specs/knowledge-transfer.md +114 -0
  127. package/.agents/skills/team-lifecycle-v4/specs/pipelines.md +140 -0
  128. package/.agents/skills/team-lifecycle-v4/specs/quality-gates.md +130 -0
  129. package/.agents/skills/team-lifecycle-v4/templates/architecture.md +254 -0
  130. package/.agents/skills/team-lifecycle-v4/templates/epics.md +196 -0
  131. package/.agents/skills/team-lifecycle-v4/templates/product-brief.md +133 -0
  132. package/.agents/skills/team-lifecycle-v4/templates/requirements.md +224 -0
  133. package/.agents/skills/team-quality-assurance/SKILL.md +149 -0
  134. package/.agents/skills/team-quality-assurance/roles/analyst/role.md +90 -0
  135. package/.agents/skills/team-quality-assurance/roles/coordinator/commands/analyze.md +72 -0
  136. package/.agents/skills/team-quality-assurance/roles/coordinator/commands/dispatch.md +111 -0
  137. package/.agents/skills/team-quality-assurance/roles/coordinator/commands/monitor.md +250 -0
  138. package/.agents/skills/team-quality-assurance/roles/coordinator/role.md +143 -0
  139. package/.agents/skills/team-quality-assurance/roles/executor/role.md +68 -0
  140. package/.agents/skills/team-quality-assurance/roles/generator/role.md +70 -0
  141. package/.agents/skills/team-quality-assurance/roles/scout/role.md +77 -0
  142. package/.agents/skills/team-quality-assurance/roles/strategist/role.md +73 -0
  143. package/.agents/skills/team-quality-assurance/specs/pipelines.md +115 -0
  144. package/.agents/skills/team-quality-assurance/specs/team-config.json +131 -0
  145. package/.agents/skills/team-review/SKILL.md +149 -0
  146. package/.agents/skills/team-review/roles/coordinator/commands/analyze.md +71 -0
  147. package/.agents/skills/team-review/roles/coordinator/commands/dispatch.md +91 -0
  148. package/.agents/skills/team-review/roles/coordinator/commands/monitor.md +224 -0
  149. package/.agents/skills/team-review/roles/coordinator/role.md +132 -0
  150. package/.agents/skills/team-review/roles/fixer/role.md +78 -0
  151. package/.agents/skills/team-review/roles/reviewer/role.md +70 -0
  152. package/.agents/skills/team-review/roles/scanner/role.md +81 -0
  153. package/.agents/skills/team-review/specs/dimensions.md +82 -0
  154. package/.agents/skills/team-review/specs/finding-schema.json +82 -0
  155. package/.agents/skills/team-review/specs/pipelines.md +102 -0
  156. package/.agents/skills/team-review/specs/team-config.json +27 -0
  157. package/.agents/skills/team-tech-debt/SKILL.md +130 -0
  158. package/.agents/skills/team-tech-debt/roles/assessor/role.md +79 -0
  159. package/.agents/skills/team-tech-debt/roles/coordinator/commands/analyze.md +47 -0
  160. package/.agents/skills/team-tech-debt/roles/coordinator/commands/dispatch.md +156 -0
  161. package/.agents/skills/team-tech-debt/roles/coordinator/commands/monitor.md +209 -0
  162. package/.agents/skills/team-tech-debt/roles/coordinator/role.md +123 -0
  163. package/.agents/skills/team-tech-debt/roles/executor/role.md +78 -0
  164. package/.agents/skills/team-tech-debt/roles/planner/role.md +71 -0
  165. package/.agents/skills/team-tech-debt/roles/scanner/role.md +92 -0
  166. package/.agents/skills/team-tech-debt/roles/validator/role.md +80 -0
  167. package/.agents/skills/team-tech-debt/specs/pipelines.md +47 -0
  168. package/.agents/skills/team-tech-debt/specs/team-config.json +129 -0
  169. package/.agents/skills/team-testing/SKILL.md +145 -0
  170. package/.agents/skills/team-testing/roles/analyst/role.md +105 -0
  171. package/.agents/skills/team-testing/roles/coordinator/commands/analyze.md +70 -0
  172. package/.agents/skills/team-testing/roles/coordinator/commands/dispatch.md +108 -0
  173. package/.agents/skills/team-testing/roles/coordinator/commands/monitor.md +257 -0
  174. package/.agents/skills/team-testing/roles/coordinator/role.md +134 -0
  175. package/.agents/skills/team-testing/roles/executor/role.md +101 -0
  176. package/.agents/skills/team-testing/roles/generator/role.md +100 -0
  177. package/.agents/skills/team-testing/roles/strategist/role.md +85 -0
  178. package/.agents/skills/team-testing/specs/pipelines.md +101 -0
  179. package/.agents/skills/team-testing/specs/team-config.json +93 -0
  180. package/.agents/skills/wiki-connect/SKILL.md +64 -0
  181. package/.agents/skills/wiki-digest/SKILL.md +70 -0
  182. package/.agents/skills/workflow-skill-designer/SKILL.md +498 -0
  183. package/.agents/skills/workflow-skill-designer/phases/01-requirements-analysis.md +356 -0
  184. package/.agents/skills/workflow-skill-designer/phases/02-orchestrator-design.md +444 -0
  185. package/.agents/skills/workflow-skill-designer/phases/03-phase-design.md +458 -0
  186. package/.agents/skills/workflow-skill-designer/phases/04-validation.md +471 -0
  187. package/.agy/skills/maestro/SKILL.md +13 -11
  188. package/.agy/skills/maestro-ralph/SKILL.md +222 -87
  189. package/.claude/commands/maestro-ralph.md +222 -87
  190. package/.claude/commands/maestro.md +13 -11
  191. package/.codex/skills/maestro/SKILL.md +23 -17
  192. package/.codex/skills/maestro-ralph/SKILL.md +177 -67
  193. package/dist/src/commands/install-backend.d.ts +12 -0
  194. package/dist/src/commands/install-backend.d.ts.map +1 -1
  195. package/dist/src/commands/install-backend.js +144 -0
  196. package/dist/src/commands/install-backend.js.map +1 -1
  197. package/dist/src/core/component-defs.d.ts +6 -0
  198. package/dist/src/core/component-defs.d.ts.map +1 -1
  199. package/dist/src/core/component-defs.js +97 -0
  200. package/dist/src/core/component-defs.js.map +1 -1
  201. package/dist/src/tui/install-ui/ExtraMcpConfig.d.ts +11 -0
  202. package/dist/src/tui/install-ui/ExtraMcpConfig.d.ts.map +1 -0
  203. package/dist/src/tui/install-ui/ExtraMcpConfig.js +61 -0
  204. package/dist/src/tui/install-ui/ExtraMcpConfig.js.map +1 -0
  205. package/dist/src/tui/install-ui/InstallConfirm.d.ts +3 -0
  206. package/dist/src/tui/install-ui/InstallConfirm.d.ts.map +1 -1
  207. package/dist/src/tui/install-ui/InstallConfirm.js +1 -1
  208. package/dist/src/tui/install-ui/InstallConfirm.js.map +1 -1
  209. package/dist/src/tui/install-ui/InstallExecution.d.ts +2 -0
  210. package/dist/src/tui/install-ui/InstallExecution.d.ts.map +1 -1
  211. package/dist/src/tui/install-ui/InstallExecution.js +14 -2
  212. package/dist/src/tui/install-ui/InstallExecution.js.map +1 -1
  213. package/dist/src/tui/install-ui/InstallFlow.d.ts +1 -1
  214. package/dist/src/tui/install-ui/InstallFlow.d.ts.map +1 -1
  215. package/dist/src/tui/install-ui/InstallFlow.js +16 -3
  216. package/dist/src/tui/install-ui/InstallFlow.js.map +1 -1
  217. package/dist/src/tui/install-ui/InstallHub.d.ts +2 -0
  218. package/dist/src/tui/install-ui/InstallHub.d.ts.map +1 -1
  219. package/dist/src/tui/install-ui/InstallHub.js +8 -0
  220. package/dist/src/tui/install-ui/InstallHub.js.map +1 -1
  221. package/dist/src/tui/install-ui/InstallResult.d.ts.map +1 -1
  222. package/dist/src/tui/install-ui/InstallResult.js +3 -1
  223. package/dist/src/tui/install-ui/InstallResult.js.map +1 -1
  224. package/dist/src/utils/update-notices.js +19 -0
  225. package/dist/src/utils/update-notices.js.map +1 -1
  226. package/package.json +5 -2
@@ -0,0 +1,312 @@
1
+ # Phase 3: Evaluate Quality
2
+
3
+ > **COMPACT SENTINEL [Phase 3: Evaluate]**
4
+ > This phase contains 5 execution steps (Step 3.1 -- 3.5).
5
+ > If you can read this sentinel but cannot find the full Step protocol below, context has been compressed.
6
+ > Recovery: `read_file("phases/03-evaluate.md")`
7
+
8
+ Evaluate skill quality using `ccw cli --tool gemini --mode analysis`. Gemini scores the skill across 5 dimensions and provides improvement suggestions.
9
+
10
+ ## Objective
11
+
12
+ - Construct evaluation prompt with skill + artifacts + criteria
13
+ - Execute via ccw cli Gemini
14
+ - Parse multi-dimensional score
15
+ - Write iteration-{N}-eval.md
16
+ - Check termination conditions
17
+
18
+ ## Execution
19
+
20
+ ### Step 3.1: Prepare Evaluation Context
21
+
22
+ ```javascript
23
+ const N = state.current_iteration;
24
+ const iterDir = `${state.work_dir}/iterations/iteration-${N}`;
25
+
26
+ // Read evaluation criteria
27
+ // Ref: specs/evaluation-criteria.md
28
+ const evaluationCriteria = read_file('.claude/skills/skill-iter-tune/specs/evaluation-criteria.md');
29
+
30
+ // Build skillContent (same pattern as Phase 02 — only executable files)
31
+ const skillContent = state.target_skills.map(skill => {
32
+ const skillMd = read_file(`${skill.path}/SKILL.md`);
33
+ const phaseFiles = find_files(`${skill.path}/phases/*.md`).sort().map(f => ({
34
+ relativePath: f.replace(skill.path + '/', ''),
35
+ content: read_file(f)
36
+ }));
37
+ const specFiles = find_files(`${skill.path}/specs/*.md`).map(f => ({
38
+ relativePath: f.replace(skill.path + '/', ''),
39
+ content: read_file(f)
40
+ }));
41
+ return `### File: SKILL.md\n${skillMd}\n\n` +
42
+ phaseFiles.map(f => `### File: ${f.relativePath}\n${f.content}`).join('\n\n') +
43
+ (specFiles.length > 0 ? '\n\n' + specFiles.map(f => `### File: ${f.relativePath}\n${f.content}`).join('\n\n') : '');
44
+ }).join('\n\n---\n\n');
45
+
46
+ // Build artifacts summary
47
+ let artifactsSummary = 'No artifacts produced (execution may have failed)';
48
+
49
+ if (state.execution_mode === 'chain') {
50
+ // Chain mode: group artifacts by skill
51
+ const chainSummaries = state.chain_order.map(skillName => {
52
+ const skillArtifactDir = `${iterDir}/artifacts/${skillName}`;
53
+ const files = find_files(`${skillArtifactDir}/**/*`);
54
+ if (files.length === 0) return `### ${skillName} (no artifacts)`;
55
+ const filesSummary = files.map(f => {
56
+ const relPath = f.replace(`${skillArtifactDir}/`, '');
57
+ const content = read_file(f, { limit: 200 });
58
+ return `--- ${relPath} ---\n${content}`;
59
+ }).join('\n\n');
60
+ return `### ${skillName} (chain position ${state.chain_order.indexOf(skillName) + 1})\n${filesSummary}`;
61
+ });
62
+ artifactsSummary = chainSummaries.join('\n\n---\n\n');
63
+ } else {
64
+ // Single mode (existing)
65
+ const artifactFiles = find_files(`${iterDir}/artifacts/**/*`);
66
+ if (artifactFiles.length > 0) {
67
+ artifactsSummary = artifactFiles.map(f => {
68
+ const relPath = f.replace(`${iterDir}/artifacts/`, '');
69
+ const content = read_file(f, { limit: 200 });
70
+ return `--- ${relPath} ---\n${content}`;
71
+ }).join('\n\n');
72
+ }
73
+ }
74
+
75
+ // Build previous evaluation context
76
+ const previousEvalContext = state.iterations.filter(i => i.evaluation).length > 0
77
+ ? `PREVIOUS ITERATIONS:\n` + state.iterations.filter(i => i.evaluation).map(iter =>
78
+ `Iteration ${iter.round}: Score ${iter.evaluation.score}\n` +
79
+ ` Applied: ${iter.improvement?.changes_applied?.map(c => c.summary).join('; ') || 'none'}\n` +
80
+ ` Weaknesses: ${iter.evaluation.weaknesses?.slice(0, 3).join('; ') || 'none'}`
81
+ ).join('\n') + '\nIMPORTANT: Focus on NEW issues not yet addressed.'
82
+ : '';
83
+ ```
84
+
85
+ ### Step 3.2: Construct Evaluation Prompt
86
+
87
+ ```javascript
88
+ // Ref: templates/eval-prompt.md
89
+ const evalPrompt = `PURPOSE: Evaluate the quality of a workflow skill by examining its definition and produced artifacts.
90
+
91
+ SKILL DEFINITION:
92
+ ${skillContent}
93
+
94
+ TEST SCENARIO:
95
+ ${state.test_scenario.description}
96
+ Requirements: ${state.test_scenario.requirements.join('; ')}
97
+ Success Criteria: ${state.test_scenario.success_criteria}
98
+
99
+ ARTIFACTS PRODUCED:
100
+ ${artifactsSummary}
101
+
102
+ EVALUATION CRITERIA:
103
+ ${evaluationCriteria}
104
+
105
+ ${previousEvalContext}
106
+
107
+ ${state.execution_mode === 'chain' ? `
108
+ CHAIN CONTEXT:
109
+ This skill chain contains ${state.chain_order.length} skills executed in order:
110
+ ${state.chain_order.map((s, i) => `${i+1}. ${s}`).join('\n')}
111
+ Current evaluation covers the entire chain output.
112
+ Please provide per-skill quality scores in an additional "chain_scores" field: { "${state.chain_order[0]}": <score>, ... }
113
+ ` : ''}
114
+
115
+ TASK:
116
+ 1. Score each dimension (Clarity 0.20, Completeness 0.25, Correctness 0.25, Effectiveness 0.20, Efficiency 0.10) on 0-100
117
+ 2. Calculate weighted composite score
118
+ 3. List top 3 strengths
119
+ 4. List top 3-5 weaknesses with file:section references
120
+ 5. Provide 3-5 prioritized improvement suggestions with concrete changes
121
+
122
+ EXPECTED OUTPUT (strict JSON, no markdown):
123
+ {
124
+ "composite_score": <0-100>,
125
+ "dimensions": [
126
+ {"name":"Clarity","id":"clarity","score":<0-100>,"weight":0.20,"feedback":"..."},
127
+ {"name":"Completeness","id":"completeness","score":<0-100>,"weight":0.25,"feedback":"..."},
128
+ {"name":"Correctness","id":"correctness","score":<0-100>,"weight":0.25,"feedback":"..."},
129
+ {"name":"Effectiveness","id":"effectiveness","score":<0-100>,"weight":0.20,"feedback":"..."},
130
+ {"name":"Efficiency","id":"efficiency","score":<0-100>,"weight":0.10,"feedback":"..."}
131
+ ],
132
+ "strengths": ["...", "...", "..."],
133
+ "weaknesses": ["...with file:section ref...", "..."],
134
+ "suggestions": [
135
+ {"priority":"high|medium|low","target_file":"...","description":"...","rationale":"...","code_snippet":"..."}
136
+ ]
137
+ }
138
+
139
+ CONSTRAINTS: Be rigorous, reference exact files, focus on highest-impact changes, output ONLY JSON`;
140
+ ```
141
+
142
+ ### Step 3.3: Execute via ccw cli Gemini
143
+
144
+ > **CHECKPOINT**: Verify evaluation prompt is properly constructed before CLI execution.
145
+
146
+ ```javascript
147
+ // Shell escape utility (same as Phase 02)
148
/**
 * Escape a string so it can be embedded inside a double-quoted shell argument.
 *
 * Inside double quotes the shell still treats backslash, double quote, dollar
 * sign and backtick as special, so each of them is prefixed with a backslash.
 * The backslash itself must be covered: otherwise an input such as `\"` turns
 * into `\\"`, where the shell reads an escaped backslash followed by a quote
 * that terminates the argument early.
 *
 * @param {string} str - Raw text to place between double quotes.
 * @returns {string} The text with `\`, `"`, `$` and backtick backslash-escaped.
 */
function escapeForShell(str) {
  // Single pass, so backslashes added for one character are never re-escaped.
  // In the replacement string, `$&` stands for the matched character.
  return str.replace(/[\\"$`]/g, '\\$&');
}
151
+
152
+ const skillPath = state.target_skills[0].path; // Primary skill for --cd
153
+
154
+ const cliCommand = `ccw cli -p "${escapeForShell(evalPrompt)}" --tool gemini --mode analysis --cd "${skillPath}"`;
155
+
156
+ // Execute in background
157
+ shell({
158
+ command: cliCommand,
159
+ run_in_background: true,
160
+ timeout: 300000 // 5 minutes
161
+ });
162
+
163
+ // STOP -- wait for hook callback
164
+ ```
165
+
166
+ ### Step 3.4: Parse Score and Write Eval File
167
+
168
+ After CLI completes:
169
+
170
+ ```javascript
171
+ // Parse JSON from Gemini output
172
+ // The output may contain markdown wrapping -- extract JSON
173
+ const rawOutput = /* CLI output from callback */;
174
+ const jsonMatch = rawOutput.match(/\{[\s\S]*\}/);
175
// Turn the evaluator's reply into `evaluation`, degrading to a neutral
// default (score 50, empty lists) when the reply is not usable JSON.
let evaluation;

if (jsonMatch) {
  try {
    evaluation = JSON.parse(jsonMatch[0]);
    // chain_scores is persisted with the rest of the evaluation in the
    // "Update state" step further down. It must not be written to
    // state.iterations[N - 1].evaluation here: that object has not been
    // assigned yet, so the write would throw, land in the catch below and
    // replace a valid parse with the heuristic fallback.
  } catch (e) {
    // Keep the unparseable reply next to the eval report so the weakness
    // note below is accurate and the failure can be inspected later.
    write_file(`${iterDir}/iteration-${N}-eval-raw.txt`, rawOutput);

    // Fallback: try to extract score heuristically
    const scoreMatch = rawOutput.match(/"composite_score"\s*:\s*(\d+)/);
    evaluation = {
      composite_score: scoreMatch ? parseInt(scoreMatch[1], 10) : 50,
      dimensions: [],
      strengths: [],
      weaknesses: ['Evaluation output parsing failed -- raw output saved'],
      suggestions: []
    };
  }
} else {
  // Nothing resembling a JSON object was found in the reply.
  evaluation = {
    composite_score: 50,
    dimensions: [],
    strengths: [],
    weaknesses: ['No structured evaluation output -- defaulting to 50'],
    suggestions: []
  };
}
204
+
205
+ // Write iteration-N-eval.md
206
+ const evalReport = `# Iteration ${N} Evaluation
207
+
208
+ **Composite Score**: ${evaluation.composite_score}/100
209
+ **Date**: ${new Date().toISOString()}
210
+
211
+ ## Dimension Scores
212
+
213
+ | Dimension | Score | Weight | Feedback |
214
+ |-----------|-------|--------|----------|
215
+ ${(evaluation.dimensions || []).map(d =>
216
+ `| ${d.name} | ${d.score} | ${d.weight} | ${d.feedback} |`
217
+ ).join('\n')}
218
+
219
+ ${(state.execution_mode === 'chain' && evaluation.chain_scores) ? `
220
+ ## Chain Scores
221
+
222
+ | Skill | Score | Chain Position |
223
+ |-------|-------|----------------|
224
+ ${state.chain_order.map((s, i) => `| ${s} | ${evaluation.chain_scores[s] || '-'} | ${i + 1} |`).join('\n')}
225
+ ` : ''}
226
+
227
+ ## Strengths
228
+ ${(evaluation.strengths || []).map(s => `- ${s}`).join('\n')}
229
+
230
+ ## Weaknesses
231
+ ${(evaluation.weaknesses || []).map(w => `- ${w}`).join('\n')}
232
+
233
+ ## Improvement Suggestions
234
+ ${(evaluation.suggestions || []).map((s, i) =>
235
+ `### ${i + 1}. [${s.priority}] ${s.description}\n- **Target**: ${s.target_file}\n- **Rationale**: ${s.rationale}\n${s.code_snippet ? `- **Suggested**:\n\`\`\`\n${s.code_snippet}\n\`\`\`` : ''}`
236
+ ).join('\n\n')}
237
+ `;
238
+
239
+ write_file(`${iterDir}/iteration-${N}-eval.md`, evalReport);
240
+
241
+ // Update state
242
+ state.iterations[N - 1].evaluation = {
243
+ score: evaluation.composite_score,
244
+ dimensions: evaluation.dimensions || [],
245
+ strengths: evaluation.strengths || [],
246
+ weaknesses: evaluation.weaknesses || [],
247
+ suggestions: evaluation.suggestions || [],
248
+ chain_scores: evaluation.chain_scores || null,
249
+ eval_file: `${iterDir}/iteration-${N}-eval.md`
250
+ };
251
+ state.latest_score = evaluation.composite_score;
252
+ state.score_trend.push(evaluation.composite_score);
253
+
254
+ write_file(`${state.work_dir}/iteration-state.json`, JSON.stringify(state, null, 2));
255
+ ```
256
+
257
+ ### Step 3.5: Check Termination
258
+
259
+ ```javascript
260
/**
 * Decide whether the tuning loop should stop after the current evaluation.
 *
 * Checks run in a fixed order and the first match wins:
 *   1. latest_score has reached quality_threshold
 *   2. current_iteration has reached max_iterations
 *   3. the newest score is at most 2 points above the score two entries
 *      earlier in score_trend (also sets state.converged = true)
 *   4. error_count has reached max_errors
 *
 * @param {object} state - Iteration state; `converged` may be set on it.
 * @returns {{terminate: boolean, reason?: string}} Stop decision; `reason`
 *   is present only when `terminate` is true.
 */
function shouldTerminate(state) {
  const stop = (reason) => ({ terminate: true, reason });

  if (state.latest_score >= state.quality_threshold) {
    return stop('quality_threshold_met');
  }

  if (state.current_iteration >= state.max_iterations) {
    return stop('max_iterations_reached');
  }

  const trend = state.score_trend;
  if (trend.length >= 3) {
    // Compare the ends of the three most recent scores.
    const [oldest, , newest] = trend.slice(-3);
    if (newest - oldest <= 2) {
      state.converged = true;
      return stop('convergence_detected');
    }
  }

  if (state.error_count >= state.max_errors) {
    return stop('error_limit_reached');
  }

  return { terminate: false };
}
288
+
289
+ const termination = shouldTerminate(state);
290
+ if (termination.terminate) {
291
+ state.termination_reason = termination.reason;
292
+ write_file(`${state.work_dir}/iteration-state.json`, JSON.stringify(state, null, 2));
293
+ // Skip Phase 4, go directly to Phase 5 (Report)
294
+ } else {
295
+ // Continue to Phase 4 (Improve)
296
+ }
297
+ ```
298
+
299
+ ## Error Handling
300
+
301
+ | Error | Recovery |
302
+ |-------|----------|
303
+ | CLI timeout | Retry once; if it still fails, use score 50 with a warning |
304
+ | JSON parse failure | Extract score heuristically, save raw output |
305
+ | No output | Default score 50, note in weaknesses |
306
+
307
+ ## Output
308
+
309
+ - **Files**: `iteration-{N}-eval.md`
310
+ - **State**: `iterations[N-1].evaluation`, `latest_score`, `score_trend` updated
311
+ - **Decision**: terminate -> Phase 5, continue -> Phase 4
312
+ - **track_tasks**: Update current iteration score display
@@ -0,0 +1,186 @@
1
+ # Phase 4: Apply Improvements
2
+
3
+ > **COMPACT SENTINEL [Phase 4: Improve]**
4
+ > This phase contains 4 execution steps (Step 4.1 -- 4.4).
5
+ > If you can read this sentinel but cannot find the full Step protocol below, context has been compressed.
6
+ > Recovery: `read_file("phases/04-improve.md")`
7
+
8
+ Apply targeted improvements to skill files based on evaluation suggestions. Uses a general-purpose Agent to make changes, ensuring only suggested modifications are applied.
9
+
10
+ ## Objective
11
+
12
+ - Read evaluation suggestions from current iteration
13
+ - Launch Agent to apply improvements in priority order
14
+ - Document all changes made
15
+ - Update iteration state
16
+
17
+ ## Execution
18
+
19
+ ### Step 4.1: Prepare Improvement Context
20
+
21
+ ```javascript
22
+ const N = state.current_iteration;
23
+ const iterDir = `${state.work_dir}/iterations/iteration-${N}`;
24
+ const evaluation = state.iterations[N - 1].evaluation;
25
+
26
+ // Verify we have suggestions to apply
27
+ if (!evaluation.suggestions || evaluation.suggestions.length === 0) {
28
+ // No suggestions -- skip improvement, mark iteration complete
29
+ state.iterations[N - 1].improvement = {
30
+ changes_applied: [],
31
+ changes_file: null,
32
+ improvement_rationale: 'No suggestions provided by evaluation'
33
+ };
34
+ state.iterations[N - 1].status = 'completed';
35
+ write_file(`${state.work_dir}/iteration-state.json`, JSON.stringify(state, null, 2));
36
+ // -> Return to orchestrator for next iteration
37
+ return;
38
+ }
39
+
40
+ // Build file inventory for agent context
41
+ const skillFileInventory = state.target_skills.map(skill => {
42
+ return `Skill: ${skill.name} (${skill.path})\nFiles:\n` +
43
+ skill.files.map(f => ` - ${f}`).join('\n');
44
+ }).join('\n\n');
45
+
46
+ // Chain mode: add chain relationship context
47
+ const chainContext = state.execution_mode === 'chain'
48
+ ? `\nChain Order: ${state.chain_order.join(' -> ')}\n` +
49
+ `Chain Scores: ${state.chain_order.map(s =>
50
+ `${s}: ${state.iterations[N-1].evaluation?.chain_scores?.[s] || 'N/A'}`
51
+ ).join(', ')}\n` +
52
+ `Weakest Link: ${state.chain_order.reduce((min, s) => {
53
+ const score = state.iterations[N-1].evaluation?.chain_scores?.[s] || 100;
54
+ return score < (state.iterations[N-1].evaluation?.chain_scores?.[min] || 100) ? s : min;
55
+ }, state.chain_order[0])}`
56
+ : '';
57
+ ```
58
+
59
+ ### Step 4.2: Launch Improvement Agent
60
+
61
+ > **CHECKPOINT**: Before launching agent, verify:
62
+ > 1. evaluation.suggestions is non-empty
63
+ > 2. All target_file paths in suggestions are valid
64
+
65
+ ```javascript
66
+ const suggestionsText = evaluation.suggestions.map((s, i) =>
67
+ `${i + 1}. [${s.priority.toUpperCase()}] ${s.description}\n` +
68
+ ` Target: ${s.target_file}\n` +
69
+ ` Rationale: ${s.rationale}\n` +
70
+ (s.code_snippet ? ` Suggested change:\n ${s.code_snippet}\n` : '')
71
+ ).join('\n');
72
+
73
+ delegate_subagent({
74
+ subagent_type: 'general-purpose',
75
+ run_in_background: false,
76
+ description: `Apply skill improvements iteration ${N}`,
77
+ prompt: `## Task: Apply Targeted Improvements to Skill Files
78
+
79
+ You are improving a workflow skill based on evaluation feedback. Apply ONLY the suggested changes -- do not refactor, add features, or "improve" beyond what is explicitly suggested.
80
+
81
+ ## Current Score: ${evaluation.score}/100
82
+ Dimension breakdown:
83
+ ${evaluation.dimensions.map(d => `- ${d.name}: ${d.score}/100`).join('\n')}
84
+
85
+ ## Skill File Inventory
86
+ ${skillFileInventory}
87
+
88
+ ${chainContext ? `## Chain Context\n${chainContext}\n\nPrioritize improvements on the weakest skill in the chain. Also consider interface compatibility between adjacent skills in the chain.\n` : ''}
89
+
90
+ ## Improvement Suggestions (apply in priority order)
91
+ ${suggestionsText}
92
+
93
+ ## Rules
94
+ 1. Read each target file BEFORE modifying it
95
+ 2. Apply ONLY the suggested changes -- no unsolicited modifications
96
+ 3. If a suggestion's target_file doesn't exist, skip it and note in summary
97
+ 4. If a suggestion conflicts with existing patterns, adapt it to fit (note adaptation)
98
+ 5. Preserve existing code style, naming conventions, and structure
99
+ 6. After all changes, write a change summary to: ${iterDir}/iteration-${N}-changes.md
100
+
101
+ ## Changes Summary Format (write to ${iterDir}/iteration-${N}-changes.md)
102
+
103
+ # Iteration ${N} Changes
104
+
105
+ ## Applied Suggestions
106
+ - [high] description: what was changed in which file
107
+ - [medium] description: what was changed in which file
108
+
109
+ ## Files Modified
110
+ - path/to/file.md: brief description of changes
111
+
112
+ ## Skipped Suggestions (if any)
113
+ - description: reason for skipping
114
+
115
+ ## Notes
116
+ - Any adaptations or considerations
117
+
118
+ ## Success Criteria
119
+ - All high-priority suggestions applied
120
+ - Medium-priority suggestions applied if feasible
121
+ - Low-priority suggestions applied if trivial
122
+ - Changes summary written to ${iterDir}/iteration-${N}-changes.md
123
+ `
124
+ });
125
+ ```
126
+
127
+ ### Step 4.3: Verify Changes
128
+
129
+ After agent completes:
130
+
131
+ ```javascript
132
+ // Verify changes summary was written
133
+ const changesFile = `${iterDir}/iteration-${N}-changes.md`;
134
+ const changesExist = find_files(changesFile).length > 0;
135
+
136
+ if (!changesExist) {
137
+ // Agent didn't write summary -- create a minimal one
138
+ write_file(changesFile, `# Iteration ${N} Changes\n\n## Notes\nAgent completed but did not produce changes summary.\n`);
139
+ }
140
+
141
+ // Read changes summary to extract applied changes
142
+ const changesContent = read_file(changesFile);
143
+
144
+ // Parse applied changes (heuristic: count lines starting with "- [")
145
+ const appliedMatches = changesContent.match(/^- \[.+?\]/gm) || [];
146
+ const changes_applied = appliedMatches.map(m => ({
147
+ summary: m.replace(/^- /, ''),
148
+ file: '' // Extracted from context
149
+ }));
150
+ ```
151
+
152
+ ### Step 4.4: Update State
153
+
154
+ ```javascript
155
+ state.iterations[N - 1].improvement = {
156
+ changes_applied: changes_applied,
157
+ changes_file: changesFile,
158
+ improvement_rationale: `Applied ${changes_applied.length} improvements based on evaluation score ${evaluation.score}`
159
+ };
160
+ state.iterations[N - 1].status = 'completed';
161
+ state.updated_at = new Date().toISOString();
162
+
163
+ // Also update the skill files list in case new files were created
164
+ for (const skill of state.target_skills) {
165
+ skill.files = find_files(`${skill.path}/**/*.md`).map(f => f.replace(skill.path + '/', ''));
166
+ }
167
+
168
+ write_file(`${state.work_dir}/iteration-state.json`, JSON.stringify(state, null, 2));
169
+
170
+ // -> Return to orchestrator for next iteration (Phase 2) or termination check
171
+ ```
172
+
173
+ ## Error Handling
174
+
175
+ | Error | Recovery |
176
+ |-------|----------|
177
+ | Agent fails to complete | Rollback from skill-snapshot: `cp -r "${iterDir}/skill-snapshot/${skill.name}"/* "${skill.path}/"` |
178
+ | Agent corrupts files | Same rollback from snapshot |
179
+ | Changes summary missing | Create minimal summary, continue |
180
+ | target_file not found | Agent skips suggestion, notes in summary |
181
+
182
+ ## Output
183
+
184
+ - **Files**: `iteration-{N}-changes.md`, modified skill files
185
+ - **State**: `iterations[N-1].improvement` and `.status` updated
186
+ - **Next**: Return to orchestrator, begin next iteration (Phase 2) or terminate
@@ -0,0 +1,166 @@
1
+ # Phase 5: Final Report
2
+
3
+ > **COMPACT SENTINEL [Phase 5: Report]**
4
+ > This phase contains 4 execution steps (Step 5.1 -- 5.4).
5
+ > If you can read this sentinel but cannot find the full Step protocol below, context has been compressed.
6
+ > Recovery: `read_file("phases/05-report.md")`
7
+
8
+ Generate comprehensive iteration history report and display results to user.
9
+
10
+ ## Objective
11
+
12
+ - Read complete iteration state
13
+ - Generate formatted final report with score progression
14
+ - Write final-report.md
15
+ - Display summary to user
16
+
17
+ ## Execution
18
+
19
+ ### Step 5.1: Read Complete State
20
+
21
+ ```javascript
22
+ const state = JSON.parse(read_file(`${state.work_dir}/iteration-state.json`));
23
+ state.status = 'completed';
24
+ state.updated_at = new Date().toISOString();
25
+ ```
26
+
27
+ ### Step 5.2: Generate Report
28
+
29
+ ```javascript
30
+ // Determine outcome
31
+ const outcomeMap = {
32
+ quality_threshold_met: 'PASSED -- Quality threshold reached',
33
+ max_iterations_reached: 'MAX ITERATIONS -- Threshold not reached',
34
+ convergence_detected: 'CONVERGED -- Score stopped improving',
35
+ error_limit_reached: 'FAILED -- Too many errors'
36
+ };
37
+ const outcome = outcomeMap[state.termination_reason] || 'COMPLETED';
38
+
39
+ // Build score progression table
40
+ const scoreTable = state.iterations
41
+ .filter(i => i.evaluation)
42
+ .map(i => {
43
+ const dims = i.evaluation.dimensions || [];
44
+ const dimScores = ['clarity', 'completeness', 'correctness', 'effectiveness', 'efficiency']
45
+ .map(id => {
46
+ const dim = dims.find(d => d.id === id);
47
+ return dim ? dim.score : '-';
48
+ });
49
+ return `| ${i.round} | ${i.evaluation.score} | ${dimScores.join(' | ')} |`;
50
+ }).join('\n');
51
+
52
+ // Build iteration details
53
+ const iterationDetails = state.iterations.map(iter => {
54
+ const evalSection = iter.evaluation
55
+ ? `**Score**: ${iter.evaluation.score}/100\n` +
56
+ `**Strengths**: ${iter.evaluation.strengths?.join(', ') || 'N/A'}\n` +
57
+ `**Weaknesses**: ${iter.evaluation.weaknesses?.slice(0, 3).join(', ') || 'N/A'}`
58
+ : '**Evaluation**: Skipped or failed';
59
+
60
+ const changesSection = iter.improvement
61
+ ? `**Changes Applied**: ${iter.improvement.changes_applied?.length || 0}\n` +
62
+ (iter.improvement.changes_applied?.map(c => ` - ${c.summary}`).join('\n') || ' None')
63
+ : '**Improvements**: None';
64
+
65
+ return `### Iteration ${iter.round}\n${evalSection}\n${changesSection}`;
66
+ }).join('\n\n');
67
+
68
+ const report = `# Skill Iter Tune -- Final Report
69
+
70
+ ## Summary
71
+
72
+ | Field | Value |
73
+ |-------|-------|
74
+ | **Target Skills** | ${state.target_skills.map(s => s.name).join(', ')} |
75
+ | **Execution Mode** | ${state.execution_mode} |
76
+ ${state.execution_mode === 'chain' ? `| **Chain Order** | ${state.chain_order.join(' -> ')} |` : ''}
77
+ | **Test Scenario** | ${state.test_scenario.description} |
78
+ | **Iterations** | ${state.iterations.length} |
79
+ | **Initial Score** | ${state.score_trend[0] || 'N/A'} |
80
+ | **Final Score** | ${state.latest_score}/100 |
81
+ | **Quality Threshold** | ${state.quality_threshold} |
82
+ | **Outcome** | ${outcome} |
83
+ | **Started** | ${state.started_at} |
84
+ | **Completed** | ${state.updated_at} |
85
+
86
+ ## Score Progression
87
+
88
+ | Iter | Composite | Clarity | Completeness | Correctness | Effectiveness | Efficiency |
89
+ |------|-----------|---------|--------------|-------------|---------------|------------|
90
+ ${scoreTable}
91
+
92
+ **Trend**: ${state.score_trend.join(' -> ')}
93
+
94
+ ${state.execution_mode === 'chain' ? `
95
+ ## Chain Score Progression
96
+
97
+ | Iter | ${state.chain_order.join(' | ')} |
98
+ |------|${state.chain_order.map(() => '------').join('|')}|
99
+ ${state.iterations.filter(i => i.evaluation?.chain_scores).map(i => {
100
+ const scores = state.chain_order.map(s => i.evaluation.chain_scores[s] || '-');
101
+ return `| ${i.round} | ${scores.join(' | ')} |`;
102
+ }).join('\n')}
103
+ ` : ''}
104
+
105
+ ## Iteration Details
106
+
107
+ ${iterationDetails}
108
+
109
+ ## Remaining Weaknesses
110
+
111
+ ${state.iterations.length > 0 && state.iterations[state.iterations.length - 1].evaluation
112
+ ? state.iterations[state.iterations.length - 1].evaluation.weaknesses?.map(w => `- ${w}`).join('\n') || 'None identified'
113
+ : 'No evaluation data available'}
114
+
115
+ ## Artifact Locations
116
+
117
+ | Path | Description |
118
+ |------|-------------|
119
+ | \`${state.work_dir}/iteration-state.json\` | Complete state history |
120
+ | \`${state.work_dir}/iterations/iteration-{N}/iteration-{N}-eval.md\` | Per-iteration evaluations |
121
+ | \`${state.work_dir}/iterations/iteration-{N}/iteration-{N}-changes.md\` | Per-iteration change logs |
122
+ | \`${state.work_dir}/final-report.md\` | This report |
123
+ | \`${state.backup_dir}/\` | Original skill backups |
124
+
125
+ ## Restore Original
126
+
127
+ To revert all changes and restore the original skill files:
128
+
129
+ \`\`\`bash
130
+ ${state.target_skills.map(s => `cp -r "${state.backup_dir}/${s.name}"/* "${s.path}/"`).join('\n')}
131
+ \`\`\`
132
+ `;
133
+ ```
134
+
135
+ ### Step 5.3: Write Report and Update State
136
+
137
+ ```javascript
138
+ write_file(`${state.work_dir}/final-report.md`, report);
139
+
140
+ state.status = 'completed';
141
+ write_file(`${state.work_dir}/iteration-state.json`, JSON.stringify(state, null, 2));
142
+ ```
143
+
144
+ ### Step 5.4: Display Summary to User
145
+
146
+ Output to user:
147
+
148
+ ```
149
+ Skill Iter Tune Complete!
150
+
151
+ Target: {skill names}
152
+ Iterations: {count}
153
+ Score: {initial} -> {final} ({outcome})
154
+ Threshold: {threshold}
155
+
156
+ Score trend: {score1} -> {score2} -> ... -> {scoreN}
157
+
158
+ Full report: {workDir}/final-report.md
159
+ Backups: {backupDir}/
160
+ ```
161
+
162
+ ## Output
163
+
164
+ - **Files**: `final-report.md`
165
+ - **State**: `status = completed`
166
+ - **Next**: Workflow complete. Return control to user.