maestro-flow 0.4.16 → 0.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/.agents/skills/maestro/SKILL.md +1 -1
  2. package/.agents/skills/maestro-analyze/SKILL.md +5 -0
  3. package/.agents/skills/maestro-blueprint/SKILL.md +5 -0
  4. package/.agents/skills/maestro-brainstorm/SKILL.md +5 -0
  5. package/.agents/skills/maestro-init/SKILL.md +1 -1
  6. package/.agents/skills/maestro-next/SKILL.md +219 -0
  7. package/.agents/skills/maestro-ralph-beta/SKILL.md +893 -0
  8. package/.agy/skills/maestro/SKILL.md +1 -1
  9. package/.agy/skills/maestro-analyze/SKILL.md +5 -0
  10. package/.agy/skills/maestro-blueprint/SKILL.md +5 -0
  11. package/.agy/skills/maestro-brainstorm/SKILL.md +5 -0
  12. package/.agy/skills/maestro-init/SKILL.md +1 -1
  13. package/.agy/skills/maestro-next/SKILL.md +215 -0
  14. package/.agy/skills/maestro-ralph-beta/SKILL.md +889 -0
  15. package/.claude/commands/maestro-analyze.md +5 -0
  16. package/.claude/commands/maestro-blueprint.md +5 -0
  17. package/.claude/commands/maestro-brainstorm.md +5 -0
  18. package/.claude/commands/maestro-init.md +1 -1
  19. package/.claude/commands/maestro-next.md +217 -0
  20. package/.claude/commands/maestro-ralph-beta.md +891 -0
  21. package/.claude/commands/maestro.md +1 -1
  22. package/.codex/skills/learn-decompose/SKILL.md +34 -3
  23. package/.codex/skills/learn-retro/SKILL.md +31 -1
  24. package/.codex/skills/learn-second-opinion/SKILL.md +34 -4
  25. package/.codex/skills/maestro-analyze/SKILL.md +44 -5
  26. package/.codex/skills/maestro-blueprint/SKILL.md +5 -0
  27. package/.codex/skills/maestro-brainstorm/SKILL.md +46 -0
  28. package/.codex/skills/maestro-execute/SKILL.md +61 -5
  29. package/.codex/skills/maestro-milestone-audit/SKILL.md +64 -13
  30. package/.codex/skills/maestro-milestone-complete/SKILL.md +12 -0
  31. package/.codex/skills/maestro-plan/SKILL.md +36 -1
  32. package/.codex/skills/maestro-player/SKILL.md +25 -6
  33. package/.codex/skills/maestro-ralph/SKILL.md +108 -81
  34. package/.codex/skills/maestro-ralph-beta/SKILL.md +891 -0
  35. package/.codex/skills/maestro-ralph-execute/SKILL.md +244 -0
  36. package/.codex/skills/maestro-roadmap/SKILL.md +35 -4
  37. package/.codex/skills/maestro-ui-codify/SKILL.md +38 -10
  38. package/.codex/skills/maestro-verify/SKILL.md +40 -5
  39. package/.codex/skills/manage-codebase-rebuild/SKILL.md +52 -5
  40. package/.codex/skills/manage-issue-discover/SKILL.md +106 -15
  41. package/.codex/skills/quality-auto-test/SKILL.md +70 -16
  42. package/.codex/skills/quality-debug/SKILL.md +139 -28
  43. package/.codex/skills/quality-refactor/SKILL.md +61 -11
  44. package/.codex/skills/quality-review/SKILL.md +45 -9
  45. package/.codex/skills/quality-test/SKILL.md +58 -3
  46. package/.codex/skills/security-audit/SKILL.md +38 -0
  47. package/.codex/skills/spec-map/SKILL.md +65 -8
  48. package/.codex/skills/team-coordinate/SKILL.md +28 -11
  49. package/.codex/skills/team-coordinate/specs/role-catalog.md +20 -0
  50. package/.codex/skills/team-lifecycle-v4/SKILL.md +23 -7
  51. package/.codex/skills/team-lifecycle-v4/instructions/agent-instruction.md +20 -0
  52. package/.codex/skills/team-quality-assurance/SKILL.md +40 -2
  53. package/.codex/skills/team-review/SKILL.md +42 -2
  54. package/.codex/skills/team-tech-debt/SKILL.md +45 -2
  55. package/.codex/skills/team-testing/SKILL.md +42 -2
  56. package/dashboard/dist-server/dashboard/src/server/wiki/search.d.ts +6 -4
  57. package/dashboard/dist-server/dashboard/src/server/wiki/search.js +50 -8
  58. package/dashboard/dist-server/dashboard/src/server/wiki/search.js.map +1 -1
  59. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.d.ts +32 -0
  60. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js +294 -0
  61. package/dashboard/dist-server/dashboard/src/server/wiki/virtual-wiki-adapters.js.map +1 -1
  62. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.d.ts +1 -0
  63. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js +35 -1
  64. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js.map +1 -1
  65. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.test.js +235 -0
  66. package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.test.js.map +1 -1
  67. package/dist/src/commands/install-backend.d.ts.map +1 -1
  68. package/dist/src/commands/install-backend.js +23 -4
  69. package/dist/src/commands/install-backend.js.map +1 -1
  70. package/dist/src/commands/ralph.d.ts.map +1 -1
  71. package/dist/src/commands/ralph.js +3 -1
  72. package/dist/src/commands/ralph.js.map +1 -1
  73. package/dist/src/ralph/cmd-check.js +1 -1
  74. package/dist/src/ralph/cmd-check.js.map +1 -1
  75. package/dist/src/ralph/cmd-complete.js +1 -1
  76. package/dist/src/ralph/cmd-complete.js.map +1 -1
  77. package/dist/src/ralph/cmd-next.d.ts.map +1 -1
  78. package/dist/src/ralph/cmd-next.js +56 -5
  79. package/dist/src/ralph/cmd-next.js.map +1 -1
  80. package/dist/src/ralph/cmd-session.js +2 -2
  81. package/dist/src/ralph/cmd-session.js.map +1 -1
  82. package/dist/src/ralph/cmd-skills.d.ts +2 -0
  83. package/dist/src/ralph/cmd-skills.d.ts.map +1 -1
  84. package/dist/src/ralph/cmd-skills.js +11 -4
  85. package/dist/src/ralph/cmd-skills.js.map +1 -1
  86. package/dist/src/ralph/skill-scanner.d.ts +7 -2
  87. package/dist/src/ralph/skill-scanner.d.ts.map +1 -1
  88. package/dist/src/ralph/skill-scanner.js +39 -24
  89. package/dist/src/ralph/skill-scanner.js.map +1 -1
  90. package/dist/src/ralph/status-schema.d.ts +2 -0
  91. package/dist/src/ralph/status-schema.d.ts.map +1 -1
  92. package/dist/src/ralph/status-store.d.ts +8 -1
  93. package/dist/src/ralph/status-store.d.ts.map +1 -1
  94. package/dist/src/ralph/status-store.js +12 -2
  95. package/dist/src/ralph/status-store.js.map +1 -1
  96. package/dist/src/tools/store-knowhow.d.ts.map +1 -1
  97. package/dist/src/tools/store-knowhow.js +51 -64
  98. package/dist/src/tools/store-knowhow.js.map +1 -1
  99. package/dist/src/utils/update-notices.js +12 -0
  100. package/dist/src/utils/update-notices.js.map +1 -1
  101. package/package.json +1 -1
  102. package/templates/config.json +21 -33
  103. package/workflows/finish-work.md +119 -0
  104. package/workflows/init.md +11 -11
  105. package/workflows/milestone-complete.md +23 -1
@@ -247,31 +247,63 @@ Initialize `discovery-state.json`:
247
247
  spawn_agents_on_csv({
248
248
  csv_path: `${sessionFolder}/wave-1.csv`,
249
249
  id_column: "id",
250
- instruction: buildDiscoverInstruction(sessionFolder, discoveryDir, mode),
250
+ instruction: DISCOVER_PERSPECTIVE_INSTRUCTION, // see "Perspective Worker Contract" below
251
251
  max_concurrency: maxConcurrency,
252
252
  max_runtime_seconds: 3600,
253
253
  output_csv_path: `${sessionFolder}/wave-1-results.csv`,
254
- output_schema: { // required: id, result_status, findings
255
- id: "string", result_status: "completed|failed",
256
- findings: "string", issues_found: "string",
257
- severity_distribution: "string", error: "string"
254
+ output_schema: {
255
+ type: "object",
256
+ properties: {
257
+ id: { type: "string" },
258
+ result_status: { type: "string", enum: ["completed", "failed"] },
259
+ findings: { type: "string", maxLength: 500 },
260
+ issues_found: { type: "string", description: "JSON array string" },
261
+ severity_distribution: { type: "string", description: "JSON object string {critical, high, medium, low}" },
262
+ error: { type: "string" }
263
+ },
264
+ required: ["id", "result_status", "findings"]
258
265
  }
259
266
  })
260
267
  ```
261
268
 
262
- 6. Merge `wave-1-results.csv` into master `tasks.csv` (map `result_status` -> master `status` column)
269
+ 6. Merge `wave-1-results.csv` into master `tasks.csv` (map `result_status` -> master `status` column; copy `findings`, `issues_found`, `severity_distribution`, `error`)
263
270
  7. Save per-perspective findings to `{discoveryDir}/{perspective}-findings.json`
264
271
  8. Update `discovery-state.json` with completed perspectives
265
272
  9. Delete temporary files: `wave-1.csv` and `wave-1-results.csv`
266
273
 
267
- **Perspective scan agent protocol**:
268
- - Scan all source files matching scope_glob
269
- - Identify concrete issues with file:line references
270
- - Rate each finding: critical / high / medium / low
271
- - Provide brief fix direction for each finding
272
- - Report affected_components[]
273
- - Share cross-cutting discoveries via discovery board
274
- - Output issues_found as JSON array + severity_distribution as JSON object
274
+ #### Perspective Worker Contract (DISCOVER_PERSPECTIVE_INSTRUCTION)
275
+
276
+ ```
277
+ You are a perspective scanner for ONE dimension. Your perspective, scope_glob, and description come from your CSV row.
278
+
279
+ REQUIRED STEPS:
280
+ 1. Read shared discoveries: {sessionFolder}/discoveries.ndjson (may be empty)
281
+ 2. Scan all files matching scope_glob using Read/Grep/Glob (read-only)
282
+ 3. For each finding: capture title, severity (critical|high|medium|low), description, file:line location, fix_direction, affected_components[]
283
+ 4. Append cross-cutting patterns to discoveries.ndjson (dedup by type+key)
284
+ 5. Call report_agent_job_result EXACTLY ONCE
285
+
286
+ TERMINATION CONTRACT (mandatory — NO worker may end without calling report_agent_job_result):
287
+ - Success path → result_status=completed (issues_found may be empty array if nothing found)
288
+ - Timeout path → near max_runtime_seconds, STOP and report completed with partial issues_found (do NOT report failed for timeout — partial work is valuable)
289
+ - Failure path → unrecoverable error (cannot read scope, parse failure) → result_status=failed with error message
290
+ - NEVER continue indefinitely. NEVER exit silently. NEVER omit the call.
291
+
292
+ OUTPUT (return via report_agent_job_result; must match output_schema):
293
+ {
294
+ "id": "<your row id>",
295
+ "result_status": "completed" | "failed",
296
+ "findings": "<one-sentence summary, max 500 chars>",
297
+ "issues_found": "<JSON array string: [{\"title\":\"...\",\"severity\":\"...\",\"description\":\"...\",\"location\":\"file:line\",\"fix_direction\":\"...\",\"affected_components\":[...]}]>",
298
+ "severity_distribution": "<JSON object string: {\"critical\":N,\"high\":N,\"medium\":N,\"low\":N}>",
299
+ "error": "<message if failed, else empty>"
300
+ }
301
+
302
+ CONSTRAINTS:
303
+ - Every finding MUST have a concrete file:line reference. No speculative issues.
304
+ - Do NOT write to tasks.csv, wave-*.csv, results.csv, or issues.jsonl (orchestrator owns those).
305
+ - Do NOT call spawn_agents_on_csv (no recursion).
306
+ ```
275
307
 
276
308
  #### Wave 2: Dedup + Issue Creation (Single Agent)
277
309
 
@@ -285,10 +317,69 @@ spawn_agents_on_csv({
285
317
  ...
286
318
  ```
287
319
  5. Write `wave-2.csv` with `prev_context` column
288
- 6. Execute `spawn_agents_on_csv` for dedup agent
320
+ 6. Execute:
321
+
322
+ ```javascript
323
+ spawn_agents_on_csv({
324
+ csv_path: `${sessionFolder}/wave-2.csv`,
325
+ id_column: "id",
326
+ instruction: DEDUP_INSTRUCTION, // see "Dedup Worker Contract" below
327
+ max_concurrency: 1,
328
+ max_runtime_seconds: 1800,
329
+ output_csv_path: `${sessionFolder}/wave-2-results.csv`,
330
+ output_schema: {
331
+ type: "object",
332
+ properties: {
333
+ id: { type: "string" },
334
+ result_status: { type: "string", enum: ["completed", "failed"] },
335
+ findings: { type: "string", maxLength: 500 },
336
+ issues_found: { type: "string", description: "JSON array of deduplicated issues with ISS-* IDs" },
337
+ severity_distribution: { type: "string" },
338
+ error: { type: "string" }
339
+ },
340
+ required: ["id", "result_status", "findings"]
341
+ }
342
+ })
343
+ ```
344
+
289
345
  7. Merge results into master `tasks.csv` (map `result_status` -> master `status` column)
290
346
  8. Delete temporary files: `wave-2.csv` and `wave-2-results.csv`
291
347
 
348
+ #### Dedup Worker Contract (DEDUP_INSTRUCTION)
349
+
350
+ ```
351
+ You are the dedup + issue creation worker. Your prev_context contains all wave-1 perspective findings.
352
+
353
+ REQUIRED STEPS:
354
+ 1. Parse prev_context — extract every issues_found JSON from upstream rows
355
+ 2. Deduplicate: group by file path, compare descriptions (>80% overlap or same file:line → keep higher severity)
356
+ 3. Assign collision-safe ID per unique issue: ISS-YYYYMMDD-NNN
357
+ 4. Build full issue record (severity→priority: critical→1/high→2/medium→3/low→4; source="discover"; tags=[perspective])
358
+ 5. Append deduplicated records to .workflow/issues/issues.jsonl AND {discoveryDir}/discovery-issues.jsonl
359
+ 6. Call report_agent_job_result EXACTLY ONCE
360
+
361
+ TERMINATION CONTRACT (mandatory):
362
+ - Success → result_status=completed with issues_found = final deduped JSON array
363
+ - Timeout → near max_runtime_seconds, persist partial dedup, report completed with note in findings
364
+ - Failure → file write error, parse error → result_status=failed
365
+ - NEVER skip report_agent_job_result.
366
+
367
+ OUTPUT (must match output_schema):
368
+ {
369
+ "id": "<your row id>",
370
+ "result_status": "completed" | "failed",
371
+ "findings": "<pre-dedup count → post-dedup count summary>",
372
+ "issues_found": "<JSON array of final deduplicated issues>",
373
+ "severity_distribution": "<JSON object: {critical, high, medium, low}>",
374
+ "error": "<message if failed, else empty>"
375
+ }
376
+
377
+ CONSTRAINTS:
378
+ - Append-only writes to issues.jsonl. Never overwrite existing records.
379
+ - Every record MUST include source: "discover".
380
+ - Do NOT call spawn_agents_on_csv (no recursion).
381
+ ```
382
+
292
383
  **Dedup agent protocol**:
293
384
  - Merge all perspective findings from prev_context into single list
294
385
  - Deduplicate: group by file path, compare descriptions (>80% overlap or same file:line → keep higher severity)
@@ -141,6 +141,28 @@ For each layer L1->L3 (sequential, respecting --layer filter):
141
141
  6. Record per-scenario pass/fail
142
142
  7. Fail-fast: any critical-priority failed -> stop layer progression
143
143
 
144
+ **Test Writer Spawn output_schema** (strict JSON Schema, used for both writer + diagnosis spawns):
145
+
146
+ ```json
147
+ {
148
+ "type": "object",
149
+ "properties": {
150
+ "id": { "type": "string" },
151
+ "result_status": { "type": "string", "enum": ["completed", "failed", "blocked"] },
152
+ "red_result": { "type": "string", "enum": ["expected_fail", "pass", "unexpected_fail", ""] },
153
+ "classification": { "type": "string", "enum": ["test_defect", "code_defect", "env_issue", ""] },
154
+ "fix_code": { "type": "string" },
155
+ "evidence": { "type": "string" },
156
+ "findings": { "type": "string", "maxLength": 500 },
157
+ "files_modified": { "type": "string" },
158
+ "error": { "type": "string" }
159
+ },
160
+ "required": ["id", "result_status", "findings"]
161
+ }
162
+ ```
163
+
164
+ Merge: `result_status` → master `status`; copy `red_result` / `classification` / `fix_code` / `evidence` / `findings` / `files_modified` / `error`.
165
+
144
166
  **Test Writer Agent Instruction** (injected into spawn_agents_on_csv):
145
167
  ```
146
168
  You are a test writer. Write ONE test file for the given scenario.
@@ -154,16 +176,33 @@ You are a test writer. Write ONE test file for the given scenario.
154
176
  - Run test file once after writing
155
177
 
156
178
  ## RED-GREEN Rules
157
- - Test PASSES immediately: note "pass" — may need strengthening
158
- - Test FAILS as expected (tests real behavior): note "expected_fail" — good
159
- - Test FAILS unexpectedly (setup/import error): fix test setup, note "unexpected_fail"
179
+ - Test PASSES immediately: red_result="pass" — may need strengthening
180
+ - Test FAILS as expected (tests real behavior): red_result="expected_fail" — good
181
+ - Test FAILS unexpectedly (setup/import error): fix test setup, red_result="unexpected_fail"
160
182
  - NEVER modify source code — only write/fix test files
161
183
 
162
- ## Output
163
- - status: "written" if created, "failed" if unable
164
- - red_result: the RED phase outcome
165
- - findings: patterns discovered, notes for dependent scenarios (max 500 chars)
166
- - error: only if status == "failed"
184
+ ## Termination Contract (MANDATORY)
185
+ You MUST call report_agent_job_result EXACTLY ONCE before exiting.
186
+ - Success → result_status=completed (test file written + run executed; red_result populated)
187
+ - Failure → result_status=failed (cannot write test file, parse error, missing target)
188
+ - Blocked → result_status=blocked (test framework unavailable)
189
+ - Timeout → near max_runtime_seconds → result_status=failed with error="timeout"
190
+ - NEVER continue indefinitely. NEVER exit silently. NEVER omit the call.
191
+
192
+ ## Output (must match output_schema)
193
+ {
194
+ "id": "<row id>",
195
+ "result_status": "completed" | "failed" | "blocked",
196
+ "red_result": "expected_fail" | "pass" | "unexpected_fail" | "",
197
+ "findings": "<patterns discovered, notes for dependent scenarios, max 500 chars>",
198
+ "files_modified": "<test file path>",
199
+ "error": "<message if not completed, else empty>"
200
+ }
201
+
202
+ ## Hard Constraints
203
+ - Do NOT modify source code under test (only test files).
204
+ - Do NOT write to scenarios.csv, layer-L*.csv, results.csv (orchestrator owns those).
205
+ - Do NOT call spawn_agents_on_csv (no recursion).
167
206
 
168
207
  ## Context
169
208
  - prev_context: {prev_context} (findings from prior layer)
@@ -181,7 +220,7 @@ OUTER LOOP (max_iter iterations):
181
220
  3. Diagnosis agent (see instruction below). test_defect -> provide fix. code_defect -> document evidence.
182
221
  4. Apply test_defect fixes, re-run layer
183
222
 
184
- **Diagnosis Agent Instruction** (injected into spawn_agents_on_csv):
223
+ **Diagnosis Agent Instruction** (injected into spawn_agents_on_csv; uses same output_schema as Test Writer):
185
224
  ```
186
225
  You are a test failure diagnostician. Classify ONE test failure.
187
226
 
@@ -193,16 +232,31 @@ You are a test failure diagnostician. Classify ONE test failure.
193
232
  - code_defect: Source violates business rule (actual != expected requirement)
194
233
  - env_issue: Environment problem (service down, config missing, timeout)
195
234
 
196
- ## Output
197
- - classification: test_defect / code_defect / env_issue
198
- - fix_code: If test_defect: "old_line new_line" or full replacement. Empty for others.
199
- - evidence: file:line references supporting classification
200
- - error: only if cannot determine
201
-
202
- ## Rules
235
+ ## Termination Contract (MANDATORY)
236
+ You MUST call report_agent_job_result EXACTLY ONCE before exiting.
237
+ - Success → result_status=completed with concrete classification
238
+ - Failure → result_status=failed if you cannot read test_file or target_file
239
+ - Blocked → result_status=blocked when env_issue prevents diagnosis
240
+ - Timeout → near max_runtime_seconds → result_status=failed with error="timeout"
241
+ - NEVER continue indefinitely. NEVER exit silently. NEVER omit the call.
242
+
243
+ ## Output (must match output_schema)
244
+ {
245
+ "id": "<row id>",
246
+ "result_status": "completed" | "failed" | "blocked",
247
+ "classification": "test_defect" | "code_defect" | "env_issue",
248
+ "fix_code": "<old_line → new_line or full replacement (test_defect only); empty otherwise>",
249
+ "evidence": "<file:line refs supporting classification>",
250
+ "findings": "<one-sentence diagnosis summary, max 500 chars>",
251
+ "error": "<message if not completed>"
252
+ }
253
+
254
+ ## Hard Constraints
203
255
  - NEVER suggest source code changes — only test fixes for test_defect
204
256
  - Test correctly catching a real bug = code_defect, not test_defect
205
257
  - When uncertain: prefer code_defect (conservative)
258
+ - Do NOT write to scenarios.csv, layer-L*.csv, results.csv (orchestrator owns those).
259
+ - Do NOT call spawn_agents_on_csv (no recursion).
206
260
  ```
207
261
  5. If no test_defects remain: break inner
208
262
  REFLECT: analyze trends, log strategy, test confidence scoring (5 dims: scenario_coverage, test_quality, diagnostic_accuracy, strategy_effectiveness, infrastructure_fitness)
@@ -91,12 +91,12 @@ When `--yes` or `-y`: Auto-confirm hypothesis selection, skip interactive sympto
91
91
  ### tasks.csv (Master State)
92
92
 
93
93
  ```csv
94
- id,title,description,hypothesis,deps,context_from,wave
95
- "H1","Null pointer in login handler","Investigate whether login handler crashes due to null user object after failed DB lookup","User object is null when DB returns empty result; login.ts:42 dereferences without null check","","","1"
96
- "H2","Missing error boundary","Investigate whether unhandled promise rejection in auth middleware propagates to 500","Auth middleware catches DB errors but not validation errors; middleware.ts:78 has no catch block","","","1"
97
- "H3","Stale session token","Investigate whether expired session tokens bypass refresh logic","Session refresh only triggers on 403 but server returns 401 for expired tokens; session.ts:15","","","1"
98
- "FIX-H1","Fix null pointer in login","Apply null check before user object dereference in login handler","","H1","H1","2"
99
- "FIX-H3","Fix session token refresh","Update refresh trigger to also handle 401 status codes","","H3","H3","2"
94
+ id,title,description,hypothesis,deps,context_from,wave,status,findings,evidence_for,evidence_against,fix_applied,verified,error
95
+ "H1","Null pointer in login handler","Investigate whether login handler crashes due to null user object after failed DB lookup","User object is null when DB returns empty result; login.ts:42 dereferences without null check","","","1","pending","","","","","",""
96
+ "H2","Missing error boundary","Investigate whether unhandled promise rejection in auth middleware propagates to 500","Auth middleware catches DB errors but not validation errors; middleware.ts:78 has no catch block","","","1","pending","","","","","",""
97
+ "H3","Stale session token","Investigate whether expired session tokens bypass refresh logic","Session refresh only triggers on 403 but server returns 401 for expired tokens; session.ts:15","","","1","pending","","","","","",""
98
+ "FIX-H1","Fix null pointer in login","Apply null check before user object dereference in login handler","","H1","H1","2","pending","","","","","",""
99
+ "FIX-H3","Fix session token refresh","Update refresh trigger to also handle 401 status codes","","H3","H3","2","pending","","","","","",""
100
100
  ```
101
101
 
102
102
  **Columns**:
@@ -110,15 +110,17 @@ id,title,description,hypothesis,deps,context_from,wave
110
110
  | `deps` | Input | Semicolon-separated dependency task IDs (wave 2 depends on wave 1) |
111
111
  | `context_from` | Input | Semicolon-separated task IDs whose findings this task needs |
112
112
  | `wave` | Input | Wave number (1 = investigation, 2 = fix attempt) |
113
- | `result_status` | Output | `confirmed` / `refuted` / `inconclusive` / `fixed` / `fix_failed` / `failed` |
114
- | `findings` | Output | Key findings summary (max 500 chars) |
115
- | `evidence_for` | Output | Evidence supporting the hypothesis (wave 1) |
116
- | `evidence_against` | Output | Evidence refuting the hypothesis (wave 1) |
117
- | `fix_applied` | Output | Description of fix applied (wave 2 only) |
118
- | `verified` | Output | `true` / `false` -- whether fix was verified to work (wave 2 only) |
119
- | `error` | Output | Error message if failed |
113
+ | `status` | Lifecycle | `pending` (initial) → `confirmed`/`refuted`/`inconclusive`/`fixed`/`fix_failed`/`failed`/`skipped` (set by merge step from worker's `result_status`) |
114
+ | `findings` | Lifecycle | Key findings summary (max 500 chars; merged from worker output) |
115
+ | `evidence_for` | Lifecycle | Evidence supporting the hypothesis (wave 1; merged) |
116
+ | `evidence_against` | Lifecycle | Evidence refuting the hypothesis (wave 1; merged) |
117
+ | `fix_applied` | Lifecycle | Description of fix applied (wave 2 only; merged) |
118
+ | `verified` | Lifecycle | `true` / `false` — whether fix was verified to work (wave 2 only; merged) |
119
+ | `error` | Lifecycle | Error message if failed (merged) |
120
120
 
121
- **Column separation rule**: Input columns and Output columns MUST NOT share names. Wave CSV only contains Input columns + `prev_context`. Output columns are returned exclusively via `output_schema`.
121
+ **Column separation rule**: Wave CSV (input to `spawn_agents_on_csv`) contains Input columns + `prev_context` only. Lifecycle columns are NEVER passed to workers. Workers return Output columns exclusively via `output_schema` — those output column names MUST NOT collide with Input column names. During merge: `result_status` → master `status`; other output columns copied as-is into matching lifecycle columns.
122
+
123
+ **Initial state**: All rows are written with `status="pending"` and empty lifecycle columns. Each wave selects rows where `wave == N AND status == "pending"` from the master CSV.
122
124
 
123
125
  ### Per-Wave CSV (Temporary)
124
126
 
@@ -234,41 +236,150 @@ mkdir -p {sessionFolder}
234
236
 
235
237
  #### Wave 1: Hypothesis Investigation (Parallel)
236
238
 
237
- 1. Extract wave 1 pending rows from master `tasks.csv` into `wave-1.csv` (no prev_context needed)
238
- 2. Execute:
239
+ 1. **Extract wave-1 input**: filter master `tasks.csv` rows where `wave == 1 AND status == "pending"` → write `wave-1.csv` containing ONLY input columns (id, title, description, hypothesis, deps, context_from, wave). No lifecycle columns, no prev_context (wave 1 has no upstream).
240
+ 2. **Execute**:
239
241
 
240
242
  ```javascript
241
243
  spawn_agents_on_csv({
242
244
  csv_path: `${sessionFolder}/wave-1.csv`,
243
245
  id_column: "id",
244
- instruction: buildInvestigationInstruction(sessionFolder), // agent: ~/.codex/agents/workflow-debugger.toml
245
- max_concurrency: maxConcurrency, max_runtime_seconds: 3600,
246
+ instruction: WAVE1_INVESTIGATION_INSTRUCTION, // see "Wave 1 Worker Contract" below
247
+ max_concurrency: maxConcurrency,
248
+ max_runtime_seconds: 3600,
246
249
  output_csv_path: `${sessionFolder}/wave-1-results.csv`,
247
- output_schema: { id, result_status: [confirmed|refuted|inconclusive|failed], findings, evidence_for, evidence_against, error }
250
+ output_schema: {
251
+ type: "object",
252
+ properties: {
253
+ id: { type: "string" },
254
+ result_status: { type: "string", enum: ["confirmed", "refuted", "inconclusive", "failed"] },
255
+ findings: { type: "string", maxLength: 500 },
256
+ evidence_for: { type: "string" },
257
+ evidence_against: { type: "string" },
258
+ error: { type: "string" }
259
+ },
260
+ required: ["id", "result_status", "findings"]
261
+ }
248
262
  })
249
263
  ```
250
264
 
251
- 3. Merge `wave-1-results.csv` into master `tasks.csv` (map `result_status` → master `status` column), delete `wave-1.csv` and `wave-1-results.csv`
252
- 4. **Filter for wave 2**: Mark fix tasks as `skipped` if their hypothesis `result_status` was `refuted` or `inconclusive`
265
+ 3. **Merge**: for each row in `wave-1-results.csv`, look up master row by `id` and write `master.status = result_status`, then copy `findings`, `evidence_for`, `evidence_against`, `error`. Delete `wave-1.csv` and `wave-1-results.csv`.
266
+ 4. **Wave 2 gating** (read from MASTER `tasks.csv` after merge, NOT from wave-1-results.csv):
267
+ - For each `FIX-H{N}` row: read its `context_from` hypothesis ID (e.g., `H{N}`) from master; if master `H{N}.status != "confirmed"`, set `FIX-H{N}.status = "skipped"` (with findings = "upstream {H{N}.status}").
268
+ - Only rows where `status == "pending"` proceed to wave 2.
269
+
270
+ #### Wave 1 Worker Contract (WAVE1_INVESTIGATION_INSTRUCTION)
271
+
272
+ The literal `instruction` string passed to `spawn_agents_on_csv` MUST include the following contract (substitute `{sessionFolder}` at build time):
273
+
274
+ ```
275
+ You are a hypothesis investigation worker. ONE hypothesis row from wave-1.csv is assigned to you.
276
+
277
+ INPUT (from your CSV row):
278
+ - id, title, hypothesis, description
279
+
280
+ REQUIRED STEPS:
281
+ 1. Read shared discoveries: {sessionFolder}/discoveries.ndjson (may be empty)
282
+ 2. Scan codebase for evidence using Read/Grep/Glob (read-only investigation)
283
+ 3. Classify the hypothesis based on evidence collected:
284
+ - confirmed → strong evidence supports the hypothesis (file:line proof)
285
+ - refuted → strong evidence contradicts the hypothesis
286
+ - inconclusive → insufficient evidence within time budget; do NOT guess
287
+ - failed → tool error / cannot read files / blocked by environment
288
+ 4. Append discoveries to {sessionFolder}/discoveries.ndjson if reusable (root_cause / hypothesis_evidence types)
289
+ 5. Call report_agent_job_result EXACTLY ONCE with the verdict
290
+
291
+ TERMINATION CONTRACT (mandatory — NO worker may end without calling report_agent_job_result):
292
+ - Success path → result_status = confirmed | refuted, with evidence
293
+ - Timeout path → if approaching {max_runtime_seconds}, STOP investigation and report inconclusive
294
+ - Failure path → on any unrecoverable error, report failed with error message
295
+ - NEVER continue indefinitely. NEVER exit silently. NEVER omit the call.
296
+
297
+ OUTPUT (return via report_agent_job_result; must match output_schema):
298
+ {
299
+ "id": "<your row id>",
300
+ "result_status": "confirmed" | "refuted" | "inconclusive" | "failed",
301
+ "findings": "<one-sentence summary, max 500 chars>",
302
+ "evidence_for": "<bullet list of file:line refs supporting, or empty>",
303
+ "evidence_against": "<bullet list of file:line refs refuting, or empty>",
304
+ "error": "<message if failed, else empty>"
305
+ }
306
+
307
+ CONSTRAINTS:
308
+ - Do NOT modify source code. This is investigation only.
309
+ - Do NOT write to tasks.csv, wave-*.csv, or results.csv (orchestrator owns those).
310
+ - Do NOT call spawn_agents_on_csv (no recursion).
311
+ ```
253
312
 
254
313
  #### Wave 2: Fix Attempts (Parallel, Confirmed Only)
255
314
 
256
- 1. If no confirmed hypotheses remain, skip wave 2 entirely
257
- 2. Extract wave 2 pending rows, build `prev_context` from confirmed wave 1 findings
258
- 3. Write `wave-2.csv`, then execute:
315
+ 1. If no master rows have `wave == 2 AND status == "pending"` after gating, skip wave 2 entirely.
316
+ 2. **Extract wave-2 input**: filter master `tasks.csv` where `wave == 2 AND status == "pending"`. For each row, build `prev_context` by concatenating findings/evidence_for from each ID in `context_from` (read from master). Write `wave-2.csv` with input columns + `prev_context`.
317
+ 3. **Execute**:
259
318
 
260
319
  ```javascript
261
320
  spawn_agents_on_csv({
262
321
  csv_path: `${sessionFolder}/wave-2.csv`,
263
322
  id_column: "id",
264
- instruction: buildFixInstruction(sessionFolder), // agent: ~/.codex/agents/workflow-debugger.toml
265
- max_concurrency: maxConcurrency, max_runtime_seconds: 3600,
323
+ instruction: WAVE2_FIX_INSTRUCTION, // see "Wave 2 Worker Contract" below
324
+ max_concurrency: maxConcurrency,
325
+ max_runtime_seconds: 3600,
266
326
  output_csv_path: `${sessionFolder}/wave-2-results.csv`,
267
- output_schema: { id, result_status: [fixed|fix_failed|failed], findings, fix_applied, verified, error }
327
+ output_schema: {
328
+ type: "object",
329
+ properties: {
330
+ id: { type: "string" },
331
+ result_status: { type: "string", enum: ["fixed", "fix_failed", "failed"] },
332
+ findings: { type: "string", maxLength: 500 },
333
+ fix_applied: { type: "string" },
334
+ verified: { type: "string", enum: ["true", "false"] },
335
+ error: { type: "string" }
336
+ },
337
+ required: ["id", "result_status", "findings", "verified"]
338
+ }
268
339
  })
269
340
  ```
270
341
 
271
- 4. Merge `wave-2-results.csv` into master `tasks.csv` (map `result_status` → master `status` column), delete `wave-2.csv` and `wave-2-results.csv`
342
+ 4. **Merge**: write `master.status = result_status`, copy `findings`, `fix_applied`, `verified`, `error`. Delete `wave-2.csv` and `wave-2-results.csv`.
343
+
344
+ #### Wave 2 Worker Contract (WAVE2_FIX_INSTRUCTION)
345
+
346
+ ```
347
+ You are a fix worker. ONE confirmed hypothesis row is assigned to you.
348
+
349
+ INPUT (from your CSV row):
350
+ - id (FIX-H{N}), title, description, prev_context (confirmed evidence from H{N})
351
+
352
+ REQUIRED STEPS:
353
+ 1. Read prev_context — the confirmed root cause evidence
354
+ 2. Apply the minimal fix using Edit / Write
355
+ 3. Run verification:
356
+ - If project has tests: run the relevant test suite via Bash
357
+ - If no tests: re-read the modified file and confirm the fix matches the planned change
358
+ 4. Append discoveries (type=fix_applied) to {sessionFolder}/discoveries.ndjson if reusable
359
+ 5. Call report_agent_job_result EXACTLY ONCE
360
+
361
+ TERMINATION CONTRACT (mandatory):
362
+ - Success path → fix applied AND verified → result_status=fixed, verified="true"
363
+ - Partial path → fix applied but verification failed → result_status=fix_failed, verified="false"
364
+ - Timeout path → approaching {max_runtime_seconds} with no fix applied → result_status=fix_failed, verified="false", with error="timeout"
365
+ - Failure path → cannot apply fix (file missing, parse error, etc.) → result_status=failed, verified="false", with error message
366
+ - NEVER continue indefinitely. NEVER exit silently. NEVER omit the call.
367
+
368
+ OUTPUT (return via report_agent_job_result; must match output_schema):
369
+ {
370
+ "id": "<your row id>",
371
+ "result_status": "fixed" | "fix_failed" | "failed",
372
+ "findings": "<one-sentence summary of what was changed, max 500 chars>",
373
+ "fix_applied": "<file:line description of the change>",
374
+ "verified": "true" | "false",
375
+ "error": "<message if failed, else empty>"
376
+ }
377
+
378
+ CONSTRAINTS:
379
+ - Modify ONLY files implicated by prev_context evidence. No drive-by refactors.
380
+ - Do NOT write to tasks.csv, wave-*.csv, or results.csv.
381
+ - Do NOT call spawn_agents_on_csv (no recursion).
382
+ ```
272
383
 
273
384
  ### Phase 3: Results Aggregation
274
385
 
@@ -169,24 +169,74 @@ For each wave N in ascending order:
169
169
 
170
170
  ```javascript
171
171
  spawn_agents_on_csv({
172
- csv_path: `${sessionFolder}/wave-${N}.csv`,
172
+ csv_path: `${sessionFolder}/wave-${N}.csv`, // only rows where wave==N AND status=="pending"
173
173
  id_column: "id",
174
- instruction: `You are a refactoring executor. For each task:
175
- 1. Read files listed in read_first to understand context
176
- 2. Apply refactoring described in description targeting scope files
177
- 3. Verify convergence_criteria via grep (all criteria must pass)
178
- 4. Run verification_cmd and report test result
179
- 5. If tests fail: revert ALL changes for this task, set result_status=failed
180
- 6. Append discoveries to ${sessionFolder}/discoveries.ndjson
181
- Report: files_modified (semicolon-separated), tests_passed (true/false), findings (what was changed and why)`,
182
- max_concurrency: 1, max_runtime_seconds: 1800,
174
+ instruction: REFACTOR_INSTRUCTION, // see "Refactor Worker Contract" below
175
+ max_concurrency: 1,
176
+ max_runtime_seconds: 1800,
183
177
  output_csv_path: `${sessionFolder}/wave-${N}-results.csv`,
184
- output_schema: { id, result_status: [completed|failed|blocked], findings, files_modified, tests_passed, error }
178
+ output_schema: {
179
+ type: "object",
180
+ properties: {
181
+ id: { type: "string" },
182
+ result_status: { type: "string", enum: ["completed", "failed", "blocked"] },
183
+ findings: { type: "string", maxLength: 500 },
184
+ files_modified: { type: "string", description: "Semicolon-separated paths (empty if reverted)" },
185
+ tests_passed: { type: "string", enum: ["true", "false"] },
186
+ error: { type: "string" }
187
+ },
188
+ required: ["id", "result_status", "findings", "tests_passed"]
189
+ }
185
190
  })
186
191
  ```
187
192
 
188
193
  4. Merge results into master `tasks.csv`: map `result_status` -> master `status` column, copy `findings`, `files_modified`, `tests_passed`, `error` into master. Delete temporary `wave-{N}.csv` and `wave-{N}-results.csv`.
189
194
 
195
+ #### Refactor Worker Contract (REFACTOR_INSTRUCTION)
196
+
197
+ ```
198
+ You are a refactoring executor. ONE task row is assigned to you.
199
+
200
+ INPUT (from your CSV row):
201
+ - id, title, description (refactoring plan)
202
+ - read_first (semicolon-separated paths to read for context)
203
+ - scope (files in refactor scope)
204
+ - convergence_criteria (grep patterns that must pass after refactor)
205
+ - verification_cmd (test command to run)
206
+ - prev_context (findings from upstream tasks)
207
+
208
+ REQUIRED STEPS:
209
+ 1. Read all files in read_first to understand context
210
+ 2. Apply refactoring per description, modifying only files in scope
211
+ 3. Verify EVERY convergence_criterion via grep (ALL must pass; ANY miss → failure)
212
+ 4. Run verification_cmd via Bash; capture pass/fail
213
+ 5. If tests fail OR convergence fails → revert ALL changes for this task using git (or by manually reversing each change with Edit), set files_modified=""
214
+ 6. Append discoveries (type=implementation_note / pattern) to {sessionFolder}/discoveries.ndjson
215
+ 7. Call report_agent_job_result EXACTLY ONCE
216
+
217
+ TERMINATION CONTRACT (mandatory — NO worker may end without calling report_agent_job_result):
218
+ - Success path → tests pass AND convergence passes → result_status=completed, tests_passed="true"
219
+ - Failed path → tests fail OR convergence fails → REVERT, result_status=failed, tests_passed="false"
220
+ - Blocked path → cannot apply (file missing, parse error, unclear scope) → result_status=blocked, tests_passed="false" (required by output_schema even though no tests ran), error="<reason>"
221
+ - Timeout path → approaching max_runtime_seconds → REVERT partial changes, result_status=failed, tests_passed="false", files_modified="", error="timeout"
222
+ - NEVER continue indefinitely. NEVER exit silently. NEVER omit the call.
223
+
224
+ OUTPUT (return via report_agent_job_result; must match output_schema):
225
+ {
226
+ "id": "<your row id>",
227
+ "result_status": "completed" | "failed" | "blocked",
228
+ "findings": "<what was changed and why, max 500 chars>",
229
+ "files_modified": "<semicolon-separated paths or empty if reverted>",
230
+ "tests_passed": "true" | "false",
231
+ "error": "<message if not completed, else empty>"
232
+ }
233
+
234
+ CONSTRAINTS:
235
+ - Modify ONLY files in scope. Never drive-by edit unrelated files.
236
+ - Do NOT write to tasks.csv, wave-*.csv, results.csv, reflection-log.md (orchestrator owns those).
237
+ - Do NOT call spawn_agents_on_csv (no recursion).
238
+ ```
239
+
190
240
  **5b. Reflect per wave:**
191
241
 
192
242
  Append to `reflection-log.md`: