get-research-done 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +560 -0
  3. package/agents/grd-architect.md +789 -0
  4. package/agents/grd-codebase-mapper.md +738 -0
  5. package/agents/grd-critic.md +1065 -0
  6. package/agents/grd-debugger.md +1203 -0
  7. package/agents/grd-evaluator.md +948 -0
  8. package/agents/grd-executor.md +784 -0
  9. package/agents/grd-explorer.md +2063 -0
  10. package/agents/grd-graduator.md +484 -0
  11. package/agents/grd-integration-checker.md +423 -0
  12. package/agents/grd-phase-researcher.md +641 -0
  13. package/agents/grd-plan-checker.md +745 -0
  14. package/agents/grd-planner.md +1386 -0
  15. package/agents/grd-project-researcher.md +865 -0
  16. package/agents/grd-research-synthesizer.md +256 -0
  17. package/agents/grd-researcher.md +2361 -0
  18. package/agents/grd-roadmapper.md +605 -0
  19. package/agents/grd-verifier.md +778 -0
  20. package/bin/install.js +1294 -0
  21. package/commands/grd/add-phase.md +207 -0
  22. package/commands/grd/add-todo.md +193 -0
  23. package/commands/grd/architect.md +283 -0
  24. package/commands/grd/audit-milestone.md +277 -0
  25. package/commands/grd/check-todos.md +228 -0
  26. package/commands/grd/complete-milestone.md +136 -0
  27. package/commands/grd/debug.md +169 -0
  28. package/commands/grd/discuss-phase.md +86 -0
  29. package/commands/grd/evaluate.md +1095 -0
  30. package/commands/grd/execute-phase.md +339 -0
  31. package/commands/grd/explore.md +258 -0
  32. package/commands/grd/graduate.md +323 -0
  33. package/commands/grd/help.md +482 -0
  34. package/commands/grd/insert-phase.md +227 -0
  35. package/commands/grd/insights.md +231 -0
  36. package/commands/grd/join-discord.md +18 -0
  37. package/commands/grd/list-phase-assumptions.md +50 -0
  38. package/commands/grd/map-codebase.md +71 -0
  39. package/commands/grd/new-milestone.md +721 -0
  40. package/commands/grd/new-project.md +1008 -0
  41. package/commands/grd/pause-work.md +134 -0
  42. package/commands/grd/plan-milestone-gaps.md +295 -0
  43. package/commands/grd/plan-phase.md +525 -0
  44. package/commands/grd/progress.md +364 -0
  45. package/commands/grd/quick-explore.md +236 -0
  46. package/commands/grd/quick.md +309 -0
  47. package/commands/grd/remove-phase.md +349 -0
  48. package/commands/grd/research-phase.md +200 -0
  49. package/commands/grd/research.md +681 -0
  50. package/commands/grd/resume-work.md +40 -0
  51. package/commands/grd/set-profile.md +106 -0
  52. package/commands/grd/settings.md +136 -0
  53. package/commands/grd/update.md +172 -0
  54. package/commands/grd/verify-work.md +219 -0
  55. package/get-research-done/config/default.json +15 -0
  56. package/get-research-done/references/checkpoints.md +1078 -0
  57. package/get-research-done/references/continuation-format.md +249 -0
  58. package/get-research-done/references/git-integration.md +254 -0
  59. package/get-research-done/references/model-profiles.md +73 -0
  60. package/get-research-done/references/planning-config.md +94 -0
  61. package/get-research-done/references/questioning.md +141 -0
  62. package/get-research-done/references/tdd.md +263 -0
  63. package/get-research-done/references/ui-brand.md +160 -0
  64. package/get-research-done/references/verification-patterns.md +612 -0
  65. package/get-research-done/templates/DEBUG.md +159 -0
  66. package/get-research-done/templates/UAT.md +247 -0
  67. package/get-research-done/templates/archive-reason.md +195 -0
  68. package/get-research-done/templates/codebase/architecture.md +255 -0
  69. package/get-research-done/templates/codebase/concerns.md +310 -0
  70. package/get-research-done/templates/codebase/conventions.md +307 -0
  71. package/get-research-done/templates/codebase/integrations.md +280 -0
  72. package/get-research-done/templates/codebase/stack.md +186 -0
  73. package/get-research-done/templates/codebase/structure.md +285 -0
  74. package/get-research-done/templates/codebase/testing.md +480 -0
  75. package/get-research-done/templates/config.json +35 -0
  76. package/get-research-done/templates/context.md +283 -0
  77. package/get-research-done/templates/continue-here.md +78 -0
  78. package/get-research-done/templates/critic-log.md +288 -0
  79. package/get-research-done/templates/data-report.md +173 -0
  80. package/get-research-done/templates/debug-subagent-prompt.md +91 -0
  81. package/get-research-done/templates/decision-log.md +58 -0
  82. package/get-research-done/templates/decision.md +138 -0
  83. package/get-research-done/templates/discovery.md +146 -0
  84. package/get-research-done/templates/experiment-readme.md +104 -0
  85. package/get-research-done/templates/graduated-script.md +180 -0
  86. package/get-research-done/templates/iteration-summary.md +234 -0
  87. package/get-research-done/templates/milestone-archive.md +123 -0
  88. package/get-research-done/templates/milestone.md +115 -0
  89. package/get-research-done/templates/objective.md +271 -0
  90. package/get-research-done/templates/phase-prompt.md +567 -0
  91. package/get-research-done/templates/planner-subagent-prompt.md +117 -0
  92. package/get-research-done/templates/project.md +184 -0
  93. package/get-research-done/templates/requirements.md +231 -0
  94. package/get-research-done/templates/research-project/ARCHITECTURE.md +204 -0
  95. package/get-research-done/templates/research-project/FEATURES.md +147 -0
  96. package/get-research-done/templates/research-project/PITFALLS.md +200 -0
  97. package/get-research-done/templates/research-project/STACK.md +120 -0
  98. package/get-research-done/templates/research-project/SUMMARY.md +170 -0
  99. package/get-research-done/templates/research.md +529 -0
  100. package/get-research-done/templates/roadmap.md +202 -0
  101. package/get-research-done/templates/scorecard.json +113 -0
  102. package/get-research-done/templates/state.md +287 -0
  103. package/get-research-done/templates/summary.md +246 -0
  104. package/get-research-done/templates/user-setup.md +311 -0
  105. package/get-research-done/templates/verification-report.md +322 -0
  106. package/get-research-done/workflows/complete-milestone.md +756 -0
  107. package/get-research-done/workflows/diagnose-issues.md +231 -0
  108. package/get-research-done/workflows/discovery-phase.md +289 -0
  109. package/get-research-done/workflows/discuss-phase.md +433 -0
  110. package/get-research-done/workflows/execute-phase.md +657 -0
  111. package/get-research-done/workflows/execute-plan.md +1844 -0
  112. package/get-research-done/workflows/list-phase-assumptions.md +178 -0
  113. package/get-research-done/workflows/map-codebase.md +322 -0
  114. package/get-research-done/workflows/resume-project.md +307 -0
  115. package/get-research-done/workflows/transition.md +556 -0
  116. package/get-research-done/workflows/verify-phase.md +628 -0
  117. package/get-research-done/workflows/verify-work.md +596 -0
  118. package/hooks/dist/grd-check-update.js +61 -0
  119. package/hooks/dist/grd-statusline.js +84 -0
  120. package/package.json +47 -0
  121. package/scripts/audit-help-commands.sh +115 -0
  122. package/scripts/build-hooks.js +42 -0
  123. package/scripts/verify-all-commands.sh +246 -0
  124. package/scripts/verify-architect-warning.sh +35 -0
  125. package/scripts/verify-insights-mode.sh +40 -0
  126. package/scripts/verify-quick-mode.sh +20 -0
  127. package/scripts/verify-revise-data-routing.sh +139 -0
@@ -0,0 +1,681 @@
1
+ # /grd:research
2
+
3
+ **Implements experiments from OBJECTIVE.md with iterative validation (Phase 4 command)**
4
+
5
+ ---
6
+ name: grd:research
7
+ description: Implement experiments from hypothesis with iterative validation loop
8
+ allowed-tools:
9
+ - Read
10
+ - Bash
11
+ - Write
12
+ - Task
13
+ - AskUserQuestion
14
+ agent: grd-researcher
15
+ phase: 4
16
+ requires: [OBJECTIVE.md]
17
+ produces: [experiments/run_NNN/]
18
+ ---
19
+
20
+ <objective>
21
+
22
+ Execute hypothesis-driven experiment implementation with recursive validation through Critic agent.
23
+
24
+ This command launches Phase 4 of the recursive validation loop—the Researcher agent reads OBJECTIVE.md to understand what hypothesis to test, implements experiments in isolated run directories, and spawns the Critic agent for validation. The Critic returns a verdict (PROCEED/REVISE_METHOD/REVISE_DATA/ESCALATE) that determines the next step.
25
+
26
+ **Creates:**
27
+ - `experiments/run_NNN_description/` — isolated run directory with complete snapshot
28
+ - `code/` — experiment scripts (train.py or experiment.ipynb)
29
+ - `config.yaml` — hyperparameters and settings
30
+ - `data/` — symlinks/references to data with hashes
31
+ - `logs/` — training output (stdout/stderr)
32
+ - `outputs/` — model artifacts, predictions
33
+ - `metrics/` — SCORECARD.json from Evaluator
34
+ - `README.md` — brief experiment summary
35
+ - `CRITIC_LOG.md` — Critic's evaluation and verdict
36
+
37
+ **Use cases:**
38
+ - After hypothesis formation: Implement experiments from OBJECTIVE.md
39
+ - Iterative refinement: Continue from REVISE_METHOD with Critic feedback
40
+ - Data validation: Trigger REVISE_DATA to return to /grd:explore
41
+ - Experiment versioning: Each iteration creates isolated, reproducible run
42
+
43
+ **After this command:** Review Critic verdict, proceed to Evaluator if PROCEED, or iterate based on routing.
44
+
45
+ </objective>
46
+
47
+ <execution_context>
48
+
49
+ @~/.claude/get-research-done/templates/experiment-readme.md
50
+
51
+ </execution_context>
52
+
53
+ <process>
54
+
55
+ ## Phase 1: Setup and State Loading
56
+
57
+ **Check if project initialized:**
58
+
59
+ ```bash
60
+ [ ! -f .planning/PROJECT.md ] && echo "ERROR: Project not initialized. Run /grd:new-project first." && exit 1
61
+ ```
62
+
63
+ **Check OBJECTIVE.md exists (hard gate):**
64
+
65
+ ```bash
66
+ [ ! -f .planning/OBJECTIVE.md ] && echo "ERROR: No OBJECTIVE.md found. Run /grd:architect first." && exit 1
67
+ ```
68
+
69
+ **If OBJECTIVE.md exists:**
70
+ - Read and extract:
71
+ - Hypothesis statement (what's being tested)
72
+ - Success metrics (what defines success)
73
+ - Evaluation methodology (how to evaluate)
74
+ - Baselines (comparison points)
75
+ - Falsification criteria (what would disprove hypothesis)
76
+ - Display brief summary:
77
+ ```
78
+ Hypothesis: [brief what statement]
79
+ Metrics: [list with weights]
80
+ Evaluation: [strategy]
81
+ ```
82
+
83
+ **Determine iteration state:**
84
+
85
+ If `--continue` flag:
86
+ - Find latest run directory in experiments/
87
+ - Read CRITIC_LOG.md for verdict and recommendations
88
+ - If verdict != REVISE_METHOD and verdict != REVISE_DATA: warn "No revision verdict to continue from"
89
+ - Load verdict history from previous runs
90
+ - Set iteration_count from previous run + 1
91
+
92
+ If `--iteration N`:
93
+ - Use provided N as iteration_count
94
+ - Warn if N conflicts with existing runs
95
+ - Load verdict history from runs 1 through N-1
96
+
97
+ If `--from-archive RUN_NAME`:
98
+ - Restore archived run from experiments/archive/
99
+ - Move back to experiments/
100
+ - Extract iteration count from run
101
+ - Load critique history
102
+
103
+ Otherwise (fresh start):
104
+ - Set iteration_count = 1
105
+ - Initialize empty verdict_history
106
+ - Scan experiments/ for existing runs to determine next run number
107
+
108
+ **Load iteration limit:**
109
+ - Default: 5
110
+ - Override with `--limit N` if provided
111
+ - Log: "Iteration limit: {N}"
112
+ - Store for Researcher agent
113
+
114
+ **Load verdict history:**
115
+ ```bash
116
+ # Load all CRITIC_LOG.md files to build verdict history
117
+ for run_dir in experiments/run_*; do
118
+ if [ -f "$run_dir/CRITIC_LOG.md" ]; then
119
+ # Extract verdict, confidence, iteration from CRITIC_LOG
120
+ VERDICT=$(grep "^\*\*Verdict:\*\*" "$run_dir/CRITIC_LOG.md" | head -1)
121
+ CONFIDENCE=$(grep "^\*\*Confidence:\*\*" "$run_dir/CRITIC_LOG.md" | head -1)
122
+ # Add to verdict_history array
123
+ fi
124
+ done
125
+ ```
126
+
127
+ **Update STATE.md:**
128
+ - Set current_phase: "research"
129
+ - Set current_iteration: {iteration_count}
130
+ - Set iteration_limit: {limit}
131
+ - Set active_hypothesis: (from OBJECTIVE.md)
132
+ - Update loop_history table with current iteration
133
+ - Set loop_status: "researcher" (in progress)
134
+
135
+ **Determine run number:**
136
+
137
+ Parse optional [description] argument for run naming.
138
+
139
+ ```bash
140
+ # Scan experiments/ directory for existing runs
141
+ if [ -d experiments ]; then
142
+ # Get highest run number
143
+ LAST_RUN=$(ls experiments/ | grep -E '^run_[0-9]+' | sed 's/run_\([0-9]*\).*/\1/' | sort -n | tail -1)
144
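+ NEXT_RUN=$((10#${LAST_RUN:-0} + 1))  # force base 10: zero-padded values such as 008 would otherwise be rejected as invalid octal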
145
+ else
146
+ mkdir -p experiments
147
+ NEXT_RUN=1
148
+ fi
149
+
150
+ # Format with zero-padding
151
+ RUN_NUM=$(printf "%03d" $NEXT_RUN)
152
+ ```
153
+
154
+ **If continuing (`--continue`) after a revision verdict (REVISE_METHOD or REVISE_DATA):**
155
+
156
+ ```bash
157
+ # Load previous CRITIC_LOG.md
158
+ LAST_RUN_DIR=$(ls -d experiments/run_* | sort | tail -1)
159
+ if [ -f "$LAST_RUN_DIR/CRITIC_LOG.md" ]; then
160
+ CRITIQUE_HISTORY=$(cat "$LAST_RUN_DIR/CRITIC_LOG.md")
161
+ echo "Continuing from previous run: $LAST_RUN_DIR"
162
+ echo "Previous verdict: $(grep 'Verdict:' $LAST_RUN_DIR/CRITIC_LOG.md)"
163
+ fi
164
+ ```
165
+
166
+ **Load DATA_REPORT.md context (soft reference):**
167
+
168
+ ```bash
169
+ cat .planning/DATA_REPORT.md 2>/dev/null
170
+ ```
171
+
172
+ If exists, extract data characteristics for experiment design:
173
+ - Sample size
174
+ - Feature types
175
+ - Class balance
176
+ - Leakage warnings
177
+ - Missing data patterns
178
+
179
+ If not exists, note: "No DATA_REPORT.md found - proceeding without data context"
180
+
181
+ ## Phase 2: Spawn Researcher Agent
182
+
183
+ Display research banner:
184
+ ```
185
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
186
+ GRD ► IMPLEMENTING EXPERIMENT
187
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
188
+
189
+ Run: {run_NNN_description}
190
+ Hypothesis: [brief statement from OBJECTIVE.md]
191
+ Mode: [new | continue from REVISE_METHOD]
192
+ ```
193
+
194
+ Spawn grd-researcher agent with context:
195
+
196
+ ```
197
+ Task(prompt="
198
+ <objective_context>
199
+ @.planning/OBJECTIVE.md
200
+
201
+ Extract and internalize:
202
+ - Hypothesis (what, why, expected)
203
+ - Success metrics (names, thresholds, weights)
204
+ - Evaluation methodology (strategy, parameters)
205
+ - Baselines (comparison points)
206
+ - Falsification criteria (routing conditions)
207
+ - Constraints (data, resources, scope)
208
+ </objective_context>
209
+
210
+ <data_context>
211
+ @.planning/DATA_REPORT.md (if exists)
212
+
213
+ Extract for experiment design:
214
+ - Data characteristics (shape, types, distributions)
215
+ - Quality issues (missing data, outliers)
216
+ - Class balance (imbalance severity)
217
+ - Leakage warnings (features to exclude)
218
+ - Sample size (affects methodology)
219
+ </data_context>
220
+
221
+ <run_context>
222
+ Run number: {run_NNN}
223
+ Description: {description_or_auto}
224
+ Iteration: {iteration_count}
225
+ Previous critiques: {critique_history_if_continuing}
226
+ </run_context>
227
+
228
+ <instructions>
229
+ Execute experiment implementation workflow:
230
+
231
+ 1. Create run directory: experiments/{run_NNN_description}/
232
+ 2. Generate experiment code based on OBJECTIVE.md hypothesis
233
+ 3. Create config.yaml with hyperparameters
234
+ 4. Reference data (symlinks + hashes for provenance)
235
+ 5. Execute experiment or prepare for user execution
236
+ 6. Collect metrics and compare to OBJECTIVE.md success criteria
237
+ 7. Spawn Critic agent for validation
238
+ 8. Handle Critic verdict (PROCEED/REVISE_METHOD/REVISE_DATA/ESCALATE)
239
+
240
+ Use template: @~/.claude/get-research-done/templates/experiment-readme.md
241
+ Write README.md to run directory with experiment summary.
242
+ </instructions>
243
+
244
+ <output>
245
+ Return:
246
+ - Run directory path
247
+ - Experiment status (complete/pending/failed)
248
+ - Critic verdict
249
+ - Next steps based on routing
250
+ </output>
251
+ ", subagent_type="grd-researcher", model="sonnet", description="Implement and validate experiment")
252
+ ```
253
+
254
+ ## Phase 3: Handle Loop Completion
255
+
256
+ After Researcher returns, parse verdict and update STATE.md accordingly.
257
+
258
+ **Extract verdict from Researcher response:**
259
+ ```bash
260
+ # Parse Researcher return message for verdict
261
+ VERDICT=$(echo "$RESEARCHER_OUTPUT" | grep "^\*\*Verdict:\*\*" | sed 's/\*\*Verdict:\*\* //' | cut -d' ' -f1)
262
+ CONFIDENCE=$(echo "$RESEARCHER_OUTPUT" | grep "^\*\*Confidence:\*\*" | sed 's/\*\*Confidence:\*\* //')
263
+ ITERATION=$(echo "$RESEARCHER_OUTPUT" | grep "^\*\*Iteration:\*\*" | sed 's/\*\*Iteration:\*\* //')
264
+ ```
265
+
266
+ **Route based on verdict:**
267
+
268
+ ### If PROCEED (HIGH/MEDIUM confidence)
269
+
270
+ Researcher has spawned Evaluator automatically.
271
+
272
+ ```bash
273
+ # Update STATE.md
274
+ echo "Updating STATE.md: verdict=PROCEED, status=evaluator_running"
275
+
276
+ # Add to loop history
277
+ echo "| $ITERATION | $RUN_NAME | PROCEED | $CONFIDENCE | {metrics} |" >> .planning/STATE.md
278
+
279
+ # Update loop status
280
+ sed -i 's/loop_status: .*/loop_status: evaluator/' .planning/STATE.md
281
+ ```
282
+
283
+ **Display:**
284
+ ```
285
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
286
+ GRD ► EXPERIMENT APPROVED ✓
287
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
288
+
289
+ **Verdict:** PROCEED (Confidence: {confidence})
290
+ **Run:** experiments/{run_NNN_description}/
291
+
292
+ Evaluator running quantitative benchmarks...
293
+ SCORECARD.json will be generated in metrics/ directory.
294
+
295
+ Next: Ready for Phase 5 human review after Evaluator completes.
296
+ ```
297
+
298
+ ### If PROCEED (LOW confidence) - Human gate
299
+
300
+ Researcher has paused for human confirmation.
301
+
302
+ ```bash
303
+ # Prompt human for decision
304
+ echo "Low confidence PROCEED - human confirmation required"
305
+ ```
306
+
307
+ **Display concerns and options:**
308
+ ```
309
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
310
+ GRD ► HUMAN CONFIRMATION REQUIRED
311
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
312
+
313
+ **Verdict:** PROCEED (LOW confidence)
314
+ **Run:** experiments/{run_NNN_description}/
315
+
316
+ Metrics pass but concerns exist:
317
+ {list_concerns_from_critic}
318
+
319
+ Options:
320
+ 1. Approve - Proceed to Evaluator despite concerns
321
+ 2. Revise - Treat as REVISE_METHOD, address concerns first
322
+ 3. Investigate - Manual review before deciding
323
+ ```
324
+
325
+ ### If REVISE_METHOD (under limit)
326
+
327
+ Researcher has archived run and is ready for retry.
328
+
329
+ ```bash
330
+ # Update STATE.md
331
+ echo "| $ITERATION | $RUN_NAME | REVISE_METHOD | $CONFIDENCE | {metrics} |" >> .planning/STATE.md
332
+
333
+ # Update iteration count
334
+ NEW_ITERATION=$((ITERATION + 1))
335
+ sed -i "s/current_iteration: .*/current_iteration: $NEW_ITERATION/" .planning/STATE.md
336
+
337
+ # Update loop status
338
+ sed -i 's/loop_status: .*/loop_status: researcher/' .planning/STATE.md
339
+ ```
340
+
341
+ **Display:**
342
+ ```
343
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
344
+ GRD ► REVISION NEEDED (Method)
345
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
346
+
347
+ **Iteration:** {iteration} of {limit}
348
+ **Run archived:** experiments/archive/{run_NNN_description}/
349
+
350
+ Issues identified:
351
+ {list_weaknesses}
352
+
353
+ Recommendations:
354
+ {list_recommendations}
355
+
356
+ Next: /grd:research --continue
357
+ ```
358
+
359
+ ### If REVISE_METHOD (limit reached)
360
+
361
+ Researcher has triggered human decision gate.
362
+
363
+ ```bash
364
+ # Update STATE.md
365
+ echo "| $ITERATION | $RUN_NAME | REVISE_METHOD | $CONFIDENCE | limit_reached |" >> .planning/STATE.md
366
+ sed -i 's/loop_status: .*/loop_status: human_review/' .planning/STATE.md
367
+ ```
368
+
369
+ **Display human decision prompt** (already handled by Researcher Step 8)
370
+
371
+ ### If REVISE_DATA
372
+
373
+ Researcher has identified data quality issues.
374
+
375
+ ```bash
376
+ # Update STATE.md
377
+ echo "| $ITERATION | $RUN_NAME | REVISE_DATA | $CONFIDENCE | data_concerns |" >> .planning/STATE.md
378
+ sed -i 's/loop_status: .*/loop_status: data_verification_required/' .planning/STATE.md
379
+
380
+ # Add to data_revisions table
381
+ echo "| $ITERATION | {concern_list} | pending |" >> .planning/STATE.md
382
+ ```
383
+
384
+ **Display:**
385
+ ```
386
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
387
+ GRD ► REVISION NEEDED (Data)
388
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
389
+
390
+ **Data concerns identified:**
391
+ {list_data_concerns}
392
+
393
+ **Recommended analysis:**
394
+ {specific_concerns_for_explorer}
395
+
396
+ Next steps:
397
+ 1. Run: /grd:explore [path] --concerns "{concern_list}"
398
+ 2. After Explorer updates DATA_REPORT.md:
399
+ - Run /grd:research --continue to retry with updated data context
400
+ - Or run /grd:architect to reformulate hypothesis
401
+ ```
402
+
403
+ ### If ESCALATE
404
+
405
+ Researcher has escalated to human for strategic decision.
406
+
407
+ ```bash
408
+ # Update STATE.md
409
+ echo "| $ITERATION | $RUN_NAME | ESCALATE | N/A | ambiguous_failure |" >> .planning/STATE.md
410
+ sed -i 's/loop_status: .*/loop_status: human_review/' .planning/STATE.md
411
+
412
+ # Add blocker
413
+ echo "- **Current:** Ambiguous failure - cannot determine root cause" >> .planning/STATE.md
414
+ echo "- **Requires:** Human strategic decision" >> .planning/STATE.md
415
+ ```
416
+
417
+ **Display:**
418
+ ```
419
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
420
+ GRD ► HUMAN DECISION REQUIRED
421
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
422
+
423
+ **Reason:** Ambiguous failure - Critic could not determine root cause
424
+
425
+ Evidence:
426
+ {evidence_package_from_researcher}
427
+
428
+ Options:
429
+ 1. Continue - Allow more iterations
430
+ 2. Archive - Abandon hypothesis
431
+ 3. Reset - Start fresh approach
432
+ 4. Escalate - Reformulate hypothesis via /grd:architect
433
+ ```
434
+
435
+ ### If Archived/Reset (Human decision outcome)
436
+
437
+ ```bash
438
+ # Update STATE.md based on decision
439
+ if [ "$DECISION" = "Archive" ]; then
440
+ sed -i 's/loop_status: .*/loop_status: archived/' .planning/STATE.md
441
+ echo "- **Status:** Hypothesis archived - {reason}" >> .planning/STATE.md
442
+ elif [ "$DECISION" = "Reset" ]; then
443
+ sed -i 's/loop_status: .*/loop_status: idle/' .planning/STATE.md
444
+ sed -i 's/current_iteration: .*/current_iteration: 0/' .planning/STATE.md
445
+ fi
446
+ ```
447
+
448
+ **Researcher → Critic handoff:**
449
+ - Researcher completes experiment implementation
450
+ - Passes experiment artifacts to Critic
451
+ - Critic audits and returns verdict
452
+ - Researcher handles routing (including Evaluator spawn on PROCEED)
453
+
454
+ **Command does NOT spawn Critic or Evaluator directly.** Researcher orchestrates the full loop.
455
+
456
+ ## Phase 4: Present Results
457
+
458
+ After Researcher completes (with Critic verdict), present summary:
459
+
460
+ **If verdict is PROCEED:**
461
+ ```
462
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
463
+ GRD ► EXPERIMENT APPROVED ✓
464
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
465
+
466
+ **Run:** experiments/{run_NNN_description}/
467
+ **Verdict:** PROCEED (Confidence: {HIGH|MEDIUM|LOW})
468
+
469
+ ## Experiment Summary
470
+
471
+ {one_paragraph_from_README.md}
472
+
473
+ ## Metrics
474
+
475
+ {metrics_table_from_SCORECARD.json}
476
+
477
+ ## Critic Assessment
478
+
479
+ **Strengths:**
480
+ {strengths_list}
481
+
482
+ **Concerns:**
483
+ {weaknesses_if_any}
484
+
485
+ **Recommendation:**
486
+ {recommendation_text}
487
+
488
+ ---
489
+
490
+ **Next steps:**
491
+ - Review SCORECARD.json in run directory
492
+ - Evaluator will run quantitative benchmarks
493
+ - Proceed to human evaluation gate (Phase 5)
494
+ ```
495
+
496
+ **If verdict is REVISE_METHOD:**
497
+ ```
498
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
499
+ GRD ► REVISION NEEDED (Method)
500
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
501
+
502
+ **Run:** experiments/{run_NNN_description}/
503
+ **Verdict:** REVISE_METHOD (Confidence: {HIGH|MEDIUM|LOW})
504
+
505
+ ## Issues Identified
506
+
507
+ {weaknesses_list_from_CRITIC_LOG}
508
+
509
+ ## Recommendations
510
+
511
+ {specific_actionable_suggestions}
512
+
513
+ ---
514
+
515
+ **Next steps:**
516
+ - Review CRITIC_LOG.md in run directory
517
+ - Address methodological issues
518
+ - Run: /grd:research --continue
519
+ ```
520
+
521
+ **If verdict is REVISE_DATA:**
522
+ ```
523
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
524
+ GRD ► REVISION NEEDED (Data)
525
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
526
+
527
+ **Run:** experiments/{run_NNN_description}/
528
+ **Verdict:** REVISE_DATA (Confidence: {HIGH|MEDIUM|LOW})
529
+
530
+ ## Data Concerns
531
+
532
+ {data_issues_from_CRITIC_LOG}
533
+
534
+ ## Recommendations
535
+
536
+ {specific_data_analysis_needed}
537
+
538
+ ---
539
+
540
+ **Next steps:**
541
+ - Review CRITIC_LOG.md for specific concerns
542
+ - Run: /grd:explore [path] with targeted analysis
543
+ - Explorer will append findings to DATA_REPORT.md
544
+ - Return to /grd:research after data issues resolved
545
+ ```
546
+
547
+ **If verdict is ESCALATE:**
548
+ ```
549
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
550
+ GRD ► HUMAN DECISION REQUIRED
551
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
552
+
553
+ **Run:** experiments/{run_NNN_description}/
554
+ **Verdict:** ESCALATE (Confidence: LOW)
555
+
556
+ ## Ambiguous Failure
557
+
558
+ {reasoning_from_CRITIC_LOG}
559
+
560
+ Critic could not determine root cause (method vs data).
561
+
562
+ ---
563
+
564
+ **Next steps:**
565
+ - Review CRITIC_LOG.md and experiment artifacts
566
+ - Manual investigation needed
567
+ - Decide: REVISE_METHOD, REVISE_DATA, or reformulate hypothesis
568
+ ```
569
+
570
+ **Show run directory contents:**
571
+ ```bash
572
+ tree -L 2 experiments/{run_NNN_description}
573
+ ```
574
+
575
+ **Prompt for next action:**
576
+
577
+ Use AskUserQuestion if verdict requires human decision:
578
+ - header: "Experiment Status: {verdict}"
579
+ - question: "Review findings above. How would you like to proceed?"
580
+ - options:
581
+ - "Continue" — Accept verdict and proceed
582
+ - "Override" — Manual routing decision
583
+ - "Archive" — Move run to archive/ and stop
584
+
585
+ </process>
586
+
587
+ <arguments>
588
+
589
+ **[description]** (optional)
590
+ - One-line description for run naming
591
+ - Examples: "baseline", "lr_sweep", "feature_engineering"
592
+ - If omitted, a short description is inferred from OBJECTIVE.md (e.g. "hypothesis_test")
593
+
594
+ **Flags:**
595
+
596
+ `--continue`
597
+ - Continue from the previous run after a REVISE_METHOD or REVISE_DATA verdict
598
+ - Loads latest CRITIC_LOG.md recommendations
599
+ - Increments iteration count
600
+
601
+ `--iteration N`
602
+ - Manually specify iteration number
603
+ - Useful for resuming after interruption
604
+
605
+ `--limit N`
606
+ - Override default iteration limit (default: 5)
607
+ - Use with caution - higher limits increase cost
608
+
609
+ `--from-archive RUN_NAME`
610
+ - Restore archived run and continue from there
611
+ - Moves run back to experiments/
612
+
613
+ </arguments>
614
+
615
+ <examples>
616
+
617
+ **New experiment:**
618
+ ```
619
+ /grd:research baseline
620
+ # Creates: experiments/run_001_baseline/
621
+ # Implements hypothesis from OBJECTIVE.md
622
+ ```
623
+
624
+ **Continue after revision:**
625
+ ```
626
+ /grd:research --continue
627
+ # Creates: experiments/run_002_revised/
628
+ # Includes previous Critic feedback
629
+ ```
630
+
631
+ **Specific iteration:**
632
+ ```
633
+ /grd:research --iteration 3 feature_engineering
634
+ # Creates: experiments/run_003_feature_engineering/
635
+ ```
636
+
637
+ **Auto-named run:**
638
+ ```
639
+ /grd:research
640
+ # Creates: experiments/run_001_hypothesis_test/
641
+ # Description inferred from OBJECTIVE.md
642
+ ```
643
+
644
+ **After data revision:**
645
+ ```
646
+ # After REVISE_DATA, fix data issues, then:
647
+ /grd:research --continue
648
+ # New run with updated data context
649
+ ```
650
+
651
+ </examples>
652
+
653
+ <output>
654
+
655
+ - `experiments/run_NNN_description/` — isolated run directory containing:
656
+ - `README.md` — experiment summary (what, why, how to reproduce)
657
+ - `config.yaml` — hyperparameters and settings
658
+ - `code/` — experiment scripts (train.py or experiment.ipynb)
659
+ - `data/` — symlinks/references with hashes for provenance
660
+ - `logs/` — training output (stdout/stderr)
661
+ - `outputs/` — model artifacts, predictions
662
+ - `metrics/` — SCORECARD.json from Evaluator
663
+ - `CRITIC_LOG.md` — Critic's evaluation and verdict
664
+
665
+ **Run directory provides complete snapshot for reproducibility.**
666
+
667
+ </output>
668
+
669
+ <success_criteria>
670
+
671
+ - [ ] OBJECTIVE.md hard gate enforced (required, cannot proceed without it)
672
+ - [ ] Run number determined (auto-increment or specified)
673
+ - [ ] Researcher agent spawned with OBJECTIVE.md context
674
+ - [ ] DATA_REPORT.md context loaded if available
675
+ - [ ] Critique history passed if continuing
676
+ - [ ] Experiment implemented and validated
677
+ - [ ] Critic verdict obtained and routed appropriately
678
+ - [ ] Results presented with next steps
679
+ - [ ] Run directory created with complete artifacts
680
+
681
+ </success_criteria>