@bohuyeshan/openagent-labforge-core 3.11.5 → 3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (272)
  1. package/README.ja.md +130 -343
  2. package/README.ko.md +128 -337
  3. package/README.md +229 -584
  4. package/README.ru.md +131 -364
  5. package/README.zh-cn.md +231 -547
  6. package/bin/openagent-labforge.js +95 -9
  7. package/bin/platform.test.ts +20 -21
  8. package/dist/agents/article-writer.d.ts +7 -0
  9. package/dist/agents/atlas/default.d.ts +1 -1
  10. package/dist/agents/atlas/gemini.d.ts +1 -1
  11. package/dist/agents/atlas/gpt.d.ts +1 -1
  12. package/dist/agents/bio-methodologist.d.ts +1 -1
  13. package/dist/agents/bio-orchestrator.d.ts +7 -0
  14. package/dist/agents/bio-skill-guidance.d.ts +1 -0
  15. package/dist/agents/builtin-agents/general-agents.d.ts +0 -1
  16. package/dist/agents/builtin-agents/sisyphus-agent.d.ts +16 -0
  17. package/dist/agents/dynamic-agent-prompt-builder.d.ts +0 -2
  18. package/dist/agents/engineering-capability.d.ts +7 -0
  19. package/dist/agents/env-context.d.ts +1 -1
  20. package/dist/agents/github-scout.d.ts +7 -0
  21. package/dist/agents/index.d.ts +0 -1
  22. package/dist/agents/metis.d.ts +1 -1
  23. package/dist/agents/momus.d.ts +1 -1
  24. package/dist/agents/prometheus/behavioral-summary.d.ts +1 -1
  25. package/dist/agents/prometheus/gemini.d.ts +1 -1
  26. package/dist/agents/prometheus/gpt.d.ts +1 -1
  27. package/dist/agents/prometheus/interview-mode.d.ts +1 -1
  28. package/dist/agents/prometheus/plan-generation.d.ts +1 -1
  29. package/dist/agents/prometheus/plan-template.d.ts +1 -1
  30. package/dist/agents/prometheus/system-prompt.d.ts +1 -1
  31. package/dist/agents/scientific-writer.d.ts +7 -0
  32. package/dist/agents/sisyphus-junior/gpt-5-3-codex.d.ts +1 -1
  33. package/dist/agents/sisyphus-junior/gpt-5-4.d.ts +1 -1
  34. package/dist/agents/sisyphus-junior/gpt.d.ts +1 -1
  35. package/dist/agents/tech-scout.d.ts +7 -0
  36. package/dist/agents/types.d.ts +1 -1
  37. package/dist/agents/wase.d.ts +8 -0
  38. package/dist/agents/wet-lab-designer.d.ts +7 -0
  39. package/dist/agents/writing-style-rules.d.ts +1 -0
  40. package/dist/cli/config-manager/bun-install.d.ts +1 -6
  41. package/dist/cli/config-manager/cleanup-managed-mcp-from-opencode-config.d.ts +2 -0
  42. package/dist/cli/config-manager/cleanup-stale-managed-agents.d.ts +2 -0
  43. package/dist/cli/config-manager/parse-opencode-config-file.d.ts +2 -0
  44. package/dist/cli/config-manager/plugin-name-with-version.d.ts +1 -1
  45. package/dist/cli/config-manager/sync-static-agent-to-opencode-config.d.ts +2 -0
  46. package/dist/cli/config-manager/sync-static-mcp-to-opencode-config.d.ts +2 -0
  47. package/dist/cli/config-manager/write-bootstrap-skill.d.ts +15 -0
  48. package/dist/cli/config-manager.d.ts +5 -0
  49. package/dist/cli/index.js +24986 -15362
  50. package/dist/cli/install-validators.d.ts +0 -1
  51. package/dist/cli/model-fallback-types.d.ts +0 -1
  52. package/dist/cli/run/index.d.ts +0 -1
  53. package/dist/cli/run/types.d.ts +0 -1
  54. package/dist/cli/types.d.ts +0 -3
  55. package/dist/config/schema/agent-names.d.ts +36 -3
  56. package/dist/config/schema/agent-overrides.d.ts +504 -0
  57. package/dist/config/schema/background-task.d.ts +0 -2
  58. package/dist/config/schema/experimental.d.ts +5 -0
  59. package/dist/config/schema/git-master.d.ts +0 -1
  60. package/dist/config/schema/hooks.d.ts +0 -2
  61. package/dist/config/schema/mcp-policy.d.ts +1 -0
  62. package/dist/config/schema/oh-my-opencode-config.d.ts +526 -54
  63. package/dist/config/schema.d.ts +0 -1
  64. package/dist/create-hooks.d.ts +0 -13
  65. package/dist/features/background-agent/compaction-aware-message-resolver.d.ts +1 -16
  66. package/dist/features/background-agent/constants.d.ts +2 -1
  67. package/dist/features/background-agent/manager.d.ts +5 -20
  68. package/dist/features/background-agent/process-cleanup.d.ts +1 -1
  69. package/dist/features/background-agent/task-history.d.ts +0 -1
  70. package/dist/features/background-agent/task-poller.d.ts +0 -1
  71. package/dist/features/background-agent/types.d.ts +0 -4
  72. package/dist/features/builtin-commands/commands.d.ts +4 -1
  73. package/dist/features/builtin-skills/skills/bio-methods.d.ts +2 -0
  74. package/dist/features/builtin-skills/skills/bio-pipeline.d.ts +2 -0
  75. package/dist/features/builtin-skills/skills/bio-tools.d.ts +2 -0
  76. package/dist/features/builtin-skills/skills/bio-visualization.d.ts +2 -0
  77. package/dist/features/builtin-skills/skills/cell-annotation.d.ts +2 -0
  78. package/dist/features/builtin-skills/skills/differential-expression.d.ts +2 -0
  79. package/dist/features/builtin-skills/skills/geo-query.d.ts +2 -0
  80. package/dist/features/builtin-skills/skills/index.d.ts +14 -0
  81. package/dist/features/builtin-skills/skills/paper-evidence.d.ts +2 -0
  82. package/dist/features/builtin-skills/skills/pubmed-search.d.ts +2 -0
  83. package/dist/features/builtin-skills/skills/scrna-preprocessing.d.ts +2 -0
  84. package/dist/features/builtin-skills/skills/sequence-analysis.d.ts +2 -0
  85. package/dist/features/builtin-skills/skills/structural-biology.d.ts +2 -0
  86. package/dist/features/builtin-skills/skills/vector-design.d.ts +2 -0
  87. package/dist/features/builtin-skills/skills/wet-lab-design.d.ts +2 -0
  88. package/dist/features/claude-code-agent-loader/loader.d.ts +3 -3
  89. package/dist/features/claude-code-agent-loader/types.d.ts +1 -8
  90. package/dist/features/claude-code-mcp-loader/configure-allowed-env-vars.d.ts +5 -0
  91. package/dist/features/claude-code-mcp-loader/index.d.ts +1 -0
  92. package/dist/features/claude-code-mcp-loader/types.d.ts +3 -0
  93. package/dist/features/claude-code-plugin-loader/agent-loader.d.ts +2 -2
  94. package/dist/features/claude-code-plugin-loader/loader.d.ts +2 -2
  95. package/dist/features/claude-code-plugin-loader/types.d.ts +1 -1
  96. package/dist/features/claude-code-session-state/state.d.ts +5 -0
  97. package/dist/features/opencode-skill-loader/git-master-template-injection.d.ts +1 -1
  98. package/dist/features/opencode-skill-loader/project-skill-directory-discovery.d.ts +1 -0
  99. package/dist/features/opencode-skill-loader/skill-metadata-validator.d.ts +8 -0
  100. package/dist/features/skill-mcp-manager/types.d.ts +0 -4
  101. package/dist/features/tmux-subagent/index.d.ts +0 -1
  102. package/dist/features/tmux-subagent/manager.d.ts +0 -5
  103. package/dist/features/tmux-subagent/types.d.ts +0 -2
  104. package/dist/hooks/atlas/tool-execute-after.d.ts +0 -2
  105. package/dist/hooks/atlas/types.d.ts +0 -2
  106. package/dist/hooks/atlas/verification-reminders.d.ts +0 -4
  107. package/dist/hooks/auto-slash-command/hook.d.ts +0 -7
  108. package/dist/hooks/auto-update-checker/checker.d.ts +1 -3
  109. package/dist/hooks/auto-update-checker/constants.d.ts +2 -2
  110. package/dist/hooks/comment-checker/downloader.d.ts +1 -1
  111. package/dist/hooks/compaction-context-injector/hook.d.ts +1 -5
  112. package/dist/hooks/context-window-monitor.d.ts +5 -2
  113. package/dist/hooks/index.d.ts +0 -2
  114. package/dist/hooks/keyword-detector/detector.d.ts +1 -1
  115. package/dist/hooks/keyword-detector/hook.d.ts +2 -2
  116. package/dist/hooks/keyword-detector/index.d.ts +1 -0
  117. package/dist/hooks/keyword-detector/semantic-hint.d.ts +4 -0
  118. package/dist/hooks/keyword-detector/ultrawork/autonomous.d.ts +8 -0
  119. package/dist/hooks/preemptive-compaction.d.ts +5 -2
  120. package/dist/hooks/runtime-fallback/hook.d.ts +3 -2
  121. package/dist/hooks/runtime-fallback/message-update-handler.d.ts +2 -1
  122. package/dist/hooks/runtime-fallback/types.d.ts +3 -56
  123. package/dist/hooks/session-notification-scheduler.d.ts +3 -5
  124. package/dist/hooks/session-notification.d.ts +0 -2
  125. package/dist/hooks/session-recovery/tool-pairing.d.ts +16 -0
  126. package/dist/hooks/start-work/index.d.ts +1 -1
  127. package/dist/hooks/start-work/worktree-detector.d.ts +0 -7
  128. package/dist/hooks/todo-continuation-enforcer/compaction-guard.d.ts +4 -0
  129. package/dist/hooks/todo-continuation-enforcer/constants.d.ts +6 -5
  130. package/dist/hooks/todo-continuation-enforcer/handler.d.ts +0 -1
  131. package/dist/hooks/todo-continuation-enforcer/idle-event.d.ts +0 -1
  132. package/dist/hooks/todo-continuation-enforcer/resolve-message-info.d.ts +3 -0
  133. package/dist/hooks/todo-continuation-enforcer/session-state.d.ts +1 -1
  134. package/dist/hooks/todo-continuation-enforcer/types.d.ts +8 -3
  135. package/dist/hooks/tool-output-truncator.d.ts +0 -1
  136. package/dist/index.js +56627 -56933
  137. package/dist/mcp/extended.d.ts +4 -2
  138. package/dist/mcp/index.d.ts +2 -1
  139. package/dist/mcp/types.d.ts +2 -3
  140. package/dist/openagent-labforge.schema.json +1362 -83
  141. package/dist/plugin/hooks/create-continuation-hooks.d.ts +1 -2
  142. package/dist/plugin/hooks/create-core-hooks.d.ts +0 -1
  143. package/dist/plugin/hooks/create-session-hooks.d.ts +1 -2
  144. package/dist/plugin/ultrawork-model-override.d.ts +11 -1
  145. package/dist/plugin-dispose.d.ts +12 -10
  146. package/dist/plugin-handlers/agent-config-handler.d.ts +0 -1
  147. package/dist/plugin-handlers/prometheus-agent-config-builder.d.ts +1 -1
  148. package/dist/plugin-state.d.ts +0 -5
  149. package/dist/shared/agent-display-names.d.ts +1 -0
  150. package/dist/shared/data-path.d.ts +1 -1
  151. package/dist/shared/dynamic-truncator.d.ts +7 -4
  152. package/dist/shared/external-plugin-detector.d.ts +7 -0
  153. package/dist/shared/index.d.ts +5 -3
  154. package/dist/shared/jsonc-parser.d.ts +4 -0
  155. package/dist/shared/mcp-local-command-normalizer.d.ts +1 -0
  156. package/dist/shared/migrate-legacy-config-file.d.ts +1 -0
  157. package/dist/shared/model-error-classifier.d.ts +1 -2
  158. package/dist/shared/opencode-command-dirs.d.ts +0 -1
  159. package/dist/shared/plugin-identity.d.ts +2 -3
  160. package/dist/shared/project-discovery-dirs.d.ts +4 -0
  161. package/dist/shared/session-model-state.d.ts +1 -2
  162. package/dist/shared/system-directive.d.ts +5 -6
  163. package/dist/tools/call-omo-agent/background-executor.d.ts +1 -2
  164. package/dist/tools/call-omo-agent/constants.d.ts +2 -2
  165. package/dist/tools/call-omo-agent/sync-executor.d.ts +3 -11
  166. package/dist/tools/call-omo-agent/tools.d.ts +2 -2
  167. package/dist/tools/call-omo-agent/types.d.ts +13 -0
  168. package/dist/tools/delegate-task/constants.d.ts +1 -1
  169. package/dist/tools/delegate-task/model-selection.d.ts +0 -1
  170. package/dist/tools/delegate-task/model-string-parser.d.ts +3 -1
  171. package/dist/tools/delegate-task/parent-context-resolver.d.ts +22 -0
  172. package/dist/tools/lsp/constants.d.ts +0 -1
  173. package/dist/tools/lsp/lsp-client-transport.d.ts +2 -4
  174. package/dist/tools/lsp/lsp-client-wrapper.d.ts +1 -2
  175. package/generated/skills-bundles/catalog.json +1 -1
  176. package/generated/skills-bundles/full/skills/data-analysis/experiment-monitoring/auto-claude__monitor-experiment/SKILL.md +56 -55
  177. package/generated/skills-bundles/full/skills/data-analysis/experiment-ops/auto-claude__run-experiment/SKILL.md +105 -104
  178. package/generated/skills-bundles/full/skills/data-analysis/optimization/auto-claude__dse-loop/SKILL.md +272 -271
  179. package/generated/skills-bundles/full/skills/data-analysis/statistics/auto-claude__analyze-results/SKILL.md +40 -39
  180. package/generated/skills-bundles/full/skills/data-analysis/visualization/auto-claude__paper-figure/SKILL.md +274 -273
  181. package/generated/skills-bundles/full/skills/productivity/visual-design/auto-claude__pixel-art/SKILL.md +131 -130
  182. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-compile/SKILL.md +245 -244
  183. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-plan/SKILL.md +247 -246
  184. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/SKILL.md +303 -302
  185. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/iclr2026.tex +84 -84
  186. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/icml2025.tex +87 -87
  187. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/math_commands.tex +48 -48
  188. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/neurips2025.tex +80 -80
  189. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-writing/SKILL.md +31 -31
  190. package/generated/skills-bundles/full/skills/research/literature-and-web-search/auto-claude__arxiv/SKILL.md +126 -125
  191. package/generated/skills-bundles/full/skills/research/literature-and-web-search/auto-claude__novelty-check/SKILL.md +80 -79
  192. package/generated/skills-bundles/full/skills/research/literature-and-web-search/auto-claude__research-lit/SKILL.md +187 -186
  193. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__idea-creator/SKILL.md +11 -11
  194. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__idea-discovery/SKILL.md +18 -18
  195. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__idea-discovery-robot/SKILL.md +20 -20
  196. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__research-pipeline/SKILL.md +15 -15
  197. package/generated/skills-bundles/full/skills/research/theory-writing/auto-claude__proof-writer/SKILL.md +217 -216
  198. package/generated/skills-bundles/paper/skills/data-analysis/experiment-monitoring/auto-claude__monitor-experiment/SKILL.md +56 -55
  199. package/generated/skills-bundles/paper/skills/data-analysis/experiment-ops/auto-claude__run-experiment/SKILL.md +105 -104
  200. package/generated/skills-bundles/paper/skills/data-analysis/optimization/auto-claude__dse-loop/SKILL.md +272 -271
  201. package/generated/skills-bundles/paper/skills/data-analysis/statistics/auto-claude__analyze-results/SKILL.md +40 -39
  202. package/generated/skills-bundles/paper/skills/data-analysis/visualization/auto-claude__paper-figure/SKILL.md +274 -273
  203. package/generated/skills-bundles/paper/skills/productivity/visual-design/auto-claude__pixel-art/SKILL.md +131 -130
  204. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-compile/SKILL.md +245 -244
  205. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-plan/SKILL.md +247 -246
  206. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/SKILL.md +303 -302
  207. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/iclr2026.tex +84 -84
  208. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/icml2025.tex +87 -87
  209. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/math_commands.tex +48 -48
  210. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/neurips2025.tex +80 -80
  211. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-writing/SKILL.md +31 -31
  212. package/generated/skills-bundles/paper/skills/research/literature-and-web-search/auto-claude__arxiv/SKILL.md +126 -125
  213. package/generated/skills-bundles/paper/skills/research/literature-and-web-search/auto-claude__novelty-check/SKILL.md +80 -79
  214. package/generated/skills-bundles/paper/skills/research/literature-and-web-search/auto-claude__research-lit/SKILL.md +187 -186
  215. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__idea-creator/SKILL.md +11 -11
  216. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__idea-discovery/SKILL.md +18 -18
  217. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__idea-discovery-robot/SKILL.md +20 -20
  218. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__research-pipeline/SKILL.md +15 -15
  219. package/generated/skills-bundles/paper/skills/research/theory-writing/auto-claude__proof-writer/SKILL.md +217 -216
  220. package/package.json +36 -32
  221. package/dist/cli/openai-only-model-catalog.d.ts +0 -3
  222. package/dist/cli/run/model-resolver.d.ts +0 -4
  223. package/dist/config/schema/git-env-prefix.d.ts +0 -5
  224. package/dist/features/background-agent/remove-task-toast-tracking.d.ts +0 -1
  225. package/dist/features/background-agent/subagent-spawn-limits.d.ts +0 -23
  226. package/dist/features/claude-code-agent-loader/claude-model-mapper.d.ts +0 -4
  227. package/dist/features/tmux-subagent/pane-state-parser.d.ts +0 -8
  228. package/dist/features/tmux-subagent/tracked-session-state.d.ts +0 -8
  229. package/dist/hooks/atlas/boulder-session-lineage.d.ts +0 -6
  230. package/dist/hooks/atlas/final-wave-approval-gate.d.ts +0 -4
  231. package/dist/hooks/atlas/idle-event.d.ts +0 -8
  232. package/dist/hooks/atlas/resolve-active-boulder-session.d.ts +0 -11
  233. package/dist/hooks/auto-slash-command/processed-command-store.d.ts +0 -7
  234. package/dist/hooks/auto-update-checker/checker/sync-package-json.d.ts +0 -7
  235. package/dist/hooks/compaction-context-injector/compaction-context-prompt.d.ts +0 -1
  236. package/dist/hooks/compaction-context-injector/constants.d.ts +0 -5
  237. package/dist/hooks/compaction-context-injector/recovery-prompt-config.d.ts +0 -6
  238. package/dist/hooks/compaction-context-injector/recovery.d.ts +0 -6
  239. package/dist/hooks/compaction-context-injector/session-id.d.ts +0 -2
  240. package/dist/hooks/compaction-context-injector/session-prompt-config-resolver.d.ts +0 -16
  241. package/dist/hooks/compaction-context-injector/tail-monitor.d.ts +0 -13
  242. package/dist/hooks/compaction-context-injector/types.d.ts +0 -43
  243. package/dist/hooks/compaction-context-injector/validated-model.d.ts +0 -13
  244. package/dist/hooks/delegate-task-english-directive/hook.d.ts +0 -14
  245. package/dist/hooks/delegate-task-english-directive/index.d.ts +0 -1
  246. package/dist/hooks/gpt-permission-continuation/assistant-message.d.ts +0 -23
  247. package/dist/hooks/gpt-permission-continuation/constants.d.ts +0 -4
  248. package/dist/hooks/gpt-permission-continuation/detector.d.ts +0 -1
  249. package/dist/hooks/gpt-permission-continuation/handler.d.ts +0 -12
  250. package/dist/hooks/gpt-permission-continuation/index.d.ts +0 -13
  251. package/dist/hooks/gpt-permission-continuation/session-state.d.ts +0 -15
  252. package/dist/hooks/ralph-loop/pending-verification-handler.d.ts +0 -16
  253. package/dist/hooks/runtime-fallback/fallback-bootstrap-model.d.ts +0 -10
  254. package/dist/hooks/runtime-fallback/fallback-retry-dispatcher.d.ts +0 -11
  255. package/dist/hooks/runtime-fallback/last-user-retry-parts.d.ts +0 -4
  256. package/dist/hooks/runtime-fallback/retry-model-payload.d.ts +0 -7
  257. package/dist/hooks/runtime-fallback/session-messages.d.ts +0 -9
  258. package/dist/hooks/runtime-fallback/session-status-handler.d.ts +0 -3
  259. package/dist/hooks/runtime-fallback/visible-assistant-response.d.ts +0 -3
  260. package/dist/hooks/session-notification-content.d.ts +0 -30
  261. package/dist/plugin/normalize-tool-arg-schemas.d.ts +0 -2
  262. package/dist/plugin/ultrawork-variant-availability.d.ts +0 -6
  263. package/dist/shared/compaction-agent-config-checkpoint.d.ts +0 -11
  264. package/dist/shared/context-limit-resolver.d.ts +0 -5
  265. package/dist/shared/fallback-chain-from-models.d.ts +0 -3
  266. package/dist/shared/question-denied-session-permission.d.ts +0 -6
  267. package/dist/shared/retry-status-utils.d.ts +0 -2
  268. package/dist/shared/vision-capable-models-cache.d.ts +0 -4
  269. package/dist/tools/delegate-task/cancel-unstable-agent-task.d.ts +0 -2
  270. package/dist/tools/look-at/multimodal-fallback-chain.d.ts +0 -4
  271. package/dist/tools/lsp/directory-diagnostics.d.ts +0 -1
  272. package/dist/tools/lsp/server-path-bases.d.ts +0 -1
@@ -15,7 +15,7 @@ End-to-end autonomous research workflow for: **$ARGUMENTS**
15
15
 
16
16
  - **AUTO_PROCEED = true** — When `true`, Gate 1 auto-selects the top-ranked idea (highest pilot signal + novelty confirmed) and continues to implementation. When `false`, always waits for explicit user confirmation before proceeding.
17
17
  - **ARXIV_DOWNLOAD = false** — When `true`, `/research-lit` downloads the top relevant arXiv PDFs during literature survey. When `false` (default), only fetches metadata via arXiv API. Passed through to `/idea-discovery` → `/research-lit`.
18
- - **HUMAN_CHECKPOINT = false** — When `true`, the ULTRAWORK loop (Stage 4) pauses after each round to let you inspect progress and provide custom modification instructions before fixes are implemented. When `false` (default), the loop runs fully autonomously. Passed through to `/ulw-loop`.
18
+ - **HUMAN_CHECKPOINT = false** — When `true`, the ULTRAWORK loop (Stage 4) pauses after each round to let you inspect progress and provide custom modification instructions before fixes are implemented. When `false` (default), the loop runs fully autonomously. Passed through to `/ulw-loop`.
19
19
 
20
20
  > 💡 Override via argument, e.g., `/research-pipeline "topic" — AUTO_PROCEED: false, HUMAN_CHECKPOINT: true`.
21
21
 
@@ -40,7 +40,7 @@ Invoke the idea discovery pipeline:
40
40
  /idea-discovery "$ARGUMENTS"
41
41
  ```
42
42
 
43
- This internally runs: `/research-lit` → `/idea-creator` → `/novelty-check`
43
+ This internally runs: `/research-lit` → `/idea-creator` → `/novelty-check`
44
44
 
45
45
  **Output:** `IDEA_REPORT.md` with ranked, validated, pilot-tested ideas.
46
46
 
@@ -109,25 +109,25 @@ Deploy the full-scale experiments:
109
109
 
110
110
  Wait for experiments to complete. Collect results.
111
111
 
112
- ### Stage 4: ULTRAWORK Loop (Workflow 2 — Part 2)
112
+ ### Stage 4: ULTRAWORK Loop (Workflow 2 — Part 2)
113
113
 
114
114
  Once initial results are in, start the autonomous improvement loop:
115
115
 
116
116
  ```
117
- /ulw-loop "$ARGUMENTS — [chosen idea title]"
117
+ /ulw-loop "$ARGUMENTS — [chosen idea title]"
118
118
  ```
119
119
 
120
- **What this does (recommended max 4 rounds):**
121
- 1. Run work → check progress → record evidence
122
- 2. Oracle verifies completion; if not verified, continue iterating
123
- 3. Deploy fixes, collect new results
124
- 4. Repeat until Oracle verification or max rounds reached
125
-
126
- **Output:** Maintain a `ULTRAWORK_LOG.md` with each round's changes and evidence.
120
+ **What this does (recommended max 4 rounds):**
121
+ 1. Run work → check progress → record evidence
122
+ 2. Oracle verifies completion; if not verified, continue iterating
123
+ 3. Deploy fixes, collect new results
124
+ 4. Repeat until Oracle verification or max rounds reached
125
+
126
+ **Output:** Maintain a `ULTRAWORK_LOG.md` with each round's changes and evidence.
127
127
 
128
128
  ### Stage 5: Final Summary
129
129
 
130
- After the ULTRAWORK loop completes, write a final status report:
130
+ After the ULTRAWORK loop completes, write a final status report:
131
131
 
132
132
  ```markdown
133
133
  # Research Pipeline Report
@@ -135,7 +135,7 @@ After the ULTRAWORK loop completes, write a final status report:
135
135
  **Direction**: $ARGUMENTS
136
136
  **Chosen Idea**: [title]
137
137
  **Date**: [start] → [end]
138
- **Pipeline**: idea-discovery → implement → run-experiment → ulw-loop
138
+ **Pipeline**: idea-discovery → implement → run-experiment → ulw-loop
139
139
 
140
140
  ## Journey Summary
141
141
  - Ideas generated: X → filtered to Y → piloted Z → chose 1
@@ -157,7 +157,7 @@ After the ULTRAWORK loop completes, write a final status report:
157
157
 
158
158
  - **Human checkpoint after Stage 1 is controlled by AUTO_PROCEED.** When `false`, do not proceed without user confirmation. When `true`, auto-select the top idea after presenting results.
159
159
  - **Stages 2-4 can run autonomously** once the user confirms the idea. This is the "sleep and wake up to results" part.
160
- - **If Stage 4 hits the max rounds without Oracle verification**, stop and report remaining issues. Do not loop forever.
160
+ - **If Stage 4 hits the max rounds without Oracle verification**, stop and report remaining issues. Do not loop forever.
161
161
  - **Budget awareness**: Track total GPU-hours across the pipeline. Flag if approaching user-defined limits.
162
162
  - **Documentation**: Every stage updates its own output file. The full history should be self-contained.
163
163
  - **Fail gracefully**: If any stage fails (no good ideas, experiments crash, review loop stuck), report clearly and suggest alternatives rather than forcing forward.
@@ -169,6 +169,6 @@ After the ULTRAWORK loop completes, write a final status report:
169
169
  | 1. Idea Discovery | 30-60 min | Yes if AUTO_PROCEED=true |
170
170
  | 2. Implementation | 15-60 min | Yes (autonomous after Gate 1) |
171
171
  | 3. Deploy | 5 min + experiment time | Yes ✅ |
172
- | 4. ULTRAWORK | 1-4 hours (depends on experiments) | Yes ✅ |
172
+ | 4. ULTRAWORK | 1-4 hours (depends on experiments) | Yes ✅ |
173
173
 
174
174
  **Sweet spot**: Run Stage 1-2 in the evening, launch Stage 3-4 before bed, wake up to a reviewed paper.
@@ -6,219 +6,220 @@ allowed-tools: "Read, Write, Edit, Grep, Glob"
6
6
  metadata:
7
7
  category: "research/theory-writing"
8
8
  ---
9
- # Proof Write: Rigorous Theorem / Lemma Drafting
10
-
11
- Write a mathematically honest proof package, not a polished fake proof.
12
-
13
- ## Constants
14
-
15
- - DEFAULT_PROOF_DOC = `PROOF_PACKAGE.md` in project root
16
- - STATUS = `PROVABLE AS STATED | PROVABLE AFTER WEAKENING / EXTRA ASSUMPTION | NOT CURRENTLY JUSTIFIED`
17
-
18
- ## Context: $ARGUMENTS
19
-
20
- ## Goal
21
-
22
- Produce exactly one of:
23
- 1. a complete proof of the original claim
24
- 2. a corrected claim plus a proof of the corrected claim
25
- 3. a blockage report explaining why the claim is not currently justified
26
-
27
- ## Inputs
28
-
29
- Extract and normalize:
30
- - exact theorem / lemma / proposition / corollary statement
31
- - explicit assumptions
32
- - notation and definitions
33
- - any user-provided proof sketch, partial proof, or intended strategy
34
- - nearby lemmas or claims in local notes, appendix files, or theorem drafts if the request points to them
35
- - desired output style if specified: concise, appendix-ready, or full-detail
36
-
37
- If notation or assumptions are ambiguous, state the exact interpretation you are using before proving anything.
38
-
39
- ## Workflow
40
-
41
- ### Step 1: Gather Proof Context
42
- Determine the target proof file with this priority:
43
- 1. a file path explicitly specified by the user
44
- 2. a proof draft already referenced in local notes or theorem files
45
- 3. `PROOF_PACKAGE.md` in project root as the default target
46
-
47
- Read the relevant local context:
48
- - the chosen target proof file, if it already exists
49
- - theorem notes, appendix drafts, or files explicitly mentioned by the user
50
-
51
- Extract:
52
- - exact claim
53
- - assumptions
54
- - notation
55
- - proof sketch or partial proof
56
- - nearby lemmas that the draft may depend on
57
-
58
- ### Step 2: Normalize the Claim
59
- Restate:
60
- - the exact claim being proved
61
- - all assumptions, separately from conclusions
62
- - all symbols used in the claim
63
-
64
- Identify:
65
- - hidden assumptions
66
- - undefined notation
67
- - scope ambiguities
68
- - whether the available sketch proves the full claim or only a weaker variant
69
-
70
- Preserve the user's original theorem statement unless a change is explicitly required.
71
- If you use a stronger normalization or cleaner internal formulation only to make the proof easier, keep that as an internal proof device rather than silently replacing the original claim.
72
-
73
- ### Step 3: Feasibility Triage
74
- Before writing a proof, classify the claim into exactly one status:
75
- - `PROVABLE AS STATED`
76
- - `PROVABLE AFTER WEAKENING / EXTRA ASSUMPTION`
77
- - `NOT CURRENTLY JUSTIFIED`
78
-
79
- Check explicitly:
80
- - does the conclusion actually follow from the listed assumptions?
81
- - is any cited theorem being used outside its conditions?
82
- - is the claim stronger than what the available argument supports?
83
- - is there an obvious counterexample, boundary case, or quantifier failure?
84
-
85
- If the claim is not provable as stated, do NOT fabricate a proof.
86
- Do NOT silently strengthen assumptions or narrow the theorem's scope just to make the proof work.
87
-
88
- ### Step 4: Build a Dependency Map
89
- Choose a proof strategy, for example:
90
- - direct
91
- - contradiction
92
- - induction
93
- - construction
94
- - reduction to a known result
95
- - coupling / probabilistic argument
96
- - optimization inequality chaining
97
-
98
- Then write a dependency map:
99
- - main claim
100
- - required intermediate lemmas
101
- - named theorems or inequalities that will be cited
102
- - which assumptions each nontrivial step depends on
103
- - boundary cases that must be handled separately
104
-
105
- If one step is substantial, isolate it as a lemma instead of burying it in one sentence.
106
-
107
- ### Step 5: Write the Proof Document
108
- Write to the chosen target proof file.
109
-
110
- If the target proof file already exists:
111
- - read it first
112
- - update the relevant claim section
113
- - do not blindly duplicate prior content
114
-
115
- If the user does not specify a target, default to `PROOF_PACKAGE.md` in project root.
116
-
117
- Do NOT write directly into paper sections or appendix `.tex` files unless the user explicitly asks for that target.
118
-
119
- The proof package must include:
120
- - exact claim
121
- - explicit assumptions
122
- - proof status
123
- - announced strategy
124
- - dependency map
125
- - numbered major steps
126
- - justification for every nontrivial implication
127
-
128
- Mathematical rigor requirements:
129
- - never use "clearly", "obviously", "it can be shown", "by standard arguments", or "similarly" to hide a gap
130
- - define every constant and symbol before use
131
- - check quantifier order carefully
132
- - handle degenerate and boundary cases explicitly, or state why they are excluded
133
- - if invoking a standard fact, state its name and why its assumptions are satisfied here
134
- - use `$...$` for inline math and `$$...$$` for display equations
135
- - never write math in plain text
136
- - if the proof uses an equivalent normalization that is stronger in appearance than the user's original theorem statement, label it explicitly as a proof device and keep the original claim separate
137
-
138
- ### Step 6: Final Verification
139
- Before finishing the target proof file, verify:
140
- - the theorem statement exactly matches what was actually shown
141
- - every assumption used is stated
142
- - every nontrivial implication is justified
143
- - every inequality direction is correct
144
- - every cited result is applicable under the stated assumptions
145
- - edge cases are handled or explicitly excluded
146
- - no hidden dependence on an unproved lemma remains
147
-
148
- If a key step still cannot be justified, downgrade the status and write a blockage report instead of forcing a proof.
149
-
150
- ## Required File Structure
151
-
152
- Write the target proof file using this structure:
153
-
154
- ```md
155
- # Proof Package
156
-
157
- ## Claim
158
- [exact statement]
159
-
160
- ## Status
161
- PROVABLE AS STATED / PROVABLE AFTER WEAKENING / NOT CURRENTLY JUSTIFIED
162
-
163
- ## Assumptions
164
- - ...
165
-
166
- ## Notation
167
- - ...
168
-
169
- ## Proof Strategy
170
- [chosen approach and why]
171
-
172
- ## Dependency Map
173
- 1. Main claim depends on ...
174
- 2. Lemma A depends on ...
175
- 3. Step k uses ...
176
-
177
- ## Proof
178
- Step 1. ...
179
- Step 2. ...
180
- ...
181
- Therefore the claim follows. ∎
182
-
183
- ## Corrections or Missing Assumptions
184
- - [only if needed]
185
-
186
- ## Open Risks
187
- - [remaining fragile points, if any]
188
- ```
189
-
190
- ## Output Modes
191
-
192
- ### If the claim is provable as stated
193
- Write the full file structure above with a complete proof.
194
-
195
- ### If the original claim is too strong
196
- Write:
197
- - why the original statement is not justified
198
- - the corrected claim
199
- - the minimal extra assumption if one exists
200
- - a proof of the corrected claim
201
-
202
- ### If the proof cannot be completed honestly
203
- Write:
204
- - `Status: NOT CURRENTLY JUSTIFIED`
205
- - the exact blocker: missing lemma, invalid implication, hidden assumption, or counterexample direction
206
- - what extra assumption, lemma, or derivation would be needed to finish the proof
207
- - a corrected weaker statement if one is available
208
-
209
- ## Chat Response
210
-
211
- After writing the target proof file, respond briefly with:
212
- - status
213
- - whether the original claim survived unchanged
214
- - what file was updated
215
-
216
- ## Key Rules
217
-
218
- - Never fabricate a missing proof step.
219
- - Prefer weakening the claim over overclaiming.
220
- - Separate assumptions, derived facts, heuristics, and conjectures.
221
- - Preserve the user's original theorem statement unless you explicitly mark a corrected claim or an internal normalization.
222
- - If the statement is false as written, say so explicitly and give a counterexample or repaired statement.
223
- - If uncertainty remains, mark it explicitly in `Open Risks`; do not hide it inside polished prose.
224
- - Correctness matters more than brevity.
9
+
10
+ # Proof Write: Rigorous Theorem / Lemma Drafting
11
+
12
+ Write a mathematically honest proof package, not a polished fake proof.
13
+
14
+ ## Constants
15
+
16
+ - DEFAULT_PROOF_DOC = `PROOF_PACKAGE.md` in project root
17
+ - STATUS = `PROVABLE AS STATED | PROVABLE AFTER WEAKENING / EXTRA ASSUMPTION | NOT CURRENTLY JUSTIFIED`
18
+
19
+ ## Context: $ARGUMENTS
20
+
21
+ ## Goal
22
+
23
+ Produce exactly one of:
24
+ 1. a complete proof of the original claim
25
+ 2. a corrected claim plus a proof of the corrected claim
26
+ 3. a blockage report explaining why the claim is not currently justified
27
+
28
+ ## Inputs
29
+
30
+ Extract and normalize:
31
+ - exact theorem / lemma / proposition / corollary statement
32
+ - explicit assumptions
33
+ - notation and definitions
34
+ - any user-provided proof sketch, partial proof, or intended strategy
35
+ - nearby lemmas or claims in local notes, appendix files, or theorem drafts if the request points to them
36
+ - desired output style if specified: concise, appendix-ready, or full-detail
37
+
38
+ If notation or assumptions are ambiguous, state the exact interpretation you are using before proving anything.
39
+
40
+ ## Workflow
41
+
42
+ ### Step 1: Gather Proof Context
43
+ Determine the target proof file with this priority:
44
+ 1. a file path explicitly specified by the user
45
+ 2. a proof draft already referenced in local notes or theorem files
46
+ 3. `PROOF_PACKAGE.md` in project root as the default target
47
+
48
+ Read the relevant local context:
49
+ - the chosen target proof file, if it already exists
50
+ - theorem notes, appendix drafts, or files explicitly mentioned by the user
51
+
52
+ Extract:
53
+ - exact claim
54
+ - assumptions
55
+ - notation
56
+ - proof sketch or partial proof
57
+ - nearby lemmas that the draft may depend on
58
+
59
+ ### Step 2: Normalize the Claim
60
+ Restate:
61
+ - the exact claim being proved
62
+ - all assumptions, separately from conclusions
63
+ - all symbols used in the claim
64
+
65
+ Identify:
66
+ - hidden assumptions
67
+ - undefined notation
68
+ - scope ambiguities
69
+ - whether the available sketch proves the full claim or only a weaker variant
70
+
71
+ Preserve the user's original theorem statement unless a change is explicitly required.
72
+ If you use a stronger normalization or cleaner internal formulation only to make the proof easier, keep that as an internal proof device rather than silently replacing the original claim.
73
+
74
+ ### Step 3: Feasibility Triage
75
+ Before writing a proof, classify the claim into exactly one status:
76
+ - `PROVABLE AS STATED`
77
+ - `PROVABLE AFTER WEAKENING / EXTRA ASSUMPTION`
78
+ - `NOT CURRENTLY JUSTIFIED`
79
+
80
+ Check explicitly:
81
+ - does the conclusion actually follow from the listed assumptions?
82
+ - is any cited theorem being used outside its conditions?
83
+ - is the claim stronger than what the available argument supports?
84
+ - is there an obvious counterexample, boundary case, or quantifier failure?
85
+
86
+ If the claim is not provable as stated, do NOT fabricate a proof.
87
+ Do NOT silently strengthen assumptions or narrow the theorem's scope just to make the proof work.
88
+
89
+ ### Step 4: Build a Dependency Map
90
+ Choose a proof strategy, for example:
91
+ - direct
92
+ - contradiction
93
+ - induction
94
+ - construction
95
+ - reduction to a known result
96
+ - coupling / probabilistic argument
97
+ - optimization inequality chaining
98
+
99
+ Then write a dependency map:
100
+ - main claim
101
+ - required intermediate lemmas
102
+ - named theorems or inequalities that will be cited
103
+ - which assumptions each nontrivial step depends on
104
+ - boundary cases that must be handled separately
105
+
106
+ If one step is substantial, isolate it as a lemma instead of burying it in one sentence.
107
+
108
+ ### Step 5: Write the Proof Document
109
+ Write to the chosen target proof file.
110
+
111
+ If the target proof file already exists:
112
+ - read it first
113
+ - update the relevant claim section
114
+ - do not blindly duplicate prior content
115
+
116
+ If the user does not specify a target, default to `PROOF_PACKAGE.md` in project root.
117
+
118
+ Do NOT write directly into paper sections or appendix `.tex` files unless the user explicitly asks for that target.
119
+
120
+ The proof package must include:
121
+ - exact claim
122
+ - explicit assumptions
123
+ - proof status
124
+ - announced strategy
125
+ - dependency map
126
+ - numbered major steps
127
+ - justification for every nontrivial implication
128
+
129
+ Mathematical rigor requirements:
130
+ - never use "clearly", "obviously", "it can be shown", "by standard arguments", or "similarly" to hide a gap
131
+ - define every constant and symbol before use
132
+ - check quantifier order carefully
133
+ - handle degenerate and boundary cases explicitly, or state why they are excluded
134
+ - if invoking a standard fact, state its name and why its assumptions are satisfied here
135
+ - use `$...$` for inline math and `$$...$$` for display equations
136
+ - never write math in plain text
137
+ - if the proof uses an equivalent normalization that is stronger in appearance than the user's original theorem statement, label it explicitly as a proof device and keep the original claim separate
138
+
139
+ ### Step 6: Final Verification
140
+ Before finishing the target proof file, verify:
141
+ - the theorem statement exactly matches what was actually shown
142
+ - every assumption used is stated
143
+ - every nontrivial implication is justified
144
+ - every inequality direction is correct
145
+ - every cited result is applicable under the stated assumptions
146
+ - edge cases are handled or explicitly excluded
147
+ - no hidden dependence on an unproved lemma remains
148
+
149
+ If a key step still cannot be justified, downgrade the status and write a blockage report instead of forcing a proof.
150
+
151
+ ## Required File Structure
152
+
153
+ Write the target proof file using this structure:
154
+
155
+ ```md
156
+ # Proof Package
157
+
158
+ ## Claim
159
+ [exact statement]
160
+
161
+ ## Status
162
+ PROVABLE AS STATED | PROVABLE AFTER WEAKENING / EXTRA ASSUMPTION | NOT CURRENTLY JUSTIFIED
163
+
164
+ ## Assumptions
165
+ - ...
166
+
167
+ ## Notation
168
+ - ...
169
+
170
+ ## Proof Strategy
171
+ [chosen approach and why]
172
+
173
+ ## Dependency Map
174
+ 1. Main claim depends on ...
175
+ 2. Lemma A depends on ...
176
+ 3. Step k uses ...
177
+
178
+ ## Proof
179
+ Step 1. ...
180
+ Step 2. ...
181
+ ...
182
+ Therefore the claim follows. ∎
183
+
184
+ ## Corrections or Missing Assumptions
185
+ - [only if needed]
186
+
187
+ ## Open Risks
188
+ - [remaining fragile points, if any]
189
+ ```
190
+
191
+ ## Output Modes
192
+
193
+ ### If the claim is provable as stated
194
+ Write the full file structure above with a complete proof.
195
+
196
+ ### If the original claim is too strong
197
+ Write:
198
+ - why the original statement is not justified
199
+ - the corrected claim
200
+ - the minimal extra assumption if one exists
201
+ - a proof of the corrected claim
202
+
203
+ ### If the proof cannot be completed honestly
204
+ Write:
205
+ - `Status: NOT CURRENTLY JUSTIFIED`
206
+ - the exact blocker: missing lemma, invalid implication, hidden assumption, or counterexample direction
207
+ - what extra assumption, lemma, or derivation would be needed to finish the proof
208
+ - a corrected weaker statement if one is available
209
+
210
+ ## Chat Response
211
+
212
+ After writing the target proof file, respond briefly with:
213
+ - status
214
+ - whether the original claim survived unchanged
215
+ - what file was updated
216
+
217
+ ## Key Rules
218
+
219
+ - Never fabricate a missing proof step.
220
+ - Prefer weakening the claim over overclaiming.
221
+ - Separate assumptions, derived facts, heuristics, and conjectures.
222
+ - Preserve the user's original theorem statement unless you explicitly mark a corrected claim or an internal normalization.
223
+ - If the statement is false as written, say so explicitly and give a counterexample or repaired statement.
224
+ - If uncertainty remains, mark it explicitly in `Open Risks`; do not hide it inside polished prose.
225
+ - Correctness matters more than brevity.
@@ -6,58 +6,59 @@ allowed-tools: "Bash(ssh *), Bash(echo *), Read, Write, Edit"
6
6
  metadata:
7
7
  category: "data-analysis/experiment-monitoring"
8
8
  ---
9
- # Monitor Experiment Results
10
-
11
- Monitor: $ARGUMENTS
12
-
13
- ## Workflow
14
-
15
- ### Step 1: Check What's Running
16
- ```bash
17
- ssh <server> "screen -ls"
18
- ```
19
-
20
- ### Step 2: Collect Output from Each Screen
21
- For each screen session, capture the last N lines:
22
- ```bash
23
- ssh <server> "screen -S <name> -X hardcopy /tmp/screen_<name>.txt && tail -50 /tmp/screen_<name>.txt"
24
- ```
25
-
26
- If hardcopy fails, check for log files or tee output.
27
-
28
- ### Step 3: Check for JSON Result Files
29
- ```bash
30
- ssh <server> "ls -lt <results_dir>/*.json 2>/dev/null | head -20"
31
- ```
32
-
33
- If JSON results exist, fetch and parse them:
34
- ```bash
35
- ssh <server> "cat <results_dir>/<latest>.json"
36
- ```
37
-
38
- ### Step 4: Summarize Results
39
-
40
- Present results in a comparison table:
41
- ```
42
- | Experiment | Metric | Delta vs Baseline | Status |
43
- |-----------|--------|-------------------|--------|
44
- | Baseline | X.XX | — | done |
45
- | Method A | X.XX | +Y.Y | done |
46
- ```
47
-
48
- ### Step 5: Interpret
49
- - Compare against known baselines
50
- - Flag unexpected results (negative delta, NaN, divergence)
51
- - Suggest next steps based on findings
52
-
53
- ### Step 6: Feishu Notification (if configured)
54
-
55
- After results are collected, check `~/.claude/feishu.json`:
56
- - Send `experiment_done` notification: results summary table, delta vs baseline
57
- - If config absent or mode `"off"`: skip entirely (no-op)
58
-
59
- ## Key Rules
60
- - Always show raw numbers before interpretation
61
- - Compare against the correct baseline (same config)
62
- - Note if experiments are still running (check progress bars, iteration counts)
63
- - If results look wrong, check training logs for errors before concluding
9
+
10
+ # Monitor Experiment Results
11
+
12
+ Monitor: $ARGUMENTS
13
+
14
+ ## Workflow
15
+
16
+ ### Step 1: Check What's Running
17
+ ```bash
18
+ ssh <server> "screen -ls"
19
+ ```
20
+
21
+ ### Step 2: Collect Output from Each Screen
22
+ For each screen session, capture the last N lines of output (the example below uses N = 50):
23
+ ```bash
24
+ ssh <server> "screen -S <name> -X hardcopy /tmp/screen_<name>.txt && tail -50 /tmp/screen_<name>.txt"
25
+ ```
26
+
27
+ If hardcopy fails, check for log files or tee output.
28
+
29
+ ### Step 3: Check for JSON Result Files
30
+ ```bash
31
+ ssh <server> "ls -lt <results_dir>/*.json 2>/dev/null | head -20"
32
+ ```
33
+
34
+ If JSON results exist, fetch and parse them:
35
+ ```bash
36
+ ssh <server> "cat <results_dir>/<latest>.json"
37
+ ```
38
+
39
+ ### Step 4: Summarize Results
40
+
41
+ Present results in a comparison table:
42
+ ```
43
+ | Experiment | Metric | Delta vs Baseline | Status |
44
+ |-----------|--------|-------------------|--------|
45
+ | Baseline | X.XX | — | done |
46
+ | Method A | X.XX | +Y.Y | done |
47
+ ```
48
+
49
+ ### Step 5: Interpret
50
+ - Compare against known baselines
51
+ - Flag unexpected results (negative delta, NaN, divergence)
52
+ - Suggest next steps based on findings
53
+
54
+ ### Step 6: Feishu Notification (if configured)
55
+
56
+ After results are collected, check `~/.claude/feishu.json`:
57
+ - If the config exists and its mode is not `"off"`, send an `experiment_done` notification containing the results summary table and the delta vs baseline
58
+ - If the config file is absent or its mode is `"off"`, skip this step entirely (no-op)
59
+
60
+ ## Key Rules
61
+ - Always show raw numbers before interpretation
62
+ - Compare against the correct baseline (same config)
63
+ - Note if experiments are still running (check progress bars, iteration counts)
64
+ - If results look wrong, check training logs for errors before concluding