@bohuyeshan/openagent-labforge-core 3.11.2 → 3.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. package/README.ja.md +34 -27
  2. package/README.ko.md +34 -27
  3. package/README.md +245 -188
  4. package/README.ru.md +28 -21
  5. package/README.zh-cn.md +245 -188
  6. package/bin/platform.test.ts +21 -20
  7. package/dist/agents/atlas/default.d.ts +1 -1
  8. package/dist/agents/atlas/gemini.d.ts +1 -1
  9. package/dist/agents/atlas/gpt.d.ts +1 -1
  10. package/dist/agents/bio-methodologist.d.ts +1 -1
  11. package/dist/agents/bio-pipeline-operator.d.ts +1 -1
  12. package/dist/agents/builtin-agents/general-agents.d.ts +1 -0
  13. package/dist/agents/dynamic-agent-prompt-builder.d.ts +2 -0
  14. package/dist/agents/env-context.d.ts +1 -1
  15. package/dist/agents/index.d.ts +1 -0
  16. package/dist/agents/metis.d.ts +1 -1
  17. package/dist/agents/prometheus/gemini.d.ts +1 -1
  18. package/dist/agents/prometheus/gpt.d.ts +1 -1
  19. package/dist/agents/prometheus/interview-mode.d.ts +1 -1
  20. package/dist/agents/prometheus/plan-generation.d.ts +1 -1
  21. package/dist/agents/prometheus/plan-template.d.ts +1 -1
  22. package/dist/agents/prometheus/system-prompt.d.ts +1 -1
  23. package/dist/agents/types.d.ts +1 -1
  24. package/dist/cli/config-manager/bun-install.d.ts +6 -1
  25. package/dist/cli/config-manager/plugin-name-with-version.d.ts +1 -1
  26. package/dist/cli/doctor/constants.d.ts +1 -1
  27. package/dist/cli/index.js +763 -467
  28. package/dist/cli/install-validators.d.ts +1 -0
  29. package/dist/cli/model-fallback-types.d.ts +1 -0
  30. package/dist/cli/openai-only-model-catalog.d.ts +3 -0
  31. package/dist/cli/run/index.d.ts +1 -0
  32. package/dist/cli/run/model-resolver.d.ts +4 -0
  33. package/dist/cli/run/types.d.ts +1 -0
  34. package/dist/cli/types.d.ts +3 -0
  35. package/dist/config/schema/agent-names.d.ts +3 -1
  36. package/dist/config/schema/background-task.d.ts +2 -0
  37. package/dist/config/schema/git-env-prefix.d.ts +5 -0
  38. package/dist/config/schema/git-master.d.ts +1 -0
  39. package/dist/config/schema/hooks.d.ts +2 -0
  40. package/dist/config/schema/oh-my-opencode-config.d.ts +54 -1
  41. package/dist/config/schema.d.ts +1 -0
  42. package/dist/create-hooks.d.ts +13 -0
  43. package/dist/features/background-agent/compaction-aware-message-resolver.d.ts +16 -1
  44. package/dist/features/background-agent/constants.d.ts +1 -1
  45. package/dist/features/background-agent/manager.d.ts +20 -4
  46. package/dist/features/background-agent/process-cleanup.d.ts +1 -1
  47. package/dist/features/background-agent/remove-task-toast-tracking.d.ts +1 -0
  48. package/dist/features/background-agent/subagent-spawn-limits.d.ts +23 -0
  49. package/dist/features/background-agent/task-history.d.ts +1 -0
  50. package/dist/features/background-agent/task-poller.d.ts +1 -0
  51. package/dist/features/background-agent/types.d.ts +4 -0
  52. package/dist/features/claude-code-agent-loader/claude-model-mapper.d.ts +4 -0
  53. package/dist/features/claude-code-agent-loader/loader.d.ts +3 -3
  54. package/dist/features/claude-code-agent-loader/types.d.ts +8 -1
  55. package/dist/features/claude-code-plugin-loader/agent-loader.d.ts +2 -2
  56. package/dist/features/claude-code-plugin-loader/loader.d.ts +2 -2
  57. package/dist/features/claude-code-plugin-loader/types.d.ts +1 -1
  58. package/dist/features/opencode-skill-loader/git-master-template-injection.d.ts +1 -1
  59. package/dist/features/skill-mcp-manager/types.d.ts +4 -0
  60. package/dist/features/tmux-subagent/index.d.ts +1 -0
  61. package/dist/features/tmux-subagent/manager.d.ts +5 -0
  62. package/dist/features/tmux-subagent/pane-state-parser.d.ts +8 -0
  63. package/dist/features/tmux-subagent/tracked-session-state.d.ts +8 -0
  64. package/dist/features/tmux-subagent/types.d.ts +2 -0
  65. package/dist/hooks/atlas/boulder-session-lineage.d.ts +6 -0
  66. package/dist/hooks/atlas/final-wave-approval-gate.d.ts +4 -0
  67. package/dist/hooks/atlas/idle-event.d.ts +8 -0
  68. package/dist/hooks/atlas/resolve-active-boulder-session.d.ts +11 -0
  69. package/dist/hooks/atlas/tool-execute-after.d.ts +2 -0
  70. package/dist/hooks/atlas/types.d.ts +2 -0
  71. package/dist/hooks/atlas/verification-reminders.d.ts +4 -0
  72. package/dist/hooks/auto-slash-command/hook.d.ts +7 -0
  73. package/dist/hooks/auto-slash-command/processed-command-store.d.ts +7 -0
  74. package/dist/hooks/auto-update-checker/checker/sync-package-json.d.ts +7 -0
  75. package/dist/hooks/auto-update-checker/checker.d.ts +3 -1
  76. package/dist/hooks/auto-update-checker/constants.d.ts +2 -2
  77. package/dist/hooks/comment-checker/downloader.d.ts +1 -1
  78. package/dist/hooks/compaction-context-injector/compaction-context-prompt.d.ts +1 -0
  79. package/dist/hooks/compaction-context-injector/constants.d.ts +5 -0
  80. package/dist/hooks/compaction-context-injector/hook.d.ts +5 -1
  81. package/dist/hooks/compaction-context-injector/recovery-prompt-config.d.ts +6 -0
  82. package/dist/hooks/compaction-context-injector/recovery.d.ts +6 -0
  83. package/dist/hooks/compaction-context-injector/session-id.d.ts +2 -0
  84. package/dist/hooks/compaction-context-injector/session-prompt-config-resolver.d.ts +16 -0
  85. package/dist/hooks/compaction-context-injector/tail-monitor.d.ts +13 -0
  86. package/dist/hooks/compaction-context-injector/types.d.ts +43 -0
  87. package/dist/hooks/compaction-context-injector/validated-model.d.ts +13 -0
  88. package/dist/hooks/context-window-monitor.d.ts +2 -5
  89. package/dist/hooks/delegate-task-english-directive/hook.d.ts +14 -0
  90. package/dist/hooks/delegate-task-english-directive/index.d.ts +1 -0
  91. package/dist/hooks/gpt-permission-continuation/assistant-message.d.ts +23 -0
  92. package/dist/hooks/gpt-permission-continuation/constants.d.ts +4 -0
  93. package/dist/hooks/gpt-permission-continuation/detector.d.ts +1 -0
  94. package/dist/hooks/gpt-permission-continuation/handler.d.ts +12 -0
  95. package/dist/hooks/gpt-permission-continuation/index.d.ts +13 -0
  96. package/dist/hooks/gpt-permission-continuation/session-state.d.ts +15 -0
  97. package/dist/hooks/index.d.ts +2 -0
  98. package/dist/hooks/keyword-detector/hook.d.ts +1 -0
  99. package/dist/hooks/preemptive-compaction.d.ts +2 -5
  100. package/dist/hooks/ralph-loop/pending-verification-handler.d.ts +16 -0
  101. package/dist/hooks/runtime-fallback/fallback-bootstrap-model.d.ts +10 -0
  102. package/dist/hooks/runtime-fallback/fallback-retry-dispatcher.d.ts +11 -0
  103. package/dist/hooks/runtime-fallback/hook.d.ts +2 -3
  104. package/dist/hooks/runtime-fallback/last-user-retry-parts.d.ts +4 -0
  105. package/dist/hooks/runtime-fallback/message-update-handler.d.ts +1 -2
  106. package/dist/hooks/runtime-fallback/retry-model-payload.d.ts +7 -0
  107. package/dist/hooks/runtime-fallback/session-messages.d.ts +9 -0
  108. package/dist/hooks/runtime-fallback/session-status-handler.d.ts +3 -0
  109. package/dist/hooks/runtime-fallback/types.d.ts +57 -3
  110. package/dist/hooks/runtime-fallback/visible-assistant-response.d.ts +3 -0
  111. package/dist/hooks/session-notification-content.d.ts +30 -0
  112. package/dist/hooks/session-notification-scheduler.d.ts +5 -3
  113. package/dist/hooks/session-notification.d.ts +2 -0
  114. package/dist/hooks/start-work/index.d.ts +1 -1
  115. package/dist/hooks/start-work/worktree-detector.d.ts +7 -0
  116. package/dist/hooks/todo-continuation-enforcer/constants.d.ts +6 -2
  117. package/dist/hooks/todo-continuation-enforcer/handler.d.ts +1 -0
  118. package/dist/hooks/todo-continuation-enforcer/idle-event.d.ts +1 -0
  119. package/dist/hooks/todo-continuation-enforcer/session-state.d.ts +10 -1
  120. package/dist/hooks/todo-continuation-enforcer/stagnation-detection.d.ts +6 -0
  121. package/dist/hooks/todo-continuation-enforcer/types.d.ts +5 -0
  122. package/dist/hooks/tool-output-truncator.d.ts +1 -0
  123. package/dist/index.d.ts +2 -2
  124. package/dist/index.js +10577 -7241
  125. package/dist/openagent-labforge.schema.json +70 -4
  126. package/dist/plugin/hooks/create-continuation-hooks.d.ts +2 -1
  127. package/dist/plugin/hooks/create-core-hooks.d.ts +1 -0
  128. package/dist/plugin/hooks/create-session-hooks.d.ts +2 -1
  129. package/dist/plugin/normalize-tool-arg-schemas.d.ts +2 -0
  130. package/dist/plugin/ultrawork-model-override.d.ts +1 -15
  131. package/dist/plugin/ultrawork-variant-availability.d.ts +6 -0
  132. package/dist/plugin-dispose.d.ts +10 -0
  133. package/dist/plugin-handlers/agent-override-protection.d.ts +3 -0
  134. package/dist/plugin-state.d.ts +5 -0
  135. package/dist/shared/compaction-agent-config-checkpoint.d.ts +11 -0
  136. package/dist/shared/context-limit-resolver.d.ts +5 -0
  137. package/dist/shared/data-path.d.ts +2 -2
  138. package/dist/shared/dynamic-truncator.d.ts +4 -7
  139. package/dist/shared/external-plugin-detector.d.ts +1 -1
  140. package/dist/shared/fallback-chain-from-models.d.ts +3 -0
  141. package/dist/shared/index.d.ts +3 -0
  142. package/dist/shared/model-error-classifier.d.ts +2 -1
  143. package/dist/shared/opencode-command-dirs.d.ts +3 -0
  144. package/dist/shared/plugin-identity.d.ts +7 -0
  145. package/dist/shared/question-denied-session-permission.d.ts +6 -0
  146. package/dist/shared/retry-status-utils.d.ts +2 -0
  147. package/dist/shared/system-directive.d.ts +6 -5
  148. package/dist/shared/vision-capable-models-cache.d.ts +4 -0
  149. package/dist/tools/call-omo-agent/background-executor.d.ts +2 -1
  150. package/dist/tools/call-omo-agent/constants.d.ts +1 -1
  151. package/dist/tools/call-omo-agent/sync-executor.d.ts +11 -3
  152. package/dist/tools/call-omo-agent/tools.d.ts +2 -1
  153. package/dist/tools/delegate-task/cancel-unstable-agent-task.d.ts +2 -0
  154. package/dist/tools/delegate-task/model-selection.d.ts +1 -0
  155. package/dist/tools/delegate-task/model-string-parser.d.ts +1 -3
  156. package/dist/tools/look-at/multimodal-fallback-chain.d.ts +4 -0
  157. package/dist/tools/lsp/constants.d.ts +1 -0
  158. package/dist/tools/lsp/directory-diagnostics.d.ts +1 -0
  159. package/dist/tools/lsp/lsp-client-transport.d.ts +4 -2
  160. package/dist/tools/lsp/lsp-client-wrapper.d.ts +2 -1
  161. package/dist/tools/lsp/server-path-bases.d.ts +1 -0
  162. package/generated/skills-bundles/catalog.json +282 -39
  163. package/generated/skills-bundles/full/INDEX.md +38 -10
  164. package/generated/skills-bundles/full/skills/data-analysis/experiment-monitoring/auto-claude__monitor-experiment/SKILL.md +63 -0
  165. package/generated/skills-bundles/full/skills/data-analysis/experiment-ops/auto-claude__run-experiment/SKILL.md +112 -0
  166. package/generated/skills-bundles/full/skills/data-analysis/optimization/auto-claude__dse-loop/SKILL.md +279 -0
  167. package/generated/skills-bundles/full/skills/data-analysis/statistics/auto-claude__analyze-results/SKILL.md +47 -0
  168. package/generated/skills-bundles/full/skills/data-analysis/visualization/auto-claude__paper-figure/SKILL.md +281 -0
  169. package/generated/skills-bundles/full/skills/productivity/communication/auto-claude__feishu-notify/SKILL.md +154 -0
  170. package/generated/skills-bundles/full/skills/productivity/visual-design/auto-claude__pixel-art/SKILL.md +138 -0
  171. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-compile/SKILL.md +252 -0
  172. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-plan/SKILL.md +254 -0
  173. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/SKILL.md +310 -0
  174. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/iclr2026.tex +84 -0
  175. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/icml2025.tex +87 -0
  176. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/math_commands.tex +48 -0
  177. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/neurips2025.tex +80 -0
  178. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-writing/SKILL.md +255 -0
  179. package/generated/skills-bundles/full/skills/research/literature-and-web-search/auto-claude__arxiv/SKILL.md +133 -0
  180. package/generated/skills-bundles/full/skills/research/literature-and-web-search/auto-claude__novelty-check/SKILL.md +87 -0
  181. package/generated/skills-bundles/full/skills/research/literature-and-web-search/auto-claude__research-lit/SKILL.md +194 -0
  182. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__idea-creator/SKILL.md +228 -0
  183. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__idea-discovery/SKILL.md +186 -0
  184. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__idea-discovery-robot/SKILL.md +351 -0
  185. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__research-pipeline/SKILL.md +174 -0
  186. package/generated/skills-bundles/full/skills/research/theory-writing/auto-claude__proof-writer/SKILL.md +224 -0
  187. package/generated/skills-bundles/paper/INDEX.md +40 -0
  188. package/generated/skills-bundles/paper/skills/data-analysis/experiment-monitoring/auto-claude__monitor-experiment/SKILL.md +63 -0
  189. package/generated/skills-bundles/paper/skills/data-analysis/experiment-ops/auto-claude__run-experiment/SKILL.md +112 -0
  190. package/generated/skills-bundles/paper/skills/data-analysis/optimization/auto-claude__dse-loop/SKILL.md +279 -0
  191. package/generated/skills-bundles/paper/skills/data-analysis/statistics/auto-claude__analyze-results/SKILL.md +47 -0
  192. package/generated/skills-bundles/paper/skills/data-analysis/visualization/auto-claude__paper-figure/SKILL.md +281 -0
  193. package/generated/skills-bundles/paper/skills/productivity/communication/auto-claude__feishu-notify/SKILL.md +154 -0
  194. package/generated/skills-bundles/paper/skills/productivity/visual-design/auto-claude__pixel-art/SKILL.md +138 -0
  195. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-compile/SKILL.md +252 -0
  196. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-plan/SKILL.md +254 -0
  197. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/SKILL.md +310 -0
  198. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/iclr2026.tex +84 -0
  199. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/icml2025.tex +87 -0
  200. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/math_commands.tex +48 -0
  201. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/neurips2025.tex +80 -0
  202. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-writing/SKILL.md +255 -0
  203. package/generated/skills-bundles/paper/skills/research/literature-and-web-search/auto-claude__arxiv/SKILL.md +133 -0
  204. package/generated/skills-bundles/paper/skills/research/literature-and-web-search/auto-claude__novelty-check/SKILL.md +87 -0
  205. package/generated/skills-bundles/paper/skills/research/literature-and-web-search/auto-claude__research-lit/SKILL.md +194 -0
  206. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__idea-creator/SKILL.md +228 -0
  207. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__idea-discovery/SKILL.md +186 -0
  208. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__idea-discovery-robot/SKILL.md +351 -0
  209. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__research-pipeline/SKILL.md +174 -0
  210. package/generated/skills-bundles/paper/skills/research/theory-writing/auto-claude__proof-writer/SKILL.md +224 -0
  211. package/package.json +21 -21
  212. package/generated/skills-bundles/full/skills/engineering/virtualization/skills-main__virtualbox/LICENSE.txt +0 -21
  213. package/generated/skills-bundles/full/skills/engineering/virtualization/skills-main__virtualbox/SKILL.md +0 -615
  214. package/generated/skills-bundles/full/skills/engineering/virtualization/skills-main__virtualbox/_meta.json +0 -11
  215. package/generated/skills-bundles/full/skills/engineering/virtualization/skills-main__virtualbox/scripts/virtualbox-utils.ts +0 -586
  216. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/.clawhubsafe +0 -0
  217. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/LICENSE +0 -21
  218. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/README.md +0 -127
  219. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/SECURITY.md +0 -68
  220. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/SKILL.md +0 -141
  221. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/_meta.json +0 -11
  222. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/scripts/speak.sh +0 -52
  223. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/scripts/transcribe.sh +0 -50
  224. package/generated/skills-bundles/full/skills/research/media-search/skills-main__youtube-search/LICENSE.txt +0 -21
  225. package/generated/skills-bundles/full/skills/research/media-search/skills-main__youtube-search/SKILL.md +0 -416
  226. package/generated/skills-bundles/full/skills/research/media-search/skills-main__youtube-search/_meta.json +0 -11
@@ -0,0 +1,228 @@
1
+ ---
2
+ name: "auto-claude/idea-creator"
3
+ description: "Generate and rank research ideas given a broad direction. Use when user says \"找idea\", \"brainstorm ideas\", \"generate research ideas\", \"what can we work on\", or wants to explore a research area for publishable directions."
4
+ argument-hint: ["research-direction"]
5
+ allowed-tools: "Bash(*), Read, Write, Grep, Glob, WebSearch, WebFetch, Agent, mcp__codex__codex, mcp__codex__codex-reply"
6
+ metadata:
7
+ category: "research/research-ideation"
8
+ ---
9
+
10
+ # Research Idea Creator
11
+
12
+ Generate publishable research ideas for: $ARGUMENTS
13
+
14
+ ## Overview
15
+
16
+ Given a broad research direction from the user, systematically generate, validate, and rank concrete research ideas. This skill composes with `/research-lit` and `/novelty-check` to form a complete idea discovery pipeline.
17
+
18
+ ## Constants
19
+
20
+ - **PILOT_MAX_HOURS = 2** — Skip any pilot estimated to take > 2 hours per GPU. Flag as "needs manual pilot".
21
+ - **PILOT_TIMEOUT_HOURS = 3** — Hard timeout: kill pilots exceeding 3 hours. Collect partial results if available.
22
+ - **MAX_PILOT_IDEAS = 3** — Pilot at most 3 ideas in parallel. Additional ideas are validated on paper only.
23
+ - **MAX_TOTAL_GPU_HOURS = 8** — Total GPU budget for all pilots combined.
24
+ - **REVIEWER_MODEL = `gpt-5.4`** — Model used via Codex MCP for brainstorming and review. Must be an OpenAI model (e.g., `gpt-5.4`, `o3`, `gpt-4o`).
25
+
26
+ > 💡 Override via argument, e.g., `/idea-creator "topic" — pilot budget: 4h per idea, 20h total`.
27
+
28
+ ## Workflow
29
+
30
+ ### Phase 1: Landscape Survey (5-10 min)
31
+
32
+ Map the research area to understand what exists and where the gaps are.
33
+
34
+ 1. **Scan local paper library first**: Check `papers/` and `literature/` in the project directory for existing PDFs. Read the first 3 pages of relevant papers to build a baseline understanding before searching online. This avoids re-discovering what the user already knows.
35
+
36
+ 2. **Search recent literature** using WebSearch:
37
+ - Top venues in the last 2 years (NeurIPS, ICML, ICLR, ACL, EMNLP, etc.)
38
+ - Recent arXiv preprints (last 6 months)
39
+ - Use 5+ different query formulations
40
+ - Read abstracts and introductions of the top 10-15 papers
41
+
42
+ 3. **Build a landscape map**:
43
+ - Group papers by sub-direction / approach
44
+ - Identify what has been tried and what hasn't
45
+ - Note recurring limitations mentioned in "Future Work" sections
46
+ - Flag any open problems explicitly stated by multiple papers
47
+
48
+ 4. **Identify structural gaps**:
49
+ - Methods that work in domain A but haven't been tried in domain B
50
+ - Contradictory findings between papers (opportunity for resolution)
51
+ - Assumptions that everyone makes but nobody has tested
52
+ - Scaling regimes that haven't been explored
53
+ - Diagnostic questions that nobody has asked
54
+
55
+ ### Phase 2: Idea Generation (brainstorm with external LLM)
56
+
57
+ Use the external LLM via Codex MCP for divergent thinking:
58
+
59
+ ```
60
+ mcp__codex__codex:
61
+ model: REVIEWER_MODEL
62
+ config: {"model_reasoning_effort": "xhigh"}
63
+ prompt: |
64
+ You are a senior ML researcher brainstorming research ideas.
65
+
66
+ Research direction: [user's direction]
67
+
68
+ Here is the current landscape:
69
+ [paste landscape map from Phase 1]
70
+
71
+ Key gaps identified:
72
+ [paste gaps from Phase 1]
73
+
74
+ Generate 8-12 concrete research ideas. For each idea:
75
+ 1. One-sentence summary
76
+ 2. Core hypothesis (what you expect to find and why)
77
+ 3. Minimum viable experiment (what's the cheapest way to test this?)
78
+ 4. Expected contribution type: empirical finding / new method / theoretical result / diagnostic
79
+ 5. Risk level: LOW (likely works) / MEDIUM (50-50) / HIGH (speculative)
80
+ 6. Estimated effort: days / weeks / months
81
+
82
+ Prioritize ideas that are:
83
+ - Testable with moderate compute (8x RTX 3090 or less)
84
+ - Likely to produce a clear positive OR negative result (both are publishable)
85
+ - Not "apply X to Y" unless the application reveals genuinely surprising insights
86
+ - Differentiated from the 10-15 papers above
87
+
88
+ Be creative but grounded. A great idea is one where the answer matters regardless of which way it goes.
89
+ ```
90
+
91
+ Save the threadId for follow-up.
92
+
93
+ ### Phase 3: First-Pass Filtering
94
+
95
+ For each generated idea, quickly evaluate:
96
+
97
+ 1. **Feasibility check**: Can we actually run this experiment with available resources?
98
+ - Compute requirements (estimate GPU-hours)
99
+ - Data availability
100
+ - Implementation complexity
101
+ - Skip ideas requiring > 1 week of GPU time or unavailable datasets
102
+
103
+ 2. **Novelty quick-check**: For each idea, do 2-3 targeted searches to see if it's already been done. Full `/novelty-check` comes later for survivors.
104
+
105
+ 3. **Impact estimation**: Would a reviewer care about the result?
106
+ - "So what?" test: if the experiment succeeds, does it change how people think?
107
+ - Is the finding actionable or just interesting?
108
+
109
+ Eliminate ideas that fail any of these. Typically 8-12 ideas reduce to 4-6.
110
+
111
+ ### Phase 4: Deep Validation (for top ideas)
112
+
113
+ For each surviving idea, run a deeper evaluation:
114
+
115
+ 1. **Novelty check**: Use the `/novelty-check` workflow (multi-source search + REVIEWER_MODEL cross-verification) for each idea
116
+
117
+ 2. **Critical review**: Perform a rigorous internal devil's-advocate pass:
118
+ - What's the strongest objection a reviewer would raise?
119
+ - What's the most likely failure mode?
120
+ - How would you rank these for a top venue submission?
121
+ - Which 2-3 would you actually work on?
122
+
123
+ 3. **Combine rankings**: Merge your assessment with the REVIEWER_MODEL's ranking (GPT-5.4 by default). Select the top 2-3 ideas for pilot experiments.
124
+
125
+ ### Phase 5: Parallel Pilot Experiments (for top 2-3 ideas)
126
+
127
+ Before committing to a full research effort, run cheap pilot experiments to get empirical signal. This is the key differentiator from paper-only validation.
128
+
129
+ 1. **Design pilots**: For each top idea, define the minimal experiment that would give a positive or negative signal:
130
+ - Single seed, small scale (e.g., small dataset subset, fewer epochs)
131
+ - Target: between 30 minutes and PILOT_MAX_HOURS per pilot on 1 GPU
132
+ - **Estimate GPU-hours BEFORE launching.** If estimated time > PILOT_MAX_HOURS, reduce scale (fewer epochs, smaller subset) or flag as "needs manual pilot"
133
+ - Clear success metric defined upfront (e.g., "if metric improves by > 1%, signal is positive")
134
+
135
+ 2. **Deploy in parallel**: Use `/run-experiment` to launch pilots on different GPUs simultaneously:
136
+ ```
137
+ GPU 0: Pilot for Idea 1
138
+ GPU 1: Pilot for Idea 2
139
+ GPU 2: Pilot for Idea 3
140
+ ```
141
+ Use `run_in_background: true` to launch all at once.
142
+
143
+ 3. **Collect results**: Use `/monitor-experiment` to check progress. If any pilot exceeds PILOT_TIMEOUT_HOURS, kill it and collect partial results. Once all pilots complete (or timeout), compare:
144
+ - Which ideas showed positive signal?
145
+ - Which showed null/negative results? (eliminate or deprioritize)
146
+ - Any surprising findings that suggest a pivot?
147
+ - Total GPU-hours consumed (track against MAX_TOTAL_GPU_HOURS budget)
148
+
149
+ 4. **Re-rank based on empirical evidence**: Update the idea ranking using pilot results. An idea with strong pilot signal jumps ahead of a theoretically appealing but untested idea.
150
+
151
+ Note: Skip this phase if the ideas are purely theoretical or if no GPU is available. Flag skipped ideas as "needs pilot validation" in the report.
152
+
153
+ ### Phase 6: Output — Ranked Idea Report
154
+
155
+ Write a structured report to `IDEA_REPORT.md` in the project root:
156
+
157
+ ```markdown
158
+ # Research Idea Report
159
+
160
+ **Direction**: [user's research direction]
161
+ **Generated**: [date]
162
+ **Ideas evaluated**: X generated → Y survived filtering → Z piloted → W recommended
163
+
164
+ ## Landscape Summary
165
+ [3-5 paragraphs on the current state of the field]
166
+
167
+ ## Recommended Ideas (ranked)
168
+
169
+ ### Idea 1: [title]
170
+ - **Hypothesis**: [one sentence]
171
+ - **Minimum experiment**: [concrete description]
172
+ - **Expected outcome**: [what success/failure looks like]
173
+ - **Novelty**: X/10 — closest work: [paper]
174
+ - **Feasibility**: [compute, data, implementation estimates]
175
+ - **Risk**: LOW/MEDIUM/HIGH
176
+ - **Contribution type**: empirical / method / theory / diagnostic
177
+ - **Pilot result**: [POSITIVE: metric +X% / NEGATIVE: no signal / SKIPPED: needs GPU]
178
+ - **Reviewer's likely objection**: [strongest counterargument]
179
+ - **Why we should do this**: [1-2 sentences]
180
+
181
+ ### Idea 2: [title]
182
+ ...
183
+
184
+ ## Eliminated Ideas (for reference)
185
+ | Idea | Reason eliminated |
186
+ |------|-------------------|
187
+ | ... | Already done by [paper] |
188
+ | ... | Requires > 1 week GPU time |
189
+ | ... | Result wouldn't be interesting either way |
190
+
191
+ ## Pilot Experiment Results
192
+ | Idea | GPU | Time | Key Metric | Signal |
193
+ |------|-----|------|------------|--------|
194
+ | Idea 1 | GPU 0 | 45 min | +2.3% CE | POSITIVE |
195
+ | Idea 2 | GPU 1 | 30 min | -0.1% CE | NEGATIVE |
196
+ | Idea 3 | GPU 2 | 1.5 hr | +0.8% CE | WEAK POSITIVE |
197
+
198
+ ## Suggested Execution Order
199
+ 1. Start with Idea 1 (positive pilot signal, lowest risk)
200
+ 2. Idea 3 as backup (weak signal, may need larger scale to confirm)
201
+ 3. Idea 2 eliminated by pilot — negative result documented
202
+
203
+ ## Next Steps
204
+ - [ ] Scale up Idea 1 to full experiment (multi-seed, full dataset)
205
+ - [ ] If confirmed, invoke /ulw-loop for full iteration
206
+ ```
207
+
208
+ ## Key Rules
209
+
210
+ - The user provides a DIRECTION, not an idea. Your job is to generate the ideas.
211
+ - Quantity first, quality second: brainstorm broadly, then filter ruthlessly.
212
+ - A good negative result is just as publishable as a positive one. Prioritize ideas where the answer matters regardless of direction.
213
+ - Don't fall in love with any idea before validating it. Be willing to kill ideas.
214
+ - Always estimate compute cost. An idea that needs 1000 GPU-hours is not actionable for most researchers.
215
+ - "Apply X to Y" is the lowest form of research idea. Push for deeper questions.
216
+ - Include eliminated ideas in the report — they save future time by documenting dead ends.
217
+ - **If the user's direction is too broad (e.g., "NLP", "computer vision", "reinforcement learning"), STOP and ask them to narrow it.** A good direction is 1-2 sentences specifying the problem, domain, and constraint — e.g., "factorized gap in discrete diffusion LMs" or "sample efficiency of offline RL with image observations". Without sufficient specificity, generated ideas will be too vague to run experiments on.
218
+
219
+ ## Composing with Other Skills
220
+
221
+ After this skill produces the ranked report:
222
+ ```
223
+ /idea-creator "direction" → ranked ideas
224
+ /novelty-check "top idea" → deep novelty verification (already done in Phase 4, but user can re-run)
225
+ implement → write code
226
+ /run-experiment → deploy to GPU
227
+ /ulw-loop → iterate until submission-ready
228
+ ```
@@ -0,0 +1,186 @@
1
+ ---
2
+ name: "auto-claude/idea-discovery"
3
+ description: "Workflow 1: Full idea discovery pipeline. Orchestrates research-lit → idea-creator → novelty-check to go from a broad research direction to validated, pilot-tested ideas. Use when user says \"找idea全流程\", \"idea discovery pipeline\", \"从零开始找方向\", or wants the complete idea exploration workflow."
4
+ argument-hint: ["research-direction"]
5
+ allowed-tools: "Bash(*), Read, Write, Edit, Grep, Glob, WebSearch, WebFetch, Agent, Skill, mcp__codex__codex, mcp__codex__codex-reply"
6
+ metadata:
7
+ category: "research/research-ideation"
8
+ ---
9
+
10
+ # Workflow 1: Idea Discovery Pipeline
11
+
12
+ Orchestrate a complete idea discovery workflow for: **$ARGUMENTS**
13
+
14
+ ## Overview
15
+
16
+ This skill chains three sub-skills into a single automated pipeline, followed by an internal critical review and a final report:
17
+
18
+ ```
19
+ /research-lit → /idea-creator → /novelty-check
20
+ (survey) (brainstorm) (verify novel)
21
+ ```
22
+
23
+ Each phase builds on the previous one's output. The final deliverable is a validated `IDEA_REPORT.md` with ranked ideas, pilot results, and a suggested execution plan.
24
+
25
+ ## Constants
26
+
27
+ - **PILOT_MAX_HOURS = 2** — Skip any pilot experiment estimated to take > 2 hours per GPU. Flag as "needs manual pilot" in the report.
28
+ - **PILOT_TIMEOUT_HOURS = 3** — Hard timeout: kill any running pilot that exceeds 3 hours. Collect partial results if available.
29
+ - **MAX_PILOT_IDEAS = 3** — Run pilots for at most 3 top ideas in parallel. Additional ideas are validated on paper only.
30
+ - **MAX_TOTAL_GPU_HOURS = 8** — Total GPU budget across all pilots. If exceeded, skip remaining pilots and note in report.
31
+ - **AUTO_PROCEED = true** — If user doesn't respond at a checkpoint, automatically proceed with the best option after presenting results. Set to `false` to always wait for explicit user confirmation.
32
+ - **REVIEWER_MODEL = `gpt-5.4`** — Model used via Codex MCP. Must be an OpenAI model (e.g., `gpt-5.4`, `o3`, `gpt-4o`). Passed to sub-skills.
33
+ - **ARXIV_DOWNLOAD = false** — When `true`, `/research-lit` downloads the top relevant arXiv PDFs during Phase 1. When `false` (default), only fetches metadata. Passed through to `/research-lit`.
34
+
35
+ > 💡 These are defaults. Override by telling the skill, e.g., `/idea-discovery "topic" — pilot budget: 4h per idea, 20h total` or `/idea-discovery "topic" — arxiv download: true`.
36
+
37
+ ## Pipeline
38
+
39
+ ### Phase 1: Literature Survey
40
+
41
+ Invoke `/research-lit` to map the research landscape:
42
+
43
+ ```
44
+ /research-lit "$ARGUMENTS"
45
+ ```
46
+
47
+ **What this does:**
48
+ - Search arXiv, Google Scholar, Semantic Scholar for recent papers
49
+ - Build a landscape map: sub-directions, approaches, open problems
50
+ - Identify structural gaps and recurring limitations
51
+ - Output a literature summary (saved to working notes)
52
+
53
+ **🚦 Checkpoint:** Present the landscape summary to the user. Ask:
54
+
55
+ ```
56
+ 📚 Literature survey complete. Here's what I found:
57
+ - [key findings, gaps, open problems]
58
+
59
+ Does this match your understanding? Should I adjust the scope before generating ideas?
60
+ (If no response, I'll proceed with the top-ranked direction.)
61
+ ```
62
+
63
+ - **User approves** (or no response + AUTO_PROCEED=true) → proceed to Phase 2 with the best direction.
64
+ - **User requests changes** (e.g., "focus more on X", "ignore Y", "too broad") → refine the search with updated queries, re-run `/research-lit` with adjusted scope, and present again. Repeat until the user is satisfied.
65
+
66
+ ### Phase 2: Idea Generation + Filtering + Pilots
67
+
68
+ Invoke `/idea-creator` with the landscape context:
69
+
70
+ ```
71
+ /idea-creator "$ARGUMENTS"
72
+ ```
73
+
74
+ **What this does:**
75
+ - Brainstorm 8-12 concrete ideas via GPT-5.4 xhigh
76
+ - Filter by feasibility, compute cost, and a quick novelty search
77
+ - Deep validate top ideas (full novelty check + devil's advocate)
78
+ - Run parallel pilot experiments on available GPUs (top 2-3 ideas)
79
+ - Rank by empirical signal
80
+ - Output `IDEA_REPORT.md`
81
+
82
+ **🚦 Checkpoint:** Present `IDEA_REPORT.md` ranked ideas to the user. Ask:
83
+
84
+ ```
85
+ 💡 Generated X ideas, filtered to Y, piloted Z. Top results:
86
+
87
+ 1. [Idea 1] — Pilot: POSITIVE (+X%)
88
+ 2. [Idea 2] — Pilot: WEAK POSITIVE (+Y%)
89
+ 3. [Idea 3] — Pilot: NEGATIVE, eliminated
90
+
91
+ Which ideas should I validate further? Or should I regenerate with different constraints?
92
+ (If no response, I'll proceed with the top-ranked ideas.)
93
+ ```
94
+
95
+ - **User picks ideas** (or no response + AUTO_PROCEED=true) → proceed to Phase 3 with the top-ranked ideas.
96
+ - **User unhappy with all ideas** → collect feedback ("what's missing?", "what direction do you prefer?"), update the prompt with user's constraints, and re-run Phase 2 (idea generation). Repeat until the user selects at least 1 idea.
97
+ - **User wants to adjust scope** → go back to Phase 1 with refined direction.
98
+
99
+ ### Phase 3: Deep Novelty Verification
100
+
101
+ For each top idea (positive pilot signal), run a thorough novelty check:
102
+
103
+ ```
104
+ /novelty-check "[top idea 1 description]"
105
+ /novelty-check "[top idea 2 description]"
106
+ ```
107
+
108
+ **What this does:**
109
+ - Multi-source literature search (arXiv, Scholar, Semantic Scholar)
110
+ - Cross-verify with GPT-5.4 xhigh
111
+ - Check for concurrent work (last 3-6 months)
112
+ - Identify closest existing work and differentiation points
113
+
114
+ **Update `IDEA_REPORT.md`** with deep novelty results. Eliminate any idea that turns out to be already published.
115
+
116
+ ### Phase 4: Internal Critical Review
117
+
118
+ For the surviving top idea(s), perform a rigorous internal critique:
119
+
120
+ - Identify the strongest reviewer objections
121
+ - List likely failure modes and missing controls
122
+ - Propose minimum viable improvements to de-risk the idea
123
+
124
+ **Update `IDEA_REPORT.md`** with internal review notes and a revised plan.
125
+
126
+ ### Phase 5: Final Report
127
+
128
+ Finalize `IDEA_REPORT.md` with all accumulated information:
129
+
130
+ ```markdown
131
+ # Idea Discovery Report
132
+
133
+ **Direction**: $ARGUMENTS
134
+ **Date**: [today]
135
+ **Pipeline**: research-lit → idea-creator → novelty-check → internal review
136
+
137
+ ## Executive Summary
138
+ [2-3 sentences: best idea, key evidence, recommended next step]
139
+
140
+ ## Literature Landscape
141
+ [from Phase 1]
142
+
143
+ ## Ranked Ideas
144
+ [from Phase 2, updated with Phase 3-4 results]
145
+
146
+ ### 🏆 Idea 1: [title] — RECOMMENDED
147
+ - Pilot: POSITIVE (+X%)
148
+ - Novelty: CONFIRMED (closest: [paper], differentiation: [what's different])
149
+ - Internal review notes: [top 2-3 risks + fixes]
150
+ - Next step: implement full experiment → /ulw-loop
151
+
152
+ ### Idea 2: [title] — BACKUP
153
+ ...
154
+
155
+ ## Eliminated Ideas
156
+ [ideas killed at each phase, with reasons]
157
+
158
+ ## Next Steps
159
+ - [ ] Implement Idea 1
160
+ - [ ] /run-experiment to deploy full-scale experiments
161
+ - [ ] /ulw-loop to iterate until submission-ready
162
+ - [ ] Or invoke /research-pipeline for the complete end-to-end flow
163
+ ```
164
+
165
+ ## Key Rules
166
+
167
+ - **Don't skip phases.** Each phase filters and validates — skipping leads to wasted effort later.
168
+ - **Checkpoint between phases.** Briefly summarize what was found before moving on.
169
+ - **Kill ideas early.** It's better to kill 10 bad ideas in Phase 3 than to implement one and fail.
170
+ - **Empirical signal > theoretical appeal.** An idea with a positive pilot outranks a "sounds great" idea without evidence.
171
+ - **Document everything.** Dead ends are just as valuable as successes for future reference.
172
+ - **Be honest in the critique.** Include negative results and failed pilots in the review notes.
173
+ - **Feishu notifications are optional.** If `~/.claude/feishu.json` exists, send a `checkpoint` notification at each phase transition and a `pipeline_done` notification when the final report is written. If the file is absent or notifications are turned off, skip silently.
174
+
175
+ ## Composing with Workflow 2
176
+
177
+ After this pipeline produces a validated top idea:
178
+
179
+ ```
180
+ /idea-discovery "direction" ← you are here (Workflow 1)
181
+ implement ← write code for the top idea
182
+ /run-experiment ← deploy full-scale experiments
183
+ /ulw-loop "top idea" ← Workflow 2: iterate until submission-ready
184
+
185
+ Or use /research-pipeline for the full end-to-end flow.
186
+ ```