@bohuyeshan/openagent-labforge-core 3.11.1 → 3.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. package/README.ja.md +34 -27
  2. package/README.ko.md +34 -27
  3. package/README.md +245 -188
  4. package/README.ru.md +28 -21
  5. package/README.zh-cn.md +245 -188
  6. package/bin/platform.test.ts +21 -20
  7. package/dist/agents/atlas/default.d.ts +1 -1
  8. package/dist/agents/atlas/gemini.d.ts +1 -1
  9. package/dist/agents/atlas/gpt.d.ts +1 -1
  10. package/dist/agents/bio-methodologist.d.ts +1 -1
  11. package/dist/agents/bio-pipeline-operator.d.ts +1 -1
  12. package/dist/agents/builtin-agents/general-agents.d.ts +1 -0
  13. package/dist/agents/dynamic-agent-prompt-builder.d.ts +2 -0
  14. package/dist/agents/env-context.d.ts +1 -1
  15. package/dist/agents/index.d.ts +1 -0
  16. package/dist/agents/metis.d.ts +1 -1
  17. package/dist/agents/prometheus/gemini.d.ts +1 -1
  18. package/dist/agents/prometheus/gpt.d.ts +1 -1
  19. package/dist/agents/prometheus/interview-mode.d.ts +1 -1
  20. package/dist/agents/prometheus/plan-generation.d.ts +1 -1
  21. package/dist/agents/prometheus/plan-template.d.ts +1 -1
  22. package/dist/agents/prometheus/system-prompt.d.ts +1 -1
  23. package/dist/agents/types.d.ts +1 -1
  24. package/dist/cli/config-manager/bun-install.d.ts +6 -1
  25. package/dist/cli/config-manager/plugin-name-with-version.d.ts +1 -1
  26. package/dist/cli/doctor/constants.d.ts +1 -1
  27. package/dist/cli/index.js +763 -467
  28. package/dist/cli/install-validators.d.ts +1 -0
  29. package/dist/cli/model-fallback-types.d.ts +1 -0
  30. package/dist/cli/openai-only-model-catalog.d.ts +3 -0
  31. package/dist/cli/run/index.d.ts +1 -0
  32. package/dist/cli/run/model-resolver.d.ts +4 -0
  33. package/dist/cli/run/types.d.ts +1 -0
  34. package/dist/cli/types.d.ts +3 -0
  35. package/dist/config/schema/agent-names.d.ts +3 -1
  36. package/dist/config/schema/background-task.d.ts +2 -0
  37. package/dist/config/schema/git-env-prefix.d.ts +5 -0
  38. package/dist/config/schema/git-master.d.ts +1 -0
  39. package/dist/config/schema/hooks.d.ts +2 -0
  40. package/dist/config/schema/oh-my-opencode-config.d.ts +54 -1
  41. package/dist/config/schema.d.ts +1 -0
  42. package/dist/create-hooks.d.ts +13 -0
  43. package/dist/features/background-agent/compaction-aware-message-resolver.d.ts +16 -1
  44. package/dist/features/background-agent/constants.d.ts +1 -1
  45. package/dist/features/background-agent/manager.d.ts +20 -4
  46. package/dist/features/background-agent/process-cleanup.d.ts +1 -1
  47. package/dist/features/background-agent/remove-task-toast-tracking.d.ts +1 -0
  48. package/dist/features/background-agent/subagent-spawn-limits.d.ts +23 -0
  49. package/dist/features/background-agent/task-history.d.ts +1 -0
  50. package/dist/features/background-agent/task-poller.d.ts +1 -0
  51. package/dist/features/background-agent/types.d.ts +4 -0
  52. package/dist/features/claude-code-agent-loader/claude-model-mapper.d.ts +4 -0
  53. package/dist/features/claude-code-agent-loader/loader.d.ts +3 -3
  54. package/dist/features/claude-code-agent-loader/types.d.ts +8 -1
  55. package/dist/features/claude-code-plugin-loader/agent-loader.d.ts +2 -2
  56. package/dist/features/claude-code-plugin-loader/loader.d.ts +2 -2
  57. package/dist/features/claude-code-plugin-loader/types.d.ts +1 -1
  58. package/dist/features/opencode-skill-loader/git-master-template-injection.d.ts +1 -1
  59. package/dist/features/skill-mcp-manager/types.d.ts +4 -0
  60. package/dist/features/tmux-subagent/index.d.ts +1 -0
  61. package/dist/features/tmux-subagent/manager.d.ts +5 -0
  62. package/dist/features/tmux-subagent/pane-state-parser.d.ts +8 -0
  63. package/dist/features/tmux-subagent/tracked-session-state.d.ts +8 -0
  64. package/dist/features/tmux-subagent/types.d.ts +2 -0
  65. package/dist/hooks/atlas/boulder-session-lineage.d.ts +6 -0
  66. package/dist/hooks/atlas/final-wave-approval-gate.d.ts +4 -0
  67. package/dist/hooks/atlas/idle-event.d.ts +8 -0
  68. package/dist/hooks/atlas/resolve-active-boulder-session.d.ts +11 -0
  69. package/dist/hooks/atlas/tool-execute-after.d.ts +2 -0
  70. package/dist/hooks/atlas/types.d.ts +2 -0
  71. package/dist/hooks/atlas/verification-reminders.d.ts +4 -0
  72. package/dist/hooks/auto-slash-command/hook.d.ts +7 -0
  73. package/dist/hooks/auto-slash-command/processed-command-store.d.ts +7 -0
  74. package/dist/hooks/auto-update-checker/checker/sync-package-json.d.ts +7 -0
  75. package/dist/hooks/auto-update-checker/checker.d.ts +3 -1
  76. package/dist/hooks/auto-update-checker/constants.d.ts +2 -2
  77. package/dist/hooks/comment-checker/downloader.d.ts +1 -1
  78. package/dist/hooks/compaction-context-injector/compaction-context-prompt.d.ts +1 -0
  79. package/dist/hooks/compaction-context-injector/constants.d.ts +5 -0
  80. package/dist/hooks/compaction-context-injector/hook.d.ts +5 -1
  81. package/dist/hooks/compaction-context-injector/recovery-prompt-config.d.ts +6 -0
  82. package/dist/hooks/compaction-context-injector/recovery.d.ts +6 -0
  83. package/dist/hooks/compaction-context-injector/session-id.d.ts +2 -0
  84. package/dist/hooks/compaction-context-injector/session-prompt-config-resolver.d.ts +16 -0
  85. package/dist/hooks/compaction-context-injector/tail-monitor.d.ts +13 -0
  86. package/dist/hooks/compaction-context-injector/types.d.ts +43 -0
  87. package/dist/hooks/compaction-context-injector/validated-model.d.ts +13 -0
  88. package/dist/hooks/context-window-monitor.d.ts +2 -5
  89. package/dist/hooks/delegate-task-english-directive/hook.d.ts +14 -0
  90. package/dist/hooks/delegate-task-english-directive/index.d.ts +1 -0
  91. package/dist/hooks/gpt-permission-continuation/assistant-message.d.ts +23 -0
  92. package/dist/hooks/gpt-permission-continuation/constants.d.ts +4 -0
  93. package/dist/hooks/gpt-permission-continuation/detector.d.ts +1 -0
  94. package/dist/hooks/gpt-permission-continuation/handler.d.ts +12 -0
  95. package/dist/hooks/gpt-permission-continuation/index.d.ts +13 -0
  96. package/dist/hooks/gpt-permission-continuation/session-state.d.ts +15 -0
  97. package/dist/hooks/index.d.ts +2 -0
  98. package/dist/hooks/keyword-detector/hook.d.ts +1 -0
  99. package/dist/hooks/preemptive-compaction.d.ts +2 -5
  100. package/dist/hooks/ralph-loop/pending-verification-handler.d.ts +16 -0
  101. package/dist/hooks/runtime-fallback/fallback-bootstrap-model.d.ts +10 -0
  102. package/dist/hooks/runtime-fallback/fallback-retry-dispatcher.d.ts +11 -0
  103. package/dist/hooks/runtime-fallback/hook.d.ts +2 -3
  104. package/dist/hooks/runtime-fallback/last-user-retry-parts.d.ts +4 -0
  105. package/dist/hooks/runtime-fallback/message-update-handler.d.ts +1 -2
  106. package/dist/hooks/runtime-fallback/retry-model-payload.d.ts +7 -0
  107. package/dist/hooks/runtime-fallback/session-messages.d.ts +9 -0
  108. package/dist/hooks/runtime-fallback/session-status-handler.d.ts +3 -0
  109. package/dist/hooks/runtime-fallback/types.d.ts +57 -3
  110. package/dist/hooks/runtime-fallback/visible-assistant-response.d.ts +3 -0
  111. package/dist/hooks/session-notification-content.d.ts +30 -0
  112. package/dist/hooks/session-notification-scheduler.d.ts +5 -3
  113. package/dist/hooks/session-notification.d.ts +2 -0
  114. package/dist/hooks/start-work/index.d.ts +1 -1
  115. package/dist/hooks/start-work/worktree-detector.d.ts +7 -0
  116. package/dist/hooks/todo-continuation-enforcer/constants.d.ts +6 -2
  117. package/dist/hooks/todo-continuation-enforcer/handler.d.ts +1 -0
  118. package/dist/hooks/todo-continuation-enforcer/idle-event.d.ts +1 -0
  119. package/dist/hooks/todo-continuation-enforcer/session-state.d.ts +10 -1
  120. package/dist/hooks/todo-continuation-enforcer/stagnation-detection.d.ts +6 -0
  121. package/dist/hooks/todo-continuation-enforcer/types.d.ts +5 -0
  122. package/dist/hooks/tool-output-truncator.d.ts +1 -0
  123. package/dist/index.d.ts +2 -2
  124. package/dist/index.js +10581 -7245
  125. package/dist/openagent-labforge.schema.json +70 -4
  126. package/dist/plugin/hooks/create-continuation-hooks.d.ts +2 -1
  127. package/dist/plugin/hooks/create-core-hooks.d.ts +1 -0
  128. package/dist/plugin/hooks/create-session-hooks.d.ts +2 -1
  129. package/dist/plugin/normalize-tool-arg-schemas.d.ts +2 -0
  130. package/dist/plugin/ultrawork-model-override.d.ts +1 -15
  131. package/dist/plugin/ultrawork-variant-availability.d.ts +6 -0
  132. package/dist/plugin-dispose.d.ts +10 -0
  133. package/dist/plugin-handlers/agent-override-protection.d.ts +3 -0
  134. package/dist/plugin-state.d.ts +5 -0
  135. package/dist/shared/compaction-agent-config-checkpoint.d.ts +11 -0
  136. package/dist/shared/context-limit-resolver.d.ts +5 -0
  137. package/dist/shared/data-path.d.ts +2 -2
  138. package/dist/shared/dynamic-truncator.d.ts +4 -7
  139. package/dist/shared/external-plugin-detector.d.ts +1 -1
  140. package/dist/shared/fallback-chain-from-models.d.ts +3 -0
  141. package/dist/shared/index.d.ts +3 -0
  142. package/dist/shared/model-error-classifier.d.ts +2 -1
  143. package/dist/shared/opencode-command-dirs.d.ts +3 -0
  144. package/dist/shared/plugin-identity.d.ts +7 -0
  145. package/dist/shared/question-denied-session-permission.d.ts +6 -0
  146. package/dist/shared/retry-status-utils.d.ts +2 -0
  147. package/dist/shared/system-directive.d.ts +6 -5
  148. package/dist/shared/vision-capable-models-cache.d.ts +4 -0
  149. package/dist/tools/call-omo-agent/background-executor.d.ts +2 -1
  150. package/dist/tools/call-omo-agent/constants.d.ts +1 -1
  151. package/dist/tools/call-omo-agent/sync-executor.d.ts +11 -3
  152. package/dist/tools/call-omo-agent/tools.d.ts +2 -1
  153. package/dist/tools/delegate-task/cancel-unstable-agent-task.d.ts +2 -0
  154. package/dist/tools/delegate-task/model-selection.d.ts +1 -0
  155. package/dist/tools/delegate-task/model-string-parser.d.ts +1 -3
  156. package/dist/tools/look-at/multimodal-fallback-chain.d.ts +4 -0
  157. package/dist/tools/lsp/constants.d.ts +1 -0
  158. package/dist/tools/lsp/directory-diagnostics.d.ts +1 -0
  159. package/dist/tools/lsp/lsp-client-transport.d.ts +4 -2
  160. package/dist/tools/lsp/lsp-client-wrapper.d.ts +2 -1
  161. package/dist/tools/lsp/server-path-bases.d.ts +1 -0
  162. package/generated/skills-bundles/catalog.json +282 -39
  163. package/generated/skills-bundles/full/INDEX.md +38 -10
  164. package/generated/skills-bundles/full/skills/data-analysis/experiment-monitoring/auto-claude__monitor-experiment/SKILL.md +63 -0
  165. package/generated/skills-bundles/full/skills/data-analysis/experiment-ops/auto-claude__run-experiment/SKILL.md +112 -0
  166. package/generated/skills-bundles/full/skills/data-analysis/optimization/auto-claude__dse-loop/SKILL.md +279 -0
  167. package/generated/skills-bundles/full/skills/data-analysis/statistics/auto-claude__analyze-results/SKILL.md +47 -0
  168. package/generated/skills-bundles/full/skills/data-analysis/visualization/auto-claude__paper-figure/SKILL.md +281 -0
  169. package/generated/skills-bundles/full/skills/productivity/communication/auto-claude__feishu-notify/SKILL.md +154 -0
  170. package/generated/skills-bundles/full/skills/productivity/visual-design/auto-claude__pixel-art/SKILL.md +138 -0
  171. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-compile/SKILL.md +252 -0
  172. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-plan/SKILL.md +254 -0
  173. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/SKILL.md +310 -0
  174. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/iclr2026.tex +84 -0
  175. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/icml2025.tex +87 -0
  176. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/math_commands.tex +48 -0
  177. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-write/templates/neurips2025.tex +80 -0
  178. package/generated/skills-bundles/full/skills/research/document-authoring/auto-claude__paper-writing/SKILL.md +255 -0
  179. package/generated/skills-bundles/full/skills/research/literature-and-web-search/auto-claude__arxiv/SKILL.md +133 -0
  180. package/generated/skills-bundles/full/skills/research/literature-and-web-search/auto-claude__novelty-check/SKILL.md +87 -0
  181. package/generated/skills-bundles/full/skills/research/literature-and-web-search/auto-claude__research-lit/SKILL.md +194 -0
  182. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__idea-creator/SKILL.md +228 -0
  183. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__idea-discovery/SKILL.md +186 -0
  184. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__idea-discovery-robot/SKILL.md +351 -0
  185. package/generated/skills-bundles/full/skills/research/research-ideation/auto-claude__research-pipeline/SKILL.md +174 -0
  186. package/generated/skills-bundles/full/skills/research/theory-writing/auto-claude__proof-writer/SKILL.md +224 -0
  187. package/generated/skills-bundles/paper/INDEX.md +40 -0
  188. package/generated/skills-bundles/paper/skills/data-analysis/experiment-monitoring/auto-claude__monitor-experiment/SKILL.md +63 -0
  189. package/generated/skills-bundles/paper/skills/data-analysis/experiment-ops/auto-claude__run-experiment/SKILL.md +112 -0
  190. package/generated/skills-bundles/paper/skills/data-analysis/optimization/auto-claude__dse-loop/SKILL.md +279 -0
  191. package/generated/skills-bundles/paper/skills/data-analysis/statistics/auto-claude__analyze-results/SKILL.md +47 -0
  192. package/generated/skills-bundles/paper/skills/data-analysis/visualization/auto-claude__paper-figure/SKILL.md +281 -0
  193. package/generated/skills-bundles/paper/skills/productivity/communication/auto-claude__feishu-notify/SKILL.md +154 -0
  194. package/generated/skills-bundles/paper/skills/productivity/visual-design/auto-claude__pixel-art/SKILL.md +138 -0
  195. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-compile/SKILL.md +252 -0
  196. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-plan/SKILL.md +254 -0
  197. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/SKILL.md +310 -0
  198. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/iclr2026.tex +84 -0
  199. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/icml2025.tex +87 -0
  200. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/math_commands.tex +48 -0
  201. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-write/templates/neurips2025.tex +80 -0
  202. package/generated/skills-bundles/paper/skills/research/document-authoring/auto-claude__paper-writing/SKILL.md +255 -0
  203. package/generated/skills-bundles/paper/skills/research/literature-and-web-search/auto-claude__arxiv/SKILL.md +133 -0
  204. package/generated/skills-bundles/paper/skills/research/literature-and-web-search/auto-claude__novelty-check/SKILL.md +87 -0
  205. package/generated/skills-bundles/paper/skills/research/literature-and-web-search/auto-claude__research-lit/SKILL.md +194 -0
  206. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__idea-creator/SKILL.md +228 -0
  207. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__idea-discovery/SKILL.md +186 -0
  208. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__idea-discovery-robot/SKILL.md +351 -0
  209. package/generated/skills-bundles/paper/skills/research/research-ideation/auto-claude__research-pipeline/SKILL.md +174 -0
  210. package/generated/skills-bundles/paper/skills/research/theory-writing/auto-claude__proof-writer/SKILL.md +224 -0
  211. package/package.json +21 -21
  212. package/generated/skills-bundles/full/skills/engineering/virtualization/skills-main__virtualbox/LICENSE.txt +0 -21
  213. package/generated/skills-bundles/full/skills/engineering/virtualization/skills-main__virtualbox/SKILL.md +0 -615
  214. package/generated/skills-bundles/full/skills/engineering/virtualization/skills-main__virtualbox/_meta.json +0 -11
  215. package/generated/skills-bundles/full/skills/engineering/virtualization/skills-main__virtualbox/scripts/virtualbox-utils.ts +0 -586
  216. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/.clawhubsafe +0 -0
  217. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/LICENSE +0 -21
  218. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/README.md +0 -127
  219. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/SECURITY.md +0 -68
  220. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/SKILL.md +0 -141
  221. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/_meta.json +0 -11
  222. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/scripts/speak.sh +0 -52
  223. package/generated/skills-bundles/full/skills/productivity/voice-and-accessibility/skills-main__voiceclaw/scripts/transcribe.sh +0 -50
  224. package/generated/skills-bundles/full/skills/research/media-search/skills-main__youtube-search/LICENSE.txt +0 -21
  225. package/generated/skills-bundles/full/skills/research/media-search/skills-main__youtube-search/SKILL.md +0 -416
  226. package/generated/skills-bundles/full/skills/research/media-search/skills-main__youtube-search/_meta.json +0 -11
@@ -0,0 +1,351 @@
1
+ ---
2
+ name: "auto-claude/idea-discovery-robot"
3
+ description: "Workflow 1 adaptation for robotics and embodied AI. Orchestrates robotics-aware literature survey, idea generation, novelty check, and internal critique to go from a broad robotics direction to benchmark-grounded, simulation-first ideas. Use when user says \"robotics idea discovery\", \"机器人找idea\", \"embodied AI idea\", \"机器人方向探索\", \"sim2real 选题\", or wants ideas for manipulation, locomotion, navigation, drones, humanoids, or general robot learning."
4
+ argument-hint: ["robotics-direction"]
5
+ allowed-tools: "Bash(*), Read, Write, Edit, Grep, Glob, WebSearch, WebFetch, Agent, Skill, mcp__codex__codex, mcp__codex__codex-reply"
6
+ metadata:
7
+ category: "research/research-ideation"
8
+ ---
9
+
10
+ # Robotics Idea Discovery Pipeline
11
+
12
+ Orchestrate a robotics-specific idea discovery workflow for: **$ARGUMENTS**
13
+
14
+ ## Overview
15
+
16
+ This skill chains three sub-skills into a single automated pipeline:
17
+
18
+ ```
19
+ /research-lit → /idea-creator (robotics framing) → /novelty-check
20
+ (survey) (filter + pilot plan) (verify novel)
21
+ ```
22
+
23
+ But every phase must be grounded in robotics-specific constraints:
24
+ - **Embodiment**: arm, mobile manipulator, drone, humanoid, quadruped, autonomous car, etc.
25
+ - **Task family**: grasping, insertion, locomotion, navigation, manipulation, rearrangement, multi-step planning
26
+ - **Observation + action interface**: RGB/RGB-D/tactile/language; torque/velocity/waypoints/end-effector actions
27
+ - **Simulator / benchmark availability**: simulation-first by default
28
+ - **Real robot constraints**: hardware availability, reset cost, safety, operator time
29
+ - **Evaluation quality**: success rate plus failure cases, safety violations, intervention count, latency, sample efficiency
30
+ - **Sim2real story**: whether the idea can stay in sim, needs offline logs, or truly requires hardware
31
+
32
+ The goal is not to produce flashy demos. The goal is to produce ideas that are:
33
+ - benchmarkable
34
+ - falsifiable
35
+ - feasible with available robotics infrastructure
36
+ - interesting even if the answer is negative
37
+
38
+ ## Constants
39
+
40
+ - **MAX_PILOT_IDEAS = 3** — Validate at most 3 top ideas deeply
41
+ - **PILOT_MODE = `sim-first`** — Prefer simulation or offline-log pilots before any hardware execution
42
+ - **REAL_ROBOT_PILOTS = `explicit approval only`** — Never assume physical robot access or approval
43
+ - **AUTO_PROCEED = true** — If the user does not respond at a checkpoint, proceed with the best sim-first option
44
+ - **TARGET_VENUES = CoRL, RSS, ICRA, IROS, RA-L** — Default novelty and reviewer framing
45
+
46
+ > Override inline, e.g. `/idea-discovery-robot "bimanual manipulation" — only sim ideas, no real robot` or `/idea-discovery-robot "drone navigation" — focus on CoRL/RSS, 2 pilot ideas max`
47
+
48
+ ## Execution Rule
49
+
50
+ Follow the phases in order. Do **not** stop after a checkpoint unless:
51
+ - the user explicitly says to stop, or
52
+ - the user asks to change scope and re-run an earlier phase
53
+
54
+ If `AUTO_PROCEED=true` and the user does not respond, continue immediately to the next phase using the strongest **sim-first, benchmark-grounded** option.
55
+
56
+ ## Phase 0: Frame the Robotics Problem
57
+
58
+ Before generating ideas, extract or infer this **Robotics Problem Frame** from `$ARGUMENTS` and local project context:
59
+
60
+ - **Embodiment**
61
+ - **Task family**
62
+ - **Environment type**: tabletop, warehouse, home, outdoor, aerial, driving, legged terrain
63
+ - **Observation modalities**
64
+ - **Action interface / controller abstraction**
65
+ - **Learning regime**: RL, imitation, behavior cloning, world model, planning, VLA/VLM, classical robotics, hybrid
66
+ - **Available assets**: simulator, benchmark suite, teleop data, offline logs, existing codebase, real hardware
67
+ - **Compute budget**
68
+ - **Safety constraints**
69
+ - **Desired contribution type**: method, benchmark, diagnosis, systems, sim2real, data curation
70
+
71
+ If some fields are missing, make explicit assumptions and default to:
72
+ - **simulation-first**
73
+ - **public benchmark preferred**
74
+ - **no real robot execution**
75
+
76
+ Write this frame into working notes before moving on. Every later decision should reference it.
77
+
78
+ ## Phase 1: Robotics Literature Survey
79
+
80
+ Invoke:
81
+
82
+ ```
83
+ /research-lit "$ARGUMENTS — focus venues: CoRL, RSS, ICRA, IROS, RA-L, TRO, Science Robotics"
84
+ ```
85
+
86
+ Then reorganize the findings using a robotics lens instead of a generic ML lens.
87
+
88
+ ### Build a Robotics Landscape Matrix
89
+
90
+ For each relevant paper, classify:
91
+
92
+ | Axis | Examples |
93
+ |------|----------|
94
+ | Embodiment | single-arm, mobile manipulator, humanoid, drone, quadruped |
95
+ | Task | pick-place, insertion, navigation, locomotion, long-horizon rearrangement |
96
+ | Learning setup | RL, BC, IL, offline RL, world model, planning, diffusion policy |
97
+ | Observation | RGB, RGB-D, proprioception, tactile, language |
98
+ | Action abstraction | torque, joint velocity, end-effector delta pose, waypoint planner |
99
+ | Eval regime | pure sim, sim+real, real-only, offline benchmark |
100
+ | Benchmark | ManiSkill, RLBench, Isaac Lab, Habitat, Meta-World, CALVIN, LIBERO, custom |
101
+ | Metrics | success rate, collision rate, intervention count, path length, latency, energy |
102
+ | Main bottleneck | sample inefficiency, brittleness, reset cost, perception drift, sim2real gap |
103
+
104
+ ### Search Priorities
105
+
106
+ When refining the survey, prioritize:
107
+ - recent work from **CoRL, RSS, ICRA, IROS, RA-L**
108
+ - recent arXiv papers from the last 6-12 months
109
+ - benchmark papers and follow-up reproductions
110
+ - negative-result or diagnosis papers if they reveal system bottlenecks
111
+
112
+ ### What to Look For
113
+
114
+ Do not stop at "who got the best success rate." Explicitly identify:
115
+ - recurring failure modes papers do not fix
116
+ - benchmarks that are saturated or misleading
117
+ - places where embodiment changes invalidate prior conclusions
118
+ - methods that only work with privileged observations
119
+ - ideas whose reported gains come from reset engineering, reward shaping, or hidden infrastructure
120
+ - task families where evaluation quality is weak even if performance numbers look high
121
+
122
+ **Checkpoint:** Present the landscape to the user in robotics terms:
123
+
124
+ ```
125
+ 🤖 Robotics survey complete. I grouped the field by embodiment, benchmark, action interface, and sim2real setup.
126
+
127
+ Main gaps:
128
+ 1. [...]
129
+ 2. [...]
130
+ 3. [...]
131
+
132
+ Should I generate ideas under this framing, or should I narrow to a specific robot / benchmark / modality?
133
+ ```
134
+
135
+ - **User approves** (or no response + AUTO_PROCEED=true) → proceed to Phase 2 with the best robotics frame.
136
+ - **User requests changes** (e.g. narrower embodiment, different benchmark family, no sim2real, no hardware) → refine the robotics frame, re-run Phase 1, and present again.
137
+
138
+ ## Phase 2: Robotics-Specific Idea Generation and Filtering
139
+
140
+ Generate ideas only after the robotics frame is explicit.
141
+
142
+ Invoke the existing idea generator, but pass the **Robotics Problem Frame** and landscape matrix into the prompt so it does not produce generic ML ideas:
143
+
144
+ ```
145
+ /idea-creator "$ARGUMENTS — robotics frame: [paste Robotics Problem Frame] — landscape matrix: [paste Robotics Landscape Matrix summary and main gaps] — focus venues: CoRL, RSS, ICRA, IROS, RA-L — benchmark-specific ideas only — sim-first pilots — no real-robot execution without explicit approval — require failure metrics and baseline clarity"
146
+ ```
147
+
148
+ Then rewrite and filter the output using the robotics-specific rules below.
149
+
150
+ Each candidate idea must include:
151
+ - **One-sentence summary**
152
+ - **Target embodiment**
153
+ - **Target benchmark / simulator / dataset**
154
+ - **Core bottleneck being addressed**
155
+ - **Minimum sim-first pilot**
156
+ - **Mandatory metrics**
157
+ - **Expected failure mode if the idea does not work**
158
+ - **Whether the idea truly needs real hardware**
159
+
160
+ ### Good Robotics Idea Patterns
161
+
162
+ Prefer ideas that:
163
+ - expose a real bottleneck in perception-action coupling
164
+ - improve robustness under embodiment or environment shift
165
+ - reduce operator time, reset cost, or demonstration cost
166
+ - strengthen sim2real transfer with measurable mechanisms
167
+ - improve recovery, retry behavior, or failure detection
168
+ - create a better benchmark, diagnostic, or evaluation protocol
169
+ - test an assumption the community repeats but rarely measures
170
+
171
+ ### Weak Robotics Idea Patterns
172
+
173
+ Downrank ideas that are mostly:
174
+ - "apply a foundation model / VLM / diffusion model to robot X" with no new bottleneck analysis
175
+ - demo-driven but not benchmarkable
176
+ - dependent on inaccessible hardware, custom sensors, or massive private datasets
177
+ - impossible to evaluate without a months-long infrastructure build
178
+ - only interesting if everything works perfectly
179
+
180
+ ### Filtering Rules
181
+
182
+ For each idea, reject or heavily downrank if:
183
+ - no concrete simulator or benchmark is available
184
+ - no credible baseline exists
185
+ - no measurable metric exists beyond "looks better"
186
+ - real robot execution is required but hardware access is unclear
187
+ - the setup depends on privileged observations that make the claim weak
188
+ - the expected contribution disappears if evaluation is made fair
189
+
190
+ **Checkpoint:** Present the ranked robotics ideas before novelty checking:
191
+
192
+ ```
193
+ 💡 Robotics ideas generated. Top candidates:
194
+
195
+ 1. [Idea 1] — Embodiment: [...] — Benchmark: [...] — Pilot: sim/offline — Risk: LOW/MEDIUM/HIGH
196
+ 2. [Idea 2] — Embodiment: [...] — Benchmark: [...] — Pilot: sim/offline — Risk: LOW/MEDIUM/HIGH
197
+ 3. [Idea 3] — requires hardware / weak benchmark / high risk
198
+
199
+ Should I carry the top sim-first ideas into pilot design, novelty checking, and internal review?
200
+ (If no response, I'll continue with the strongest benchmark-grounded ideas.)
201
+ ```
202
+
203
+ - **User picks ideas** (or no response + AUTO_PROCEED=true) → proceed to Phase 3 with the top sim-first ideas, then continue to Phase 4 and Phase 5.
204
+ - **User wants different constraints** → update the robotics frame and re-run Phase 2.
205
+ - **User wants narrower scope** → go back to Phase 1 with a tighter embodiment / task / benchmark focus.
206
+
207
+ ## Phase 3: Feasibility and Pilot Design
208
+
209
+ For the top ideas, design a **minimal validation package**.
210
+
211
+ If the repository already contains a usable simulator, benchmark harness, or offline dataset pipeline, you may validate the top 1-3 ideas there. If not, do **not** force execution. Produce a concrete pilot plan instead.
212
+
213
+ By default, pilots should be one of:
214
+ - **simulation pilot**
215
+ - **offline log / dataset pilot**
216
+ - **analysis-only pilot** using existing benchmark outputs
217
+
218
+ Only propose a real-robot pilot if the user explicitly wants that.
219
+
220
+ For each surviving idea, specify:
221
+
222
+ ```markdown
223
+ - Embodiment:
224
+ - Benchmark / simulator:
225
+ - Baselines:
226
+ - Pilot type: sim / offline / real
227
+ - Compute estimate:
228
+ - Human/operator time:
229
+ - Success metrics:
230
+ - Failure metrics:
231
+ - Safety concerns:
232
+ - What result would count as positive signal:
233
+ - What negative result would still be publishable:
234
+ ```
235
+
236
+ ### Real Robot Rule
237
+
238
+ **Never auto-proceed to physical robot testing.** If an idea needs hardware:
239
+ - mark it as `needs physical validation`
240
+ - design the sim or offline precursor first
241
+ - ask for explicit user confirmation before any real-robot step
242
+
243
+ If no cheap sim/offline pilot exists, keep the idea in the report but label it **high execution risk**.
244
+
245
+ After Phase 3, continue to Phase 4 even if you only produced a pilot plan rather than running a pilot. Lack of immediate execution is not a reason to stop the workflow.
246
+
247
+ ## Phase 4: Deep Novelty Verification
248
+
249
+ For each top idea, run:
250
+
251
+ ```
252
+ /novelty-check "[idea description with embodiment + task family + benchmark + sensor stack + controller/policy class + sim2real angle + target venues: CoRL/RSS/ICRA/IROS/RA-L]"
253
+ ```
254
+
255
+ Robotics novelty checks must include:
256
+ - embodiment
257
+ - task family
258
+ - benchmark / simulator
259
+ - sensor stack
260
+ - controller / policy type
261
+ - sim2real or safety angle if relevant
262
+
263
+ Be especially skeptical of ideas that are just:
264
+ - old method + new benchmark
265
+ - VLA/VLM + standard manipulation benchmark
266
+ - sim2real claim without new transfer mechanism
267
+
268
+ If the method is not novel but the **finding** or **evaluation protocol** is, say that explicitly.
269
+
270
+ ## Phase 5: Internal Robotics Review
271
+
272
+ Perform an internal senior-reviewer pass. Focus on:
273
+ - whether the contribution is really new for robotics, not just ML
274
+ - the minimum benchmark package needed for credibility
275
+ - whether the sim2real story is justified
276
+ - missing baselines or failure analyses
277
+ - whether the idea survives realistic infrastructure constraints
278
+
279
+ Update the report with the minimum viable evidence package.
280
+
281
+ ## Phase 6: Final Report
282
+
283
+ Write or update `IDEA_REPORT.md` with a robotics-specific structure so it stays compatible with downstream workflows.
284
+
285
+ ```markdown
286
+ # Robotics Idea Discovery Report
287
+
288
+ **Direction**: $ARGUMENTS
289
+ **Date**: [today]
290
+ **Pipeline**: research-lit → idea-creator (robotics framing) → novelty-check → internal review
291
+
292
+ ## Robotics Problem Frame
293
+ - Embodiment:
294
+ - Task family:
295
+ - Observation / action interface:
296
+ - Available assets:
297
+ - Constraints:
298
+
299
+ ## Landscape Matrix
300
+ [grouped by embodiment, benchmark, and bottleneck]
301
+
302
+ ## Ranked Ideas
303
+
304
+ ### Idea 1: [title] — RECOMMENDED
305
+ - Embodiment:
306
+ - Benchmark / simulator:
307
+ - Bottleneck addressed:
308
+ - Pilot type: sim / offline / real
309
+ - Positive signal:
310
+ - Novelty:
311
+ - Internal review notes:
312
+ - Hardware risk:
313
+ - Next step:
314
+
315
+ ## Eliminated Ideas
316
+ - [idea] — killed because benchmark unclear / hardware inaccessible / novelty weak / no fair evaluation
317
+
318
+ ## Evidence Package for the Top Idea
319
+ - Required baselines:
320
+ - Required metrics:
321
+ - Required failure cases:
322
+ - Whether real robot evidence is mandatory:
323
+
324
+ ## Next Steps
325
+ - [ ] Implement sim-first pilot
326
+ - [ ] Run /novelty-check on the final idea wording
327
+ - [ ] Only after approval: consider hardware validation
328
+ ```
329
+
330
+ ## Key Rules
331
+
332
+ - **Simulation first.** Hardware is never the default.
333
+ - **Benchmark specificity is mandatory.** No benchmark, no serious idea.
334
+ - **Evaluation must include failures.** Success rate alone is not enough.
335
+ - **Embodiment matters.** Do not assume a result on one robot transfers to another.
336
+ - **Avoid foundation-model theater.** Novel terminology is not novelty.
337
+ - **Infrastructure realism matters.** Operator time, reset burden, and safety count as research constraints.
338
+ - **If the contribution is mainly diagnostic or evaluative, say so.** That can still be publishable.
339
+
340
+ ## Composing with Later Work
341
+
342
+ After this workflow identifies a strong robotics idea:
343
+
344
+ ```
345
+ /idea-discovery-robot "direction" ← you are here
346
+ implement sim-first pilot
347
+ /run-experiment ← if infrastructure exists
348
+ /ulw-loop "top robotics idea"
349
+ ```
350
+
351
+ If no simulator or benchmark is available yet, stop at the report and ask the user to choose whether to build infrastructure or pivot to a more executable idea.
@@ -0,0 +1,174 @@
1
+ ---
2
+ name: "auto-claude/research-pipeline"
3
+ description: "Full research pipeline: Workflow 1 (idea discovery) → implementation → Workflow 2 (ULTRAWORK loop). Goes from a broad research direction all the way to a submission-ready paper. Use when user says \"全流程\", \"full pipeline\", \"从找idea到投稿\", \"end-to-end research\", or wants the complete autonomous research lifecycle."
4
+ argument-hint: ["research-direction"]
5
+ allowed-tools: "Bash(*), Read, Write, Edit, Grep, Glob, WebSearch, WebFetch, Agent, Skill, mcp__codex__codex, mcp__codex__codex-reply"
6
+ metadata:
7
+ category: "research/research-ideation"
8
+ ---
9
+
10
+ # Full Research Pipeline: Idea → Experiments → Submission
11
+
12
+ End-to-end autonomous research workflow for: **$ARGUMENTS**
13
+
14
+ ## Constants
15
+
16
+ - **AUTO_PROCEED = true** — When `true` (default), Gate 1 presents the top ideas, waits 10 seconds for user input, and then auto-selects the top-ranked idea (highest pilot signal + novelty confirmed) and continues to implementation. When `false`, always waits for explicit user confirmation before proceeding.
17
+ - **ARXIV_DOWNLOAD = false** — When `true`, `/research-lit` downloads the top relevant arXiv PDFs during literature survey. When `false` (default), only fetches metadata via arXiv API. Passed through to `/idea-discovery` → `/research-lit`.
18
+ - **HUMAN_CHECKPOINT = false** — When `true`, the ULTRAWORK loop (Stage 4) pauses after each round to let you inspect progress and provide custom modification instructions before fixes are implemented. When `false` (default), the loop runs fully autonomously. Passed through to `/ulw-loop`.
19
+
20
+ > 💡 Override via argument, e.g., `/research-pipeline "topic" — AUTO_PROCEED: false, HUMAN_CHECKPOINT: true`.
21
+
22
+ ## Overview
23
+
24
+ This skill chains the entire research lifecycle into a single pipeline:
25
+
26
+ ```
27
+ /idea-discovery → implement → /run-experiment → /ulw-loop → submission-ready
28
+ ├── Workflow 1 ──┤ ├────────── Workflow 2 ──────────────┤
29
+ ```
30
+
31
+ It orchestrates two major workflows plus the implementation bridge between them.
32
+
33
+ ## Pipeline
34
+
35
+ ### Stage 1: Idea Discovery (Workflow 1)
36
+
37
+ Invoke the idea discovery pipeline:
38
+
39
+ ```
40
+ /idea-discovery "$ARGUMENTS"
41
+ ```
42
+
43
+ This internally runs: `/research-lit` → `/idea-creator` → `/novelty-check`
44
+
45
+ **Output:** `IDEA_REPORT.md` with ranked, validated, pilot-tested ideas.
46
+
47
+ **🚦 Gate 1 — Human Checkpoint:**
48
+
49
+ After `IDEA_REPORT.md` is generated, **pause and present the top ideas to the user**:
50
+
51
+ ```
52
+ 📋 Idea Discovery complete. Top ideas:
53
+
54
+ 1. [Idea 1 title] — Pilot: POSITIVE (+X%), Novelty: CONFIRMED
55
+ 2. [Idea 2 title] — Pilot: WEAK POSITIVE (+Y%), Novelty: CONFIRMED
56
+ 3. [Idea 3 title] — Pilot: NEGATIVE, eliminated
57
+
58
+ Recommended: Idea 1. Shall I proceed with implementation?
59
+ ```
60
+
61
+ **If AUTO_PROCEED=false:** Wait for user confirmation before continuing. The user may:
62
+ - **Approve an idea** → proceed to Stage 2.
63
+ - **Pick a different idea** → proceed with their choice.
64
+ - **Request changes** (e.g., "combine Idea 1 and 3", "focus more on X") → update the idea prompt with user feedback, re-run `/idea-discovery` with refined constraints, and present again.
65
+ - **Reject all ideas** → collect feedback on what's missing, re-run Stage 1 with adjusted research direction. Repeat until the user commits to an idea.
66
+ - **Stop here** → save current state to `IDEA_REPORT.md` for future reference.
67
+
68
+ **If AUTO_PROCEED=true:** Present the top ideas, wait 10 seconds for user input. If no response, auto-select the #1 ranked idea (highest pilot signal + novelty confirmed) and proceed to Stage 2. Log: `"AUTO_PROCEED: selected Idea 1 — [title]"`.
69
+
70
+ > ⚠️ **This gate waits for user confirmation when AUTO_PROCEED=false.** When `true`, it auto-selects the top idea after presenting results. The rest of the pipeline (Stages 2-4) is expensive (GPU time + multiple review rounds), so set `AUTO_PROCEED=false` if you want to manually choose which idea to pursue.
71
+
72
+ ### Stage 2: Implementation
73
+
74
+ Once an idea has been selected at Gate 1 (confirmed by the user, or auto-selected when AUTO_PROCEED=true):
75
+
76
+ 1. **Read the idea details** from `IDEA_REPORT.md` (hypothesis, experimental design, pilot code)
77
+
78
+ 2. **Implement the full experiment**:
79
+ - Extend pilot code to full scale (multi-seed, full dataset, proper baselines)
80
+ - Add proper evaluation metrics and logging (wandb if configured)
81
+ - Write clean, reproducible experiment scripts
82
+ - Follow existing codebase conventions
83
+
84
+ 3. **Code review**: Before deploying, do a self-review:
85
+ - Are all hyperparameters configurable via argparse?
86
+ - Is the random seed fixed and controllable?
87
+ - Are results saved to JSON/CSV for later analysis?
88
+ - Is there proper logging for debugging?
89
+
90
+ ### Stage 3: Deploy Experiments (Workflow 2 — Part 1)
91
+
92
+ Deploy the full-scale experiments:
93
+
94
+ ```
95
+ /run-experiment [experiment command]
96
+ ```
97
+
98
+ **What this does:**
99
+ - Check GPU availability on configured servers
100
+ - Sync code to remote server
101
+ - Launch experiments in screen sessions with proper CUDA_VISIBLE_DEVICES
102
+ - Verify experiments started successfully
103
+
104
+ **Monitor progress:**
105
+
106
+ ```
107
+ /monitor-experiment [server]
108
+ ```
109
+
110
+ Wait for experiments to complete. Collect results.
111
+
112
+ ### Stage 4: ULTRAWORK Loop (Workflow 2 — Part 2)
113
+
114
+ Once initial results are in, start the autonomous improvement loop:
115
+
116
+ ```
117
+ /ulw-loop "$ARGUMENTS — [chosen idea title]"
118
+ ```
119
+
120
+ **What this does (recommended max 4 rounds):**
121
+ 1. Run work → check progress → record evidence
122
+ 2. Oracle verifies completion; if not verified, continue iterating
123
+ 3. Deploy fixes, collect new results
124
+ 4. Repeat until the Oracle verifies completion or the max number of rounds is reached
125
+
126
+ **Output:** Maintain a `ULTRAWORK_LOG.md` with each round's changes and evidence.
127
+
128
+ ### Stage 5: Final Summary
129
+
130
+ After the ULTRAWORK loop completes, write a final status report:
131
+
132
+ ```markdown
133
+ # Research Pipeline Report
134
+
135
+ **Direction**: $ARGUMENTS
136
+ **Chosen Idea**: [title]
137
+ **Date**: [start] → [end]
138
+ **Pipeline**: idea-discovery → implement → run-experiment → ulw-loop
139
+
140
+ ## Journey Summary
141
+ - Ideas generated: X → filtered to Y → piloted Z → chose 1
142
+ - Implementation: [brief description of what was built]
143
+ - Experiments: [number of GPU experiments, total compute time]
144
+ - ULTRAWORK rounds: N/4, Oracle verification: [verified / not verified]
145
+
146
+ ## Final Status
147
+ - [ ] Ready for submission / [ ] Needs manual follow-up
148
+
149
+ ## Remaining TODOs (if any)
150
+ - [items flagged by reviewer that weren't addressed]
151
+
152
+ ## Files Changed
153
+ - [list of key files created/modified]
154
+ ```
155
+
156
+ ## Key Rules
157
+
158
+ - **Human checkpoint after Stage 1 is controlled by AUTO_PROCEED.** When `false`, do not proceed without user confirmation. When `true`, auto-select the top idea after presenting results.
159
+ - **Stages 2-4 can run autonomously** once an idea has been selected at Gate 1 (confirmed by the user, or auto-selected when AUTO_PROCEED=true). This is the "sleep and wake up to results" part.
160
+ - **If Stage 4 hits the max rounds without Oracle verification**, stop and report remaining issues. Do not loop forever.
161
+ - **Budget awareness**: Track total GPU-hours across the pipeline. Flag if approaching user-defined limits.
162
+ - **Documentation**: Every stage updates its own output file. The full history should be self-contained.
163
+ - **Fail gracefully**: If any stage fails (no good ideas, experiments crash, ULTRAWORK loop stuck), report clearly and suggest alternatives rather than forcing forward.
164
+
165
+ ## Typical Timeline
166
+
167
+ | Stage | Duration | Can sleep? |
168
+ |-------|----------|------------|
169
+ | 1. Idea Discovery | 30-60 min | Yes if AUTO_PROCEED=true |
170
+ | 2. Implementation | 15-60 min | Yes (autonomous after Gate 1) |
171
+ | 3. Deploy | 5 min + experiment time | Yes ✅ |
172
+ | 4. ULTRAWORK | 1-4 hours (depends on experiments) | Yes ✅ |
173
+
174
+ **Sweet spot**: Run Stages 1-2 in the evening, launch Stages 3-4 before bed, wake up to a reviewed paper.