principles-disciple 1.8.1 → 1.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (508)
  1. package/ADVANCED_CONFIG_ZH.md +97 -0
  2. package/AGENT_INSTALL.md +173 -0
  3. package/AGENT_INSTALL_EN.md +173 -0
  4. package/INSTALL.md +256 -0
  5. package/SKILL.md +63 -0
  6. package/docs/COMMAND_REFERENCE.md +76 -0
  7. package/docs/COMMAND_REFERENCE_EN.md +79 -0
  8. package/esbuild.config.js +75 -0
  9. package/openclaw.plugin.json +4 -4
  10. package/package.json +11 -13
  11. package/scripts/build-web.mjs +46 -0
  12. package/scripts/install-dependencies.cjs +47 -0
  13. package/scripts/sync-plugin.mjs +802 -0
  14. package/scripts/verify-build.mjs +109 -0
  15. package/src/agents/nocturnal-dreamer.md +152 -0
  16. package/src/agents/nocturnal-philosopher.md +138 -0
  17. package/src/agents/nocturnal-reflector.md +126 -0
  18. package/src/agents/nocturnal-scribe.md +164 -0
  19. package/src/commands/capabilities.ts +85 -0
  20. package/{dist/commands/context.js → src/commands/context.ts} +78 -38
  21. package/src/commands/evolution-status.ts +146 -0
  22. package/src/commands/export.ts +111 -0
  23. package/src/commands/focus.ts +533 -0
  24. package/src/commands/nocturnal-review.ts +311 -0
  25. package/src/commands/nocturnal-rollout.ts +763 -0
  26. package/src/commands/nocturnal-train.ts +1002 -0
  27. package/{dist/commands/pain.js → src/commands/pain.ts} +68 -49
  28. package/src/commands/principle-rollback.ts +27 -0
  29. package/{dist/commands/rollback.js → src/commands/rollback.ts} +44 -12
  30. package/src/commands/samples.ts +60 -0
  31. package/src/commands/strategy.ts +38 -0
  32. package/{dist/commands/thinking-os.js → src/commands/thinking-os.ts} +59 -36
  33. package/src/commands/workflow-debug.ts +128 -0
  34. package/{dist/config/defaults/runtime.js → src/config/defaults/runtime.ts} +12 -5
  35. package/src/config/errors.ts +163 -0
  36. package/{dist/config/index.d.ts → src/config/index.ts} +2 -1
  37. package/src/constants/diagnostician.ts +66 -0
  38. package/src/constants/tools.ts +62 -0
  39. package/src/core/adaptive-thresholds.ts +476 -0
  40. package/{dist/core/config-service.js → src/core/config-service.ts} +7 -4
  41. package/{dist/core/config.js → src/core/config.ts} +158 -46
  42. package/src/core/control-ui-db.ts +435 -0
  43. package/{dist/core/detection-funnel.js → src/core/detection-funnel.ts} +36 -21
  44. package/{dist/core/detection-service.js → src/core/detection-service.ts} +7 -4
  45. package/{dist/core/dictionary-service.js → src/core/dictionary-service.ts} +7 -4
  46. package/{dist/core/dictionary.js → src/core/dictionary.ts} +57 -34
  47. package/src/core/empathy-keyword-matcher.ts +327 -0
  48. package/src/core/empathy-types.ts +218 -0
  49. package/src/core/event-log.ts +544 -0
  50. package/src/core/evolution-engine.ts +612 -0
  51. package/src/core/evolution-logger.ts +353 -0
  52. package/src/core/evolution-migration.ts +77 -0
  53. package/src/core/evolution-reducer.ts +731 -0
  54. package/src/core/evolution-types.ts +456 -0
  55. package/src/core/external-training-contract.ts +527 -0
  56. package/src/core/focus-history.ts +1458 -0
  57. package/src/core/hygiene/tracker.ts +117 -0
  58. package/{dist/core/init.js → src/core/init.ts} +39 -26
  59. package/src/core/local-worker-routing.ts +617 -0
  60. package/{dist/core/migration.js → src/core/migration.ts} +18 -11
  61. package/src/core/model-deployment-registry.ts +722 -0
  62. package/src/core/model-training-registry.ts +813 -0
  63. package/src/core/nocturnal-arbiter.ts +706 -0
  64. package/src/core/nocturnal-candidate-scoring.ts +392 -0
  65. package/src/core/nocturnal-compliance.ts +1075 -0
  66. package/src/core/nocturnal-dataset.ts +668 -0
  67. package/src/core/nocturnal-executability.ts +428 -0
  68. package/src/core/nocturnal-export.ts +390 -0
  69. package/{dist/core/nocturnal-paths.js → src/core/nocturnal-paths.ts} +49 -23
  70. package/src/core/nocturnal-trajectory-extractor.ts +484 -0
  71. package/src/core/nocturnal-trinity.ts +1384 -0
  72. package/src/core/pain.ts +122 -0
  73. package/{dist/core/path-resolver.js → src/core/path-resolver.ts} +157 -36
  74. package/{dist/core/paths.js → src/core/paths.ts} +13 -4
  75. package/src/core/principle-training-state.ts +450 -0
  76. package/src/core/profile.ts +226 -0
  77. package/src/core/promotion-gate.ts +822 -0
  78. package/{dist/core/risk-calculator.js → src/core/risk-calculator.ts} +42 -16
  79. package/{dist/core/session-tracker.js → src/core/session-tracker.ts} +175 -62
  80. package/src/core/shadow-observation-registry.ts +534 -0
  81. package/{dist/core/system-logger.js → src/core/system-logger.ts} +9 -5
  82. package/src/core/thinking-models.ts +217 -0
  83. package/src/core/training-program.ts +630 -0
  84. package/src/core/trajectory-types.ts +243 -0
  85. package/src/core/trajectory.ts +1673 -0
  86. package/{dist/core/workspace-context.js → src/core/workspace-context.ts} +57 -32
  87. package/src/hooks/bash-risk.ts +171 -0
  88. package/src/hooks/edit-verification.ts +295 -0
  89. package/src/hooks/gate-block-helper.ts +160 -0
  90. package/src/hooks/gate.ts +210 -0
  91. package/src/hooks/gfi-gate.ts +177 -0
  92. package/src/hooks/lifecycle.ts +326 -0
  93. package/{dist/hooks/llm.js → src/hooks/llm.ts} +160 -80
  94. package/src/hooks/message-sanitize.ts +45 -0
  95. package/src/hooks/pain.ts +384 -0
  96. package/src/hooks/progressive-trust-gate.ts +174 -0
  97. package/src/hooks/prompt.ts +920 -0
  98. package/src/hooks/subagent.ts +207 -0
  99. package/src/hooks/thinking-checkpoint.ts +73 -0
  100. package/src/hooks/trajectory-collector.ts +290 -0
  101. package/src/http/principles-console-route.ts +716 -0
  102. package/src/i18n/commands.ts +117 -0
  103. package/src/index.ts +694 -0
  104. package/src/service/central-database.ts +831 -0
  105. package/src/service/control-ui-query-service.ts +888 -0
  106. package/src/service/evolution-query-service.ts +405 -0
  107. package/src/service/evolution-worker.ts +1646 -0
  108. package/src/service/health-query-service.ts +836 -0
  109. package/{dist/service/nocturnal-runtime.js → src/service/nocturnal-runtime.ts} +235 -79
  110. package/src/service/nocturnal-service.ts +1015 -0
  111. package/src/service/nocturnal-target-selector.ts +532 -0
  112. package/src/service/phase3-input-filter.ts +237 -0
  113. package/src/service/runtime-summary-service.ts +757 -0
  114. package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +513 -0
  115. package/{dist/service/subagent-workflow/empathy-observer-workflow-manager.js → src/service/subagent-workflow/empathy-observer-workflow-manager.ts} +240 -117
  116. package/src/service/subagent-workflow/index.ts +51 -0
  117. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +856 -0
  118. package/src/service/subagent-workflow/runtime-direct-driver.ts +166 -0
  119. package/{dist/service/subagent-workflow/types.d.ts → src/service/subagent-workflow/types.ts} +137 -18
  120. package/src/service/subagent-workflow/workflow-store.ts +328 -0
  121. package/src/service/trajectory-service.ts +15 -0
  122. package/{dist/tools/critique-prompt.js → src/tools/critique-prompt.ts} +25 -8
  123. package/src/tools/deep-reflect.ts +349 -0
  124. package/{dist/tools/model-index.js → src/tools/model-index.ts} +33 -17
  125. package/src/types/event-types.ts +453 -0
  126. package/src/types/hygiene-types.ts +31 -0
  127. package/src/types/principle-tree-schema.ts +244 -0
  128. package/src/types/runtime-summary.ts +49 -0
  129. package/src/types.ts +74 -0
  130. package/src/utils/file-lock.ts +391 -0
  131. package/{dist/utils/glob-match.js → src/utils/glob-match.ts} +21 -20
  132. package/{dist/utils/hashing.js → src/utils/hashing.ts} +6 -4
  133. package/src/utils/io.ts +110 -0
  134. package/{dist/utils/nlp.js → src/utils/nlp.ts} +19 -12
  135. package/{dist/utils/plugin-logger.js → src/utils/plugin-logger.ts} +33 -8
  136. package/src/utils/subagent-probe.ts +94 -0
  137. package/templates/langs/en/skills/ai-sprint-orchestration/EXAMPLES.md +63 -0
  138. package/templates/langs/en/skills/ai-sprint-orchestration/REFERENCE.md +136 -0
  139. package/templates/langs/en/skills/ai-sprint-orchestration/SKILL.md +67 -0
  140. package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +214 -0
  141. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +107 -0
  142. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +107 -0
  143. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +105 -0
  144. package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +108 -0
  145. package/templates/langs/en/skills/ai-sprint-orchestration/references/workflow-v1-acceptance-checklist.md +58 -0
  146. package/templates/langs/en/skills/ai-sprint-orchestration/references/workflow-v1.4-work-unit-handoff.md +190 -0
  147. package/templates/langs/en/skills/ai-sprint-orchestration/runtime/.gitignore +2 -0
  148. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/lib/archive.mjs +310 -0
  149. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/lib/contract-enforcement.mjs +683 -0
  150. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/lib/decision.mjs +604 -0
  151. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/lib/state-store.mjs +32 -0
  152. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/lib/task-specs.mjs +707 -0
  153. package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +3419 -0
  154. package/templates/langs/zh/skills/ai-sprint-orchestration/EXAMPLES.md +63 -0
  155. package/templates/langs/zh/skills/ai-sprint-orchestration/REFERENCE.md +136 -0
  156. package/templates/langs/zh/skills/ai-sprint-orchestration/SKILL.md +67 -0
  157. package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +214 -0
  158. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +107 -0
  159. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +107 -0
  160. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +105 -0
  161. package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +108 -0
  162. package/templates/langs/zh/skills/ai-sprint-orchestration/references/workflow-v1-acceptance-checklist.md +58 -0
  163. package/templates/langs/zh/skills/ai-sprint-orchestration/references/workflow-v1.4-work-unit-handoff.md +190 -0
  164. package/templates/langs/zh/skills/ai-sprint-orchestration/runtime/.gitignore +2 -0
  165. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/archive.mjs +310 -0
  166. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/contract-enforcement.mjs +683 -0
  167. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/decision.mjs +604 -0
  168. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/state-store.mjs +32 -0
  169. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/task-specs.mjs +707 -0
  170. package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +3419 -0
  171. package/templates/langs/zh/skills/ai-sprint-orchestration/test/archive.test.mjs +230 -0
  172. package/templates/langs/zh/skills/ai-sprint-orchestration/test/contract-enforcement.test.mjs +672 -0
  173. package/templates/langs/zh/skills/ai-sprint-orchestration/test/decision.test.mjs +1321 -0
  174. package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +1419 -0
  175. package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +70 -1
  176. package/templates/pain_settings.json +2 -1
  177. package/tests/README.md +120 -0
  178. package/tests/build-artifacts.test.ts +111 -0
  179. package/tests/commands/evolution-status.test.ts +222 -0
  180. package/tests/commands/evolver.test.ts +22 -0
  181. package/tests/commands/export.test.ts +78 -0
  182. package/tests/commands/nocturnal-review.test.ts +448 -0
  183. package/tests/commands/nocturnal-train.test.ts +97 -0
  184. package/tests/commands/pain.test.ts +108 -0
  185. package/tests/commands/samples.test.ts +65 -0
  186. package/tests/commands/strategy.test.ts +34 -0
  187. package/tests/commands/thinking-os.test.ts +88 -0
  188. package/tests/core/adaptive-thresholds.test.ts +261 -0
  189. package/tests/core/config-service.test.ts +89 -0
  190. package/tests/core/config.test.ts +90 -0
  191. package/tests/core/control-ui-db.test.ts +75 -0
  192. package/tests/core/core-template-guidance.test.ts +21 -0
  193. package/tests/core/detection-funnel.test.ts +63 -0
  194. package/tests/core/detection-service.test.ts +50 -0
  195. package/tests/core/dictionary-service.test.ts +116 -0
  196. package/tests/core/dictionary.test.ts +168 -0
  197. package/tests/core/empathy-keyword-matcher.test.ts +209 -0
  198. package/tests/core/event-log.test.ts +181 -0
  199. package/tests/core/evolution-e2e.test.ts +58 -0
  200. package/tests/core/evolution-engine-gate-integration.test.ts +543 -0
  201. package/tests/core/evolution-engine.test.ts +562 -0
  202. package/tests/core/evolution-logger.test.ts +148 -0
  203. package/tests/core/evolution-migration.test.ts +50 -0
  204. package/tests/core/evolution-paths.test.ts +21 -0
  205. package/tests/core/evolution-reducer.detector-metadata.test.ts +602 -0
  206. package/tests/core/evolution-reducer.test.ts +180 -0
  207. package/tests/core/evolution-types-loop.test.ts +48 -0
  208. package/tests/core/evolution-user-stories.e2e.test.ts +249 -0
  209. package/tests/core/external-training-contract.test.ts +463 -0
  210. package/tests/core/focus-history.test.ts +682 -0
  211. package/tests/core/init-flatten.test.ts +69 -0
  212. package/tests/core/init-refactor.test.ts +87 -0
  213. package/tests/core/init-v1.3.test.ts +46 -0
  214. package/tests/core/init.test.ts +190 -0
  215. package/tests/core/local-worker-routing.test.ts +757 -0
  216. package/tests/core/migration.test.ts +84 -0
  217. package/tests/core/model-deployment-registry.test.ts +845 -0
  218. package/tests/core/model-training-registry.test.ts +889 -0
  219. package/tests/core/nocturnal-arbiter.test.ts +494 -0
  220. package/tests/core/nocturnal-candidate-scoring.test.ts +400 -0
  221. package/tests/core/nocturnal-compliance.test.ts +646 -0
  222. package/tests/core/nocturnal-dataset.test.ts +892 -0
  223. package/tests/core/nocturnal-executability.test.ts +357 -0
  224. package/tests/core/nocturnal-export.test.ts +462 -0
  225. package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +428 -0
  226. package/tests/core/nocturnal-trajectory-extractor.test.ts +634 -0
  227. package/tests/core/nocturnal-trinity.test.ts +953 -0
  228. package/tests/core/pain.test.ts +33 -0
  229. package/tests/core/path-resolver.test.ts +57 -0
  230. package/tests/core/paths-refactor.test.ts +42 -0
  231. package/tests/core/phase7-rollout-integration.test.ts +477 -0
  232. package/tests/core/principle-training-state.test.ts +712 -0
  233. package/tests/core/profile.test.ts +56 -0
  234. package/tests/core/promotion-gate.test.ts +556 -0
  235. package/tests/core/risk-calculator.test.ts +168 -0
  236. package/tests/core/session-tracker.test.ts +191 -0
  237. package/tests/core/training-program.test.ts +472 -0
  238. package/tests/core/trajectory.test.ts +265 -0
  239. package/tests/core/workspace-context-factory.test.ts +18 -0
  240. package/tests/core/workspace-context.test.ts +134 -0
  241. package/tests/fixtures/nocturnal-reviewed-subset.json +183 -0
  242. package/tests/fixtures/production-compatibility.test.ts +147 -0
  243. package/tests/fixtures/production-mock-generator.ts +282 -0
  244. package/tests/hooks/bash-risk-integration.test.ts +137 -0
  245. package/tests/hooks/bash-risk.test.ts +81 -0
  246. package/tests/hooks/edit-verification.test.ts +678 -0
  247. package/tests/hooks/gate-edit-verification-p1.test.ts +632 -0
  248. package/tests/hooks/gate-edit-verification.test.ts +435 -0
  249. package/tests/hooks/gate-pipeline-integration.test.ts +404 -0
  250. package/tests/hooks/gate.test.ts +271 -0
  251. package/tests/hooks/gfi-gate-unit.test.ts +422 -0
  252. package/tests/hooks/gfi-gate.test.ts +669 -0
  253. package/tests/hooks/lifecycle.test.ts +248 -0
  254. package/tests/hooks/llm.test.ts +308 -0
  255. package/tests/hooks/message-sanitize.test.ts +36 -0
  256. package/tests/hooks/pain.test.ts +141 -0
  257. package/tests/hooks/progressive-trust-gate.test.ts +277 -0
  258. package/tests/hooks/prompt.test.ts +1411 -0
  259. package/tests/hooks/subagent.test.ts +467 -0
  260. package/tests/hooks/thinking-gate.test.ts +313 -0
  261. package/tests/http/principles-console-route.test.ts +140 -0
  262. package/tests/hygiene-tracker.test.ts +77 -0
  263. package/tests/index.integration.test.ts +179 -0
  264. package/tests/index.shadow-routing.integration.test.ts +140 -0
  265. package/tests/index.test.ts +9 -0
  266. package/tests/integration/empathy-workflow-integration.test.ts +627 -0
  267. package/tests/service/control-ui-query-service.test.ts +121 -0
  268. package/tests/service/empathy-observer-workflow-manager.test.ts +176 -0
  269. package/tests/service/evolution-worker.test.ts +585 -0
  270. package/tests/service/nocturnal-runtime.test.ts +470 -0
  271. package/tests/service/nocturnal-service.test.ts +577 -0
  272. package/tests/service/nocturnal-target-selector.test.ts +615 -0
  273. package/tests/service/nocturnal-workflow-manager.test.ts +439 -0
  274. package/tests/service/phase3-input-filter.test.ts +289 -0
  275. package/tests/service/runtime-summary-service.test.ts +919 -0
  276. package/tests/task-compliance.test.ts +166 -0
  277. package/tests/test-utils.ts +48 -0
  278. package/tests/tools/critique-prompt.test.ts +260 -0
  279. package/tests/tools/deep-reflect.test.ts +232 -0
  280. package/tests/tools/model-index.test.ts +246 -0
  281. package/tests/ui/app.test.tsx +114 -0
  282. package/tests/utils/file-lock.test.ts +407 -0
  283. package/tests/utils/hashing.test.ts +32 -0
  284. package/tests/utils/io.test.ts +39 -0
  285. package/tests/utils/nlp.test.ts +53 -0
  286. package/tests/utils/plugin-logger.test.ts +156 -0
  287. package/tsconfig.json +16 -0
  288. package/tsconfig.tsbuildinfo +1 -0
  289. package/ui/src/App.tsx +45 -0
  290. package/ui/src/api.ts +216 -0
  291. package/ui/src/charts.tsx +586 -0
  292. package/ui/src/components/ErrorState.tsx +6 -0
  293. package/ui/src/components/Loading.tsx +13 -0
  294. package/ui/src/components/ProtectedRoute.tsx +12 -0
  295. package/ui/src/components/Shell.tsx +91 -0
  296. package/ui/src/components/WorkspaceConfig.tsx +146 -0
  297. package/ui/src/components/index.ts +5 -0
  298. package/ui/src/context/auth.tsx +80 -0
  299. package/ui/src/context/theme.tsx +66 -0
  300. package/ui/src/hooks/useAutoRefresh.ts +39 -0
  301. package/ui/src/i18n/ui.ts +363 -0
  302. package/ui/src/main.tsx +16 -0
  303. package/ui/src/pages/EvolutionPage.tsx +352 -0
  304. package/ui/src/pages/FeedbackPage.tsx +140 -0
  305. package/ui/src/pages/GateMonitorPage.tsx +136 -0
  306. package/ui/src/pages/LoginPage.tsx +88 -0
  307. package/ui/src/pages/OverviewPage.tsx +238 -0
  308. package/ui/src/pages/SamplesPage.tsx +174 -0
  309. package/ui/src/pages/ThinkingModelsPage.tsx +127 -0
  310. package/ui/src/styles.css +1661 -0
  311. package/ui/src/types.ts +368 -0
  312. package/ui/src/utils/format.ts +15 -0
  313. package/vitest.config.ts +23 -0
  314. package/dist/commands/capabilities.d.ts +0 -3
  315. package/dist/commands/capabilities.js +0 -73
  316. package/dist/commands/context.d.ts +0 -5
  317. package/dist/commands/evolution-status.d.ts +0 -4
  318. package/dist/commands/evolution-status.js +0 -117
  319. package/dist/commands/evolver.d.ts +0 -9
  320. package/dist/commands/evolver.js +0 -26
  321. package/dist/commands/export.d.ts +0 -2
  322. package/dist/commands/export.js +0 -98
  323. package/dist/commands/focus.d.ts +0 -14
  324. package/dist/commands/focus.js +0 -457
  325. package/dist/commands/nocturnal-review.d.ts +0 -24
  326. package/dist/commands/nocturnal-review.js +0 -265
  327. package/dist/commands/nocturnal-rollout.d.ts +0 -27
  328. package/dist/commands/nocturnal-rollout.js +0 -671
  329. package/dist/commands/nocturnal-train.d.ts +0 -25
  330. package/dist/commands/nocturnal-train.js +0 -919
  331. package/dist/commands/pain.d.ts +0 -5
  332. package/dist/commands/principle-rollback.d.ts +0 -4
  333. package/dist/commands/principle-rollback.js +0 -22
  334. package/dist/commands/rollback.d.ts +0 -19
  335. package/dist/commands/samples.d.ts +0 -2
  336. package/dist/commands/samples.js +0 -55
  337. package/dist/commands/strategy.d.ts +0 -3
  338. package/dist/commands/strategy.js +0 -29
  339. package/dist/commands/thinking-os.d.ts +0 -2
  340. package/dist/config/defaults/runtime.d.ts +0 -40
  341. package/dist/config/errors.d.ts +0 -84
  342. package/dist/config/errors.js +0 -94
  343. package/dist/config/index.js +0 -7
  344. package/dist/constants/diagnostician.d.ts +0 -12
  345. package/dist/constants/diagnostician.js +0 -56
  346. package/dist/constants/tools.d.ts +0 -17
  347. package/dist/constants/tools.js +0 -54
  348. package/dist/core/adaptive-thresholds.d.ts +0 -186
  349. package/dist/core/adaptive-thresholds.js +0 -300
  350. package/dist/core/config-service.d.ts +0 -15
  351. package/dist/core/config.d.ts +0 -129
  352. package/dist/core/control-ui-db.d.ts +0 -95
  353. package/dist/core/control-ui-db.js +0 -292
  354. package/dist/core/detection-funnel.d.ts +0 -33
  355. package/dist/core/detection-service.d.ts +0 -15
  356. package/dist/core/dictionary-service.d.ts +0 -15
  357. package/dist/core/dictionary.d.ts +0 -38
  358. package/dist/core/event-log.d.ts +0 -82
  359. package/dist/core/event-log.js +0 -463
  360. package/dist/core/evolution-engine.d.ts +0 -118
  361. package/dist/core/evolution-engine.js +0 -464
  362. package/dist/core/evolution-logger.d.ts +0 -137
  363. package/dist/core/evolution-logger.js +0 -256
  364. package/dist/core/evolution-migration.d.ts +0 -5
  365. package/dist/core/evolution-migration.js +0 -65
  366. package/dist/core/evolution-reducer.d.ts +0 -98
  367. package/dist/core/evolution-reducer.js +0 -465
  368. package/dist/core/evolution-types.d.ts +0 -287
  369. package/dist/core/evolution-types.js +0 -78
  370. package/dist/core/external-training-contract.d.ts +0 -276
  371. package/dist/core/external-training-contract.js +0 -269
  372. package/dist/core/focus-history.d.ts +0 -210
  373. package/dist/core/focus-history.js +0 -1185
  374. package/dist/core/hygiene/tracker.d.ts +0 -22
  375. package/dist/core/hygiene/tracker.js +0 -106
  376. package/dist/core/init.d.ts +0 -12
  377. package/dist/core/local-worker-routing.d.ts +0 -175
  378. package/dist/core/local-worker-routing.js +0 -525
  379. package/dist/core/migration.d.ts +0 -6
  380. package/dist/core/model-deployment-registry.d.ts +0 -218
  381. package/dist/core/model-deployment-registry.js +0 -503
  382. package/dist/core/model-training-registry.d.ts +0 -295
  383. package/dist/core/model-training-registry.js +0 -475
  384. package/dist/core/nocturnal-arbiter.d.ts +0 -159
  385. package/dist/core/nocturnal-arbiter.js +0 -534
  386. package/dist/core/nocturnal-candidate-scoring.d.ts +0 -137
  387. package/dist/core/nocturnal-candidate-scoring.js +0 -266
  388. package/dist/core/nocturnal-compliance.d.ts +0 -175
  389. package/dist/core/nocturnal-compliance.js +0 -824
  390. package/dist/core/nocturnal-dataset.d.ts +0 -224
  391. package/dist/core/nocturnal-dataset.js +0 -443
  392. package/dist/core/nocturnal-executability.d.ts +0 -85
  393. package/dist/core/nocturnal-executability.js +0 -331
  394. package/dist/core/nocturnal-export.d.ts +0 -124
  395. package/dist/core/nocturnal-export.js +0 -275
  396. package/dist/core/nocturnal-paths.d.ts +0 -124
  397. package/dist/core/nocturnal-trajectory-extractor.d.ts +0 -242
  398. package/dist/core/nocturnal-trajectory-extractor.js +0 -307
  399. package/dist/core/nocturnal-trinity.d.ts +0 -311
  400. package/dist/core/nocturnal-trinity.js +0 -880
  401. package/dist/core/pain.d.ts +0 -4
  402. package/dist/core/pain.js +0 -70
  403. package/dist/core/path-resolver.d.ts +0 -46
  404. package/dist/core/paths.d.ts +0 -65
  405. package/dist/core/principle-training-state.d.ts +0 -121
  406. package/dist/core/principle-training-state.js +0 -321
  407. package/dist/core/profile.d.ts +0 -62
  408. package/dist/core/profile.js +0 -210
  409. package/dist/core/promotion-gate.d.ts +0 -238
  410. package/dist/core/promotion-gate.js +0 -529
  411. package/dist/core/risk-calculator.d.ts +0 -22
  412. package/dist/core/session-tracker.d.ts +0 -101
  413. package/dist/core/shadow-observation-registry.d.ts +0 -217
  414. package/dist/core/shadow-observation-registry.js +0 -308
  415. package/dist/core/system-logger.d.ts +0 -8
  416. package/dist/core/thinking-models.d.ts +0 -38
  417. package/dist/core/thinking-models.js +0 -170
  418. package/dist/core/training-program.d.ts +0 -233
  419. package/dist/core/training-program.js +0 -433
  420. package/dist/core/trajectory.d.ts +0 -411
  421. package/dist/core/trajectory.js +0 -1307
  422. package/dist/core/workspace-context.d.ts +0 -71
  423. package/dist/hooks/bash-risk.d.ts +0 -57
  424. package/dist/hooks/bash-risk.js +0 -137
  425. package/dist/hooks/edit-verification.d.ts +0 -62
  426. package/dist/hooks/edit-verification.js +0 -256
  427. package/dist/hooks/gate-block-helper.d.ts +0 -44
  428. package/dist/hooks/gate-block-helper.js +0 -119
  429. package/dist/hooks/gate.d.ts +0 -24
  430. package/dist/hooks/gate.js +0 -173
  431. package/dist/hooks/gfi-gate.d.ts +0 -40
  432. package/dist/hooks/gfi-gate.js +0 -113
  433. package/dist/hooks/lifecycle.d.ts +0 -5
  434. package/dist/hooks/lifecycle.js +0 -284
  435. package/dist/hooks/llm.d.ts +0 -13
  436. package/dist/hooks/message-sanitize.d.ts +0 -3
  437. package/dist/hooks/message-sanitize.js +0 -37
  438. package/dist/hooks/pain.d.ts +0 -5
  439. package/dist/hooks/pain.js +0 -301
  440. package/dist/hooks/progressive-trust-gate.d.ts +0 -52
  441. package/dist/hooks/progressive-trust-gate.js +0 -134
  442. package/dist/hooks/prompt.d.ts +0 -49
  443. package/dist/hooks/prompt.js +0 -905
  444. package/dist/hooks/subagent.d.ts +0 -10
  445. package/dist/hooks/subagent.js +0 -387
  446. package/dist/hooks/thinking-checkpoint.d.ts +0 -37
  447. package/dist/hooks/thinking-checkpoint.js +0 -51
  448. package/dist/hooks/trajectory-collector.d.ts +0 -32
  449. package/dist/hooks/trajectory-collector.js +0 -256
  450. package/dist/http/principles-console-route.d.ts +0 -9
  451. package/dist/http/principles-console-route.js +0 -681
  452. package/dist/i18n/commands.d.ts +0 -26
  453. package/dist/i18n/commands.js +0 -116
  454. package/dist/index.d.ts +0 -7
  455. package/dist/index.js +0 -581
  456. package/dist/service/central-database.d.ts +0 -104
  457. package/dist/service/central-database.js +0 -649
  458. package/dist/service/control-ui-query-service.d.ts +0 -221
  459. package/dist/service/control-ui-query-service.js +0 -543
  460. package/dist/service/empathy-observer-manager.d.ts +0 -88
  461. package/dist/service/empathy-observer-manager.js +0 -414
  462. package/dist/service/evolution-query-service.d.ts +0 -155
  463. package/dist/service/evolution-query-service.js +0 -258
  464. package/dist/service/evolution-worker.d.ts +0 -101
  465. package/dist/service/evolution-worker.js +0 -975
  466. package/dist/service/health-query-service.d.ts +0 -170
  467. package/dist/service/health-query-service.js +0 -662
  468. package/dist/service/nocturnal-runtime.d.ts +0 -183
  469. package/dist/service/nocturnal-service.d.ts +0 -163
  470. package/dist/service/nocturnal-service.js +0 -787
  471. package/dist/service/nocturnal-target-selector.d.ts +0 -145
  472. package/dist/service/nocturnal-target-selector.js +0 -315
  473. package/dist/service/phase3-input-filter.d.ts +0 -73
  474. package/dist/service/phase3-input-filter.js +0 -172
  475. package/dist/service/runtime-summary-service.d.ts +0 -122
  476. package/dist/service/runtime-summary-service.js +0 -485
  477. package/dist/service/subagent-workflow/empathy-observer-workflow-manager.d.ts +0 -48
  478. package/dist/service/subagent-workflow/index.d.ts +0 -4
  479. package/dist/service/subagent-workflow/index.js +0 -3
  480. package/dist/service/subagent-workflow/runtime-direct-driver.d.ts +0 -77
  481. package/dist/service/subagent-workflow/runtime-direct-driver.js +0 -75
  482. package/dist/service/subagent-workflow/types.js +0 -11
  483. package/dist/service/subagent-workflow/workflow-store.d.ts +0 -26
  484. package/dist/service/subagent-workflow/workflow-store.js +0 -165
  485. package/dist/service/trajectory-service.d.ts +0 -2
  486. package/dist/service/trajectory-service.js +0 -15
  487. package/dist/tools/critique-prompt.d.ts +0 -14
  488. package/dist/tools/deep-reflect.d.ts +0 -39
  489. package/dist/tools/deep-reflect.js +0 -350
  490. package/dist/tools/model-index.d.ts +0 -9
  491. package/dist/types/event-types.d.ts +0 -306
  492. package/dist/types/event-types.js +0 -106
  493. package/dist/types/hygiene-types.d.ts +0 -20
  494. package/dist/types/hygiene-types.js +0 -12
  495. package/dist/types/runtime-summary.d.ts +0 -47
  496. package/dist/types/runtime-summary.js +0 -1
  497. package/dist/types.d.ts +0 -50
  498. package/dist/types.js +0 -22
  499. package/dist/utils/file-lock.d.ts +0 -71
  500. package/dist/utils/file-lock.js +0 -309
  501. package/dist/utils/glob-match.d.ts +0 -28
  502. package/dist/utils/hashing.d.ts +0 -9
  503. package/dist/utils/io.d.ts +0 -6
  504. package/dist/utils/io.js +0 -106
  505. package/dist/utils/nlp.d.ts +0 -9
  506. package/dist/utils/plugin-logger.d.ts +0 -39
  507. package/dist/utils/subagent-probe.d.ts +0 -34
  508. package/dist/utils/subagent-probe.js +0 -81
@@ -0,0 +1,1002 @@
1
+ /**
2
+ * Nocturnal Training Command Handler
3
+ * ==================================
4
+ *
5
+ * Plugin command handler for nocturnal training operations.
6
+ * Provides commands for:
7
+ * - create-experiment: Create a new training experiment
8
+ * - show-experiment: Show experiment details
9
+ * - import-result: Import trainer result
10
+ * - attach-eval: Attach benchmark eval to checkpoint
11
+ * - show-lineage: Show checkpoint lineage
12
+ * - list-experiments: List all experiments
13
+ * - list-checkpoints: List all checkpoints
14
+ *
15
+ * Usage:
16
+ * /nocturnal-train create-experiment --backend=peft-trl-orpo --family=<model-family> [--tier=<hardware-tier>] [--dataset=<export-id>] [--benchmark=<export-id>] [--run]
17
+ * /nocturnal-train show-experiment <experimentId>
18
+ * /nocturnal-train import-result <experimentId> --result=<path-or-json>
19
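+ * /nocturnal-train attach-eval <checkpointId> --benchmark-id=<id> --delta=<number> --verdict=<pass|fail> [--baseline=<score>] [--candidate=<score>] [--mode=<prompt_assisted|reduced_prompt>] [--run-benchmark [--baseline-ref=<checkpointId>]]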
20
+ * /nocturnal-train show-lineage <checkpointId>
21
+ * /nocturnal-train list-experiments
22
+ * /nocturnal-train list-checkpoints
23
+ */
24
+
25
+ import * as path from 'path';
26
+ import * as fs from 'fs';
27
+ import { execFileSync, spawn } from 'child_process';
28
+ import { fileURLToPath } from 'url';
29
+ import { WorkspaceContext } from '../core/workspace-context.js';
30
+ import type { PluginCommandContext, PluginCommandResult } from '../openclaw-sdk.js';
31
+ import {
32
+ type TrainerBackendKind,
33
+ type HardwareTier,
34
+ generateExperimentId,
35
+ computeDatasetFingerprint,
36
+ } from '../core/external-training-contract.js';
37
+ import {
38
+ TrainingProgram,
39
+ DEFAULT_ORPO_HYPERPARAMETERS,
40
+ type CreateExperimentParams,
41
+ } from '../core/training-program.js';
42
+ import {
43
+ listTrainingRuns,
44
+ getTrainingRun,
45
+ listCheckpoints,
46
+ getCheckpoint,
47
+ getCheckpointLineage,
48
+ getTrainingRegistryStats,
49
+ } from '../core/model-training-registry.js';
50
+ import { getDeployment } from '../core/model-deployment-registry.js';
51
+ import {
52
+ DEFAULT_MIN_DELTA,
53
+ DEFAULT_ALLOWED_MARGIN,
54
+ DEFAULT_BASELINE_METRICS,
55
+ } from '../core/promotion-gate.js';
56
+
57
/**
 * Report whether the command context is configured for a Chinese locale.
 *
 * @param ctx - Plugin command context; only `ctx.config?.language` is read.
 * @returns `true` when the configured language (coerced to a string) starts
 *   with `zh`. A missing or falsy language is treated as `'en'`.
 */
function isZh(ctx: PluginCommandContext): boolean {
  const configured = ctx.config?.language;
  const language = configured ? String(configured) : 'en';
  return language.startsWith('zh');
}
60
+
61
/**
 * Identity pass-through: hands back the string it was given, unchanged.
 *
 * NOTE(review): the visible part of this file never calls this helper, and
 * local `zh` booleans shadow it inside the functions below — confirm whether
 * it is still needed.
 *
 * @param cond - Any string.
 * @returns The same string.
 */
function zh(cond: string): string {
  const unchanged = cond;
  return unchanged;
}
64
+
65
+ const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url)); // directory that contains this module file
66
+ const REPO_ROOT = path.resolve(MODULE_DIR, '..', '..', '..', '..'); // four directory levels above MODULE_DIR; NOTE(review): presumably the repository root — verify this still holds for the built/installed layout
67
+ const TRAINER_SCRIPTS_DIR = path.join(REPO_ROOT, 'scripts', 'nocturnal', 'trainer'); // holds the trainer entry script (main.py) and the experiment-<id>.json spec files written by create-experiment
68
+ const BENCHMARK_SCRIPT_PATH = path.join(REPO_ROOT, 'scripts', 'nocturnal', 'run-benchmark.ts'); // script handed to ts-node when attach-eval is run with --run-benchmark
69
+
70
/**
 * Resolve the `--backend=` argument to a known trainer backend.
 *
 * @param arg - Raw value taken from the command line, if any.
 * @returns `arg` when it is one of `peft-trl-orpo`, `unsloth-orpo` or
 *   `dry-run`; otherwise (missing, empty or unrecognised) `peft-trl-orpo`.
 */
function parseBackend(arg: string | undefined): TrainerBackendKind {
  switch (arg) {
    case 'peft-trl-orpo':
    case 'unsloth-orpo':
    case 'dry-run':
      return arg;
    default:
      // Unknown values are not rejected; they fall back to the default backend.
      return 'peft-trl-orpo';
  }
}
81
+
82
/**
 * Resolve the `--tier=` argument to a known hardware tier.
 *
 * @param arg - Raw value taken from the command line, if any.
 * @returns The matching tier (`consumer-gpu`, `small-gpu` or
 *   `cpu-experimental`); `consumer-gpu` when the value is missing, empty or
 *   not one of those three.
 */
function parseHardwareTier(arg: string | undefined): HardwareTier {
  const knownTiers: readonly HardwareTier[] = ['consumer-gpu', 'small-gpu', 'cpu-experimental'];
  const matched = knownTiers.find((tier) => tier === arg);
  // Unknown values are not rejected; they fall back to the default tier.
  return matched ?? 'consumer-gpu';
}
93
+
94
/**
 * Render a training run as a short multi-line summary for command output.
 *
 * The run ID is abbreviated to 8 characters and the dataset fingerprint to 12.
 * An ellipsis is appended only when the value was actually cut, so short
 * values such as the `'unknown'` fingerprint placeholder are shown verbatim
 * instead of as a misleading `unknown...`.
 *
 * @param run - Training run record, or a nullish value when the lookup failed.
 * @param zh - When `true`, the "not found" message is returned in Chinese.
 *   Field labels are always English.
 * @returns The summary, one field per line joined with `'\n '`, or the
 *   "not found" message when `run` is nullish.
 */
function formatTrainingRun(run: ReturnType<typeof getTrainingRun>, zh: boolean): string {
  if (!run) return zh ? '未找到' : 'Not found';

  // Shorten long identifiers; leave values that already fit untouched.
  const abbreviate = (value: string, max: number): string =>
    value.length > max ? `${value.substring(0, max)}...` : value;

  const lines = [
    `ID: ${abbreviate(run.trainRunId, 8)}`,
    `Family: ${run.targetModelFamily}`,
    `Status: ${run.status}`,
    `Dataset FP: ${abbreviate(run.datasetFingerprint, 12)}`,
    `Created: ${new Date(run.createdAt).toLocaleString()}`,
  ];
  if (run.completedAt) lines.push(`Completed: ${new Date(run.completedAt).toLocaleString()}`);
  if (run.failureReason) lines.push(`Failure: ${run.failureReason}`);
  if (run.checkpointIds.length > 0) {
    lines.push(`Checkpoints: ${run.checkpointIds.length}`);
  }
  return lines.join('\n ');
}
113
+
114
/**
 * Render a checkpoint as a short multi-line summary for command output.
 *
 * The checkpoint ID is abbreviated to 8 characters and the eval summary
 * reference to 12. An ellipsis is appended only when the value was actually
 * cut, so an ID that already fits is not shown with a misleading `...`.
 *
 * @param cp - Checkpoint record, or a nullish value when the lookup failed.
 * @param zh - When `true`, the "not found" message and the deployable
 *   yes/no value are rendered in Chinese. Field labels are always English.
 * @returns The summary, one field per line joined with `'\n '`, or the
 *   "not found" message when `cp` is nullish.
 */
function formatCheckpoint(cp: ReturnType<typeof getCheckpoint>, zh: boolean): string {
  if (!cp) return zh ? '未找到' : 'Not found';

  // Shorten long identifiers; leave values that already fit untouched.
  const abbreviate = (value: string, max: number): string =>
    value.length > max ? `${value.substring(0, max)}...` : value;

  const lines = [
    `ID: ${abbreviate(cp.checkpointId, 8)}`,
    `Family: ${cp.targetModelFamily}`,
    `Artifact: ${cp.artifactPath}`,
    `Deployable: ${cp.deployable ? (zh ? '是' : 'Yes') : (zh ? '否' : 'No')}`,
    `Created: ${new Date(cp.createdAt).toLocaleString()}`,
  ];
  if (cp.lastEvalSummaryRef) {
    lines.push(`Eval: ${abbreviate(cp.lastEvalSummaryRef, 12)}`);
  }
  return lines.join('\n ');
}
131
+
132
+ export async function handleNocturnalTrainCommand(ctx: PluginCommandContext): Promise<PluginCommandResult> {
133
+ const workspaceDir = (ctx.config?.workspaceDir as string) || process.cwd();
134
+ const zh = isZh(ctx);
135
+ const args = (ctx.args || '').trim();
136
+ const parts = args.split(/\s+/).filter(Boolean);
137
+ const [subcommand = 'help'] = parts;
138
+
139
+ const backendArg = parts.find((p) => p.startsWith('--backend='))?.split('=')[1];
140
+ const familyArg = parts.find((p) => p.startsWith('--family='))?.split('=')[1];
141
+ const hardwareTierArg = parts.find((p) => p.startsWith('--tier='))?.split('=')[1];
142
+ const datasetExportIdArg = parts.find((p) => p.startsWith('--dataset='))?.split('=')[1];
143
+ const benchmarkExportIdArg = parts.find((p) => p.startsWith('--benchmark='))?.split('=')[1];
144
+ const resultArg = parts.find((p) => p.startsWith('--result='))?.split('=')[1];
145
+ const checkpointIdArg = parts.find((p) => p.startsWith('--checkpoint-id='))?.split('=')[1];
146
+ const benchmarkIdArg = parts.find((p) => p.startsWith('--benchmark-id='))?.split('=')[1];
147
+ const deltaArg = parts.find((p) => p.startsWith('--delta='))?.split('=')[1];
148
+ const verdictArg = parts.find((p) => p.startsWith('--verdict='))?.split('=')[1];
149
+ const modeArg = parts.find((p) => p.startsWith('--mode='))?.split('=')[1];
150
+ const baselineScoreArg = parts.find((p) => p.startsWith('--baseline='))?.split('=')[1];
151
+ const candidateScoreArg = parts.find((p) => p.startsWith('--candidate='))?.split('=')[1];
152
+
153
+ try {
154
+ // ── Help ────────────────────────────────────────────────────────────────
155
+ if (subcommand === 'help' || subcommand === '--help') {
156
+ return {
157
+ text: zh
158
+ ? ` nocturnal-train 命令帮助
159
+
160
+ 用法:
161
+ /nocturnal-train create-experiment --backend=<backend> --family=<model-family> [--dataset=<export-id>] [--benchmark=<export-id>] [--run]
162
+ /nocturnal-train show-experiment <experimentId>
163
+ /nocturnal-train import-result <experimentId> --result=<path-or-json>
164
+ /nocturnal-train attach-eval <checkpointId> --benchmark-id=<id> [--baseline-ref=<checkpointId>] [--delta=<number>] [--verdict=<pass|fail>] [--run-benchmark]
165
+ /nocturnal-train show-lineage <checkpointId>
166
+ /nocturnal-train list-experiments
167
+ /nocturnal-train list-checkpoints [--family=<model-family>]
168
+
169
+ 示例:
170
+ /nocturnal-train create-experiment --backend=peft-trl-orpo --family=qwen2.5-7b-reader --dataset=export-123 --benchmark=bench-456 --run
171
+ /nocturnal-train show-experiment exp-abc123
172
+ /nocturnal-train import-result exp-abc123 --result=.state/nocturnal/evals/result-exp-abc123.json
173
+ /nocturnal-train attach-eval ckpt-xyz --benchmark-id=bench-001 --delta=0.08 --verdict=pass --run-benchmark
174
+ /nocturnal-train show-lineage ckpt-xyz
175
+ /nocturnal-train list-checkpoints --family=qwen2.5-7b-reader
176
+
177
+ 后端选项:
178
+ peft-trl-orpo - PEFT + TRL ORPO (生产用)
179
+ unsloth-orpo - Unsloth 加速 ORPO
180
+ dry-run - 仅验证,不实际训练
181
+
182
+ 硬件层级:
183
+ consumer-gpu - RTX 4090 24GB (默认)
184
+ small-gpu - 8-16GB VRAM
185
+ cpu-experimental - 仅 dry-run`
186
+ : ` nocturnal-train command help
187
+
188
+ Usage:
189
+ /nocturnal-train create-experiment --backend=<backend> --family=<model-family> [--dataset=<export-id>] [--benchmark=<export-id>]
190
+ /nocturnal-train show-experiment <experimentId>
191
+ /nocturnal-train import-result <experimentId> --result=<path-or-json>
192
+ /nocturnal-train attach-eval <checkpointId> --benchmark-id=<id> --delta=<number> --verdict=<pass|fail> [--baseline=<score>] [--candidate=<score>]
193
+ /nocturnal-train show-lineage <checkpointId>
194
+ /nocturnal-train list-experiments
195
+ /nocturnal-train list-checkpoints [--family=<model-family>]
196
+
197
+ Examples:
198
+ /nocturnal-train create-experiment --backend=peft-trl-orpo --family=qwen2.5-7b-reader --dataset=export-123 --benchmark=bench-456
199
+ /nocturnal-train show-experiment exp-abc123
200
+ /nocturnal-train import-result exp-abc123 --result=.state/nocturnal/evals/result-exp-abc123.json
201
+ /nocturnal-train attach-eval ckpt-xyz --benchmark-id=bench-001 --delta=0.08 --verdict=pass
202
+ /nocturnal-train show-lineage ckpt-xyz
203
+ /nocturnal-train list-checkpoints --family=qwen2.5-7b-reader
204
+
205
+ Backend options:
206
+ peft-trl-orpo - PEFT + TRL ORPO (production)
207
+ unsloth-orpo - Unsloth accelerated ORPO
208
+ dry-run - Validation only, no real training
209
+
210
+ Hardware tiers:
211
+ consumer-gpu - RTX 4090 24GB (default)
212
+ small-gpu - 8-16GB VRAM
213
+ cpu-experimental - dry-run only`,
214
+ };
215
+ }
216
+
217
+ // ── Create Experiment ─────────────────────────────────────────────────
218
+ if (subcommand === 'create-experiment') {
219
+ if (!familyArg) {
220
+ return { text: zh ? '错误: 需要 --family 参数' : 'Error: --family is required' };
221
+ }
222
+
223
+ const backend = parseBackend(backendArg);
224
+ const hardwareTier = parseHardwareTier(hardwareTierArg);
225
+ const runNow = args.includes('--run');
226
+
227
+ // Find ORPO export if dataset not specified
228
+ let datasetExportId = datasetExportIdArg;
229
+ let datasetExportPath = '';
230
+ if (!datasetExportId) {
231
+ // Try to find latest ORPO export
232
+ const exportsDir = path.join(workspaceDir, '.state', 'exports', 'orpo');
233
+ if (fs.existsSync(exportsDir)) {
234
+ const files = fs.readdirSync(exportsDir).filter((f) => f.endsWith('-manifest.json'));
235
+ if (files.length > 0) {
236
+ const manifest = JSON.parse(fs.readFileSync(path.join(exportsDir, files[0]), 'utf-8'));
237
+ datasetExportId = manifest.exportId;
238
+ datasetExportPath = manifest.exportPath;
239
+ }
240
+ }
241
+ if (!datasetExportId) {
242
+ return {
243
+ text: zh
244
+ ? '错误: 未找到 ORPO 导出。请先运行 /pd-nocturnal-review 导出数据。'
245
+ : 'Error: No ORPO export found. Run /pd-nocturnal-review to export data first.',
246
+ };
247
+ }
248
+ } else {
249
+ datasetExportPath = path.join(workspaceDir, '.state', 'exports', 'orpo', `${datasetExportId}.jsonl`);
250
+ }
251
+
252
+ // Get dataset fingerprint
253
+ let datasetFingerprint = 'unknown';
254
+ const manifestPath = path.join(workspaceDir, '.state', 'exports', 'orpo', `${datasetExportId}-manifest.json`);
255
+ if (fs.existsSync(manifestPath)) {
256
+ const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
257
+ if (!manifest.datasetFingerprint) {
258
+ return {
259
+ text: zh
260
+ ? `错误: manifest 文件缺少 datasetFingerprint: ${manifestPath}`
261
+ : `Error: manifest missing datasetFingerprint: ${manifestPath}`,
262
+ };
263
+ }
264
+ datasetFingerprint = manifest.datasetFingerprint;
265
+ }
266
+
267
+ const benchmarkExportId = benchmarkExportIdArg || datasetExportId || 'benchmark-default';
268
+ const outputDir = path.join(workspaceDir, '.state', 'nocturnal', 'checkpoints');
269
+
270
+ const program = new TrainingProgram(workspaceDir);
271
+ const createResult = program.createExperiment({
272
+ backend,
273
+ targetWorkerProfile: 'local-reader', // Phase 7 only
274
+ targetModelFamily: familyArg,
275
+ hardwareTier,
276
+ datasetExportId,
277
+ datasetExportPath,
278
+ datasetFingerprint,
279
+ benchmarkExportId,
280
+ outputDir,
281
+ });
282
+
283
+ // --- Write spec files for the manual chain ---
284
+ // The trainer reads spec from scripts/nocturnal/trainer/experiment-<id>.json.
285
+ // import-result reads spec from .state/nocturnal/checkpoints/experiment-<id>.json.
286
+ // Both must be written so the manual create-experiment -> trainer -> import-result chain works.
287
+ const spec = createResult.spec;
288
+ const trainerSpecPath = path.join(TRAINER_SCRIPTS_DIR, `experiment-${spec.experimentId}.json`);
289
+ const workspaceSpecPath = path.join(workspaceDir, '.state', 'nocturnal', 'checkpoints', `experiment-${spec.experimentId}.json`);
290
+ const trainerSpecDir = path.dirname(trainerSpecPath);
291
+ const workspaceCheckpointsDir = path.dirname(workspaceSpecPath);
292
+ if (!fs.existsSync(trainerSpecDir)) {
293
+ fs.mkdirSync(trainerSpecDir, { recursive: true });
294
+ }
295
+ if (!fs.existsSync(workspaceCheckpointsDir)) {
296
+ fs.mkdirSync(workspaceCheckpointsDir, { recursive: true });
297
+ }
298
+ fs.writeFileSync(trainerSpecPath, JSON.stringify(spec, null, 2), 'utf-8');
299
+ fs.writeFileSync(workspaceSpecPath, JSON.stringify(spec, null, 2), 'utf-8');
300
+
301
+ // --- Auto-run mode: execute trainer immediately ---
302
+ // This closes the gap in the create-experiment -> trainer -> import-result chain.
303
+ // NOTE: This blocks until training completes (could be minutes).
304
+ if (runNow) {
305
+ const spec = createResult.spec;
306
+ const baseDir = TRAINER_SCRIPTS_DIR;
307
+ const scriptPath = path.join(baseDir, 'main.py');
308
+ const specPath = path.join(baseDir, `experiment-${spec.experimentId}.json`);
309
+ const outputDir = spec.outputDir;
310
+ const resultFilePath = path.join(outputDir, `result-${spec.experimentId}.json`);
311
+
312
+ // Write spec file
313
+ const specDir = path.dirname(specPath);
314
+ if (!fs.existsSync(specDir)) {
315
+ fs.mkdirSync(specDir, { recursive: true });
316
+ }
317
+ fs.writeFileSync(specPath, JSON.stringify(spec, null, 2), 'utf-8');
318
+
319
+ let trainerResult!: import('../core/external-training-contract.js').TrainingExperimentResult;
320
+
321
+ try {
322
+ if (spec.backend === 'dry-run') {
323
+ trainerResult = {
324
+ experimentId: spec.experimentId,
325
+ backend: 'dry-run',
326
+ status: 'dry_run' as const,
327
+ targetWorkerProfile: spec.targetWorkerProfile,
328
+ targetModelFamily: spec.targetModelFamily,
329
+ datasetFingerprint: spec.datasetFingerprint,
330
+ configFingerprint: spec.configFingerprint,
331
+ codeHash: spec.codeHash,
332
+ createdAt: new Date().toISOString(),
333
+ };
334
+ } else {
335
+ // Execute trainer using spawn (streaming, no full log buffering).
336
+ // stdout is collected into a fixed-size buffer (1MB) to avoid OOM.
337
+ // stderr is piped directly to parent stderr to avoid memory accumulation.
338
+ const timeoutMs = (spec.budget.maxWallClockMinutes * 60 * 1000) + 30000;
339
+ const pythonExe = process.platform === 'win32' ? 'python' : 'python3';
340
+ const MAX_STDOUT_BUFFER = 1 * 1024 * 1024; // 1MB cap
341
+
342
+ trainerResult = await new Promise((resolve, reject) => {
343
+ const proc = spawn(pythonExe, [scriptPath, '--spec', specPath, '--output-dir', outputDir], {
344
+ timeout: timeoutMs,
345
+ });
346
+
347
+ // Collect stdout with size cap to prevent OOM
348
+ const stdoutChunks: Buffer[] = [];
349
+ let stdoutSize = 0;
350
+
351
+ proc.stdout.on('data', (chunk: Buffer) => {
352
+ const remaining = MAX_STDOUT_BUFFER - stdoutSize;
353
+ if (remaining > 0) {
354
+ stdoutChunks.push(chunk.slice(0, remaining));
355
+ stdoutSize += Math.min(chunk.length, remaining);
356
+ }
357
+ });
358
+
359
+ // Pipe stderr directly — training logs can be large, don't buffer
360
+ proc.stderr.pipe(process.stderr);
361
+
362
+ const timer = setTimeout(() => {
363
+ proc.kill();
364
+ reject(new Error(`Trainer timed out after ${timeoutMs}ms`));
365
+ }, timeoutMs);
366
+
367
+ proc.on('close', (code) => {
368
+ clearTimeout(timer);
369
+ if (code === 0) {
370
+ const stdout = Buffer.concat(stdoutChunks).toString('utf-8');
371
+ const trimmed = stdout.trim();
372
+ if (trimmed) {
373
+ try {
374
+ resolve(JSON.parse(trimmed));
375
+ return;
376
+ } catch {
377
+ // fall through to result file
378
+ }
379
+ }
380
+ // Fallback to result file
381
+ if (fs.existsSync(resultFilePath)) {
382
+ try {
383
+ resolve(JSON.parse(fs.readFileSync(resultFilePath, 'utf-8')));
384
+ return;
385
+ } catch {
386
+ reject(new Error(`Trainer stdout was not valid JSON and result file also invalid: ${resultFilePath}`));
387
+ return;
388
+ }
389
+ }
390
+ reject(new Error(`Trainer produced no parseable stdout and no result file found at: ${resultFilePath}`));
391
+ } else {
392
+ // Non-zero exit: try result file as last resort
393
+ if (fs.existsSync(resultFilePath)) {
394
+ try {
395
+ resolve(JSON.parse(fs.readFileSync(resultFilePath, 'utf-8')));
396
+ } catch {
397
+ reject(new Error(`Trainer exited with code ${code} and result file was invalid`));
398
+ }
399
+ } else {
400
+ reject(new Error(`Trainer exited with code ${code} and no result file found`));
401
+ }
402
+ }
403
+ });
404
+
405
+ proc.on('error', (err) => {
406
+ clearTimeout(timer);
407
+ reject(new Error(`Trainer spawn failed: ${err instanceof Error ? err.message : String(err)}`));
408
+ });
409
+ });
410
+ }
411
+ } catch (err: unknown) {
412
+ return {
413
+ text: zh
414
+ ? `❌ 训练执行失败: ${err instanceof Error ? err.message : String(err)}\n\n训练 Run ID: ${createResult.trainRunId}\n请检查 trainer 输出或使用 dry-run 后端重试。`
415
+ : `❌ Trainer execution failed: ${err instanceof Error ? err.message : String(err)}\n\nTraining Run ID: ${createResult.trainRunId}\nCheck trainer output or retry with --backend=dry-run.`,
416
+ };
417
+ } finally {
418
+ // Clean up spec file
419
+ if (fs.existsSync(specPath)) {
420
+ fs.unlinkSync(specPath);
421
+ }
422
+ }
423
+
424
+ // Process trainer result (register checkpoint)
425
+ // dry_run returns null (no checkpoint); other statuses throw on error
426
+ let processed: { checkpointId: string; checkpointRef: string } | null;
427
+ try {
428
+ processed = program.processResult({
429
+ spec: createResult.spec,
430
+ trainRunId: createResult.trainRunId,
431
+ result: trainerResult,
432
+ });
433
+ } catch (err: unknown) {
434
+ return {
435
+ text: zh
436
+ ? `❌ 结果导入失败: ${err instanceof Error ? err.message : String(err)}`
437
+ : `❌ Result import failed: ${err instanceof Error ? err.message : String(err)}`,
438
+ };
439
+ }
440
+
441
+ if (processed === null) {
442
+ // dry_run completed with no checkpoint — this is a non-error outcome
443
+ return {
444
+ text: zh
445
+ ? `✅ Dry-run 完成(未产生 checkpoint)
446
+ 实验 ID: ${createResult.spec.experimentId}
447
+ 训练 Run ID: ${createResult.trainRunId}
448
+ 状态: ${trainerResult.status}
449
+
450
+ 下一步:
451
+ 若需产生可部署的 checkpoint,请使用 --backend=peft-trl-orpo 或 --backend=unsloth-orpo 重试。`
452
+ : `✅ Dry-run complete (no checkpoint produced)
453
+ Experiment ID: ${createResult.spec.experimentId}
454
+ Training Run ID: ${createResult.trainRunId}
455
+ Status: ${trainerResult.status}
456
+
457
+ Next steps:
458
+ To produce a deployable checkpoint, retry with --backend=peft-trl-orpo or --backend=unsloth-orpo.`,
459
+ };
460
+ }
461
+
462
+ return {
463
+ text: zh
464
+ ? `✅ 训练完成
465
+ 实验 ID: ${createResult.spec.experimentId}
466
+ 训练 Run ID: ${createResult.trainRunId}
467
+ Checkpoint ID: ${processed.checkpointId}
468
+ 状态: ${trainerResult.status}
469
+ ${trainerResult.failureReason ? `失败原因: ${trainerResult.failureReason}` : ''}
470
+
471
+ 下一步:
472
+ 1. 运行评估: /nocturnal-train attach-eval ${processed.checkpointId} --benchmark-id=<id> --delta=<number> --verdict=<pass|fail> --run-benchmark
473
+ 2. 查看检查点: /nocturnal-train show-lineage ${processed.checkpointId}`
474
+ : `✅ Training complete
475
+ Experiment ID: ${createResult.spec.experimentId}
476
+ Training Run ID: ${createResult.trainRunId}
477
+ Checkpoint ID: ${processed.checkpointId}
478
+ Status: ${trainerResult.status}
479
+ ${trainerResult.failureReason ? `Failure: ${trainerResult.failureReason}` : ''}
480
+
481
+ Next steps:
482
+ 1. Run eval: /nocturnal-train attach-eval ${processed.checkpointId} --benchmark-id=<id> --delta=<number> --verdict=<pass|fail> --run-benchmark
483
+ 2. View checkpoint: /nocturnal-train show-lineage ${processed.checkpointId}`,
484
+ };
485
+ }
486
+
487
+ return {
488
+ text: zh
489
+ ? `✅ 实验已创建
490
+ 实验 ID: ${createResult.spec.experimentId}
491
+ 后端: ${createResult.spec.backend}
492
+ 模型家族: ${createResult.spec.targetModelFamily}
493
+ 硬件层级: ${createResult.spec.hardwareTier}
494
+ 数据集: ${createResult.spec.datasetExportId}
495
+ 输出目录: ${createResult.spec.outputDir}
496
+ 训练 Run ID: ${createResult.trainRunId}
497
+
498
+ 下一步:
499
+ 1. 运行外部训练器: python "${path.join(TRAINER_SCRIPTS_DIR, 'main.py')}" --spec "${trainerSpecPath}" --output-dir ${outputDir}
500
+ 2. 导入结果: /nocturnal-train import-result ${createResult.spec.experimentId} --result=<path>
501
+ 3. 附加评估: /nocturnal-train attach-eval <checkpointId> --benchmark-id=<id> --delta=<number> --verdict=<pass|fail>
502
+ 4. 手动链路 spec 已写入:
503
+ - ${trainerSpecPath}
504
+ - ${workspaceSpecPath}`
505
+ : `✅ Experiment created
506
+ Experiment ID: ${createResult.spec.experimentId}
507
+ Backend: ${createResult.spec.backend}
508
+ Model Family: ${createResult.spec.targetModelFamily}
509
+ Hardware Tier: ${createResult.spec.hardwareTier}
510
+ Dataset: ${createResult.spec.datasetExportId}
511
+ Output Dir: ${createResult.spec.outputDir}
512
+ Training Run ID: ${createResult.trainRunId}
513
+
514
+ Next steps:
515
+ 1. Run external trainer: python "${path.join(TRAINER_SCRIPTS_DIR, 'main.py')}" --spec "${trainerSpecPath}" --output-dir ${outputDir}
516
+ 2. Import result: /nocturnal-train import-result ${createResult.spec.experimentId} --result=<path>
517
+ 3. Attach eval: /nocturnal-train attach-eval <checkpointId> --benchmark-id=<id> --delta=<number> --verdict=<pass|fail>
518
+ 4. Durable spec files written to:
519
+ - ${trainerSpecPath}
520
+ - ${workspaceSpecPath}`,
521
+ };
522
+ }
523
+
524
+ // ── Show Experiment ───────────────────────────────────────────────────
525
+ if (subcommand === 'show-experiment') {
526
+ const experimentId = parts[1];
527
+ if (!experimentId) {
528
+ return { text: zh ? '错误: 需要实验 ID' : 'Error: experiment ID required' };
529
+ }
530
+
531
+ const runs = listTrainingRuns(workspaceDir, { targetModelFamily: undefined });
532
+ const run = runs.find((r) => r.trainRunId.startsWith(experimentId) || r.trainRunId === experimentId);
533
+
534
+ if (!run) {
535
+ return { text: zh ? `未找到实验: ${experimentId}` : `Experiment not found: ${experimentId}` };
536
+ }
537
+
538
+ return { text: formatTrainingRun(run, zh) };
539
+ }
540
+
541
+ // ── Import Result ─────────────────────────────────────────────────────
542
+ if (subcommand === 'import-result') {
543
+ const experimentId = parts[1];
544
+ if (!experimentId) {
545
+ return { text: zh ? '错误: 需要实验 ID' : 'Error: experiment ID required' };
546
+ }
547
+
548
+ // Get result from argument or file
549
+ let resultJson = resultArg;
550
+ if (resultJson && fs.existsSync(resultJson)) {
551
+ resultJson = fs.readFileSync(resultJson, 'utf-8');
552
+ }
553
+ if (!resultJson) {
554
+ // Try to find result file
555
+ const resultPath = path.join(workspaceDir, '.state', 'nocturnal', 'checkpoints', `result-${experimentId}.json`);
556
+ if (fs.existsSync(resultPath)) {
557
+ resultJson = fs.readFileSync(resultPath, 'utf-8');
558
+ } else {
559
+ return {
560
+ text: zh
561
+ ? `错误: 未找到结果文件。请使用 --result 参数指定路径或 JSON 内容。`
562
+ : `Error: Result not found. Use --result to specify path or JSON content.`,
563
+ };
564
+ }
565
+ }
566
+
567
+ let result: any;
568
+ try {
569
+ result = JSON.parse(resultJson);
570
+ } catch {
571
+ return { text: zh ? '错误: 无效的 JSON 格式' : 'Error: Invalid JSON format' };
572
+ }
573
+
574
+ // Find the training run
575
+ const runs = listTrainingRuns(workspaceDir);
576
+ const run = runs.find(
577
+ (r) => r.trainRunId === result.trainRunId || r.trainRunId.startsWith(experimentId)
578
+ );
579
+
580
+ if (!run) {
581
+ return { text: zh ? `错误: 未找到训练 Run: ${result.trainRunId}` : `Error: Training run not found: ${result.trainRunId}` };
582
+ }
583
+
584
+ // Validate spec exists
585
+ const specPath = path.join(workspaceDir, '.state', 'nocturnal', 'checkpoints', `experiment-${experimentId}.json`);
586
+ if (!fs.existsSync(specPath)) {
587
+ return {
588
+ text: zh
589
+ ? `错误: 未找到实验 spec 文件: ${specPath}`
590
+ : `Error: Experiment spec not found: ${specPath}`,
591
+ };
592
+ }
593
+ const spec = JSON.parse(fs.readFileSync(specPath, 'utf-8'));
594
+
595
+ // Process the result
596
+ const program = new TrainingProgram(workspaceDir);
597
+ let processed: { checkpointId: string; checkpointRef: string } | null;
598
+ try {
599
+ processed = program.processResult({
600
+ spec,
601
+ trainRunId: run.trainRunId,
602
+ result,
603
+ });
604
+ } catch (err: unknown) {
605
+ return {
606
+ text: zh
607
+ ? `❌ 导入失败: ${err instanceof Error ? err.message : String(err)}`
608
+ : `❌ Import failed: ${err instanceof Error ? err.message : String(err)}`,
609
+ };
610
+ }
611
+
612
+ if (processed === null) {
613
+ // dry_run: non-error outcome with no checkpoint
614
+ return {
615
+ text: zh
616
+ ? `✅ Dry-run 结果已导入(无 checkpoint)
617
+ Status: ${result.status}
618
+ 训练 Run: ${run.trainRunId}
619
+
620
+ 若需产生可部署的 checkpoint,请使用 --backend=peft-trl-orpo 或 --backend=unsloth-orpo 重试。`
621
+ : `✅ Dry-run result imported (no checkpoint)
622
+ Status: ${result.status}
623
+ Training Run: ${run.trainRunId}
624
+
625
+ To produce a deployable checkpoint, retry with --backend=peft-trl-orpo or --backend=unsloth-orpo.`,
626
+ };
627
+ }
628
+
629
+ return {
630
+ text: zh
631
+ ? `✅ 结果已导入
632
+ Status: ${result.status}
633
+ Checkpoint ID: ${processed.checkpointId}
634
+ Checkpoint Ref: ${processed.checkpointRef}
635
+ ${result.artifact ? `Artifact: ${result.artifact.artifactPath}` : ''}
636
+ ${result.metrics ? `Wall Time: ${result.metrics.wallClockMinutes} min` : ''}
637
+ ${result.failureReason ? `Failure: ${result.failureReason}` : ''}
638
+
639
+ 下一步:
640
+ 1. 运行评估: /nocturnal-train attach-eval ${processed.checkpointId} --benchmark-id=<id> --delta=<number> --verdict=<pass|fail>
641
+ 2. 查看详情: /nocturnal-train show-lineage ${processed.checkpointId}`
642
+ : `✅ Result imported
643
+ Status: ${result.status}
644
+ Checkpoint ID: ${processed.checkpointId}
645
+ Checkpoint Ref: ${processed.checkpointRef}
646
+ ${result.artifact ? `Artifact: ${result.artifact.artifactPath}` : ''}
647
+ ${result.metrics ? `Wall Time: ${result.metrics.wallClockMinutes} min` : ''}
648
+ ${result.failureReason ? `Failure: ${result.failureReason}` : ''}
649
+
650
+ Next steps:
651
+ 1. Run eval: /nocturnal-train attach-eval ${processed.checkpointId} --benchmark-id=<id> --delta=<number> --verdict=<pass|fail>
652
+ 2. View details: /nocturnal-train show-lineage ${processed.checkpointId}`,
653
+ };
654
+ }
655
+
656
+ // ── Attach Eval ──────────────────────────────────────────────────────
657
+ if (subcommand === 'attach-eval') {
658
+ const checkpointId = parts[1] || checkpointIdArg;
659
+ if (!checkpointId) {
660
+ return { text: zh ? '错误: 需要 checkpointId' : 'Error: checkpointId required' };
661
+ }
662
+
663
+ const runBenchmark = args.includes('--run-benchmark');
664
+ const baselineRefArg = parts.find((p) => p.startsWith('--baseline-ref='))?.split('=')[1];
665
+
666
+ const program = new TrainingProgram(workspaceDir);
667
+ const checkpoint = getCheckpoint(workspaceDir, checkpointId);
668
+
669
+ if (!checkpoint) {
670
+ return { text: zh ? `错误: Checkpoint 未找到: ${checkpointId}` : `Error: Checkpoint not found: ${checkpointId}` };
671
+ }
672
+
673
+ let benchmarkId = benchmarkIdArg || `bench-${Date.now()}`;
674
+ let delta = deltaArg ? parseFloat(deltaArg) : NaN;
675
+ let verdict: 'fail' | 'pass' | 'compare_only' = verdictArg === 'pass' || verdictArg === 'fail' || verdictArg === 'compare_only'
676
+ ? (verdictArg as 'fail' | 'pass' | 'compare_only')
677
+ : 'compare_only';
678
+ let baselineScore = baselineScoreArg ? parseFloat(baselineScoreArg) : 0.5;
679
+ let candidateScore = candidateScoreArg ? parseFloat(candidateScoreArg) : 0.5;
680
+ const mode = (modeArg === 'prompt_assisted' ? 'prompt_assisted' : 'reduced_prompt') as 'prompt_assisted' | 'reduced_prompt';
681
+
682
+ // --- Run benchmark mode: execute real benchmark to get scores ---
683
+ // This closes the gap in the attach-eval command chain.
684
+ if (runBenchmark) {
685
+ // Determine baseline checkpoint ref
686
+ let baselineRef = baselineRefArg;
687
+ if (!baselineRef) {
688
+ // Try to auto-detect from deployment registry: use the currently active checkpoint as baseline
689
+ const deployment = getDeployment(workspaceDir, 'local-reader');
690
+ if (deployment?.activeCheckpointId && deployment.activeCheckpointId !== checkpointId) {
691
+ baselineRef = deployment.activeCheckpointId;
692
+ }
693
+ }
694
+ if (!baselineRef) {
695
+ return {
696
+ text: zh
697
+ ? `错误: --run-benchmark 需要 --baseline-ref 参数指定基线检查点,或当前需要有已启用的 local-reader 部署。`
698
+ : `Error: --run-benchmark requires --baseline-ref to specify the baseline checkpoint, or an active local-reader deployment must exist.`,
699
+ };
700
+ }
701
+
702
+ // Resolve both checkpoint refs to artifact paths for the scorer.
703
+ // The scorer (resolveCheckpointPath) expects filesystem paths to PEFT adapters,
704
+ // not checkpoint registry IDs. Look them up from the registry.
705
+ const baselineCheckpoint = getCheckpoint(workspaceDir, baselineRef);
706
+ if (!baselineCheckpoint) {
707
+ return {
708
+ text: zh
709
+ ? `错误: Baseline 检查点未找到: ${baselineRef}`
710
+ : `Error: Baseline checkpoint not found: ${baselineRef}`,
711
+ };
712
+ }
713
+ // Candidate checkpoint was already validated above (line 550)
714
+
715
+ // Find the export ID from the parent training run
716
+ let exportId = checkpointId;
717
+ if (checkpoint.trainRunId) {
718
+ const run = getTrainingRun(workspaceDir, checkpoint.trainRunId);
719
+ if (run?.exportId) {
720
+ exportId = run.exportId;
721
+ }
722
+ }
723
+ const scorerType = 'local-model'; // Use real model scorer
724
+
725
+ // Run benchmark via ts-node subprocess
726
+ const benchmarkScript = BENCHMARK_SCRIPT_PATH;
727
+ const outputDir = path.join(workspaceDir, '.state', 'nocturnal', 'evals');
728
+
729
+ // Build the compare command - pass ARTIFACT PATHS as separate arguments
730
+ // to avoid shell injection when paths contain special characters.
731
+ // Use execFileSync to pass arguments as an array (no shell interpolation).
732
+ const cmdArgs = [
733
+ '--yes',
734
+ 'ts-node',
735
+ benchmarkScript,
736
+ 'compare',
737
+ `--export-id=${exportId}`,
738
+ `--baseline=${baselineCheckpoint.artifactPath}`,
739
+ `--candidate=${checkpoint.artifactPath}`,
740
+ `--mode=${mode}`,
741
+ `--scorer=${scorerType}`,
742
+ `--output-dir=${outputDir}`,
743
+ ];
744
+
745
+ let benchmarkResult: {
746
+ delta: { delta: number; baselineScore: number; candidateScore: number };
747
+ verdict: 'pass' | 'fail' | 'compare_only';
748
+ benchmarkId: string;
749
+ } | null = null;
750
+ let benchmarkError = '';
751
+
752
+ try {
753
+ // Use execFileSync to avoid shell injection — paths are passed as args, not interpolated
754
+ const stdout = execFileSync('npx', cmdArgs, {
755
+ cwd: process.cwd(),
756
+ timeout: 300000, // 5 min timeout
757
+ encoding: 'utf-8',
758
+ });
759
+ // stdout is the JSON result from run-benchmark
760
+ try {
761
+ benchmarkResult = JSON.parse(stdout.trim());
762
+ } catch {
763
+ benchmarkError = `Failed to parse benchmark output: ${stdout.substring(0, 200)}`;
764
+ }
765
+ } catch (err: unknown) {
766
+ // execSync throws on non-zero exit code; stdout may contain partial data
767
+ const stdout = ((err as { stdout?: string }).stdout) ?? '';
768
+ try {
769
+ benchmarkResult = JSON.parse(stdout.trim());
770
+ } catch {
771
+ benchmarkError = `Benchmark failed: ${err instanceof Error ? err.message : String(err)}. stdout: ${stdout.substring(0, 200)}`;
772
+ }
773
+ }
774
+
775
+ if (benchmarkError || !benchmarkResult) {
776
+ return {
777
+ text: zh
778
+ ? `❌ Benchmark 执行失败: ${benchmarkError || '无法解析结果'}`
779
+ : `❌ Benchmark execution failed: ${benchmarkError || 'Could not parse result'}`,
780
+ };
781
+ }
782
+
783
+ delta = benchmarkResult.delta.delta;
784
+ baselineScore = benchmarkResult.delta.baselineScore;
785
+ candidateScore = benchmarkResult.delta.candidateScore;
786
+ benchmarkId = benchmarkResult.benchmarkId;
787
+ verdict = benchmarkResult.verdict;
788
+ } else {
789
+ // Manual mode: require explicit delta and verdict
790
+ if (!deltaArg || !verdictArg) {
791
+ return {
792
+ text: zh
793
+ ? '错误: 需要 --benchmark-id, --delta, --verdict 参数(或使用 --run-benchmark 自动运行)'
794
+ : 'Error: --benchmark-id, --delta, --verdict are required (or use --run-benchmark to auto-run)',
795
+ };
796
+ }
797
+ if (isNaN(delta)) {
798
+ return { text: zh ? '错误: delta 必须是数字' : 'Error: delta must be a number' };
799
+ }
800
+ }
801
+
802
+ const evalSummary = {
803
+ evalId: `eval-${Date.now()}`,
804
+ checkpointId,
805
+ benchmarkId,
806
+ targetModelFamily: checkpoint.targetModelFamily,
807
+ mode,
808
+ baselineScore,
809
+ candidateScore,
810
+ delta,
811
+ verdict,
812
+ };
813
+
814
+ try {
815
+ program.attachEvalAndMarkDeployable(checkpointId, evalSummary);
816
+ const deployable = verdict === 'pass' || verdict === 'compare_only';
817
+
818
+ return {
819
+ text: zh
820
+ ? `✅ 评估已附加${runBenchmark ? '(自动 Benchmark)' : ''}
821
+ Checkpoint: ${checkpointId.substring(0, 8)}...
822
+ Benchmark: ${benchmarkId}
823
+ 基线分数: ${baselineScore.toFixed(4)}
824
+ 候选分数: ${candidateScore.toFixed(4)}
825
+ Delta: ${delta >= 0 ? '+' : ''}${delta.toFixed(4)}
826
+ Verdict: ${verdict}
827
+ Mode: ${mode}
828
+ Deployable: ${deployable ? 'Yes' : 'No'}
829
+
830
+ 下一步:
831
+ 1. 评估晋升: /nocturnal-rollout evaluate-promotion ${checkpointId}
832
+ 2. 绑定部署: /nocturnal-rollout bind ${checkpointId} --profile=local-reader`
833
+ : `✅ Eval attached${runBenchmark ? ' (auto benchmark)' : ''}
834
+ Checkpoint: ${checkpointId.substring(0, 8)}...
835
+ Benchmark: ${benchmarkId}
836
+ Baseline Score: ${baselineScore.toFixed(4)}
837
+ Candidate Score: ${candidateScore.toFixed(4)}
838
+ Delta: ${delta >= 0 ? '+' : ''}${delta.toFixed(4)}
839
+ Verdict: ${verdict}
840
+ Mode: ${mode}
841
+ Deployable: ${deployable ? 'Yes' : 'No'}
842
+
843
+ Next steps:
844
+ 1. Evaluate promotion: /nocturnal-rollout evaluate-promotion ${checkpointId}
845
+ 2. Bind deployment: /nocturnal-rollout bind ${checkpointId} --profile=local-reader`,
846
+ };
847
+ } catch (err: unknown) {
848
+ return {
849
+ text: zh
850
+ ? `❌ 附加评估失败: ${err instanceof Error ? err.message : String(err)}`
851
+ : `❌ Attach eval failed: ${err instanceof Error ? err.message : String(err)}`,
852
+ };
853
+ }
854
+ }
855
+
856
+ // ── Show Lineage ─────────────────────────────────────────────────────
857
+ if (subcommand === 'show-lineage') {
858
+ const checkpointId = parts[1];
859
+ if (!checkpointId) {
860
+ return { text: zh ? '错误: 需要 checkpointId' : 'Error: checkpointId required' };
861
+ }
862
+
863
+ const lineage = getCheckpointLineage(workspaceDir, checkpointId);
864
+ if (!lineage) {
865
+ return { text: zh ? `未找到 lineage: ${checkpointId}` : `Lineage not found: ${checkpointId}` };
866
+ }
867
+
868
+ const { run, checkpoint, eval: eval_ } = lineage;
869
+
870
+ let text = zh
871
+ ? `=== Checkpoint Lineage ===
872
+ Checkpoint: ${checkpoint.checkpointId}
873
+ Family: ${checkpoint.targetModelFamily}
874
+ Deployable: ${checkpoint.deployable}
875
+ Artifact: ${checkpoint.artifactPath}
876
+
877
+ --- Training Run ---
878
+ ${formatTrainingRun(run, zh)}
879
+
880
+ --- Eval Summary ---`
881
+ : `=== Checkpoint Lineage ===
882
+ Checkpoint: ${checkpoint.checkpointId}
883
+ Family: ${checkpoint.targetModelFamily}
884
+ Deployable: ${checkpoint.deployable}
885
+ Artifact: ${checkpoint.artifactPath}
886
+
887
+ --- Training Run ---
888
+ ${formatTrainingRun(run, zh)}
889
+
890
+ --- Eval Summary ---`;
891
+
892
+ if (eval_) {
893
+ text += `
894
+ ID: ${eval_.evalId}
895
+ Mode: ${eval_.mode}
896
+ Delta: ${eval_.delta >= 0 ? '+' : ''}${eval_.delta.toFixed(4)}
897
+ Baseline: ${eval_.baselineScore.toFixed(3)}
898
+ Candidate: ${eval_.candidateScore.toFixed(3)}
899
+ Verdict: ${eval_.verdict}`;
900
+ } else {
901
+ text += zh ? '\n(无)' : '\n(None)';
902
+ }
903
+
904
+ return { text };
905
+ }
906
+
907
+ // ── List Experiments ──────────────────────────────────────────────────
908
+ if (subcommand === 'list-experiments') {
909
+ const runs = listTrainingRuns(workspaceDir);
910
+ if (runs.length === 0) {
911
+ return { text: zh ? '没有训练实验' : 'No training experiments' };
912
+ }
913
+
914
+ const lines = runs.slice(0, 20).map((run) => {
915
+ const date = new Date(run.createdAt).toLocaleDateString();
916
+ return `${run.trainRunId.substring(0, 8)}... | ${run.status} | ${run.targetModelFamily} | ${date} | ${run.checkpointIds.length} ckpts`;
917
+ });
918
+
919
+ return {
920
+ text: zh
921
+ ? `训练实验 (${runs.length}):
922
+ ${lines.join('\n')}`
923
+ : `Training experiments (${runs.length}):
924
+ ${lines.join('\n')}`,
925
+ };
926
+ }
927
+
928
+ // ── List Checkpoints ─────────────────────────────────────────────────
929
+ if (subcommand === 'list-checkpoints') {
930
+ const checkpoints = listCheckpoints(workspaceDir);
931
+ if (checkpoints.length === 0) {
932
+ return { text: zh ? '没有 Checkpoint' : 'No checkpoints' };
933
+ }
934
+
935
+ const filtered = familyArg
936
+ ? checkpoints.filter((cp) => cp.targetModelFamily.includes(familyArg))
937
+ : checkpoints;
938
+
939
+ if (filtered.length === 0) {
940
+ return { text: zh ? '没有匹配的 Checkpoint' : 'No matching checkpoints' };
941
+ }
942
+
943
+ const lines = filtered.slice(0, 20).map((cp) => {
944
+ const date = new Date(cp.createdAt).toLocaleDateString();
945
+ return `${cp.checkpointId.substring(0, 8)}... | ${cp.deployable ? 'deployable' : 'not-deployable'} | ${cp.targetModelFamily} | ${date}`;
946
+ });
947
+
948
+ return {
949
+ text: zh
950
+ ? `Checkpoints (${filtered.length}):
951
+ ${lines.join('\n')}`
952
+ : `Checkpoints (${filtered.length}):
953
+ ${lines.join('\n')}`,
954
+ };
955
+ }
956
+
957
+ // ── Stats ────────────────────────────────────────────────────────────
958
+ if (subcommand === 'stats') {
959
+ const stats = getTrainingRegistryStats(workspaceDir);
960
+ return {
961
+ text: zh
962
+ ? `=== 训练注册统计 ===
963
+ 总实验数: ${stats.totalRuns}
964
+ 完成: ${stats.completedRuns}
965
+ 失败: ${stats.failedRuns}
966
+ 进行中: ${stats.pendingRuns + stats.runningRuns}
967
+
968
+ 总 Checkpoint: ${stats.totalCheckpoints}
969
+ 可部署: ${stats.deployableCheckpoints}
970
+
971
+ 总评估: ${stats.totalEvals}
972
+ 通过: ${stats.passingEvals}
973
+ 失败: ${stats.failingEvals}`
974
+ : `=== Training Registry Stats ===
975
+ Total runs: ${stats.totalRuns}
976
+ Completed: ${stats.completedRuns}
977
+ Failed: ${stats.failedRuns}
978
+ In progress: ${stats.pendingRuns + stats.runningRuns}
979
+
980
+ Total checkpoints: ${stats.totalCheckpoints}
981
+ Deployable: ${stats.deployableCheckpoints}
982
+
983
+ Total evals: ${stats.totalEvals}
984
+ Passing: ${stats.passingEvals}
985
+ Failing: ${stats.failingEvals}`,
986
+ };
987
+ }
988
+
989
+ // Unknown subcommand
990
+ return {
991
+ text: zh
992
+ ? `未知子命令: ${subcommand}。运行 /nocturnal-train help 查看帮助。`
993
+ : `Unknown subcommand: ${subcommand}. Run /nocturnal-train help for usage.`,
994
+ };
995
+ } catch (err: unknown) {
996
+ return {
997
+ text: zh
998
+ ? `❌ 命令失败: ${err instanceof Error ? err.message : String(err)}`
999
+ : `❌ Command failed: ${err instanceof Error ? err.message : String(err)}`,
1000
+ };
1001
+ }
1002
+ }