principles-disciple 1.8.0 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (460) hide show
  1. package/ADVANCED_CONFIG_ZH.md +97 -0
  2. package/AGENT_INSTALL.md +173 -0
  3. package/AGENT_INSTALL_EN.md +173 -0
  4. package/INSTALL.md +256 -0
  5. package/SKILL.md +63 -0
  6. package/docs/COMMAND_REFERENCE.md +76 -0
  7. package/docs/COMMAND_REFERENCE_EN.md +79 -0
  8. package/esbuild.config.js +75 -0
  9. package/openclaw.plugin.json +6 -1
  10. package/package.json +13 -15
  11. package/scripts/build-web.mjs +46 -0
  12. package/scripts/install-dependencies.cjs +47 -0
  13. package/scripts/sync-plugin.mjs +802 -0
  14. package/scripts/verify-build.mjs +109 -0
  15. package/src/agents/nocturnal-dreamer.md +152 -0
  16. package/src/agents/nocturnal-philosopher.md +138 -0
  17. package/src/agents/nocturnal-reflector.md +126 -0
  18. package/src/agents/nocturnal-scribe.md +164 -0
  19. package/src/commands/capabilities.ts +85 -0
  20. package/{dist/commands/context.js → src/commands/context.ts} +78 -38
  21. package/src/commands/evolution-status.ts +146 -0
  22. package/src/commands/export.ts +111 -0
  23. package/src/commands/focus.ts +533 -0
  24. package/src/commands/nocturnal-review.ts +311 -0
  25. package/src/commands/nocturnal-rollout.ts +763 -0
  26. package/src/commands/nocturnal-train.ts +1002 -0
  27. package/{dist/commands/pain.js → src/commands/pain.ts} +68 -49
  28. package/src/commands/principle-rollback.ts +27 -0
  29. package/{dist/commands/rollback.js → src/commands/rollback.ts} +44 -12
  30. package/src/commands/samples.ts +60 -0
  31. package/src/commands/strategy.ts +38 -0
  32. package/{dist/commands/thinking-os.js → src/commands/thinking-os.ts} +59 -36
  33. package/src/commands/workflow-debug.ts +128 -0
  34. package/{dist/config/defaults/runtime.js → src/config/defaults/runtime.ts} +12 -5
  35. package/src/config/errors.ts +163 -0
  36. package/{dist/config/index.d.ts → src/config/index.ts} +2 -1
  37. package/src/constants/diagnostician.ts +66 -0
  38. package/src/constants/tools.ts +62 -0
  39. package/src/core/adaptive-thresholds.ts +476 -0
  40. package/{dist/core/config-service.js → src/core/config-service.ts} +7 -4
  41. package/{dist/core/config.js → src/core/config.ts} +158 -46
  42. package/src/core/control-ui-db.ts +435 -0
  43. package/{dist/core/detection-funnel.js → src/core/detection-funnel.ts} +36 -21
  44. package/{dist/core/detection-service.js → src/core/detection-service.ts} +7 -4
  45. package/{dist/core/dictionary-service.js → src/core/dictionary-service.ts} +7 -4
  46. package/{dist/core/dictionary.js → src/core/dictionary.ts} +57 -34
  47. package/src/core/empathy-keyword-matcher.ts +327 -0
  48. package/src/core/empathy-types.ts +218 -0
  49. package/src/core/event-log.ts +544 -0
  50. package/src/core/evolution-engine.ts +612 -0
  51. package/src/core/evolution-logger.ts +353 -0
  52. package/src/core/evolution-migration.ts +77 -0
  53. package/src/core/evolution-reducer.ts +731 -0
  54. package/src/core/evolution-types.ts +456 -0
  55. package/src/core/external-training-contract.ts +527 -0
  56. package/src/core/focus-history.ts +1458 -0
  57. package/src/core/hygiene/tracker.ts +117 -0
  58. package/{dist/core/init.js → src/core/init.ts} +39 -26
  59. package/src/core/local-worker-routing.ts +617 -0
  60. package/{dist/core/migration.js → src/core/migration.ts} +18 -11
  61. package/src/core/model-deployment-registry.ts +722 -0
  62. package/src/core/model-training-registry.ts +813 -0
  63. package/src/core/nocturnal-arbiter.ts +706 -0
  64. package/src/core/nocturnal-candidate-scoring.ts +392 -0
  65. package/src/core/nocturnal-compliance.ts +1075 -0
  66. package/src/core/nocturnal-dataset.ts +668 -0
  67. package/src/core/nocturnal-executability.ts +428 -0
  68. package/src/core/nocturnal-export.ts +390 -0
  69. package/{dist/core/nocturnal-paths.js → src/core/nocturnal-paths.ts} +49 -23
  70. package/src/core/nocturnal-trajectory-extractor.ts +484 -0
  71. package/src/core/nocturnal-trinity.ts +1384 -0
  72. package/src/core/pain.ts +122 -0
  73. package/{dist/core/path-resolver.js → src/core/path-resolver.ts} +157 -36
  74. package/{dist/core/paths.js → src/core/paths.ts} +13 -4
  75. package/src/core/principle-training-state.ts +450 -0
  76. package/src/core/profile.ts +226 -0
  77. package/src/core/promotion-gate.ts +822 -0
  78. package/{dist/core/risk-calculator.js → src/core/risk-calculator.ts} +42 -16
  79. package/{dist/core/session-tracker.js → src/core/session-tracker.ts} +185 -63
  80. package/src/core/shadow-observation-registry.ts +534 -0
  81. package/{dist/core/system-logger.js → src/core/system-logger.ts} +9 -5
  82. package/src/core/thinking-models.ts +217 -0
  83. package/src/core/training-program.ts +630 -0
  84. package/src/core/trajectory-types.ts +243 -0
  85. package/src/core/trajectory.ts +1673 -0
  86. package/{dist/core/workspace-context.js → src/core/workspace-context.ts} +57 -32
  87. package/src/hooks/bash-risk.ts +171 -0
  88. package/src/hooks/edit-verification.ts +295 -0
  89. package/src/hooks/gate-block-helper.ts +160 -0
  90. package/src/hooks/gate.ts +210 -0
  91. package/src/hooks/gfi-gate.ts +177 -0
  92. package/src/hooks/lifecycle.ts +326 -0
  93. package/{dist/hooks/llm.js → src/hooks/llm.ts} +166 -139
  94. package/src/hooks/message-sanitize.ts +45 -0
  95. package/src/hooks/pain.ts +384 -0
  96. package/src/hooks/progressive-trust-gate.ts +174 -0
  97. package/src/hooks/prompt.ts +920 -0
  98. package/src/hooks/subagent.ts +207 -0
  99. package/src/hooks/thinking-checkpoint.ts +73 -0
  100. package/src/hooks/trajectory-collector.ts +290 -0
  101. package/src/http/principles-console-route.ts +716 -0
  102. package/src/i18n/commands.ts +117 -0
  103. package/src/index.ts +694 -0
  104. package/src/service/central-database.ts +831 -0
  105. package/src/service/control-ui-query-service.ts +888 -0
  106. package/src/service/evolution-query-service.ts +405 -0
  107. package/src/service/evolution-worker.ts +1646 -0
  108. package/src/service/health-query-service.ts +836 -0
  109. package/{dist/service/nocturnal-runtime.js → src/service/nocturnal-runtime.ts} +263 -36
  110. package/src/service/nocturnal-service.ts +1015 -0
  111. package/src/service/nocturnal-target-selector.ts +532 -0
  112. package/src/service/phase3-input-filter.ts +237 -0
  113. package/src/service/runtime-summary-service.ts +757 -0
  114. package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +513 -0
  115. package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +603 -0
  116. package/src/service/subagent-workflow/index.ts +51 -0
  117. package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +856 -0
  118. package/src/service/subagent-workflow/runtime-direct-driver.ts +166 -0
  119. package/src/service/subagent-workflow/types.ts +378 -0
  120. package/src/service/subagent-workflow/workflow-store.ts +328 -0
  121. package/src/service/trajectory-service.ts +15 -0
  122. package/{dist/tools/critique-prompt.js → src/tools/critique-prompt.ts} +25 -8
  123. package/src/tools/deep-reflect.ts +349 -0
  124. package/{dist/tools/model-index.js → src/tools/model-index.ts} +33 -17
  125. package/src/types/event-types.ts +453 -0
  126. package/src/types/hygiene-types.ts +31 -0
  127. package/src/types/principle-tree-schema.ts +244 -0
  128. package/src/types/runtime-summary.ts +49 -0
  129. package/src/types.ts +74 -0
  130. package/src/utils/file-lock.ts +391 -0
  131. package/{dist/utils/glob-match.js → src/utils/glob-match.ts} +21 -20
  132. package/{dist/utils/hashing.js → src/utils/hashing.ts} +6 -4
  133. package/src/utils/io.ts +110 -0
  134. package/{dist/utils/nlp.js → src/utils/nlp.ts} +19 -12
  135. package/{dist/utils/plugin-logger.js → src/utils/plugin-logger.ts} +33 -8
  136. package/src/utils/subagent-probe.ts +94 -0
  137. package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +70 -1
  138. package/templates/pain_settings.json +2 -1
  139. package/tests/README.md +120 -0
  140. package/tests/build-artifacts.test.ts +111 -0
  141. package/tests/commands/evolution-status.test.ts +222 -0
  142. package/tests/commands/evolver.test.ts +22 -0
  143. package/tests/commands/export.test.ts +78 -0
  144. package/tests/commands/nocturnal-review.test.ts +448 -0
  145. package/tests/commands/nocturnal-train.test.ts +97 -0
  146. package/tests/commands/pain.test.ts +108 -0
  147. package/tests/commands/samples.test.ts +65 -0
  148. package/tests/commands/strategy.test.ts +34 -0
  149. package/tests/commands/thinking-os.test.ts +88 -0
  150. package/tests/core/adaptive-thresholds.test.ts +261 -0
  151. package/tests/core/config-service.test.ts +89 -0
  152. package/tests/core/config.test.ts +90 -0
  153. package/tests/core/control-ui-db.test.ts +75 -0
  154. package/tests/core/core-template-guidance.test.ts +21 -0
  155. package/tests/core/detection-funnel.test.ts +63 -0
  156. package/tests/core/detection-service.test.ts +50 -0
  157. package/tests/core/dictionary-service.test.ts +116 -0
  158. package/tests/core/dictionary.test.ts +168 -0
  159. package/tests/core/empathy-keyword-matcher.test.ts +209 -0
  160. package/tests/core/event-log.test.ts +181 -0
  161. package/tests/core/evolution-e2e.test.ts +58 -0
  162. package/tests/core/evolution-engine-gate-integration.test.ts +543 -0
  163. package/tests/core/evolution-engine.test.ts +562 -0
  164. package/tests/core/evolution-logger.test.ts +148 -0
  165. package/tests/core/evolution-migration.test.ts +50 -0
  166. package/tests/core/evolution-paths.test.ts +21 -0
  167. package/tests/core/evolution-reducer.detector-metadata.test.ts +602 -0
  168. package/tests/core/evolution-reducer.test.ts +180 -0
  169. package/tests/core/evolution-types-loop.test.ts +48 -0
  170. package/tests/core/evolution-user-stories.e2e.test.ts +249 -0
  171. package/tests/core/external-training-contract.test.ts +463 -0
  172. package/tests/core/focus-history.test.ts +682 -0
  173. package/tests/core/init-flatten.test.ts +69 -0
  174. package/tests/core/init-refactor.test.ts +87 -0
  175. package/tests/core/init-v1.3.test.ts +46 -0
  176. package/tests/core/init.test.ts +190 -0
  177. package/tests/core/local-worker-routing.test.ts +757 -0
  178. package/tests/core/migration.test.ts +84 -0
  179. package/tests/core/model-deployment-registry.test.ts +845 -0
  180. package/tests/core/model-training-registry.test.ts +889 -0
  181. package/tests/core/nocturnal-arbiter.test.ts +494 -0
  182. package/tests/core/nocturnal-candidate-scoring.test.ts +400 -0
  183. package/tests/core/nocturnal-compliance.test.ts +646 -0
  184. package/tests/core/nocturnal-dataset.test.ts +892 -0
  185. package/tests/core/nocturnal-executability.test.ts +357 -0
  186. package/tests/core/nocturnal-export.test.ts +462 -0
  187. package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +428 -0
  188. package/tests/core/nocturnal-trajectory-extractor.test.ts +634 -0
  189. package/tests/core/nocturnal-trinity.test.ts +953 -0
  190. package/tests/core/pain.test.ts +33 -0
  191. package/tests/core/path-resolver.test.ts +57 -0
  192. package/tests/core/paths-refactor.test.ts +42 -0
  193. package/tests/core/phase7-rollout-integration.test.ts +477 -0
  194. package/tests/core/principle-training-state.test.ts +712 -0
  195. package/tests/core/profile.test.ts +56 -0
  196. package/tests/core/promotion-gate.test.ts +556 -0
  197. package/tests/core/risk-calculator.test.ts +168 -0
  198. package/tests/core/session-tracker.test.ts +191 -0
  199. package/tests/core/training-program.test.ts +472 -0
  200. package/tests/core/trajectory.test.ts +265 -0
  201. package/tests/core/workspace-context-factory.test.ts +18 -0
  202. package/tests/core/workspace-context.test.ts +134 -0
  203. package/tests/fixtures/nocturnal-reviewed-subset.json +183 -0
  204. package/tests/fixtures/production-compatibility.test.ts +147 -0
  205. package/tests/fixtures/production-mock-generator.ts +282 -0
  206. package/tests/hooks/bash-risk-integration.test.ts +137 -0
  207. package/tests/hooks/bash-risk.test.ts +81 -0
  208. package/tests/hooks/edit-verification.test.ts +678 -0
  209. package/tests/hooks/gate-edit-verification-p1.test.ts +632 -0
  210. package/tests/hooks/gate-edit-verification.test.ts +435 -0
  211. package/tests/hooks/gate-pipeline-integration.test.ts +404 -0
  212. package/tests/hooks/gate.test.ts +271 -0
  213. package/tests/hooks/gfi-gate-unit.test.ts +422 -0
  214. package/tests/hooks/gfi-gate.test.ts +669 -0
  215. package/tests/hooks/lifecycle.test.ts +248 -0
  216. package/tests/hooks/llm.test.ts +308 -0
  217. package/tests/hooks/message-sanitize.test.ts +36 -0
  218. package/tests/hooks/pain.test.ts +141 -0
  219. package/tests/hooks/progressive-trust-gate.test.ts +277 -0
  220. package/tests/hooks/prompt.test.ts +1411 -0
  221. package/tests/hooks/subagent.test.ts +467 -0
  222. package/tests/hooks/thinking-gate.test.ts +313 -0
  223. package/tests/http/principles-console-route.test.ts +140 -0
  224. package/tests/hygiene-tracker.test.ts +77 -0
  225. package/tests/index.integration.test.ts +179 -0
  226. package/tests/index.shadow-routing.integration.test.ts +140 -0
  227. package/tests/index.test.ts +9 -0
  228. package/tests/integration/empathy-workflow-integration.test.ts +627 -0
  229. package/tests/service/control-ui-query-service.test.ts +121 -0
  230. package/tests/service/empathy-observer-workflow-manager.test.ts +176 -0
  231. package/tests/service/evolution-worker.test.ts +585 -0
  232. package/tests/service/nocturnal-runtime.test.ts +470 -0
  233. package/tests/service/nocturnal-service.test.ts +577 -0
  234. package/tests/service/nocturnal-target-selector.test.ts +615 -0
  235. package/tests/service/nocturnal-workflow-manager.test.ts +439 -0
  236. package/tests/service/phase3-input-filter.test.ts +289 -0
  237. package/tests/service/runtime-summary-service.test.ts +919 -0
  238. package/tests/task-compliance.test.ts +166 -0
  239. package/tests/test-utils.ts +48 -0
  240. package/tests/tools/critique-prompt.test.ts +260 -0
  241. package/tests/tools/deep-reflect.test.ts +232 -0
  242. package/tests/tools/model-index.test.ts +246 -0
  243. package/tests/ui/app.test.tsx +114 -0
  244. package/tests/utils/file-lock.test.ts +407 -0
  245. package/tests/utils/hashing.test.ts +32 -0
  246. package/tests/utils/io.test.ts +39 -0
  247. package/tests/utils/nlp.test.ts +53 -0
  248. package/tests/utils/plugin-logger.test.ts +156 -0
  249. package/tsconfig.json +16 -0
  250. package/tsconfig.tsbuildinfo +1 -0
  251. package/ui/src/App.tsx +45 -0
  252. package/ui/src/api.ts +216 -0
  253. package/ui/src/charts.tsx +586 -0
  254. package/ui/src/components/ErrorState.tsx +6 -0
  255. package/ui/src/components/Loading.tsx +13 -0
  256. package/ui/src/components/ProtectedRoute.tsx +12 -0
  257. package/ui/src/components/Shell.tsx +91 -0
  258. package/ui/src/components/WorkspaceConfig.tsx +146 -0
  259. package/ui/src/components/index.ts +5 -0
  260. package/ui/src/context/auth.tsx +80 -0
  261. package/ui/src/context/theme.tsx +66 -0
  262. package/ui/src/hooks/useAutoRefresh.ts +39 -0
  263. package/ui/src/i18n/ui.ts +363 -0
  264. package/ui/src/main.tsx +16 -0
  265. package/ui/src/pages/EvolutionPage.tsx +352 -0
  266. package/ui/src/pages/FeedbackPage.tsx +140 -0
  267. package/ui/src/pages/GateMonitorPage.tsx +136 -0
  268. package/ui/src/pages/LoginPage.tsx +88 -0
  269. package/ui/src/pages/OverviewPage.tsx +238 -0
  270. package/ui/src/pages/SamplesPage.tsx +174 -0
  271. package/ui/src/pages/ThinkingModelsPage.tsx +127 -0
  272. package/ui/src/styles.css +1661 -0
  273. package/ui/src/types.ts +368 -0
  274. package/ui/src/utils/format.ts +15 -0
  275. package/vitest.config.ts +23 -0
  276. package/dist/commands/capabilities.d.ts +0 -3
  277. package/dist/commands/capabilities.js +0 -73
  278. package/dist/commands/context.d.ts +0 -5
  279. package/dist/commands/evolution-status.d.ts +0 -4
  280. package/dist/commands/evolution-status.js +0 -117
  281. package/dist/commands/evolver.d.ts +0 -9
  282. package/dist/commands/evolver.js +0 -26
  283. package/dist/commands/export.d.ts +0 -2
  284. package/dist/commands/export.js +0 -98
  285. package/dist/commands/focus.d.ts +0 -14
  286. package/dist/commands/focus.js +0 -457
  287. package/dist/commands/nocturnal-review.d.ts +0 -24
  288. package/dist/commands/nocturnal-review.js +0 -265
  289. package/dist/commands/nocturnal-rollout.d.ts +0 -27
  290. package/dist/commands/nocturnal-rollout.js +0 -671
  291. package/dist/commands/nocturnal-train.d.ts +0 -25
  292. package/dist/commands/nocturnal-train.js +0 -919
  293. package/dist/commands/pain.d.ts +0 -5
  294. package/dist/commands/principle-rollback.d.ts +0 -4
  295. package/dist/commands/principle-rollback.js +0 -22
  296. package/dist/commands/rollback.d.ts +0 -19
  297. package/dist/commands/samples.d.ts +0 -2
  298. package/dist/commands/samples.js +0 -55
  299. package/dist/commands/strategy.d.ts +0 -3
  300. package/dist/commands/strategy.js +0 -29
  301. package/dist/commands/thinking-os.d.ts +0 -2
  302. package/dist/config/defaults/runtime.d.ts +0 -40
  303. package/dist/config/errors.d.ts +0 -84
  304. package/dist/config/errors.js +0 -94
  305. package/dist/config/index.js +0 -7
  306. package/dist/constants/diagnostician.d.ts +0 -12
  307. package/dist/constants/diagnostician.js +0 -56
  308. package/dist/constants/tools.d.ts +0 -17
  309. package/dist/constants/tools.js +0 -54
  310. package/dist/core/adaptive-thresholds.d.ts +0 -186
  311. package/dist/core/adaptive-thresholds.js +0 -300
  312. package/dist/core/config-service.d.ts +0 -15
  313. package/dist/core/config.d.ts +0 -127
  314. package/dist/core/control-ui-db.d.ts +0 -95
  315. package/dist/core/control-ui-db.js +0 -292
  316. package/dist/core/detection-funnel.d.ts +0 -33
  317. package/dist/core/detection-service.d.ts +0 -15
  318. package/dist/core/dictionary-service.d.ts +0 -15
  319. package/dist/core/dictionary.d.ts +0 -38
  320. package/dist/core/event-log.d.ts +0 -82
  321. package/dist/core/event-log.js +0 -463
  322. package/dist/core/evolution-engine.d.ts +0 -118
  323. package/dist/core/evolution-engine.js +0 -464
  324. package/dist/core/evolution-logger.d.ts +0 -137
  325. package/dist/core/evolution-logger.js +0 -256
  326. package/dist/core/evolution-migration.d.ts +0 -5
  327. package/dist/core/evolution-migration.js +0 -65
  328. package/dist/core/evolution-reducer.d.ts +0 -98
  329. package/dist/core/evolution-reducer.js +0 -465
  330. package/dist/core/evolution-types.d.ts +0 -287
  331. package/dist/core/evolution-types.js +0 -78
  332. package/dist/core/external-training-contract.d.ts +0 -276
  333. package/dist/core/external-training-contract.js +0 -269
  334. package/dist/core/focus-history.d.ts +0 -210
  335. package/dist/core/focus-history.js +0 -1185
  336. package/dist/core/hygiene/tracker.d.ts +0 -22
  337. package/dist/core/hygiene/tracker.js +0 -106
  338. package/dist/core/init.d.ts +0 -12
  339. package/dist/core/local-worker-routing.d.ts +0 -175
  340. package/dist/core/local-worker-routing.js +0 -525
  341. package/dist/core/migration.d.ts +0 -6
  342. package/dist/core/model-deployment-registry.d.ts +0 -218
  343. package/dist/core/model-deployment-registry.js +0 -503
  344. package/dist/core/model-training-registry.d.ts +0 -295
  345. package/dist/core/model-training-registry.js +0 -475
  346. package/dist/core/nocturnal-arbiter.d.ts +0 -159
  347. package/dist/core/nocturnal-arbiter.js +0 -534
  348. package/dist/core/nocturnal-candidate-scoring.d.ts +0 -137
  349. package/dist/core/nocturnal-candidate-scoring.js +0 -266
  350. package/dist/core/nocturnal-compliance.d.ts +0 -175
  351. package/dist/core/nocturnal-compliance.js +0 -824
  352. package/dist/core/nocturnal-dataset.d.ts +0 -224
  353. package/dist/core/nocturnal-dataset.js +0 -443
  354. package/dist/core/nocturnal-executability.d.ts +0 -85
  355. package/dist/core/nocturnal-executability.js +0 -331
  356. package/dist/core/nocturnal-export.d.ts +0 -124
  357. package/dist/core/nocturnal-export.js +0 -275
  358. package/dist/core/nocturnal-paths.d.ts +0 -124
  359. package/dist/core/nocturnal-trajectory-extractor.d.ts +0 -242
  360. package/dist/core/nocturnal-trajectory-extractor.js +0 -307
  361. package/dist/core/nocturnal-trinity.d.ts +0 -311
  362. package/dist/core/nocturnal-trinity.js +0 -880
  363. package/dist/core/pain.d.ts +0 -4
  364. package/dist/core/pain.js +0 -70
  365. package/dist/core/path-resolver.d.ts +0 -46
  366. package/dist/core/paths.d.ts +0 -65
  367. package/dist/core/principle-training-state.d.ts +0 -121
  368. package/dist/core/principle-training-state.js +0 -321
  369. package/dist/core/profile.d.ts +0 -62
  370. package/dist/core/profile.js +0 -210
  371. package/dist/core/promotion-gate.d.ts +0 -238
  372. package/dist/core/promotion-gate.js +0 -529
  373. package/dist/core/risk-calculator.d.ts +0 -22
  374. package/dist/core/session-tracker.d.ts +0 -99
  375. package/dist/core/shadow-observation-registry.d.ts +0 -217
  376. package/dist/core/shadow-observation-registry.js +0 -308
  377. package/dist/core/system-logger.d.ts +0 -8
  378. package/dist/core/thinking-models.d.ts +0 -38
  379. package/dist/core/thinking-models.js +0 -170
  380. package/dist/core/training-program.d.ts +0 -233
  381. package/dist/core/training-program.js +0 -433
  382. package/dist/core/trajectory.d.ts +0 -411
  383. package/dist/core/trajectory.js +0 -1307
  384. package/dist/core/workspace-context.d.ts +0 -71
  385. package/dist/hooks/bash-risk.d.ts +0 -57
  386. package/dist/hooks/bash-risk.js +0 -137
  387. package/dist/hooks/edit-verification.d.ts +0 -62
  388. package/dist/hooks/edit-verification.js +0 -256
  389. package/dist/hooks/gate-block-helper.d.ts +0 -44
  390. package/dist/hooks/gate-block-helper.js +0 -119
  391. package/dist/hooks/gate.d.ts +0 -24
  392. package/dist/hooks/gate.js +0 -173
  393. package/dist/hooks/gfi-gate.d.ts +0 -40
  394. package/dist/hooks/gfi-gate.js +0 -113
  395. package/dist/hooks/lifecycle.d.ts +0 -5
  396. package/dist/hooks/lifecycle.js +0 -284
  397. package/dist/hooks/llm.d.ts +0 -12
  398. package/dist/hooks/message-sanitize.d.ts +0 -3
  399. package/dist/hooks/message-sanitize.js +0 -37
  400. package/dist/hooks/pain.d.ts +0 -5
  401. package/dist/hooks/pain.js +0 -301
  402. package/dist/hooks/progressive-trust-gate.d.ts +0 -51
  403. package/dist/hooks/progressive-trust-gate.js +0 -89
  404. package/dist/hooks/prompt.d.ts +0 -47
  405. package/dist/hooks/prompt.js +0 -884
  406. package/dist/hooks/subagent.d.ts +0 -10
  407. package/dist/hooks/subagent.js +0 -387
  408. package/dist/hooks/thinking-checkpoint.d.ts +0 -37
  409. package/dist/hooks/thinking-checkpoint.js +0 -51
  410. package/dist/hooks/trajectory-collector.d.ts +0 -32
  411. package/dist/hooks/trajectory-collector.js +0 -256
  412. package/dist/http/principles-console-route.d.ts +0 -9
  413. package/dist/http/principles-console-route.js +0 -567
  414. package/dist/i18n/commands.d.ts +0 -26
  415. package/dist/i18n/commands.js +0 -116
  416. package/dist/index.d.ts +0 -7
  417. package/dist/index.js +0 -581
  418. package/dist/service/central-database.d.ts +0 -104
  419. package/dist/service/central-database.js +0 -649
  420. package/dist/service/control-ui-query-service.d.ts +0 -221
  421. package/dist/service/control-ui-query-service.js +0 -543
  422. package/dist/service/empathy-observer-manager.d.ts +0 -52
  423. package/dist/service/empathy-observer-manager.js +0 -229
  424. package/dist/service/evolution-query-service.d.ts +0 -155
  425. package/dist/service/evolution-query-service.js +0 -258
  426. package/dist/service/evolution-worker.d.ts +0 -101
  427. package/dist/service/evolution-worker.js +0 -974
  428. package/dist/service/nocturnal-runtime.d.ts +0 -183
  429. package/dist/service/nocturnal-service.d.ts +0 -163
  430. package/dist/service/nocturnal-service.js +0 -787
  431. package/dist/service/nocturnal-target-selector.d.ts +0 -145
  432. package/dist/service/nocturnal-target-selector.js +0 -315
  433. package/dist/service/phase3-input-filter.d.ts +0 -73
  434. package/dist/service/phase3-input-filter.js +0 -172
  435. package/dist/service/runtime-summary-service.d.ts +0 -122
  436. package/dist/service/runtime-summary-service.js +0 -485
  437. package/dist/service/trajectory-service.d.ts +0 -2
  438. package/dist/service/trajectory-service.js +0 -15
  439. package/dist/tools/critique-prompt.d.ts +0 -14
  440. package/dist/tools/deep-reflect.d.ts +0 -39
  441. package/dist/tools/deep-reflect.js +0 -350
  442. package/dist/tools/model-index.d.ts +0 -9
  443. package/dist/types/event-types.d.ts +0 -306
  444. package/dist/types/event-types.js +0 -106
  445. package/dist/types/hygiene-types.d.ts +0 -20
  446. package/dist/types/hygiene-types.js +0 -12
  447. package/dist/types/runtime-summary.d.ts +0 -47
  448. package/dist/types/runtime-summary.js +0 -1
  449. package/dist/types.d.ts +0 -50
  450. package/dist/types.js +0 -22
  451. package/dist/utils/file-lock.d.ts +0 -71
  452. package/dist/utils/file-lock.js +0 -309
  453. package/dist/utils/glob-match.d.ts +0 -28
  454. package/dist/utils/hashing.d.ts +0 -9
  455. package/dist/utils/io.d.ts +0 -6
  456. package/dist/utils/io.js +0 -106
  457. package/dist/utils/nlp.d.ts +0 -9
  458. package/dist/utils/plugin-logger.d.ts +0 -39
  459. package/dist/utils/subagent-probe.d.ts +0 -34
  460. package/dist/utils/subagent-probe.js +0 -81
@@ -0,0 +1,889 @@
1
+ /**
2
+ * Model Training Registry — Tests
3
+ * ===============================
4
+ *
5
+ * Tests for the training run, checkpoint, and eval summary registry.
6
+ * Follows the same patterns as other core domain tests.
7
+ */
8
+
9
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+ import * as os from 'os';
13
+ import {
14
+ registerTrainingRun,
15
+ completeTrainingRun,
16
+ failTrainingRun,
17
+ startTrainingRun,
18
+ updateTrainingRunStatus,
19
+ getTrainingRun,
20
+ listTrainingRuns,
21
+ registerCheckpoint,
22
+ getCheckpoint,
23
+ listCheckpoints,
24
+ listDeployableCheckpoints,
25
+ attachEvalSummary,
26
+ getEvalSummary,
27
+ listEvalSummaries,
28
+ markCheckpointDeployable,
29
+ isCheckpointDeployable,
30
+ getCheckpointLineage,
31
+ getFullRegistry,
32
+ getTrainingRegistryStats,
33
+ } from '../../src/core/model-training-registry.js';
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // Test Fixtures
37
+ // ---------------------------------------------------------------------------
38
+
39
/**
 * Creates a fresh, uniquely named scratch directory under the OS temp folder.
 *
 * @returns The path of the newly created directory.
 */
function makeTmpDir(): string {
  const prefix = path.join(os.tmpdir(), 'pd-training-registry-test-');
  return fs.mkdtempSync(prefix);
}
42
+
43
/**
 * Best-effort recursive removal of a test scratch directory.
 *
 * `force: true` already makes `rmSync` a no-op when the path does not exist,
 * so no separate existence check is needed (and none is made, which avoids a
 * check-then-act race with other processes touching the temp folder).
 *
 * @param dir - Directory to delete; may already be gone.
 */
function rmdir(dir: string): void {
  try {
    fs.rmSync(dir, { recursive: true, force: true });
  } catch {
    // Deliberately ignored: cleanup failures (e.g. a file still locked by the
    // OS) must not fail the test that has already finished.
  }
}
52
+
53
+ // ---------------------------------------------------------------------------
54
+ // Tests: Training Run Registration
55
+ // ---------------------------------------------------------------------------
56
+
57
/**
 * Registration suite: each test works against its own temp directory and
 * checks the record returned by `registerTrainingRun`, that it can be read
 * back with `getTrainingRun`, and that ids differ between registrations.
 */
describe('ModelTrainingRegistry registerTrainingRun', () => {
  let tmpDir: string;

  beforeEach(() => {
    tmpDir = makeTmpDir();
  });

  afterEach(() => {
    rmdir(tmpDir);
  });

  it('registers a new training run', () => {
    const created = registerTrainingRun(tmpDir, {
      targetModelFamily: 'gpt-4',
      datasetFingerprint: 'sha256-abc123',
      exportId: 'export-001',
      sampleCount: 42,
      configFingerprint: 'config-default-v0.1',
    });

    // Identity and echoed inputs.
    expect(created.trainRunId).toBeDefined();
    expect(created.targetModelFamily).toBe('gpt-4');
    expect(created.datasetFingerprint).toBe('sha256-abc123');
    expect(created.exportId).toBe('export-001');
    expect(created.sampleCount).toBe(42);
    expect(created.configFingerprint).toBe('config-default-v0.1');

    // Initial lifecycle state of a freshly registered run.
    expect(created.status).toBe('pending');
    expect(created.checkpointIds).toEqual([]);
    expect(created.createdAt).toBeDefined();
  });

  it('persists the run to disk', () => {
    const { trainRunId } = registerTrainingRun(tmpDir, {
      targetModelFamily: 'claude-3',
      datasetFingerprint: 'sha256-def456',
      exportId: 'export-002',
      sampleCount: 30,
      configFingerprint: 'config-v1',
    });

    const loaded = getTrainingRun(tmpDir, trainRunId);
    expect(loaded).not.toBeNull();
    expect(loaded!.trainRunId).toBe(trainRunId);
    expect(loaded!.targetModelFamily).toBe('claude-3');
  });

  it('generates unique trainRunIds', () => {
    const { trainRunId: firstId } = registerTrainingRun(tmpDir, {
      targetModelFamily: 'gpt-4',
      datasetFingerprint: 'sha256-abc',
      exportId: 'e1',
      sampleCount: 10,
      configFingerprint: 'c1',
    });
    const { trainRunId: secondId } = registerTrainingRun(tmpDir, {
      targetModelFamily: 'gpt-4',
      datasetFingerprint: 'sha256-def',
      exportId: 'e2',
      sampleCount: 20,
      configFingerprint: 'c2',
    });

    expect(firstId).not.toBe(secondId);
  });
});
121
+
122
+ // ---------------------------------------------------------------------------
123
+ // Tests: Training Run Status Transitions
124
+ // ---------------------------------------------------------------------------
125
+
126
/**
 * Status-transition suite. Every test starts with a fresh temp directory
 * holding exactly one pending gpt-4 run (`runId`), registered in `beforeEach`.
 * The tests cover the allowed pending → running → completed/failed path, the
 * rejected shortcuts, and the `listTrainingRuns` filters.
 */
describe('ModelTrainingRegistry run status transitions', () => {
  let tmpDir: string;
  let runId: string;

  beforeEach(() => {
    tmpDir = makeTmpDir();
    runId = registerTrainingRun(tmpDir, {
      targetModelFamily: 'gpt-4',
      datasetFingerprint: 'sha256-abc',
      exportId: 'exp-1',
      sampleCount: 10,
      configFingerprint: 'cfg-1',
    }).trainRunId;
  });

  afterEach(() => {
    rmdir(tmpDir);
  });

  it('starts as pending', () => {
    const run = getTrainingRun(tmpDir, runId)!;
    expect(run.status).toBe('pending');
  });

  it('can transition pending → running', () => {
    const run = startTrainingRun(tmpDir, runId);
    expect(run.status).toBe('running');
  });

  it('can transition running → completed', () => {
    startTrainingRun(tmpDir, runId);
    const run = completeTrainingRun(tmpDir, runId);
    expect(run.status).toBe('completed');
    expect(run.completedAt).toBeDefined();
  });

  it('can transition running → failed', () => {
    startTrainingRun(tmpDir, runId);
    const run = failTrainingRun(tmpDir, runId, 'Out of memory');
    expect(run.status).toBe('failed');
    expect(run.failureReason).toBe('Out of memory');
    expect(run.completedAt).toBeDefined();
  });

  it('cannot transition pending → completed directly', () => {
    expect(() => completeTrainingRun(tmpDir, runId)).toThrow('Invalid status transition');
  });

  it('cannot transition pending → failed directly', () => {
    expect(() => failTrainingRun(tmpDir, runId, 'reason')).toThrow('Invalid status transition');
  });

  it('cannot transition completed → anything', () => {
    startTrainingRun(tmpDir, runId);
    completeTrainingRun(tmpDir, runId);
    expect(() => startTrainingRun(tmpDir, runId)).toThrow('Invalid status transition');
    expect(() => failTrainingRun(tmpDir, runId, 'reason')).toThrow('Invalid status transition');
  });

  it('listTrainingRuns filters by status', () => {
    // beforeEach already registered runId (pending); add a second pending run.
    const run2 = registerTrainingRun(tmpDir, {
      targetModelFamily: 'gpt-4',
      datasetFingerprint: 'sha256-def',
      exportId: 'exp-2',
      sampleCount: 10,
      configFingerprint: 'cfg-1',
    });
    // Start only the first run: runId becomes running, run2 stays pending.
    startTrainingRun(tmpDir, runId);

    const pending = listTrainingRuns(tmpDir, { status: 'pending' });
    const running = listTrainingRuns(tmpDir, { status: 'running' });
    const completed = listTrainingRuns(tmpDir, { status: 'completed' });

    // Exact lists, not just membership: a filter that let a run of another
    // status leak into the result would pass a bare `toContain` check.
    expect(running.map((r) => r.trainRunId)).toEqual([runId]);
    expect(pending.map((r) => r.trainRunId)).toEqual([run2.trainRunId]);
    expect(completed).toHaveLength(0);
  });

  it('listTrainingRuns filters by targetModelFamily', () => {
    // Add one more gpt-4 run and one claude-3 run to the directory from beforeEach.
    const gpt4Run = registerTrainingRun(tmpDir, {
      targetModelFamily: 'gpt-4',
      datasetFingerprint: 'sha256-a',
      exportId: 'e1',
      sampleCount: 10,
      configFingerprint: 'c1',
    });
    registerTrainingRun(tmpDir, {
      targetModelFamily: 'claude-3',
      datasetFingerprint: 'sha256-b',
      exportId: 'e2',
      sampleCount: 10,
      configFingerprint: 'c1',
    });

    const gpt4Runs = listTrainingRuns(tmpDir, { targetModelFamily: 'gpt-4' });
    const claudeRuns = listTrainingRuns(tmpDir, { targetModelFamily: 'claude-3' });

    // One gpt-4 run from beforeEach plus the one registered here = 2 in total.
    expect(gpt4Runs.map((r) => r.trainRunId)).toContain(gpt4Run.trainRunId);
    expect(gpt4Runs).toHaveLength(2);
    expect(claudeRuns).toHaveLength(1);
    expect(claudeRuns[0].targetModelFamily).toBe('claude-3');
  });
});
235
+
236
+ // ---------------------------------------------------------------------------
237
+ // Tests: Checkpoint Registration
238
+ // ---------------------------------------------------------------------------
239
+
240
+ describe('ModelTrainingRegistry registerCheckpoint', () => { // Suite: registering checkpoints against a gpt-4 run that is registered fresh for every test.
241
+ let tmpDir: string;
242
+ let runId: string;
243
+
244
+ beforeEach(() => { // each test gets its own tmpDir holding one registered gpt-4 run
245
+ tmpDir = makeTmpDir();
246
+ runId = registerTrainingRun(tmpDir, {
247
+ targetModelFamily: 'gpt-4',
248
+ datasetFingerprint: 'sha256-abc',
249
+ exportId: 'exp-1',
250
+ sampleCount: 50,
251
+ configFingerprint: 'cfg-v1',
252
+ }).trainRunId;
253
+ });
254
+
255
+ afterEach(() => { // hand this test's tmpDir to the rmdir helper
256
+ rmdir(tmpDir);
257
+ });
258
+
259
+ it('registers a checkpoint for a training run', () => {
260
+ const checkpoint = registerCheckpoint(tmpDir, {
261
+ trainRunId: runId,
262
+ targetModelFamily: 'gpt-4',
263
+ artifactPath: '/path/to/checkpoint-001.safetensors',
264
+ });
265
+
266
+ expect(checkpoint.checkpointId).toBeDefined();
267
+ expect(checkpoint.trainRunId).toBe(runId);
268
+ expect(checkpoint.targetModelFamily).toBe('gpt-4');
269
+ expect(checkpoint.artifactPath).toBe('/path/to/checkpoint-001.safetensors');
270
+ expect(checkpoint.deployable).toBe(false); // a freshly registered checkpoint is expected to be non-deployable
271
+ expect(checkpoint.lastEvalSummaryRef).toBeUndefined(); // ...and to carry no eval summary reference yet
272
+ });
273
+
274
+ it('adds the checkpoint to the training run', () => {
275
+ const ck1 = registerCheckpoint(tmpDir, {
276
+ trainRunId: runId,
277
+ targetModelFamily: 'gpt-4',
278
+ artifactPath: '/ck1.safetensors',
279
+ });
280
+ const ck2 = registerCheckpoint(tmpDir, {
281
+ trainRunId: runId,
282
+ targetModelFamily: 'gpt-4',
283
+ artifactPath: '/ck2.safetensors',
284
+ });
285
+
286
+ const run = getTrainingRun(tmpDir, runId)!;
287
+ expect(run.checkpointIds).toContain(ck1.checkpointId);
288
+ expect(run.checkpointIds).toContain(ck2.checkpointId);
289
+ expect(run.checkpointIds).toHaveLength(2); // exactly the two checkpoints registered above
290
+ });
291
+
292
+ it('throws if training run not found', () => {
293
+ expect(() =>
294
+ registerCheckpoint(tmpDir, {
295
+ trainRunId: 'nonexistent-run', // an id that was never registered in tmpDir
296
+ targetModelFamily: 'gpt-4',
297
+ artifactPath: '/ck.safetensors',
298
+ })
299
+ ).toThrow('Training run not found');
300
+ });
301
+
302
+ it('throws if targetModelFamily does not match the training run', () => {
303
+ expect(() =>
304
+ registerCheckpoint(tmpDir, {
305
+ trainRunId: runId,
306
+ targetModelFamily: 'claude-3', // Does not match run's gpt-4
307
+ artifactPath: '/ck.safetensors',
308
+ })
309
+ ).toThrow('Target model family mismatch');
310
+ });
311
+
312
+ it('checkpoint starts as non-deployable', () => {
313
+ const ck = registerCheckpoint(tmpDir, {
314
+ trainRunId: runId,
315
+ targetModelFamily: 'gpt-4',
316
+ artifactPath: '/ck.safetensors',
317
+ });
318
+ expect(ck.deployable).toBe(false);
319
+ });
320
+
321
+ it('getCheckpointLineage returns run, checkpoint, and eval', () => {
322
+ const ck = registerCheckpoint(tmpDir, {
323
+ trainRunId: runId,
324
+ targetModelFamily: 'gpt-4',
325
+ artifactPath: '/ck.safetensors',
326
+ });
327
+
328
+ const lineage = getCheckpointLineage(tmpDir, ck.checkpointId);
329
+ expect(lineage).not.toBeNull();
330
+ expect(lineage!.run.trainRunId).toBe(runId);
331
+ expect(lineage!.checkpoint.checkpointId).toBe(ck.checkpointId);
332
+ expect(lineage!.eval).toBeNull(); // no eval summary was attached in this test
333
+ });
334
+ });
335
+
336
+ // ---------------------------------------------------------------------------
337
+ // Tests: Eval Summary Attachment
338
+ // ---------------------------------------------------------------------------
339
+
340
+ describe('ModelTrainingRegistry attachEvalSummary', () => { // Suite: attaching eval summaries to a checkpoint and reading them back.
341
+ let tmpDir: string;
342
+ let runId: string;
343
+ let checkpointId: string;
344
+
345
+ beforeEach(() => { // fresh tmpDir with one gpt-4 run and one checkpoint registered on that run
346
+ tmpDir = makeTmpDir();
347
+ const run = registerTrainingRun(tmpDir, {
348
+ targetModelFamily: 'gpt-4',
349
+ datasetFingerprint: 'sha256-abc',
350
+ exportId: 'exp-1',
351
+ sampleCount: 50,
352
+ configFingerprint: 'cfg-v1',
353
+ });
354
+ runId = run.trainRunId;
355
+ const ck = registerCheckpoint(tmpDir, {
356
+ trainRunId: runId,
357
+ targetModelFamily: 'gpt-4',
358
+ artifactPath: '/ck.safetensors',
359
+ });
360
+ checkpointId = ck.checkpointId;
361
+ });
362
+
363
+ afterEach(() => { // hand this test's tmpDir to the rmdir helper
364
+ rmdir(tmpDir);
365
+ });
366
+
367
+ it('attaches an eval summary to a checkpoint', () => {
368
+ const evalSummary = attachEvalSummary(tmpDir, checkpointId, {
369
+ evalId: 'eval-001',
370
+ checkpointId,
371
+ targetModelFamily: 'gpt-4',
372
+ benchmarkId: 'bench-001',
373
+ mode: 'reduced_prompt',
374
+ baselineScore: 0.5,
375
+ candidateScore: 0.65,
376
+ delta: 0.15,
377
+ verdict: 'pass',
378
+ });
379
+
380
+ expect(evalSummary.evalId).toBe('eval-001');
381
+ expect(evalSummary.checkpointId).toBe(checkpointId);
382
+ expect(evalSummary.verdict).toBe('pass');
383
+ expect(evalSummary.createdAt).toBeDefined(); // createdAt is not in the input above, so the returned record is expected to supply it
384
+ });
385
+
386
+ it('updates the checkpoint lastEvalSummaryRef', () => {
387
+ attachEvalSummary(tmpDir, checkpointId, {
388
+ evalId: 'eval-002',
389
+ checkpointId,
390
+ targetModelFamily: 'gpt-4',
391
+ benchmarkId: 'bench-002',
392
+ mode: 'prompt_assisted',
393
+ baselineScore: 0.5,
394
+ candidateScore: 0.7,
395
+ delta: 0.2,
396
+ verdict: 'pass',
397
+ });
398
+
399
+ const ck = getCheckpoint(tmpDir, checkpointId);
400
+ expect(ck!.lastEvalSummaryRef).toBe('eval-002'); // the checkpoint should now reference the evalId attached above
401
+ });
402
+
403
+ it('can retrieve the attached eval summary', () => {
404
+ attachEvalSummary(tmpDir, checkpointId, {
405
+ evalId: 'eval-003',
406
+ checkpointId,
407
+ targetModelFamily: 'gpt-4',
408
+ benchmarkId: 'bench-003',
409
+ mode: 'reduced_prompt',
410
+ baselineScore: 0.5,
411
+ candidateScore: 0.6,
412
+ delta: 0.1,
413
+ verdict: 'pass',
414
+ });
415
+
416
+ const eval_ = getEvalSummary(tmpDir, 'eval-003'); // looked up by evalId alone, not via the checkpoint
417
+ expect(eval_).not.toBeNull();
418
+ expect(eval_!.checkpointId).toBe(checkpointId);
419
+ });
420
+
421
+ it('listEvalSummaries filters by checkpointId', () => {
422
+ attachEvalSummary(tmpDir, checkpointId, {
423
+ evalId: 'eval-ck1',
424
+ checkpointId,
425
+ targetModelFamily: 'gpt-4',
426
+ benchmarkId: 'bench-1',
427
+ mode: 'reduced_prompt',
428
+ baselineScore: 0.5,
429
+ candidateScore: 0.6,
430
+ delta: 0.1,
431
+ verdict: 'pass',
432
+ });
433
+
434
+ const evals = listEvalSummaries(tmpDir, { checkpointId });
435
+ expect(evals.map((e) => e.evalId)).toContain('eval-ck1');
436
+ });
437
+
438
+ it('throws if eval targetModelFamily does not match checkpoint family', () => {
439
+ expect(() =>
440
+ attachEvalSummary(tmpDir, checkpointId, {
441
+ evalId: 'eval-wrong-family',
442
+ checkpointId,
443
+ targetModelFamily: 'claude-3', // Does not match checkpoint's gpt-4
444
+ benchmarkId: 'bench-family',
445
+ mode: 'reduced_prompt',
446
+ baselineScore: 0.5,
447
+ candidateScore: 0.6,
448
+ delta: 0.1,
449
+ verdict: 'pass',
450
+ })
451
+ ).toThrow('Family mismatch');
452
+ });
453
+ });
454
+
455
+ // ---------------------------------------------------------------------------
456
+ // Tests: Deployability Gating
457
+ // ---------------------------------------------------------------------------
458
+
459
+ describe('ModelTrainingRegistry deployability gating', () => { // Suite: conditions under which markCheckpointDeployable accepts or rejects a checkpoint.
460
+ let tmpDir: string;
461
+ let runId: string;
462
+ let checkpointId: string;
463
+
464
+ beforeEach(() => { // fresh tmpDir with one gpt-4 run (not yet started) and one checkpoint on it
465
+ tmpDir = makeTmpDir();
466
+ const run = registerTrainingRun(tmpDir, {
467
+ targetModelFamily: 'gpt-4',
468
+ datasetFingerprint: 'sha256-abc',
469
+ exportId: 'exp-1',
470
+ sampleCount: 50,
471
+ configFingerprint: 'cfg-v1',
472
+ });
473
+ runId = run.trainRunId;
474
+ const ck = registerCheckpoint(tmpDir, {
475
+ trainRunId: runId,
476
+ targetModelFamily: 'gpt-4',
477
+ artifactPath: '/ck.safetensors',
478
+ });
479
+ checkpointId = ck.checkpointId;
480
+ });
481
+
482
+ afterEach(() => { // hand this test's tmpDir to the rmdir helper
483
+ rmdir(tmpDir);
484
+ });
485
+
486
+ it('cannot mark deployable without eval summary', () => {
487
+ expect(() => markCheckpointDeployable(tmpDir, checkpointId, true)).toThrow(
488
+ 'no eval summary attached'
489
+ );
490
+ });
491
+
492
+ it('cannot mark deployable with failed eval', () => {
493
+ attachEvalSummary(tmpDir, checkpointId, {
494
+ evalId: 'eval-fail',
495
+ checkpointId,
496
+ targetModelFamily: 'gpt-4',
497
+ benchmarkId: 'bench-fail',
498
+ mode: 'reduced_prompt',
499
+ baselineScore: 0.5,
500
+ candidateScore: 0.3,
501
+ delta: -0.2,
502
+ verdict: 'fail', // the verdict named in the expected error message below
503
+ });
504
+
505
+ expect(() => markCheckpointDeployable(tmpDir, checkpointId, true)).toThrow(
506
+ "verdict is 'fail'"
507
+ );
508
+ });
509
+
510
+ it('can mark deployable with passing eval and completed run', () => {
511
+ attachEvalSummary(tmpDir, checkpointId, {
512
+ evalId: 'eval-pass',
513
+ checkpointId,
514
+ targetModelFamily: 'gpt-4',
515
+ benchmarkId: 'bench-pass',
516
+ mode: 'reduced_prompt',
517
+ baselineScore: 0.5,
518
+ candidateScore: 0.65,
519
+ delta: 0.15,
520
+ verdict: 'pass',
521
+ });
522
+ startTrainingRun(tmpDir, runId); // walk the run through running to completed before marking
523
+ completeTrainingRun(tmpDir, runId);
524
+
525
+ const ck = markCheckpointDeployable(tmpDir, checkpointId, true);
526
+ expect(ck.deployable).toBe(true);
527
+ expect(ck.lastEvalSummaryRef).toBe('eval-pass');
528
+ });
529
+
530
+ it('can mark deployable with compare_only eval and completed run', () => {
531
+ attachEvalSummary(tmpDir, checkpointId, {
532
+ evalId: 'eval-compare',
533
+ checkpointId,
534
+ targetModelFamily: 'gpt-4',
535
+ benchmarkId: 'bench-compare',
536
+ mode: 'reduced_prompt',
537
+ baselineScore: 0.5,
538
+ candidateScore: 0.52,
539
+ delta: 0.02,
540
+ verdict: 'compare_only', // a non-'pass' verdict that this test still expects to be accepted
541
+ });
542
+ startTrainingRun(tmpDir, runId);
543
+ completeTrainingRun(tmpDir, runId);
544
+
545
+ const ck = markCheckpointDeployable(tmpDir, checkpointId, true);
546
+ expect(ck.deployable).toBe(true);
547
+ });
548
+
549
+ it('cannot mark deployable if training run is not completed', () => {
550
+ attachEvalSummary(tmpDir, checkpointId, {
551
+ evalId: 'eval-pending',
552
+ checkpointId,
553
+ targetModelFamily: 'gpt-4',
554
+ benchmarkId: 'bench-pending',
555
+ mode: 'reduced_prompt',
556
+ baselineScore: 0.5,
557
+ candidateScore: 0.7,
558
+ delta: 0.2,
559
+ verdict: 'pass',
560
+ });
561
+ // The run was never started in this test, so it is still 'pending', not completed
562
+ expect(() => markCheckpointDeployable(tmpDir, checkpointId, true)).toThrow(
563
+ "training run is in 'pending' status"
564
+ );
565
+ });
566
+
567
+ it('cannot mark deployable if training run is failed', () => {
568
+ attachEvalSummary(tmpDir, checkpointId, {
569
+ evalId: 'eval-failed-run',
570
+ checkpointId,
571
+ targetModelFamily: 'gpt-4',
572
+ benchmarkId: 'bench-failed-run',
573
+ mode: 'reduced_prompt',
574
+ baselineScore: 0.5,
575
+ candidateScore: 0.7,
576
+ delta: 0.2,
577
+ verdict: 'pass',
578
+ });
579
+ startTrainingRun(tmpDir, runId); // the run is started first, then failed
580
+ failTrainingRun(tmpDir, runId, 'CUDA out of memory');
581
+
582
+ expect(() => markCheckpointDeployable(tmpDir, checkpointId, true)).toThrow(
583
+ "training run is in 'failed' status"
584
+ );
585
+ });
586
+
587
+ it('can revoke deployability by marking false', () => {
588
+ attachEvalSummary(tmpDir, checkpointId, {
589
+ evalId: 'eval-revoke',
590
+ checkpointId,
591
+ targetModelFamily: 'gpt-4',
592
+ benchmarkId: 'bench-revoke',
593
+ mode: 'reduced_prompt',
594
+ baselineScore: 0.5,
595
+ candidateScore: 0.7,
596
+ delta: 0.2,
597
+ verdict: 'pass',
598
+ });
599
+ startTrainingRun(tmpDir, runId);
600
+ completeTrainingRun(tmpDir, runId);
601
+
602
+ markCheckpointDeployable(tmpDir, checkpointId, true);
603
+ expect(isCheckpointDeployable(tmpDir, checkpointId)).toBe(true);
604
+
605
+ const ck = markCheckpointDeployable(tmpDir, checkpointId, false); // passing false is expected to clear the flag set above
606
+ expect(ck.deployable).toBe(false);
607
+ expect(isCheckpointDeployable(tmpDir, checkpointId)).toBe(false);
608
+ });
609
+
610
+ it('isCheckpointDeployable returns false for nonexistent checkpoint', () => {
611
+ expect(isCheckpointDeployable(tmpDir, 'nonexistent-id')).toBe(false);
612
+ });
613
+
614
+ it('listDeployableCheckpoints returns only deployable checkpoints', () => {
615
+ // Create another run and checkpoint
616
+ const run2 = registerTrainingRun(tmpDir, {
617
+ targetModelFamily: 'gpt-4',
618
+ datasetFingerprint: 'sha256-def',
619
+ exportId: 'exp-2',
620
+ sampleCount: 30,
621
+ configFingerprint: 'cfg-v2',
622
+ });
623
+ const ck2 = registerCheckpoint(tmpDir, {
624
+ trainRunId: run2.trainRunId,
625
+ targetModelFamily: 'gpt-4',
626
+ artifactPath: '/ck2.safetensors',
627
+ });
628
+
629
+ // Make ck1 deployable ("ck1" is the beforeEach checkpoint, referenced here as checkpointId)
630
+ attachEvalSummary(tmpDir, checkpointId, {
631
+ evalId: 'eval-d1',
632
+ checkpointId,
633
+ targetModelFamily: 'gpt-4',
634
+ benchmarkId: 'bench-d1',
635
+ mode: 'reduced_prompt',
636
+ baselineScore: 0.5,
637
+ candidateScore: 0.7,
638
+ delta: 0.2,
639
+ verdict: 'pass',
640
+ });
641
+ startTrainingRun(tmpDir, runId);
642
+ completeTrainingRun(tmpDir, runId);
643
+ markCheckpointDeployable(tmpDir, checkpointId, true);
644
+
645
+ // ck2 is not deployable: it has no eval summary and was never marked
646
+ const deployable = listDeployableCheckpoints(tmpDir, 'gpt-4');
647
+ expect(deployable).toHaveLength(1);
648
+ expect(deployable[0].checkpointId).toBe(checkpointId);
649
+ });
650
+ });
651
+
652
+ // ---------------------------------------------------------------------------
653
+ // Tests: Lineage Tracing
654
+ // ---------------------------------------------------------------------------
655
+
656
+ describe('ModelTrainingRegistry lineage tracing', () => { // Suite: getCheckpointLineage results with and without an attached eval summary.
657
+ let tmpDir: string;
658
+ let runId: string;
659
+ let checkpointId: string;
660
+
661
+ beforeEach(() => { // fresh tmpDir with one gpt-4 run and one checkpoint on it; no eval attached yet
662
+ tmpDir = makeTmpDir();
663
+ const run = registerTrainingRun(tmpDir, {
664
+ targetModelFamily: 'gpt-4',
665
+ datasetFingerprint: 'sha256-full',
666
+ exportId: 'exp-full',
667
+ sampleCount: 100,
668
+ configFingerprint: 'cfg-final',
669
+ });
670
+ runId = run.trainRunId;
671
+ const ck = registerCheckpoint(tmpDir, {
672
+ trainRunId: runId,
673
+ targetModelFamily: 'gpt-4',
674
+ artifactPath: '/final.safetensors',
675
+ });
676
+ checkpointId = ck.checkpointId;
677
+ });
678
+
679
+ afterEach(() => { // hand this test's tmpDir to the rmdir helper
680
+ rmdir(tmpDir);
681
+ });
682
+
683
+ it('getCheckpointLineage returns full chain', () => {
684
+ attachEvalSummary(tmpDir, checkpointId, {
685
+ evalId: 'eval-full',
686
+ checkpointId,
687
+ targetModelFamily: 'gpt-4',
688
+ benchmarkId: 'bench-full',
689
+ mode: 'reduced_prompt',
690
+ baselineScore: 0.4,
691
+ candidateScore: 0.6,
692
+ delta: 0.2,
693
+ verdict: 'pass',
694
+ });
695
+ startTrainingRun(tmpDir, runId); // complete the run first, then mark the checkpoint deployable
696
+ completeTrainingRun(tmpDir, runId);
697
+ markCheckpointDeployable(tmpDir, checkpointId, true);
698
+
699
+ const lineage = getCheckpointLineage(tmpDir, checkpointId)!;
700
+ expect(lineage.run.trainRunId).toBe(runId);
701
+ expect(lineage.run.status).toBe('completed');
702
+ expect(lineage.checkpoint.checkpointId).toBe(checkpointId);
703
+ expect(lineage.checkpoint.deployable).toBe(true);
704
+ expect(lineage.eval).not.toBeNull();
705
+ expect(lineage.eval!.verdict).toBe('pass');
706
+ expect(lineage.eval!.delta).toBe(0.2); // same delta value that was passed to attachEvalSummary above
707
+ });
708
+
709
+ it('getCheckpointLineage returns eval: null if no eval attached', () => {
710
+ const lineage = getCheckpointLineage(tmpDir, checkpointId)!;
711
+ expect(lineage.run).not.toBeNull();
712
+ expect(lineage.checkpoint).not.toBeNull();
713
+ expect(lineage.eval).toBeNull();
714
+ });
715
+
716
+ it('getCheckpointLineage returns null for nonexistent checkpoint', () => {
717
+ expect(getCheckpointLineage(tmpDir, 'nonexistent')).toBeNull();
718
+ });
719
+ });
720
+
721
+ // ---------------------------------------------------------------------------
722
+ // Tests: Stats
723
+ // ---------------------------------------------------------------------------
724
+
725
+ describe('ModelTrainingRegistry stats', () => { // Suite: aggregate counters reported by getTrainingRegistryStats.
726
+ let tmpDir: string;
727
+
728
+ beforeEach(() => { // only a tmpDir is prepared here; each test registers its own records
729
+ tmpDir = makeTmpDir();
730
+ });
731
+
732
+ afterEach(() => { // hand this test's tmpDir to the rmdir helper
733
+ rmdir(tmpDir);
734
+ });
735
+
736
+ it('returns zero stats for empty registry', () => {
737
+ const stats = getTrainingRegistryStats(tmpDir);
738
+ expect(stats.totalRuns).toBe(0);
739
+ expect(stats.totalCheckpoints).toBe(0);
740
+ expect(stats.totalEvals).toBe(0);
741
+ expect(stats.deployableCheckpoints).toBe(0);
742
+ });
743
+
744
+ it('counts runs in each status', () => {
745
+ const run1 = registerTrainingRun(tmpDir, {
746
+ targetModelFamily: 'gpt-4',
747
+ datasetFingerprint: 'sha256-a',
748
+ exportId: 'e1',
749
+ sampleCount: 10,
750
+ configFingerprint: 'c1',
751
+ });
752
+ const run2 = registerTrainingRun(tmpDir, {
753
+ targetModelFamily: 'gpt-4',
754
+ datasetFingerprint: 'sha256-b',
755
+ exportId: 'e2',
756
+ sampleCount: 10,
757
+ configFingerprint: 'c1',
758
+ });
759
+ const run3 = registerTrainingRun(tmpDir, { // run3 is never started below, so it is the one run expected to stay pending
760
+ targetModelFamily: 'gpt-4',
761
+ datasetFingerprint: 'sha256-c',
762
+ exportId: 'e3',
763
+ sampleCount: 10,
764
+ configFingerprint: 'c1',
765
+ });
766
+
767
+ startTrainingRun(tmpDir, run1.trainRunId); // run1: started then completed; run2: started then failed
768
+ startTrainingRun(tmpDir, run2.trainRunId);
769
+ completeTrainingRun(tmpDir, run1.trainRunId);
770
+ failTrainingRun(tmpDir, run2.trainRunId, 'err');
771
+
772
+ const stats = getTrainingRegistryStats(tmpDir);
773
+ expect(stats.totalRuns).toBe(3);
774
+ expect(stats.pendingRuns).toBe(1);
775
+ expect(stats.runningRuns).toBe(0); // both started runs were subsequently completed or failed
776
+ expect(stats.completedRuns).toBe(1);
777
+ expect(stats.failedRuns).toBe(1);
778
+ });
779
+
780
+ it('counts passing vs failing evals', () => {
781
+ const run = registerTrainingRun(tmpDir, {
782
+ targetModelFamily: 'gpt-4',
783
+ datasetFingerprint: 'sha256-abc',
784
+ exportId: 'exp-1',
785
+ sampleCount: 50,
786
+ configFingerprint: 'cfg-v1',
787
+ });
788
+ const ck = registerCheckpoint(tmpDir, {
789
+ trainRunId: run.trainRunId,
790
+ targetModelFamily: 'gpt-4',
791
+ artifactPath: '/ck.safetensors',
792
+ });
793
+
794
+ attachEvalSummary(tmpDir, ck.checkpointId, {
795
+ evalId: 'pass-eval',
796
+ checkpointId: ck.checkpointId,
797
+ targetModelFamily: 'gpt-4',
798
+ benchmarkId: 'bench-1',
799
+ mode: 'reduced_prompt',
800
+ baselineScore: 0.5,
801
+ candidateScore: 0.7,
802
+ delta: 0.2,
803
+ verdict: 'pass',
804
+ });
805
+ attachEvalSummary(tmpDir, ck.checkpointId, { // second eval on the same checkpoint, this one with a 'fail' verdict
806
+ evalId: 'fail-eval',
807
+ checkpointId: ck.checkpointId,
808
+ targetModelFamily: 'gpt-4',
809
+ benchmarkId: 'bench-2',
810
+ mode: 'reduced_prompt',
811
+ baselineScore: 0.5,
812
+ candidateScore: 0.3,
813
+ delta: -0.2,
814
+ verdict: 'fail',
815
+ });
816
+
817
+ const stats = getTrainingRegistryStats(tmpDir);
818
+ expect(stats.totalEvals).toBe(2);
819
+ expect(stats.passingEvals).toBe(1);
820
+ expect(stats.failingEvals).toBe(1);
821
+ });
822
+ });
823
+
824
+ // ---------------------------------------------------------------------------
825
+ // Tests: Registry Persistence
826
+ // ---------------------------------------------------------------------------
827
+
828
+ describe('ModelTrainingRegistry persistence', () => { // Suite: reading the whole registry back and checking the registry file on disk.
829
+ let tmpDir: string;
830
+
831
+ beforeEach(() => { // only a tmpDir is prepared here; each test registers its own records
832
+ tmpDir = makeTmpDir();
833
+ });
834
+
835
+ afterEach(() => { // hand this test's tmpDir to the rmdir helper
836
+ rmdir(tmpDir);
837
+ });
838
+
839
+ it('getFullRegistry returns all record types', () => {
840
+ const run = registerTrainingRun(tmpDir, {
841
+ targetModelFamily: 'gpt-4',
842
+ datasetFingerprint: 'sha256-abc',
843
+ exportId: 'exp-1',
844
+ sampleCount: 50,
845
+ configFingerprint: 'cfg-v1',
846
+ });
847
+ const ck = registerCheckpoint(tmpDir, {
848
+ trainRunId: run.trainRunId,
849
+ targetModelFamily: 'gpt-4',
850
+ artifactPath: '/ck.safetensors',
851
+ });
852
+ attachEvalSummary(tmpDir, ck.checkpointId, {
853
+ evalId: 'eval-1',
854
+ checkpointId: ck.checkpointId,
855
+ targetModelFamily: 'gpt-4',
856
+ benchmarkId: 'bench-1',
857
+ mode: 'reduced_prompt',
858
+ baselineScore: 0.5,
859
+ candidateScore: 0.7,
860
+ delta: 0.2,
861
+ verdict: 'pass',
862
+ });
863
+
864
+ const registry = getFullRegistry(tmpDir); // one record of each kind was created above
865
+ expect(registry.trainingRuns).toHaveLength(1);
866
+ expect(registry.checkpoints).toHaveLength(1);
867
+ expect(registry.evalSummaries).toHaveLength(1);
868
+ });
869
+
870
+ it('registry persists across module re-invocations', () => {
871
+ // This test verifies the registry is written to disk
872
+ const run = registerTrainingRun(tmpDir, {
873
+ targetModelFamily: 'gpt-4',
874
+ datasetFingerprint: 'sha256-abc',
875
+ exportId: 'exp-1',
876
+ sampleCount: 50,
877
+ configFingerprint: 'cfg-v1',
878
+ });
879
+
880
+ // Simulate re-invocation by reading the file from disk directly, bypassing the registry API
881
+ const registryPath = path.join(tmpDir, '.state', 'nocturnal', 'training-registry.json'); // location under tmpDir where this test expects the registry file
882
+ expect(fs.existsSync(registryPath)).toBe(true);
883
+
884
+ const rawContent = fs.readFileSync(registryPath, 'utf-8');
885
+ const parsed = JSON.parse(rawContent);
886
+ expect(parsed.trainingRuns).toHaveLength(1);
887
+ expect(parsed.trainingRuns[0].trainRunId).toBe(run.trainRunId);
888
+ });
889
+ });