principles-disciple 1.8.0 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ADVANCED_CONFIG_ZH.md +97 -0
- package/AGENT_INSTALL.md +173 -0
- package/AGENT_INSTALL_EN.md +173 -0
- package/INSTALL.md +256 -0
- package/SKILL.md +63 -0
- package/docs/COMMAND_REFERENCE.md +76 -0
- package/docs/COMMAND_REFERENCE_EN.md +79 -0
- package/esbuild.config.js +75 -0
- package/openclaw.plugin.json +6 -1
- package/package.json +13 -15
- package/scripts/build-web.mjs +46 -0
- package/scripts/install-dependencies.cjs +47 -0
- package/scripts/sync-plugin.mjs +802 -0
- package/scripts/verify-build.mjs +109 -0
- package/src/agents/nocturnal-dreamer.md +152 -0
- package/src/agents/nocturnal-philosopher.md +138 -0
- package/src/agents/nocturnal-reflector.md +126 -0
- package/src/agents/nocturnal-scribe.md +164 -0
- package/src/commands/capabilities.ts +85 -0
- package/{dist/commands/context.js → src/commands/context.ts} +78 -38
- package/src/commands/evolution-status.ts +146 -0
- package/src/commands/export.ts +111 -0
- package/src/commands/focus.ts +533 -0
- package/src/commands/nocturnal-review.ts +311 -0
- package/src/commands/nocturnal-rollout.ts +763 -0
- package/src/commands/nocturnal-train.ts +1002 -0
- package/{dist/commands/pain.js → src/commands/pain.ts} +68 -49
- package/src/commands/principle-rollback.ts +27 -0
- package/{dist/commands/rollback.js → src/commands/rollback.ts} +44 -12
- package/src/commands/samples.ts +60 -0
- package/src/commands/strategy.ts +38 -0
- package/{dist/commands/thinking-os.js → src/commands/thinking-os.ts} +59 -36
- package/src/commands/workflow-debug.ts +128 -0
- package/{dist/config/defaults/runtime.js → src/config/defaults/runtime.ts} +12 -5
- package/src/config/errors.ts +163 -0
- package/{dist/config/index.d.ts → src/config/index.ts} +2 -1
- package/src/constants/diagnostician.ts +66 -0
- package/src/constants/tools.ts +62 -0
- package/src/core/adaptive-thresholds.ts +476 -0
- package/{dist/core/config-service.js → src/core/config-service.ts} +7 -4
- package/{dist/core/config.js → src/core/config.ts} +158 -46
- package/src/core/control-ui-db.ts +435 -0
- package/{dist/core/detection-funnel.js → src/core/detection-funnel.ts} +36 -21
- package/{dist/core/detection-service.js → src/core/detection-service.ts} +7 -4
- package/{dist/core/dictionary-service.js → src/core/dictionary-service.ts} +7 -4
- package/{dist/core/dictionary.js → src/core/dictionary.ts} +57 -34
- package/src/core/empathy-keyword-matcher.ts +327 -0
- package/src/core/empathy-types.ts +218 -0
- package/src/core/event-log.ts +544 -0
- package/src/core/evolution-engine.ts +612 -0
- package/src/core/evolution-logger.ts +353 -0
- package/src/core/evolution-migration.ts +77 -0
- package/src/core/evolution-reducer.ts +731 -0
- package/src/core/evolution-types.ts +456 -0
- package/src/core/external-training-contract.ts +527 -0
- package/src/core/focus-history.ts +1458 -0
- package/src/core/hygiene/tracker.ts +117 -0
- package/{dist/core/init.js → src/core/init.ts} +39 -26
- package/src/core/local-worker-routing.ts +617 -0
- package/{dist/core/migration.js → src/core/migration.ts} +18 -11
- package/src/core/model-deployment-registry.ts +722 -0
- package/src/core/model-training-registry.ts +813 -0
- package/src/core/nocturnal-arbiter.ts +706 -0
- package/src/core/nocturnal-candidate-scoring.ts +392 -0
- package/src/core/nocturnal-compliance.ts +1075 -0
- package/src/core/nocturnal-dataset.ts +668 -0
- package/src/core/nocturnal-executability.ts +428 -0
- package/src/core/nocturnal-export.ts +390 -0
- package/{dist/core/nocturnal-paths.js → src/core/nocturnal-paths.ts} +49 -23
- package/src/core/nocturnal-trajectory-extractor.ts +484 -0
- package/src/core/nocturnal-trinity.ts +1384 -0
- package/src/core/pain.ts +122 -0
- package/{dist/core/path-resolver.js → src/core/path-resolver.ts} +157 -36
- package/{dist/core/paths.js → src/core/paths.ts} +13 -4
- package/src/core/principle-training-state.ts +450 -0
- package/src/core/profile.ts +226 -0
- package/src/core/promotion-gate.ts +822 -0
- package/{dist/core/risk-calculator.js → src/core/risk-calculator.ts} +42 -16
- package/{dist/core/session-tracker.js → src/core/session-tracker.ts} +185 -63
- package/src/core/shadow-observation-registry.ts +534 -0
- package/{dist/core/system-logger.js → src/core/system-logger.ts} +9 -5
- package/src/core/thinking-models.ts +217 -0
- package/src/core/training-program.ts +630 -0
- package/src/core/trajectory-types.ts +243 -0
- package/src/core/trajectory.ts +1673 -0
- package/{dist/core/workspace-context.js → src/core/workspace-context.ts} +57 -32
- package/src/hooks/bash-risk.ts +171 -0
- package/src/hooks/edit-verification.ts +295 -0
- package/src/hooks/gate-block-helper.ts +160 -0
- package/src/hooks/gate.ts +210 -0
- package/src/hooks/gfi-gate.ts +177 -0
- package/src/hooks/lifecycle.ts +326 -0
- package/{dist/hooks/llm.js → src/hooks/llm.ts} +166 -139
- package/src/hooks/message-sanitize.ts +45 -0
- package/src/hooks/pain.ts +384 -0
- package/src/hooks/progressive-trust-gate.ts +174 -0
- package/src/hooks/prompt.ts +920 -0
- package/src/hooks/subagent.ts +207 -0
- package/src/hooks/thinking-checkpoint.ts +73 -0
- package/src/hooks/trajectory-collector.ts +290 -0
- package/src/http/principles-console-route.ts +716 -0
- package/src/i18n/commands.ts +117 -0
- package/src/index.ts +694 -0
- package/src/service/central-database.ts +831 -0
- package/src/service/control-ui-query-service.ts +888 -0
- package/src/service/evolution-query-service.ts +405 -0
- package/src/service/evolution-worker.ts +1646 -0
- package/src/service/health-query-service.ts +836 -0
- package/{dist/service/nocturnal-runtime.js → src/service/nocturnal-runtime.ts} +263 -36
- package/src/service/nocturnal-service.ts +1015 -0
- package/src/service/nocturnal-target-selector.ts +532 -0
- package/src/service/phase3-input-filter.ts +237 -0
- package/src/service/runtime-summary-service.ts +757 -0
- package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +513 -0
- package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +603 -0
- package/src/service/subagent-workflow/index.ts +51 -0
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +856 -0
- package/src/service/subagent-workflow/runtime-direct-driver.ts +166 -0
- package/src/service/subagent-workflow/types.ts +378 -0
- package/src/service/subagent-workflow/workflow-store.ts +328 -0
- package/src/service/trajectory-service.ts +15 -0
- package/{dist/tools/critique-prompt.js → src/tools/critique-prompt.ts} +25 -8
- package/src/tools/deep-reflect.ts +349 -0
- package/{dist/tools/model-index.js → src/tools/model-index.ts} +33 -17
- package/src/types/event-types.ts +453 -0
- package/src/types/hygiene-types.ts +31 -0
- package/src/types/principle-tree-schema.ts +244 -0
- package/src/types/runtime-summary.ts +49 -0
- package/src/types.ts +74 -0
- package/src/utils/file-lock.ts +391 -0
- package/{dist/utils/glob-match.js → src/utils/glob-match.ts} +21 -20
- package/{dist/utils/hashing.js → src/utils/hashing.ts} +6 -4
- package/src/utils/io.ts +110 -0
- package/{dist/utils/nlp.js → src/utils/nlp.ts} +19 -12
- package/{dist/utils/plugin-logger.js → src/utils/plugin-logger.ts} +33 -8
- package/src/utils/subagent-probe.ts +94 -0
- package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +70 -1
- package/templates/pain_settings.json +2 -1
- package/tests/README.md +120 -0
- package/tests/build-artifacts.test.ts +111 -0
- package/tests/commands/evolution-status.test.ts +222 -0
- package/tests/commands/evolver.test.ts +22 -0
- package/tests/commands/export.test.ts +78 -0
- package/tests/commands/nocturnal-review.test.ts +448 -0
- package/tests/commands/nocturnal-train.test.ts +97 -0
- package/tests/commands/pain.test.ts +108 -0
- package/tests/commands/samples.test.ts +65 -0
- package/tests/commands/strategy.test.ts +34 -0
- package/tests/commands/thinking-os.test.ts +88 -0
- package/tests/core/adaptive-thresholds.test.ts +261 -0
- package/tests/core/config-service.test.ts +89 -0
- package/tests/core/config.test.ts +90 -0
- package/tests/core/control-ui-db.test.ts +75 -0
- package/tests/core/core-template-guidance.test.ts +21 -0
- package/tests/core/detection-funnel.test.ts +63 -0
- package/tests/core/detection-service.test.ts +50 -0
- package/tests/core/dictionary-service.test.ts +116 -0
- package/tests/core/dictionary.test.ts +168 -0
- package/tests/core/empathy-keyword-matcher.test.ts +209 -0
- package/tests/core/event-log.test.ts +181 -0
- package/tests/core/evolution-e2e.test.ts +58 -0
- package/tests/core/evolution-engine-gate-integration.test.ts +543 -0
- package/tests/core/evolution-engine.test.ts +562 -0
- package/tests/core/evolution-logger.test.ts +148 -0
- package/tests/core/evolution-migration.test.ts +50 -0
- package/tests/core/evolution-paths.test.ts +21 -0
- package/tests/core/evolution-reducer.detector-metadata.test.ts +602 -0
- package/tests/core/evolution-reducer.test.ts +180 -0
- package/tests/core/evolution-types-loop.test.ts +48 -0
- package/tests/core/evolution-user-stories.e2e.test.ts +249 -0
- package/tests/core/external-training-contract.test.ts +463 -0
- package/tests/core/focus-history.test.ts +682 -0
- package/tests/core/init-flatten.test.ts +69 -0
- package/tests/core/init-refactor.test.ts +87 -0
- package/tests/core/init-v1.3.test.ts +46 -0
- package/tests/core/init.test.ts +190 -0
- package/tests/core/local-worker-routing.test.ts +757 -0
- package/tests/core/migration.test.ts +84 -0
- package/tests/core/model-deployment-registry.test.ts +845 -0
- package/tests/core/model-training-registry.test.ts +889 -0
- package/tests/core/nocturnal-arbiter.test.ts +494 -0
- package/tests/core/nocturnal-candidate-scoring.test.ts +400 -0
- package/tests/core/nocturnal-compliance.test.ts +646 -0
- package/tests/core/nocturnal-dataset.test.ts +892 -0
- package/tests/core/nocturnal-executability.test.ts +357 -0
- package/tests/core/nocturnal-export.test.ts +462 -0
- package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +428 -0
- package/tests/core/nocturnal-trajectory-extractor.test.ts +634 -0
- package/tests/core/nocturnal-trinity.test.ts +953 -0
- package/tests/core/pain.test.ts +33 -0
- package/tests/core/path-resolver.test.ts +57 -0
- package/tests/core/paths-refactor.test.ts +42 -0
- package/tests/core/phase7-rollout-integration.test.ts +477 -0
- package/tests/core/principle-training-state.test.ts +712 -0
- package/tests/core/profile.test.ts +56 -0
- package/tests/core/promotion-gate.test.ts +556 -0
- package/tests/core/risk-calculator.test.ts +168 -0
- package/tests/core/session-tracker.test.ts +191 -0
- package/tests/core/training-program.test.ts +472 -0
- package/tests/core/trajectory.test.ts +265 -0
- package/tests/core/workspace-context-factory.test.ts +18 -0
- package/tests/core/workspace-context.test.ts +134 -0
- package/tests/fixtures/nocturnal-reviewed-subset.json +183 -0
- package/tests/fixtures/production-compatibility.test.ts +147 -0
- package/tests/fixtures/production-mock-generator.ts +282 -0
- package/tests/hooks/bash-risk-integration.test.ts +137 -0
- package/tests/hooks/bash-risk.test.ts +81 -0
- package/tests/hooks/edit-verification.test.ts +678 -0
- package/tests/hooks/gate-edit-verification-p1.test.ts +632 -0
- package/tests/hooks/gate-edit-verification.test.ts +435 -0
- package/tests/hooks/gate-pipeline-integration.test.ts +404 -0
- package/tests/hooks/gate.test.ts +271 -0
- package/tests/hooks/gfi-gate-unit.test.ts +422 -0
- package/tests/hooks/gfi-gate.test.ts +669 -0
- package/tests/hooks/lifecycle.test.ts +248 -0
- package/tests/hooks/llm.test.ts +308 -0
- package/tests/hooks/message-sanitize.test.ts +36 -0
- package/tests/hooks/pain.test.ts +141 -0
- package/tests/hooks/progressive-trust-gate.test.ts +277 -0
- package/tests/hooks/prompt.test.ts +1411 -0
- package/tests/hooks/subagent.test.ts +467 -0
- package/tests/hooks/thinking-gate.test.ts +313 -0
- package/tests/http/principles-console-route.test.ts +140 -0
- package/tests/hygiene-tracker.test.ts +77 -0
- package/tests/index.integration.test.ts +179 -0
- package/tests/index.shadow-routing.integration.test.ts +140 -0
- package/tests/index.test.ts +9 -0
- package/tests/integration/empathy-workflow-integration.test.ts +627 -0
- package/tests/service/control-ui-query-service.test.ts +121 -0
- package/tests/service/empathy-observer-workflow-manager.test.ts +176 -0
- package/tests/service/evolution-worker.test.ts +585 -0
- package/tests/service/nocturnal-runtime.test.ts +470 -0
- package/tests/service/nocturnal-service.test.ts +577 -0
- package/tests/service/nocturnal-target-selector.test.ts +615 -0
- package/tests/service/nocturnal-workflow-manager.test.ts +439 -0
- package/tests/service/phase3-input-filter.test.ts +289 -0
- package/tests/service/runtime-summary-service.test.ts +919 -0
- package/tests/task-compliance.test.ts +166 -0
- package/tests/test-utils.ts +48 -0
- package/tests/tools/critique-prompt.test.ts +260 -0
- package/tests/tools/deep-reflect.test.ts +232 -0
- package/tests/tools/model-index.test.ts +246 -0
- package/tests/ui/app.test.tsx +114 -0
- package/tests/utils/file-lock.test.ts +407 -0
- package/tests/utils/hashing.test.ts +32 -0
- package/tests/utils/io.test.ts +39 -0
- package/tests/utils/nlp.test.ts +53 -0
- package/tests/utils/plugin-logger.test.ts +156 -0
- package/tsconfig.json +16 -0
- package/tsconfig.tsbuildinfo +1 -0
- package/ui/src/App.tsx +45 -0
- package/ui/src/api.ts +216 -0
- package/ui/src/charts.tsx +586 -0
- package/ui/src/components/ErrorState.tsx +6 -0
- package/ui/src/components/Loading.tsx +13 -0
- package/ui/src/components/ProtectedRoute.tsx +12 -0
- package/ui/src/components/Shell.tsx +91 -0
- package/ui/src/components/WorkspaceConfig.tsx +146 -0
- package/ui/src/components/index.ts +5 -0
- package/ui/src/context/auth.tsx +80 -0
- package/ui/src/context/theme.tsx +66 -0
- package/ui/src/hooks/useAutoRefresh.ts +39 -0
- package/ui/src/i18n/ui.ts +363 -0
- package/ui/src/main.tsx +16 -0
- package/ui/src/pages/EvolutionPage.tsx +352 -0
- package/ui/src/pages/FeedbackPage.tsx +140 -0
- package/ui/src/pages/GateMonitorPage.tsx +136 -0
- package/ui/src/pages/LoginPage.tsx +88 -0
- package/ui/src/pages/OverviewPage.tsx +238 -0
- package/ui/src/pages/SamplesPage.tsx +174 -0
- package/ui/src/pages/ThinkingModelsPage.tsx +127 -0
- package/ui/src/styles.css +1661 -0
- package/ui/src/types.ts +368 -0
- package/ui/src/utils/format.ts +15 -0
- package/vitest.config.ts +23 -0
- package/dist/commands/capabilities.d.ts +0 -3
- package/dist/commands/capabilities.js +0 -73
- package/dist/commands/context.d.ts +0 -5
- package/dist/commands/evolution-status.d.ts +0 -4
- package/dist/commands/evolution-status.js +0 -117
- package/dist/commands/evolver.d.ts +0 -9
- package/dist/commands/evolver.js +0 -26
- package/dist/commands/export.d.ts +0 -2
- package/dist/commands/export.js +0 -98
- package/dist/commands/focus.d.ts +0 -14
- package/dist/commands/focus.js +0 -457
- package/dist/commands/nocturnal-review.d.ts +0 -24
- package/dist/commands/nocturnal-review.js +0 -265
- package/dist/commands/nocturnal-rollout.d.ts +0 -27
- package/dist/commands/nocturnal-rollout.js +0 -671
- package/dist/commands/nocturnal-train.d.ts +0 -25
- package/dist/commands/nocturnal-train.js +0 -919
- package/dist/commands/pain.d.ts +0 -5
- package/dist/commands/principle-rollback.d.ts +0 -4
- package/dist/commands/principle-rollback.js +0 -22
- package/dist/commands/rollback.d.ts +0 -19
- package/dist/commands/samples.d.ts +0 -2
- package/dist/commands/samples.js +0 -55
- package/dist/commands/strategy.d.ts +0 -3
- package/dist/commands/strategy.js +0 -29
- package/dist/commands/thinking-os.d.ts +0 -2
- package/dist/config/defaults/runtime.d.ts +0 -40
- package/dist/config/errors.d.ts +0 -84
- package/dist/config/errors.js +0 -94
- package/dist/config/index.js +0 -7
- package/dist/constants/diagnostician.d.ts +0 -12
- package/dist/constants/diagnostician.js +0 -56
- package/dist/constants/tools.d.ts +0 -17
- package/dist/constants/tools.js +0 -54
- package/dist/core/adaptive-thresholds.d.ts +0 -186
- package/dist/core/adaptive-thresholds.js +0 -300
- package/dist/core/config-service.d.ts +0 -15
- package/dist/core/config.d.ts +0 -127
- package/dist/core/control-ui-db.d.ts +0 -95
- package/dist/core/control-ui-db.js +0 -292
- package/dist/core/detection-funnel.d.ts +0 -33
- package/dist/core/detection-service.d.ts +0 -15
- package/dist/core/dictionary-service.d.ts +0 -15
- package/dist/core/dictionary.d.ts +0 -38
- package/dist/core/event-log.d.ts +0 -82
- package/dist/core/event-log.js +0 -463
- package/dist/core/evolution-engine.d.ts +0 -118
- package/dist/core/evolution-engine.js +0 -464
- package/dist/core/evolution-logger.d.ts +0 -137
- package/dist/core/evolution-logger.js +0 -256
- package/dist/core/evolution-migration.d.ts +0 -5
- package/dist/core/evolution-migration.js +0 -65
- package/dist/core/evolution-reducer.d.ts +0 -98
- package/dist/core/evolution-reducer.js +0 -465
- package/dist/core/evolution-types.d.ts +0 -287
- package/dist/core/evolution-types.js +0 -78
- package/dist/core/external-training-contract.d.ts +0 -276
- package/dist/core/external-training-contract.js +0 -269
- package/dist/core/focus-history.d.ts +0 -210
- package/dist/core/focus-history.js +0 -1185
- package/dist/core/hygiene/tracker.d.ts +0 -22
- package/dist/core/hygiene/tracker.js +0 -106
- package/dist/core/init.d.ts +0 -12
- package/dist/core/local-worker-routing.d.ts +0 -175
- package/dist/core/local-worker-routing.js +0 -525
- package/dist/core/migration.d.ts +0 -6
- package/dist/core/model-deployment-registry.d.ts +0 -218
- package/dist/core/model-deployment-registry.js +0 -503
- package/dist/core/model-training-registry.d.ts +0 -295
- package/dist/core/model-training-registry.js +0 -475
- package/dist/core/nocturnal-arbiter.d.ts +0 -159
- package/dist/core/nocturnal-arbiter.js +0 -534
- package/dist/core/nocturnal-candidate-scoring.d.ts +0 -137
- package/dist/core/nocturnal-candidate-scoring.js +0 -266
- package/dist/core/nocturnal-compliance.d.ts +0 -175
- package/dist/core/nocturnal-compliance.js +0 -824
- package/dist/core/nocturnal-dataset.d.ts +0 -224
- package/dist/core/nocturnal-dataset.js +0 -443
- package/dist/core/nocturnal-executability.d.ts +0 -85
- package/dist/core/nocturnal-executability.js +0 -331
- package/dist/core/nocturnal-export.d.ts +0 -124
- package/dist/core/nocturnal-export.js +0 -275
- package/dist/core/nocturnal-paths.d.ts +0 -124
- package/dist/core/nocturnal-trajectory-extractor.d.ts +0 -242
- package/dist/core/nocturnal-trajectory-extractor.js +0 -307
- package/dist/core/nocturnal-trinity.d.ts +0 -311
- package/dist/core/nocturnal-trinity.js +0 -880
- package/dist/core/pain.d.ts +0 -4
- package/dist/core/pain.js +0 -70
- package/dist/core/path-resolver.d.ts +0 -46
- package/dist/core/paths.d.ts +0 -65
- package/dist/core/principle-training-state.d.ts +0 -121
- package/dist/core/principle-training-state.js +0 -321
- package/dist/core/profile.d.ts +0 -62
- package/dist/core/profile.js +0 -210
- package/dist/core/promotion-gate.d.ts +0 -238
- package/dist/core/promotion-gate.js +0 -529
- package/dist/core/risk-calculator.d.ts +0 -22
- package/dist/core/session-tracker.d.ts +0 -99
- package/dist/core/shadow-observation-registry.d.ts +0 -217
- package/dist/core/shadow-observation-registry.js +0 -308
- package/dist/core/system-logger.d.ts +0 -8
- package/dist/core/thinking-models.d.ts +0 -38
- package/dist/core/thinking-models.js +0 -170
- package/dist/core/training-program.d.ts +0 -233
- package/dist/core/training-program.js +0 -433
- package/dist/core/trajectory.d.ts +0 -411
- package/dist/core/trajectory.js +0 -1307
- package/dist/core/workspace-context.d.ts +0 -71
- package/dist/hooks/bash-risk.d.ts +0 -57
- package/dist/hooks/bash-risk.js +0 -137
- package/dist/hooks/edit-verification.d.ts +0 -62
- package/dist/hooks/edit-verification.js +0 -256
- package/dist/hooks/gate-block-helper.d.ts +0 -44
- package/dist/hooks/gate-block-helper.js +0 -119
- package/dist/hooks/gate.d.ts +0 -24
- package/dist/hooks/gate.js +0 -173
- package/dist/hooks/gfi-gate.d.ts +0 -40
- package/dist/hooks/gfi-gate.js +0 -113
- package/dist/hooks/lifecycle.d.ts +0 -5
- package/dist/hooks/lifecycle.js +0 -284
- package/dist/hooks/llm.d.ts +0 -12
- package/dist/hooks/message-sanitize.d.ts +0 -3
- package/dist/hooks/message-sanitize.js +0 -37
- package/dist/hooks/pain.d.ts +0 -5
- package/dist/hooks/pain.js +0 -301
- package/dist/hooks/progressive-trust-gate.d.ts +0 -51
- package/dist/hooks/progressive-trust-gate.js +0 -89
- package/dist/hooks/prompt.d.ts +0 -47
- package/dist/hooks/prompt.js +0 -884
- package/dist/hooks/subagent.d.ts +0 -10
- package/dist/hooks/subagent.js +0 -387
- package/dist/hooks/thinking-checkpoint.d.ts +0 -37
- package/dist/hooks/thinking-checkpoint.js +0 -51
- package/dist/hooks/trajectory-collector.d.ts +0 -32
- package/dist/hooks/trajectory-collector.js +0 -256
- package/dist/http/principles-console-route.d.ts +0 -9
- package/dist/http/principles-console-route.js +0 -567
- package/dist/i18n/commands.d.ts +0 -26
- package/dist/i18n/commands.js +0 -116
- package/dist/index.d.ts +0 -7
- package/dist/index.js +0 -581
- package/dist/service/central-database.d.ts +0 -104
- package/dist/service/central-database.js +0 -649
- package/dist/service/control-ui-query-service.d.ts +0 -221
- package/dist/service/control-ui-query-service.js +0 -543
- package/dist/service/empathy-observer-manager.d.ts +0 -52
- package/dist/service/empathy-observer-manager.js +0 -229
- package/dist/service/evolution-query-service.d.ts +0 -155
- package/dist/service/evolution-query-service.js +0 -258
- package/dist/service/evolution-worker.d.ts +0 -101
- package/dist/service/evolution-worker.js +0 -974
- package/dist/service/nocturnal-runtime.d.ts +0 -183
- package/dist/service/nocturnal-service.d.ts +0 -163
- package/dist/service/nocturnal-service.js +0 -787
- package/dist/service/nocturnal-target-selector.d.ts +0 -145
- package/dist/service/nocturnal-target-selector.js +0 -315
- package/dist/service/phase3-input-filter.d.ts +0 -73
- package/dist/service/phase3-input-filter.js +0 -172
- package/dist/service/runtime-summary-service.d.ts +0 -122
- package/dist/service/runtime-summary-service.js +0 -485
- package/dist/service/trajectory-service.d.ts +0 -2
- package/dist/service/trajectory-service.js +0 -15
- package/dist/tools/critique-prompt.d.ts +0 -14
- package/dist/tools/deep-reflect.d.ts +0 -39
- package/dist/tools/deep-reflect.js +0 -350
- package/dist/tools/model-index.d.ts +0 -9
- package/dist/types/event-types.d.ts +0 -306
- package/dist/types/event-types.js +0 -106
- package/dist/types/hygiene-types.d.ts +0 -20
- package/dist/types/hygiene-types.js +0 -12
- package/dist/types/runtime-summary.d.ts +0 -47
- package/dist/types/runtime-summary.js +0 -1
- package/dist/types.d.ts +0 -50
- package/dist/types.js +0 -22
- package/dist/utils/file-lock.d.ts +0 -71
- package/dist/utils/file-lock.js +0 -309
- package/dist/utils/glob-match.d.ts +0 -28
- package/dist/utils/hashing.d.ts +0 -9
- package/dist/utils/io.d.ts +0 -6
- package/dist/utils/io.js +0 -106
- package/dist/utils/nlp.d.ts +0 -9
- package/dist/utils/plugin-logger.d.ts +0 -39
- package/dist/utils/subagent-probe.d.ts +0 -34
- package/dist/utils/subagent-probe.js +0 -81
|
@@ -0,0 +1,822 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Promotion Gate — Checkpoint Promotion State Machine and Gate Logic
|
|
3
|
+
* ==================================================================
|
|
4
|
+
*
|
|
5
|
+
* PURPOSE: Control when a checkpoint can advance from training → shadow → promotion.
|
|
6
|
+
* Training success alone is not enough — a checkpoint must prove it improves
|
|
7
|
+
* bounded worker behavior under the existing offline benchmark and does not
|
|
8
|
+
* regress runtime safety signals.
|
|
9
|
+
*
|
|
10
|
+
* PROMOTION STATES:
|
|
11
|
+
* - rejected: The checkpoint must not be routed
|
|
12
|
+
* - candidate_only: The checkpoint is valid but not yet ready for shadow
|
|
13
|
+
* - shadow_ready: The checkpoint may enter controlled shadow rollout
|
|
14
|
+
* - promotable: The checkpoint may replace the active checkpoint
|
|
15
|
+
*
|
|
16
|
+
* STATE TRANSITIONS:
|
|
17
|
+
* training_completed
|
|
18
|
+
* ↓
|
|
19
|
+
* candidate_only ←── (eval attached, lineage complete)
|
|
20
|
+
* ↓
|
|
21
|
+
* shadow_ready ←── (positive delta, safe constraints)
|
|
22
|
+
* ↓
|
|
23
|
+
* promotable ←── (shadow window passed, orchestrator review passed)
|
|
24
|
+
* ↓
|
|
25
|
+
* deployed
|
|
26
|
+
*
|
|
27
|
+
* PRIMARY OBJECTIVE:
|
|
28
|
+
* maximize reduced_prompt_holdout_delta
|
|
29
|
+
*
|
|
30
|
+
* CONSTRAINT METRICS (must all pass for promotion):
|
|
31
|
+
* - arbiterRejectRate <= baseline + allowedMargin
|
|
32
|
+
* - executabilityRejectRate <= baseline + allowedMargin
|
|
33
|
+
* - reviewedSubsetQuality >= baseline
|
|
34
|
+
* - routingScopeNotExpanded == true
|
|
35
|
+
*
|
|
36
|
+
* DESIGN CONSTRAINTS:
|
|
37
|
+
* - No automatic promotion without explicit gate approval
|
|
38
|
+
* - Orchestrator review remains mandatory for all promotions
|
|
39
|
+
* - Rollback path must always be available
|
|
40
|
+
* - First rollout limited to `local-reader` only
|
|
41
|
+
*/
|
|
42
|
+
|
|
43
|
+
import * as fs from 'fs';
|
|
44
|
+
import * as path from 'path';
|
|
45
|
+
import * as crypto from 'crypto';
|
|
46
|
+
import { withLock } from '../utils/file-lock.js';
|
|
47
|
+
import type { WorkerProfile } from './model-deployment-registry.js';
|
|
48
|
+
import {
|
|
49
|
+
getCheckpoint,
|
|
50
|
+
getEvalSummary,
|
|
51
|
+
listEvalSummaries,
|
|
52
|
+
getCheckpointLineage,
|
|
53
|
+
} from './model-training-registry.js';
|
|
54
|
+
import {
|
|
55
|
+
getDeployment,
|
|
56
|
+
getActiveCheckpointForProfile,
|
|
57
|
+
rollbackDeployment,
|
|
58
|
+
} from './model-deployment-registry.js';
|
|
59
|
+
import {
|
|
60
|
+
type TrainableWorkerProfile,
|
|
61
|
+
} from './external-training-contract.js';
|
|
62
|
+
import {
|
|
63
|
+
computeShadowStats,
|
|
64
|
+
type ShadowStats,
|
|
65
|
+
} from './shadow-observation-registry.js';
|
|
66
|
+
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
// Constants
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
/**
 * Threshold on a candidate's delta for entering `shadow_ready`:
 * the delta must exceed this value.
 */
export const DEFAULT_MIN_DELTA = 0.05;

/**
 * Default tolerance for constraint metrics.
 * A constraint metric may regress by at most this amount.
 */
export const DEFAULT_ALLOWED_MARGIN = 0.05;

/**
 * Worker profiles eligible for the Phase 7 shadow rollout.
 * Restricted to bounded local workers: `local-reader` goes first,
 * `local-editor` is deferred.
 */
const ALLOWED_ROLLOUT_PROFILES: readonly TrainableWorkerProfile[] = ['local-reader'];

/** File name of the registry that stores promotion records. */
const PROMOTION_REGISTRY_FILE = 'promotion-registry.json';

/**
 * Shortest permitted shadow window, in milliseconds.
 * A checkpoint has to stay in `shadow_ready` for at least this long before
 * it may move on to `promotable`.
 *
 * Phase 7 default: 1 hour (3600000 ms), leaving time for real-world feedback
 * ahead of full promotion.
 */
export const MIN_SHADOW_WINDOW_MS = 60 * 60 * 1000; // 1 hour
|
|
102
|
+
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
// Types
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
/**
 * Gate verdict recorded for a checkpoint.
 *
 * - `rejected`: the checkpoint must not be routed
 * - `candidate_only`: valid, but not yet ready for shadow
 * - `shadow_ready`: may enter controlled shadow rollout
 * - `promotable`: may replace the active checkpoint
 */
export type PromotionState = 'rejected' | 'candidate_only' | 'shadow_ready' | 'promotable';
|
|
115
|
+
|
|
116
|
+
/**
 * Constraint metrics evaluated by the promotion gate.
 * Per the module header, all of them must pass for a promotion.
 */
export interface PromotionConstraints {
  /** Arbiter reject rate of the candidate. */
  arbiterRejectRate: number;
  /** Executability reject rate of the candidate. */
  executabilityRejectRate: number;
  /** Quality score of the candidate on the reviewed subset. */
  reviewedSubsetQuality: number;
  /** `true` when the routing scope has not been expanded. */
  routingScopeNotExpanded: boolean;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Reference values that a candidate's constraint metrics are compared against.
 */
export interface BaselineMetrics {
  /** Baseline arbiter reject rate. */
  arbiterRejectRate: number;
  /** Baseline executability reject rate. */
  executabilityRejectRate: number;
  /** Baseline quality score on the reviewed subset. */
  reviewedSubsetQuality: number;
}
|
|
134
|
+
|
|
135
|
+
/**
 * One entry of the promotion registry: the promotion state of a checkpoint
 * together with the metrics and lineage information recorded for it.
 */
export interface PromotionRecord {
  /** Unique identifier of this promotion record. */
  promotionId: string;

  /** Identifier of the checkpoint being promoted. */
  checkpointId: string;

  /** Promotion state the checkpoint is currently in. */
  state: PromotionState;

  /** Worker profile targeted by this promotion. */
  targetProfile: TrainableWorkerProfile;

  /** Model family targeted by this promotion. */
  targetModelFamily: string;

  /** Reduced-prompt holdout delta, the primary objective. */
  reducedPromptDelta: number;

  /** Constraint metrics captured at promotion time. */
  constraintMetrics: PromotionConstraints;

  /** Baseline metrics the constraint metrics were compared with. */
  baselineMetrics: BaselineMetrics;

  /** Whether the orchestrator review was passed. */
  orchestratorReviewPassed: boolean;

  /** Optional note from a human reviewer. */
  reviewNote?: string;

  /** ISO-8601 timestamp of the most recent state change. */
  stateChangedAt: string;

  /** ISO-8601 timestamp of the record's creation. */
  createdAt: string;

  /** ISO-8601 timestamp at which the shadow window opened, when applicable. */
  shadowStartedAt?: string;

  /** ISO-8601 timestamp at which `promotable` was reached. */
  promotableAt?: string;

  /** ID of the preceding promotion record, forming the rollback chain. */
  previousPromotionId?: string;
}
|
|
184
|
+
|
|
185
|
+
/**
 * Top-level shape of the promotion registry file.
 */
export interface PromotionRegistry {
  /** Every promotion record held by the registry. */
  promotions: PromotionRecord[];
}
|
|
191
|
+
|
|
192
|
+
// ---------------------------------------------------------------------------
|
|
193
|
+
// Registry Path
|
|
194
|
+
// ---------------------------------------------------------------------------
|
|
195
|
+
|
|
196
|
+
/**
 * Build the path of the promotion registry file inside `stateDir`.
 *
 * @param stateDir Directory that holds the registry file.
 * @returns `stateDir` joined with the registry file name.
 */
function getRegistryPath(stateDir: string): string {
  const registryFile = path.join(stateDir, PROMOTION_REGISTRY_FILE);
  return registryFile;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Make sure the directory that contains the registry file exists.
 *
 * @param stateDir Directory that holds the registry file.
 * @throws Whatever `fs.mkdirSync` throws, e.g. when the path is occupied by
 *   something that is not a directory or cannot be created.
 */
function ensureRegistryDir(stateDir: string): void {
  const dir = path.dirname(getRegistryPath(stateDir));
  // With `recursive: true`, mkdirSync succeeds when the directory is already
  // present, so no separate existence probe is needed beforehand.
  fs.mkdirSync(dir, { recursive: true });
}
|
|
210
|
+
|
|
211
|
+
// ---------------------------------------------------------------------------
|
|
212
|
+
// File Operations
|
|
213
|
+
// ---------------------------------------------------------------------------
|
|
214
|
+
|
|
215
|
+
/**
 * Read the registry from disk.
 *
 * Returns an empty registry when the file is missing. When the file cannot
 * be read, is not valid JSON, or parses to something that does not carry a
 * `promotions` array, a warning is logged and an empty registry is returned
 * instead, so callers can always rely on `promotions` being an array.
 *
 * @param stateDir - Workspace state directory
 * @returns The parsed registry, or `{ promotions: [] }` as a fallback
 */
function readRegistry(stateDir: string): PromotionRegistry {
  const registryPath = getRegistryPath(stateDir);
  if (!fs.existsSync(registryPath)) {
    return { promotions: [] };
  }
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(registryPath, 'utf-8'));
    // Valid JSON is not necessarily a valid registry (e.g. `{}`, `null`, `[]`).
    // Reject those here so later `.find` / `.findIndex` calls cannot crash.
    if (
      typeof parsed !== 'object' ||
      parsed === null ||
      !Array.isArray((parsed as { promotions?: unknown }).promotions)
    ) {
      throw new Error('registry JSON is missing a "promotions" array');
    }
    return parsed as PromotionRegistry;
  } catch (err) {
    console.warn(`[promotion-gate] Registry corrupted at ${registryPath}, recovering with empty state: ${String(err)}`);
    return { promotions: [] };
  }
}
|
|
231
|
+
|
|
232
|
+
/**
 * Persist the registry to disk.
 *
 * The JSON is first written to a sibling `<registry>.tmp` file and then
 * renamed over the real registry path, so the registry file is replaced in
 * one step rather than rewritten in place.
 *
 * @param stateDir - Workspace state directory
 * @param registry - Registry contents to serialize (pretty-printed, 2 spaces)
 */
function writeRegistry(stateDir: string, registry: PromotionRegistry): void {
  ensureRegistryDir(stateDir);

  const finalPath = getRegistryPath(stateDir);
  const stagingPath = `${finalPath}.tmp`;
  const serialized = JSON.stringify(registry, null, 2);

  fs.writeFileSync(stagingPath, serialized, 'utf-8');
  fs.renameSync(stagingPath, finalPath);
}
|
|
242
|
+
|
|
243
|
+
/**
 * Run `fn` against a freshly read registry while holding the lock taken on
 * the registry path via `withLock`.
 *
 * This helper only reads the registry; it does not write it back. A callback
 * that mutates the registry must call `writeRegistry` itself before returning.
 *
 * @param stateDir - Workspace state directory
 * @param fn - Callback receiving the registry read inside the lock
 * @returns Whatever `fn` returns
 */
function withPromotionRegistryLock<T>(
  stateDir: string,
  fn: (registry: PromotionRegistry) => T
): T {
  const lockTarget = getRegistryPath(stateDir);
  const runLocked = (): T => fn(readRegistry(stateDir));
  return withLock(lockTarget, runLocked);
}
|
|
256
|
+
|
|
257
|
+
// ---------------------------------------------------------------------------
|
|
258
|
+
// Promotion Gate Logic
|
|
259
|
+
// ---------------------------------------------------------------------------
|
|
260
|
+
|
|
261
|
+
/**
 * Outcome of a promotion gate evaluation.
 */
export interface PromotionGateResult {
  /** True only when the delta check and every constraint check passed. */
  passes: boolean;

  /** State the gate suggests for the checkpoint; may be absent. */
  suggestedState?: PromotionState;

  /** Human-readable reasons the gate did not pass (empty when it passed). */
  blockers: string[];

  /** One entry per constraint metric that was evaluated. */
  constraintChecks: Array<{
    /** Name of the constraint metric. */
    constraint: string;
    /** Value observed for the checkpoint. */
    actual: number;
    /** Baseline value the checkpoint is compared against. */
    baseline: number;
    /** Limit the actual value was compared with. */
    threshold: number;
    /** Whether the actual value stayed within the threshold. */
    passed: boolean;
    /** Source of the evidence: 'shadow' (real runtime) or 'eval-proxy' (fallback) */
    source?: 'shadow' | 'eval-proxy';
  }>;

  /** Check of the primary objective (the delta). */
  deltaCheck: {
    /** Delta observed for the checkpoint. */
    actual: number;
    /** Minimum delta required. */
    threshold: number;
    /** Whether the delta met the threshold. */
    passed: boolean;
  };
}
|
|
292
|
+
|
|
293
|
+
/**
 * Input for a promotion gate evaluation.
 */
export interface EvaluateGateParams {
  /** ID of the checkpoint being evaluated. */
  checkpointId: string;

  /** Worker profile the checkpoint is intended for. */
  targetProfile: TrainableWorkerProfile;

  /** Baseline values the checkpoint's constraint metrics are compared against. */
  baselineMetrics: BaselineMetrics;

  /** Smallest delta accepted as a positive signal (optional). */
  minDelta?: number;

  /** Margin by which a constraint metric may deviate from its baseline (optional). */
  allowedMargin?: number;
}
|
|
312
|
+
|
|
313
|
+
/**
 * Evaluate whether a checkpoint passes the promotion gate.
 *
 * @param stateDir - Workspace state directory
 * @param params - Evaluation parameters (`minDelta` and `allowedMargin`
 *   fall back to `DEFAULT_MIN_DELTA` / `DEFAULT_ALLOWED_MARGIN`)
 * @returns PromotionGateResult with pass/fail and details
 *
 * FAIL-CLOSED: Returns { passes: false } if:
 * - The checkpoint does not exist
 * - No eval is attached to the checkpoint, or the referenced eval is missing
 * - Delta is not a finite number or is below the threshold
 * - Any constraint metric regresses beyond the allowed margin
 *
 * The first three conditions return immediately with a single blocker and no
 * `suggestedState`. Once an eval summary is available, all remaining checks
 * run and every failing one contributes its own blocker.
 *
 * NOTE(review): `params.targetProfile` is not consulted here; no
 * "allowed rollout list" check is performed by this function.
 */
export function evaluatePromotionGate(
  stateDir: string,
  params: EvaluateGateParams
): PromotionGateResult {
  const {
    checkpointId,
    baselineMetrics,
    minDelta = DEFAULT_MIN_DELTA,
    allowedMargin = DEFAULT_ALLOWED_MARGIN,
  } = params;

  // Shared shape for the early exits: one blocker, nothing evaluated.
  const failClosed = (reason: string): PromotionGateResult => ({
    passes: false,
    blockers: [reason],
    constraintChecks: [],
    deltaCheck: { actual: 0, threshold: minDelta, passed: false },
  });

  // --- Check 1: Checkpoint exists ---
  const checkpoint = getCheckpoint(stateDir, checkpointId);
  if (!checkpoint) {
    return failClosed(`Checkpoint not found: ${checkpointId}`);
  }

  // --- Check 2: Has eval attached ---
  if (!checkpoint.lastEvalSummaryRef) {
    return failClosed(
      `Checkpoint ${checkpointId} has no eval summary attached. ` +
      `Run benchmark evaluation before promotion gate.`
    );
  }

  // --- Check 3: Get eval summary ---
  const evalSummary = getEvalSummary(stateDir, checkpoint.lastEvalSummaryRef);
  if (!evalSummary) {
    return failClosed(
      `Eval summary '${checkpoint.lastEvalSummaryRef}' not found. ` +
      `Cannot evaluate promotion gate without valid eval.`
    );
  }

  const blockers: string[] = [];
  const constraintChecks: PromotionGateResult['constraintChecks'] = [];

  // --- Check 4: Delta must be a finite number at or above the threshold ---
  // A missing / NaN / infinite delta means the eval summary is malformed;
  // treat it as a failure instead of crashing on `toFixed` or letting
  // `Infinity` through.
  const delta = evalSummary.delta;
  const deltaIsFinite = Number.isFinite(delta);
  const deltaCheck = {
    actual: delta,
    threshold: minDelta,
    passed: deltaIsFinite && delta >= minDelta,
  };

  if (!deltaIsFinite) {
    blockers.push(
      `Reduced-prompt holdout delta (${String(delta)}) is not a finite number. ` +
      `Cannot evaluate promotion gate without a valid delta.`
    );
  } else if (!deltaCheck.passed) {
    blockers.push(
      `Reduced-prompt holdout delta (${delta.toFixed(4)}) is below threshold (${minDelta}). ` +
      `Checkpoint must show positive improvement to be promoted.`
    );
  }

  // --- Checks 5 & 6: pick the evidence source once ---
  // PREFER real shadow evidence (actual runtime routing decisions) over the
  // eval verdict proxy. The proxy is a coarse approximation: a 'fail' verdict
  // maps to a 100% arbiter reject rate and a 10% executability reject rate.
  const shadowStats = computeShadowStats(stateDir, { checkpointId });
  const evidence: {
    arbiterRejectRate: number;
    executabilityRejectRate: number;
    source: 'shadow' | 'eval-proxy';
  } = shadowStats && shadowStats.isStatisticallySignificant
    ? {
        arbiterRejectRate: shadowStats.rejectRate,
        executabilityRejectRate: shadowStats.escalationRate + shadowStats.profileRejectedRate,
        source: 'shadow',
      }
    : {
        arbiterRejectRate: evalSummary.verdict === 'fail' ? 1 : 0,
        executabilityRejectRate: evalSummary.verdict === 'fail' ? 0.1 : 0,
        source: 'eval-proxy',
      };
  const sampleNote = shadowStats ? `, n=${shadowStats.totalCount}` : '';

  // --- Check 5: Arbiter reject rate must not exceed baseline + margin ---
  const arbiterThreshold = baselineMetrics.arbiterRejectRate + allowedMargin;
  const arbiterRejectCheck = {
    constraint: 'arbiterRejectRate',
    actual: evidence.arbiterRejectRate,
    baseline: baselineMetrics.arbiterRejectRate,
    threshold: arbiterThreshold,
    passed: evidence.arbiterRejectRate <= arbiterThreshold,
    source: evidence.source,
  };
  constraintChecks.push(arbiterRejectCheck);

  if (!arbiterRejectCheck.passed) {
    blockers.push(
      `arbiterRejectRate regressed: ${evidence.arbiterRejectRate.toFixed(4)} > ${arbiterThreshold.toFixed(4)} ` +
      `(baseline: ${baselineMetrics.arbiterRejectRate.toFixed(4)}, margin: ${allowedMargin}) ` +
      `[source: ${evidence.source}${sampleNote}]`
    );
  }

  // --- Check 6: Executability reject rate must not exceed baseline + margin ---
  const executabilityThreshold = baselineMetrics.executabilityRejectRate + allowedMargin;
  const executabilityRejectCheck = {
    constraint: 'executabilityRejectRate',
    actual: evidence.executabilityRejectRate,
    baseline: baselineMetrics.executabilityRejectRate,
    threshold: executabilityThreshold,
    passed: evidence.executabilityRejectRate <= executabilityThreshold,
    source: evidence.source,
  };
  constraintChecks.push(executabilityRejectCheck);

  if (!executabilityRejectCheck.passed) {
    blockers.push(
      `executabilityRejectRate regressed: ${evidence.executabilityRejectRate.toFixed(4)} > ${executabilityThreshold.toFixed(4)} ` +
      `(baseline: ${baselineMetrics.executabilityRejectRate.toFixed(4)}, margin: ${allowedMargin}) ` +
      `[source: ${evidence.source}${sampleNote}]`
    );
  }

  // --- Check 7: Reviewed subset quality must not drop below baseline - margin ---
  // The eval candidate score is used as the proxy for quality.
  const reviewedSubsetQuality = evalSummary.candidateScore;
  const qualityThreshold = baselineMetrics.reviewedSubsetQuality - allowedMargin;
  const qualityCheck = {
    constraint: 'reviewedSubsetQuality',
    actual: reviewedSubsetQuality,
    baseline: baselineMetrics.reviewedSubsetQuality,
    threshold: qualityThreshold,
    passed: reviewedSubsetQuality >= qualityThreshold,
  };
  constraintChecks.push(qualityCheck);

  if (!qualityCheck.passed) {
    blockers.push(
      `reviewedSubsetQuality regressed: ${reviewedSubsetQuality.toFixed(4)} < ${qualityThreshold.toFixed(4)} ` +
      `(baseline: ${baselineMetrics.reviewedSubsetQuality.toFixed(4)})`
    );
  }

  // --- Determine if passes ---
  const allPassed =
    deltaCheck.passed &&
    arbiterRejectCheck.passed &&
    executabilityRejectCheck.passed &&
    qualityCheck.passed;

  // --- Suggest state based on checks ---
  // A delta of at least twice the threshold is considered strong enough to
  // suggest shadow_ready directly.
  let suggestedState: PromotionState;
  if (!allPassed) {
    suggestedState = 'rejected';
  } else if (delta >= minDelta * 2) {
    suggestedState = 'shadow_ready';
  } else {
    suggestedState = 'candidate_only';
  }

  return {
    passes: allPassed,
    suggestedState,
    blockers,
    constraintChecks,
    deltaCheck,
  };
}
|
|
512
|
+
|
|
513
|
+
// ---------------------------------------------------------------------------
|
|
514
|
+
// Promotion State Machine
|
|
515
|
+
// ---------------------------------------------------------------------------
|
|
516
|
+
|
|
517
|
+
/**
 * Input for advancing a checkpoint's promotion state.
 */
export interface AdvancePromotionParams {
  /** ID of the checkpoint whose promotion state should be advanced. */
  checkpointId: string;

  /** Worker profile the checkpoint is intended for. */
  targetProfile: TrainableWorkerProfile;

  /** Baseline values the checkpoint's constraint metrics are compared against. */
  baselineMetrics: BaselineMetrics;

  /** Whether orchestrator review passed (optional). */
  orchestratorReviewPassed?: boolean;

  /** Free-form note from a human reviewer (optional). */
  reviewNote?: string;

  /** Smallest delta accepted as a positive signal (optional). */
  minDelta?: number;

  /** Margin by which a constraint metric may deviate from its baseline (optional). */
  allowedMargin?: number;
}
|
|
542
|
+
|
|
543
|
+
/**
 * Advance a checkpoint's promotion state.
 *
 * The promotion gate is evaluated first (outside the registry lock); the
 * registry record for the checkpoint is then created or replaced under the
 * lock and written back to disk.
 *
 * A failing gate does NOT throw: it is recorded as state `rejected` and the
 * resulting record is returned. Errors can still propagate from the lock or
 * from file I/O.
 *
 * STATE TRANSITION RULES:
 * - Any state -> rejected: gate fails
 * - Any state -> candidate_only: gate passes but orchestrator review has not
 *   passed (review is always required to reach shadow_ready)
 * - promotable -> promotable: gate + review pass again; the record keeps its
 *   state and its original `promotableAt`
 * - shadow_ready -> promotable: gate + review pass and the shadow window has
 *   elapsed (or no `shadowStartedAt` was recorded, for backward compat)
 * - shadow_ready -> shadow_ready: gate + review pass but the shadow window is
 *   still open
 * - anything else (new, candidate_only, rejected) -> shadow_ready: gate +
 *   review pass
 *
 * @param stateDir - Workspace state directory
 * @param params - Advancement parameters
 * @returns The created or updated PromotionRecord
 */
export function advancePromotion(
  stateDir: string,
  params: AdvancePromotionParams
): PromotionRecord {
  const {
    checkpointId,
    targetProfile,
    baselineMetrics,
    orchestratorReviewPassed = false,
    reviewNote,
    minDelta = DEFAULT_MIN_DELTA,
    allowedMargin = DEFAULT_ALLOWED_MARGIN,
  } = params;

  // First, evaluate the gate
  const gateResult = evaluatePromotionGate(stateDir, {
    checkpointId,
    targetProfile,
    baselineMetrics,
    minDelta,
    allowedMargin,
  });

  return withPromotionRegistryLock(stateDir, (registry) => {
    const now = new Date().toISOString();
    const existingIdx = registry.promotions.findIndex(
      (p) => p.checkpointId === checkpointId
    );
    const existing = existingIdx >= 0 ? registry.promotions[existingIdx] : undefined;
    const currentState: PromotionState = existing ? existing.state : 'candidate_only';

    let targetState: PromotionState;
    if (!gateResult.passes) {
      targetState = 'rejected';
    } else if (!orchestratorReviewPassed) {
      // Gate passed but orchestrator review is required before shadow_ready,
      // regardless of delta strength.
      targetState = 'candidate_only';
    } else if (currentState === 'promotable') {
      // Already at the top of this state machine: re-running a passing
      // gate + review must not demote the record back to shadow_ready.
      targetState = 'promotable';
    } else if (currentState === 'shadow_ready') {
      // Only promote once the shadow window has been open long enough.
      // No shadowStartedAt -> allow promotion (backward compat).
      // NOTE(review): an unparseable shadowStartedAt yields NaN elapsed time,
      // which also falls through to 'promotable' — confirm this is intended.
      const shadowStart = existing?.shadowStartedAt;
      const shadowElapsed = shadowStart
        ? Date.now() - new Date(shadowStart).getTime()
        : undefined;
      targetState =
        shadowElapsed !== undefined && shadowElapsed < MIN_SHADOW_WINDOW_MS
          ? 'shadow_ready'
          : 'promotable';
    } else {
      // New, candidate_only or rejected: advance to shadow_ready
      targetState = 'shadow_ready';
    }

    // Get checkpoint info for lineage
    const checkpoint = getCheckpoint(stateDir, checkpointId);
    const evalSummary = checkpoint?.lastEvalSummaryRef
      ? getEvalSummary(stateDir, checkpoint.lastEvalSummaryRef)
      : null;

    // Record the constraint values the gate actually evaluated (which may come
    // from shadow evidence). When the gate exited early and evaluated nothing,
    // fall back to the coarse eval-verdict proxy.
    const gateActual = (constraint: string): number | undefined =>
      gateResult.constraintChecks.find((c) => c.constraint === constraint)?.actual;
    const evalFailed = evalSummary?.verdict === 'fail';

    // shadowStartedAt: when entering/staying on the shadow path, keep the
    // existing start only if the record was already on that path
    // (shadow_ready or promotable); after a demotion the next shadow entry is
    // a fresh start. Off the shadow path, carry the old value along unchanged.
    const targetOnShadowPath =
      targetState === 'shadow_ready' || targetState === 'promotable';
    const wasOnShadowPath =
      existing?.state === 'shadow_ready' || existing?.state === 'promotable';
    let shadowStartedAt: string | undefined;
    if (!targetOnShadowPath) {
      shadowStartedAt = existing?.shadowStartedAt;
    } else if (existing?.shadowStartedAt && wasOnShadowPath) {
      shadowStartedAt = existing.shadowStartedAt;
    } else {
      shadowStartedAt = now;
    }

    // promotableAt: stamp when promotable is first reached; keep the original
    // stamp when the record was already promotable.
    let promotableAt: string | undefined;
    if (targetState !== 'promotable') {
      promotableAt = existing?.promotableAt;
    } else if (currentState === 'promotable' && existing?.promotableAt) {
      promotableAt = existing.promotableAt;
    } else {
      promotableAt = now;
    }

    // Create/update promotion record
    const promotion: PromotionRecord = {
      promotionId: existing ? existing.promotionId : crypto.randomUUID(),
      checkpointId,
      state: targetState,
      targetProfile,
      targetModelFamily: checkpoint?.targetModelFamily ?? 'unknown',
      reducedPromptDelta: evalSummary?.delta ?? 0,
      constraintMetrics: {
        arbiterRejectRate: gateActual('arbiterRejectRate') ?? (evalFailed ? 1 : 0),
        executabilityRejectRate: gateActual('executabilityRejectRate') ?? (evalFailed ? 0.1 : 0),
        reviewedSubsetQuality: gateActual('reviewedSubsetQuality') ?? evalSummary?.candidateScore ?? 0,
        routingScopeNotExpanded: true, // Always true in Phase 7
      },
      baselineMetrics,
      orchestratorReviewPassed,
      reviewNote,
      stateChangedAt: now,
      createdAt: existing ? existing.createdAt : now,
      shadowStartedAt,
      promotableAt,
      // NOTE(review): because promotionId is reused for an existing record,
      // this points at the record's own ID — confirm the intended chain.
      previousPromotionId: existing ? existing.promotionId : undefined,
    };

    if (existingIdx >= 0) {
      registry.promotions[existingIdx] = promotion;
    } else {
      registry.promotions.push(promotion);
    }

    writeRegistry(stateDir, registry);
    return promotion;
  });
}
|
|
696
|
+
|
|
697
|
+
// ---------------------------------------------------------------------------
|
|
698
|
+
// Promotion Queries
|
|
699
|
+
// ---------------------------------------------------------------------------
|
|
700
|
+
|
|
701
|
+
/**
 * Look up the promotion state currently recorded for a checkpoint.
 *
 * @param stateDir - Workspace state directory
 * @param checkpointId - Checkpoint to look up
 * @returns The recorded state, or `null` when the registry has no record
 */
export function getPromotionState(
  stateDir: string,
  checkpointId: string
): PromotionState | null {
  const { promotions } = readRegistry(stateDir);
  const match = promotions.find((record) => record.checkpointId === checkpointId);
  return match?.state ?? null;
}
|
|
712
|
+
|
|
713
|
+
/**
 * Fetch the full promotion record stored for a checkpoint.
 *
 * @param stateDir - Workspace state directory
 * @param checkpointId - Checkpoint to look up
 * @returns The first matching record, or `null` when none exists
 */
export function getPromotionRecord(
  stateDir: string,
  checkpointId: string
): PromotionRecord | null {
  const { promotions } = readRegistry(stateDir);
  const match = promotions.find((record) => record.checkpointId === checkpointId);
  return match ?? null;
}
|
|
723
|
+
|
|
724
|
+
/**
 * Collect every promotion record that is currently in the given state.
 *
 * @param stateDir - Workspace state directory
 * @param state - Promotion state to match
 * @returns Matching records in registry order (empty when none match)
 */
export function listPromotionsByState(
  stateDir: string,
  state: PromotionState
): PromotionRecord[] {
  const { promotions } = readRegistry(stateDir);
  const inState = (record: PromotionRecord): boolean => record.state === state;
  return promotions.filter(inState);
}
|
|
734
|
+
|
|
735
|
+
/**
 * Collect every promotion record whose target profile equals `targetProfile`.
 *
 * @param stateDir - Workspace state directory
 * @param targetProfile - Worker profile to match
 * @returns Matching records in registry order (empty when none match)
 */
export function listPromotionsForProfile(
  stateDir: string,
  targetProfile: TrainableWorkerProfile
): PromotionRecord[] {
  const { promotions } = readRegistry(stateDir);
  const forProfile = (record: PromotionRecord): boolean =>
    record.targetProfile === targetProfile;
  return promotions.filter(forProfile);
}
|
|
745
|
+
|
|
746
|
+
// ---------------------------------------------------------------------------
|
|
747
|
+
// Rollback Support
|
|
748
|
+
// ---------------------------------------------------------------------------
|
|
749
|
+
|
|
750
|
+
/**
 * Mark a checkpoint as rejected in the promotion registry.
 *
 * The record is created if none exists, otherwise replaced in place, and the
 * registry is written back under the registry lock. The rejection reason is
 * stored as the record's `reviewNote`.
 *
 * When a record already exists, its identity and history are preserved:
 * `promotionId`, `targetProfile`, `baselineMetrics`, `createdAt`,
 * `shadowStartedAt`, `promotableAt` and `previousPromotionId` are carried
 * over, so the rejected record still appears under its real profile.
 * Only a brand-new record falls back to the `'local-reader'` profile and
 * all-zero baseline metrics.
 *
 * The measured values are always reset to rejection sentinels
 * (`reducedPromptDelta` 0, reject rates 1, quality 0).
 *
 * @param stateDir - Workspace state directory
 * @param checkpointId - Checkpoint to reject
 * @param reason - Reason for rejection
 * @returns The created or updated PromotionRecord
 */
export function rejectCheckpoint(
  stateDir: string,
  checkpointId: string,
  reason: string
): PromotionRecord {
  return withPromotionRegistryLock(stateDir, (registry) => {
    const now = new Date().toISOString();
    const existingIdx = registry.promotions.findIndex(
      (p) => p.checkpointId === checkpointId
    );
    const existing = existingIdx >= 0 ? registry.promotions[existingIdx] : undefined;

    const checkpoint = getCheckpoint(stateDir, checkpointId);

    const promotion: PromotionRecord = {
      promotionId: existing ? existing.promotionId : crypto.randomUUID(),
      checkpointId,
      state: 'rejected',
      // Keep the profile the record was created for; the hard-coded default
      // is only a placeholder for checkpoints that never had a record.
      targetProfile: existing ? existing.targetProfile : 'local-reader',
      targetModelFamily:
        checkpoint?.targetModelFamily ?? existing?.targetModelFamily ?? 'unknown',
      reducedPromptDelta: 0,
      constraintMetrics: {
        arbiterRejectRate: 1,
        executabilityRejectRate: 1,
        reviewedSubsetQuality: 0,
        routingScopeNotExpanded: true,
      },
      baselineMetrics: existing
        ? existing.baselineMetrics
        : {
            arbiterRejectRate: 0,
            executabilityRejectRate: 0,
            reviewedSubsetQuality: 0,
          },
      orchestratorReviewPassed: false,
      reviewNote: reason,
      stateChangedAt: now,
      createdAt: existing ? existing.createdAt : now,
      // Carry history forward instead of silently dropping it.
      shadowStartedAt: existing?.shadowStartedAt,
      promotableAt: existing?.promotableAt,
      previousPromotionId: existing?.previousPromotionId,
    };

    if (existingIdx >= 0) {
      registry.promotions[existingIdx] = promotion;
    } else {
      registry.promotions.push(promotion);
    }

    writeRegistry(stateDir, registry);
    return promotion;
  });
}
|
|
809
|
+
|
|
810
|
+
// ---------------------------------------------------------------------------
|
|
811
|
+
// Default Baseline Metrics
|
|
812
|
+
// ---------------------------------------------------------------------------
|
|
813
|
+
|
|
814
|
+
/**
 * Default baseline metrics for Phase 7.
 *
 * These are the reference values a new checkpoint's constraint metrics are
 * compared against.
 */
export const DEFAULT_BASELINE_METRICS: BaselineMetrics = {
  // 15% max arbiter rejection
  arbiterRejectRate: 0.15,
  // 10% max executability rejection
  executabilityRejectRate: 0.10,
  // 70% minimum quality score
  reviewedSubsetQuality: 0.70,
};
|