principles-disciple 1.72.0 → 1.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/INSTALL.md +1 -3
- package/openclaw.plugin.json +10 -5
- package/package.json +17 -19
- package/scripts/acceptance-test.mjs +16 -73
- package/scripts/sync-plugin.mjs +382 -77
- package/src/commands/archive-impl.ts +2 -1
- package/src/commands/capabilities.ts +2 -2
- package/src/commands/context.ts +2 -2
- package/src/commands/disable-impl.ts +2 -1
- package/src/commands/evolution-status.ts +16 -16
- package/src/commands/export.ts +12 -67
- package/src/commands/pain.ts +91 -1
- package/src/commands/principle-rollback.ts +2 -1
- package/src/commands/promote-impl.ts +7 -43
- package/src/commands/rollback-impl.ts +2 -1
- package/src/commands/rollback.ts +2 -1
- package/src/commands/samples.ts +2 -1
- package/src/commands/thinking-os.ts +2 -1
- package/src/config/errors.ts +18 -2
- package/src/constants/diagnostician.ts +2 -2
- package/src/constants/tools.ts +2 -1
- package/src/core/__tests__/focus-history.test.ts +210 -0
- package/src/core/config.ts +1 -1
- package/src/core/correction-cue-learner.ts +2 -136
- package/src/core/correction-types.ts +16 -88
- package/src/core/dictionary.ts +19 -20
- package/src/core/empathy-keyword-matcher.ts +17 -289
- package/src/core/empathy-types.ts +18 -229
- package/src/core/event-log.ts +29 -132
- package/src/core/evolution-reducer.ts +21 -2
- package/src/core/evolution-types.ts +76 -464
- package/src/core/file-store.ts +80 -0
- package/src/core/focus-history.ts +228 -955
- package/src/core/local-worker-routing.ts +34 -314
- package/src/core/merge-gate-audit.ts +0 -195
- package/src/core/migration.ts +0 -1
- package/src/core/pain-diagnostic-gate.ts +154 -0
- package/src/core/pain-signal.ts +21 -138
- package/src/core/pain.ts +15 -88
- package/src/core/path-resolver.ts +0 -1
- package/src/core/paths.ts +0 -1
- package/src/core/pd-task-reconciler.ts +26 -115
- package/src/core/pd-task-service.ts +9 -9
- package/src/core/pd-task-types.ts +23 -127
- package/src/core/principle-compiler/__tests__/compiler-replay-gate.test.ts +174 -0
- package/src/core/principle-compiler/code-validator.ts +15 -42
- package/src/core/principle-compiler/compiler.ts +100 -15
- package/src/core/principle-compiler/index.ts +5 -2
- package/src/core/principle-compiler/template-generator.ts +4 -104
- package/src/core/principle-injection.ts +10 -202
- package/src/core/principle-internalization/filesystem-lifecycle-datasource.ts +42 -0
- package/src/core/principle-internalization/lifecycle-read-model.ts +39 -242
- package/src/core/principle-internalization/principle-lifecycle-service.ts +12 -10
- package/src/core/principle-tree-ledger-adapter.ts +145 -0
- package/src/core/principle-tree-ledger.ts +8 -6
- package/src/core/reflection/reflection-context.ts +14 -109
- package/src/core/replay-engine.ts +8 -500
- package/src/core/rule-host-helpers.ts +5 -35
- package/src/core/rule-host-types.ts +10 -82
- package/src/core/rule-host.ts +6 -63
- package/src/core/runtime-v2-prompt-activation-reader.ts +231 -0
- package/src/core/session-tracker.ts +87 -101
- package/src/core/shadow-observation-registry.ts +19 -48
- package/src/core/trajectory.ts +3 -1
- package/src/core/workflow-funnel-loader.ts +62 -68
- package/src/core/workspace-context.ts +46 -0
- package/src/core/workspace-dir-service.ts +1 -1
- package/src/core/workspace-dir-validation.ts +18 -9
- package/src/hooks/AGENTS.md +1 -1
- package/src/hooks/gate-block-helper.ts +71 -64
- package/src/hooks/gate.ts +183 -31
- package/src/hooks/lifecycle.ts +30 -32
- package/src/hooks/llm.ts +60 -32
- package/src/hooks/pain.ts +297 -103
- package/src/hooks/prompt.ts +400 -440
- package/src/hooks/subagent.ts +2 -29
- package/src/i18n/commands.ts +2 -10
- package/src/index.ts +95 -85
- package/src/openclaw-sdk.ts +311 -0
- package/src/service/central-database.ts +8 -4
- package/src/service/evolution-queue-migration.ts +2 -1
- package/src/service/evolution-worker.ts +163 -1786
- package/src/service/internalization-trigger-adapter.ts +302 -0
- package/src/service/keyword-optimization-service.ts +4 -4
- package/src/service/monitoring-query-service.ts +1 -215
- package/src/service/queue-io.ts +60 -331
- package/src/service/runtime-summary-service.ts +59 -16
- package/src/service/subagent-workflow/index.ts +0 -41
- package/src/service/subagent-workflow/types.ts +9 -120
- package/src/service/subagent-workflow/workflow-store.ts +2 -119
- package/src/service/workflow-watchdog.ts +0 -43
- package/src/types/event-payload.ts +16 -74
- package/src/types/event-types.ts +38 -547
- package/src/types/hygiene-types.ts +7 -30
- package/src/types/principle-tree-schema.ts +20 -222
- package/src/types/queue.ts +15 -70
- package/src/types/runtime-summary.ts +5 -49
- package/src/utils/io.ts +8 -20
- package/src/utils/retry.ts +1 -1
- package/src/utils/shadow-fingerprint.ts +2 -2
- package/src/utils/workspace-resolver.ts +50 -0
- package/templates/langs/en/core/AGENTS.md +7 -7
- package/templates/langs/en/core/BOOT.md +1 -1
- package/templates/langs/en/core/HEARTBEAT.md +2 -2
- package/templates/langs/en/principles/THINKING_OS.md +3 -2
- package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
- package/templates/langs/en/skills/evolve-task/SKILL.md +3 -3
- package/templates/langs/en/skills/pd-cli-operator/SKILL.md +67 -0
- package/templates/langs/en/skills/pd-diagnostician/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-mentor/SKILL.md +2 -3
- package/templates/langs/en/skills/pd-pain-signal/SKILL.md +17 -39
- package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +61 -0
- package/templates/langs/zh/core/AGENTS.md +7 -7
- package/templates/langs/zh/core/BOOT.md +1 -1
- package/templates/langs/zh/core/HEARTBEAT.md +2 -2
- package/templates/langs/zh/principles/THINKING_OS.md +3 -2
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +8 -8
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
- package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +21 -5
- package/templates/langs/zh/skills/evolve-task/SKILL.md +4 -4
- package/templates/langs/zh/skills/pd-cli-operator/SKILL.md +67 -0
- package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +1 -1
- package/templates/langs/zh/skills/pd-mentor/SKILL.md +2 -3
- package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +17 -38
- package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +61 -0
- package/tests/build-artifacts.test.ts +1 -3
- package/tests/commands/evolution-status.test.ts +0 -118
- package/tests/core/bootstrap-rules.test.ts +1 -1
- package/tests/core/config.test.ts +1 -1
- package/tests/core/event-log.test.ts +35 -0
- package/tests/core/evolution-engine.test.ts +610 -0
- package/tests/core/file-store.test.ts +102 -0
- package/tests/core/focus-history.test.ts +203 -11
- package/tests/core/merge-gate-audit.test.ts +2 -169
- package/tests/core/migration.test.ts +7 -7
- package/tests/core/model-deployment-registry.test.ts +7 -1
- package/tests/core/model-training-registry.test.ts +19 -0
- package/tests/core/observability.test.ts +0 -1
- package/tests/core/pain-diagnostic-gate.test.ts +498 -0
- package/tests/core/pain.test.ts +0 -1
- package/tests/core/path-resolver.test.ts +1 -1
- package/tests/core/paths-refactor.test.ts +0 -22
- package/tests/core/principle-internalization/deprecated-readiness.test.ts +2 -2
- package/tests/core/principle-internalization/lifecycle-metrics.test.ts +2 -2
- package/tests/core/principle-internalization/{internalization-routing-policy.test.ts → lifecycle-routing-policy.test.ts} +6 -6
- package/tests/core/principle-internalization/lineage-source-retired.test.ts +56 -0
- package/tests/core/principle-internalization/principle-lifecycle-service.test.ts +1 -23
- package/tests/core/principle-tree-ledger-adapter.test.ts +253 -0
- package/tests/core/reflection-context.test.ts +0 -14
- package/tests/core/replay-engine.test.ts +127 -215
- package/tests/core/rule-host-helpers.test.ts +2 -2
- package/tests/core/rule-implementation-runtime.test.ts +0 -27
- package/tests/core/workflow-funnel-loader.test.ts +162 -0
- package/tests/core/workspace-context.test.ts +2 -2
- package/tests/core/workspace-dir-validation.test.ts +8 -1
- package/tests/core-anti-growth.test.ts +191 -0
- package/tests/hook-workspace-nextaction-contract.test.ts +42 -0
- package/tests/hooks/confirm-first-removal.test.ts +188 -0
- package/tests/hooks/gate-auto-correct-shadow.test.ts +310 -0
- package/tests/hooks/gate-auto-correct.test.ts +665 -0
- package/tests/hooks/gate-no-path-write-tool.test.ts +172 -0
- package/tests/hooks/gate-rule-host-pipeline.test.ts +2 -1
- package/tests/hooks/pain.test.ts +269 -12
- package/tests/hooks/prompt-characterization.test.ts +500 -0
- package/tests/hooks/prompt-size-guard.test.ts +32 -17
- package/tests/hooks/runtime-v2-prompt-activation.test.ts +869 -0
- package/tests/index.test.ts +94 -1
- package/tests/integration/auto-entry-gate.test.ts +248 -0
- package/tests/integration/internalization-trigger-guard.test.ts +69 -0
- package/tests/integration/m8-legacy-paths.test.ts +63 -0
- package/tests/integration/runtime-v2-pain-guard.test.ts +125 -0
- package/tests/plugin-config-resolution-cutover.test.ts +359 -0
- package/tests/runtime-v2-discovery-guard.test.ts +154 -0
- package/tests/service/central-database.test.ts +457 -0
- package/tests/service/evolution-worker.correction-observer.test.ts +173 -0
- package/tests/service/evolution-worker.timeout.test.ts +11 -129
- package/tests/service/internalization-trigger-adapter.test.ts +251 -0
- package/tests/service/monitoring-query-service.test.ts +1 -47
- package/tests/service/queue-io.test.ts +1 -62
- package/tests/service/runtime-summary-service.test.ts +3 -1
- package/tests/service/workflow-watchdog.test.ts +0 -91
- package/tests/utils/file-lock.test.ts +5 -3
- package/tests/utils/session-key.test.ts +52 -0
- package/tests/utils/subagent-probe.test.ts +48 -1
- package/vitest.config.ts +4 -11
- package/.planning/codebase/ARCHITECTURE.md +0 -157
- package/.planning/codebase/CONCERNS.md +0 -145
- package/.planning/codebase/CONVENTIONS.md +0 -148
- package/.planning/codebase/INTEGRATIONS.md +0 -81
- package/.planning/codebase/STACK.md +0 -87
- package/.planning/codebase/STRUCTURE.md +0 -193
- package/.planning/codebase/TESTING.md +0 -243
- package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +0 -113
- package/docs/COMMAND_REFERENCE.md +0 -76
- package/docs/COMMAND_REFERENCE_EN.md +0 -79
- package/scripts/build-web.mjs +0 -46
- package/scripts/diagnose-nocturnal.mjs +0 -537
- package/scripts/seed-nocturnal-scenarios.mjs +0 -384
- package/src/commands/nocturnal-review.ts +0 -322
- package/src/commands/nocturnal-rollout.ts +0 -790
- package/src/commands/nocturnal-train.ts +0 -986
- package/src/commands/pd-reflect.ts +0 -88
- package/src/core/adaptive-thresholds.ts +0 -478
- package/src/core/diagnostician-task-store.ts +0 -192
- package/src/core/nocturnal-arbiter.ts +0 -715
- package/src/core/nocturnal-artifact-lineage.ts +0 -116
- package/src/core/nocturnal-artificer.ts +0 -257
- package/src/core/nocturnal-candidate-scoring.ts +0 -530
- package/src/core/nocturnal-compliance.ts +0 -1146
- package/src/core/nocturnal-dataset.ts +0 -763
- package/src/core/nocturnal-executability.ts +0 -428
- package/src/core/nocturnal-export.ts +0 -499
- package/src/core/nocturnal-paths.ts +0 -240
- package/src/core/nocturnal-reasoning-deriver.ts +0 -343
- package/src/core/nocturnal-rule-implementation-validator.ts +0 -246
- package/src/core/nocturnal-snapshot-contract.ts +0 -99
- package/src/core/nocturnal-trajectory-extractor.ts +0 -512
- package/src/core/nocturnal-trinity-types.ts +0 -218
- package/src/core/nocturnal-trinity.ts +0 -2680
- package/src/core/principle-internalization/deprecated-readiness.ts +0 -93
- package/src/core/principle-internalization/internalization-routing-policy.ts +0 -208
- package/src/core/principle-internalization/lifecycle-metrics.ts +0 -152
- package/src/http/principles-console-route.ts +0 -709
- package/src/service/central-health-service.ts +0 -49
- package/src/service/central-overview-service.ts +0 -138
- package/src/service/control-ui-query-service.ts +0 -900
- package/src/service/cooldown-strategy.ts +0 -97
- package/src/service/evolution-pain-context.ts +0 -79
- package/src/service/evolution-query-service.ts +0 -407
- package/src/service/health-query-service.ts +0 -1038
- package/src/service/nocturnal-config.ts +0 -214
- package/src/service/nocturnal-runtime.ts +0 -734
- package/src/service/nocturnal-service.ts +0 -1605
- package/src/service/nocturnal-target-selector.ts +0 -545
- package/src/service/sleep-cycle.ts +0 -157
- package/src/service/startup-reconciler.ts +0 -112
- package/src/service/subagent-workflow/correction-observer-types.ts +0 -82
- package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +0 -250
- package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +0 -1
- package/src/service/subagent-workflow/dynamic-timeout.ts +0 -30
- package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +0 -268
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -795
- package/src/service/subagent-workflow/runtime-direct-driver.ts +0 -268
- package/src/service/subagent-workflow/workflow-manager-base.ts +0 -580
- package/src/tools/write-pain-flag.ts +0 -215
- package/templates/langs/en/skills/plan-script/SKILL.md +0 -32
- package/templates/langs/zh/skills/plan-script/SKILL.md +0 -32
- package/tests/commands/nocturnal-review.test.ts +0 -448
- package/tests/commands/nocturnal-train.test.ts +0 -97
- package/tests/commands/pd-reflect.test.ts +0 -49
- package/tests/core/adaptive-thresholds.test.ts +0 -261
- package/tests/core/nocturnal-arbiter.test.ts +0 -559
- package/tests/core/nocturnal-artifact-lineage.test.ts +0 -53
- package/tests/core/nocturnal-artificer.test.ts +0 -241
- package/tests/core/nocturnal-candidate-scoring.test.ts +0 -532
- package/tests/core/nocturnal-compliance-p-principles.test.ts +0 -133
- package/tests/core/nocturnal-compliance.test.ts +0 -646
- package/tests/core/nocturnal-dataset.test.ts +0 -892
- package/tests/core/nocturnal-e2e.test.ts +0 -234
- package/tests/core/nocturnal-executability.test.ts +0 -357
- package/tests/core/nocturnal-export.test.ts +0 -517
- package/tests/core/nocturnal-reasoning-deriver.test.ts +0 -372
- package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +0 -428
- package/tests/core/nocturnal-rule-implementation-validator.test.ts +0 -127
- package/tests/core/nocturnal-snapshot-contract.test.ts +0 -121
- package/tests/core/nocturnal-trajectory-extractor.test.ts +0 -634
- package/tests/core/nocturnal-trinity.test.ts +0 -2053
- package/tests/core/pain-auto-repair.test.ts +0 -96
- package/tests/core/pain-integration.test.ts +0 -510
- package/tests/fixtures/nocturnal-reviewed-subset.json +0 -183
- package/tests/http/principles-console-route.test.ts +0 -162
- package/tests/integration/chaos-resilience.test.ts +0 -348
- package/tests/integration/empathy-workflow-integration.test.ts +0 -626
- package/tests/integration/pain-diagnostician-loop.e2e.test.ts +0 -380
- package/tests/service/control-ui-query-service.test.ts +0 -121
- package/tests/service/cooldown-strategy.test.ts +0 -164
- package/tests/service/data-endpoints-regression.test.ts +0 -834
- package/tests/service/empathy-observer-workflow-manager.test.ts +0 -175
- package/tests/service/evolution-worker.nocturnal.test.ts +0 -601
- package/tests/service/nocturnal-runtime-hardening.test.ts +0 -118
- package/tests/service/nocturnal-runtime.test.ts +0 -473
- package/tests/service/nocturnal-service-code-candidate.test.ts +0 -330
- package/tests/service/nocturnal-target-selector.test.ts +0 -615
- package/tests/service/startup-reconciler.test.ts +0 -148
- package/tests/tools/write-pain-flag.test.ts +0 -358
- package/ui/src/App.tsx +0 -45
- package/ui/src/api.ts +0 -220
- package/ui/src/charts.tsx +0 -955
- package/ui/src/components/ErrorState.tsx +0 -6
- package/ui/src/components/Loading.tsx +0 -13
- package/ui/src/components/ProtectedRoute.tsx +0 -12
- package/ui/src/components/Shell.tsx +0 -91
- package/ui/src/components/WorkspaceConfig.tsx +0 -178
- package/ui/src/components/index.ts +0 -5
- package/ui/src/context/auth.tsx +0 -80
- package/ui/src/context/theme.tsx +0 -66
- package/ui/src/hooks/useAutoRefresh.ts +0 -39
- package/ui/src/i18n/ui.ts +0 -473
- package/ui/src/main.tsx +0 -16
- package/ui/src/pages/EvolutionPage.tsx +0 -333
- package/ui/src/pages/FeedbackPage.tsx +0 -138
- package/ui/src/pages/GateMonitorPage.tsx +0 -136
- package/ui/src/pages/LoginPage.tsx +0 -89
- package/ui/src/pages/OverviewPage.tsx +0 -599
- package/ui/src/pages/SamplesPage.tsx +0 -174
- package/ui/src/pages/ThinkingModelsPage.tsx +0 -702
- package/ui/src/styles.css +0 -2020
- package/ui/src/types.ts +0 -384
- package/ui/src/utils/format.ts +0 -15
|
@@ -67,6 +67,7 @@ describe('ModelTrainingRegistry registerTrainingRun', () => {
|
|
|
67
67
|
|
|
68
68
|
it('registers a new training run', () => {
|
|
69
69
|
const run = registerTrainingRun(tmpDir, {
|
|
70
|
+
experimentId: 'mock-experiment-id',
|
|
70
71
|
targetModelFamily: 'gpt-4',
|
|
71
72
|
datasetFingerprint: 'sha256-abc123',
|
|
72
73
|
exportId: 'export-001',
|
|
@@ -87,6 +88,7 @@ describe('ModelTrainingRegistry registerTrainingRun', () => {
|
|
|
87
88
|
|
|
88
89
|
it('persists the run to disk', () => {
|
|
89
90
|
const run = registerTrainingRun(tmpDir, {
|
|
91
|
+
experimentId: 'mock-experiment-id',
|
|
90
92
|
targetModelFamily: 'claude-3',
|
|
91
93
|
datasetFingerprint: 'sha256-def456',
|
|
92
94
|
exportId: 'export-002',
|
|
@@ -102,6 +104,7 @@ describe('ModelTrainingRegistry registerTrainingRun', () => {
|
|
|
102
104
|
|
|
103
105
|
it('generates unique trainRunIds', () => {
|
|
104
106
|
const run1 = registerTrainingRun(tmpDir, {
|
|
107
|
+
experimentId: 'mock-experiment-id',
|
|
105
108
|
targetModelFamily: 'gpt-4',
|
|
106
109
|
datasetFingerprint: 'sha256-abc',
|
|
107
110
|
exportId: 'e1',
|
|
@@ -109,6 +112,7 @@ describe('ModelTrainingRegistry registerTrainingRun', () => {
|
|
|
109
112
|
configFingerprint: 'c1',
|
|
110
113
|
});
|
|
111
114
|
const run2 = registerTrainingRun(tmpDir, {
|
|
115
|
+
experimentId: 'mock-experiment-id',
|
|
112
116
|
targetModelFamily: 'gpt-4',
|
|
113
117
|
datasetFingerprint: 'sha256-def',
|
|
114
118
|
exportId: 'e2',
|
|
@@ -130,6 +134,7 @@ describe('ModelTrainingRegistry run status transitions', () => {
|
|
|
130
134
|
beforeEach(() => {
|
|
131
135
|
tmpDir = makeTmpDir();
|
|
132
136
|
runId = registerTrainingRun(tmpDir, {
|
|
137
|
+
experimentId: 'mock-experiment-id',
|
|
133
138
|
targetModelFamily: 'gpt-4',
|
|
134
139
|
datasetFingerprint: 'sha256-abc',
|
|
135
140
|
exportId: 'exp-1',
|
|
@@ -186,6 +191,7 @@ describe('ModelTrainingRegistry run status transitions', () => {
|
|
|
186
191
|
// BeforeEach already created runId as pending
|
|
187
192
|
// Create run2 (also pending by default)
|
|
188
193
|
const run2 = registerTrainingRun(tmpDir, {
|
|
194
|
+
experimentId: 'mock-experiment-id',
|
|
189
195
|
targetModelFamily: 'gpt-4',
|
|
190
196
|
datasetFingerprint: 'sha256-def',
|
|
191
197
|
exportId: 'exp-2',
|
|
@@ -208,6 +214,7 @@ describe('ModelTrainingRegistry run status transitions', () => {
|
|
|
208
214
|
it('listTrainingRuns filters by targetModelFamily', () => {
|
|
209
215
|
// Register gpt-4 and claude-3 runs using the tmpDir from beforeEach
|
|
210
216
|
const gpt4Run = registerTrainingRun(tmpDir, {
|
|
217
|
+
experimentId: 'mock-experiment-id',
|
|
211
218
|
targetModelFamily: 'gpt-4',
|
|
212
219
|
datasetFingerprint: 'sha256-a',
|
|
213
220
|
exportId: 'e1',
|
|
@@ -215,6 +222,7 @@ describe('ModelTrainingRegistry run status transitions', () => {
|
|
|
215
222
|
configFingerprint: 'c1',
|
|
216
223
|
});
|
|
217
224
|
registerTrainingRun(tmpDir, {
|
|
225
|
+
experimentId: 'mock-experiment-id',
|
|
218
226
|
targetModelFamily: 'claude-3',
|
|
219
227
|
datasetFingerprint: 'sha256-b',
|
|
220
228
|
exportId: 'e2',
|
|
@@ -244,6 +252,7 @@ describe('ModelTrainingRegistry registerCheckpoint', () => {
|
|
|
244
252
|
beforeEach(() => {
|
|
245
253
|
tmpDir = makeTmpDir();
|
|
246
254
|
runId = registerTrainingRun(tmpDir, {
|
|
255
|
+
experimentId: 'mock-experiment-id',
|
|
247
256
|
targetModelFamily: 'gpt-4',
|
|
248
257
|
datasetFingerprint: 'sha256-abc',
|
|
249
258
|
exportId: 'exp-1',
|
|
@@ -345,6 +354,7 @@ describe('ModelTrainingRegistry attachEvalSummary', () => {
|
|
|
345
354
|
beforeEach(() => {
|
|
346
355
|
tmpDir = makeTmpDir();
|
|
347
356
|
const run = registerTrainingRun(tmpDir, {
|
|
357
|
+
experimentId: 'mock-experiment-id',
|
|
348
358
|
targetModelFamily: 'gpt-4',
|
|
349
359
|
datasetFingerprint: 'sha256-abc',
|
|
350
360
|
exportId: 'exp-1',
|
|
@@ -464,6 +474,7 @@ describe('ModelTrainingRegistry deployability gating', () => {
|
|
|
464
474
|
beforeEach(() => {
|
|
465
475
|
tmpDir = makeTmpDir();
|
|
466
476
|
const run = registerTrainingRun(tmpDir, {
|
|
477
|
+
experimentId: 'mock-experiment-id',
|
|
467
478
|
targetModelFamily: 'gpt-4',
|
|
468
479
|
datasetFingerprint: 'sha256-abc',
|
|
469
480
|
exportId: 'exp-1',
|
|
@@ -614,6 +625,7 @@ describe('ModelTrainingRegistry deployability gating', () => {
|
|
|
614
625
|
it('listDeployableCheckpoints returns only deployable checkpoints', () => {
|
|
615
626
|
// Create another run and checkpoint
|
|
616
627
|
const run2 = registerTrainingRun(tmpDir, {
|
|
628
|
+
experimentId: 'mock-experiment-id',
|
|
617
629
|
targetModelFamily: 'gpt-4',
|
|
618
630
|
datasetFingerprint: 'sha256-def',
|
|
619
631
|
exportId: 'exp-2',
|
|
@@ -661,6 +673,7 @@ describe('ModelTrainingRegistry lineage tracing', () => {
|
|
|
661
673
|
beforeEach(() => {
|
|
662
674
|
tmpDir = makeTmpDir();
|
|
663
675
|
const run = registerTrainingRun(tmpDir, {
|
|
676
|
+
experimentId: 'mock-experiment-id',
|
|
664
677
|
targetModelFamily: 'gpt-4',
|
|
665
678
|
datasetFingerprint: 'sha256-full',
|
|
666
679
|
exportId: 'exp-full',
|
|
@@ -743,6 +756,7 @@ describe('ModelTrainingRegistry stats', () => {
|
|
|
743
756
|
|
|
744
757
|
it('counts runs in each status', () => {
|
|
745
758
|
const run1 = registerTrainingRun(tmpDir, {
|
|
759
|
+
experimentId: 'mock-experiment-id',
|
|
746
760
|
targetModelFamily: 'gpt-4',
|
|
747
761
|
datasetFingerprint: 'sha256-a',
|
|
748
762
|
exportId: 'e1',
|
|
@@ -750,6 +764,7 @@ describe('ModelTrainingRegistry stats', () => {
|
|
|
750
764
|
configFingerprint: 'c1',
|
|
751
765
|
});
|
|
752
766
|
const run2 = registerTrainingRun(tmpDir, {
|
|
767
|
+
experimentId: 'mock-experiment-id',
|
|
753
768
|
targetModelFamily: 'gpt-4',
|
|
754
769
|
datasetFingerprint: 'sha256-b',
|
|
755
770
|
exportId: 'e2',
|
|
@@ -757,6 +772,7 @@ describe('ModelTrainingRegistry stats', () => {
|
|
|
757
772
|
configFingerprint: 'c1',
|
|
758
773
|
});
|
|
759
774
|
const run3 = registerTrainingRun(tmpDir, {
|
|
775
|
+
experimentId: 'mock-experiment-id',
|
|
760
776
|
targetModelFamily: 'gpt-4',
|
|
761
777
|
datasetFingerprint: 'sha256-c',
|
|
762
778
|
exportId: 'e3',
|
|
@@ -779,6 +795,7 @@ describe('ModelTrainingRegistry stats', () => {
|
|
|
779
795
|
|
|
780
796
|
it('counts passing vs failing evals', () => {
|
|
781
797
|
const run = registerTrainingRun(tmpDir, {
|
|
798
|
+
experimentId: 'mock-experiment-id',
|
|
782
799
|
targetModelFamily: 'gpt-4',
|
|
783
800
|
datasetFingerprint: 'sha256-abc',
|
|
784
801
|
exportId: 'exp-1',
|
|
@@ -838,6 +855,7 @@ describe('ModelTrainingRegistry persistence', () => {
|
|
|
838
855
|
|
|
839
856
|
it('getFullRegistry returns all record types', () => {
|
|
840
857
|
const run = registerTrainingRun(tmpDir, {
|
|
858
|
+
experimentId: 'mock-experiment-id',
|
|
841
859
|
targetModelFamily: 'gpt-4',
|
|
842
860
|
datasetFingerprint: 'sha256-abc',
|
|
843
861
|
exportId: 'exp-1',
|
|
@@ -870,6 +888,7 @@ describe('ModelTrainingRegistry persistence', () => {
|
|
|
870
888
|
it('registry persists across module re-invocations', () => {
|
|
871
889
|
// This test verifies the registry is written to disk
|
|
872
890
|
const run = registerTrainingRun(tmpDir, {
|
|
891
|
+
experimentId: 'mock-experiment-id',
|
|
873
892
|
targetModelFamily: 'gpt-4',
|
|
874
893
|
datasetFingerprint: 'sha256-abc',
|
|
875
894
|
exportId: 'exp-1',
|
|
@@ -187,7 +187,6 @@ describe('calculateBaselines', () => {
|
|
|
187
187
|
|
|
188
188
|
// Create a trajectory DB with pain events
|
|
189
189
|
const dbPath = path.join(tmpDir, 'trajectory.db');
|
|
190
|
-
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
|
191
190
|
const Database = require('better-sqlite3');
|
|
192
191
|
const db = new Database(dbPath);
|
|
193
192
|
db.exec(`
|
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it } from 'vitest';
|
|
2
|
+
import { evaluatePainDiagnosticGate, resetPainDiagnosticGateForTest } from '../../src/core/pain-diagnostic-gate.js';
|
|
3
|
+
|
|
4
|
+
describe('PainDiagnosticGate', () => {
|
|
5
|
+
beforeEach(() => {
|
|
6
|
+
resetPainDiagnosticGateForTest();
|
|
7
|
+
});
|
|
8
|
+
|
|
9
|
+
it('lets manual pain bypass automatic thresholds', () => {
|
|
10
|
+
const decision = evaluatePainDiagnosticGate({
|
|
11
|
+
source: 'manual',
|
|
12
|
+
score: 1,
|
|
13
|
+
currentGfi: 0,
|
|
14
|
+
sessionId: 's1',
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
expect(decision).toMatchObject({
|
|
18
|
+
shouldDiagnose: true,
|
|
19
|
+
reason: 'manual',
|
|
20
|
+
});
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
it('does not diagnose ordinary low-signal tool failures', () => {
|
|
24
|
+
const decision = evaluatePainDiagnosticGate({
|
|
25
|
+
source: 'tool_failure',
|
|
26
|
+
score: 50,
|
|
27
|
+
currentGfi: 15,
|
|
28
|
+
consecutiveErrors: 1,
|
|
29
|
+
sessionId: 's1',
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
expect(decision).toMatchObject({
|
|
33
|
+
shouldDiagnose: false,
|
|
34
|
+
reason: 'below_gate',
|
|
35
|
+
});
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
it('diagnoses repeated same failures', () => {
|
|
39
|
+
const decision = evaluatePainDiagnosticGate({
|
|
40
|
+
source: 'tool_failure',
|
|
41
|
+
score: 50,
|
|
42
|
+
currentGfi: 50,
|
|
43
|
+
consecutiveErrors: 4,
|
|
44
|
+
sessionId: 's1',
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
expect(decision).toMatchObject({
|
|
48
|
+
shouldDiagnose: true,
|
|
49
|
+
reason: 'repeated_failure',
|
|
50
|
+
});
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
it('diagnoses high GFI episodes', () => {
|
|
54
|
+
const decision = evaluatePainDiagnosticGate({
|
|
55
|
+
source: 'tool_failure',
|
|
56
|
+
score: 50,
|
|
57
|
+
currentGfi: 72,
|
|
58
|
+
consecutiveErrors: 2,
|
|
59
|
+
sessionId: 's1',
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
expect(decision).toMatchObject({
|
|
63
|
+
shouldDiagnose: true,
|
|
64
|
+
reason: 'high_gfi',
|
|
65
|
+
});
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
it('requires stronger score for generic semantic pain', () => {
|
|
69
|
+
const low = evaluatePainDiagnosticGate({
|
|
70
|
+
source: 'semantic',
|
|
71
|
+
score: 45,
|
|
72
|
+
currentGfi: 0,
|
|
73
|
+
sessionId: 's1',
|
|
74
|
+
});
|
|
75
|
+
const high = evaluatePainDiagnosticGate({
|
|
76
|
+
source: 'semantic',
|
|
77
|
+
score: 60,
|
|
78
|
+
currentGfi: 0,
|
|
79
|
+
sessionId: 's2',
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
expect(low.shouldDiagnose).toBe(false);
|
|
83
|
+
expect(high).toMatchObject({
|
|
84
|
+
shouldDiagnose: true,
|
|
85
|
+
reason: 'semantic_pain',
|
|
86
|
+
});
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
it('deduplicates repeated diagnosis within cooldown', () => {
|
|
90
|
+
const input = {
|
|
91
|
+
source: 'tool_failure',
|
|
92
|
+
score: 50,
|
|
93
|
+
currentGfi: 72,
|
|
94
|
+
sessionId: 's1',
|
|
95
|
+
errorHash: 'same',
|
|
96
|
+
nowMs: 1_000,
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
expect(evaluatePainDiagnosticGate(input).shouldDiagnose).toBe(true);
|
|
100
|
+
expect(evaluatePainDiagnosticGate({ ...input, nowMs: 2_000 })).toMatchObject({
|
|
101
|
+
shouldDiagnose: false,
|
|
102
|
+
reason: 'cooldown',
|
|
103
|
+
});
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
it('diagnoses subagent_error when score >= painTrigger', () => {
|
|
107
|
+
const decision = evaluatePainDiagnosticGate({
|
|
108
|
+
source: 'subagent_error',
|
|
109
|
+
score: 40,
|
|
110
|
+
currentGfi: 0,
|
|
111
|
+
sessionId: 's1',
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
expect(decision).toMatchObject({
|
|
115
|
+
shouldDiagnose: true,
|
|
116
|
+
reason: 'subagent_error',
|
|
117
|
+
});
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
it('skips subagent_error when score < painTrigger', () => {
|
|
121
|
+
const decision = evaluatePainDiagnosticGate({
|
|
122
|
+
source: 'subagent_error',
|
|
123
|
+
score: 39,
|
|
124
|
+
currentGfi: 0,
|
|
125
|
+
sessionId: 's1',
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
expect(decision).toMatchObject({
|
|
129
|
+
shouldDiagnose: false,
|
|
130
|
+
reason: 'below_gate',
|
|
131
|
+
});
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
it('diagnoses llm_paralysis when score >= painTrigger', () => {
|
|
135
|
+
const decision = evaluatePainDiagnosticGate({
|
|
136
|
+
source: 'llm_paralysis',
|
|
137
|
+
score: 40,
|
|
138
|
+
currentGfi: 0,
|
|
139
|
+
sessionId: 's1',
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
expect(decision).toMatchObject({
|
|
143
|
+
shouldDiagnose: true,
|
|
144
|
+
reason: 'llm_paralysis',
|
|
145
|
+
});
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
it('diagnoses risky_high_score when isRisky=true and score >= highSeverity', () => {
|
|
149
|
+
const decision = evaluatePainDiagnosticGate({
|
|
150
|
+
source: 'tool_failure',
|
|
151
|
+
score: 70,
|
|
152
|
+
currentGfi: 0,
|
|
153
|
+
isRisky: true,
|
|
154
|
+
sessionId: 's1',
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
expect(decision).toMatchObject({
|
|
158
|
+
shouldDiagnose: true,
|
|
159
|
+
reason: 'risky_high_score',
|
|
160
|
+
});
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
it('skips risky_high_score when isRisky=true but score < highSeverity', () => {
|
|
164
|
+
const decision = evaluatePainDiagnosticGate({
|
|
165
|
+
source: 'tool_failure',
|
|
166
|
+
score: 69,
|
|
167
|
+
currentGfi: 0,
|
|
168
|
+
isRisky: true,
|
|
169
|
+
sessionId: 's1',
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
expect(decision).toMatchObject({
|
|
173
|
+
shouldDiagnose: false,
|
|
174
|
+
reason: 'below_gate',
|
|
175
|
+
});
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
it('diagnoses user_empathy when score >= semanticPain threshold', () => {
|
|
179
|
+
const decision = evaluatePainDiagnosticGate({
|
|
180
|
+
source: 'user_empathy',
|
|
181
|
+
score: 60,
|
|
182
|
+
currentGfi: 0,
|
|
183
|
+
sessionId: 's1',
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
expect(decision).toMatchObject({
|
|
187
|
+
shouldDiagnose: true,
|
|
188
|
+
reason: 'semantic_pain',
|
|
189
|
+
});
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
it('uses custom threshold overrides', () => {
|
|
193
|
+
const decision = evaluatePainDiagnosticGate({
|
|
194
|
+
source: 'tool_failure',
|
|
195
|
+
score: 50,
|
|
196
|
+
currentGfi: 60,
|
|
197
|
+
consecutiveErrors: 2,
|
|
198
|
+
sessionId: 's1',
|
|
199
|
+
thresholds: {
|
|
200
|
+
painTrigger: 40,
|
|
201
|
+
highSeverity: 70,
|
|
202
|
+
highGfi: 55,
|
|
203
|
+
repeatedFailure: 4,
|
|
204
|
+
semanticPain: 60,
|
|
205
|
+
},
|
|
206
|
+
});
|
|
207
|
+
|
|
208
|
+
expect(decision).toMatchObject({
|
|
209
|
+
shouldDiagnose: true,
|
|
210
|
+
reason: 'high_gfi',
|
|
211
|
+
});
|
|
212
|
+
});
|
|
213
|
+
|
|
214
|
+
it('handles exact threshold boundary (score === painTrigger)', () => {
|
|
215
|
+
const decision = evaluatePainDiagnosticGate({
|
|
216
|
+
source: 'subagent_error',
|
|
217
|
+
score: 40,
|
|
218
|
+
currentGfi: 0,
|
|
219
|
+
sessionId: 's1',
|
|
220
|
+
thresholds: { painTrigger: 40 },
|
|
221
|
+
});
|
|
222
|
+
|
|
223
|
+
expect(decision).toMatchObject({
|
|
224
|
+
shouldDiagnose: true,
|
|
225
|
+
reason: 'subagent_error',
|
|
226
|
+
});
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
it('normalizes llm_ prefixed sources (non-paralysis) to semantic', () => {
|
|
230
|
+
const decision = evaluatePainDiagnosticGate({
|
|
231
|
+
source: 'llm_confusion',
|
|
232
|
+
score: 60,
|
|
233
|
+
currentGfi: 0,
|
|
234
|
+
sessionId: 's1',
|
|
235
|
+
});
|
|
236
|
+
|
|
237
|
+
expect(decision).toMatchObject({
|
|
238
|
+
shouldDiagnose: true,
|
|
239
|
+
reason: 'semantic_pain',
|
|
240
|
+
});
|
|
241
|
+
});
|
|
242
|
+
|
|
243
|
+
it('llm_paralysis is NOT normalized to semantic', () => {
|
|
244
|
+
const decision = evaluatePainDiagnosticGate({
|
|
245
|
+
source: 'llm_paralysis',
|
|
246
|
+
score: 40,
|
|
247
|
+
currentGfi: 0,
|
|
248
|
+
sessionId: 's1',
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
expect(decision).toMatchObject({
|
|
252
|
+
shouldDiagnose: true,
|
|
253
|
+
reason: 'llm_paralysis',
|
|
254
|
+
});
|
|
255
|
+
});
|
|
256
|
+
|
|
257
|
+
it('skips llm_paralysis when score < painTrigger', () => {
|
|
258
|
+
const decision = evaluatePainDiagnosticGate({
|
|
259
|
+
source: 'llm_paralysis',
|
|
260
|
+
score: 39,
|
|
261
|
+
currentGfi: 0,
|
|
262
|
+
sessionId: 's1',
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
expect(decision).toMatchObject({
|
|
266
|
+
shouldDiagnose: false,
|
|
267
|
+
reason: 'below_gate',
|
|
268
|
+
});
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
it('diagnoses llm_paralysis with score 45 (new config default, PRI-274)', () => {
|
|
272
|
+
const decision = evaluatePainDiagnosticGate({
|
|
273
|
+
source: 'llm_paralysis',
|
|
274
|
+
score: 45,
|
|
275
|
+
currentGfi: 0,
|
|
276
|
+
sessionId: 's1',
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
expect(decision).toMatchObject({
|
|
280
|
+
shouldDiagnose: true,
|
|
281
|
+
reason: 'llm_paralysis',
|
|
282
|
+
});
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
it('cooldownMs=0 disables cooldown (allows re-diagnosis)', () => {
|
|
286
|
+
const input = {
|
|
287
|
+
source: 'tool_failure',
|
|
288
|
+
score: 50,
|
|
289
|
+
currentGfi: 72,
|
|
290
|
+
sessionId: 's1',
|
|
291
|
+
errorHash: 'same',
|
|
292
|
+
nowMs: 1_000,
|
|
293
|
+
cooldownMs: 0,
|
|
294
|
+
};
|
|
295
|
+
|
|
296
|
+
expect(evaluatePainDiagnosticGate(input).shouldDiagnose).toBe(true);
|
|
297
|
+
expect(evaluatePainDiagnosticGate({ ...input, nowMs: 2_000 }).shouldDiagnose).toBe(true);
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
it('treats NaN score as 0 (below gate)', () => {
|
|
301
|
+
const decision = evaluatePainDiagnosticGate({
|
|
302
|
+
source: 'tool_failure',
|
|
303
|
+
score: NaN,
|
|
304
|
+
currentGfi: 72,
|
|
305
|
+
consecutiveErrors: 4,
|
|
306
|
+
sessionId: 's1',
|
|
307
|
+
});
|
|
308
|
+
|
|
309
|
+
expect(decision.shouldDiagnose).toBe(true);
|
|
310
|
+
expect(decision.reason).toBe('repeated_failure');
|
|
311
|
+
});
|
|
312
|
+
|
|
313
|
+
it('treats NaN currentGfi as 0', () => {
|
|
314
|
+
const decision = evaluatePainDiagnosticGate({
|
|
315
|
+
source: 'tool_failure',
|
|
316
|
+
score: 50,
|
|
317
|
+
currentGfi: NaN,
|
|
318
|
+
consecutiveErrors: 1,
|
|
319
|
+
sessionId: 's1',
|
|
320
|
+
});
|
|
321
|
+
|
|
322
|
+
expect(decision).toMatchObject({
|
|
323
|
+
shouldDiagnose: false,
|
|
324
|
+
reason: 'below_gate',
|
|
325
|
+
});
|
|
326
|
+
});
|
|
327
|
+
|
|
328
|
+
it('treats Infinity score as finite for gate evaluation', () => {
|
|
329
|
+
const decision = evaluatePainDiagnosticGate({
|
|
330
|
+
source: 'tool_failure',
|
|
331
|
+
score: Infinity,
|
|
332
|
+
currentGfi: 0,
|
|
333
|
+
isRisky: true,
|
|
334
|
+
sessionId: 's1',
|
|
335
|
+
});
|
|
336
|
+
|
|
337
|
+
expect(decision.shouldDiagnose).toBe(false);
|
|
338
|
+
});
|
|
339
|
+
|
|
340
|
+
it('treats NaN consecutiveErrors as 0', () => {
|
|
341
|
+
const decision = evaluatePainDiagnosticGate({
|
|
342
|
+
source: 'tool_failure',
|
|
343
|
+
score: 50,
|
|
344
|
+
currentGfi: 50,
|
|
345
|
+
consecutiveErrors: NaN,
|
|
346
|
+
sessionId: 's1',
|
|
347
|
+
});
|
|
348
|
+
|
|
349
|
+
expect(decision).toMatchObject({
|
|
350
|
+
shouldDiagnose: false,
|
|
351
|
+
reason: 'below_gate',
|
|
352
|
+
});
|
|
353
|
+
});
|
|
354
|
+
|
|
355
|
+
it('episodeKey includes sessionId, source, and errorHash', () => {
|
|
356
|
+
const decision = evaluatePainDiagnosticGate({
|
|
357
|
+
source: 'manual',
|
|
358
|
+
score: 1,
|
|
359
|
+
currentGfi: 0,
|
|
360
|
+
sessionId: 's-ep',
|
|
361
|
+
errorHash: 'hash-abc',
|
|
362
|
+
});
|
|
363
|
+
|
|
364
|
+
expect(decision.episodeKey).toContain('s-ep');
|
|
365
|
+
expect(decision.episodeKey).toContain('manual');
|
|
366
|
+
expect(decision.episodeKey).toContain('hash-abc');
|
|
367
|
+
});
|
|
368
|
+
|
|
369
|
+
it('episodeKey uses "unknown" when sessionId missing', () => {
|
|
370
|
+
const decision = evaluatePainDiagnosticGate({
|
|
371
|
+
source: 'manual',
|
|
372
|
+
score: 1,
|
|
373
|
+
currentGfi: 0,
|
|
374
|
+
});
|
|
375
|
+
|
|
376
|
+
expect(decision.episodeKey).toContain('unknown');
|
|
377
|
+
});
|
|
378
|
+
|
|
379
|
+
it('episodeKey uses "no-hash" when errorHash missing', () => {
|
|
380
|
+
const decision = evaluatePainDiagnosticGate({
|
|
381
|
+
source: 'manual',
|
|
382
|
+
score: 1,
|
|
383
|
+
currentGfi: 0,
|
|
384
|
+
sessionId: 's1',
|
|
385
|
+
});
|
|
386
|
+
|
|
387
|
+
expect(decision.episodeKey).toContain('no-hash');
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
it('manual pain is still subject to cooldown', () => {
|
|
391
|
+
const input = {
|
|
392
|
+
source: 'manual',
|
|
393
|
+
score: 100,
|
|
394
|
+
currentGfi: 0,
|
|
395
|
+
sessionId: 's1',
|
|
396
|
+
nowMs: 1_000,
|
|
397
|
+
};
|
|
398
|
+
|
|
399
|
+
const first = evaluatePainDiagnosticGate(input);
|
|
400
|
+
expect(first.shouldDiagnose).toBe(true);
|
|
401
|
+
|
|
402
|
+
const second = evaluatePainDiagnosticGate({ ...input, nowMs: 2_000 });
|
|
403
|
+
expect(second).toMatchObject({
|
|
404
|
+
shouldDiagnose: false,
|
|
405
|
+
reason: 'cooldown',
|
|
406
|
+
});
|
|
407
|
+
});
|
|
408
|
+
|
|
409
|
+
it('different errorHash produces different episodeKey (no cooldown cross-contamination)', () => {
|
|
410
|
+
const base = {
|
|
411
|
+
source: 'tool_failure',
|
|
412
|
+
score: 50,
|
|
413
|
+
currentGfi: 72,
|
|
414
|
+
sessionId: 's1',
|
|
415
|
+
nowMs: 1_000,
|
|
416
|
+
};
|
|
417
|
+
|
|
418
|
+
expect(evaluatePainDiagnosticGate({ ...base, errorHash: 'hash-a' }).shouldDiagnose).toBe(true);
|
|
419
|
+
expect(evaluatePainDiagnosticGate({ ...base, errorHash: 'hash-b', nowMs: 2_000 }).shouldDiagnose).toBe(true);
|
|
420
|
+
});
|
|
421
|
+
|
|
422
|
+
it('highGfi defaults to max(highSeverity, painTrigger+30)', () => {
|
|
423
|
+
const decision = evaluatePainDiagnosticGate({
|
|
424
|
+
source: 'tool_failure',
|
|
425
|
+
score: 50,
|
|
426
|
+
currentGfi: 72,
|
|
427
|
+
consecutiveErrors: 1,
|
|
428
|
+
sessionId: 's1',
|
|
429
|
+
thresholds: {
|
|
430
|
+
painTrigger: 40,
|
|
431
|
+
highSeverity: 70,
|
|
432
|
+
},
|
|
433
|
+
});
|
|
434
|
+
|
|
435
|
+
expect(decision).toMatchObject({
|
|
436
|
+
shouldDiagnose: true,
|
|
437
|
+
reason: 'high_gfi',
|
|
438
|
+
});
|
|
439
|
+
});
|
|
440
|
+
|
|
441
|
+
it('gate_block source falls through to below_gate when score below painTrigger', () => {
|
|
442
|
+
const decision = evaluatePainDiagnosticGate({
|
|
443
|
+
source: 'gate_blocked',
|
|
444
|
+
score: 10,
|
|
445
|
+
currentGfi: 5,
|
|
446
|
+
consecutiveErrors: 0,
|
|
447
|
+
sessionId: 's1',
|
|
448
|
+
});
|
|
449
|
+
|
|
450
|
+
expect(decision).toMatchObject({
|
|
451
|
+
shouldDiagnose: false,
|
|
452
|
+
reason: 'below_gate',
|
|
453
|
+
});
|
|
454
|
+
});
|
|
455
|
+
|
|
456
|
+
it('diagnoses gate_blocked when score >= painTrigger (PRI-274)', () => {
|
|
457
|
+
const decision = evaluatePainDiagnosticGate({
|
|
458
|
+
source: 'gate_blocked',
|
|
459
|
+
score: 45,
|
|
460
|
+
currentGfi: 0,
|
|
461
|
+
consecutiveErrors: 0,
|
|
462
|
+
sessionId: 's1',
|
|
463
|
+
});
|
|
464
|
+
|
|
465
|
+
expect(decision).toMatchObject({
|
|
466
|
+
shouldDiagnose: true,
|
|
467
|
+
reason: 'gate_blocked',
|
|
468
|
+
});
|
|
469
|
+
});
|
|
470
|
+
|
|
471
|
+
it('skips gate_blocked when score < painTrigger', () => {
|
|
472
|
+
const decision = evaluatePainDiagnosticGate({
|
|
473
|
+
source: 'gate_blocked',
|
|
474
|
+
score: 39,
|
|
475
|
+
currentGfi: 0,
|
|
476
|
+
consecutiveErrors: 0,
|
|
477
|
+
sessionId: 's1',
|
|
478
|
+
});
|
|
479
|
+
|
|
480
|
+
expect(decision).toMatchObject({
|
|
481
|
+
shouldDiagnose: false,
|
|
482
|
+
reason: 'below_gate',
|
|
483
|
+
});
|
|
484
|
+
});
|
|
485
|
+
|
|
486
|
+
it('returns detail string in every decision', () => {
|
|
487
|
+
const cases = [
|
|
488
|
+
{ source: 'manual' as const, score: 1, currentGfi: 0 },
|
|
489
|
+
{ source: 'tool_failure' as const, score: 10, currentGfi: 5, consecutiveErrors: 0 },
|
|
490
|
+
];
|
|
491
|
+
|
|
492
|
+
for (const input of cases) {
|
|
493
|
+
const decision = evaluatePainDiagnosticGate({ ...input, sessionId: 's1' });
|
|
494
|
+
expect(typeof decision.detail).toBe('string');
|
|
495
|
+
expect(decision.detail.length).toBeGreaterThan(0);
|
|
496
|
+
}
|
|
497
|
+
});
|
|
498
|
+
});
|
package/tests/core/pain.test.ts
CHANGED
|
@@ -20,7 +20,7 @@ describe('PathResolver', () => {
|
|
|
20
20
|
const { PathResolver } = await import('../../src/core/path-resolver.js');
|
|
21
21
|
const resolver = new PathResolver({ workspaceDir: '/test/workspace' });
|
|
22
22
|
|
|
23
|
-
const requiredKeys = ['PROFILE', '
|
|
23
|
+
const requiredKeys = ['PROFILE', 'AGENT_SCORECARD', 'PAIN_FLAG', 'EVOLUTION_QUEUE', 'THINKING_OS', 'THINKING_OS_USAGE', 'THINKING_OS_CANDIDATES'];
|
|
24
24
|
|
|
25
25
|
for (const key of requiredKeys) {
|
|
26
26
|
expect(() => resolver.resolve(key)).not.toThrow();
|
|
@@ -1,9 +1,5 @@
|
|
|
1
1
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
2
|
-
import { planStatus } from '../../src/utils/io.js';
|
|
3
2
|
import { resolvePdPath } from '../../src/core/paths.js';
|
|
4
|
-
import * as fs from 'fs';
|
|
5
|
-
|
|
6
|
-
vi.mock('fs');
|
|
7
3
|
|
|
8
4
|
describe('Path Anchoring Integration', () => {
|
|
9
5
|
const workspaceDir = '/mock/workspace';
|
|
@@ -17,26 +13,8 @@ describe('Path Anchoring Integration', () => {
|
|
|
17
13
|
expect(resolvePdPath(workspaceDir, 'PROFILE')).toBe(expected);
|
|
18
14
|
});
|
|
19
15
|
|
|
20
|
-
it('should resolve PLAN.md at the project root', () => {
|
|
21
|
-
const expected = '/mock/workspace/PLAN.md';
|
|
22
|
-
expect(resolvePdPath(workspaceDir, 'PLAN')).toBe(expected);
|
|
23
|
-
});
|
|
24
|
-
|
|
25
16
|
it('should resolve AGENT_SCORECARD.json inside .state/', () => {
|
|
26
17
|
const expected = '/mock/workspace/.state/AGENT_SCORECARD.json';
|
|
27
18
|
expect(resolvePdPath(workspaceDir, 'AGENT_SCORECARD')).toBe(expected);
|
|
28
19
|
});
|
|
29
|
-
|
|
30
|
-
it('planStatus should look for PLAN.md in the root', () => {
|
|
31
|
-
const rootPlanPath = '/mock/workspace/PLAN.md';
|
|
32
|
-
vi.mocked(fs.existsSync).mockImplementation((p) => p === rootPlanPath);
|
|
33
|
-
vi.mocked(fs.readFileSync).mockReturnValue('STATUS: READY');
|
|
34
|
-
|
|
35
|
-
const status = planStatus(workspaceDir);
|
|
36
|
-
|
|
37
|
-
expect(status).toBe('READY');
|
|
38
|
-
expect(fs.existsSync).toHaveBeenCalledWith(rootPlanPath);
|
|
39
|
-
// Verify it does NOT look in docs/
|
|
40
|
-
expect(fs.existsSync).not.toHaveBeenCalledWith(expect.stringContaining('docs/PLAN.md'));
|
|
41
|
-
});
|
|
42
20
|
});
|