principles-disciple 1.71.0 → 1.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +10 -5
- package/package.json +17 -19
- package/scripts/acceptance-test.mjs +16 -73
- package/scripts/sync-plugin.mjs +382 -77
- package/src/commands/archive-impl.ts +2 -1
- package/src/commands/capabilities.ts +2 -2
- package/src/commands/context.ts +2 -2
- package/src/commands/disable-impl.ts +2 -1
- package/src/commands/evolution-status.ts +16 -16
- package/src/commands/export.ts +12 -67
- package/src/commands/pain.ts +91 -1
- package/src/commands/principle-rollback.ts +2 -1
- package/src/commands/promote-impl.ts +7 -43
- package/src/commands/rollback-impl.ts +2 -1
- package/src/commands/rollback.ts +2 -1
- package/src/commands/samples.ts +2 -1
- package/src/commands/thinking-os.ts +2 -1
- package/src/config/errors.ts +18 -2
- package/src/constants/diagnostician.ts +2 -2
- package/src/constants/tools.ts +2 -1
- package/src/core/__tests__/focus-history.test.ts +210 -0
- package/src/core/config.ts +1 -1
- package/src/core/confirm-first-gate.ts +255 -0
- package/src/core/correction-cue-learner.ts +2 -136
- package/src/core/correction-types.ts +16 -88
- package/src/core/dictionary.ts +19 -20
- package/src/core/empathy-keyword-matcher.ts +17 -289
- package/src/core/empathy-types.ts +18 -229
- package/src/core/event-log.ts +38 -132
- package/src/core/evolution-reducer.ts +21 -2
- package/src/core/evolution-types.ts +76 -464
- package/src/core/file-store.ts +80 -0
- package/src/core/focus-history.ts +228 -955
- package/src/core/local-worker-routing.ts +34 -314
- package/src/core/merge-gate-audit.ts +0 -195
- package/src/core/pain-diagnostic-gate.ts +154 -0
- package/src/core/pain-signal.ts +21 -138
- package/src/core/pain.ts +15 -88
- package/src/core/pd-task-reconciler.ts +26 -115
- package/src/core/pd-task-service.ts +9 -9
- package/src/core/pd-task-types.ts +23 -127
- package/src/core/principle-compiler/__tests__/compiler-replay-gate.test.ts +174 -0
- package/src/core/principle-compiler/code-validator.ts +15 -42
- package/src/core/principle-compiler/compiler.ts +100 -15
- package/src/core/principle-compiler/index.ts +5 -2
- package/src/core/principle-compiler/template-generator.ts +4 -104
- package/src/core/principle-injection.ts +10 -202
- package/src/core/principle-internalization/filesystem-lifecycle-datasource.ts +42 -0
- package/src/core/principle-internalization/lifecycle-read-model.ts +39 -242
- package/src/core/principle-internalization/principle-lifecycle-service.ts +12 -10
- package/src/core/principle-tree-ledger-adapter.ts +145 -0
- package/src/core/principle-tree-ledger.ts +8 -6
- package/src/core/reflection/reflection-context.ts +14 -109
- package/src/core/replay-engine.ts +8 -500
- package/src/core/rule-host-helpers.ts +5 -35
- package/src/core/rule-host-types.ts +10 -82
- package/src/core/rule-host.ts +6 -63
- package/src/core/runtime-v2-prompt-activation-reader.ts +231 -0
- package/src/core/session-tracker.ts +87 -101
- package/src/core/shadow-observation-registry.ts +19 -48
- package/src/core/trajectory.ts +3 -1
- package/src/core/workflow-funnel-loader.ts +62 -68
- package/src/core/workspace-context.ts +46 -0
- package/src/core/workspace-dir-service.ts +1 -1
- package/src/core/workspace-dir-validation.ts +18 -9
- package/src/hooks/AGENTS.md +1 -1
- package/src/hooks/gate-block-helper.ts +46 -44
- package/src/hooks/gate.ts +207 -7
- package/src/hooks/lifecycle.ts +30 -32
- package/src/hooks/llm.ts +60 -32
- package/src/hooks/pain.ts +297 -103
- package/src/hooks/prompt.ts +469 -339
- package/src/hooks/subagent.ts +2 -29
- package/src/i18n/commands.ts +2 -10
- package/src/index.ts +95 -85
- package/src/openclaw-sdk.ts +311 -0
- package/src/service/central-database.ts +8 -4
- package/src/service/evolution-queue-migration.ts +2 -1
- package/src/service/evolution-worker.ts +163 -1786
- package/src/service/internalization-trigger-adapter.ts +302 -0
- package/src/service/keyword-optimization-service.ts +4 -4
- package/src/service/monitoring-query-service.ts +1 -215
- package/src/service/queue-io.ts +60 -331
- package/src/service/runtime-summary-service.ts +115 -18
- package/src/service/subagent-workflow/index.ts +0 -41
- package/src/service/subagent-workflow/types.ts +9 -120
- package/src/service/subagent-workflow/workflow-store.ts +2 -119
- package/src/service/workflow-watchdog.ts +0 -43
- package/src/types/event-payload.ts +16 -74
- package/src/types/event-types.ts +39 -547
- package/src/types/hygiene-types.ts +7 -30
- package/src/types/principle-tree-schema.ts +20 -222
- package/src/types/queue.ts +15 -70
- package/src/types/runtime-summary.ts +5 -49
- package/src/utils/io.ts +10 -0
- package/src/utils/retry.ts +1 -1
- package/src/utils/shadow-fingerprint.ts +2 -2
- package/src/utils/workspace-resolver.ts +50 -0
- package/templates/langs/en/core/AGENTS.md +2 -2
- package/templates/langs/en/core/BOOT.md +1 -1
- package/templates/langs/en/core/HEARTBEAT.md +2 -2
- package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
- package/templates/langs/en/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
- package/templates/langs/en/skills/evolve-task/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-cli-operator/SKILL.md +67 -0
- package/templates/langs/en/skills/pd-diagnostician/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-mentor/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-pain-signal/SKILL.md +17 -39
- package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +61 -0
- package/templates/langs/zh/core/AGENTS.md +2 -2
- package/templates/langs/zh/core/BOOT.md +1 -1
- package/templates/langs/zh/core/HEARTBEAT.md +2 -2
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +8 -8
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
- package/templates/langs/zh/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
- package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +21 -5
- package/templates/langs/zh/skills/evolve-task/SKILL.md +2 -2
- package/templates/langs/zh/skills/pd-cli-operator/SKILL.md +67 -0
- package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +1 -1
- package/templates/langs/zh/skills/pd-mentor/SKILL.md +1 -1
- package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +17 -38
- package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +61 -0
- package/tests/build-artifacts.test.ts +1 -3
- package/tests/commands/evolution-status.test.ts +0 -118
- package/tests/core/bootstrap-rules.test.ts +1 -1
- package/tests/core/config.test.ts +1 -1
- package/tests/core/event-log.test.ts +35 -0
- package/tests/core/evolution-engine.test.ts +610 -0
- package/tests/core/file-store.test.ts +102 -0
- package/tests/core/focus-history.test.ts +203 -11
- package/tests/core/merge-gate-audit.test.ts +2 -169
- package/tests/core/model-deployment-registry.test.ts +7 -1
- package/tests/core/model-training-registry.test.ts +19 -0
- package/tests/core/observability.test.ts +0 -1
- package/tests/core/pain-diagnostic-gate.test.ts +498 -0
- package/tests/core/pain.test.ts +0 -1
- package/tests/core/principle-internalization/deprecated-readiness.test.ts +2 -2
- package/tests/core/principle-internalization/lifecycle-metrics.test.ts +2 -2
- package/tests/core/principle-internalization/{internalization-routing-policy.test.ts → lifecycle-routing-policy.test.ts} +6 -6
- package/tests/core/principle-internalization/lineage-source-retired.test.ts +56 -0
- package/tests/core/principle-internalization/principle-lifecycle-service.test.ts +1 -23
- package/tests/core/principle-tree-ledger-adapter.test.ts +253 -0
- package/tests/core/reflection-context.test.ts +0 -14
- package/tests/core/replay-engine.test.ts +127 -215
- package/tests/core/rule-host-helpers.test.ts +2 -2
- package/tests/core/rule-implementation-runtime.test.ts +0 -27
- package/tests/core/workflow-funnel-loader.test.ts +162 -0
- package/tests/core/workspace-dir-validation.test.ts +8 -1
- package/tests/core-anti-growth.test.ts +192 -0
- package/tests/hook-workspace-nextaction-contract.test.ts +42 -0
- package/tests/hooks/confirm-first-gate.test.ts +333 -0
- package/tests/hooks/gate-auto-correct-shadow.test.ts +310 -0
- package/tests/hooks/gate-auto-correct.test.ts +665 -0
- package/tests/hooks/gate-rule-host-pipeline.test.ts +2 -1
- package/tests/hooks/pain.test.ts +269 -12
- package/tests/hooks/prompt-characterization.test.ts +500 -0
- package/tests/hooks/prompt-size-guard.test.ts +329 -0
- package/tests/hooks/runtime-v2-prompt-activation.test.ts +869 -0
- package/tests/index.test.ts +94 -1
- package/tests/integration/auto-entry-gate.test.ts +248 -0
- package/tests/integration/internalization-trigger-guard.test.ts +69 -0
- package/tests/integration/m8-legacy-paths.test.ts +63 -0
- package/tests/integration/runtime-v2-pain-guard.test.ts +125 -0
- package/tests/plugin-config-resolution-cutover.test.ts +359 -0
- package/tests/runtime-v2-discovery-guard.test.ts +154 -0
- package/tests/service/central-database.test.ts +457 -0
- package/tests/service/evolution-worker.correction-observer.test.ts +173 -0
- package/tests/service/evolution-worker.timeout.test.ts +11 -129
- package/tests/service/internalization-trigger-adapter.test.ts +251 -0
- package/tests/service/monitoring-query-service.test.ts +1 -47
- package/tests/service/queue-io.test.ts +1 -62
- package/tests/service/runtime-summary-service.test.ts +184 -3
- package/tests/service/workflow-watchdog.test.ts +0 -91
- package/tests/utils/file-lock.test.ts +5 -3
- package/tests/utils/session-key.test.ts +52 -0
- package/tests/utils/subagent-probe.test.ts +48 -1
- package/vitest.config.ts +4 -11
- package/.planning/codebase/ARCHITECTURE.md +0 -157
- package/.planning/codebase/CONCERNS.md +0 -145
- package/.planning/codebase/CONVENTIONS.md +0 -148
- package/.planning/codebase/INTEGRATIONS.md +0 -81
- package/.planning/codebase/STACK.md +0 -87
- package/.planning/codebase/STRUCTURE.md +0 -193
- package/.planning/codebase/TESTING.md +0 -243
- package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +0 -113
- package/docs/COMMAND_REFERENCE.md +0 -76
- package/docs/COMMAND_REFERENCE_EN.md +0 -79
- package/scripts/build-web.mjs +0 -46
- package/scripts/diagnose-nocturnal.mjs +0 -537
- package/scripts/seed-nocturnal-scenarios.mjs +0 -384
- package/src/commands/nocturnal-review.ts +0 -322
- package/src/commands/nocturnal-rollout.ts +0 -790
- package/src/commands/nocturnal-train.ts +0 -986
- package/src/commands/pd-reflect.ts +0 -88
- package/src/core/adaptive-thresholds.ts +0 -478
- package/src/core/diagnostician-task-store.ts +0 -192
- package/src/core/nocturnal-arbiter.ts +0 -715
- package/src/core/nocturnal-artifact-lineage.ts +0 -116
- package/src/core/nocturnal-artificer.ts +0 -257
- package/src/core/nocturnal-candidate-scoring.ts +0 -530
- package/src/core/nocturnal-compliance.ts +0 -1146
- package/src/core/nocturnal-dataset.ts +0 -763
- package/src/core/nocturnal-executability.ts +0 -428
- package/src/core/nocturnal-export.ts +0 -499
- package/src/core/nocturnal-paths.ts +0 -240
- package/src/core/nocturnal-reasoning-deriver.ts +0 -343
- package/src/core/nocturnal-rule-implementation-validator.ts +0 -246
- package/src/core/nocturnal-snapshot-contract.ts +0 -99
- package/src/core/nocturnal-trajectory-extractor.ts +0 -512
- package/src/core/nocturnal-trinity-types.ts +0 -218
- package/src/core/nocturnal-trinity.ts +0 -2680
- package/src/core/principle-internalization/deprecated-readiness.ts +0 -93
- package/src/core/principle-internalization/internalization-routing-policy.ts +0 -208
- package/src/core/principle-internalization/lifecycle-metrics.ts +0 -152
- package/src/http/principles-console-route.ts +0 -709
- package/src/service/central-health-service.ts +0 -49
- package/src/service/central-overview-service.ts +0 -138
- package/src/service/control-ui-query-service.ts +0 -900
- package/src/service/cooldown-strategy.ts +0 -97
- package/src/service/evolution-pain-context.ts +0 -79
- package/src/service/evolution-query-service.ts +0 -407
- package/src/service/health-query-service.ts +0 -1038
- package/src/service/nocturnal-config.ts +0 -214
- package/src/service/nocturnal-runtime.ts +0 -734
- package/src/service/nocturnal-service.ts +0 -1605
- package/src/service/nocturnal-target-selector.ts +0 -545
- package/src/service/sleep-cycle.ts +0 -157
- package/src/service/startup-reconciler.ts +0 -112
- package/src/service/subagent-workflow/correction-observer-types.ts +0 -82
- package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +0 -250
- package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +0 -1
- package/src/service/subagent-workflow/dynamic-timeout.ts +0 -30
- package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +0 -268
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -795
- package/src/service/subagent-workflow/runtime-direct-driver.ts +0 -268
- package/src/service/subagent-workflow/workflow-manager-base.ts +0 -580
- package/src/tools/write-pain-flag.ts +0 -215
- package/tests/commands/nocturnal-review.test.ts +0 -448
- package/tests/commands/nocturnal-train.test.ts +0 -97
- package/tests/commands/pd-reflect.test.ts +0 -49
- package/tests/core/adaptive-thresholds.test.ts +0 -261
- package/tests/core/nocturnal-arbiter.test.ts +0 -559
- package/tests/core/nocturnal-artifact-lineage.test.ts +0 -53
- package/tests/core/nocturnal-artificer.test.ts +0 -241
- package/tests/core/nocturnal-candidate-scoring.test.ts +0 -532
- package/tests/core/nocturnal-compliance-p-principles.test.ts +0 -133
- package/tests/core/nocturnal-compliance.test.ts +0 -646
- package/tests/core/nocturnal-dataset.test.ts +0 -892
- package/tests/core/nocturnal-e2e.test.ts +0 -234
- package/tests/core/nocturnal-executability.test.ts +0 -357
- package/tests/core/nocturnal-export.test.ts +0 -517
- package/tests/core/nocturnal-reasoning-deriver.test.ts +0 -372
- package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +0 -428
- package/tests/core/nocturnal-rule-implementation-validator.test.ts +0 -127
- package/tests/core/nocturnal-snapshot-contract.test.ts +0 -121
- package/tests/core/nocturnal-trajectory-extractor.test.ts +0 -634
- package/tests/core/nocturnal-trinity.test.ts +0 -2053
- package/tests/core/pain-auto-repair.test.ts +0 -96
- package/tests/core/pain-integration.test.ts +0 -510
- package/tests/fixtures/nocturnal-reviewed-subset.json +0 -183
- package/tests/http/principles-console-route.test.ts +0 -162
- package/tests/integration/chaos-resilience.test.ts +0 -348
- package/tests/integration/empathy-workflow-integration.test.ts +0 -626
- package/tests/integration/pain-diagnostician-loop.e2e.test.ts +0 -380
- package/tests/service/control-ui-query-service.test.ts +0 -121
- package/tests/service/cooldown-strategy.test.ts +0 -164
- package/tests/service/data-endpoints-regression.test.ts +0 -834
- package/tests/service/empathy-observer-workflow-manager.test.ts +0 -175
- package/tests/service/evolution-worker.nocturnal.test.ts +0 -601
- package/tests/service/nocturnal-runtime-hardening.test.ts +0 -118
- package/tests/service/nocturnal-runtime.test.ts +0 -473
- package/tests/service/nocturnal-service-code-candidate.test.ts +0 -330
- package/tests/service/nocturnal-target-selector.test.ts +0 -615
- package/tests/service/startup-reconciler.test.ts +0 -148
- package/tests/tools/write-pain-flag.test.ts +0 -358
- package/ui/src/App.tsx +0 -45
- package/ui/src/api.ts +0 -220
- package/ui/src/charts.tsx +0 -955
- package/ui/src/components/ErrorState.tsx +0 -6
- package/ui/src/components/Loading.tsx +0 -13
- package/ui/src/components/ProtectedRoute.tsx +0 -12
- package/ui/src/components/Shell.tsx +0 -91
- package/ui/src/components/WorkspaceConfig.tsx +0 -178
- package/ui/src/components/index.ts +0 -5
- package/ui/src/context/auth.tsx +0 -80
- package/ui/src/context/theme.tsx +0 -66
- package/ui/src/hooks/useAutoRefresh.ts +0 -39
- package/ui/src/i18n/ui.ts +0 -473
- package/ui/src/main.tsx +0 -16
- package/ui/src/pages/EvolutionPage.tsx +0 -333
- package/ui/src/pages/FeedbackPage.tsx +0 -138
- package/ui/src/pages/GateMonitorPage.tsx +0 -136
- package/ui/src/pages/LoginPage.tsx +0 -89
- package/ui/src/pages/OverviewPage.tsx +0 -599
- package/ui/src/pages/SamplesPage.tsx +0 -174
- package/ui/src/pages/ThinkingModelsPage.tsx +0 -702
- package/ui/src/styles.css +0 -2020
- package/ui/src/types.ts +0 -384
- package/ui/src/utils/format.ts +0 -15
|
@@ -1,532 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from 'vitest';
|
|
2
|
-
import {
|
|
3
|
-
scoreCandidate,
|
|
4
|
-
checkThresholds,
|
|
5
|
-
rankCandidates,
|
|
6
|
-
runTournament,
|
|
7
|
-
DEFAULT_SCORING_WEIGHTS,
|
|
8
|
-
validateCandidateDiversity,
|
|
9
|
-
} from '../../src/core/nocturnal-candidate-scoring.js';
|
|
10
|
-
import type { DreamerCandidate, PhilosopherJudgment } from '../../src/core/nocturnal-trinity.js';
|
|
11
|
-
import type { ThresholdValues } from '../../src/core/adaptive-thresholds.js';
|
|
12
|
-
|
|
13
|
-
// ---------------------------------------------------------------------------
|
|
14
|
-
// Test Fixtures
|
|
15
|
-
// ---------------------------------------------------------------------------
|
|
16
|
-
|
|
17
|
-
function makeCandidate(overrides: Partial<DreamerCandidate> = {}): DreamerCandidate {
|
|
18
|
-
return {
|
|
19
|
-
candidateIndex: 0,
|
|
20
|
-
badDecision: 'Did something wrong without verifying preconditions',
|
|
21
|
-
betterDecision: 'Read the relevant file to understand its structure before making changes',
|
|
22
|
-
rationale: 'Verifying preconditions prevents errors and ensures actions are appropriate',
|
|
23
|
-
confidence: 0.85,
|
|
24
|
-
...overrides,
|
|
25
|
-
};
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
function makeJudgment(candidateIndex: number, overrides: Partial<PhilosopherJudgment> = {}): PhilosopherJudgment {
|
|
29
|
-
return {
|
|
30
|
-
candidateIndex,
|
|
31
|
-
critique: 'Strong alignment with the principle',
|
|
32
|
-
principleAligned: true,
|
|
33
|
-
score: 0.85,
|
|
34
|
-
rank: 1,
|
|
35
|
-
...overrides,
|
|
36
|
-
};
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
const DEFAULT_THRESHOLDS: ThresholdValues = {
|
|
40
|
-
schemaCompletenessMin: 0.6,
|
|
41
|
-
principleAlignmentMin: 0.7,
|
|
42
|
-
executabilityMin: 0.65,
|
|
43
|
-
boundednessMin: 0.5,
|
|
44
|
-
confidenceMin: 0.6,
|
|
45
|
-
aggregateMin: 0.65,
|
|
46
|
-
};
|
|
47
|
-
|
|
48
|
-
// ---------------------------------------------------------------------------
|
|
49
|
-
// Tests: scoreCandidate
|
|
50
|
-
// ---------------------------------------------------------------------------
|
|
51
|
-
|
|
52
|
-
describe('scoreCandidate', () => {
|
|
53
|
-
it('scores a valid candidate correctly', () => {
|
|
54
|
-
const candidate = makeCandidate();
|
|
55
|
-
const judgment = makeJudgment(0);
|
|
56
|
-
const scores = scoreCandidate(candidate, judgment);
|
|
57
|
-
|
|
58
|
-
expect(scores.schemaCompleteness).toBeGreaterThan(0);
|
|
59
|
-
expect(scores.principleAlignment).toBe(1.0); // principleAligned: true
|
|
60
|
-
expect(scores.executability).toBeGreaterThan(0);
|
|
61
|
-
expect(scores.boundedness).toBeGreaterThan(0);
|
|
62
|
-
expect(scores.confidence).toBeGreaterThan(0);
|
|
63
|
-
expect(scores.aggregate).toBeGreaterThan(0);
|
|
64
|
-
});
|
|
65
|
-
|
|
66
|
-
it('penalizes non-principle-aligned candidates', () => {
|
|
67
|
-
const candidate = makeCandidate();
|
|
68
|
-
const judgment = makeJudgment(0, { principleAligned: false, score: 0.4 });
|
|
69
|
-
const scores = scoreCandidate(candidate, judgment);
|
|
70
|
-
|
|
71
|
-
expect(scores.principleAlignment).toBeLessThan(0.5);
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
it('penalizes missing fields in schema completeness', () => {
|
|
75
|
-
const candidate = makeCandidate({ betterDecision: '' });
|
|
76
|
-
const judgment = makeJudgment(0);
|
|
77
|
-
const scores = scoreCandidate(candidate, judgment);
|
|
78
|
-
|
|
79
|
-
expect(scores.schemaCompleteness).toBeLessThan(1.0);
|
|
80
|
-
});
|
|
81
|
-
|
|
82
|
-
it('penalizes generic betterDecision without actionable verbs', () => {
|
|
83
|
-
const candidate = makeCandidate({ betterDecision: 'Do something better' });
|
|
84
|
-
const judgment = makeJudgment(0);
|
|
85
|
-
const scores = scoreCandidate(candidate, judgment);
|
|
86
|
-
|
|
87
|
-
expect(scores.executability).toBeLessThan(1.0);
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
it('rewards specific betterDecision with file paths', () => {
|
|
91
|
-
const candidate = makeCandidate({
|
|
92
|
-
betterDecision: 'Read src/main.ts to understand the structure',
|
|
93
|
-
});
|
|
94
|
-
const judgment = makeJudgment(0);
|
|
95
|
-
const scores = scoreCandidate(candidate, judgment);
|
|
96
|
-
|
|
97
|
-
expect(scores.boundedness).toBeGreaterThan(0.5);
|
|
98
|
-
});
|
|
99
|
-
|
|
100
|
-
it('does not penalize words that merely contain "it" as a substring', () => {
|
|
101
|
-
const candidate = makeCandidate({
|
|
102
|
-
betterDecision: 'Verify preconditions in config.json before retrying',
|
|
103
|
-
confidence: 0.92,
|
|
104
|
-
});
|
|
105
|
-
const judgment = makeJudgment(0, { score: 0.92, principleAligned: true });
|
|
106
|
-
const scores = scoreCandidate(candidate, judgment);
|
|
107
|
-
|
|
108
|
-
// Boundedness should remain 0.7 (0.5 base + 0.2 specific target) because
|
|
109
|
-
// "preconditions" must not trigger the generic word "it" penalty.
|
|
110
|
-
expect(scores.boundedness).toBe(0.7);
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
it('uses custom weights when provided', () => {
|
|
114
|
-
const candidate = makeCandidate();
|
|
115
|
-
const judgment = makeJudgment(0);
|
|
116
|
-
const customWeights = { ...DEFAULT_SCORING_WEIGHTS, principleAlignment: 0.5 };
|
|
117
|
-
const scores = scoreCandidate(candidate, judgment, customWeights);
|
|
118
|
-
|
|
119
|
-
// With higher weight on principleAlignment, aggregate should be higher for aligned candidates
|
|
120
|
-
expect(scores.aggregate).toBeGreaterThan(0);
|
|
121
|
-
});
|
|
122
|
-
|
|
123
|
-
it('does not crash when badDecision is undefined — lowers score instead', () => {
|
|
124
|
-
const candidate = makeCandidate({ badDecision: undefined as unknown as string });
|
|
125
|
-
const judgment = makeJudgment(0);
|
|
126
|
-
const scores = scoreCandidate(candidate, judgment);
|
|
127
|
-
|
|
128
|
-
expect(scores.schemaCompleteness).toBeLessThan(1.0);
|
|
129
|
-
expect(scores.aggregate).toBeGreaterThanOrEqual(0);
|
|
130
|
-
});
|
|
131
|
-
|
|
132
|
-
it('does not crash when betterDecision is undefined — lowers score instead', () => {
|
|
133
|
-
const candidate = makeCandidate({ betterDecision: undefined as unknown as string });
|
|
134
|
-
const judgment = makeJudgment(0);
|
|
135
|
-
const scores = scoreCandidate(candidate, judgment);
|
|
136
|
-
|
|
137
|
-
expect(scores.schemaCompleteness).toBeLessThan(1.0);
|
|
138
|
-
expect(scores.aggregate).toBeGreaterThanOrEqual(0);
|
|
139
|
-
});
|
|
140
|
-
|
|
141
|
-
it('does not crash when both badDecision and betterDecision are undefined', () => {
|
|
142
|
-
const candidate = makeCandidate({
|
|
143
|
-
badDecision: undefined as unknown as string,
|
|
144
|
-
betterDecision: undefined as unknown as string,
|
|
145
|
-
});
|
|
146
|
-
const judgment = makeJudgment(0);
|
|
147
|
-
const scores = scoreCandidate(candidate, judgment);
|
|
148
|
-
|
|
149
|
-
expect(scores.schemaCompleteness).toBeLessThan(1.0);
|
|
150
|
-
expect(scores.aggregate).toBeGreaterThanOrEqual(0);
|
|
151
|
-
});
|
|
152
|
-
});
|
|
153
|
-
|
|
154
|
-
// ---------------------------------------------------------------------------
|
|
155
|
-
// Tests: checkThresholds
|
|
156
|
-
// ---------------------------------------------------------------------------
|
|
157
|
-
|
|
158
|
-
describe('checkThresholds', () => {
|
|
159
|
-
it('passes all thresholds with good scores', () => {
|
|
160
|
-
const scores = {
|
|
161
|
-
schemaCompleteness: 0.9,
|
|
162
|
-
principleAlignment: 0.9,
|
|
163
|
-
executability: 0.9,
|
|
164
|
-
boundedness: 0.9,
|
|
165
|
-
confidence: 0.9,
|
|
166
|
-
aggregate: 0.9,
|
|
167
|
-
};
|
|
168
|
-
const [passed, failed] = checkThresholds(scores, DEFAULT_THRESHOLDS);
|
|
169
|
-
|
|
170
|
-
expect(passed).toBe(true);
|
|
171
|
-
expect(failed).toHaveLength(0);
|
|
172
|
-
});
|
|
173
|
-
|
|
174
|
-
it('fails when schema completeness is below threshold', () => {
|
|
175
|
-
const scores = {
|
|
176
|
-
schemaCompleteness: 0.3,
|
|
177
|
-
principleAlignment: 0.9,
|
|
178
|
-
executability: 0.9,
|
|
179
|
-
boundedness: 0.9,
|
|
180
|
-
confidence: 0.9,
|
|
181
|
-
aggregate: 0.9,
|
|
182
|
-
};
|
|
183
|
-
const [passed, failed] = checkThresholds(scores, DEFAULT_THRESHOLDS);
|
|
184
|
-
|
|
185
|
-
expect(passed).toBe(false);
|
|
186
|
-
// checkThresholds returns formatted strings like "schemaCompleteness (0.3 < 0.6)"
|
|
187
|
-
expect(failed.some(f => f.includes('schemaCompleteness'))).toBe(true);
|
|
188
|
-
});
|
|
189
|
-
|
|
190
|
-
it('fails when multiple thresholds are broken', () => {
|
|
191
|
-
const scores = {
|
|
192
|
-
schemaCompleteness: 0.3,
|
|
193
|
-
principleAlignment: 0.3,
|
|
194
|
-
executability: 0.3,
|
|
195
|
-
boundedness: 0.3,
|
|
196
|
-
confidence: 0.3,
|
|
197
|
-
aggregate: 0.3,
|
|
198
|
-
};
|
|
199
|
-
const [passed, failed] = checkThresholds(scores, DEFAULT_THRESHOLDS);
|
|
200
|
-
|
|
201
|
-
expect(passed).toBe(false);
|
|
202
|
-
expect(failed.length).toBeGreaterThan(1);
|
|
203
|
-
});
|
|
204
|
-
|
|
205
|
-
it('reports all failed thresholds', () => {
|
|
206
|
-
const scores = {
|
|
207
|
-
schemaCompleteness: 0.5, // < 0.6 → FAIL
|
|
208
|
-
principleAlignment: 0.7, // >= 0.7 → PASS (at threshold)
|
|
209
|
-
executability: 0.5, // < 0.65 → FAIL
|
|
210
|
-
boundedness: 0.7, // >= 0.65 → PASS (above new threshold)
|
|
211
|
-
confidence: 0.5, // < 0.6 → FAIL
|
|
212
|
-
aggregate: 0.5, // < 0.65 → FAIL
|
|
213
|
-
};
|
|
214
|
-
const [passed, failed] = checkThresholds(scores, DEFAULT_THRESHOLDS);
|
|
215
|
-
|
|
216
|
-
expect(passed).toBe(false);
|
|
217
|
-
// Exactly 4 failures: schemaCompleteness, executability, confidence, aggregate
|
|
218
|
-
expect(failed.length).toBe(4);
|
|
219
|
-
expect(failed.some(f => f.includes('schemaCompleteness'))).toBe(true);
|
|
220
|
-
expect(failed.some(f => f.includes('executability'))).toBe(true);
|
|
221
|
-
expect(failed.some(f => f.includes('confidence'))).toBe(true);
|
|
222
|
-
expect(failed.some(f => f.includes('aggregate'))).toBe(true);
|
|
223
|
-
});
|
|
224
|
-
});
|
|
225
|
-
|
|
226
|
-
// ---------------------------------------------------------------------------
|
|
227
|
-
// Tests: rankCandidates
|
|
228
|
-
// ---------------------------------------------------------------------------
|
|
229
|
-
|
|
230
|
-
describe('rankCandidates', () => {
|
|
231
|
-
it('ranks candidates by aggregate score', () => {
|
|
232
|
-
// Use very different confidence levels to ensure clear ranking
|
|
233
|
-
// Candidate 0: low confidence (0.5) - lower aggregate
|
|
234
|
-
// Candidate 1: high confidence (0.9) - higher aggregate
|
|
235
|
-
const candidates = [
|
|
236
|
-
makeCandidate({ candidateIndex: 0, confidence: 0.5, betterDecision: 'Read config.json to understand setup' }),
|
|
237
|
-
makeCandidate({ candidateIndex: 1, confidence: 0.9, betterDecision: 'Read main.ts to understand setup' }),
|
|
238
|
-
];
|
|
239
|
-
const judgments = [
|
|
240
|
-
makeJudgment(0, { score: 0.5, rank: 1, principleAligned: true }),
|
|
241
|
-
makeJudgment(1, { score: 0.9, rank: 1, principleAligned: true }),
|
|
242
|
-
];
|
|
243
|
-
|
|
244
|
-
const ranked = rankCandidates(candidates, judgments, DEFAULT_THRESHOLDS);
|
|
245
|
-
|
|
246
|
-
// Candidate 1 has higher score and should be ranked first
|
|
247
|
-
expect(ranked[0].candidateIndex).toBe(1);
|
|
248
|
-
expect(ranked[0].rank).toBe(1);
|
|
249
|
-
expect(ranked[1].rank).toBe(2);
|
|
250
|
-
});
|
|
251
|
-
|
|
252
|
-
it('excludes candidates that fail thresholds', () => {
|
|
253
|
-
// Candidate 0 has low confidence and fails principle alignment - should fail
|
|
254
|
-
// Candidate 1 has high confidence and passes - should pass
|
|
255
|
-
const candidates = [
|
|
256
|
-
makeCandidate({ candidateIndex: 0, confidence: 0.3, betterDecision: 'Check errors in src/main.ts' }),
|
|
257
|
-
makeCandidate({ candidateIndex: 1, confidence: 0.9, betterDecision: 'Read error logs in error.json' }),
|
|
258
|
-
];
|
|
259
|
-
const judgments = [
|
|
260
|
-
makeJudgment(0, { score: 0.5, principleAligned: false }),
|
|
261
|
-
makeJudgment(1, { score: 0.9, principleAligned: true }),
|
|
262
|
-
];
|
|
263
|
-
|
|
264
|
-
const ranked = rankCandidates(candidates, judgments, DEFAULT_THRESHOLDS);
|
|
265
|
-
|
|
266
|
-
// Candidate 1 passes thresholds (high confidence, principle aligned, has file path)
|
|
267
|
-
expect(ranked[0].thresholdPassed).toBe(true);
|
|
268
|
-
// Candidate 0 fails thresholds (low confidence, not principle aligned)
|
|
269
|
-
expect(ranked[1].thresholdPassed).toBe(false);
|
|
270
|
-
});
|
|
271
|
-
|
|
272
|
-
it('uses candidateIndex as stable tie-break', () => {
|
|
273
|
-
// Two candidates with same scoring profile but different indices
|
|
274
|
-
const candidates = [
|
|
275
|
-
makeCandidate({ candidateIndex: 5, betterDecision: 'Read src/index.ts to understand', confidence: 0.8 }),
|
|
276
|
-
makeCandidate({ candidateIndex: 1, betterDecision: 'Read src/index.ts to understand', confidence: 0.8 }),
|
|
277
|
-
];
|
|
278
|
-
// Both have identical judgments (same score, both aligned)
|
|
279
|
-
const judgments = [
|
|
280
|
-
makeJudgment(1, { score: 0.8, principleAligned: true }),
|
|
281
|
-
makeJudgment(5, { score: 0.8, principleAligned: true }),
|
|
282
|
-
];
|
|
283
|
-
|
|
284
|
-
const ranked = rankCandidates(candidates, judgments, DEFAULT_THRESHOLDS);
|
|
285
|
-
|
|
286
|
-
// Lower candidateIndex wins tie
|
|
287
|
-
expect(ranked[0].candidateIndex).toBe(1);
|
|
288
|
-
});
|
|
289
|
-
|
|
290
|
-
it('handles empty input gracefully', () => {
|
|
291
|
-
const ranked = rankCandidates([], [], DEFAULT_THRESHOLDS);
|
|
292
|
-
expect(ranked).toHaveLength(0);
|
|
293
|
-
});
|
|
294
|
-
|
|
295
|
-
it('skips candidates without matching judgments', () => {
|
|
296
|
-
const candidates = [makeCandidate({ candidateIndex: 0 })];
|
|
297
|
-
const judgments = [makeJudgment(99)]; // No matching judgment
|
|
298
|
-
|
|
299
|
-
const ranked = rankCandidates(candidates, judgments, DEFAULT_THRESHOLDS);
|
|
300
|
-
expect(ranked).toHaveLength(0);
|
|
301
|
-
});
|
|
302
|
-
});
|
|
303
|
-
|
|
304
|
-
// ---------------------------------------------------------------------------
|
|
305
|
-
// Tests: runTournament
|
|
306
|
-
// ---------------------------------------------------------------------------
|
|
307
|
-
|
|
308
|
-
describe('runTournament', () => {
|
|
309
|
-
it('selects the highest-scoring threshold-passing candidate', () => {
|
|
310
|
-
// Use actionable verbs and proper file paths to pass boundedness threshold
|
|
311
|
-
const candidates = [
|
|
312
|
-
makeCandidate({ candidateIndex: 0, confidence: 0.7, betterDecision: 'Read config.json to verify settings' }),
|
|
313
|
-
makeCandidate({ candidateIndex: 1, confidence: 0.9, betterDecision: 'Review error.json logs for errors' }),
|
|
314
|
-
makeCandidate({ candidateIndex: 2, confidence: 0.5, betterDecision: 'Check main.ts before proceeding' }),
|
|
315
|
-
];
|
|
316
|
-
const judgments = [
|
|
317
|
-
makeJudgment(0, { score: 0.7, principleAligned: true }),
|
|
318
|
-
makeJudgment(1, { score: 0.9, principleAligned: true }),
|
|
319
|
-
makeJudgment(2, { score: 0.5, principleAligned: true }),
|
|
320
|
-
];
|
|
321
|
-
|
|
322
|
-
const result = runTournament(candidates, judgments, DEFAULT_THRESHOLDS);
|
|
323
|
-
|
|
324
|
-
expect(result.success).toBe(true);
|
|
325
|
-
expect(result.winner).not.toBeNull();
|
|
326
|
-
expect(result.winner!.candidateIndex).toBe(1);
|
|
327
|
-
expect(result.rankedCandidates).toHaveLength(3);
|
|
328
|
-
});
|
|
329
|
-
|
|
330
|
-
it('fails when all candidates fail thresholds', () => {
|
|
331
|
-
// Candidates with poor confidence and not principle-aligned should fail
|
|
332
|
-
const candidates = [
|
|
333
|
-
makeCandidate({ candidateIndex: 0, confidence: 0.2, betterDecision: 'Do something in src.ts' }),
|
|
334
|
-
makeCandidate({ candidateIndex: 1, confidence: 0.1, betterDecision: 'Try again with config.json' }),
|
|
335
|
-
];
|
|
336
|
-
const judgments = [
|
|
337
|
-
makeJudgment(0, { score: 0.3, principleAligned: false }),
|
|
338
|
-
makeJudgment(1, { score: 0.2, principleAligned: false }),
|
|
339
|
-
];
|
|
340
|
-
|
|
341
|
-
const result = runTournament(candidates, judgments, DEFAULT_THRESHOLDS);
|
|
342
|
-
|
|
343
|
-
expect(result.success).toBe(false);
|
|
344
|
-
expect(result.winner).toBeNull();
|
|
345
|
-
expect(result.failureReason).toContain('threshold');
|
|
346
|
-
});
|
|
347
|
-
|
|
348
|
-
it('provides explainable trace', () => {
|
|
349
|
-
const candidates = [makeCandidate({ candidateIndex: 0, betterDecision: 'Read error.json to check logs' })];
|
|
350
|
-
const judgments = [makeJudgment(0, { score: 0.9, principleAligned: true })];
|
|
351
|
-
|
|
352
|
-
const result = runTournament(candidates, judgments, DEFAULT_THRESHOLDS);
|
|
353
|
-
|
|
354
|
-
expect(result.trace).toBeDefined();
|
|
355
|
-
expect(result.trace.length).toBeGreaterThan(0);
|
|
356
|
-
expect(result.trace[0].step).toBeDefined();
|
|
357
|
-
expect(result.trace[0].details).toBeDefined();
|
|
358
|
-
});
|
|
359
|
-
|
|
360
|
-
it('is deterministic — same inputs yield same winner', () => {
|
|
361
|
-
const candidates = [
|
|
362
|
-
makeCandidate({ candidateIndex: 0, confidence: 0.8, betterDecision: 'Read config.json to understand' }),
|
|
363
|
-
makeCandidate({ candidateIndex: 1, confidence: 0.9, betterDecision: 'Review error.json for issues' }),
|
|
364
|
-
];
|
|
365
|
-
const judgments = [
|
|
366
|
-
makeJudgment(0, { score: 0.8, principleAligned: true }),
|
|
367
|
-
makeJudgment(1, { score: 0.9, principleAligned: true }),
|
|
368
|
-
];
|
|
369
|
-
|
|
370
|
-
const result1 = runTournament(candidates, judgments, DEFAULT_THRESHOLDS);
|
|
371
|
-
const result2 = runTournament(candidates, judgments, DEFAULT_THRESHOLDS);
|
|
372
|
-
|
|
373
|
-
expect(result1.winner!.candidateIndex).toBe(result2.winner!.candidateIndex);
|
|
374
|
-
});
|
|
375
|
-
});
|
|
376
|
-
|
|
377
|
-
// ---------------------------------------------------------------------------
|
|
378
|
-
// Tests: DEFAULT_SCORING_WEIGHTS
|
|
379
|
-
// ---------------------------------------------------------------------------
|
|
380
|
-
|
|
381
|
-
describe('DEFAULT_SCORING_WEIGHTS', () => {
|
|
382
|
-
it('has weights that sum to 1.0', () => {
|
|
383
|
-
const sum = Object.values(DEFAULT_SCORING_WEIGHTS).reduce((a, b) => a + b, 0);
|
|
384
|
-
expect(sum).toBeCloseTo(1.0, 2);
|
|
385
|
-
});
|
|
386
|
-
|
|
387
|
-
it('has all required properties', () => {
|
|
388
|
-
expect(DEFAULT_SCORING_WEIGHTS.schemaCompleteness).toBeDefined();
|
|
389
|
-
expect(DEFAULT_SCORING_WEIGHTS.principleAlignment).toBeDefined();
|
|
390
|
-
expect(DEFAULT_SCORING_WEIGHTS.executability).toBeDefined();
|
|
391
|
-
expect(DEFAULT_SCORING_WEIGHTS.boundedness).toBeDefined();
|
|
392
|
-
expect(DEFAULT_SCORING_WEIGHTS.confidence).toBeDefined();
|
|
393
|
-
});
|
|
394
|
-
|
|
395
|
-
it('has values in valid range (0-1)', () => {
|
|
396
|
-
for (const weight of Object.values(DEFAULT_SCORING_WEIGHTS)) {
|
|
397
|
-
expect(weight).toBeGreaterThanOrEqual(0);
|
|
398
|
-
expect(weight).toBeLessThanOrEqual(1);
|
|
399
|
-
}
|
|
400
|
-
});
|
|
401
|
-
});
|
|
402
|
-
|
|
403
|
-
// ---------------------------------------------------------------------------
|
|
404
|
-
// Tests: validateCandidateDiversity
|
|
405
|
-
// ---------------------------------------------------------------------------
|
|
406
|
-
|
|
407
|
-
describe('validateCandidateDiversity', () => {
|
|
408
|
-
it('passes when candidates have 2+ distinct risk levels and low keyword overlap', () => {
|
|
409
|
-
const candidates: DreamerCandidate[] = [
|
|
410
|
-
makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Read config.json to verify settings' }),
|
|
411
|
-
makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'Refactor the entire authentication module from scratch' }),
|
|
412
|
-
];
|
|
413
|
-
const result = validateCandidateDiversity(candidates);
|
|
414
|
-
expect(result.diversityCheckPassed).toBe(true);
|
|
415
|
-
expect(result.riskLevelDiversity).toBe(true);
|
|
416
|
-
expect(result.keywordOverlapPassed).toBe(true);
|
|
417
|
-
});
|
|
418
|
-
|
|
419
|
-
it('fails when all candidates have the same risk level', () => {
|
|
420
|
-
const candidates: DreamerCandidate[] = [
|
|
421
|
-
makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Read file A to check settings' }),
|
|
422
|
-
makeCandidate({ candidateIndex: 1, riskLevel: 'low', betterDecision: 'Review file completely different approach' }),
|
|
423
|
-
makeCandidate({ candidateIndex: 2, riskLevel: 'low', betterDecision: 'Inspect another unique diagnostic method' }),
|
|
424
|
-
];
|
|
425
|
-
const result = validateCandidateDiversity(candidates);
|
|
426
|
-
expect(result.diversityCheckPassed).toBe(false);
|
|
427
|
-
expect(result.riskLevelDiversity).toBe(false);
|
|
428
|
-
});
|
|
429
|
-
|
|
430
|
-
it('fails when candidate pair has keyword overlap > 0.8', () => {
|
|
431
|
-
const candidates: DreamerCandidate[] = [
|
|
432
|
-
makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Review the authentication configuration file before making any changes to the system' }),
|
|
433
|
-
makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'Review the authentication configuration file before making any changes to the system' }),
|
|
434
|
-
];
|
|
435
|
-
const result = validateCandidateDiversity(candidates);
|
|
436
|
-
expect(result.diversityCheckPassed).toBe(false);
|
|
437
|
-
expect(result.keywordOverlapPassed).toBe(false);
|
|
438
|
-
expect(result.maxOverlapScore).toBeGreaterThan(0.8);
|
|
439
|
-
});
|
|
440
|
-
|
|
441
|
-
it('passes for single candidate', () => {
|
|
442
|
-
const candidates: DreamerCandidate[] = [
|
|
443
|
-
makeCandidate({ candidateIndex: 0, riskLevel: 'low' }),
|
|
444
|
-
];
|
|
445
|
-
const result = validateCandidateDiversity(candidates);
|
|
446
|
-
expect(result.diversityCheckPassed).toBe(true);
|
|
447
|
-
expect(result.details).toContain('Single candidate');
|
|
448
|
-
});
|
|
449
|
-
|
|
450
|
-
it('passes for empty array', () => {
|
|
451
|
-
const result = validateCandidateDiversity([]);
|
|
452
|
-
expect(result.diversityCheckPassed).toBe(true);
|
|
453
|
-
expect(result.details).toContain('No candidates');
|
|
454
|
-
});
|
|
455
|
-
|
|
456
|
-
it('passes when candidates lack riskLevel (graceful degradation)', () => {
|
|
457
|
-
const candidates: DreamerCandidate[] = [
|
|
458
|
-
makeCandidate({ candidateIndex: 0, betterDecision: 'Read config.json to verify settings' }),
|
|
459
|
-
makeCandidate({ candidateIndex: 1, betterDecision: 'Refactor the entire authentication module from scratch' }),
|
|
460
|
-
];
|
|
461
|
-
// No riskLevel on any candidate - should pass (no risk levels to check)
|
|
462
|
-
const result = validateCandidateDiversity(candidates);
|
|
463
|
-
expect(result.diversityCheckPassed).toBe(true);
|
|
464
|
-
expect(result.riskLevelDiversity).toBe(true);
|
|
465
|
-
});
|
|
466
|
-
|
|
467
|
-
it('fails when some candidates have riskLevel but fewer than 2 distinct values', () => {
|
|
468
|
-
const candidates: DreamerCandidate[] = [
|
|
469
|
-
makeCandidate({ candidateIndex: 0, riskLevel: 'medium', betterDecision: 'Read config.json to verify settings' }),
|
|
470
|
-
makeCandidate({ candidateIndex: 1, betterDecision: 'Refactor the entire authentication module from scratch' }),
|
|
471
|
-
];
|
|
472
|
-
// Only 1 candidate has riskLevel, so only 1 distinct value → fail
|
|
473
|
-
const result = validateCandidateDiversity(candidates);
|
|
474
|
-
expect(result.diversityCheckPassed).toBe(false);
|
|
475
|
-
expect(result.riskLevelDiversity).toBe(false);
|
|
476
|
-
});
|
|
477
|
-
|
|
478
|
-
it('uses max(|A|, |B|) as denominator for keyword overlap', () => {
|
|
479
|
-
// Short text A, long text B - overlap should use max as denominator
|
|
480
|
-
const candidates: DreamerCandidate[] = [
|
|
481
|
-
makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'review authentication configuration' }),
|
|
482
|
-
makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'review authentication configuration before proceeding with changes to the deployment pipeline infrastructure' }),
|
|
483
|
-
];
|
|
484
|
-
const result = validateCandidateDiversity(candidates);
|
|
485
|
-
// "review", "authentication", "configuration" overlap in both
|
|
486
|
-
// Set A = {review, authentication, configuration} = 3
|
|
487
|
-
// Set B = {review, authentication, configuration, before, proceeding, with, changes, deployment, pipeline, infrastructure} = 10
|
|
488
|
-
// intersection = 3, max(3, 10) = 10, overlap = 3/10 = 0.3
|
|
489
|
-
expect(result.maxOverlapScore).toBeLessThanOrEqual(0.4);
|
|
490
|
-
});
|
|
491
|
-
|
|
492
|
-
it('ignores words <= 3 characters in keyword overlap', () => {
|
|
493
|
-
const candidates: DreamerCandidate[] = [
|
|
494
|
-
makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'the and but for' }),
|
|
495
|
-
makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'the and but for' }),
|
|
496
|
-
];
|
|
497
|
-
// All words are <= 3 chars, so no keywords extracted → overlap = 0
|
|
498
|
-
const result = validateCandidateDiversity(candidates);
|
|
499
|
-
expect(result.keywordOverlapPassed).toBe(true);
|
|
500
|
-
expect(result.maxOverlapScore).toBe(0);
|
|
501
|
-
});
|
|
502
|
-
|
|
503
|
-
it('never throws on malformed input', () => {
|
|
504
|
-
// Undefined candidates
|
|
505
|
-
expect(() => validateCandidateDiversity(undefined as unknown as DreamerCandidate[])).not.toThrow();
|
|
506
|
-
// Null candidates
|
|
507
|
-
expect(() => validateCandidateDiversity(null as unknown as DreamerCandidate[])).not.toThrow();
|
|
508
|
-
// Candidates with undefined fields
|
|
509
|
-
expect(() => validateCandidateDiversity([
|
|
510
|
-
{ candidateIndex: 0 } as DreamerCandidate,
|
|
511
|
-
])).not.toThrow();
|
|
512
|
-
// Mixed valid and malformed
|
|
513
|
-
expect(() => validateCandidateDiversity([
|
|
514
|
-
makeCandidate({ candidateIndex: 0, riskLevel: 'low' }),
|
|
515
|
-
{ candidateIndex: 1 } as DreamerCandidate,
|
|
516
|
-
])).not.toThrow();
|
|
517
|
-
});
|
|
518
|
-
|
|
519
|
-
it('returns correct maxOverlapScore rounded to 2 decimal places', () => {
|
|
520
|
-
const candidates: DreamerCandidate[] = [
|
|
521
|
-
makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Review configuration settings before deployment' }),
|
|
522
|
-
makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'Review configuration settings before deployment testing' }),
|
|
523
|
-
];
|
|
524
|
-
const result = validateCandidateDiversity(candidates);
|
|
525
|
-
// Verify the maxOverlapScore is a number with at most 2 decimal places
|
|
526
|
-
const decimalPart = result.maxOverlapScore.toString().split('.')[1];
|
|
527
|
-
if (decimalPart) {
|
|
528
|
-
expect(decimalPart.length).toBeLessThanOrEqual(2);
|
|
529
|
-
}
|
|
530
|
-
expect(typeof result.maxOverlapScore).toBe('number');
|
|
531
|
-
});
|
|
532
|
-
});
|
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tests for #216: P_* principle violation and opportunity detection
|
|
3
|
-
*
|
|
4
|
-
* Before this fix, detectOpportunity and detectViolation only handled T-01~T-09,
|
|
5
|
-
* causing all P_* principles to return false for both applicable and violated.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import { describe, expect, it } from 'vitest';
|
|
9
|
-
import { detectOpportunity, detectViolation } from '../../src/core/nocturnal-compliance.js';
|
|
10
|
-
import type { SessionEvents } from '../../src/core/nocturnal-compliance.js';
|
|
11
|
-
|
|
12
|
-
function makeSession(overrides: Partial<SessionEvents> = {}): SessionEvents {
|
|
13
|
-
return {
|
|
14
|
-
sessionId: 'test-session',
|
|
15
|
-
toolCalls: overrides.toolCalls ?? [],
|
|
16
|
-
painSignals: overrides.painSignals ?? [],
|
|
17
|
-
gateBlocks: overrides.gateBlocks ?? [],
|
|
18
|
-
userCorrections: overrides.userCorrections ?? [],
|
|
19
|
-
planApprovals: overrides.planApprovals ?? [],
|
|
20
|
-
};
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
describe('#216: P_* principle detection', () => {
|
|
24
|
-
describe('detectOpportunity for P_* principles', () => {
|
|
25
|
-
it('returns applicable=true when session has pain signals', () => {
|
|
26
|
-
const session = makeSession({
|
|
27
|
-
painSignals: [{ source: 'tool_failure', score: 80, reason: 'write failed' }],
|
|
28
|
-
});
|
|
29
|
-
const result = detectOpportunity('P_001', session);
|
|
30
|
-
expect(result.applicable).toBe(true);
|
|
31
|
-
expect(result.reason).toContain('pain signal');
|
|
32
|
-
});
|
|
33
|
-
|
|
34
|
-
it('returns applicable=true when session has tool failures', () => {
|
|
35
|
-
const session = makeSession({
|
|
36
|
-
toolCalls: [{ toolName: 'write', filePath: 'test.txt', outcome: 'failure', errorMessage: 'disk full' }],
|
|
37
|
-
});
|
|
38
|
-
const result = detectOpportunity('P_042', session);
|
|
39
|
-
expect(result.applicable).toBe(true);
|
|
40
|
-
expect(result.reason).toContain('tool failure');
|
|
41
|
-
});
|
|
42
|
-
|
|
43
|
-
it('returns applicable=true when session has gate blocks', () => {
|
|
44
|
-
const session = makeSession({
|
|
45
|
-
gateBlocks: [{ toolName: 'bash', reason: 'high risk operation' }],
|
|
46
|
-
});
|
|
47
|
-
const result = detectOpportunity('P_065', session);
|
|
48
|
-
expect(result.applicable).toBe(true);
|
|
49
|
-
expect(result.reason).toContain('gate block');
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
it('returns applicable=false when session has no negative signals', () => {
|
|
53
|
-
const session = makeSession({
|
|
54
|
-
toolCalls: [{ toolName: 'read', filePath: 'test.txt', outcome: 'success' }],
|
|
55
|
-
});
|
|
56
|
-
const result = detectOpportunity('P_001', session);
|
|
57
|
-
expect(result.applicable).toBe(false);
|
|
58
|
-
expect(result.reason).toContain('no pain/tool-failure/gate-block');
|
|
59
|
-
});
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
describe('detectViolation for P_* principles', () => {
|
|
63
|
-
it('returns violated=true when session has high pain signals (score >= 50)', () => {
|
|
64
|
-
const session = makeSession({
|
|
65
|
-
painSignals: [{ source: 'tool_failure', score: 80, reason: 'write failed' }],
|
|
66
|
-
});
|
|
67
|
-
const result = detectViolation('P_001', session);
|
|
68
|
-
expect(result.violated).toBe(true);
|
|
69
|
-
expect(result.reason).toContain('pain signal');
|
|
70
|
-
});
|
|
71
|
-
|
|
72
|
-
it('returns violated=false when pain signals are low (score < 50)', () => {
|
|
73
|
-
const session = makeSession({
|
|
74
|
-
painSignals: [{ source: 'minor_issue', score: 30, reason: 'cosmetic' }],
|
|
75
|
-
toolCalls: [{ toolName: 'read', filePath: 'test.txt', outcome: 'success' }],
|
|
76
|
-
});
|
|
77
|
-
const result = detectViolation('P_001', session);
|
|
78
|
-
expect(result.violated).toBe(false);
|
|
79
|
-
expect(result.reason).toContain('no violation signals');
|
|
80
|
-
});
|
|
81
|
-
|
|
82
|
-
it('returns violated=true when session has tool failures', () => {
|
|
83
|
-
const session = makeSession({
|
|
84
|
-
toolCalls: [
|
|
85
|
-
{ toolName: 'write', filePath: 'test.txt', outcome: 'failure', errorMessage: 'disk full' },
|
|
86
|
-
],
|
|
87
|
-
});
|
|
88
|
-
const result = detectViolation('P_042', session);
|
|
89
|
-
expect(result.violated).toBe(true);
|
|
90
|
-
expect(result.reason).toContain('tool failure');
|
|
91
|
-
});
|
|
92
|
-
|
|
93
|
-
it('returns violated=true when session has gate blocks', () => {
|
|
94
|
-
const session = makeSession({
|
|
95
|
-
gateBlocks: [{ toolName: 'bash', reason: 'high risk operation' }],
|
|
96
|
-
});
|
|
97
|
-
const result = detectViolation('P_065', session);
|
|
98
|
-
expect(result.violated).toBe(true);
|
|
99
|
-
expect(result.reason).toContain('gate block');
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
it('returns violated=false for clean session with no negative signals', () => {
|
|
103
|
-
const session = makeSession({
|
|
104
|
-
toolCalls: [{ toolName: 'read', filePath: 'test.txt', outcome: 'success' }],
|
|
105
|
-
});
|
|
106
|
-
const result = detectViolation('P_001', session);
|
|
107
|
-
expect(result.violated).toBe(false);
|
|
108
|
-
expect(result.reason).toContain('no violation signals');
|
|
109
|
-
});
|
|
110
|
-
});
|
|
111
|
-
|
|
112
|
-
describe('T-* principles still work (regression check)', () => {
|
|
113
|
-
it('T-01 opportunity detected for edit operations', () => {
|
|
114
|
-
const session = makeSession({
|
|
115
|
-
toolCalls: [{ toolName: 'edit_file', filePath: 'test.ts', outcome: 'success' }],
|
|
116
|
-
});
|
|
117
|
-
const result = detectOpportunity('T-01', session);
|
|
118
|
-
expect(result.applicable).toBe(true);
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
it('T-01 violation detected when editing without reading first', () => {
|
|
122
|
-
const session = makeSession({
|
|
123
|
-
toolCalls: [
|
|
124
|
-
{ toolName: 'edit_file', filePath: 'test.ts', outcome: 'failure', errorMessage: 'merge conflict' },
|
|
125
|
-
],
|
|
126
|
-
painSignals: [{ source: 'test.ts edit failed', score: 70, reason: 'Did not survey structure before editing' }],
|
|
127
|
-
});
|
|
128
|
-
const result = detectViolation('T-01', session);
|
|
129
|
-
// T-01 violation: edit without prior read, with pain signal matching file or pattern
|
|
130
|
-
expect(result.violated).toBe(true);
|
|
131
|
-
});
|
|
132
|
-
});
|
|
133
|
-
});
|