principles-disciple 1.71.0 → 1.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +10 -5
- package/package.json +17 -19
- package/scripts/acceptance-test.mjs +16 -73
- package/scripts/sync-plugin.mjs +382 -77
- package/src/commands/archive-impl.ts +2 -1
- package/src/commands/capabilities.ts +2 -2
- package/src/commands/context.ts +2 -2
- package/src/commands/disable-impl.ts +2 -1
- package/src/commands/evolution-status.ts +16 -16
- package/src/commands/export.ts +12 -67
- package/src/commands/pain.ts +91 -1
- package/src/commands/principle-rollback.ts +2 -1
- package/src/commands/promote-impl.ts +7 -43
- package/src/commands/rollback-impl.ts +2 -1
- package/src/commands/rollback.ts +2 -1
- package/src/commands/samples.ts +2 -1
- package/src/commands/thinking-os.ts +2 -1
- package/src/config/errors.ts +18 -2
- package/src/constants/diagnostician.ts +2 -2
- package/src/constants/tools.ts +2 -1
- package/src/core/__tests__/focus-history.test.ts +210 -0
- package/src/core/config.ts +1 -1
- package/src/core/confirm-first-gate.ts +255 -0
- package/src/core/correction-cue-learner.ts +2 -136
- package/src/core/correction-types.ts +16 -88
- package/src/core/dictionary.ts +19 -20
- package/src/core/empathy-keyword-matcher.ts +17 -289
- package/src/core/empathy-types.ts +18 -229
- package/src/core/event-log.ts +38 -132
- package/src/core/evolution-reducer.ts +21 -2
- package/src/core/evolution-types.ts +76 -464
- package/src/core/file-store.ts +80 -0
- package/src/core/focus-history.ts +228 -955
- package/src/core/local-worker-routing.ts +34 -314
- package/src/core/merge-gate-audit.ts +0 -195
- package/src/core/pain-diagnostic-gate.ts +154 -0
- package/src/core/pain-signal.ts +21 -138
- package/src/core/pain.ts +15 -88
- package/src/core/pd-task-reconciler.ts +26 -115
- package/src/core/pd-task-service.ts +9 -9
- package/src/core/pd-task-types.ts +23 -127
- package/src/core/principle-compiler/__tests__/compiler-replay-gate.test.ts +174 -0
- package/src/core/principle-compiler/code-validator.ts +15 -42
- package/src/core/principle-compiler/compiler.ts +100 -15
- package/src/core/principle-compiler/index.ts +5 -2
- package/src/core/principle-compiler/template-generator.ts +4 -104
- package/src/core/principle-injection.ts +10 -202
- package/src/core/principle-internalization/filesystem-lifecycle-datasource.ts +42 -0
- package/src/core/principle-internalization/lifecycle-read-model.ts +39 -242
- package/src/core/principle-internalization/principle-lifecycle-service.ts +12 -10
- package/src/core/principle-tree-ledger-adapter.ts +145 -0
- package/src/core/principle-tree-ledger.ts +8 -6
- package/src/core/reflection/reflection-context.ts +14 -109
- package/src/core/replay-engine.ts +8 -500
- package/src/core/rule-host-helpers.ts +5 -35
- package/src/core/rule-host-types.ts +10 -82
- package/src/core/rule-host.ts +6 -63
- package/src/core/runtime-v2-prompt-activation-reader.ts +231 -0
- package/src/core/session-tracker.ts +87 -101
- package/src/core/shadow-observation-registry.ts +19 -48
- package/src/core/trajectory.ts +3 -1
- package/src/core/workflow-funnel-loader.ts +62 -68
- package/src/core/workspace-context.ts +46 -0
- package/src/core/workspace-dir-service.ts +1 -1
- package/src/core/workspace-dir-validation.ts +18 -9
- package/src/hooks/AGENTS.md +1 -1
- package/src/hooks/gate-block-helper.ts +46 -44
- package/src/hooks/gate.ts +207 -7
- package/src/hooks/lifecycle.ts +30 -32
- package/src/hooks/llm.ts +60 -32
- package/src/hooks/pain.ts +297 -103
- package/src/hooks/prompt.ts +469 -339
- package/src/hooks/subagent.ts +2 -29
- package/src/i18n/commands.ts +2 -10
- package/src/index.ts +95 -85
- package/src/openclaw-sdk.ts +311 -0
- package/src/service/central-database.ts +8 -4
- package/src/service/evolution-queue-migration.ts +2 -1
- package/src/service/evolution-worker.ts +163 -1786
- package/src/service/internalization-trigger-adapter.ts +302 -0
- package/src/service/keyword-optimization-service.ts +4 -4
- package/src/service/monitoring-query-service.ts +1 -215
- package/src/service/queue-io.ts +60 -331
- package/src/service/runtime-summary-service.ts +115 -18
- package/src/service/subagent-workflow/index.ts +0 -41
- package/src/service/subagent-workflow/types.ts +9 -120
- package/src/service/subagent-workflow/workflow-store.ts +2 -119
- package/src/service/workflow-watchdog.ts +0 -43
- package/src/types/event-payload.ts +16 -74
- package/src/types/event-types.ts +39 -547
- package/src/types/hygiene-types.ts +7 -30
- package/src/types/principle-tree-schema.ts +20 -222
- package/src/types/queue.ts +15 -70
- package/src/types/runtime-summary.ts +5 -49
- package/src/utils/io.ts +10 -0
- package/src/utils/retry.ts +1 -1
- package/src/utils/shadow-fingerprint.ts +2 -2
- package/src/utils/workspace-resolver.ts +50 -0
- package/templates/langs/en/core/AGENTS.md +2 -2
- package/templates/langs/en/core/BOOT.md +1 -1
- package/templates/langs/en/core/HEARTBEAT.md +2 -2
- package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
- package/templates/langs/en/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
- package/templates/langs/en/skills/evolve-task/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-cli-operator/SKILL.md +67 -0
- package/templates/langs/en/skills/pd-diagnostician/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-mentor/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-pain-signal/SKILL.md +17 -39
- package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +61 -0
- package/templates/langs/zh/core/AGENTS.md +2 -2
- package/templates/langs/zh/core/BOOT.md +1 -1
- package/templates/langs/zh/core/HEARTBEAT.md +2 -2
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +8 -8
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
- package/templates/langs/zh/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
- package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +21 -5
- package/templates/langs/zh/skills/evolve-task/SKILL.md +2 -2
- package/templates/langs/zh/skills/pd-cli-operator/SKILL.md +67 -0
- package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +1 -1
- package/templates/langs/zh/skills/pd-mentor/SKILL.md +1 -1
- package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +17 -38
- package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +61 -0
- package/tests/build-artifacts.test.ts +1 -3
- package/tests/commands/evolution-status.test.ts +0 -118
- package/tests/core/bootstrap-rules.test.ts +1 -1
- package/tests/core/config.test.ts +1 -1
- package/tests/core/event-log.test.ts +35 -0
- package/tests/core/evolution-engine.test.ts +610 -0
- package/tests/core/file-store.test.ts +102 -0
- package/tests/core/focus-history.test.ts +203 -11
- package/tests/core/merge-gate-audit.test.ts +2 -169
- package/tests/core/model-deployment-registry.test.ts +7 -1
- package/tests/core/model-training-registry.test.ts +19 -0
- package/tests/core/observability.test.ts +0 -1
- package/tests/core/pain-diagnostic-gate.test.ts +498 -0
- package/tests/core/pain.test.ts +0 -1
- package/tests/core/principle-internalization/deprecated-readiness.test.ts +2 -2
- package/tests/core/principle-internalization/lifecycle-metrics.test.ts +2 -2
- package/tests/core/principle-internalization/{internalization-routing-policy.test.ts → lifecycle-routing-policy.test.ts} +6 -6
- package/tests/core/principle-internalization/lineage-source-retired.test.ts +56 -0
- package/tests/core/principle-internalization/principle-lifecycle-service.test.ts +1 -23
- package/tests/core/principle-tree-ledger-adapter.test.ts +253 -0
- package/tests/core/reflection-context.test.ts +0 -14
- package/tests/core/replay-engine.test.ts +127 -215
- package/tests/core/rule-host-helpers.test.ts +2 -2
- package/tests/core/rule-implementation-runtime.test.ts +0 -27
- package/tests/core/workflow-funnel-loader.test.ts +162 -0
- package/tests/core/workspace-dir-validation.test.ts +8 -1
- package/tests/core-anti-growth.test.ts +192 -0
- package/tests/hook-workspace-nextaction-contract.test.ts +42 -0
- package/tests/hooks/confirm-first-gate.test.ts +333 -0
- package/tests/hooks/gate-auto-correct-shadow.test.ts +310 -0
- package/tests/hooks/gate-auto-correct.test.ts +665 -0
- package/tests/hooks/gate-rule-host-pipeline.test.ts +2 -1
- package/tests/hooks/pain.test.ts +269 -12
- package/tests/hooks/prompt-characterization.test.ts +500 -0
- package/tests/hooks/prompt-size-guard.test.ts +329 -0
- package/tests/hooks/runtime-v2-prompt-activation.test.ts +869 -0
- package/tests/index.test.ts +94 -1
- package/tests/integration/auto-entry-gate.test.ts +248 -0
- package/tests/integration/internalization-trigger-guard.test.ts +69 -0
- package/tests/integration/m8-legacy-paths.test.ts +63 -0
- package/tests/integration/runtime-v2-pain-guard.test.ts +125 -0
- package/tests/plugin-config-resolution-cutover.test.ts +359 -0
- package/tests/runtime-v2-discovery-guard.test.ts +154 -0
- package/tests/service/central-database.test.ts +457 -0
- package/tests/service/evolution-worker.correction-observer.test.ts +173 -0
- package/tests/service/evolution-worker.timeout.test.ts +11 -129
- package/tests/service/internalization-trigger-adapter.test.ts +251 -0
- package/tests/service/monitoring-query-service.test.ts +1 -47
- package/tests/service/queue-io.test.ts +1 -62
- package/tests/service/runtime-summary-service.test.ts +184 -3
- package/tests/service/workflow-watchdog.test.ts +0 -91
- package/tests/utils/file-lock.test.ts +5 -3
- package/tests/utils/session-key.test.ts +52 -0
- package/tests/utils/subagent-probe.test.ts +48 -1
- package/vitest.config.ts +4 -11
- package/.planning/codebase/ARCHITECTURE.md +0 -157
- package/.planning/codebase/CONCERNS.md +0 -145
- package/.planning/codebase/CONVENTIONS.md +0 -148
- package/.planning/codebase/INTEGRATIONS.md +0 -81
- package/.planning/codebase/STACK.md +0 -87
- package/.planning/codebase/STRUCTURE.md +0 -193
- package/.planning/codebase/TESTING.md +0 -243
- package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +0 -113
- package/docs/COMMAND_REFERENCE.md +0 -76
- package/docs/COMMAND_REFERENCE_EN.md +0 -79
- package/scripts/build-web.mjs +0 -46
- package/scripts/diagnose-nocturnal.mjs +0 -537
- package/scripts/seed-nocturnal-scenarios.mjs +0 -384
- package/src/commands/nocturnal-review.ts +0 -322
- package/src/commands/nocturnal-rollout.ts +0 -790
- package/src/commands/nocturnal-train.ts +0 -986
- package/src/commands/pd-reflect.ts +0 -88
- package/src/core/adaptive-thresholds.ts +0 -478
- package/src/core/diagnostician-task-store.ts +0 -192
- package/src/core/nocturnal-arbiter.ts +0 -715
- package/src/core/nocturnal-artifact-lineage.ts +0 -116
- package/src/core/nocturnal-artificer.ts +0 -257
- package/src/core/nocturnal-candidate-scoring.ts +0 -530
- package/src/core/nocturnal-compliance.ts +0 -1146
- package/src/core/nocturnal-dataset.ts +0 -763
- package/src/core/nocturnal-executability.ts +0 -428
- package/src/core/nocturnal-export.ts +0 -499
- package/src/core/nocturnal-paths.ts +0 -240
- package/src/core/nocturnal-reasoning-deriver.ts +0 -343
- package/src/core/nocturnal-rule-implementation-validator.ts +0 -246
- package/src/core/nocturnal-snapshot-contract.ts +0 -99
- package/src/core/nocturnal-trajectory-extractor.ts +0 -512
- package/src/core/nocturnal-trinity-types.ts +0 -218
- package/src/core/nocturnal-trinity.ts +0 -2680
- package/src/core/principle-internalization/deprecated-readiness.ts +0 -93
- package/src/core/principle-internalization/internalization-routing-policy.ts +0 -208
- package/src/core/principle-internalization/lifecycle-metrics.ts +0 -152
- package/src/http/principles-console-route.ts +0 -709
- package/src/service/central-health-service.ts +0 -49
- package/src/service/central-overview-service.ts +0 -138
- package/src/service/control-ui-query-service.ts +0 -900
- package/src/service/cooldown-strategy.ts +0 -97
- package/src/service/evolution-pain-context.ts +0 -79
- package/src/service/evolution-query-service.ts +0 -407
- package/src/service/health-query-service.ts +0 -1038
- package/src/service/nocturnal-config.ts +0 -214
- package/src/service/nocturnal-runtime.ts +0 -734
- package/src/service/nocturnal-service.ts +0 -1605
- package/src/service/nocturnal-target-selector.ts +0 -545
- package/src/service/sleep-cycle.ts +0 -157
- package/src/service/startup-reconciler.ts +0 -112
- package/src/service/subagent-workflow/correction-observer-types.ts +0 -82
- package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +0 -250
- package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +0 -1
- package/src/service/subagent-workflow/dynamic-timeout.ts +0 -30
- package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +0 -268
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -795
- package/src/service/subagent-workflow/runtime-direct-driver.ts +0 -268
- package/src/service/subagent-workflow/workflow-manager-base.ts +0 -580
- package/src/tools/write-pain-flag.ts +0 -215
- package/tests/commands/nocturnal-review.test.ts +0 -448
- package/tests/commands/nocturnal-train.test.ts +0 -97
- package/tests/commands/pd-reflect.test.ts +0 -49
- package/tests/core/adaptive-thresholds.test.ts +0 -261
- package/tests/core/nocturnal-arbiter.test.ts +0 -559
- package/tests/core/nocturnal-artifact-lineage.test.ts +0 -53
- package/tests/core/nocturnal-artificer.test.ts +0 -241
- package/tests/core/nocturnal-candidate-scoring.test.ts +0 -532
- package/tests/core/nocturnal-compliance-p-principles.test.ts +0 -133
- package/tests/core/nocturnal-compliance.test.ts +0 -646
- package/tests/core/nocturnal-dataset.test.ts +0 -892
- package/tests/core/nocturnal-e2e.test.ts +0 -234
- package/tests/core/nocturnal-executability.test.ts +0 -357
- package/tests/core/nocturnal-export.test.ts +0 -517
- package/tests/core/nocturnal-reasoning-deriver.test.ts +0 -372
- package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +0 -428
- package/tests/core/nocturnal-rule-implementation-validator.test.ts +0 -127
- package/tests/core/nocturnal-snapshot-contract.test.ts +0 -121
- package/tests/core/nocturnal-trajectory-extractor.test.ts +0 -634
- package/tests/core/nocturnal-trinity.test.ts +0 -2053
- package/tests/core/pain-auto-repair.test.ts +0 -96
- package/tests/core/pain-integration.test.ts +0 -510
- package/tests/fixtures/nocturnal-reviewed-subset.json +0 -183
- package/tests/http/principles-console-route.test.ts +0 -162
- package/tests/integration/chaos-resilience.test.ts +0 -348
- package/tests/integration/empathy-workflow-integration.test.ts +0 -626
- package/tests/integration/pain-diagnostician-loop.e2e.test.ts +0 -380
- package/tests/service/control-ui-query-service.test.ts +0 -121
- package/tests/service/cooldown-strategy.test.ts +0 -164
- package/tests/service/data-endpoints-regression.test.ts +0 -834
- package/tests/service/empathy-observer-workflow-manager.test.ts +0 -175
- package/tests/service/evolution-worker.nocturnal.test.ts +0 -601
- package/tests/service/nocturnal-runtime-hardening.test.ts +0 -118
- package/tests/service/nocturnal-runtime.test.ts +0 -473
- package/tests/service/nocturnal-service-code-candidate.test.ts +0 -330
- package/tests/service/nocturnal-target-selector.test.ts +0 -615
- package/tests/service/startup-reconciler.test.ts +0 -148
- package/tests/tools/write-pain-flag.test.ts +0 -358
- package/ui/src/App.tsx +0 -45
- package/ui/src/api.ts +0 -220
- package/ui/src/charts.tsx +0 -955
- package/ui/src/components/ErrorState.tsx +0 -6
- package/ui/src/components/Loading.tsx +0 -13
- package/ui/src/components/ProtectedRoute.tsx +0 -12
- package/ui/src/components/Shell.tsx +0 -91
- package/ui/src/components/WorkspaceConfig.tsx +0 -178
- package/ui/src/components/index.ts +0 -5
- package/ui/src/context/auth.tsx +0 -80
- package/ui/src/context/theme.tsx +0 -66
- package/ui/src/hooks/useAutoRefresh.ts +0 -39
- package/ui/src/i18n/ui.ts +0 -473
- package/ui/src/main.tsx +0 -16
- package/ui/src/pages/EvolutionPage.tsx +0 -333
- package/ui/src/pages/FeedbackPage.tsx +0 -138
- package/ui/src/pages/GateMonitorPage.tsx +0 -136
- package/ui/src/pages/LoginPage.tsx +0 -89
- package/ui/src/pages/OverviewPage.tsx +0 -599
- package/ui/src/pages/SamplesPage.tsx +0 -174
- package/ui/src/pages/ThinkingModelsPage.tsx +0 -702
- package/ui/src/styles.css +0 -2020
- package/ui/src/types.ts +0 -384
- package/ui/src/utils/format.ts +0 -15
|
@@ -1,2053 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi } from 'vitest';
|
|
2
|
-
import {
|
|
3
|
-
runTrinity,
|
|
4
|
-
runTrinityAsync,
|
|
5
|
-
validateDraftArtifact,
|
|
6
|
-
draftToArtifact,
|
|
7
|
-
DEFAULT_TRINITY_CONFIG,
|
|
8
|
-
OpenClawTrinityRuntimeAdapter,
|
|
9
|
-
TrinityRuntimeContractError,
|
|
10
|
-
NOCTURNAL_DREAMER_PROMPT,
|
|
11
|
-
NOCTURNAL_PHILOSOPHER_PROMPT,
|
|
12
|
-
formatReasoningContext,
|
|
13
|
-
invokeStubDreamer,
|
|
14
|
-
invokeStubPhilosopher,
|
|
15
|
-
validateExtraction,
|
|
16
|
-
type TrinityConfig,
|
|
17
|
-
type DreamerOutput,
|
|
18
|
-
type DreamerCandidate,
|
|
19
|
-
type PhilosopherOutput,
|
|
20
|
-
type PhilosopherJudgment,
|
|
21
|
-
type TrinityDraftArtifact,
|
|
22
|
-
type TrinityRuntimeAdapter,
|
|
23
|
-
type TrinityTelemetry,
|
|
24
|
-
type RejectedAnalysis,
|
|
25
|
-
type ChosenJustification,
|
|
26
|
-
type ContrastiveAnalysis,
|
|
27
|
-
} from '../../src/core/nocturnal-trinity.js';
|
|
28
|
-
import {
|
|
29
|
-
validateDreamerOutput,
|
|
30
|
-
validatePhilosopherOutput,
|
|
31
|
-
validateTrinityDraft,
|
|
32
|
-
} from '../../src/core/nocturnal-arbiter.js';
|
|
33
|
-
|
|
34
|
-
// ---------------------------------------------------------------------------
|
|
35
|
-
// Test Fixtures
|
|
36
|
-
// ---------------------------------------------------------------------------
|
|
37
|
-
|
|
38
|
-
function makeSnapshot(overrides: Partial<{
|
|
39
|
-
failureCount: number;
|
|
40
|
-
totalPainEvents: number;
|
|
41
|
-
totalGateBlocks: number;
|
|
42
|
-
}> = {}) {
|
|
43
|
-
return {
|
|
44
|
-
sessionId: 'session-test-123',
|
|
45
|
-
startedAt: '2026-04-12T00:00:00.000Z',
|
|
46
|
-
updatedAt: '2026-04-12T00:05:00.000Z',
|
|
47
|
-
assistantTurns: [],
|
|
48
|
-
userTurns: [],
|
|
49
|
-
toolCalls: [],
|
|
50
|
-
painEvents: [],
|
|
51
|
-
gateBlocks: [],
|
|
52
|
-
stats: {
|
|
53
|
-
failureCount: overrides.failureCount ?? 0,
|
|
54
|
-
totalPainEvents: overrides.totalPainEvents ?? 0,
|
|
55
|
-
totalGateBlocks: overrides.totalGateBlocks ?? 0,
|
|
56
|
-
totalAssistantTurns: 5,
|
|
57
|
-
totalToolCalls: 10,
|
|
58
|
-
},
|
|
59
|
-
};
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
// ---------------------------------------------------------------------------
|
|
63
|
-
// Tests: validateDreamerOutput
|
|
64
|
-
// ---------------------------------------------------------------------------
|
|
65
|
-
|
|
66
|
-
describe('validateDreamerOutput', () => {
|
|
67
|
-
it('passes a valid Dreamer output with candidates', () => {
|
|
68
|
-
const output = {
|
|
69
|
-
valid: true,
|
|
70
|
-
candidates: [
|
|
71
|
-
{
|
|
72
|
-
candidateIndex: 0,
|
|
73
|
-
badDecision: 'Did something wrong',
|
|
74
|
-
betterDecision: 'Do it right',
|
|
75
|
-
rationale: 'Because the principle says so',
|
|
76
|
-
confidence: 0.9,
|
|
77
|
-
},
|
|
78
|
-
],
|
|
79
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
80
|
-
};
|
|
81
|
-
const result = validateDreamerOutput(output);
|
|
82
|
-
expect(result.valid).toBe(true);
|
|
83
|
-
expect(result.failures).toHaveLength(0);
|
|
84
|
-
});
|
|
85
|
-
|
|
86
|
-
it('passes a valid Dreamer output with multiple candidates', () => {
|
|
87
|
-
const output = {
|
|
88
|
-
valid: true,
|
|
89
|
-
candidates: [
|
|
90
|
-
{
|
|
91
|
-
candidateIndex: 0,
|
|
92
|
-
badDecision: 'Did something wrong',
|
|
93
|
-
betterDecision: 'Do it right',
|
|
94
|
-
rationale: 'Because the principle says so',
|
|
95
|
-
confidence: 0.9,
|
|
96
|
-
},
|
|
97
|
-
{
|
|
98
|
-
candidateIndex: 1,
|
|
99
|
-
badDecision: 'Did another wrong thing',
|
|
100
|
-
betterDecision: 'Do it differently',
|
|
101
|
-
rationale: 'Alternative approach is better',
|
|
102
|
-
confidence: 0.8,
|
|
103
|
-
},
|
|
104
|
-
],
|
|
105
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
106
|
-
};
|
|
107
|
-
const result = validateDreamerOutput(output);
|
|
108
|
-
expect(result.valid).toBe(true);
|
|
109
|
-
expect(result.failures).toHaveLength(0);
|
|
110
|
-
});
|
|
111
|
-
|
|
112
|
-
it('rejects Dreamer output marked invalid', () => {
|
|
113
|
-
const output = {
|
|
114
|
-
valid: false,
|
|
115
|
-
candidates: [],
|
|
116
|
-
reason: 'No signal found',
|
|
117
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
118
|
-
};
|
|
119
|
-
const result = validateDreamerOutput(output);
|
|
120
|
-
expect(result.valid).toBe(false);
|
|
121
|
-
expect(result.failures.some(f => f.includes('marked invalid'))).toBe(true);
|
|
122
|
-
});
|
|
123
|
-
|
|
124
|
-
it('rejects Dreamer output marked invalid without reason', () => {
|
|
125
|
-
const output = {
|
|
126
|
-
valid: false,
|
|
127
|
-
candidates: [],
|
|
128
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
129
|
-
};
|
|
130
|
-
const result = validateDreamerOutput(output);
|
|
131
|
-
expect(result.valid).toBe(false);
|
|
132
|
-
});
|
|
133
|
-
|
|
134
|
-
it('rejects Dreamer output without candidates array', () => {
|
|
135
|
-
const output = {
|
|
136
|
-
valid: true,
|
|
137
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
138
|
-
};
|
|
139
|
-
const result = validateDreamerOutput(output);
|
|
140
|
-
expect(result.valid).toBe(false);
|
|
141
|
-
expect(result.failures.some(f => f.includes('candidates array'))).toBe(true);
|
|
142
|
-
});
|
|
143
|
-
|
|
144
|
-
it('rejects Dreamer candidate missing required fields', () => {
|
|
145
|
-
const output = {
|
|
146
|
-
valid: true,
|
|
147
|
-
candidates: [
|
|
148
|
-
{
|
|
149
|
-
candidateIndex: 0,
|
|
150
|
-
badDecision: 'Has badDecision but missing betterDecision',
|
|
151
|
-
// missing: betterDecision, rationale, confidence
|
|
152
|
-
},
|
|
153
|
-
],
|
|
154
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
155
|
-
};
|
|
156
|
-
const result = validateDreamerOutput(output);
|
|
157
|
-
expect(result.valid).toBe(false);
|
|
158
|
-
expect(result.failures.some(f => f.includes('betterDecision'))).toBe(true);
|
|
159
|
-
expect(result.failures.some(f => f.includes('rationale'))).toBe(true);
|
|
160
|
-
expect(result.failures.some(f => f.includes('confidence'))).toBe(true);
|
|
161
|
-
});
|
|
162
|
-
|
|
163
|
-
it('rejects Dreamer candidate with invalid confidence (out of range)', () => {
|
|
164
|
-
const output = {
|
|
165
|
-
valid: true,
|
|
166
|
-
candidates: [
|
|
167
|
-
{
|
|
168
|
-
candidateIndex: 0,
|
|
169
|
-
badDecision: 'Wrong',
|
|
170
|
-
betterDecision: 'Right',
|
|
171
|
-
rationale: 'Because',
|
|
172
|
-
confidence: 1.5, // out of range
|
|
173
|
-
},
|
|
174
|
-
],
|
|
175
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
176
|
-
};
|
|
177
|
-
const result = validateDreamerOutput(output);
|
|
178
|
-
expect(result.valid).toBe(false);
|
|
179
|
-
expect(result.failures.some(f => f.includes('confidence'))).toBe(true);
|
|
180
|
-
});
|
|
181
|
-
|
|
182
|
-
it('rejects Dreamer candidate with duplicate candidateIndex', () => {
|
|
183
|
-
const output = {
|
|
184
|
-
valid: true,
|
|
185
|
-
candidates: [
|
|
186
|
-
{
|
|
187
|
-
candidateIndex: 0,
|
|
188
|
-
badDecision: 'Wrong 1',
|
|
189
|
-
betterDecision: 'Right 1',
|
|
190
|
-
rationale: 'Because 1',
|
|
191
|
-
confidence: 0.9,
|
|
192
|
-
},
|
|
193
|
-
{
|
|
194
|
-
candidateIndex: 0, // duplicate
|
|
195
|
-
badDecision: 'Wrong 2',
|
|
196
|
-
betterDecision: 'Right 2',
|
|
197
|
-
rationale: 'Because 2',
|
|
198
|
-
confidence: 0.8,
|
|
199
|
-
},
|
|
200
|
-
],
|
|
201
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
202
|
-
};
|
|
203
|
-
const result = validateDreamerOutput(output);
|
|
204
|
-
expect(result.valid).toBe(false);
|
|
205
|
-
expect(result.failures.some(f => f.includes('duplicate'))).toBe(true);
|
|
206
|
-
});
|
|
207
|
-
|
|
208
|
-
it('rejects Dreamer candidate with identical badDecision and betterDecision', () => {
|
|
209
|
-
const output = {
|
|
210
|
-
valid: true,
|
|
211
|
-
candidates: [
|
|
212
|
-
{
|
|
213
|
-
candidateIndex: 0,
|
|
214
|
-
badDecision: 'Do the same thing',
|
|
215
|
-
betterDecision: 'Do the same thing', // identical
|
|
216
|
-
rationale: 'Because it is correct',
|
|
217
|
-
confidence: 0.9,
|
|
218
|
-
},
|
|
219
|
-
],
|
|
220
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
221
|
-
};
|
|
222
|
-
const result = validateDreamerOutput(output);
|
|
223
|
-
expect(result.valid).toBe(false);
|
|
224
|
-
expect(result.failures.some(f => f.includes('identical'))).toBe(true);
|
|
225
|
-
});
|
|
226
|
-
|
|
227
|
-
it('rejects Dreamer output missing generatedAt', () => {
|
|
228
|
-
const output = {
|
|
229
|
-
valid: true,
|
|
230
|
-
candidates: [
|
|
231
|
-
{
|
|
232
|
-
candidateIndex: 0,
|
|
233
|
-
badDecision: 'Wrong',
|
|
234
|
-
betterDecision: 'Right',
|
|
235
|
-
rationale: 'Because',
|
|
236
|
-
confidence: 0.9,
|
|
237
|
-
},
|
|
238
|
-
],
|
|
239
|
-
// missing generatedAt
|
|
240
|
-
};
|
|
241
|
-
const result = validateDreamerOutput(output);
|
|
242
|
-
expect(result.valid).toBe(false);
|
|
243
|
-
expect(result.failures.some(f => f.includes('generatedAt'))).toBe(true);
|
|
244
|
-
});
|
|
245
|
-
|
|
246
|
-
it('rejects non-object input', () => {
|
|
247
|
-
const result = validateDreamerOutput(null);
|
|
248
|
-
expect(result.valid).toBe(false);
|
|
249
|
-
});
|
|
250
|
-
|
|
251
|
-
it('rejects string input', () => {
|
|
252
|
-
const result = validateDreamerOutput('not an object');
|
|
253
|
-
expect(result.valid).toBe(false);
|
|
254
|
-
});
|
|
255
|
-
});
|
|
256
|
-
|
|
257
|
-
describe('OpenClawTrinityRuntimeAdapter contract hardening', () => {
|
|
258
|
-
function makeRuntimeApi(overrides: Partial<any> = {}) {
|
|
259
|
-
return {
|
|
260
|
-
runtime: {
|
|
261
|
-
agent: {
|
|
262
|
-
runEmbeddedPiAgent: vi.fn().mockResolvedValue({
|
|
263
|
-
payloads: [
|
|
264
|
-
{ text: '{"valid":true,"candidates":[],"generatedAt":"2026-04-12T00:00:00.000Z"}' },
|
|
265
|
-
],
|
|
266
|
-
}),
|
|
267
|
-
},
|
|
268
|
-
config: {
|
|
269
|
-
loadConfig: vi.fn().mockReturnValue({
|
|
270
|
-
agents: {
|
|
271
|
-
defaults: {
|
|
272
|
-
model: 'openai/gpt-5.4',
|
|
273
|
-
},
|
|
274
|
-
},
|
|
275
|
-
}),
|
|
276
|
-
},
|
|
277
|
-
...overrides.runtime,
|
|
278
|
-
},
|
|
279
|
-
logger: {
|
|
280
|
-
info: vi.fn(),
|
|
281
|
-
warn: vi.fn(),
|
|
282
|
-
error: vi.fn(),
|
|
283
|
-
},
|
|
284
|
-
};
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
it('rejects missing runtime.agent.runEmbeddedPiAgent contract explicitly', () => {
|
|
288
|
-
expect(() => new OpenClawTrinityRuntimeAdapter({ runtime: {} } as any)).toThrow(TrinityRuntimeContractError);
|
|
289
|
-
expect(() => new OpenClawTrinityRuntimeAdapter({ runtime: {} } as any)).toThrow(/runtime_unavailable/);
|
|
290
|
-
});
|
|
291
|
-
|
|
292
|
-
it('passes explicit provider/model overrides into runtime.agent.runEmbeddedPiAgent', async () => {
|
|
293
|
-
const api = makeRuntimeApi();
|
|
294
|
-
const adapter = new OpenClawTrinityRuntimeAdapter(api as any);
|
|
295
|
-
|
|
296
|
-
await adapter.invokeDreamer(makeSnapshot({ failureCount: 1 }) as any, 'T-08', 2);
|
|
297
|
-
|
|
298
|
-
expect(api.runtime.agent.runEmbeddedPiAgent).toHaveBeenCalledWith(
|
|
299
|
-
expect.objectContaining({
|
|
300
|
-
provider: 'openai',
|
|
301
|
-
model: 'gpt-5.4',
|
|
302
|
-
}),
|
|
303
|
-
);
|
|
304
|
-
});
|
|
305
|
-
|
|
306
|
-
it('returns stable failure classes when runtime invocation fails', async () => {
|
|
307
|
-
const api = makeRuntimeApi({
|
|
308
|
-
runtime: {
|
|
309
|
-
agent: {
|
|
310
|
-
runEmbeddedPiAgent: vi.fn().mockRejectedValue(new Error('gateway unavailable')),
|
|
311
|
-
},
|
|
312
|
-
},
|
|
313
|
-
});
|
|
314
|
-
const adapter = new OpenClawTrinityRuntimeAdapter(api as any);
|
|
315
|
-
|
|
316
|
-
const result = await adapter.invokeDreamer(makeSnapshot({ failureCount: 1 }) as any, 'T-08', 2);
|
|
317
|
-
|
|
318
|
-
expect(result.valid).toBe(false);
|
|
319
|
-
expect(result.reason).toContain('runtime_run_failed');
|
|
320
|
-
expect(adapter.getLastFailureReason()).toContain('runtime_run_failed');
|
|
321
|
-
});
|
|
322
|
-
});
|
|
323
|
-
|
|
324
|
-
// ---------------------------------------------------------------------------
|
|
325
|
-
// Tests: validatePhilosopherOutput
|
|
326
|
-
// ---------------------------------------------------------------------------
|
|
327
|
-
|
|
328
|
-
describe('validatePhilosopherOutput', () => {
|
|
329
|
-
it('passes a valid Philosopher output', () => {
|
|
330
|
-
const output = {
|
|
331
|
-
valid: true,
|
|
332
|
-
judgments: [
|
|
333
|
-
{
|
|
334
|
-
candidateIndex: 0,
|
|
335
|
-
critique: 'Strong alignment',
|
|
336
|
-
principleAligned: true,
|
|
337
|
-
score: 0.92,
|
|
338
|
-
rank: 1,
|
|
339
|
-
},
|
|
340
|
-
],
|
|
341
|
-
overallAssessment: 'Good candidate set',
|
|
342
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
343
|
-
};
|
|
344
|
-
const result = validatePhilosopherOutput(output);
|
|
345
|
-
expect(result.valid).toBe(true);
|
|
346
|
-
expect(result.failures).toHaveLength(0);
|
|
347
|
-
});
|
|
348
|
-
|
|
349
|
-
it('rejects Philosopher output marked invalid', () => {
|
|
350
|
-
const output = {
|
|
351
|
-
valid: false,
|
|
352
|
-
judgments: [],
|
|
353
|
-
reason: 'No candidates to judge',
|
|
354
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
355
|
-
};
|
|
356
|
-
const result = validatePhilosopherOutput(output);
|
|
357
|
-
expect(result.valid).toBe(false);
|
|
358
|
-
});
|
|
359
|
-
|
|
360
|
-
it('rejects Philosopher output without judgments array', () => {
|
|
361
|
-
const output = {
|
|
362
|
-
valid: true,
|
|
363
|
-
overallAssessment: 'Good',
|
|
364
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
365
|
-
};
|
|
366
|
-
const result = validatePhilosopherOutput(output);
|
|
367
|
-
expect(result.valid).toBe(false);
|
|
368
|
-
expect(result.failures.some(f => f.includes('judgments array'))).toBe(true);
|
|
369
|
-
});
|
|
370
|
-
|
|
371
|
-
it('rejects Philosopher judgment missing required fields', () => {
|
|
372
|
-
const output = {
|
|
373
|
-
valid: true,
|
|
374
|
-
judgments: [
|
|
375
|
-
{
|
|
376
|
-
candidateIndex: 0,
|
|
377
|
-
// missing: critique, principleAligned, score, rank
|
|
378
|
-
},
|
|
379
|
-
],
|
|
380
|
-
overallAssessment: 'Good',
|
|
381
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
382
|
-
};
|
|
383
|
-
const result = validatePhilosopherOutput(output);
|
|
384
|
-
expect(result.valid).toBe(false);
|
|
385
|
-
});
|
|
386
|
-
|
|
387
|
-
it('rejects Philosopher judgment with invalid score (out of range)', () => {
|
|
388
|
-
const output = {
|
|
389
|
-
valid: true,
|
|
390
|
-
judgments: [
|
|
391
|
-
{
|
|
392
|
-
candidateIndex: 0,
|
|
393
|
-
critique: 'Good',
|
|
394
|
-
principleAligned: true,
|
|
395
|
-
score: 1.5, // out of range
|
|
396
|
-
rank: 1,
|
|
397
|
-
},
|
|
398
|
-
],
|
|
399
|
-
overallAssessment: 'Good',
|
|
400
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
401
|
-
};
|
|
402
|
-
const result = validatePhilosopherOutput(output);
|
|
403
|
-
expect(result.valid).toBe(false);
|
|
404
|
-
expect(result.failures.some(f => f.includes('score'))).toBe(true);
|
|
405
|
-
});
|
|
406
|
-
|
|
407
|
-
it('rejects Philosopher judgment with invalid rank (must be >= 1)', () => {
|
|
408
|
-
const output = {
|
|
409
|
-
valid: true,
|
|
410
|
-
judgments: [
|
|
411
|
-
{
|
|
412
|
-
candidateIndex: 0,
|
|
413
|
-
critique: 'Good',
|
|
414
|
-
principleAligned: true,
|
|
415
|
-
score: 0.9,
|
|
416
|
-
rank: 0, // invalid
|
|
417
|
-
},
|
|
418
|
-
],
|
|
419
|
-
overallAssessment: 'Good',
|
|
420
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
421
|
-
};
|
|
422
|
-
const result = validatePhilosopherOutput(output);
|
|
423
|
-
expect(result.valid).toBe(false);
|
|
424
|
-
expect(result.failures.some(f => f.includes('rank'))).toBe(true);
|
|
425
|
-
});
|
|
426
|
-
|
|
427
|
-
it('rejects Philosopher output with non-sequential ranks', () => {
|
|
428
|
-
const output = {
|
|
429
|
-
valid: true,
|
|
430
|
-
judgments: [
|
|
431
|
-
{
|
|
432
|
-
candidateIndex: 0,
|
|
433
|
-
critique: 'Good',
|
|
434
|
-
principleAligned: true,
|
|
435
|
-
score: 0.9,
|
|
436
|
-
rank: 1,
|
|
437
|
-
},
|
|
438
|
-
{
|
|
439
|
-
candidateIndex: 1,
|
|
440
|
-
critique: 'Also good',
|
|
441
|
-
principleAligned: true,
|
|
442
|
-
score: 0.8,
|
|
443
|
-
rank: 3, // should be 2
|
|
444
|
-
},
|
|
445
|
-
],
|
|
446
|
-
overallAssessment: 'Good',
|
|
447
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
448
|
-
};
|
|
449
|
-
const result = validatePhilosopherOutput(output);
|
|
450
|
-
expect(result.valid).toBe(false);
|
|
451
|
-
expect(result.failures.some(f => f.includes('sequential ranks'))).toBe(true);
|
|
452
|
-
});
|
|
453
|
-
|
|
454
|
-
it('rejects Philosopher output missing overallAssessment', () => {
|
|
455
|
-
const output = {
|
|
456
|
-
valid: true,
|
|
457
|
-
judgments: [
|
|
458
|
-
{
|
|
459
|
-
candidateIndex: 0,
|
|
460
|
-
critique: 'Good',
|
|
461
|
-
principleAligned: true,
|
|
462
|
-
score: 0.9,
|
|
463
|
-
rank: 1,
|
|
464
|
-
},
|
|
465
|
-
],
|
|
466
|
-
// missing overallAssessment
|
|
467
|
-
generatedAt: '2026-03-27T12:00:00.000Z',
|
|
468
|
-
};
|
|
469
|
-
const result = validatePhilosopherOutput(output);
|
|
470
|
-
expect(result.valid).toBe(false);
|
|
471
|
-
expect(result.failures.some(f => f.includes('overallAssessment'))).toBe(true);
|
|
472
|
-
});
|
|
473
|
-
});
|
|
474
|
-
|
|
475
|
-
// ---------------------------------------------------------------------------
|
|
476
|
-
// Tests: validateTrinityDraft
|
|
477
|
-
// ---------------------------------------------------------------------------
|
|
478
|
-
|
|
479
|
-
describe('validateTrinityDraft', () => {
|
|
480
|
-
function makeValidDraft(overrides: Record<string, unknown> = {}): Record<string, unknown> {
|
|
481
|
-
return {
|
|
482
|
-
selectedCandidateIndex: 0,
|
|
483
|
-
badDecision: 'Did something wrong',
|
|
484
|
-
betterDecision: 'Do it right',
|
|
485
|
-
rationale: 'Because the principle says so and this is the right approach',
|
|
486
|
-
sessionId: 'session-test-123',
|
|
487
|
-
principleId: 'T-01',
|
|
488
|
-
sourceSnapshotRef: 'snapshot-test-001',
|
|
489
|
-
telemetry: {
|
|
490
|
-
chainMode: 'trinity',
|
|
491
|
-
dreamerPassed: true,
|
|
492
|
-
philosopherPassed: true,
|
|
493
|
-
scribePassed: true,
|
|
494
|
-
candidateCount: 3,
|
|
495
|
-
selectedCandidateIndex: 0,
|
|
496
|
-
stageFailures: [],
|
|
497
|
-
},
|
|
498
|
-
...overrides,
|
|
499
|
-
};
|
|
500
|
-
}
|
|
501
|
-
|
|
502
|
-
it('passes a valid Trinity draft artifact', () => {
|
|
503
|
-
const draft = makeValidDraft();
|
|
504
|
-
const result = validateTrinityDraft(draft);
|
|
505
|
-
expect(result.valid).toBe(true);
|
|
506
|
-
expect(result.failures).toHaveLength(0);
|
|
507
|
-
});
|
|
508
|
-
|
|
509
|
-
it('rejects draft with missing badDecision', () => {
|
|
510
|
-
const draft = makeValidDraft();
|
|
511
|
-
delete draft.badDecision;
|
|
512
|
-
const result = validateTrinityDraft(draft);
|
|
513
|
-
expect(result.valid).toBe(false);
|
|
514
|
-
expect(result.failures.some(f => f.includes('badDecision'))).toBe(true);
|
|
515
|
-
});
|
|
516
|
-
|
|
517
|
-
it('rejects draft with empty badDecision', () => {
|
|
518
|
-
const draft = makeValidDraft({ badDecision: ' ' });
|
|
519
|
-
const result = validateTrinityDraft(draft);
|
|
520
|
-
expect(result.valid).toBe(false);
|
|
521
|
-
expect(result.failures.some(f => f.includes('badDecision'))).toBe(true);
|
|
522
|
-
});
|
|
523
|
-
|
|
524
|
-
it('rejects draft with short rationale (< 20 chars)', () => {
|
|
525
|
-
const draft = makeValidDraft({ rationale: 'Too short' });
|
|
526
|
-
const result = validateTrinityDraft(draft);
|
|
527
|
-
expect(result.valid).toBe(false);
|
|
528
|
-
expect(result.failures.some(f => f.includes('rationale'))).toBe(true);
|
|
529
|
-
});
|
|
530
|
-
|
|
531
|
-
it('rejects draft with identical badDecision and betterDecision', () => {
|
|
532
|
-
const draft = makeValidDraft({
|
|
533
|
-
badDecision: 'Same thing',
|
|
534
|
-
betterDecision: 'Same thing',
|
|
535
|
-
});
|
|
536
|
-
const result = validateTrinityDraft(draft);
|
|
537
|
-
expect(result.valid).toBe(false);
|
|
538
|
-
expect(result.failures.some(f => f.includes('identical'))).toBe(true);
|
|
539
|
-
});
|
|
540
|
-
|
|
541
|
-
it('rejects draft with invalid telemetry', () => {
|
|
542
|
-
const draft = makeValidDraft({ telemetry: null });
|
|
543
|
-
const result = validateTrinityDraft(draft);
|
|
544
|
-
expect(result.valid).toBe(false);
|
|
545
|
-
expect(result.failures.some(f => f.includes('telemetry'))).toBe(true);
|
|
546
|
-
});
|
|
547
|
-
|
|
548
|
-
it('rejects draft with invalid chainMode in telemetry', () => {
|
|
549
|
-
const draft = makeValidDraft({
|
|
550
|
-
telemetry: {
|
|
551
|
-
chainMode: 'invalid-mode', // must be 'trinity' or 'single-reflector'
|
|
552
|
-
dreamerPassed: true,
|
|
553
|
-
philosopherPassed: true,
|
|
554
|
-
scribePassed: true,
|
|
555
|
-
candidateCount: 3,
|
|
556
|
-
selectedCandidateIndex: 0,
|
|
557
|
-
stageFailures: [],
|
|
558
|
-
},
|
|
559
|
-
});
|
|
560
|
-
const result = validateTrinityDraft(draft);
|
|
561
|
-
expect(result.valid).toBe(false);
|
|
562
|
-
expect(result.failures.some(f => f.includes('chainMode'))).toBe(true);
|
|
563
|
-
});
|
|
564
|
-
});
|
|
565
|
-
|
|
566
|
-
// ---------------------------------------------------------------------------
|
|
567
|
-
// Tests: runTrinity — successful path
|
|
568
|
-
// ---------------------------------------------------------------------------
|
|
569
|
-
|
|
570
|
-
describe('runTrinity', () => {
|
|
571
|
-
it('produces a successful Trinity result with valid snapshot (failure signal)', () => {
|
|
572
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
573
|
-
const config: TrinityConfig = {
|
|
574
|
-
useTrinity: true,
|
|
575
|
-
maxCandidates: 3,
|
|
576
|
-
useStubs: true, // Use stub implementations
|
|
577
|
-
};
|
|
578
|
-
|
|
579
|
-
const result = runTrinity({ snapshot, principleId: 'T-08', config });
|
|
580
|
-
|
|
581
|
-
expect(result.success).toBe(true);
|
|
582
|
-
expect(result.artifact).toBeDefined();
|
|
583
|
-
expect(result.telemetry.chainMode).toBe('trinity');
|
|
584
|
-
expect(result.telemetry.dreamerPassed).toBe(true);
|
|
585
|
-
expect(result.telemetry.philosopherPassed).toBe(true);
|
|
586
|
-
expect(result.telemetry.scribePassed).toBe(true);
|
|
587
|
-
expect(result.telemetry.candidateCount).toBeGreaterThan(0);
|
|
588
|
-
expect(result.telemetry.selectedCandidateIndex).toBeGreaterThanOrEqual(0);
|
|
589
|
-
expect(result.failures).toHaveLength(0);
|
|
590
|
-
expect(result.fallbackOccurred).toBe(false);
|
|
591
|
-
});
|
|
592
|
-
|
|
593
|
-
it('produces a successful Trinity result with pain signal', () => {
|
|
594
|
-
const snapshot = makeSnapshot({ totalPainEvents: 3 });
|
|
595
|
-
const config: TrinityConfig = {
|
|
596
|
-
useTrinity: true,
|
|
597
|
-
maxCandidates: 3,
|
|
598
|
-
useStubs: true,
|
|
599
|
-
};
|
|
600
|
-
|
|
601
|
-
const result = runTrinity({ snapshot, principleId: 'T-08', config });
|
|
602
|
-
|
|
603
|
-
expect(result.success).toBe(true);
|
|
604
|
-
expect(result.artifact).toBeDefined();
|
|
605
|
-
});
|
|
606
|
-
|
|
607
|
-
it('produces a successful Trinity result with gate block signal', () => {
|
|
608
|
-
const snapshot = makeSnapshot({ totalGateBlocks: 1 });
|
|
609
|
-
const config: TrinityConfig = {
|
|
610
|
-
useTrinity: true,
|
|
611
|
-
maxCandidates: 3,
|
|
612
|
-
useStubs: true,
|
|
613
|
-
};
|
|
614
|
-
|
|
615
|
-
const result = runTrinity({ snapshot, principleId: 'T-03', config });
|
|
616
|
-
|
|
617
|
-
expect(result.success).toBe(true);
|
|
618
|
-
expect(result.artifact).toBeDefined();
|
|
619
|
-
});
|
|
620
|
-
|
|
621
|
-
it('respects maxCandidates config', () => {
|
|
622
|
-
const snapshot = makeSnapshot({ failureCount: 5 });
|
|
623
|
-
const config: TrinityConfig = {
|
|
624
|
-
useTrinity: true,
|
|
625
|
-
maxCandidates: 2,
|
|
626
|
-
useStubs: true,
|
|
627
|
-
};
|
|
628
|
-
|
|
629
|
-
const result = runTrinity({ snapshot, principleId: 'T-08', config });
|
|
630
|
-
|
|
631
|
-
expect(result.success).toBe(true);
|
|
632
|
-
expect(result.telemetry.candidateCount).toBeLessThanOrEqual(2);
|
|
633
|
-
});
|
|
634
|
-
});
|
|
635
|
-
|
|
636
|
-
// ---------------------------------------------------------------------------
|
|
637
|
-
// Tests: runTrinity — failure paths
|
|
638
|
-
// ---------------------------------------------------------------------------
|
|
639
|
-
|
|
640
|
-
describe('runTrinity — failure paths', () => {
|
|
641
|
-
it('fails when snapshot has no signal and generates no candidates', () => {
|
|
642
|
-
// Snapshot with all zero stats - stub will fail to generate candidates
|
|
643
|
-
const snapshot = makeSnapshot({
|
|
644
|
-
failureCount: 0,
|
|
645
|
-
totalPainEvents: 0,
|
|
646
|
-
totalGateBlocks: 0,
|
|
647
|
-
});
|
|
648
|
-
const config: TrinityConfig = {
|
|
649
|
-
useTrinity: true,
|
|
650
|
-
maxCandidates: 3,
|
|
651
|
-
useStubs: true,
|
|
652
|
-
};
|
|
653
|
-
|
|
654
|
-
const result = runTrinity({ snapshot, principleId: 'T-08', config });
|
|
655
|
-
|
|
656
|
-
expect(result.success).toBe(false);
|
|
657
|
-
expect(result.failures.length).toBeGreaterThan(0);
|
|
658
|
-
expect(result.failures[0].stage).toBe('dreamer');
|
|
659
|
-
expect(result.telemetry.dreamerPassed).toBe(false);
|
|
660
|
-
});
|
|
661
|
-
});
|
|
662
|
-
|
|
663
|
-
// ---------------------------------------------------------------------------
|
|
664
|
-
// Tests: validateDraftArtifact
|
|
665
|
-
// ---------------------------------------------------------------------------
|
|
666
|
-
|
|
667
|
-
describe('validateDraftArtifact', () => {
|
|
668
|
-
function makeValidArtifact(): TrinityDraftArtifact {
|
|
669
|
-
return {
|
|
670
|
-
selectedCandidateIndex: 0,
|
|
671
|
-
badDecision: 'Did something wrong',
|
|
672
|
-
betterDecision: 'Do it right',
|
|
673
|
-
rationale: 'Because the principle says so and this is the correct approach',
|
|
674
|
-
sessionId: 'session-test-123',
|
|
675
|
-
principleId: 'T-01',
|
|
676
|
-
sourceSnapshotRef: 'snapshot-test-001',
|
|
677
|
-
telemetry: {
|
|
678
|
-
chainMode: 'trinity',
|
|
679
|
-
dreamerPassed: true,
|
|
680
|
-
philosopherPassed: true,
|
|
681
|
-
scribePassed: true,
|
|
682
|
-
candidateCount: 3,
|
|
683
|
-
selectedCandidateIndex: 0,
|
|
684
|
-
stageFailures: [],
|
|
685
|
-
},
|
|
686
|
-
};
|
|
687
|
-
}
|
|
688
|
-
|
|
689
|
-
it('passes a valid TrinityDraftArtifact', () => {
|
|
690
|
-
const artifact = makeValidArtifact();
|
|
691
|
-
const result = validateDraftArtifact(artifact);
|
|
692
|
-
expect(result.valid).toBe(true);
|
|
693
|
-
expect(result.failures).toHaveLength(0);
|
|
694
|
-
});
|
|
695
|
-
|
|
696
|
-
it('rejects artifact with missing badDecision', () => {
|
|
697
|
-
const artifact = makeValidArtifact();
|
|
698
|
-
delete (artifact as Record<string, unknown>).badDecision;
|
|
699
|
-
const result = validateDraftArtifact(artifact);
|
|
700
|
-
expect(result.valid).toBe(false);
|
|
701
|
-
});
|
|
702
|
-
|
|
703
|
-
it('rejects artifact with empty betterDecision', () => {
|
|
704
|
-
const artifact = makeValidArtifact();
|
|
705
|
-
artifact.betterDecision = ' ';
|
|
706
|
-
const result = validateDraftArtifact(artifact);
|
|
707
|
-
expect(result.valid).toBe(false);
|
|
708
|
-
});
|
|
709
|
-
|
|
710
|
-
it('rejects artifact with short rationale', () => {
|
|
711
|
-
const artifact = makeValidArtifact();
|
|
712
|
-
artifact.rationale = 'Too short';
|
|
713
|
-
const result = validateDraftArtifact(artifact);
|
|
714
|
-
expect(result.valid).toBe(false);
|
|
715
|
-
});
|
|
716
|
-
|
|
717
|
-
it('rejects artifact with identical badDecision and betterDecision', () => {
|
|
718
|
-
const artifact = makeValidArtifact();
|
|
719
|
-
artifact.badDecision = 'Same';
|
|
720
|
-
artifact.betterDecision = 'Same';
|
|
721
|
-
const result = validateDraftArtifact(artifact);
|
|
722
|
-
expect(result.valid).toBe(false);
|
|
723
|
-
expect(result.failures.some(f => f.includes('identical'))).toBe(true);
|
|
724
|
-
});
|
|
725
|
-
});
|
|
726
|
-
|
|
727
|
-
// ---------------------------------------------------------------------------
|
|
728
|
-
// Tests: draftToArtifact
|
|
729
|
-
// ---------------------------------------------------------------------------
|
|
730
|
-
|
|
731
|
-
describe('draftToArtifact', () => {
|
|
732
|
-
it('converts TrinityDraftArtifact to NocturnalArtifact-compatible structure', () => {
|
|
733
|
-
const draft: TrinityDraftArtifact = {
|
|
734
|
-
selectedCandidateIndex: 1,
|
|
735
|
-
badDecision: 'Did something wrong',
|
|
736
|
-
betterDecision: 'Do it right',
|
|
737
|
-
rationale: 'Because the principle says so',
|
|
738
|
-
sessionId: 'session-test-123',
|
|
739
|
-
principleId: 'T-01',
|
|
740
|
-
sourceSnapshotRef: 'snapshot-test-001',
|
|
741
|
-
telemetry: {
|
|
742
|
-
chainMode: 'trinity',
|
|
743
|
-
dreamerPassed: true,
|
|
744
|
-
philosopherPassed: true,
|
|
745
|
-
scribePassed: true,
|
|
746
|
-
candidateCount: 3,
|
|
747
|
-
selectedCandidateIndex: 1,
|
|
748
|
-
stageFailures: [],
|
|
749
|
-
},
|
|
750
|
-
};
|
|
751
|
-
|
|
752
|
-
const artifact = draftToArtifact(draft);
|
|
753
|
-
|
|
754
|
-
expect(artifact.artifactId).toBeDefined(); // Generated UUID
|
|
755
|
-
expect(artifact.sessionId).toBe('session-test-123');
|
|
756
|
-
expect(artifact.principleId).toBe('T-01');
|
|
757
|
-
expect(artifact.badDecision).toBe('Did something wrong');
|
|
758
|
-
expect(artifact.betterDecision).toBe('Do it right');
|
|
759
|
-
expect(artifact.rationale).toBe('Because the principle says so');
|
|
760
|
-
expect(artifact.sourceSnapshotRef).toBe('snapshot-test-001');
|
|
761
|
-
expect(artifact.createdAt).toBeDefined(); // Current timestamp
|
|
762
|
-
});
|
|
763
|
-
});
|
|
764
|
-
|
|
765
|
-
// ---------------------------------------------------------------------------
|
|
766
|
-
// Tests: DEFAULT_TRINITY_CONFIG
|
|
767
|
-
// ---------------------------------------------------------------------------
|
|
768
|
-
|
|
769
|
-
describe('DEFAULT_TRINITY_CONFIG', () => {
|
|
770
|
-
it('has sensible defaults', () => {
|
|
771
|
-
expect(DEFAULT_TRINITY_CONFIG.useTrinity).toBe(true);
|
|
772
|
-
expect(DEFAULT_TRINITY_CONFIG.maxCandidates).toBe(3);
|
|
773
|
-
expect(DEFAULT_TRINITY_CONFIG.useStubs).toBe(false); // real subagent execution is now the default
|
|
774
|
-
});
|
|
775
|
-
});
|
|
776
|
-
|
|
777
|
-
// ---------------------------------------------------------------------------
|
|
778
|
-
// Tests: runTrinity — useStubs=false without adapter (sync failure)
|
|
779
|
-
// ---------------------------------------------------------------------------
|
|
780
|
-
|
|
781
|
-
describe('runTrinity — useStubs=false without adapter', () => {
|
|
782
|
-
it('fails with clear error when useStubs=false but no runtimeAdapter provided', () => {
|
|
783
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
784
|
-
const config: TrinityConfig = {
|
|
785
|
-
useTrinity: true,
|
|
786
|
-
maxCandidates: 3,
|
|
787
|
-
useStubs: false, // No adapter provided!
|
|
788
|
-
};
|
|
789
|
-
|
|
790
|
-
const result = runTrinity({ snapshot, principleId: 'T-08', config });
|
|
791
|
-
|
|
792
|
-
expect(result.success).toBe(false);
|
|
793
|
-
expect(result.failures.length).toBeGreaterThan(0);
|
|
794
|
-
expect(result.failures[0].stage).toBe('dreamer');
|
|
795
|
-
expect(result.failures[0].reason).toContain('runtimeAdapter');
|
|
796
|
-
expect(result.telemetry.usedStubs).toBe(false);
|
|
797
|
-
expect(result.telemetry.dreamerPassed).toBe(false);
|
|
798
|
-
});
|
|
799
|
-
});
|
|
800
|
-
|
|
801
|
-
// ---------------------------------------------------------------------------
|
|
802
|
-
// Tests: runTrinityAsync — with mock runtime adapter
|
|
803
|
-
// ---------------------------------------------------------------------------
|
|
804
|
-
|
|
805
|
-
describe('runTrinityAsync — with mock runtime adapter', () => {
|
|
806
|
-
function makeMockAdapter(overrides: Partial<{
|
|
807
|
-
dreamerOutput: DreamerOutput;
|
|
808
|
-
philosopherOutput: PhilosopherOutput;
|
|
809
|
-
scribeArtifact: TrinityDraftArtifact | null;
|
|
810
|
-
closeCalled: boolean;
|
|
811
|
-
}> = {}): TrinityRuntimeAdapter & { closeCalled: boolean } {
|
|
812
|
-
const defaultDreamerOutput: DreamerOutput = {
|
|
813
|
-
valid: true,
|
|
814
|
-
candidates: [
|
|
815
|
-
{
|
|
816
|
-
candidateIndex: 0,
|
|
817
|
-
badDecision: 'Did something wrong',
|
|
818
|
-
betterDecision: 'Do it right',
|
|
819
|
-
rationale: 'Because the principle says so',
|
|
820
|
-
confidence: 0.9,
|
|
821
|
-
},
|
|
822
|
-
],
|
|
823
|
-
generatedAt: new Date().toISOString(),
|
|
824
|
-
};
|
|
825
|
-
|
|
826
|
-
const defaultPhilosopherOutput: PhilosopherOutput = {
|
|
827
|
-
valid: true,
|
|
828
|
-
judgments: [
|
|
829
|
-
{
|
|
830
|
-
candidateIndex: 0,
|
|
831
|
-
critique: 'Good alignment',
|
|
832
|
-
principleAligned: true,
|
|
833
|
-
score: 0.92,
|
|
834
|
-
rank: 1,
|
|
835
|
-
},
|
|
836
|
-
],
|
|
837
|
-
overallAssessment: 'Good candidate',
|
|
838
|
-
generatedAt: new Date().toISOString(),
|
|
839
|
-
};
|
|
840
|
-
|
|
841
|
-
const defaultScribeArtifact: TrinityDraftArtifact = {
|
|
842
|
-
selectedCandidateIndex: 0,
|
|
843
|
-
badDecision: 'Did something wrong',
|
|
844
|
-
betterDecision: 'Do it right',
|
|
845
|
-
rationale: 'Because the principle says so and this is the right approach',
|
|
846
|
-
sessionId: 'session-test-123',
|
|
847
|
-
principleId: 'T-01',
|
|
848
|
-
sourceSnapshotRef: 'snapshot-test-001',
|
|
849
|
-
telemetry: {
|
|
850
|
-
chainMode: 'trinity',
|
|
851
|
-
usedStubs: false,
|
|
852
|
-
dreamerPassed: true,
|
|
853
|
-
philosopherPassed: true,
|
|
854
|
-
scribePassed: true,
|
|
855
|
-
candidateCount: 1,
|
|
856
|
-
selectedCandidateIndex: 0,
|
|
857
|
-
stageFailures: [],
|
|
858
|
-
},
|
|
859
|
-
};
|
|
860
|
-
|
|
861
|
-
return {
|
|
862
|
-
closeCalled: overrides.closeCalled ?? false,
|
|
863
|
-
invokeDreamer: vi.fn().mockResolvedValue(overrides.dreamerOutput ?? defaultDreamerOutput),
|
|
864
|
-
invokePhilosopher: vi.fn().mockResolvedValue(overrides.philosopherOutput ?? defaultPhilosopherOutput),
|
|
865
|
-
invokeScribe: vi.fn().mockResolvedValue(
|
|
866
|
-
overrides.scribeArtifact === null ? null : (overrides.scribeArtifact ?? defaultScribeArtifact)
|
|
867
|
-
),
|
|
868
|
-
close: vi.fn().mockResolvedValue(undefined),
|
|
869
|
-
} as unknown as TrinityRuntimeAdapter & { closeCalled: boolean; invokeDreamer: ReturnType<typeof vi.fn>; invokePhilosopher: ReturnType<typeof vi.fn>; invokeScribe: ReturnType<typeof vi.fn> };
|
|
870
|
-
}
|
|
871
|
-
|
|
872
|
-
it('uses runtime adapter when useStubs=false with adapter provided', async () => {
|
|
873
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
874
|
-
const adapter = makeMockAdapter();
|
|
875
|
-
const config: TrinityConfig = {
|
|
876
|
-
useTrinity: true,
|
|
877
|
-
maxCandidates: 3,
|
|
878
|
-
useStubs: false,
|
|
879
|
-
runtimeAdapter: adapter,
|
|
880
|
-
};
|
|
881
|
-
|
|
882
|
-
const result = await runTrinityAsync({ snapshot, principleId: 'T-08', config });
|
|
883
|
-
|
|
884
|
-
expect(result.success).toBe(true);
|
|
885
|
-
expect(adapter.invokeDreamer).toHaveBeenCalledWith(snapshot, 'T-08', 3);
|
|
886
|
-
expect(adapter.invokePhilosopher).toHaveBeenCalled();
|
|
887
|
-
expect(adapter.invokeScribe).toHaveBeenCalled();
|
|
888
|
-
expect(result.telemetry.usedStubs).toBe(false);
|
|
889
|
-
expect(result.telemetry.dreamerPassed).toBe(true);
|
|
890
|
-
expect(result.telemetry.philosopherPassed).toBe(true);
|
|
891
|
-
expect(result.telemetry.scribePassed).toBe(true);
|
|
892
|
-
});
|
|
893
|
-
|
|
894
|
-
it('fails closed when Dreamer stage returns invalid output', async () => {
|
|
895
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
896
|
-
const adapter = makeMockAdapter({
|
|
897
|
-
dreamerOutput: { valid: false, candidates: [], reason: 'No signal found', generatedAt: new Date().toISOString() },
|
|
898
|
-
});
|
|
899
|
-
const config: TrinityConfig = {
|
|
900
|
-
useTrinity: true,
|
|
901
|
-
maxCandidates: 3,
|
|
902
|
-
useStubs: false,
|
|
903
|
-
runtimeAdapter: adapter,
|
|
904
|
-
};
|
|
905
|
-
|
|
906
|
-
const result = await runTrinityAsync({ snapshot, principleId: 'T-08', config });
|
|
907
|
-
|
|
908
|
-
expect(result.success).toBe(false);
|
|
909
|
-
expect(result.failures.length).toBeGreaterThan(0);
|
|
910
|
-
expect(result.failures[0].stage).toBe('dreamer');
|
|
911
|
-
expect(result.telemetry.dreamerPassed).toBe(false);
|
|
912
|
-
expect(result.telemetry.philosopherPassed).toBe(false);
|
|
913
|
-
expect(result.telemetry.scribePassed).toBe(false);
|
|
914
|
-
});
|
|
915
|
-
|
|
916
|
-
it('fails closed when Philosopher stage returns invalid output', async () => {
|
|
917
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
918
|
-
const adapter = makeMockAdapter({
|
|
919
|
-
philosopherOutput: { valid: false, judgments: [], overallAssessment: '', reason: 'No candidates', generatedAt: new Date().toISOString() },
|
|
920
|
-
});
|
|
921
|
-
const config: TrinityConfig = {
|
|
922
|
-
useTrinity: true,
|
|
923
|
-
maxCandidates: 3,
|
|
924
|
-
useStubs: false,
|
|
925
|
-
runtimeAdapter: adapter,
|
|
926
|
-
};
|
|
927
|
-
|
|
928
|
-
const result = await runTrinityAsync({ snapshot, principleId: 'T-08', config });
|
|
929
|
-
|
|
930
|
-
expect(result.success).toBe(false);
|
|
931
|
-
expect(result.failures.some(f => f.stage === 'dreamer')).toBe(false); // Dreamer passed
|
|
932
|
-
expect(result.failures.some(f => f.stage === 'philosopher')).toBe(true);
|
|
933
|
-
expect(result.telemetry.dreamerPassed).toBe(true);
|
|
934
|
-
expect(result.telemetry.philosopherPassed).toBe(false);
|
|
935
|
-
});
|
|
936
|
-
|
|
937
|
-
it('fails closed when Scribe stage returns null', async () => {
|
|
938
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
939
|
-
const adapter = makeMockAdapter({ scribeArtifact: null });
|
|
940
|
-
const config: TrinityConfig = {
|
|
941
|
-
useTrinity: true,
|
|
942
|
-
maxCandidates: 3,
|
|
943
|
-
useStubs: false,
|
|
944
|
-
runtimeAdapter: adapter,
|
|
945
|
-
};
|
|
946
|
-
|
|
947
|
-
const result = await runTrinityAsync({ snapshot, principleId: 'T-08', config });
|
|
948
|
-
|
|
949
|
-
expect(result.success).toBe(false);
|
|
950
|
-
expect(result.failures.some(f => f.stage === 'scribe')).toBe(true);
|
|
951
|
-
expect(result.telemetry.dreamerPassed).toBe(true);
|
|
952
|
-
expect(result.telemetry.philosopherPassed).toBe(true);
|
|
953
|
-
expect(result.telemetry.scribePassed).toBe(false);
|
|
954
|
-
});
|
|
955
|
-
|
|
956
|
-
it('calls adapter.close() after successful execution', async () => {
|
|
957
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
958
|
-
const adapter = makeMockAdapter();
|
|
959
|
-
const config: TrinityConfig = {
|
|
960
|
-
useTrinity: true,
|
|
961
|
-
maxCandidates: 3,
|
|
962
|
-
useStubs: false,
|
|
963
|
-
runtimeAdapter: adapter,
|
|
964
|
-
};
|
|
965
|
-
|
|
966
|
-
await runTrinityAsync({ snapshot, principleId: 'T-08', config });
|
|
967
|
-
|
|
968
|
-
expect(adapter.close).toHaveBeenCalled();
|
|
969
|
-
});
|
|
970
|
-
|
|
971
|
-
it('calls adapter.close() even when execution fails', async () => {
|
|
972
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
973
|
-
const adapter = makeMockAdapter({
|
|
974
|
-
dreamerOutput: { valid: false, candidates: [], reason: 'No signal', generatedAt: new Date().toISOString() },
|
|
975
|
-
});
|
|
976
|
-
const config: TrinityConfig = {
|
|
977
|
-
useTrinity: true,
|
|
978
|
-
maxCandidates: 3,
|
|
979
|
-
useStubs: false,
|
|
980
|
-
runtimeAdapter: adapter,
|
|
981
|
-
};
|
|
982
|
-
|
|
983
|
-
await runTrinityAsync({ snapshot, principleId: 'T-08', config });
|
|
984
|
-
|
|
985
|
-
expect(adapter.close).toHaveBeenCalled();
|
|
986
|
-
});
|
|
987
|
-
|
|
988
|
-
it('produces artifact compatible with draftToArtifact', async () => {
|
|
989
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
990
|
-
const adapter = makeMockAdapter();
|
|
991
|
-
const config: TrinityConfig = {
|
|
992
|
-
useTrinity: true,
|
|
993
|
-
maxCandidates: 3,
|
|
994
|
-
useStubs: false,
|
|
995
|
-
runtimeAdapter: adapter,
|
|
996
|
-
};
|
|
997
|
-
|
|
998
|
-
const result = await runTrinityAsync({ snapshot, principleId: 'T-08', config });
|
|
999
|
-
|
|
1000
|
-
expect(result.success).toBe(true);
|
|
1001
|
-
expect(result.artifact).toBeDefined();
|
|
1002
|
-
const artifact = draftToArtifact(result.artifact!);
|
|
1003
|
-
expect(artifact.artifactId).toBeDefined();
|
|
1004
|
-
expect(artifact.sessionId).toBe('session-test-123');
|
|
1005
|
-
expect(artifact.principleId).toBe('T-01');
|
|
1006
|
-
expect(artifact.badDecision).toBeDefined();
|
|
1007
|
-
expect(artifact.betterDecision).toBeDefined();
|
|
1008
|
-
});
|
|
1009
|
-
});
|
|
1010
|
-
|
|
1011
|
-
// ---------------------------------------------------------------------------
|
|
1012
|
-
// Tests: runTrinityAsync — useStubs=true still uses stubs
|
|
1013
|
-
// ---------------------------------------------------------------------------
|
|
1014
|
-
|
|
1015
|
-
describe('runTrinityAsync — useStubs=true uses synchronous stubs', () => {
|
|
1016
|
-
it('still uses stub implementations when useStubs=true even with adapter', async () => {
|
|
1017
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
1018
|
-
const adapter = {
|
|
1019
|
-
invokeDreamer: vi.fn().mockResolvedValue({ valid: true, candidates: [], generatedAt: new Date().toISOString() }),
|
|
1020
|
-
invokePhilosopher: vi.fn().mockResolvedValue({ valid: true, judgments: [], overallAssessment: '', generatedAt: new Date().toISOString() }),
|
|
1021
|
-
invokeScribe: vi.fn().mockResolvedValue(null),
|
|
1022
|
-
};
|
|
1023
|
-
const config: TrinityConfig = {
|
|
1024
|
-
useTrinity: true,
|
|
1025
|
-
maxCandidates: 3,
|
|
1026
|
-
useStubs: true, // Explicitly use stubs
|
|
1027
|
-
runtimeAdapter: adapter as unknown as TrinityRuntimeAdapter,
|
|
1028
|
-
};
|
|
1029
|
-
|
|
1030
|
-
// With stubs, adapter is ignored - stub produces success with failureCount signal
|
|
1031
|
-
const result = await runTrinityAsync({ snapshot, principleId: 'T-08', config });
|
|
1032
|
-
|
|
1033
|
-
expect(result.success).toBe(true); // Stub succeeds because snapshot has failureCount
|
|
1034
|
-
expect(adapter.invokeDreamer).not.toHaveBeenCalled(); // Adapter NOT called
|
|
1035
|
-
expect(adapter.invokePhilosopher).not.toHaveBeenCalled();
|
|
1036
|
-
expect(adapter.invokeScribe).not.toHaveBeenCalled();
|
|
1037
|
-
});
|
|
1038
|
-
});
|
|
1039
|
-
|
|
1040
|
-
// ---------------------------------------------------------------------------
|
|
1041
|
-
// Tests: NOCTURNAL_DREAMER_PROMPT — strategic perspective requirements (Task 1)
|
|
1042
|
-
// ---------------------------------------------------------------------------
|
|
1043
|
-
|
|
1044
|
-
describe('NOCTURNAL_DREAMER_PROMPT — strategic perspective requirements', () => {
|
|
1045
|
-
it('contains "## Strategic Perspective Requirements" section', () => {
|
|
1046
|
-
expect(NOCTURNAL_DREAMER_PROMPT).toContain('## Strategic Perspective Requirements');
|
|
1047
|
-
});
|
|
1048
|
-
|
|
1049
|
-
it('mentions all three strategic perspectives', () => {
|
|
1050
|
-
expect(NOCTURNAL_DREAMER_PROMPT).toContain('conservative_fix');
|
|
1051
|
-
expect(NOCTURNAL_DREAMER_PROMPT).toContain('structural_improvement');
|
|
1052
|
-
expect(NOCTURNAL_DREAMER_PROMPT).toContain('paradigm_shift');
|
|
1053
|
-
});
|
|
1054
|
-
|
|
1055
|
-
it('contains ANTI-PATTERN warning', () => {
|
|
1056
|
-
expect(NOCTURNAL_DREAMER_PROMPT).toContain('ANTI-PATTERN');
|
|
1057
|
-
});
|
|
1058
|
-
|
|
1059
|
-
it('references riskLevel as required candidate field', () => {
|
|
1060
|
-
expect(NOCTURNAL_DREAMER_PROMPT).toContain('riskLevel');
|
|
1061
|
-
});
|
|
1062
|
-
|
|
1063
|
-
it('references strategicPerspective as required candidate field', () => {
|
|
1064
|
-
expect(NOCTURNAL_DREAMER_PROMPT).toContain('strategicPerspective');
|
|
1065
|
-
});
|
|
1066
|
-
});
|
|
1067
|
-
|
|
1068
|
-
// ---------------------------------------------------------------------------
|
|
1069
|
-
// Tests: DreamerCandidate interface — optional fields (Task 1)
|
|
1070
|
-
// ---------------------------------------------------------------------------
|
|
1071
|
-
|
|
1072
|
-
describe('DreamerCandidate interface — optional fields', () => {
|
|
1073
|
-
it('accepts a candidate with riskLevel and strategicPerspective', () => {
|
|
1074
|
-
const candidate: DreamerCandidate = {
|
|
1075
|
-
candidateIndex: 0,
|
|
1076
|
-
badDecision: 'Did something wrong',
|
|
1077
|
-
betterDecision: 'Do it right',
|
|
1078
|
-
rationale: 'Because the principle says so',
|
|
1079
|
-
confidence: 0.9,
|
|
1080
|
-
riskLevel: 'medium',
|
|
1081
|
-
strategicPerspective: 'structural_improvement',
|
|
1082
|
-
};
|
|
1083
|
-
expect(candidate.riskLevel).toBe('medium');
|
|
1084
|
-
expect(candidate.strategicPerspective).toBe('structural_improvement');
|
|
1085
|
-
});
|
|
1086
|
-
|
|
1087
|
-
it('accepts a candidate without riskLevel or strategicPerspective (backward compat)', () => {
|
|
1088
|
-
const candidate: DreamerCandidate = {
|
|
1089
|
-
candidateIndex: 0,
|
|
1090
|
-
badDecision: 'Did something wrong',
|
|
1091
|
-
betterDecision: 'Do it right',
|
|
1092
|
-
rationale: 'Because the principle says so',
|
|
1093
|
-
confidence: 0.9,
|
|
1094
|
-
};
|
|
1095
|
-
expect(candidate.riskLevel).toBeUndefined();
|
|
1096
|
-
expect(candidate.strategicPerspective).toBeUndefined();
|
|
1097
|
-
});
|
|
1098
|
-
|
|
1099
|
-
it('accepts all valid riskLevel values', () => {
|
|
1100
|
-
const levels: Array<'low' | 'medium' | 'high'> = ['low', 'medium', 'high'];
|
|
1101
|
-
for (const level of levels) {
|
|
1102
|
-
const candidate: DreamerCandidate = {
|
|
1103
|
-
candidateIndex: 0,
|
|
1104
|
-
badDecision: 'Wrong',
|
|
1105
|
-
betterDecision: 'Right',
|
|
1106
|
-
rationale: 'Because',
|
|
1107
|
-
confidence: 0.8,
|
|
1108
|
-
riskLevel: level,
|
|
1109
|
-
};
|
|
1110
|
-
expect(candidate.riskLevel).toBe(level);
|
|
1111
|
-
}
|
|
1112
|
-
});
|
|
1113
|
-
|
|
1114
|
-
it('accepts all valid strategicPerspective values', () => {
|
|
1115
|
-
const perspectives: Array<'conservative_fix' | 'structural_improvement' | 'paradigm_shift'> = [
|
|
1116
|
-
'conservative_fix',
|
|
1117
|
-
'structural_improvement',
|
|
1118
|
-
'paradigm_shift',
|
|
1119
|
-
];
|
|
1120
|
-
for (const perspective of perspectives) {
|
|
1121
|
-
const candidate: DreamerCandidate = {
|
|
1122
|
-
candidateIndex: 0,
|
|
1123
|
-
badDecision: 'Wrong',
|
|
1124
|
-
betterDecision: 'Right',
|
|
1125
|
-
rationale: 'Because',
|
|
1126
|
-
confidence: 0.8,
|
|
1127
|
-
strategicPerspective: perspective,
|
|
1128
|
-
};
|
|
1129
|
-
expect(candidate.strategicPerspective).toBe(perspective);
|
|
1130
|
-
}
|
|
1131
|
-
});
|
|
1132
|
-
});
|
|
1133
|
-
|
|
1134
|
-
// ---------------------------------------------------------------------------
|
|
1135
|
-
// Tests: buildDreamerPrompt — reasoning context injection (Task 2)
|
|
1136
|
-
// ---------------------------------------------------------------------------
|
|
1137
|
-
|
|
1138
|
-
describe('buildDreamerPrompt — reasoning context injection', () => {
|
|
1139
|
-
// Helper to create a minimal snapshot for reasoning context tests
|
|
1140
|
-
function makeReasoningSnapshot(overrides: {
|
|
1141
|
-
assistantTurns?: any[];
|
|
1142
|
-
toolCalls?: any[];
|
|
1143
|
-
userTurns?: any[];
|
|
1144
|
-
} = {}) {
|
|
1145
|
-
return {
|
|
1146
|
-
sessionId: 'session-reasoning-test',
|
|
1147
|
-
startedAt: '2026-04-13T00:00:00.000Z',
|
|
1148
|
-
updatedAt: '2026-04-13T00:05:00.000Z',
|
|
1149
|
-
assistantTurns: overrides.assistantTurns ?? [],
|
|
1150
|
-
userTurns: overrides.userTurns ?? [],
|
|
1151
|
-
toolCalls: overrides.toolCalls ?? [],
|
|
1152
|
-
painEvents: [],
|
|
1153
|
-
gateBlocks: [],
|
|
1154
|
-
stats: {
|
|
1155
|
-
failureCount: 0,
|
|
1156
|
-
totalPainEvents: 0,
|
|
1157
|
-
totalGateBlocks: 0,
|
|
1158
|
-
totalAssistantTurns: overrides.assistantTurns?.length ?? 0,
|
|
1159
|
-
totalToolCalls: overrides.toolCalls?.length ?? 0,
|
|
1160
|
-
},
|
|
1161
|
-
};
|
|
1162
|
-
}
|
|
1163
|
-
|
|
1164
|
-
it('injects ## Reasoning Context section when assistant turns have thinking content', () => {
|
|
1165
|
-
const snapshot = makeReasoningSnapshot({
|
|
1166
|
-
assistantTurns: [
|
|
1167
|
-
{
|
|
1168
|
-
turnIndex: 0,
|
|
1169
|
-
sanitizedText: '<thinking>I need to consider the implications carefully</thinking>',
|
|
1170
|
-
createdAt: '2026-04-13T00:01:00.000Z',
|
|
1171
|
-
},
|
|
1172
|
-
],
|
|
1173
|
-
});
|
|
1174
|
-
|
|
1175
|
-
const result = formatReasoningContext(snapshot as any);
|
|
1176
|
-
expect(result).toContain('## Reasoning Context');
|
|
1177
|
-
});
|
|
1178
|
-
|
|
1179
|
-
it('includes uncertainty markers in reasoning context', () => {
|
|
1180
|
-
const snapshot = makeReasoningSnapshot({
|
|
1181
|
-
assistantTurns: [
|
|
1182
|
-
{
|
|
1183
|
-
turnIndex: 0,
|
|
1184
|
-
sanitizedText: 'let me verify this first before proceeding with the change',
|
|
1185
|
-
createdAt: '2026-04-13T00:01:00.000Z',
|
|
1186
|
-
},
|
|
1187
|
-
],
|
|
1188
|
-
});
|
|
1189
|
-
|
|
1190
|
-
const result = formatReasoningContext(snapshot as any);
|
|
1191
|
-
expect(result).toContain('Uncertainty detected');
|
|
1192
|
-
});
|
|
1193
|
-
|
|
1194
|
-
it('includes confidence signal when not high', () => {
|
|
1195
|
-
const snapshot = makeReasoningSnapshot({
|
|
1196
|
-
assistantTurns: [
|
|
1197
|
-
{
|
|
1198
|
-
turnIndex: 0,
|
|
1199
|
-
sanitizedText: 'I should probably check this more thoroughly before continuing',
|
|
1200
|
-
createdAt: '2026-04-13T00:01:00.000Z',
|
|
1201
|
-
},
|
|
1202
|
-
],
|
|
1203
|
-
});
|
|
1204
|
-
|
|
1205
|
-
const result = formatReasoningContext(snapshot as any);
|
|
1206
|
-
// Low or medium confidence should be shown
|
|
1207
|
-
expect(result).toMatch(/Confidence:\s*(low|medium)/);
|
|
1208
|
-
});
|
|
1209
|
-
|
|
1210
|
-
it('includes contextual factors when present', () => {
|
|
1211
|
-
const snapshot = makeReasoningSnapshot({
|
|
1212
|
-
assistantTurns: [],
|
|
1213
|
-
toolCalls: [
|
|
1214
|
-
{ toolName: 'Read', outcome: 'success', createdAt: '2026-04-13T00:01:00.000Z' },
|
|
1215
|
-
{ toolName: 'Edit', outcome: 'success', createdAt: '2026-04-13T00:02:00.000Z' },
|
|
1216
|
-
],
|
|
1217
|
-
});
|
|
1218
|
-
|
|
1219
|
-
const result = formatReasoningContext(snapshot as any);
|
|
1220
|
-
expect(result).toContain('File structure explored');
|
|
1221
|
-
});
|
|
1222
|
-
|
|
1223
|
-
it('omits ## Reasoning Context when no reasoning signals exist', () => {
|
|
1224
|
-
const snapshot = makeReasoningSnapshot({
|
|
1225
|
-
assistantTurns: [],
|
|
1226
|
-
toolCalls: [
|
|
1227
|
-
{ toolName: 'Edit', outcome: 'success', createdAt: '2026-04-13T00:01:00.000Z' },
|
|
1228
|
-
],
|
|
1229
|
-
});
|
|
1230
|
-
|
|
1231
|
-
const result = formatReasoningContext(snapshot as any);
|
|
1232
|
-
expect(result).toBeNull();
|
|
1233
|
-
});
|
|
1234
|
-
|
|
1235
|
-
it('does not inject decisionPoints', () => {
|
|
1236
|
-
const snapshot = makeReasoningSnapshot({
|
|
1237
|
-
assistantTurns: [
|
|
1238
|
-
{
|
|
1239
|
-
turnIndex: 0,
|
|
1240
|
-
sanitizedText: '<thinking>some thought</thinking>',
|
|
1241
|
-
createdAt: '2026-04-13T00:01:00.000Z',
|
|
1242
|
-
},
|
|
1243
|
-
],
|
|
1244
|
-
});
|
|
1245
|
-
|
|
1246
|
-
const result = formatReasoningContext(snapshot as any);
|
|
1247
|
-
expect(result).not.toContain('decisionPoint');
|
|
1248
|
-
expect(result).not.toContain('DecisionPoint');
|
|
1249
|
-
});
|
|
1250
|
-
});
|
|
1251
|
-
|
|
1252
|
-
// ---------------------------------------------------------------------------
|
|
1253
|
-
// Tests: invokeStubDreamer — risk level and perspective mapping (D-07)
|
|
1254
|
-
// ---------------------------------------------------------------------------
|
|
1255
|
-
|
|
1256
|
-
describe('invokeStubDreamer — risk level and perspective mapping (D-07)', () => {
|
|
1257
|
-
it('gateBlocks candidates get conservative_fix/low', () => {
|
|
1258
|
-
const snapshot = makeSnapshot({ totalGateBlocks: 2 });
|
|
1259
|
-
const output = invokeStubDreamer(snapshot as any, 'T-03', 3);
|
|
1260
|
-
expect(output.valid).toBe(true);
|
|
1261
|
-
expect(output.candidates.length).toBeGreaterThan(0);
|
|
1262
|
-
for (const candidate of output.candidates) {
|
|
1263
|
-
expect(candidate.riskLevel).toBe('low');
|
|
1264
|
-
expect(candidate.strategicPerspective).toBe('conservative_fix');
|
|
1265
|
-
}
|
|
1266
|
-
});
|
|
1267
|
-
|
|
1268
|
-
it('pain candidates get structural_improvement/medium', () => {
|
|
1269
|
-
const snapshot = makeSnapshot({ totalPainEvents: 3 });
|
|
1270
|
-
const output = invokeStubDreamer(snapshot as any, 'T-08', 3);
|
|
1271
|
-
expect(output.valid).toBe(true);
|
|
1272
|
-
expect(output.candidates.length).toBeGreaterThan(0);
|
|
1273
|
-
for (const candidate of output.candidates) {
|
|
1274
|
-
expect(candidate.riskLevel).toBe('medium');
|
|
1275
|
-
expect(candidate.strategicPerspective).toBe('structural_improvement');
|
|
1276
|
-
}
|
|
1277
|
-
});
|
|
1278
|
-
|
|
1279
|
-
it('failure candidates get paradigm_shift/high', () => {
|
|
1280
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
1281
|
-
const output = invokeStubDreamer(snapshot as any, 'T-08', 3);
|
|
1282
|
-
expect(output.valid).toBe(true);
|
|
1283
|
-
expect(output.candidates.length).toBeGreaterThan(0);
|
|
1284
|
-
for (const candidate of output.candidates) {
|
|
1285
|
-
expect(candidate.riskLevel).toBe('high');
|
|
1286
|
-
expect(candidate.strategicPerspective).toBe('paradigm_shift');
|
|
1287
|
-
}
|
|
1288
|
-
});
|
|
1289
|
-
});
|
|
1290
|
-
|
|
1291
|
-
// ---------------------------------------------------------------------------
|
|
1292
|
-
// Tests: runTrinity — diversity telemetry (DIVER-04)
|
|
1293
|
-
// ---------------------------------------------------------------------------
|
|
1294
|
-
|
|
1295
|
-
describe('runTrinity — diversity telemetry (DIVER-04)', () => {
|
|
1296
|
-
it('emits diversityCheckPassed=false when stub candidates all have same risk level', () => {
|
|
1297
|
-
// Failure signal produces all paradigm_shift/high candidates → not diverse
|
|
1298
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
1299
|
-
const config: TrinityConfig = {
|
|
1300
|
-
useTrinity: true,
|
|
1301
|
-
maxCandidates: 3,
|
|
1302
|
-
useStubs: true,
|
|
1303
|
-
};
|
|
1304
|
-
|
|
1305
|
-
const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
|
|
1306
|
-
|
|
1307
|
-
expect(result.success).toBe(true);
|
|
1308
|
-
expect(result.telemetry.diversityCheckPassed).toBe(false);
|
|
1309
|
-
});
|
|
1310
|
-
|
|
1311
|
-
it('emits candidateRiskLevels array matching stub mapping', () => {
|
|
1312
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
1313
|
-
const config: TrinityConfig = {
|
|
1314
|
-
useTrinity: true,
|
|
1315
|
-
maxCandidates: 3,
|
|
1316
|
-
useStubs: true,
|
|
1317
|
-
};
|
|
1318
|
-
|
|
1319
|
-
const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
|
|
1320
|
-
|
|
1321
|
-
expect(result.success).toBe(true);
|
|
1322
|
-
expect(result.telemetry.candidateRiskLevels).toBeDefined();
|
|
1323
|
-
expect(result.telemetry.candidateRiskLevels!.length).toBeGreaterThan(0);
|
|
1324
|
-
// All failure stub candidates should be 'high'
|
|
1325
|
-
for (const level of result.telemetry.candidateRiskLevels!) {
|
|
1326
|
-
expect(level).toBe('high');
|
|
1327
|
-
}
|
|
1328
|
-
});
|
|
1329
|
-
|
|
1330
|
-
it('pipeline completes even when diversity check fails (soft enforcement)', () => {
|
|
1331
|
-
// Failure signal: all candidates have same risk → diversity fails
|
|
1332
|
-
const snapshot = makeSnapshot({ failureCount: 2 });
|
|
1333
|
-
const config: TrinityConfig = {
|
|
1334
|
-
useTrinity: true,
|
|
1335
|
-
maxCandidates: 3,
|
|
1336
|
-
useStubs: true,
|
|
1337
|
-
};
|
|
1338
|
-
|
|
1339
|
-
const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
|
|
1340
|
-
|
|
1341
|
-
expect(result.telemetry.diversityCheckPassed).toBe(false);
|
|
1342
|
-
expect(result.success).toBe(true);
|
|
1343
|
-
expect(result.artifact).toBeDefined();
|
|
1344
|
-
});
|
|
1345
|
-
});
|
|
1346
|
-
|
|
1347
|
-
// ---------------------------------------------------------------------------
|
|
1348
|
-
// Tests: TrinityTelemetry — diversity fields
|
|
1349
|
-
// ---------------------------------------------------------------------------
|
|
1350
|
-
|
|
1351
|
-
describe('TrinityTelemetry — diversity fields', () => {
|
|
1352
|
-
it('accepts optional diversityCheckPassed field', () => {
|
|
1353
|
-
const telemetry: TrinityTelemetry = {
|
|
1354
|
-
chainMode: 'trinity',
|
|
1355
|
-
usedStubs: true,
|
|
1356
|
-
dreamerPassed: true,
|
|
1357
|
-
philosopherPassed: true,
|
|
1358
|
-
scribePassed: true,
|
|
1359
|
-
candidateCount: 2,
|
|
1360
|
-
selectedCandidateIndex: 0,
|
|
1361
|
-
stageFailures: [],
|
|
1362
|
-
diversityCheckPassed: true,
|
|
1363
|
-
};
|
|
1364
|
-
expect(telemetry.diversityCheckPassed).toBe(true);
|
|
1365
|
-
});
|
|
1366
|
-
|
|
1367
|
-
it('accepts optional candidateRiskLevels field', () => {
|
|
1368
|
-
const telemetry: TrinityTelemetry = {
|
|
1369
|
-
chainMode: 'trinity',
|
|
1370
|
-
usedStubs: true,
|
|
1371
|
-
dreamerPassed: true,
|
|
1372
|
-
philosopherPassed: true,
|
|
1373
|
-
scribePassed: true,
|
|
1374
|
-
candidateCount: 2,
|
|
1375
|
-
selectedCandidateIndex: 0,
|
|
1376
|
-
stageFailures: [],
|
|
1377
|
-
candidateRiskLevels: ['low', 'high'],
|
|
1378
|
-
};
|
|
1379
|
-
expect(telemetry.candidateRiskLevels).toEqual(['low', 'high']);
|
|
1380
|
-
});
|
|
1381
|
-
});
|
|
1382
|
-
|
|
1383
|
-
// ---------------------------------------------------------------------------
|
|
1384
|
-
// Tests: Philosopher 6D Evaluation (PHILO-01)
|
|
1385
|
-
// ---------------------------------------------------------------------------
|
|
1386
|
-
|
|
1387
|
-
describe('Philosopher 6D Evaluation (PHILO-01)', () => {
|
|
1388
|
-
it('NOCTURNAL_PHILOSOPHER_PROMPT contains 6 dimensions with calibrated weights', () => {
|
|
1389
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('Safety Impact');
|
|
1390
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('UX Impact');
|
|
1391
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('(weight: 0.20)'); // Principle Alignment
|
|
1392
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('(weight: 0.15)'); // Specificity
|
|
1393
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('(weight: 0.15)'); // Actionability
|
|
1394
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('(weight: 0.15)'); // Executability
|
|
1395
|
-
});
|
|
1396
|
-
|
|
1397
|
-
it('prompt output format includes scores and risks objects', () => {
|
|
1398
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"scores"');
|
|
1399
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"principleAlignment"');
|
|
1400
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"safetyImpact"');
|
|
1401
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"uxImpact"');
|
|
1402
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"risks"');
|
|
1403
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"falsePositiveEstimate"');
|
|
1404
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"implementationComplexity"');
|
|
1405
|
-
expect(NOCTURNAL_PHILOSOPHER_PROMPT).toContain('"breakingChangeRisk"');
|
|
1406
|
-
});
|
|
1407
|
-
});
|
|
1408
|
-
|
|
1409
|
-
// ---------------------------------------------------------------------------
|
|
1410
|
-
// Tests: Philosopher Risk Assessment (PHILO-02)
|
|
1411
|
-
// ---------------------------------------------------------------------------
|
|
1412
|
-
|
|
1413
|
-
describe('Philosopher Risk Assessment (PHILO-02)', () => {
|
|
1414
|
-
it('invokeStubPhilosopher produces risk assessment per candidate', () => {
|
|
1415
|
-
const dreamerOutput: DreamerOutput = {
|
|
1416
|
-
valid: true,
|
|
1417
|
-
candidates: [
|
|
1418
|
-
{
|
|
1419
|
-
candidateIndex: 0,
|
|
1420
|
-
badDecision: 'Did something wrong',
|
|
1421
|
-
betterDecision: 'Read the file before editing to verify content',
|
|
1422
|
-
rationale: 'A good rationale that explains why this is better',
|
|
1423
|
-
confidence: 0.9,
|
|
1424
|
-
riskLevel: 'low',
|
|
1425
|
-
strategicPerspective: 'conservative_fix',
|
|
1426
|
-
},
|
|
1427
|
-
{
|
|
1428
|
-
candidateIndex: 1,
|
|
1429
|
-
badDecision: 'Ignored error messages',
|
|
1430
|
-
betterDecision: 'Challenge the original approach entirely',
|
|
1431
|
-
rationale: 'A paradigm shift rationale for fundamentally different approach',
|
|
1432
|
-
confidence: 0.6,
|
|
1433
|
-
riskLevel: 'high',
|
|
1434
|
-
strategicPerspective: 'paradigm_shift',
|
|
1435
|
-
},
|
|
1436
|
-
],
|
|
1437
|
-
generatedAt: new Date().toISOString(),
|
|
1438
|
-
};
|
|
1439
|
-
const result = invokeStubPhilosopher(dreamerOutput, 'T-01', makeSnapshot() as any);
|
|
1440
|
-
expect(result.valid).toBe(true);
|
|
1441
|
-
for (const j of result.judgments) {
|
|
1442
|
-
expect(j.risks).toBeDefined();
|
|
1443
|
-
expect(j.risks!.falsePositiveEstimate).toBeGreaterThanOrEqual(0);
|
|
1444
|
-
expect(j.risks!.falsePositiveEstimate).toBeLessThanOrEqual(1);
|
|
1445
|
-
expect(['low', 'medium', 'high']).toContain(j.risks!.implementationComplexity);
|
|
1446
|
-
expect(typeof j.risks!.breakingChangeRisk).toBe('boolean');
|
|
1447
|
-
}
|
|
1448
|
-
});
|
|
1449
|
-
});
|
|
1450
|
-
|
|
1451
|
-
// ---------------------------------------------------------------------------
|
|
1452
|
-
// Tests: Philosopher Backward Compatibility (PHILO-03)
|
|
1453
|
-
// ---------------------------------------------------------------------------
|
|
1454
|
-
|
|
1455
|
-
describe('Philosopher Backward Compatibility (PHILO-03)', () => {
|
|
1456
|
-
it('PhilosopherJudgment without scores/risks is valid', () => {
|
|
1457
|
-
const judgment: PhilosopherJudgment = {
|
|
1458
|
-
candidateIndex: 0,
|
|
1459
|
-
critique: 'test',
|
|
1460
|
-
principleAligned: true,
|
|
1461
|
-
score: 0.8,
|
|
1462
|
-
rank: 1,
|
|
1463
|
-
};
|
|
1464
|
-
expect(judgment.score).toBe(0.8);
|
|
1465
|
-
expect(judgment.scores).toBeUndefined();
|
|
1466
|
-
expect(judgment.risks).toBeUndefined();
|
|
1467
|
-
});
|
|
1468
|
-
|
|
1469
|
-
it('runTrinity produces output with 6D scores when candidates have strategicPerspective', () => {
|
|
1470
|
-
const snapshot = makeSnapshot({ failureCount: 2, totalPainEvents: 1 });
|
|
1471
|
-
const config: TrinityConfig = {
|
|
1472
|
-
useTrinity: true,
|
|
1473
|
-
maxCandidates: 3,
|
|
1474
|
-
useStubs: true,
|
|
1475
|
-
};
|
|
1476
|
-
|
|
1477
|
-
const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
|
|
1478
|
-
expect(result.success).toBe(true);
|
|
1479
|
-
expect(result.artifact).toBeDefined();
|
|
1480
|
-
|
|
1481
|
-
// The stub philosopher should produce 6D scores for stub candidates
|
|
1482
|
-
// (stub dreamer assigns strategicPerspective based on principleId)
|
|
1483
|
-
if (result.telemetry.philosopher6D) {
|
|
1484
|
-
const avgScores = result.telemetry.philosopher6D.avgScores;
|
|
1485
|
-
expect(typeof avgScores.principleAlignment).toBe('number');
|
|
1486
|
-
expect(typeof avgScores.specificity).toBe('number');
|
|
1487
|
-
expect(typeof avgScores.actionability).toBe('number');
|
|
1488
|
-
expect(typeof avgScores.executability).toBe('number');
|
|
1489
|
-
expect(typeof avgScores.safetyImpact).toBe('number');
|
|
1490
|
-
expect(typeof avgScores.uxImpact).toBe('number');
|
|
1491
|
-
}
|
|
1492
|
-
});
|
|
1493
|
-
});
|
|
1494
|
-
|
|
1495
|
-
// ---------------------------------------------------------------------------
|
|
1496
|
-
// Tests: Stub Philosopher 6D Scoring (D-09)
|
|
1497
|
-
// ---------------------------------------------------------------------------
|
|
1498
|
-
|
|
1499
|
-
describe('Stub Philosopher 6D Scoring (D-09)', () => {
|
|
1500
|
-
it('conservative_fix candidates get high principleAlignment and low risk', () => {
|
|
1501
|
-
const dreamerOutput: DreamerOutput = {
|
|
1502
|
-
valid: true,
|
|
1503
|
-
candidates: [
|
|
1504
|
-
{
|
|
1505
|
-
candidateIndex: 0,
|
|
1506
|
-
badDecision: 'Did something wrong',
|
|
1507
|
-
betterDecision: 'Read the file before editing to verify current content',
|
|
1508
|
-
rationale: 'Following T-01 requires verifying content before making changes',
|
|
1509
|
-
confidence: 0.9,
|
|
1510
|
-
riskLevel: 'low',
|
|
1511
|
-
strategicPerspective: 'conservative_fix',
|
|
1512
|
-
},
|
|
1513
|
-
],
|
|
1514
|
-
generatedAt: new Date().toISOString(),
|
|
1515
|
-
};
|
|
1516
|
-
const result = invokeStubPhilosopher(dreamerOutput, 'T-01', makeSnapshot() as any);
|
|
1517
|
-
expect(result.valid).toBe(true);
|
|
1518
|
-
const j = result.judgments[0];
|
|
1519
|
-
expect(j.scores).toBeDefined();
|
|
1520
|
-
expect(j.scores!.principleAlignment).toBeGreaterThanOrEqual(0.9);
|
|
1521
|
-
expect(j.scores!.safetyImpact).toBeGreaterThanOrEqual(0.9);
|
|
1522
|
-
expect(j.risks).toBeDefined();
|
|
1523
|
-
expect(j.risks!.breakingChangeRisk).toBe(false);
|
|
1524
|
-
expect(j.risks!.implementationComplexity).toBe('low');
|
|
1525
|
-
});
|
|
1526
|
-
|
|
1527
|
-
it('paradigm_shift candidates get high breakingChangeRisk', () => {
|
|
1528
|
-
const dreamerOutput: DreamerOutput = {
|
|
1529
|
-
valid: true,
|
|
1530
|
-
candidates: [
|
|
1531
|
-
{
|
|
1532
|
-
candidateIndex: 0,
|
|
1533
|
-
badDecision: 'Ignored all errors',
|
|
1534
|
-
betterDecision: 'Challenge the entire approach and redesign from scratch',
|
|
1535
|
-
rationale: 'A paradigm shift rationale for a fundamentally different approach',
|
|
1536
|
-
confidence: 0.5,
|
|
1537
|
-
riskLevel: 'high',
|
|
1538
|
-
strategicPerspective: 'paradigm_shift',
|
|
1539
|
-
},
|
|
1540
|
-
],
|
|
1541
|
-
generatedAt: new Date().toISOString(),
|
|
1542
|
-
};
|
|
1543
|
-
const result = invokeStubPhilosopher(dreamerOutput, 'T-08', makeSnapshot() as any);
|
|
1544
|
-
expect(result.valid).toBe(true);
|
|
1545
|
-
const j = result.judgments[0];
|
|
1546
|
-
expect(j.scores).toBeDefined();
|
|
1547
|
-
expect(j.scores!.safetyImpact).toBeLessThan(0.5);
|
|
1548
|
-
expect(j.risks).toBeDefined();
|
|
1549
|
-
expect(j.risks!.breakingChangeRisk).toBe(true);
|
|
1550
|
-
expect(j.risks!.implementationComplexity).toBe('high');
|
|
1551
|
-
});
|
|
1552
|
-
|
|
1553
|
-
it('structural_improvement candidates get medium across all dimensions', () => {
|
|
1554
|
-
const dreamerOutput: DreamerOutput = {
|
|
1555
|
-
valid: true,
|
|
1556
|
-
candidates: [
|
|
1557
|
-
{
|
|
1558
|
-
candidateIndex: 0,
|
|
1559
|
-
badDecision: 'Rushed through steps',
|
|
1560
|
-
betterDecision: 'Reorder operations and introduce an intermediate checkpoint',
|
|
1561
|
-
rationale: 'Structural improvement rationale to reorder operations properly',
|
|
1562
|
-
confidence: 0.7,
|
|
1563
|
-
riskLevel: 'medium',
|
|
1564
|
-
strategicPerspective: 'structural_improvement',
|
|
1565
|
-
},
|
|
1566
|
-
],
|
|
1567
|
-
generatedAt: new Date().toISOString(),
|
|
1568
|
-
};
|
|
1569
|
-
const result = invokeStubPhilosopher(dreamerOutput, 'T-03', makeSnapshot() as any);
|
|
1570
|
-
expect(result.valid).toBe(true);
|
|
1571
|
-
const j = result.judgments[0];
|
|
1572
|
-
expect(j.scores).toBeDefined();
|
|
1573
|
-
// Medium scores should be between conservative and paradigm
|
|
1574
|
-
expect(j.scores!.principleAlignment).toBeGreaterThanOrEqual(0.7);
|
|
1575
|
-
expect(j.scores!.principleAlignment).toBeLessThanOrEqual(0.8);
|
|
1576
|
-
expect(j.risks).toBeDefined();
|
|
1577
|
-
expect(j.risks!.breakingChangeRisk).toBe(false);
|
|
1578
|
-
expect(j.risks!.implementationComplexity).toBe('medium');
|
|
1579
|
-
});
|
|
1580
|
-
});
|
|
1581
|
-
|
|
1582
|
-
// ---------------------------------------------------------------------------
|
|
1583
|
-
// Tests: TrinityTelemetry — philosopher6D field
|
|
1584
|
-
// ---------------------------------------------------------------------------
|
|
1585
|
-
|
|
1586
|
-
describe('TrinityTelemetry — philosopher6D field', () => {
|
|
1587
|
-
it('accepts optional philosopher6D field', () => {
|
|
1588
|
-
const telemetry: TrinityTelemetry = {
|
|
1589
|
-
chainMode: 'trinity',
|
|
1590
|
-
usedStubs: true,
|
|
1591
|
-
dreamerPassed: true,
|
|
1592
|
-
philosopherPassed: true,
|
|
1593
|
-
scribePassed: true,
|
|
1594
|
-
candidateCount: 2,
|
|
1595
|
-
selectedCandidateIndex: 0,
|
|
1596
|
-
stageFailures: [],
|
|
1597
|
-
philosopher6D: {
|
|
1598
|
-
avgScores: {
|
|
1599
|
-
principleAlignment: 0.85,
|
|
1600
|
-
specificity: 0.75,
|
|
1601
|
-
actionability: 0.8,
|
|
1602
|
-
executability: 0.78,
|
|
1603
|
-
safetyImpact: 0.7,
|
|
1604
|
-
uxImpact: 0.72,
|
|
1605
|
-
},
|
|
1606
|
-
highRiskCount: 1,
|
|
1607
|
-
},
|
|
1608
|
-
};
|
|
1609
|
-
expect(telemetry.philosopher6D).toBeDefined();
|
|
1610
|
-
expect(telemetry.philosopher6D!.avgScores.principleAlignment).toBe(0.85);
|
|
1611
|
-
expect(telemetry.philosopher6D!.highRiskCount).toBe(1);
|
|
1612
|
-
});
|
|
1613
|
-
});
|
|
1614
|
-
|
|
1615
|
-
// ---------------------------------------------------------------------------
|
|
1616
|
-
// Tests: Scribe Contrastive Analysis (SCRIBE-01, SCRIBE-02, SCRIBE-03)
|
|
1617
|
-
// ---------------------------------------------------------------------------
|
|
1618
|
-
|
|
1619
|
-
describe('Scribe Contrastive Analysis (SCRIBE-01, SCRIBE-02, SCRIBE-03)', () => {
|
|
1620
|
-
function makeValidArtifact(overrides: Record<string, unknown> = {}): TrinityDraftArtifact {
|
|
1621
|
-
return {
|
|
1622
|
-
selectedCandidateIndex: 0,
|
|
1623
|
-
badDecision: 'Did something wrong',
|
|
1624
|
-
betterDecision: 'Do it right',
|
|
1625
|
-
rationale: 'Because the principle says so and this is the right approach',
|
|
1626
|
-
sessionId: 'session-test-123',
|
|
1627
|
-
principleId: 'T-01',
|
|
1628
|
-
sourceSnapshotRef: 'snapshot-test-001',
|
|
1629
|
-
telemetry: {
|
|
1630
|
-
chainMode: 'trinity',
|
|
1631
|
-
usedStubs: false,
|
|
1632
|
-
dreamerPassed: true,
|
|
1633
|
-
philosopherPassed: true,
|
|
1634
|
-
scribePassed: true,
|
|
1635
|
-
candidateCount: 2,
|
|
1636
|
-
selectedCandidateIndex: 0,
|
|
1637
|
-
stageFailures: [],
|
|
1638
|
-
},
|
|
1639
|
-
...overrides,
|
|
1640
|
-
};
|
|
1641
|
-
}
|
|
1642
|
-
|
|
1643
|
-
it('TrinityDraftArtifact accepts optional rejectedAnalysis fields (SCRIBE-01)', () => {
|
|
1644
|
-
const artifact = makeValidArtifact({
|
|
1645
|
-
rejectedAnalysis: {
|
|
1646
|
-
whyRejected: 'Lower alignment score',
|
|
1647
|
-
warningSignals: ['missed pain signal', 'ignored gate block'],
|
|
1648
|
-
correctiveThinking: 'Should have verified the routing state before proceeding',
|
|
1649
|
-
},
|
|
1650
|
-
} as Record<string, unknown>);
|
|
1651
|
-
expect(artifact.rejectedAnalysis).toBeDefined();
|
|
1652
|
-
expect(artifact.rejectedAnalysis!.whyRejected).toBe('Lower alignment score');
|
|
1653
|
-
expect(artifact.rejectedAnalysis!.warningSignals).toHaveLength(2);
|
|
1654
|
-
expect(artifact.rejectedAnalysis!.correctiveThinking).toContain('Should have');
|
|
1655
|
-
});
|
|
1656
|
-
|
|
1657
|
-
it('TrinityDraftArtifact accepts optional chosenJustification fields (SCRIBE-02)', () => {
|
|
1658
|
-
const artifact = makeValidArtifact({
|
|
1659
|
-
chosenJustification: {
|
|
1660
|
-
whyChosen: 'Highest 6D composite score and low breakingChangeRisk',
|
|
1661
|
-
keyInsights: ['Verify routing state before file operations', 'Check pain signals early'],
|
|
1662
|
-
limitations: ['Does not apply when session has no pain history', 'Less relevant for conservative fixes'],
|
|
1663
|
-
},
|
|
1664
|
-
} as Record<string, unknown>);
|
|
1665
|
-
expect(artifact.chosenJustification).toBeDefined();
|
|
1666
|
-
expect(artifact.chosenJustification!.whyChosen).toContain('Highest');
|
|
1667
|
-
expect(artifact.chosenJustification!.keyInsights).toHaveLength(2);
|
|
1668
|
-
expect(artifact.chosenJustification!.limitations).toHaveLength(2);
|
|
1669
|
-
});
|
|
1670
|
-
|
|
1671
|
-
it('TrinityDraftArtifact accepts optional contrastiveAnalysis fields (SCRIBE-03)', () => {
|
|
1672
|
-
const artifact = makeValidArtifact({
|
|
1673
|
-
contrastiveAnalysis: {
|
|
1674
|
-
criticalDifference: 'Winner checked routing state; loser proceeded without verification',
|
|
1675
|
-
decisionTrigger: 'When session has pain events and gate blocks, verify infrastructure before file operations',
|
|
1676
|
-
preventionStrategy: 'Add a pre-flight check: read the routing status and confirm no pending failures',
|
|
1677
|
-
},
|
|
1678
|
-
} as Record<string, unknown>);
|
|
1679
|
-
expect(artifact.contrastiveAnalysis).toBeDefined();
|
|
1680
|
-
expect(artifact.contrastiveAnalysis!.criticalDifference).toContain('routing state');
|
|
1681
|
-
expect(artifact.contrastiveAnalysis!.decisionTrigger).toContain('When');
|
|
1682
|
-
expect(artifact.contrastiveAnalysis!.preventionStrategy).toContain('pre-flight');
|
|
1683
|
-
});
|
|
1684
|
-
|
|
1685
|
-
it('validateDraftArtifact passes when all three analysis sections are present', () => {
|
|
1686
|
-
const artifact = makeValidArtifact({
|
|
1687
|
-
rejectedAnalysis: {
|
|
1688
|
-
whyRejected: 'Lower score',
|
|
1689
|
-
warningSignals: ['missed signal'],
|
|
1690
|
-
correctiveThinking: 'Should have checked',
|
|
1691
|
-
},
|
|
1692
|
-
chosenJustification: {
|
|
1693
|
-
whyChosen: 'Best score',
|
|
1694
|
-
keyInsights: ['insight 1'],
|
|
1695
|
-
limitations: ['limitation 1'],
|
|
1696
|
-
},
|
|
1697
|
-
contrastiveAnalysis: {
|
|
1698
|
-
criticalDifference: 'key difference',
|
|
1699
|
-
decisionTrigger: 'When X, do Y',
|
|
1700
|
-
preventionStrategy: 'avoid the rejected path',
|
|
1701
|
-
},
|
|
1702
|
-
} as Record<string, unknown>);
|
|
1703
|
-
const result = validateDraftArtifact(artifact);
|
|
1704
|
-
expect(result.valid).toBe(true);
|
|
1705
|
-
expect(result.failures).toHaveLength(0);
|
|
1706
|
-
});
|
|
1707
|
-
|
|
1708
|
-
it('RejectedAnalysis interface accepts all required fields', () => {
|
|
1709
|
-
const analysis: RejectedAnalysis = {
|
|
1710
|
-
whyRejected: 'test reason',
|
|
1711
|
-
warningSignals: ['signal 1', 'signal 2'],
|
|
1712
|
-
correctiveThinking: 'correct path',
|
|
1713
|
-
};
|
|
1714
|
-
expect(analysis.whyRejected).toBe('test reason');
|
|
1715
|
-
expect(analysis.warningSignals).toHaveLength(2);
|
|
1716
|
-
expect(analysis.correctiveThinking).toBe('correct path');
|
|
1717
|
-
});
|
|
1718
|
-
|
|
1719
|
-
it('ChosenJustification interface accepts all required fields', () => {
|
|
1720
|
-
const justification: ChosenJustification = {
|
|
1721
|
-
whyChosen: 'test reason',
|
|
1722
|
-
keyInsights: ['insight 1', 'insight 2', 'insight 3'],
|
|
1723
|
-
limitations: ['limitation 1'],
|
|
1724
|
-
};
|
|
1725
|
-
expect(justification.whyChosen).toBe('test reason');
|
|
1726
|
-
expect(justification.keyInsights).toHaveLength(3);
|
|
1727
|
-
expect(justification.limitations).toHaveLength(1);
|
|
1728
|
-
});
|
|
1729
|
-
|
|
1730
|
-
it('ContrastiveAnalysis interface accepts all required fields', () => {
|
|
1731
|
-
const analysis: ContrastiveAnalysis = {
|
|
1732
|
-
criticalDifference: 'key insight',
|
|
1733
|
-
decisionTrigger: 'When X, do Y',
|
|
1734
|
-
preventionStrategy: 'avoid the rejected path',
|
|
1735
|
-
};
|
|
1736
|
-
expect(analysis.criticalDifference).toBe('key insight');
|
|
1737
|
-
expect(analysis.decisionTrigger).toBe('When X, do Y');
|
|
1738
|
-
expect(analysis.preventionStrategy).toBe('avoid the rejected path');
|
|
1739
|
-
});
|
|
1740
|
-
});
|
|
1741
|
-
|
|
1742
|
-
// ---------------------------------------------------------------------------
|
|
1743
|
-
// Tests: Scribe Backward Compatibility (SCRIBE-04)
|
|
1744
|
-
// ---------------------------------------------------------------------------
|
|
1745
|
-
|
|
1746
|
-
describe('Scribe Backward Compatibility (SCRIBE-04)', () => {
|
|
1747
|
-
function makeValidArtifact(): TrinityDraftArtifact {
|
|
1748
|
-
return {
|
|
1749
|
-
selectedCandidateIndex: 0,
|
|
1750
|
-
badDecision: 'Did something wrong',
|
|
1751
|
-
betterDecision: 'Do it right',
|
|
1752
|
-
rationale: 'Because the principle says so and this is the right approach',
|
|
1753
|
-
sessionId: 'session-test-123',
|
|
1754
|
-
principleId: 'T-01',
|
|
1755
|
-
sourceSnapshotRef: 'snapshot-test-001',
|
|
1756
|
-
telemetry: {
|
|
1757
|
-
chainMode: 'trinity',
|
|
1758
|
-
usedStubs: false,
|
|
1759
|
-
dreamerPassed: true,
|
|
1760
|
-
philosopherPassed: true,
|
|
1761
|
-
scribePassed: true,
|
|
1762
|
-
candidateCount: 2,
|
|
1763
|
-
selectedCandidateIndex: 0,
|
|
1764
|
-
stageFailures: [],
|
|
1765
|
-
},
|
|
1766
|
-
};
|
|
1767
|
-
}
|
|
1768
|
-
|
|
1769
|
-
it('TrinityDraftArtifact without contrastiveAnalysis fields is valid', () => {
|
|
1770
|
-
const artifact = makeValidArtifact();
|
|
1771
|
-
expect(artifact.contrastiveAnalysis).toBeUndefined();
|
|
1772
|
-
expect(artifact.rejectedAnalysis).toBeUndefined();
|
|
1773
|
-
expect(artifact.chosenJustification).toBeUndefined();
|
|
1774
|
-
const result = validateDraftArtifact(artifact);
|
|
1775
|
-
expect(result.valid).toBe(true);
|
|
1776
|
-
expect(result.failures).toHaveLength(0);
|
|
1777
|
-
});
|
|
1778
|
-
|
|
1779
|
-
it('artifact without new fields produces identical output via draftToArtifact', () => {
|
|
1780
|
-
const artifact = makeValidArtifact();
|
|
1781
|
-
const nocturnalArtifact = draftToArtifact(artifact);
|
|
1782
|
-
expect(nocturnalArtifact.badDecision).toBe('Did something wrong');
|
|
1783
|
-
expect(nocturnalArtifact.betterDecision).toBe('Do it right');
|
|
1784
|
-
expect(nocturnalArtifact.principleId).toBe('T-01');
|
|
1785
|
-
});
|
|
1786
|
-
|
|
1787
|
-
it('runTrinity produces artifact without contrastiveAnalysis when useStubs=true', () => {
|
|
1788
|
-
const snapshot = {
|
|
1789
|
-
sessionId: 'session-backward-compat',
|
|
1790
|
-
startedAt: '2026-04-13T00:00:00.000Z',
|
|
1791
|
-
updatedAt: '2026-04-13T00:05:00.000Z',
|
|
1792
|
-
assistantTurns: [],
|
|
1793
|
-
userTurns: [],
|
|
1794
|
-
toolCalls: [],
|
|
1795
|
-
painEvents: [],
|
|
1796
|
-
gateBlocks: [],
|
|
1797
|
-
stats: {
|
|
1798
|
-
failureCount: 1,
|
|
1799
|
-
totalPainEvents: 0,
|
|
1800
|
-
totalGateBlocks: 0,
|
|
1801
|
-
totalAssistantTurns: 5,
|
|
1802
|
-
totalToolCalls: 10,
|
|
1803
|
-
},
|
|
1804
|
-
};
|
|
1805
|
-
const config: TrinityConfig = {
|
|
1806
|
-
useTrinity: true,
|
|
1807
|
-
maxCandidates: 3,
|
|
1808
|
-
useStubs: true,
|
|
1809
|
-
};
|
|
1810
|
-
|
|
1811
|
-
const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
|
|
1812
|
-
expect(result.success).toBe(true);
|
|
1813
|
-
expect(result.artifact).toBeDefined();
|
|
1814
|
-
expect(result.artifact!.contrastiveAnalysis).toBeUndefined();
|
|
1815
|
-
expect(result.artifact!.rejectedAnalysis).toBeUndefined();
|
|
1816
|
-
expect(result.artifact!.chosenJustification).toBeUndefined();
|
|
1817
|
-
});
|
|
1818
|
-
});
|
|
1819
|
-
|
|
1820
|
-
// ---------------------------------------------------------------------------
|
|
1821
|
-
// Tests: validateExtraction — Hallucination Detection (SDK-QUAL-02)
|
|
1822
|
-
// ---------------------------------------------------------------------------
|
|
1823
|
-
|
|
1824
|
-
describe('validateExtraction — Hallucination Detection (SDK-QUAL-02)', () => {
|
|
1825
|
-
function makeArtifact(badDecision: string, overrides: Record<string, unknown> = {}): TrinityDraftArtifact {
|
|
1826
|
-
return {
|
|
1827
|
-
selectedCandidateIndex: 0,
|
|
1828
|
-
badDecision,
|
|
1829
|
-
betterDecision: 'Do it right instead',
|
|
1830
|
-
rationale: 'Because the principle says so and this is the correct approach',
|
|
1831
|
-
sessionId: 'session-test-123',
|
|
1832
|
-
principleId: 'T-01',
|
|
1833
|
-
sourceSnapshotRef: 'snapshot-test-001',
|
|
1834
|
-
telemetry: {
|
|
1835
|
-
chainMode: 'trinity',
|
|
1836
|
-
usedStubs: true,
|
|
1837
|
-
dreamerPassed: true,
|
|
1838
|
-
philosopherPassed: true,
|
|
1839
|
-
scribePassed: true,
|
|
1840
|
-
candidateCount: 1,
|
|
1841
|
-
selectedCandidateIndex: 0,
|
|
1842
|
-
stageFailures: [],
|
|
1843
|
-
},
|
|
1844
|
-
...overrides,
|
|
1845
|
-
};
|
|
1846
|
-
}
|
|
1847
|
-
|
|
1848
|
-
function makeSnapshotWithEvidence(overrides: {
|
|
1849
|
-
failedToolCalls?: Array<{ toolName: string; filePath?: string; errorMessage?: string }>;
|
|
1850
|
-
painEvents?: Array<{ source: string; score: number; reason?: string }>;
|
|
1851
|
-
gateBlocks?: Array<{ toolName: string; reason: string }>;
|
|
1852
|
-
userCorrections?: number;
|
|
1853
|
-
} = {}) {
|
|
1854
|
-
const toolCalls = (overrides.failedToolCalls ?? []).map(tc => ({
|
|
1855
|
-
toolName: tc.toolName,
|
|
1856
|
-
outcome: 'failure' as const,
|
|
1857
|
-
filePath: tc.filePath ?? null,
|
|
1858
|
-
durationMs: null,
|
|
1859
|
-
exitCode: 1,
|
|
1860
|
-
errorType: 'runtime_error',
|
|
1861
|
-
errorMessage: tc.errorMessage ?? 'unknown error',
|
|
1862
|
-
createdAt: '2026-04-17T00:00:00.000Z',
|
|
1863
|
-
}));
|
|
1864
|
-
|
|
1865
|
-
const painEvents = (overrides.painEvents ?? []).map(pe => ({
|
|
1866
|
-
source: pe.source,
|
|
1867
|
-
score: pe.score,
|
|
1868
|
-
severity: 'medium' as const,
|
|
1869
|
-
reason: pe.reason ?? null,
|
|
1870
|
-
createdAt: '2026-04-17T00:00:00.000Z',
|
|
1871
|
-
}));
|
|
1872
|
-
|
|
1873
|
-
const gateBlocks = (overrides.gateBlocks ?? []).map(gb => ({
|
|
1874
|
-
toolName: gb.toolName,
|
|
1875
|
-
filePath: null,
|
|
1876
|
-
reason: gb.reason,
|
|
1877
|
-
planStatus: null,
|
|
1878
|
-
createdAt: '2026-04-17T00:00:00.000Z',
|
|
1879
|
-
}));
|
|
1880
|
-
|
|
1881
|
-
const userTurns = Array.from({ length: overrides.userCorrections ?? 0 }, (_, i) => ({
|
|
1882
|
-
turnIndex: i,
|
|
1883
|
-
correctionDetected: true,
|
|
1884
|
-
correctionCue: 'wrong approach',
|
|
1885
|
-
createdAt: '2026-04-17T00:00:00.000Z',
|
|
1886
|
-
}));
|
|
1887
|
-
|
|
1888
|
-
return {
|
|
1889
|
-
sessionId: 'session-test-123',
|
|
1890
|
-
startedAt: '2026-04-17T00:00:00.000Z',
|
|
1891
|
-
updatedAt: '2026-04-17T00:05:00.000Z',
|
|
1892
|
-
assistantTurns: [],
|
|
1893
|
-
userTurns,
|
|
1894
|
-
toolCalls: toolCalls,
|
|
1895
|
-
painEvents,
|
|
1896
|
-
gateBlocks,
|
|
1897
|
-
stats: {
|
|
1898
|
-
failureCount: toolCalls.length,
|
|
1899
|
-
totalPainEvents: painEvents.length,
|
|
1900
|
-
totalGateBlocks: gateBlocks.length,
|
|
1901
|
-
totalAssistantTurns: 5,
|
|
1902
|
-
totalToolCalls: 10,
|
|
1903
|
-
},
|
|
1904
|
-
};
|
|
1905
|
-
}
|
|
1906
|
-
|
|
1907
|
-
it('passes when badDecision references a tool failure from the snapshot', () => {
|
|
1908
|
-
const snapshot = makeSnapshotWithEvidence({
|
|
1909
|
-
failedToolCalls: [{ toolName: 'Edit', filePath: 'src/config.ts', errorMessage: 'permission denied' }],
|
|
1910
|
-
});
|
|
1911
|
-
const artifact = makeArtifact('Proceeded with Edit on src/config.ts without checking permission');
|
|
1912
|
-
|
|
1913
|
-
const result = validateExtraction(artifact, snapshot as any);
|
|
1914
|
-
|
|
1915
|
-
expect(result.isGrounded).toBe(true);
|
|
1916
|
-
expect(result.evidenceTypes).toContain('tool_failures');
|
|
1917
|
-
});
|
|
1918
|
-
|
|
1919
|
-
it('passes when badDecision references a pain event from the snapshot', () => {
|
|
1920
|
-
const snapshot = makeSnapshotWithEvidence({
|
|
1921
|
-
painEvents: [{ source: 'gate', score: 70, reason: 'accumulated friction from repeated file operation failures' }],
|
|
1922
|
-
});
|
|
1923
|
-
const artifact = makeArtifact('Ignored accumulated friction from file operations');
|
|
1924
|
-
|
|
1925
|
-
const result = validateExtraction(artifact, snapshot as any);
|
|
1926
|
-
|
|
1927
|
-
expect(result.isGrounded).toBe(true);
|
|
1928
|
-
expect(result.evidenceTypes).toContain('pain_events');
|
|
1929
|
-
});
|
|
1930
|
-
|
|
1931
|
-
it('passes when badDecision references a gate block from the snapshot', () => {
|
|
1932
|
-
const snapshot = makeSnapshotWithEvidence({
|
|
1933
|
-
gateBlocks: [{ toolName: 'Bash', reason: 'destructive command blocked by safety gate' }],
|
|
1934
|
-
});
|
|
1935
|
-
const artifact = makeArtifact('Attempted to execute a destructive Bash command that was blocked by the gate');
|
|
1936
|
-
|
|
1937
|
-
const result = validateExtraction(artifact, snapshot as any);
|
|
1938
|
-
|
|
1939
|
-
expect(result.isGrounded).toBe(true);
|
|
1940
|
-
expect(result.evidenceTypes).toContain('gate_blocks');
|
|
1941
|
-
});
|
|
1942
|
-
|
|
1943
|
-
it('passes when badDecision references user corrections', () => {
|
|
1944
|
-
const snapshot = makeSnapshotWithEvidence({
|
|
1945
|
-
userCorrections: 2,
|
|
1946
|
-
});
|
|
1947
|
-
const artifact = makeArtifact('Continued with the wrong approach despite user corrections');
|
|
1948
|
-
|
|
1949
|
-
const result = validateExtraction(artifact, snapshot as any);
|
|
1950
|
-
|
|
1951
|
-
expect(result.isGrounded).toBe(true);
|
|
1952
|
-
expect(result.evidenceTypes).toContain('user_corrections');
|
|
1953
|
-
});
|
|
1954
|
-
|
|
1955
|
-
it('detects hallucination when badDecision has no overlap with snapshot evidence', () => {
|
|
1956
|
-
const snapshot = makeSnapshotWithEvidence({
|
|
1957
|
-
failedToolCalls: [{ toolName: 'Read', filePath: 'package.json', errorMessage: 'file not found' }],
|
|
1958
|
-
});
|
|
1959
|
-
const artifact = makeArtifact('Deployed production database without running migration scripts first');
|
|
1960
|
-
|
|
1961
|
-
const result = validateExtraction(artifact, snapshot as any);
|
|
1962
|
-
|
|
1963
|
-
expect(result.isGrounded).toBe(false);
|
|
1964
|
-
expect(result.reason).toContain('Hallucinated extraction');
|
|
1965
|
-
});
|
|
1966
|
-
|
|
1967
|
-
it('passes when snapshot has no evidence at all (no signal to validate against)', () => {
|
|
1968
|
-
const snapshot = makeSnapshotWithEvidence();
|
|
1969
|
-
const artifact = makeArtifact('Made an incorrect decision during the session');
|
|
1970
|
-
|
|
1971
|
-
const result = validateExtraction(artifact, snapshot as any);
|
|
1972
|
-
|
|
1973
|
-
// No evidence means we cannot validate -- allow through
|
|
1974
|
-
expect(result.isGrounded).toBe(true);
|
|
1975
|
-
expect(result.evidenceTypes).toHaveLength(0);
|
|
1976
|
-
});
|
|
1977
|
-
|
|
1978
|
-
it('provides evidence preview for telemetry', () => {
|
|
1979
|
-
const snapshot = makeSnapshotWithEvidence({
|
|
1980
|
-
failedToolCalls: [{ toolName: 'Write', filePath: 'output.log', errorMessage: 'permission denied for write operation' }],
|
|
1981
|
-
painEvents: [{ source: 'hook', score: 80, reason: 'repeated permission denied failures during write operation' }],
|
|
1982
|
-
});
|
|
1983
|
-
const artifact = makeArtifact('Proceeded with write operation on output.log despite permission denied error');
|
|
1984
|
-
|
|
1985
|
-
const result = validateExtraction(artifact, snapshot as any);
|
|
1986
|
-
|
|
1987
|
-
expect(result.isGrounded).toBe(true);
|
|
1988
|
-
expect(result.evidencePreview.length).toBeGreaterThan(0);
|
|
1989
|
-
expect(result.evidenceTypes).toContain('tool_failures');
|
|
1990
|
-
expect(result.evidenceTypes).toContain('pain_events');
|
|
1991
|
-
});
|
|
1992
|
-
|
|
1993
|
-
it('detects hallucination with unrelated but specific badDecision text', () => {
|
|
1994
|
-
const snapshot = makeSnapshotWithEvidence({
|
|
1995
|
-
painEvents: [{ source: 'gate', score: 60, reason: 'rate limit exceeded for API calls' }],
|
|
1996
|
-
});
|
|
1997
|
-
const artifact = makeArtifact('Deleted the primary database without creating a backup first');
|
|
1998
|
-
|
|
1999
|
-
const result = validateExtraction(artifact, snapshot as any);
|
|
2000
|
-
|
|
2001
|
-
expect(result.isGrounded).toBe(false);
|
|
2002
|
-
});
|
|
2003
|
-
|
|
2004
|
-
it('runTrinity stub path fails when hallucination is detected', () => {
|
|
2005
|
-
// Create a snapshot with failure signals so stub candidates are generated
|
|
2006
|
-
// but override the tool calls to be something completely unrelated to what
|
|
2007
|
-
// the stub Dreamer generates (which mentions "failing operation")
|
|
2008
|
-
const snapshot = {
|
|
2009
|
-
sessionId: 'session-hallucination-test',
|
|
2010
|
-
startedAt: '2026-04-17T00:00:00.000Z',
|
|
2011
|
-
updatedAt: '2026-04-17T00:05:00.000Z',
|
|
2012
|
-
assistantTurns: [],
|
|
2013
|
-
userTurns: [],
|
|
2014
|
-
toolCalls: [
|
|
2015
|
-
{
|
|
2016
|
-
toolName: 'Grep',
|
|
2017
|
-
outcome: 'failure' as const,
|
|
2018
|
-
filePath: null,
|
|
2019
|
-
durationMs: null,
|
|
2020
|
-
exitCode: 1,
|
|
2021
|
-
errorType: 'timeout',
|
|
2022
|
-
errorMessage: 'search timed out after 30 seconds',
|
|
2023
|
-
createdAt: '2026-04-17T00:00:00.000Z',
|
|
2024
|
-
},
|
|
2025
|
-
],
|
|
2026
|
-
painEvents: [],
|
|
2027
|
-
gateBlocks: [],
|
|
2028
|
-
stats: {
|
|
2029
|
-
failureCount: 1,
|
|
2030
|
-
totalPainEvents: 0,
|
|
2031
|
-
totalGateBlocks: 0,
|
|
2032
|
-
totalAssistantTurns: 2,
|
|
2033
|
-
totalToolCalls: 1,
|
|
2034
|
-
},
|
|
2035
|
-
};
|
|
2036
|
-
|
|
2037
|
-
const config: TrinityConfig = {
|
|
2038
|
-
useTrinity: true,
|
|
2039
|
-
maxCandidates: 3,
|
|
2040
|
-
useStubs: true,
|
|
2041
|
-
};
|
|
2042
|
-
|
|
2043
|
-
const result = runTrinity({ snapshot: snapshot as any, principleId: 'T-08', config });
|
|
2044
|
-
|
|
2045
|
-
// The stub Dreamer generates candidates mentioning "failing operation" and "config.json"
|
|
2046
|
-
// The snapshot has a Grep failure with "search timed out"
|
|
2047
|
-
// With the normalized token matching: badDecisionTokens = {retry,faili,oper,diagnos,root,caus}
|
|
2048
|
-
// and evidenceTokens = {search,timed,after,seconds,timedout} — no overlap → extraction fails
|
|
2049
|
-
// So result.success must be false with a Hallucinated failure.
|
|
2050
|
-
expect(result.success).toBe(false);
|
|
2051
|
-
expect(result.failures.some(f => f.reason?.includes('Hallucinated'))).toBe(true);
|
|
2052
|
-
});
|
|
2053
|
-
});
|