principles-disciple 1.72.0 → 1.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +10 -5
- package/package.json +17 -19
- package/scripts/acceptance-test.mjs +16 -73
- package/scripts/sync-plugin.mjs +382 -77
- package/src/commands/archive-impl.ts +2 -1
- package/src/commands/capabilities.ts +2 -2
- package/src/commands/context.ts +2 -2
- package/src/commands/disable-impl.ts +2 -1
- package/src/commands/evolution-status.ts +16 -16
- package/src/commands/export.ts +12 -67
- package/src/commands/pain.ts +91 -1
- package/src/commands/principle-rollback.ts +2 -1
- package/src/commands/promote-impl.ts +7 -43
- package/src/commands/rollback-impl.ts +2 -1
- package/src/commands/rollback.ts +2 -1
- package/src/commands/samples.ts +2 -1
- package/src/commands/thinking-os.ts +2 -1
- package/src/config/errors.ts +18 -2
- package/src/constants/diagnostician.ts +2 -2
- package/src/constants/tools.ts +2 -1
- package/src/core/__tests__/focus-history.test.ts +210 -0
- package/src/core/config.ts +1 -1
- package/src/core/confirm-first-gate.ts +255 -0
- package/src/core/correction-cue-learner.ts +2 -136
- package/src/core/correction-types.ts +16 -88
- package/src/core/dictionary.ts +19 -20
- package/src/core/empathy-keyword-matcher.ts +17 -289
- package/src/core/empathy-types.ts +18 -229
- package/src/core/event-log.ts +38 -132
- package/src/core/evolution-reducer.ts +21 -2
- package/src/core/evolution-types.ts +76 -464
- package/src/core/file-store.ts +80 -0
- package/src/core/focus-history.ts +228 -955
- package/src/core/local-worker-routing.ts +34 -314
- package/src/core/merge-gate-audit.ts +0 -195
- package/src/core/pain-diagnostic-gate.ts +154 -0
- package/src/core/pain-signal.ts +21 -138
- package/src/core/pain.ts +15 -88
- package/src/core/pd-task-reconciler.ts +26 -115
- package/src/core/pd-task-service.ts +9 -9
- package/src/core/pd-task-types.ts +23 -127
- package/src/core/principle-compiler/__tests__/compiler-replay-gate.test.ts +174 -0
- package/src/core/principle-compiler/code-validator.ts +15 -42
- package/src/core/principle-compiler/compiler.ts +100 -15
- package/src/core/principle-compiler/index.ts +5 -2
- package/src/core/principle-compiler/template-generator.ts +4 -104
- package/src/core/principle-injection.ts +10 -202
- package/src/core/principle-internalization/filesystem-lifecycle-datasource.ts +42 -0
- package/src/core/principle-internalization/lifecycle-read-model.ts +39 -242
- package/src/core/principle-internalization/principle-lifecycle-service.ts +12 -10
- package/src/core/principle-tree-ledger-adapter.ts +145 -0
- package/src/core/principle-tree-ledger.ts +8 -6
- package/src/core/reflection/reflection-context.ts +14 -109
- package/src/core/replay-engine.ts +8 -500
- package/src/core/rule-host-helpers.ts +5 -35
- package/src/core/rule-host-types.ts +10 -82
- package/src/core/rule-host.ts +6 -63
- package/src/core/runtime-v2-prompt-activation-reader.ts +231 -0
- package/src/core/session-tracker.ts +87 -101
- package/src/core/shadow-observation-registry.ts +19 -48
- package/src/core/trajectory.ts +3 -1
- package/src/core/workflow-funnel-loader.ts +62 -68
- package/src/core/workspace-context.ts +46 -0
- package/src/core/workspace-dir-service.ts +1 -1
- package/src/core/workspace-dir-validation.ts +18 -9
- package/src/hooks/AGENTS.md +1 -1
- package/src/hooks/gate-block-helper.ts +46 -44
- package/src/hooks/gate.ts +207 -7
- package/src/hooks/lifecycle.ts +30 -32
- package/src/hooks/llm.ts +60 -32
- package/src/hooks/pain.ts +297 -103
- package/src/hooks/prompt.ts +459 -439
- package/src/hooks/subagent.ts +2 -29
- package/src/i18n/commands.ts +2 -10
- package/src/index.ts +95 -85
- package/src/openclaw-sdk.ts +311 -0
- package/src/service/central-database.ts +8 -4
- package/src/service/evolution-queue-migration.ts +2 -1
- package/src/service/evolution-worker.ts +163 -1786
- package/src/service/internalization-trigger-adapter.ts +302 -0
- package/src/service/keyword-optimization-service.ts +4 -4
- package/src/service/monitoring-query-service.ts +1 -215
- package/src/service/queue-io.ts +60 -331
- package/src/service/runtime-summary-service.ts +59 -16
- package/src/service/subagent-workflow/index.ts +0 -41
- package/src/service/subagent-workflow/types.ts +9 -120
- package/src/service/subagent-workflow/workflow-store.ts +2 -119
- package/src/service/workflow-watchdog.ts +0 -43
- package/src/types/event-payload.ts +16 -74
- package/src/types/event-types.ts +39 -547
- package/src/types/hygiene-types.ts +7 -30
- package/src/types/principle-tree-schema.ts +20 -222
- package/src/types/queue.ts +15 -70
- package/src/types/runtime-summary.ts +5 -49
- package/src/utils/io.ts +10 -0
- package/src/utils/retry.ts +1 -1
- package/src/utils/shadow-fingerprint.ts +2 -2
- package/src/utils/workspace-resolver.ts +50 -0
- package/templates/langs/en/core/AGENTS.md +2 -2
- package/templates/langs/en/core/BOOT.md +1 -1
- package/templates/langs/en/core/HEARTBEAT.md +2 -2
- package/templates/langs/en/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
- package/templates/langs/en/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
- package/templates/langs/en/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
- package/templates/langs/en/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
- package/templates/langs/en/skills/evolve-task/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-cli-operator/SKILL.md +67 -0
- package/templates/langs/en/skills/pd-diagnostician/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-mentor/SKILL.md +1 -1
- package/templates/langs/en/skills/pd-pain-signal/SKILL.md +17 -39
- package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +61 -0
- package/templates/langs/zh/core/AGENTS.md +2 -2
- package/templates/langs/zh/core/BOOT.md +1 -1
- package/templates/langs/zh/core/HEARTBEAT.md +2 -2
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/agent-registry.json +1 -72
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/bugfix-complex-template.json +6 -6
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/feature-complex-template.json +6 -6
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +8 -8
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal-verify.json +2 -12
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/workflow-validation-minimal.json +2 -12
- package/templates/langs/zh/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +51 -15
- package/templates/langs/zh/skills/ai-sprint-orchestration/test/run.test.mjs +21 -5
- package/templates/langs/zh/skills/evolve-task/SKILL.md +2 -2
- package/templates/langs/zh/skills/pd-cli-operator/SKILL.md +67 -0
- package/templates/langs/zh/skills/pd-diagnostician/SKILL.md +1 -1
- package/templates/langs/zh/skills/pd-mentor/SKILL.md +1 -1
- package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +17 -38
- package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +61 -0
- package/tests/build-artifacts.test.ts +1 -3
- package/tests/commands/evolution-status.test.ts +0 -118
- package/tests/core/bootstrap-rules.test.ts +1 -1
- package/tests/core/config.test.ts +1 -1
- package/tests/core/event-log.test.ts +35 -0
- package/tests/core/evolution-engine.test.ts +610 -0
- package/tests/core/file-store.test.ts +102 -0
- package/tests/core/focus-history.test.ts +203 -11
- package/tests/core/merge-gate-audit.test.ts +2 -169
- package/tests/core/model-deployment-registry.test.ts +7 -1
- package/tests/core/model-training-registry.test.ts +19 -0
- package/tests/core/observability.test.ts +0 -1
- package/tests/core/pain-diagnostic-gate.test.ts +498 -0
- package/tests/core/pain.test.ts +0 -1
- package/tests/core/principle-internalization/deprecated-readiness.test.ts +2 -2
- package/tests/core/principle-internalization/lifecycle-metrics.test.ts +2 -2
- package/tests/core/principle-internalization/{internalization-routing-policy.test.ts → lifecycle-routing-policy.test.ts} +6 -6
- package/tests/core/principle-internalization/lineage-source-retired.test.ts +56 -0
- package/tests/core/principle-internalization/principle-lifecycle-service.test.ts +1 -23
- package/tests/core/principle-tree-ledger-adapter.test.ts +253 -0
- package/tests/core/reflection-context.test.ts +0 -14
- package/tests/core/replay-engine.test.ts +127 -215
- package/tests/core/rule-host-helpers.test.ts +2 -2
- package/tests/core/rule-implementation-runtime.test.ts +0 -27
- package/tests/core/workflow-funnel-loader.test.ts +162 -0
- package/tests/core/workspace-dir-validation.test.ts +8 -1
- package/tests/core-anti-growth.test.ts +192 -0
- package/tests/hook-workspace-nextaction-contract.test.ts +42 -0
- package/tests/hooks/confirm-first-gate.test.ts +333 -0
- package/tests/hooks/gate-auto-correct-shadow.test.ts +310 -0
- package/tests/hooks/gate-auto-correct.test.ts +665 -0
- package/tests/hooks/gate-rule-host-pipeline.test.ts +2 -1
- package/tests/hooks/pain.test.ts +269 -12
- package/tests/hooks/prompt-characterization.test.ts +500 -0
- package/tests/hooks/prompt-size-guard.test.ts +32 -17
- package/tests/hooks/runtime-v2-prompt-activation.test.ts +869 -0
- package/tests/index.test.ts +94 -1
- package/tests/integration/auto-entry-gate.test.ts +248 -0
- package/tests/integration/internalization-trigger-guard.test.ts +69 -0
- package/tests/integration/m8-legacy-paths.test.ts +63 -0
- package/tests/integration/runtime-v2-pain-guard.test.ts +125 -0
- package/tests/plugin-config-resolution-cutover.test.ts +359 -0
- package/tests/runtime-v2-discovery-guard.test.ts +154 -0
- package/tests/service/central-database.test.ts +457 -0
- package/tests/service/evolution-worker.correction-observer.test.ts +173 -0
- package/tests/service/evolution-worker.timeout.test.ts +11 -129
- package/tests/service/internalization-trigger-adapter.test.ts +251 -0
- package/tests/service/monitoring-query-service.test.ts +1 -47
- package/tests/service/queue-io.test.ts +1 -62
- package/tests/service/runtime-summary-service.test.ts +3 -1
- package/tests/service/workflow-watchdog.test.ts +0 -91
- package/tests/utils/file-lock.test.ts +5 -3
- package/tests/utils/session-key.test.ts +52 -0
- package/tests/utils/subagent-probe.test.ts +48 -1
- package/vitest.config.ts +4 -11
- package/.planning/codebase/ARCHITECTURE.md +0 -157
- package/.planning/codebase/CONCERNS.md +0 -145
- package/.planning/codebase/CONVENTIONS.md +0 -148
- package/.planning/codebase/INTEGRATIONS.md +0 -81
- package/.planning/codebase/STACK.md +0 -87
- package/.planning/codebase/STRUCTURE.md +0 -193
- package/.planning/codebase/TESTING.md +0 -243
- package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +0 -113
- package/docs/COMMAND_REFERENCE.md +0 -76
- package/docs/COMMAND_REFERENCE_EN.md +0 -79
- package/scripts/build-web.mjs +0 -46
- package/scripts/diagnose-nocturnal.mjs +0 -537
- package/scripts/seed-nocturnal-scenarios.mjs +0 -384
- package/src/commands/nocturnal-review.ts +0 -322
- package/src/commands/nocturnal-rollout.ts +0 -790
- package/src/commands/nocturnal-train.ts +0 -986
- package/src/commands/pd-reflect.ts +0 -88
- package/src/core/adaptive-thresholds.ts +0 -478
- package/src/core/diagnostician-task-store.ts +0 -192
- package/src/core/nocturnal-arbiter.ts +0 -715
- package/src/core/nocturnal-artifact-lineage.ts +0 -116
- package/src/core/nocturnal-artificer.ts +0 -257
- package/src/core/nocturnal-candidate-scoring.ts +0 -530
- package/src/core/nocturnal-compliance.ts +0 -1146
- package/src/core/nocturnal-dataset.ts +0 -763
- package/src/core/nocturnal-executability.ts +0 -428
- package/src/core/nocturnal-export.ts +0 -499
- package/src/core/nocturnal-paths.ts +0 -240
- package/src/core/nocturnal-reasoning-deriver.ts +0 -343
- package/src/core/nocturnal-rule-implementation-validator.ts +0 -246
- package/src/core/nocturnal-snapshot-contract.ts +0 -99
- package/src/core/nocturnal-trajectory-extractor.ts +0 -512
- package/src/core/nocturnal-trinity-types.ts +0 -218
- package/src/core/nocturnal-trinity.ts +0 -2680
- package/src/core/principle-internalization/deprecated-readiness.ts +0 -93
- package/src/core/principle-internalization/internalization-routing-policy.ts +0 -208
- package/src/core/principle-internalization/lifecycle-metrics.ts +0 -152
- package/src/http/principles-console-route.ts +0 -709
- package/src/service/central-health-service.ts +0 -49
- package/src/service/central-overview-service.ts +0 -138
- package/src/service/control-ui-query-service.ts +0 -900
- package/src/service/cooldown-strategy.ts +0 -97
- package/src/service/evolution-pain-context.ts +0 -79
- package/src/service/evolution-query-service.ts +0 -407
- package/src/service/health-query-service.ts +0 -1038
- package/src/service/nocturnal-config.ts +0 -214
- package/src/service/nocturnal-runtime.ts +0 -734
- package/src/service/nocturnal-service.ts +0 -1605
- package/src/service/nocturnal-target-selector.ts +0 -545
- package/src/service/sleep-cycle.ts +0 -157
- package/src/service/startup-reconciler.ts +0 -112
- package/src/service/subagent-workflow/correction-observer-types.ts +0 -82
- package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +0 -250
- package/src/service/subagent-workflow/deep-reflect-workflow-manager.ts +0 -1
- package/src/service/subagent-workflow/dynamic-timeout.ts +0 -30
- package/src/service/subagent-workflow/empathy-observer-workflow-manager.ts +0 -268
- package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -795
- package/src/service/subagent-workflow/runtime-direct-driver.ts +0 -268
- package/src/service/subagent-workflow/workflow-manager-base.ts +0 -580
- package/src/tools/write-pain-flag.ts +0 -215
- package/tests/commands/nocturnal-review.test.ts +0 -448
- package/tests/commands/nocturnal-train.test.ts +0 -97
- package/tests/commands/pd-reflect.test.ts +0 -49
- package/tests/core/adaptive-thresholds.test.ts +0 -261
- package/tests/core/nocturnal-arbiter.test.ts +0 -559
- package/tests/core/nocturnal-artifact-lineage.test.ts +0 -53
- package/tests/core/nocturnal-artificer.test.ts +0 -241
- package/tests/core/nocturnal-candidate-scoring.test.ts +0 -532
- package/tests/core/nocturnal-compliance-p-principles.test.ts +0 -133
- package/tests/core/nocturnal-compliance.test.ts +0 -646
- package/tests/core/nocturnal-dataset.test.ts +0 -892
- package/tests/core/nocturnal-e2e.test.ts +0 -234
- package/tests/core/nocturnal-executability.test.ts +0 -357
- package/tests/core/nocturnal-export.test.ts +0 -517
- package/tests/core/nocturnal-reasoning-deriver.test.ts +0 -372
- package/tests/core/nocturnal-reviewed-subset-comparison.test.ts +0 -428
- package/tests/core/nocturnal-rule-implementation-validator.test.ts +0 -127
- package/tests/core/nocturnal-snapshot-contract.test.ts +0 -121
- package/tests/core/nocturnal-trajectory-extractor.test.ts +0 -634
- package/tests/core/nocturnal-trinity.test.ts +0 -2053
- package/tests/core/pain-auto-repair.test.ts +0 -96
- package/tests/core/pain-integration.test.ts +0 -510
- package/tests/fixtures/nocturnal-reviewed-subset.json +0 -183
- package/tests/http/principles-console-route.test.ts +0 -162
- package/tests/integration/chaos-resilience.test.ts +0 -348
- package/tests/integration/empathy-workflow-integration.test.ts +0 -626
- package/tests/integration/pain-diagnostician-loop.e2e.test.ts +0 -380
- package/tests/service/control-ui-query-service.test.ts +0 -121
- package/tests/service/cooldown-strategy.test.ts +0 -164
- package/tests/service/data-endpoints-regression.test.ts +0 -834
- package/tests/service/empathy-observer-workflow-manager.test.ts +0 -175
- package/tests/service/evolution-worker.nocturnal.test.ts +0 -601
- package/tests/service/nocturnal-runtime-hardening.test.ts +0 -118
- package/tests/service/nocturnal-runtime.test.ts +0 -473
- package/tests/service/nocturnal-service-code-candidate.test.ts +0 -330
- package/tests/service/nocturnal-target-selector.test.ts +0 -615
- package/tests/service/startup-reconciler.test.ts +0 -148
- package/tests/tools/write-pain-flag.test.ts +0 -358
- package/ui/src/App.tsx +0 -45
- package/ui/src/api.ts +0 -220
- package/ui/src/charts.tsx +0 -955
- package/ui/src/components/ErrorState.tsx +0 -6
- package/ui/src/components/Loading.tsx +0 -13
- package/ui/src/components/ProtectedRoute.tsx +0 -12
- package/ui/src/components/Shell.tsx +0 -91
- package/ui/src/components/WorkspaceConfig.tsx +0 -178
- package/ui/src/components/index.ts +0 -5
- package/ui/src/context/auth.tsx +0 -80
- package/ui/src/context/theme.tsx +0 -66
- package/ui/src/hooks/useAutoRefresh.ts +0 -39
- package/ui/src/i18n/ui.ts +0 -473
- package/ui/src/main.tsx +0 -16
- package/ui/src/pages/EvolutionPage.tsx +0 -333
- package/ui/src/pages/FeedbackPage.tsx +0 -138
- package/ui/src/pages/GateMonitorPage.tsx +0 -136
- package/ui/src/pages/LoginPage.tsx +0 -89
- package/ui/src/pages/OverviewPage.tsx +0 -599
- package/ui/src/pages/SamplesPage.tsx +0 -174
- package/ui/src/pages/ThinkingModelsPage.tsx +0 -702
- package/ui/src/styles.css +0 -2020
- package/ui/src/types.ts +0 -384
- package/ui/src/utils/format.ts +0 -15
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "nocturnal-reviewed-subset-v1",
|
|
3
|
-
"description": "Reviewed subset fixture for comparing single-reflector vs Trinity quality",
|
|
4
|
-
"version": "1.0.0",
|
|
5
|
-
"createdAt": "2026-03-28T00:00:00.000Z",
|
|
6
|
-
"testCases": [
|
|
7
|
-
{
|
|
8
|
-
"caseId": "case-001",
|
|
9
|
-
"principleId": "T-01",
|
|
10
|
-
"principleDescription": "Map Before Territory: Always survey the existing structure before making changes",
|
|
11
|
-
"sessionId": "session-case-001",
|
|
12
|
-
"signalType": "failure",
|
|
13
|
-
"signalContext": "Agent edited src/main.ts without reading it first, causing a merge conflict",
|
|
14
|
-
"singleReflectorOutput": {
|
|
15
|
-
"artifactId": "single-001-artifact",
|
|
16
|
-
"sessionId": "session-case-001",
|
|
17
|
-
"principleId": "T-01",
|
|
18
|
-
"sourceSnapshotRef": "snapshot-case-001",
|
|
19
|
-
"badDecision": "Edited src/main.ts without first reading its contents, leading to a merge conflict",
|
|
20
|
-
"betterDecision": "Read src/main.ts to understand its current structure before making edits",
|
|
21
|
-
"rationale": "Surveying the existing territory prevents conflicts and ensures edits integrate properly",
|
|
22
|
-
"createdAt": "2026-03-28T00:01:00.000Z"
|
|
23
|
-
},
|
|
24
|
-
"trinityOutput": {
|
|
25
|
-
"selectedCandidateIndex": 0,
|
|
26
|
-
"badDecision": "Edited src/main.ts without first reading its contents, leading to a merge conflict with parallel changes",
|
|
27
|
-
"betterDecision": "Before editing, read src/main.ts to understand its current structure and identify any conflicting sections",
|
|
28
|
-
"rationale": "Surveying the existing territory before making changes prevents conflicts and ensures the edit integrates properly with the current implementation",
|
|
29
|
-
"sessionId": "session-case-001",
|
|
30
|
-
"principleId": "T-01",
|
|
31
|
-
"sourceSnapshotRef": "snapshot-case-001",
|
|
32
|
-
"telemetry": {
|
|
33
|
-
"chainMode": "trinity",
|
|
34
|
-
"dreamerPassed": true,
|
|
35
|
-
"philosopherPassed": true,
|
|
36
|
-
"scribePassed": true,
|
|
37
|
-
"candidateCount": 3,
|
|
38
|
-
"selectedCandidateIndex": 0,
|
|
39
|
-
"stageFailures": []
|
|
40
|
-
}
|
|
41
|
-
},
|
|
42
|
-
"qualityScores": {
|
|
43
|
-
"singleReflector": {
|
|
44
|
-
"specificity": 0.85,
|
|
45
|
-
"principleAlignment": 0.90,
|
|
46
|
-
"actionability": 0.88,
|
|
47
|
-
"rationaleQuality": 0.85,
|
|
48
|
-
"overall": 0.87
|
|
49
|
-
},
|
|
50
|
-
"trinity": {
|
|
51
|
-
"specificity": 0.95,
|
|
52
|
-
"principleAlignment": 0.95,
|
|
53
|
-
"actionability": 0.92,
|
|
54
|
-
"rationaleQuality": 0.95,
|
|
55
|
-
"overall": 0.94
|
|
56
|
-
}
|
|
57
|
-
},
|
|
58
|
-
"trinityWins": true,
|
|
59
|
-
"notes": "Trinity produces more specific badDecision and betterDecision with better rationale"
|
|
60
|
-
},
|
|
61
|
-
{
|
|
62
|
-
"caseId": "case-002",
|
|
63
|
-
"principleId": "T-08",
|
|
64
|
-
"principleDescription": "Pain as Signal: Treat failures and errors as signals to pause and reflect",
|
|
65
|
-
"sessionId": "session-case-002",
|
|
66
|
-
"signalType": "pain",
|
|
67
|
-
"signalContext": "Agent retries failing bash command 3 times without any diagnosis",
|
|
68
|
-
"singleReflectorOutput": {
|
|
69
|
-
"artifactId": "single-002-artifact",
|
|
70
|
-
"sessionId": "session-case-002",
|
|
71
|
-
"principleId": "T-08",
|
|
72
|
-
"sourceSnapshotRef": "snapshot-case-002",
|
|
73
|
-
"badDecision": "Retried failing operation without diagnosing",
|
|
74
|
-
"betterDecision": "Diagnose before retrying",
|
|
75
|
-
"rationale": "Treating failures as signals prevents repeated failures",
|
|
76
|
-
"createdAt": "2026-03-28T00:02:00.000Z"
|
|
77
|
-
},
|
|
78
|
-
"trinityOutput": {
|
|
79
|
-
"selectedCandidateIndex": 0,
|
|
80
|
-
"badDecision": "After bash command failed with 'command not found', immediately retried the exact same command without pausing to diagnose the root cause",
|
|
81
|
-
"betterDecision": "When the bash command fails, pause to check if the tool is installed, verify the path, or consult documentation before retrying",
|
|
82
|
-
"rationale": "Treating each failure as a signal to diagnose rather than blindly retry prevents repeated failures and respects the cost of each action attempt",
|
|
83
|
-
"sessionId": "session-case-002",
|
|
84
|
-
"principleId": "T-08",
|
|
85
|
-
"sourceSnapshotRef": "snapshot-case-002",
|
|
86
|
-
"telemetry": {
|
|
87
|
-
"chainMode": "trinity",
|
|
88
|
-
"dreamerPassed": true,
|
|
89
|
-
"philosopherPassed": true,
|
|
90
|
-
"scribePassed": true,
|
|
91
|
-
"candidateCount": 2,
|
|
92
|
-
"selectedCandidateIndex": 0,
|
|
93
|
-
"stageFailures": []
|
|
94
|
-
}
|
|
95
|
-
},
|
|
96
|
-
"qualityScores": {
|
|
97
|
-
"singleReflector": {
|
|
98
|
-
"specificity": 0.70,
|
|
99
|
-
"principleAlignment": 0.85,
|
|
100
|
-
"actionability": 0.75,
|
|
101
|
-
"rationaleQuality": 0.80,
|
|
102
|
-
"overall": 0.77
|
|
103
|
-
},
|
|
104
|
-
"trinity": {
|
|
105
|
-
"specificity": 0.95,
|
|
106
|
-
"principleAlignment": 0.95,
|
|
107
|
-
"actionability": 0.90,
|
|
108
|
-
"rationaleQuality": 0.95,
|
|
109
|
-
"overall": 0.94
|
|
110
|
-
}
|
|
111
|
-
},
|
|
112
|
-
"trinityWins": true,
|
|
113
|
-
"notes": "Single-reflector is generic; Trinity provides specific diagnosis steps"
|
|
114
|
-
},
|
|
115
|
-
{
|
|
116
|
-
"caseId": "case-003",
|
|
117
|
-
"principleId": "T-03",
|
|
118
|
-
"principleDescription": "Evidence Over Intuition: Ground decisions in observable evidence",
|
|
119
|
-
"sessionId": "session-case-003",
|
|
120
|
-
"signalType": "gate_block",
|
|
121
|
-
"signalContext": "Agent proceeded with a tool call despite receiving a gate block",
|
|
122
|
-
"singleReflectorOutput": {
|
|
123
|
-
"artifactId": "single-003-artifact",
|
|
124
|
-
"sessionId": "session-case-003",
|
|
125
|
-
"principleId": "T-03",
|
|
126
|
-
"sourceSnapshotRef": "snapshot-case-003",
|
|
127
|
-
"badDecision": "Proceeded despite gate block",
|
|
128
|
-
"betterDecision": "Respect gate blocks",
|
|
129
|
-
"rationale": "Gate blocks are safety mechanisms",
|
|
130
|
-
"createdAt": "2026-03-28T00:03:00.000Z"
|
|
131
|
-
},
|
|
132
|
-
"trinityOutput": {
|
|
133
|
-
"selectedCandidateIndex": 0,
|
|
134
|
-
"badDecision": "Proceeded with a tool call despite receiving a gate block, bypassing the safety check",
|
|
135
|
-
"betterDecision": "Read the blocked operation documentation and obtain proper authorization before retrying the operation",
|
|
136
|
-
"rationale": "Respecting gate blocks prevents unintended system modifications and ensures alignment with operational constraints",
|
|
137
|
-
"sessionId": "session-case-003",
|
|
138
|
-
"principleId": "T-03",
|
|
139
|
-
"sourceSnapshotRef": "snapshot-case-003",
|
|
140
|
-
"telemetry": {
|
|
141
|
-
"chainMode": "trinity",
|
|
142
|
-
"dreamerPassed": true,
|
|
143
|
-
"philosopherPassed": true,
|
|
144
|
-
"scribePassed": true,
|
|
145
|
-
"candidateCount": 3,
|
|
146
|
-
"selectedCandidateIndex": 0,
|
|
147
|
-
"stageFailures": []
|
|
148
|
-
}
|
|
149
|
-
},
|
|
150
|
-
"qualityScores": {
|
|
151
|
-
"singleReflector": {
|
|
152
|
-
"specificity": 0.60,
|
|
153
|
-
"principleAlignment": 0.75,
|
|
154
|
-
"actionability": 0.65,
|
|
155
|
-
"rationaleQuality": 0.70,
|
|
156
|
-
"overall": 0.68
|
|
157
|
-
},
|
|
158
|
-
"trinity": {
|
|
159
|
-
"specificity": 0.90,
|
|
160
|
-
"principleAlignment": 0.95,
|
|
161
|
-
"actionability": 0.88,
|
|
162
|
-
"rationaleQuality": 0.92,
|
|
163
|
-
"overall": 0.91
|
|
164
|
-
}
|
|
165
|
-
},
|
|
166
|
-
"trinityWins": true,
|
|
167
|
-
"notes": "Single-reflector is too generic; Trinity is much more specific"
|
|
168
|
-
}
|
|
169
|
-
],
|
|
170
|
-
"summary": {
|
|
171
|
-
"totalCases": 3,
|
|
172
|
-
"trinityWins": 3,
|
|
173
|
-
"singleReflectorWins": 0,
|
|
174
|
-
"averageDelta": {
|
|
175
|
-
"specificity": 0.23,
|
|
176
|
-
"principleAlignment": 0.10,
|
|
177
|
-
"actionability": 0.17,
|
|
178
|
-
"rationaleQuality": 0.17,
|
|
179
|
-
"overall": 0.19
|
|
180
|
-
},
|
|
181
|
-
"conclusion": "Trinity consistently outperforms single-reflector on reviewed subset"
|
|
182
|
-
}
|
|
183
|
-
}
|
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest';
|
|
2
|
-
import { EventEmitter } from 'node:events';
|
|
3
|
-
import type { IncomingMessage, ServerResponse } from 'node:http';
|
|
4
|
-
import { createPrinciplesConsoleRoute } from '../../src/http/principles-console-route.js';
|
|
5
|
-
import { ControlUiQueryService } from '../../src/service/control-ui-query-service.js';
|
|
6
|
-
|
|
7
|
-
vi.mock('../../src/service/control-ui-query-service.js');
|
|
8
|
-
|
|
9
|
-
// Store original env
|
|
10
|
-
const originalHome = process.env.HOME;
|
|
11
|
-
|
|
12
|
-
beforeEach(() => {
|
|
13
|
-
// Set HOME to a non-existent path to prevent reading real config
|
|
14
|
-
process.env.HOME = '/nonexistent-test-home';
|
|
15
|
-
});
|
|
16
|
-
|
|
17
|
-
afterEach(() => {
|
|
18
|
-
process.env.HOME = originalHome;
|
|
19
|
-
});
|
|
20
|
-
|
|
21
|
-
class MockResponse extends EventEmitter {
|
|
22
|
-
statusCode = 200;
|
|
23
|
-
headers = new Map<string, string>();
|
|
24
|
-
body = '';
|
|
25
|
-
|
|
26
|
-
setHeader(name: string, value: string) {
|
|
27
|
-
this.headers.set(name.toLowerCase(), value);
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
end(chunk?: Buffer | string) {
|
|
31
|
-
if (chunk) {
|
|
32
|
-
this.body += typeof chunk === 'string' ? chunk : chunk.toString('utf8');
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
function createRequest(method: string, url: string, body?: string, headers?: Record<string, string>): IncomingMessage {
|
|
38
|
-
const req = new EventEmitter() as IncomingMessage & AsyncIterable<Buffer>;
|
|
39
|
-
(req as any).method = method;
|
|
40
|
-
(req as any).url = url;
|
|
41
|
-
(req as any).headers = headers || {};
|
|
42
|
-
req[Symbol.asyncIterator] = async function* () {
|
|
43
|
-
if (body) {
|
|
44
|
-
yield Buffer.from(body);
|
|
45
|
-
}
|
|
46
|
-
};
|
|
47
|
-
return req as IncomingMessage;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
describe('principles-console-route', () => {
|
|
51
|
-
const createApi = () => ({
|
|
52
|
-
rootDir: '/plugin',
|
|
53
|
-
logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
|
|
54
|
-
runtime: {
|
|
55
|
-
agent: {
|
|
56
|
-
resolveAgentWorkspaceDir: vi.fn(() => '/workspace'),
|
|
57
|
-
},
|
|
58
|
-
},
|
|
59
|
-
config: {},
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
it('serves overview JSON from the plugin API route', async () => {
|
|
63
|
-
vi.mocked(ControlUiQueryService).mockImplementation(function MockControlUiQueryService() {
|
|
64
|
-
return {
|
|
65
|
-
getOverview: () => ({ workspaceDir: '/workspace', generatedAt: 'now', dataFreshness: null }),
|
|
66
|
-
dispose: vi.fn(),
|
|
67
|
-
} as any;
|
|
68
|
-
} as any);
|
|
69
|
-
|
|
70
|
-
const api = createApi();
|
|
71
|
-
const route = createPrinciplesConsoleRoute(api as any);
|
|
72
|
-
|
|
73
|
-
const response = new MockResponse() as unknown as ServerResponse;
|
|
74
|
-
const handled = await route.handler(
|
|
75
|
-
createRequest('GET', '/plugins/principles/api/overview'),
|
|
76
|
-
response,
|
|
77
|
-
);
|
|
78
|
-
|
|
79
|
-
expect(handled).toBe(true);
|
|
80
|
-
expect((response as any).statusCode).toBe(200);
|
|
81
|
-
expect((response as any).body).toContain('"workspaceDir": "/workspace"');
|
|
82
|
-
expect(api.runtime.agent.resolveAgentWorkspaceDir).toHaveBeenCalled();
|
|
83
|
-
});
|
|
84
|
-
|
|
85
|
-
it('rejects unsupported asset methods with 405', async () => {
|
|
86
|
-
const route = createPrinciplesConsoleRoute(createApi() as any);
|
|
87
|
-
|
|
88
|
-
const response = new MockResponse() as unknown as ServerResponse;
|
|
89
|
-
const handled = await route.handler(
|
|
90
|
-
createRequest('POST', '/plugins/principles/assets/app.js'),
|
|
91
|
-
response,
|
|
92
|
-
);
|
|
93
|
-
|
|
94
|
-
expect(handled).toBe(true);
|
|
95
|
-
expect((response as any).statusCode).toBe(405);
|
|
96
|
-
});
|
|
97
|
-
|
|
98
|
-
it('returns 400 for invalid review JSON bodies', async () => {
|
|
99
|
-
vi.mocked(ControlUiQueryService).mockImplementation(function MockControlUiQueryService() {
|
|
100
|
-
return {
|
|
101
|
-
dispose: vi.fn(),
|
|
102
|
-
} as any;
|
|
103
|
-
} as any);
|
|
104
|
-
|
|
105
|
-
const route = createPrinciplesConsoleRoute(createApi() as any);
|
|
106
|
-
|
|
107
|
-
const response = new MockResponse() as unknown as ServerResponse;
|
|
108
|
-
const handled = await route.handler(
|
|
109
|
-
createRequest('POST', '/plugins/principles/api/samples/sample-1/review', '{invalid'),
|
|
110
|
-
response,
|
|
111
|
-
);
|
|
112
|
-
|
|
113
|
-
expect(handled).toBe(true);
|
|
114
|
-
expect((response as any).statusCode).toBe(400);
|
|
115
|
-
expect((response as any).body).toContain('valid JSON');
|
|
116
|
-
});
|
|
117
|
-
|
|
118
|
-
it('returns 404 for unknown thinking model details', async () => {
|
|
119
|
-
vi.mocked(ControlUiQueryService).mockImplementation(function MockControlUiQueryService() {
|
|
120
|
-
return {
|
|
121
|
-
getThinkingModelDetail: () => null,
|
|
122
|
-
dispose: vi.fn(),
|
|
123
|
-
} as any;
|
|
124
|
-
} as any);
|
|
125
|
-
|
|
126
|
-
const route = createPrinciplesConsoleRoute(createApi() as any);
|
|
127
|
-
|
|
128
|
-
const response = new MockResponse() as unknown as ServerResponse;
|
|
129
|
-
const handled = await route.handler(
|
|
130
|
-
createRequest('GET', '/plugins/principles/api/thinking/models/unknown'),
|
|
131
|
-
response,
|
|
132
|
-
);
|
|
133
|
-
|
|
134
|
-
expect(handled).toBe(true);
|
|
135
|
-
expect((response as any).statusCode).toBe(404);
|
|
136
|
-
});
|
|
137
|
-
|
|
138
|
-
it('fails fast when workspace resolution is unavailable', async () => {
|
|
139
|
-
vi.mocked(ControlUiQueryService).mockImplementation(function MockControlUiQueryService() {
|
|
140
|
-
return {
|
|
141
|
-
getOverview: () => ({ workspaceDir: '/workspace', generatedAt: 'now', dataFreshness: null }),
|
|
142
|
-
dispose: vi.fn(),
|
|
143
|
-
} as any;
|
|
144
|
-
} as any);
|
|
145
|
-
|
|
146
|
-
const api = createApi();
|
|
147
|
-
api.runtime.agent.resolveAgentWorkspaceDir = vi.fn(() => {
|
|
148
|
-
throw new Error('workspace unavailable');
|
|
149
|
-
});
|
|
150
|
-
const route = createPrinciplesConsoleRoute(api as any);
|
|
151
|
-
|
|
152
|
-
const response = new MockResponse() as unknown as ServerResponse;
|
|
153
|
-
const handled = await route.handler(
|
|
154
|
-
createRequest('GET', '/plugins/principles/api/overview'),
|
|
155
|
-
response,
|
|
156
|
-
);
|
|
157
|
-
|
|
158
|
-
expect(handled).toBe(true);
|
|
159
|
-
expect((response as any).statusCode).toBe(500);
|
|
160
|
-
expect((response as any).body).toContain('unable to resolve a valid workspace directory');
|
|
161
|
-
});
|
|
162
|
-
});
|
|
@@ -1,348 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Chaos Engineering Tests for Principles Disciple
|
|
3
|
-
*
|
|
4
|
-
* These tests inject failures and verify RESILIENCE - the system's ability
|
|
5
|
-
* to recover gracefully from unexpected conditions.
|
|
6
|
-
*
|
|
7
|
-
* Based on real production data showing:
|
|
8
|
-
* - 13 failed diagnostician tasks in worker-status.json
|
|
9
|
-
* - Concurrent write scenarios
|
|
10
|
-
* - Corrupted file recovery needs
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
14
|
-
import * as fs from 'fs';
|
|
15
|
-
import * as path from 'path';
|
|
16
|
-
import * as os from 'os';
|
|
17
|
-
import {
|
|
18
|
-
buildPainFlag,
|
|
19
|
-
writePainFlag,
|
|
20
|
-
readPainFlagData,
|
|
21
|
-
validatePainFlag
|
|
22
|
-
} from '../../src/core/pain.js';
|
|
23
|
-
import { TrajectoryDatabase } from '../../src/core/trajectory.js';
|
|
24
|
-
|
|
25
|
-
// Helper to safely remove directories
|
|
26
|
-
function safeRmDir(dir: string): void {
|
|
27
|
-
try {
|
|
28
|
-
fs.rmSync(dir, { recursive: true, force: true });
|
|
29
|
-
} catch {
|
|
30
|
-
// ignore
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
35
|
-
// CHAOS 1: File System Failures
|
|
36
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
37
|
-
|
|
38
|
-
describe('Chaos: File System Failures', () => {
|
|
39
|
-
let workspaceDir: string;
|
|
40
|
-
let stateDir: string;
|
|
41
|
-
|
|
42
|
-
beforeEach(() => {
|
|
43
|
-
workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-chaos-fs-'));
|
|
44
|
-
stateDir = path.join(workspaceDir, '.state');
|
|
45
|
-
fs.mkdirSync(stateDir, { recursive: true });
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
afterEach(() => {
|
|
49
|
-
safeRmDir(workspaceDir);
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
it('RESILIENCE: readPainFlagData MUST NOT crash on corrupted file', () => {
|
|
53
|
-
const painFlagPath = path.join(stateDir, '.pain_flag');
|
|
54
|
-
|
|
55
|
-
// 写入损坏的数据
|
|
56
|
-
fs.writeFileSync(painFlagPath, 'invalid content {{{ not kv format');
|
|
57
|
-
|
|
58
|
-
// 必须不崩溃
|
|
59
|
-
const result = readPainFlagData(workspaceDir);
|
|
60
|
-
|
|
61
|
-
// 验证:返回安全默认值
|
|
62
|
-
expect(result).toBeDefined();
|
|
63
|
-
expect(typeof result).toBe('object');
|
|
64
|
-
});
|
|
65
|
-
|
|
66
|
-
it('RESILIENCE: readPainFlagData MUST handle empty file', () => {
|
|
67
|
-
const painFlagPath = path.join(stateDir, '.pain_flag');
|
|
68
|
-
fs.writeFileSync(painFlagPath, '');
|
|
69
|
-
|
|
70
|
-
const result = readPainFlagData(workspaceDir);
|
|
71
|
-
|
|
72
|
-
expect(result).toBeDefined();
|
|
73
|
-
});
|
|
74
|
-
|
|
75
|
-
it('RESILIENCE: readPainFlagData MUST handle missing file gracefully', () => {
|
|
76
|
-
// 不创建文件
|
|
77
|
-
const result = readPainFlagData(workspaceDir);
|
|
78
|
-
|
|
79
|
-
expect(result).toBeDefined();
|
|
80
|
-
});
|
|
81
|
-
|
|
82
|
-
it('RESILIENCE: validatePainFlag MUST handle invalid object inputs', () => {
|
|
83
|
-
// 各种无效对象输入
|
|
84
|
-
const invalidInputs: Record<string, string>[] = [
|
|
85
|
-
{},
|
|
86
|
-
{ source: '' },
|
|
87
|
-
{ source: 'test', score: 'invalid' },
|
|
88
|
-
{ source: 'test', score: '50' },
|
|
89
|
-
{ source: 'test', score: '50', time: '' },
|
|
90
|
-
];
|
|
91
|
-
|
|
92
|
-
for (const input of invalidInputs) {
|
|
93
|
-
const result = validatePainFlag(input);
|
|
94
|
-
expect(Array.isArray(result)).toBe(true); // 返回缺失字段列表
|
|
95
|
-
}
|
|
96
|
-
});
|
|
97
|
-
});
|
|
98
|
-
|
|
99
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
100
|
-
// CHAOS 2: Concurrent Operations
|
|
101
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
102
|
-
|
|
103
|
-
describe('Chaos: Concurrent Operations', () => {
|
|
104
|
-
let workspaceDir: string;
|
|
105
|
-
let stateDir: string;
|
|
106
|
-
|
|
107
|
-
beforeEach(() => {
|
|
108
|
-
workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-chaos-concurrent-'));
|
|
109
|
-
stateDir = path.join(workspaceDir, '.state');
|
|
110
|
-
fs.mkdirSync(stateDir, { recursive: true });
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
afterEach(() => {
|
|
114
|
-
safeRmDir(workspaceDir);
|
|
115
|
-
});
|
|
116
|
-
|
|
117
|
-
it('RESILIENCE: Sequential writes MUST preserve last value', () => {
|
|
118
|
-
const painFlagPath = path.join(stateDir, '.pain_flag');
|
|
119
|
-
|
|
120
|
-
// 连续写入 100 次
|
|
121
|
-
for (let i = 0; i < 100; i++) {
|
|
122
|
-
writePainFlag(workspaceDir, buildPainFlag({
|
|
123
|
-
source: 'sequential_test',
|
|
124
|
-
score: String(i),
|
|
125
|
-
reason: `Iteration ${i}`,
|
|
126
|
-
}));
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
// 验证:最后一次写入生效
|
|
130
|
-
const result = readPainFlagData(workspaceDir);
|
|
131
|
-
expect(result.score).toBe('99');
|
|
132
|
-
expect(result.reason).toBe('Iteration 99');
|
|
133
|
-
});
|
|
134
|
-
|
|
135
|
-
it('RESILIENCE: File MUST NOT contain corrupted data after writes', () => {
|
|
136
|
-
const painFlagPath = path.join(stateDir, '.pain_flag');
|
|
137
|
-
|
|
138
|
-
for (let i = 0; i < 50; i++) {
|
|
139
|
-
writePainFlag(workspaceDir, buildPainFlag({
|
|
140
|
-
source: `test_${i}`,
|
|
141
|
-
score: String(i),
|
|
142
|
-
reason: `Test ${i}`,
|
|
143
|
-
session_id: `session-${i}`,
|
|
144
|
-
agent_id: 'test-agent',
|
|
145
|
-
}));
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
const content = fs.readFileSync(painFlagPath, 'utf-8');
|
|
149
|
-
|
|
150
|
-
// 不应该有损坏的内容
|
|
151
|
-
expect(content).not.toContain('undefined');
|
|
152
|
-
expect(content).not.toContain('[object Object]');
|
|
153
|
-
expect(content).not.toContain('NaN');
|
|
154
|
-
expect(content).not.toContain('null');
|
|
155
|
-
expect(content).not.toContain('function');
|
|
156
|
-
});
|
|
157
|
-
});
|
|
158
|
-
|
|
159
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
160
|
-
// CHAOS 3: Database Resilience
|
|
161
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
162
|
-
|
|
163
|
-
describe('Chaos: Database Resilience', () => {
|
|
164
|
-
let workspaceDir: string;
|
|
165
|
-
let trajectory: TrajectoryDatabase;
|
|
166
|
-
|
|
167
|
-
beforeEach(() => {
|
|
168
|
-
workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-chaos-db-'));
|
|
169
|
-
trajectory = new TrajectoryDatabase({ workspaceDir });
|
|
170
|
-
});
|
|
171
|
-
|
|
172
|
-
afterEach(() => {
|
|
173
|
-
trajectory?.dispose();
|
|
174
|
-
safeRmDir(workspaceDir);
|
|
175
|
-
});
|
|
176
|
-
|
|
177
|
-
it('RESILIENCE: Database MUST handle dispose and reopen correctly', () => {
|
|
178
|
-
// 写入数据
|
|
179
|
-
trajectory.recordSession({
|
|
180
|
-
sessionId: 'test-session',
|
|
181
|
-
startedAt: new Date().toISOString()
|
|
182
|
-
});
|
|
183
|
-
trajectory.recordToolCall({
|
|
184
|
-
sessionId: 'test-session',
|
|
185
|
-
toolName: 'test_tool',
|
|
186
|
-
outcome: 'success',
|
|
187
|
-
});
|
|
188
|
-
|
|
189
|
-
// 关闭
|
|
190
|
-
trajectory.dispose();
|
|
191
|
-
|
|
192
|
-
// 重新打开
|
|
193
|
-
const trajectory2 = new TrajectoryDatabase({ workspaceDir });
|
|
194
|
-
|
|
195
|
-
// 验证数据仍然存在
|
|
196
|
-
const stats = trajectory2.getDataStats();
|
|
197
|
-
expect(stats.toolCalls).toBe(1);
|
|
198
|
-
|
|
199
|
-
trajectory2.dispose();
|
|
200
|
-
});
|
|
201
|
-
|
|
202
|
-
it('RESILIENCE: Database MUST handle invalid session gracefully', () => {
|
|
203
|
-
// 写入不存在的 session 的 tool call
|
|
204
|
-
// 当前实现会自动创建 session
|
|
205
|
-
expect(() => {
|
|
206
|
-
trajectory.recordToolCall({
|
|
207
|
-
sessionId: 'non-existent-session',
|
|
208
|
-
toolName: 'test',
|
|
209
|
-
outcome: 'success',
|
|
210
|
-
});
|
|
211
|
-
}).not.toThrow();
|
|
212
|
-
});
|
|
213
|
-
|
|
214
|
-
it('RESILIENCE: Database MUST handle duplicate session recording', () => {
|
|
215
|
-
// 多次记录同一个 session
|
|
216
|
-
for (let i = 0; i < 5; i++) {
|
|
217
|
-
trajectory.recordSession({
|
|
218
|
-
sessionId: 'same-session',
|
|
219
|
-
startedAt: new Date().toISOString()
|
|
220
|
-
});
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
// 验证只有一个 session
|
|
224
|
-
const stats = trajectory.getDataStats();
|
|
225
|
-
expect(stats.toolCalls).toBe(0); // 没有 tool calls
|
|
226
|
-
});
|
|
227
|
-
});
|
|
228
|
-
|
|
229
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
230
|
-
// CHAOS 4: Malformed Input Recovery
|
|
231
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
232
|
-
|
|
233
|
-
describe('Chaos: Malformed Input Recovery', () => {
|
|
234
|
-
let workspaceDir: string;
|
|
235
|
-
let stateDir: string;
|
|
236
|
-
|
|
237
|
-
beforeEach(() => {
|
|
238
|
-
workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-chaos-input-'));
|
|
239
|
-
stateDir = path.join(workspaceDir, '.state');
|
|
240
|
-
fs.mkdirSync(stateDir, { recursive: true });
|
|
241
|
-
});
|
|
242
|
-
|
|
243
|
-
afterEach(() => {
|
|
244
|
-
safeRmDir(workspaceDir);
|
|
245
|
-
});
|
|
246
|
-
|
|
247
|
-
it('RESILIENCE: buildPainFlag MUST handle all edge cases', () => {
|
|
248
|
-
const edgeCases = [
|
|
249
|
-
{ source: '', score: '50', reason: '' },
|
|
250
|
-
{ source: 'a'.repeat(10000), score: '50', reason: 'x'.repeat(10000) },
|
|
251
|
-
{ source: 'test', score: '-1', reason: 'negative score' },
|
|
252
|
-
{ source: 'test', score: '101', reason: 'overflow score' },
|
|
253
|
-
{ source: 'test', score: '50.5', reason: 'decimal score' },
|
|
254
|
-
{ source: 'test', score: 'NaN', reason: 'NaN score' },
|
|
255
|
-
{ source: 'test\nwith\nnewlines', score: '50', reason: 'multiline\nreason' },
|
|
256
|
-
{ source: 'test<script>', score: '50', reason: 'xss<script>alert(1)</script>' },
|
|
257
|
-
];
|
|
258
|
-
|
|
259
|
-
for (const input of edgeCases) {
|
|
260
|
-
expect(() => buildPainFlag(input)).not.toThrow();
|
|
261
|
-
}
|
|
262
|
-
});
|
|
263
|
-
|
|
264
|
-
it('RESILIENCE: writePainFlag MUST sanitize special characters', () => {
|
|
265
|
-
writePainFlag(workspaceDir, buildPainFlag({
|
|
266
|
-
source: 'test\nwith\nnewlines',
|
|
267
|
-
score: '50',
|
|
268
|
-
reason: 'reason\twith\ttabs',
|
|
269
|
-
}));
|
|
270
|
-
|
|
271
|
-
const content = fs.readFileSync(path.join(stateDir, '.pain_flag'), 'utf-8');
|
|
272
|
-
|
|
273
|
-
// 文件应该可以正常读取
|
|
274
|
-
expect(content).toBeDefined();
|
|
275
|
-
expect(content.length).toBeGreaterThan(0);
|
|
276
|
-
});
|
|
277
|
-
});
|
|
278
|
-
|
|
279
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
280
|
-
// CHAOS 5: Edge Case Discovery (based on production data)
|
|
281
|
-
// ─────────────────────────────────────────────────────────────────────
|
|
282
|
-
|
|
283
|
-
describe('Chaos: Production Data Patterns', () => {
|
|
284
|
-
let workspaceDir: string;
|
|
285
|
-
let stateDir: string;
|
|
286
|
-
|
|
287
|
-
beforeEach(() => {
|
|
288
|
-
workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-chaos-prod-'));
|
|
289
|
-
stateDir = path.join(workspaceDir, '.state');
|
|
290
|
-
fs.mkdirSync(stateDir, { recursive: true });
|
|
291
|
-
});
|
|
292
|
-
|
|
293
|
-
afterEach(() => {
|
|
294
|
-
safeRmDir(workspaceDir);
|
|
295
|
-
});
|
|
296
|
-
|
|
297
|
-
it('RESILIENCE: Pain flag without session_id MUST be valid', () => {
|
|
298
|
-
// 生产数据中 session_id 可能为空
|
|
299
|
-
writePainFlag(workspaceDir, buildPainFlag({
|
|
300
|
-
source: 'tool_failure',
|
|
301
|
-
score: '80',
|
|
302
|
-
reason: 'Test without session',
|
|
303
|
-
session_id: '',
|
|
304
|
-
agent_id: '',
|
|
305
|
-
}));
|
|
306
|
-
|
|
307
|
-
const result = readPainFlagData(workspaceDir);
|
|
308
|
-
expect(result.source).toBe('tool_failure');
|
|
309
|
-
expect(result.score).toBe('80');
|
|
310
|
-
});
|
|
311
|
-
|
|
312
|
-
it('RESILIENCE: Multiple pain sources MUST be distinguishable', () => {
|
|
313
|
-
const sources = [
|
|
314
|
-
'tool_failure',
|
|
315
|
-
'user_feedback',
|
|
316
|
-
'human_intervention',
|
|
317
|
-
'manual',
|
|
318
|
-
'gate_block',
|
|
319
|
-
];
|
|
320
|
-
|
|
321
|
-
for (const source of sources) {
|
|
322
|
-
writePainFlag(workspaceDir, buildPainFlag({
|
|
323
|
-
source,
|
|
324
|
-
score: '50',
|
|
325
|
-
reason: `Test ${source}`,
|
|
326
|
-
}));
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
// 最后一个写入应该生效
|
|
330
|
-
const result = readPainFlagData(workspaceDir);
|
|
331
|
-
expect(result.source).toBe('gate_block');
|
|
332
|
-
});
|
|
333
|
-
|
|
334
|
-
it('RESILIENCE: Timestamp MUST be valid ISO format', () => {
|
|
335
|
-
writePainFlag(workspaceDir, buildPainFlag({
|
|
336
|
-
source: 'test',
|
|
337
|
-
score: '50',
|
|
338
|
-
reason: 'timestamp test',
|
|
339
|
-
}));
|
|
340
|
-
|
|
341
|
-
const result = readPainFlagData(workspaceDir);
|
|
342
|
-
|
|
343
|
-
// 验证时间戳是有效的 ISO 格式
|
|
344
|
-
const timestamp = new Date(result.time);
|
|
345
|
-
expect(timestamp).toBeInstanceOf(Date);
|
|
346
|
-
expect(isNaN(timestamp.getTime())).toBe(false);
|
|
347
|
-
});
|
|
348
|
-
});
|