principles-disciple 1.35.0 → 1.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/commands/nocturnal-train.ts +1 -0
- package/src/core/correction-cue-learner.ts +23 -8
- package/src/core/event-log.ts +3 -0
- package/src/core/evolution-engine.ts +1 -0
- package/src/core/init.ts +2 -2
- package/src/core/nocturnal-trinity-types.ts +124 -0
- package/src/core/session-tracker.ts +1 -0
- package/src/core/training-program.ts +1 -0
- package/src/hooks/gate-block-helper.ts +1 -1
- package/src/hooks/prompt.ts +3 -3
- package/src/index.ts +2 -1
- package/src/service/central-sync-service.ts +2 -0
- package/src/service/evolution-dedup.ts +74 -0
- package/src/service/evolution-pain-context.ts +79 -0
- package/src/service/evolution-queue-lock.ts +47 -0
- package/src/service/evolution-queue-migration.ts +173 -0
- package/src/service/evolution-worker.ts +43 -34
- package/src/service/keyword-optimization-service.ts +2 -2
- package/src/service/subagent-workflow/correction-observer-types.ts +69 -0
- package/src/service/subagent-workflow/correction-observer-workflow-manager.ts +246 -0
- package/src/service/subagent-workflow/index.ts +13 -0
- package/src/service/subagent-workflow/workflow-manager-base.ts +1 -0
- package/tests/core/correction-cue-learner.test.ts +345 -0
- package/tests/core/pain-score.property.test.ts +205 -0
- package/tests/integration/chaos-resilience.test.ts +348 -0
- package/tests/integration/gate-real-io.e2e.test.ts +251 -0
- package/tests/integration/pain-diagnostician-loop.e2e.test.ts +380 -0
- package/tests/integration/tool-hooks-workspace-dir.e2e.test.ts +8 -2
- package/tests/integration/trajectory-lifecycle.e2e.test.ts +523 -0
- package/vitest.config.ts +23 -4
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Property-based tests for Pain Score computation
|
|
3
|
+
*
|
|
4
|
+
* These tests verify INVARIANTS - mathematical properties that MUST hold
|
|
5
|
+
* for ALL possible inputs, not just a few hand-picked examples.
|
|
6
|
+
*
|
|
7
|
+
* Using fast-check for property-based testing.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { describe, it, expect } from 'vitest';
|
|
11
|
+
import fc from 'fast-check';
|
|
12
|
+
import { computePainScore, painSeverityLabel } from '../../src/core/pain.js';
|
|
13
|
+
|
|
14
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
15
|
+
// PROPERTY 1: Score Range Invariant
|
|
16
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
/**
 * Range invariants for computePainScore: whatever inputs are supplied, the
 * returned score must stay within [0, 100] and must be a finite number.
 */
describe('Property: Pain Score Range Invariant', () => {
  it('INVARIANT: Score MUST be in [0, 100] for ALL inputs', () => {
    // The generators deliberately include boundary and invalid exit codes as
    // well as out-of-range soft scores.
    const exitCodeArb = fc.integer({ min: -255, max: 255 });
    const softScoreArb = fc.integer({ min: -100, max: 200 });

    const staysInRange = fc.property(
      exitCodeArb,
      fc.boolean(), // isSpiral
      fc.boolean(), // missingTestCommand
      softScoreArb,
      (exitCode, isSpiral, missingTest, softScore) => {
        const score = computePainScore(exitCode, isSpiral, missingTest, softScore);

        // Invariant: the score must lie inside the valid range.
        return score >= 0 && score <= 100;
      }
    );

    // Run 1000 randomized cases.
    fc.assert(staysInRange, { numRuns: 1000 });
  });

  it('INVARIANT: Score MUST be a valid number (not NaN/Infinity)', () => {
    const isFiniteScore = fc.property(
      fc.integer(),
      fc.boolean(),
      fc.boolean(),
      fc.integer(),
      (exitCode, isSpiral, missingTest, softScore) =>
        Number.isFinite(computePainScore(exitCode, isSpiral, missingTest, softScore))
    );

    fc.assert(isFiniteScore);
  });
});
|
|
52
|
+
|
|
53
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
54
|
+
// PROPERTY 2: Monotonicity Invariant
|
|
55
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
56
|
+
|
|
57
|
+
/**
 * Monotonicity invariants for computePainScore: turning on a penalty flag or
 * raising softScore must never lower the resulting score.
 */
describe('Property: Monotonicity Invariant', () => {
  it('INVARIANT: Spiral MUST increase or maintain score', () => {
    fc.assert(
      fc.property(
        fc.integer({ min: 0, max: 255 }),
        fc.boolean(),
        fc.integer({ min: 0, max: 100 }),
        (exitCode, missingTest, softScore) => {
          const normal = computePainScore(exitCode, false, missingTest, softScore);
          const spiral = computePainScore(exitCode, true, missingTest, softScore);

          // Invariant: the spiral score must be >= the non-spiral score.
          return spiral >= normal;
        }
      )
    );
  });

  it('INVARIANT: Missing test command MUST increase or maintain score', () => {
    fc.assert(
      fc.property(
        fc.integer({ min: 0, max: 255 }),
        fc.boolean(),
        fc.integer({ min: 0, max: 100 }),
        (exitCode, isSpiral, softScore) => {
          const withTest = computePainScore(exitCode, isSpiral, false, softScore);
          const withoutTest = computePainScore(exitCode, isSpiral, true, softScore);

          // Invariant: a missing test command must score >= having one.
          return withoutTest >= withTest;
        }
      )
    );
  });

  it('INVARIANT: Higher softScore MUST produce higher or equal total score', () => {
    fc.assert(
      fc.property(
        fc.integer({ min: 0, max: 255 }),
        fc.boolean(),
        fc.boolean(),
        fc.integer({ min: 0, max: 100 }),
        fc.integer({ min: 0, max: 100 }),
        (exitCode, isSpiral, missingTest, softA, softB) => {
          // Order the two samples instead of drawing them from disjoint halves
          // ([0,50] vs [50,100]); this way pairs that fall in the same half of
          // the range are exercised too.
          const softLow = Math.min(softA, softB);
          const softHigh = Math.max(softA, softB);

          const scoreLow = computePainScore(exitCode, isSpiral, missingTest, softLow);
          const scoreHigh = computePainScore(exitCode, isSpiral, missingTest, softHigh);

          // Invariant: a higher softScore must yield a higher or equal total.
          return scoreHigh >= scoreLow;
        }
      )
    );
  });
});
|
|
111
|
+
|
|
112
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
113
|
+
// PROPERTY 3: Exit Code Effect Invariant
|
|
114
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
115
|
+
|
|
116
|
+
/**
 * Exit-code invariants for computePainScore: a non-zero exit code contributes
 * its penalty, a zero exit code contributes none.
 */
describe('Property: Exit Code Effect Invariant', () => {
  // Penalty values this suite expects; per the original author's note the
  // exit-code penalty defaults to 70.
  // NOTE(review): these mirror the defaults used by computePainScore — confirm.
  const EXIT_CODE_PENALTY = 70;
  const SPIRAL_PENALTY = 40;
  const MISSING_TEST_PENALTY = 30;

  it('INVARIANT: Non-zero exitCode MUST add penalty', () => {
    const nonZeroExitCode = fc.integer({ min: 1, max: 255 });

    const carriesExitPenalty = fc.property(
      nonZeroExitCode,
      fc.boolean(),
      fc.boolean(),
      fc.integer({ min: 0, max: 100 }),
      (exitCode, isSpiral, missingTest, softScore) => {
        const score = computePainScore(exitCode, isSpiral, missingTest, softScore);

        // Invariant: a non-zero exit code must add at least the exit penalty.
        return score >= EXIT_CODE_PENALTY;
      }
    );

    fc.assert(carriesExitPenalty);
  });

  it('INVARIANT: Zero exitCode MUST NOT add exit penalty', () => {
    const omitsExitPenalty = fc.property(
      fc.boolean(),
      fc.boolean(),
      fc.integer({ min: 0, max: 100 }),
      (isSpiral, missingTest, softScore) => {
        const score = computePainScore(0, isSpiral, missingTest, softScore);

        // With a zero exit code the score may only come from
        // softScore + spiral penalty + missing-test penalty.
        const spiralPart = isSpiral ? SPIRAL_PENALTY : 0;
        const missingTestPart = missingTest ? MISSING_TEST_PENALTY : 0;
        const upperBound = softScore + spiralPart + missingTestPart;

        return score <= upperBound;
      }
    );

    fc.assert(omitsExitPenalty);
  });
});
|
|
153
|
+
|
|
154
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
155
|
+
// PROPERTY 4: Severity Label Invariant
|
|
156
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
157
|
+
|
|
158
|
+
/**
 * Severity-label invariant: painSeverityLabel must return the label that
 * corresponds to the score band, and 'critical' whenever a spiral is flagged.
 */
describe('Property: Severity Label Invariant', () => {
  // Label this suite expects for a given score / spiral combination.
  const expectedLabelFor = (score: number, isSpiral: boolean): string => {
    // Invariant: a spiral is always critical, regardless of score.
    if (isSpiral) {
      return 'critical';
    }

    // Invariant: otherwise the label follows the score band.
    if (score >= 70) {
      return 'high';
    }
    if (score >= 40) {
      return 'medium';
    }
    if (score >= 20) {
      return 'low';
    }
    return 'info';
  };

  it('INVARIANT: Severity label MUST match score range', () => {
    const labelMatchesBand = fc.property(
      fc.integer({ min: 0, max: 100 }),
      fc.boolean(),
      (score, isSpiral) => {
        const actual = painSeverityLabel(score, isSpiral);
        return actual === expectedLabelFor(score, isSpiral);
      }
    );

    fc.assert(labelMatchesBand);
  });
});
|
|
182
|
+
|
|
183
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
184
|
+
// PROPERTY 5: Idempotence Invariant
|
|
185
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
186
|
+
|
|
187
|
+
/**
 * Repeatability invariant: calling computePainScore twice with the same
 * arguments must give the same result.
 */
describe('Property: Idempotence Invariant', () => {
  it('INVARIANT: Same inputs MUST produce same output (pure function)', () => {
    const givesSameResultTwice = (
      exitCode: number,
      isSpiral: boolean,
      missingTest: boolean,
      softScore: number
    ): boolean => {
      const first = computePainScore(exitCode, isSpiral, missingTest, softScore);
      const second = computePainScore(exitCode, isSpiral, missingTest, softScore);

      // Invariant: a pure function returns identical results for identical input.
      return first === second;
    };

    const repeatable = fc.property(
      fc.integer(),
      fc.boolean(),
      fc.boolean(),
      fc.integer(),
      givesSameResultTwice
    );

    fc.assert(repeatable);
  });
});
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chaos Engineering Tests for Principles Disciple
|
|
3
|
+
*
|
|
4
|
+
* These tests inject failures and verify RESILIENCE - the system's ability
|
|
5
|
+
* to recover gracefully from unexpected conditions.
|
|
6
|
+
*
|
|
7
|
+
* Based on real production data showing:
|
|
8
|
+
* - 13 failed diagnostician tasks in worker-status.json
|
|
9
|
+
* - Concurrent write scenarios
|
|
10
|
+
* - Corrupted file recovery needs
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
14
|
+
import * as fs from 'fs';
|
|
15
|
+
import * as path from 'path';
|
|
16
|
+
import * as os from 'os';
|
|
17
|
+
import {
|
|
18
|
+
buildPainFlag,
|
|
19
|
+
writePainFlag,
|
|
20
|
+
readPainFlagData,
|
|
21
|
+
validatePainFlag
|
|
22
|
+
} from '../../src/core/pain.js';
|
|
23
|
+
import { TrajectoryDatabase } from '../../src/core/trajectory.js';
|
|
24
|
+
|
|
25
|
+
// Helper to safely remove directories
|
|
26
|
+
/**
 * Best-effort recursive removal of a directory used by the tests.
 * Any error raised by the removal is deliberately ignored.
 *
 * @param dir - Path of the directory to delete.
 */
function safeRmDir(dir: string): void {
  const removalOptions = { force: true, recursive: true };
  try {
    fs.rmSync(dir, removalOptions);
  } catch {
    // Intentionally ignored: cleanup must never fail a test.
  }
}
|
|
33
|
+
|
|
34
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
35
|
+
// CHAOS 1: File System Failures
|
|
36
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
37
|
+
|
|
38
|
+
/**
 * File-system chaos cases: reading the pain flag must not throw when the file
 * is corrupted, empty or absent, and validatePainFlag must return an array
 * for incomplete inputs.
 */
describe('Chaos: File System Failures', () => {
  let workspaceDir: string;
  let stateDir: string;

  // Location of the pain flag inside the per-test state directory.
  const painFlagFile = (): string => path.join(stateDir, '.pain_flag');

  beforeEach(() => {
    // Fresh temporary workspace with a `.state` directory for every test.
    workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-chaos-fs-'));
    stateDir = path.join(workspaceDir, '.state');
    fs.mkdirSync(stateDir, { recursive: true });
  });

  afterEach(() => {
    safeRmDir(workspaceDir);
  });

  it('RESILIENCE: readPainFlagData MUST NOT crash on corrupted file', () => {
    // Write corrupted data.
    fs.writeFileSync(painFlagFile(), 'invalid content {{{ not kv format');

    // Reading must not crash.
    const parsed = readPainFlagData(workspaceDir);

    // A safe default value is expected back.
    expect(parsed).toBeDefined();
    expect(typeof parsed).toBe('object');
  });

  it('RESILIENCE: readPainFlagData MUST handle empty file', () => {
    fs.writeFileSync(painFlagFile(), '');

    expect(readPainFlagData(workspaceDir)).toBeDefined();
  });

  it('RESILIENCE: readPainFlagData MUST handle missing file gracefully', () => {
    // No file is created for this case.
    expect(readPainFlagData(workspaceDir)).toBeDefined();
  });

  it('RESILIENCE: validatePainFlag MUST handle invalid object inputs', () => {
    // Assorted incomplete or invalid flag objects.
    const invalidInputs: Record<string, string>[] = [
      {},
      { source: '' },
      { source: 'test', score: 'invalid' },
      { source: 'test', score: '50' },
      { source: 'test', score: '50', time: '' },
    ];

    invalidInputs.forEach((candidate) => {
      // Per the original note the result is a list of missing fields; only
      // its array-ness is asserted here.
      const outcome = validatePainFlag(candidate);
      expect(Array.isArray(outcome)).toBe(true);
    });
  });
});
|
|
98
|
+
|
|
99
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
100
|
+
// CHAOS 2: Concurrent Operations
|
|
101
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
/**
 * Repeated-write cases for the pain flag: after many back-to-back writes the
 * last write must win and the file must not contain stringification residue.
 *
 * NOTE(review): despite the suite name, the writes below are issued
 * sequentially from a single loop, not concurrently.
 */
describe('Chaos: Concurrent Operations', () => {
  let workspaceDir: string;
  let stateDir: string;

  beforeEach(() => {
    // Fresh temporary workspace with a `.state` directory for every test.
    workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-chaos-concurrent-'));
    stateDir = path.join(workspaceDir, '.state');
    fs.mkdirSync(stateDir, { recursive: true });
  });

  afterEach(() => {
    safeRmDir(workspaceDir);
  });

  it('RESILIENCE: Sequential writes MUST preserve last value', () => {
    // Write 100 times in a row.
    for (let i = 0; i < 100; i++) {
      writePainFlag(workspaceDir, buildPainFlag({
        source: 'sequential_test',
        score: String(i),
        reason: `Iteration ${i}`,
      }));
    }

    // The final write must be the one that is read back.
    const result = readPainFlagData(workspaceDir);
    expect(result.score).toBe('99');
    expect(result.reason).toBe('Iteration 99');
  });

  it('RESILIENCE: File MUST NOT contain corrupted data after writes', () => {
    const painFlagPath = path.join(stateDir, '.pain_flag');

    for (let i = 0; i < 50; i++) {
      writePainFlag(workspaceDir, buildPainFlag({
        source: `test_${i}`,
        score: String(i),
        reason: `Test ${i}`,
        session_id: `session-${i}`,
        agent_id: 'test-agent',
      }));
    }

    const content = fs.readFileSync(painFlagPath, 'utf-8');

    // The file must not contain leftovers of bad value stringification.
    expect(content).not.toContain('undefined');
    expect(content).not.toContain('[object Object]');
    expect(content).not.toContain('NaN');
    expect(content).not.toContain('null');
    expect(content).not.toContain('function');
  });
});
|
|
158
|
+
|
|
159
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
160
|
+
// CHAOS 3: Database Resilience
|
|
161
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
162
|
+
|
|
163
|
+
/**
 * TrajectoryDatabase chaos cases: data must survive dispose + reopen, tool
 * calls for unknown sessions must not throw, and recording the same session
 * repeatedly must not throw.
 */
describe('Chaos: Database Resilience', () => {
  let workspaceDir: string;
  let trajectory: TrajectoryDatabase;

  beforeEach(() => {
    workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-chaos-db-'));
    trajectory = new TrajectoryDatabase({ workspaceDir });
  });

  afterEach(() => {
    trajectory?.dispose();
    safeRmDir(workspaceDir);
  });

  it('RESILIENCE: Database MUST handle dispose and reopen correctly', () => {
    // Write data.
    trajectory.recordSession({
      sessionId: 'test-session',
      startedAt: new Date().toISOString()
    });
    trajectory.recordToolCall({
      sessionId: 'test-session',
      toolName: 'test_tool',
      outcome: 'success',
    });

    // Close.
    trajectory.dispose();

    // Reopen against the same workspace.
    const trajectory2 = new TrajectoryDatabase({ workspaceDir });
    try {
      // The previously recorded data must still be there.
      const stats = trajectory2.getDataStats();
      expect(stats.toolCalls).toBe(1);
    } finally {
      // Dispose even when the assertion fails so the second instance is
      // never left open.
      trajectory2.dispose();
    }
  });

  it('RESILIENCE: Database MUST handle invalid session gracefully', () => {
    // Record a tool call for a session that was never recorded.
    // Per the original note, the current implementation auto-creates the session.
    expect(() => {
      trajectory.recordToolCall({
        sessionId: 'non-existent-session',
        toolName: 'test',
        outcome: 'success',
      });
    }).not.toThrow();
  });

  it('RESILIENCE: Database MUST handle duplicate session recording', () => {
    // Record the same session several times.
    for (let i = 0; i < 5; i++) {
      trajectory.recordSession({
        sessionId: 'same-session',
        startedAt: new Date().toISOString()
      });
    }

    // NOTE(review): only the tool-call count is asserted here (no tool calls
    // were recorded); the number of stored sessions is not checked.
    const stats = trajectory.getDataStats();
    expect(stats.toolCalls).toBe(0);
  });
});
|
|
228
|
+
|
|
229
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
230
|
+
// CHAOS 4: Malformed Input Recovery
|
|
231
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
232
|
+
|
|
233
|
+
/**
 * Malformed-input cases: buildPainFlag must not throw for awkward field
 * values, and a flag containing newlines/tabs must still be written to disk.
 */
describe('Chaos: Malformed Input Recovery', () => {
  let workspaceDir: string;
  let stateDir: string;

  beforeEach(() => {
    // Fresh temporary workspace with a `.state` directory for every test.
    workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-chaos-input-'));
    stateDir = path.join(workspaceDir, '.state');
    fs.mkdirSync(stateDir, { recursive: true });
  });

  afterEach(() => {
    safeRmDir(workspaceDir);
  });

  it('RESILIENCE: buildPainFlag MUST handle all edge cases', () => {
    // Empty, oversized, out-of-range, non-numeric, multi-line and
    // markup-bearing field values.
    const edgeCases = [
      { source: '', score: '50', reason: '' },
      { source: 'a'.repeat(10000), score: '50', reason: 'x'.repeat(10000) },
      { source: 'test', score: '-1', reason: 'negative score' },
      { source: 'test', score: '101', reason: 'overflow score' },
      { source: 'test', score: '50.5', reason: 'decimal score' },
      { source: 'test', score: 'NaN', reason: 'NaN score' },
      { source: 'test\nwith\nnewlines', score: '50', reason: 'multiline\nreason' },
      { source: 'test<script>', score: '50', reason: 'xss<script>alert(1)</script>' },
    ];

    edgeCases.forEach((candidate) => {
      const build = () => buildPainFlag(candidate);
      expect(build).not.toThrow();
    });
  });

  it('RESILIENCE: writePainFlag MUST sanitize special characters', () => {
    const flag = buildPainFlag({
      source: 'test\nwith\nnewlines',
      score: '50',
      reason: 'reason\twith\ttabs',
    });
    writePainFlag(workspaceDir, flag);

    const flagPath = path.join(stateDir, '.pain_flag');
    const written = fs.readFileSync(flagPath, 'utf-8');

    // The file must be readable and non-empty.
    expect(written).toBeDefined();
    expect(written.length).toBeGreaterThan(0);
  });
});
|
|
278
|
+
|
|
279
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
280
|
+
// CHAOS 5: Edge Case Discovery (based on production data)
|
|
281
|
+
// ─────────────────────────────────────────────────────────────────────
|
|
282
|
+
|
|
283
|
+
/**
 * Pain-flag round trips for patterns the original author attributes to
 * production data: empty session/agent ids, several different sources
 * written one after another, and a parseable timestamp.
 */
describe('Chaos: Production Data Patterns', () => {
  let workspaceDir: string;
  let stateDir: string;

  beforeEach(() => {
    // Fresh temporary workspace with a `.state` directory for every test.
    workspaceDir = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-chaos-prod-'));
    stateDir = path.join(workspaceDir, '.state');
    fs.mkdirSync(stateDir, { recursive: true });
  });

  afterEach(() => {
    safeRmDir(workspaceDir);
  });

  it('RESILIENCE: Pain flag without session_id MUST be valid', () => {
    // session_id may be empty in production data.
    writePainFlag(workspaceDir, buildPainFlag({
      source: 'tool_failure',
      score: '80',
      reason: 'Test without session',
      session_id: '',
      agent_id: '',
    }));

    const result = readPainFlagData(workspaceDir);
    expect(result.source).toBe('tool_failure');
    expect(result.score).toBe('80');
  });

  it('RESILIENCE: Multiple pain sources MUST be distinguishable', () => {
    const sources = [
      'tool_failure',
      'user_feedback',
      'human_intervention',
      'manual',
      'gate_block',
    ];

    for (const source of sources) {
      writePainFlag(workspaceDir, buildPainFlag({
        source,
        score: '50',
        reason: `Test ${source}`,
      }));
    }

    // The last write should be the one that is read back.
    const result = readPainFlagData(workspaceDir);
    expect(result.source).toBe('gate_block');
  });

  it('RESILIENCE: Timestamp MUST be valid ISO format', () => {
    writePainFlag(workspaceDir, buildPainFlag({
      source: 'test',
      score: '50',
      reason: 'timestamp test',
    }));

    const result = readPainFlagData(workspaceDir);

    // The stored time must parse to a real date.
    // NOTE(review): this proves the value is Date-parseable, not that it is
    // strictly ISO 8601 formatted.
    const timestamp = new Date(result.time);
    expect(timestamp).toBeInstanceOf(Date);
    // Number.isNaN avoids the implicit coercion done by the global isNaN.
    expect(Number.isNaN(timestamp.getTime())).toBe(false);
  });
});
|