@principles/pd-cli 1.111.0 → 1.113.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/dist/commands/__tests__/run-rulehost-flag-wiring.test.d.ts +24 -0
  2. package/dist/commands/__tests__/run-rulehost-flag-wiring.test.d.ts.map +1 -0
  3. package/dist/commands/__tests__/run-rulehost-flag-wiring.test.js +223 -0
  4. package/dist/commands/__tests__/run-rulehost-flag-wiring.test.js.map +1 -0
  5. package/dist/commands/runtime-internalization-run-rulehost.d.ts +23 -0
  6. package/dist/commands/runtime-internalization-run-rulehost.d.ts.map +1 -0
  7. package/dist/commands/runtime-internalization-run-rulehost.js +364 -0
  8. package/dist/commands/runtime-internalization-run-rulehost.js.map +1 -0
  9. package/dist/index.js +2 -0
  10. package/dist/index.js.map +1 -1
  11. package/dist/services/demo-rule-compiler.d.ts +24 -0
  12. package/dist/services/demo-rule-compiler.d.ts.map +1 -0
  13. package/dist/services/demo-rule-compiler.js +53 -0
  14. package/dist/services/demo-rule-compiler.js.map +1 -0
  15. package/dist/services/rulehost-pipeline-runner.d.ts +124 -0
  16. package/dist/services/rulehost-pipeline-runner.d.ts.map +1 -0
  17. package/dist/services/rulehost-pipeline-runner.js +334 -0
  18. package/dist/services/rulehost-pipeline-runner.js.map +1 -0
  19. package/package.json +1 -1
  20. package/src/commands/__tests__/run-rulehost-flag-wiring.test.ts +280 -0
  21. package/src/commands/runtime-internalization-run-rulehost.ts +417 -0
  22. package/src/index.ts +3 -0
  23. package/src/services/demo-rule-compiler.ts +71 -0
  24. package/src/services/rulehost-pipeline-runner.ts +585 -0
  25. package/tests/commands/diagnose.test.ts +178 -1
  26. package/tests/services/resolve-runtime-from-pd-config.test.ts +59 -0
  27. package/tests/services/rulehost-pipeline-e2e.test.ts +477 -0
  28. package/tests/services/rulehost-pipeline-runner.test.ts +519 -0
@@ -0,0 +1,519 @@
1
+ /**
2
+ * runRuleHostPipeline unit tests (PRI-429).
3
+ *
4
+ * Covers the atomic capability contract (ArtificerL2 + Evaluator both run or
5
+ * neither runs) and exact sourcePainId matching (D fix: pain-1 must NOT match
6
+ * pain-10).
7
+ *
8
+ * ERR refs considered:
9
+ * - ERR-001: parsed JSON treated as unknown
10
+ * - ERR-005/007: no `as` bypass; type narrowing via typeof + Object.hasOwn
11
+ * - ERR-009: missing sourcePainId = no match (fail loud)
12
+ * - ERR-013: Object.hasOwn for untrusted key checks
13
+ */
14
+ /* eslint-disable @typescript-eslint/no-non-null-assertion, @typescript-eslint/class-methods-use-this, @typescript-eslint/require-await */
15
+ import { describe, it, expect, afterEach, vi } from 'vitest';
16
+ import * as os from 'node:os';
17
+ import * as path from 'node:path';
18
+ import * as fs from 'node:fs';
19
+ import { createSandboxGateDeps, runRuleHostPipeline } from '../../src/services/rulehost-pipeline-runner.js';
20
+ import type { CodeRuleCapability } from '../../src/services/rulehost-pipeline-runner.js';
21
+ import type { PDRuntimeAdapter, RunHandle, RunStatus, PIArtifactStore, RuntimeCapabilities, RuntimeHealth, RuntimeArtifactRef, ContextItem, StructuredRunOutput, StartRunInput } from '@principles/core/runtime-v2';
22
+ import { ArtificerL2Adapter, DefaultArtificerValidator, RuntimeStateManager, createPITaskDiagnosticJson } from '@principles/core/runtime-v2';
23
+
24
+ type StageFactory = (taskId: string, priorArtifactId?: string) => unknown;
25
+ type EvaluatorFactory = (taskId: string, artificerArtifactId: string) => unknown;
26
+
27
+ class ScriptedAdapter implements PDRuntimeAdapter {
28
+ readonly startRunCalls: { taskId: string }[] = [];
29
+ readonly startRunInputs = new Map<string, StartRunInput>();
30
+ artifactStore: PIArtifactStore | null = null;
31
+ constructor(private readonly factories: { dreamer: StageFactory; philosopher: StageFactory; scribe: StageFactory; artificer: StageFactory; evaluator: EvaluatorFactory }) {}
32
+
33
+ private kindFor(taskId: string): 'dreamer' | 'philosopher' | 'scribe' | 'artificer' | 'evaluator' {
34
+ if (taskId.startsWith('dreamer')) return 'dreamer';
35
+ if (taskId.includes('philosopher')) return 'philosopher';
36
+ if (taskId.includes('scribe')) return 'scribe';
37
+ if (taskId.includes('artificer')) return 'artificer';
38
+ return 'evaluator';
39
+ }
40
+
41
+ private async priorArtifactId(priorKind: string): Promise<string | undefined> {
42
+ const calls = this.startRunCalls.filter((c) => this.kindFor(c.taskId) === priorKind);
43
+ const last = calls[calls.length - 1];
44
+ if (!last || !this.artifactStore) return undefined;
45
+ const arts = await this.artifactStore.listBySourceTaskId(last.taskId);
46
+ return arts[0]?.artifactId;
47
+ }
48
+
49
+ async startRun(input: StartRunInput): Promise<RunHandle> {
50
+ this.startRunCalls.push({ taskId: input.taskRef.taskId });
51
+ const runId = `run-${input.taskRef.taskId}`;
52
+ this.startRunInputs.set(runId, input);
53
+ return { runId, runtimeKind: 'test-double', startedAt: new Date().toISOString() };
54
+ }
55
+ async pollRun(_runId: string): Promise<RunStatus> { return { status: 'succeeded', runId: 'run-x' }; }
56
+ async fetchOutput(runId: string): Promise<StructuredRunOutput | null> {
57
+ const taskId = runId.replace(/^run-/, '');
58
+ const kind = this.kindFor(taskId);
59
+ let payload: unknown;
60
+ if (kind === 'dreamer') payload = this.factories.dreamer(taskId);
61
+ else if (kind === 'philosopher') payload = this.factories.philosopher(taskId, await this.priorArtifactId('dreamer'));
62
+ else if (kind === 'scribe') payload = this.factories.scribe(taskId, await this.priorArtifactId('philosopher'));
63
+ else if (kind === 'artificer') payload = this.factories.artificer(taskId, await this.priorArtifactId('scribe'));
64
+ else {
65
+ let artificerArtifactId = await this.priorArtifactId('artificer');
66
+ if (!artificerArtifactId) {
67
+ const inputPayload = this.startRunInputs.get(runId)?.inputPayload;
68
+ if (typeof inputPayload === 'string') {
69
+ const parsed: unknown = JSON.parse(inputPayload);
70
+ if (parsed !== null && typeof parsed === 'object' && Object.hasOwn(parsed, 'sourceArtificerArtifactId')) {
71
+ const candidate = Reflect.get(parsed, 'sourceArtificerArtifactId');
72
+ if (typeof candidate === 'string') artificerArtifactId = candidate;
73
+ }
74
+ }
75
+ }
76
+ payload = this.factories.evaluator(taskId, requireLineage(artificerArtifactId, 'sourceArtificerArtifactId'));
77
+ }
78
+ return { runId, payload };
79
+ }
80
+ async cancelRun(_runId: string): Promise<void> { /* noop */ }
81
+ async getCapabilities(): Promise<RuntimeCapabilities> {
82
+ return {
83
+ supportsStructuredJsonOutput: false,
84
+ supportsToolUse: false,
85
+ supportsWorkingDirectory: false,
86
+ supportsModelSelection: false,
87
+ supportsLongRunningSessions: false,
88
+ supportsCancellation: false,
89
+ supportsArtifactWriteBack: false,
90
+ supportsConcurrentRuns: false,
91
+ supportsStreaming: false,
92
+ };
93
+ }
94
+ async healthCheck(): Promise<RuntimeHealth> {
95
+ return { healthy: true, degraded: false, warnings: [], lastCheckedAt: new Date().toISOString() };
96
+ }
97
+ async fetchArtifacts(_runId: string): Promise<RuntimeArtifactRef[]> { return []; }
98
+ async appendContext(_runId: string, _items: ContextItem[]): Promise<void> { /* noop */ }
99
+ async refreshCapabilities(): Promise<RuntimeCapabilities> { return this.getCapabilities(); }
100
+ kind(): 'test-double' { return 'test-double'; }
101
+ }
102
+
103
+ // ── Output factories ─────────────────────────────────────────────────────────
104
+
105
+ function dreamerOut(taskId: string, painId: string): unknown {
106
+ return {
107
+ valid: true, taskId,
108
+ candidates: [{ candidateIndex: 0, badDecision: 'Wrote to /etc/passwd', betterDecision: 'Block system path writes', rationale: 'OS damage risk', confidence: 0.85, riskLevel: 'low' as const, strategicPerspective: 'safety' }],
109
+ sourcePainId: painId, contextRefs: [], generatedAt: new Date().toISOString(),
110
+ };
111
+ }
112
+
113
+ function requireLineage(id: string | undefined, field: string): string {
114
+ if (!id) throw new Error(`missing required lineage field: ${field}`);
115
+ return id;
116
+ }
117
+
118
+ function philosopherOut(taskId: string, priorId?: string): unknown {
119
+ return {
120
+ taskId, sourceDreamerArtifactId: requireLineage(priorId, 'sourceDreamerArtifactId'),
121
+ thesis: 'System path writes must be blocked',
122
+ principleCandidate: { title: 'Block system path writes', rationale: 'OS corruption risk', scope: 'write ops', confidence: 0.9 },
123
+ risks: [], generatedAt: new Date().toISOString(),
124
+ };
125
+ }
126
+
127
+ function scribeOut(taskId: string, priorId?: string): unknown {
128
+ return {
129
+ taskId, sourcePhilosopherArtifactId: requireLineage(priorId, 'sourcePhilosopherArtifactId'),
130
+ principleDraft: { title: 'Block system path writes', statement: 'Writes to /etc, /boot, /sys must be blocked.', rationale: 'OS corruption risk', applicability: ['write_file'], antiPatterns: ['Hardcoded allow'], confidence: 0.9 },
131
+ sourceTrace: { philosopherArtifactId: requireLineage(priorId, 'sourceTrace.philosopherArtifactId') },
132
+ risks: [], generatedAt: new Date().toISOString(),
133
+ };
134
+ }
135
+
136
+ function artificerV2(taskId: string, priorId?: string): unknown {
137
+ return {
138
+ taskId, sourceScribeArtifactId: requireLineage(priorId, 'sourceScribeArtifactId'),
139
+ implementationPlan: { summary: 'Block /etc writes', targetSurface: 'rule-host', changes: ['matcher'], tests: ['unit'], rolloutNotes: ['shadow'], confidence: 0.85 },
140
+ implementationCode: 'function evaluate(input, helpers) { const p = String(input?.action?.paramsSummary?.path ?? input?.action?.normalizedPath ?? ""); return p.startsWith("/etc") ? { decision: "block", matched: true, reason: "system path" } : { decision: "allow", matched: false, reason: "ok" }; }',
141
+ goldenTraceCases: [
142
+ { caseId: 'pos-1', kind: 'positive', toolName: 'write_file', params: { path: '/project/f.txt' }, expectedDecision: 'allow' },
143
+ { caseId: 'neg-1', kind: 'negative', toolName: 'write_file', params: { path: '/etc/passwd' }, expectedDecision: 'block' },
144
+ ],
145
+ affectedTools: ['write_file'],
146
+ sourceTrace: { scribeArtifactId: requireLineage(priorId, 'sourceTrace.scribeArtifactId') },
147
+ risks: [], generatedAt: new Date().toISOString(),
148
+ };
149
+ }
150
+
151
+ function evaluatorApproved(taskId: string, artificerArtifactId: string): unknown {
152
+ return {
153
+ taskId, sourceArtificerArtifactId: artificerArtifactId,
154
+ evaluation: { decision: 'approved', summary: 'approved', score: 0.9, strengths: [], concerns: [], requiredChanges: [] },
155
+ sourceTrace: { artificerArtifactId },
156
+ risks: [], generatedAt: new Date().toISOString(),
157
+ codeReview: { intentConsistency: { aligned: true, explanation: 'ok' }, scopePrecision: { verdict: 'precise' as const, explanation: 'ok' }, traceCoverage: { sufficient: true, gaps: [], explanation: 'ok' } },
158
+ adversarialCases: [{ caseId: 'adv-1', attackType: 'boundary' as const, toolName: 'write_file', params: { path: '/etc/shadow' }, expectedDecision: 'block' as const, rationale: 'system path' }],
159
+ adversarialResult: { passed: true, failedCases: [] },
160
+ };
161
+ }
162
+
163
+ function evaluatorRejected(taskId: string, artificerArtifactId: string): unknown {
164
+ return {
165
+ taskId, sourceArtificerArtifactId: artificerArtifactId,
166
+ evaluation: { decision: 'rejected', summary: 'rejected: code too narrow', score: 0.3, strengths: [], concerns: ['misses /boot path'], requiredChanges: ['add /boot matcher'] },
167
+ sourceTrace: { artificerArtifactId },
168
+ risks: [], generatedAt: new Date().toISOString(),
169
+ codeReview: { intentConsistency: { aligned: false, explanation: 'misses /boot' }, scopePrecision: { verdict: 'imprecise' as const, explanation: 'narrow' }, traceCoverage: { sufficient: false, gaps: ['/boot'], explanation: 'missing' } },
170
+ adversarialCases: [{ caseId: 'adv-1', attackType: 'boundary' as const, toolName: 'write_file', params: { path: '/boot/grub' }, expectedDecision: 'block' as const, rationale: 'system path' }],
171
+ adversarialResult: { passed: false, failedCases: [{ caseId: 'adv-1', errorType: 'wrong_decision', message: 'expected block, got allow' }] },
172
+ };
173
+ }
174
+
175
+ // ── Helpers ──────────────────────────────────────────────────────────────────
176
+
177
+ let tmpDir = '';
178
+
179
+ function makeTmpDir(): string {
180
+ const dir = path.join(os.tmpdir(), `pd-pipe-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`);
181
+ fs.mkdirSync(dir, { recursive: true });
182
+ return dir;
183
+ }
184
+
185
+ function makeAdapter(opts?: { evaluator?: EvaluatorFactory }): ScriptedAdapter {
186
+ return new ScriptedAdapter({
187
+ dreamer: (taskId) => dreamerOut(taskId, 'pain-test-001'),
188
+ philosopher: philosopherOut,
189
+ scribe: scribeOut,
190
+ artificer: artificerV2,
191
+ evaluator: opts?.evaluator ?? evaluatorApproved,
192
+ });
193
+ }
194
+
195
+ /**
196
+ * Seed a dreamer task with sourcePainId embedded as a TOP-LEVEL key in
197
+ * diagnosticJson (outside the pi_metadata envelope). This mirrors the pattern
198
+ * from source-trace-locator.test.ts and PainSignalBridge.
199
+ */
200
+ async function seedDreamerWithId(sm: RuntimeStateManager, taskId: string, painId: string): Promise<void> {
201
+ const baseMetadata = JSON.parse(createPITaskDiagnosticJson({
202
+ dependencyTaskIds: [], channel: 'prompt', timeoutMs: 1000, inputArtifactRefs: [], outputArtifactRefs: [],
203
+ })) as Record<string, unknown>;
204
+ const diagnosticJson = JSON.stringify({ ...baseMetadata, sourcePainId: painId });
205
+ await sm.createTask({ taskId, taskKind: 'dreamer', status: 'pending', attemptCount: 0, maxAttempts: 3, diagnosticJson });
206
+ }
207
+
208
+ /** Seed a dreamer task with raw diagnosticJson (for missing/wrong-type tests). */
209
+ async function seedDreamerRaw(sm: RuntimeStateManager, taskId: string, diagnosticJson: string): Promise<void> {
210
+ await sm.createTask({ taskId, taskKind: 'dreamer', status: 'pending', attemptCount: 0, maxAttempts: 3, diagnosticJson });
211
+ }
212
+
213
+ // ── Tests ────────────────────────────────────────────────────────────────────
214
+
215
+ describe('runRuleHostPipeline (PRI-429) — atomic capability + exact pain match', () => {
216
+ afterEach(() => {
217
+ vi.restoreAllMocks();
218
+ if (tmpDir) { try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch { /* ignore */ } tmpDir = ''; }
219
+ });
220
+
221
+ // ── Test 1: Capability ON + approved → candidate_ready_for_owner_review ──
222
+ it('capability ON + evaluator approved → candidate_ready_for_owner_review', async () => {
223
+ tmpDir = makeTmpDir();
224
+ const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
225
+ await sm.initialize();
226
+ await seedDreamerWithId(sm, 'dreamer-seeded-001', 'pain-test-001');
227
+ await sm.close();
228
+
229
+ const adapter = makeAdapter();
230
+ const capability: CodeRuleCapability = { enabled: true, artificerAdapter: adapter };
231
+ const result = await runRuleHostPipeline({
232
+ workspaceDir: tmpDir, painId: 'pain-test-001', runtimeAdapter: adapter,
233
+ channel: 'code_tool_hook', pollIntervalMs: 5, timeoutMs: 1000,
234
+ codeRuleCapability: capability,
235
+ onStoreReady: (store) => { adapter.artifactStore = store; },
236
+ });
237
+
238
+ expect(result.decision, JSON.stringify(result)).toBe('candidate_ready_for_owner_review');
239
+ expect(result.stages.map((s) => s.name)).toEqual(['pain_lookup', 'dreamer', 'philosopher', 'scribe', 'adversarial_loop']);
240
+ expect(result.ruleArtifactId).not.toBeNull();
241
+ }, 60_000);
242
+
243
+ it('runs the real ArtificerL2Adapter through fail-feedback-fix before creating a candidate', async () => {
244
+ tmpDir = makeTmpDir();
245
+ const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
246
+ await sm.initialize();
247
+ await seedDreamerWithId(sm, 'dreamer-l2-001', 'pain-l2-001');
248
+ await sm.close();
249
+
250
+ const baseAdapter = makeAdapter();
251
+ const prompts: string[] = [];
252
+ let sourceScribeArtifactId: string | null = null;
253
+ const l2Adapter = new ArtificerL2Adapter({
254
+ validator: new DefaultArtificerValidator(),
255
+ gateDeps: createSandboxGateDeps(),
256
+ generateCode: async (prompt) => {
257
+ prompts.push(prompt);
258
+ if (sourceScribeArtifactId === null) {
259
+ const parsed: unknown = JSON.parse(prompt);
260
+ if (parsed === null || typeof parsed !== 'object') throw new Error('Artificer prompt must be an object');
261
+ const sourceId = Reflect.get(parsed, 'sourceScribeArtifactId');
262
+ if (typeof sourceId !== 'string') throw new Error('sourceScribeArtifactId missing from prompt');
263
+ sourceScribeArtifactId = sourceId;
264
+ }
265
+ const candidate = artificerV2('', sourceScribeArtifactId);
266
+ if (candidate === null || typeof candidate !== 'object') throw new Error('candidate fixture invalid');
267
+ Reflect.deleteProperty(candidate, 'taskId');
268
+ if (prompts.length === 1) {
269
+ Reflect.set(candidate, 'implementationCode', 'function evaluate() { return { decision: "allow", matched: false, reason: "bug" }; }');
270
+ }
271
+ return candidate;
272
+ },
273
+ });
274
+
275
+ const result = await runRuleHostPipeline({
276
+ workspaceDir: tmpDir,
277
+ painId: 'pain-l2-001',
278
+ runtimeAdapter: baseAdapter,
279
+ codeRuleCapability: { enabled: true, artificerAdapter: l2Adapter },
280
+ channel: 'code_tool_hook',
281
+ pollIntervalMs: 5,
282
+ timeoutMs: 1000,
283
+ onStoreReady: (store) => { baseAdapter.artifactStore = store; },
284
+ });
285
+
286
+ expect(prompts).toHaveLength(2);
287
+ expect(prompts[1]).toContain('Previous sandbox replay failures');
288
+ expect(result.decision, JSON.stringify(result)).toBe('candidate_ready_for_owner_review');
289
+ expect(result.ruleArtifactId).toMatch(/^pi-rule-/);
290
+ }, 60_000);
291
+
292
+ // ── Test 2: Capability OFF (explicitly disabled) → text_principle_only ──
293
+ it('capability OFF (explicitly disabled) → text_principle_only', async () => {
294
+ tmpDir = makeTmpDir();
295
+ const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
296
+ await sm.initialize();
297
+ await seedDreamerWithId(sm, 'dreamer-seeded-002', 'pain-test-001');
298
+ await sm.close();
299
+
300
+ const adapter = makeAdapter();
301
+ const capability: CodeRuleCapability = { enabled: false, disabledReason: 'artificer agent disabled in config' };
302
+ const result = await runRuleHostPipeline({
303
+ workspaceDir: tmpDir, painId: 'pain-test-001', runtimeAdapter: adapter,
304
+ channel: 'code_tool_hook', pollIntervalMs: 5, timeoutMs: 1000,
305
+ codeRuleCapability: capability,
306
+ onStoreReady: (store) => { adapter.artifactStore = store; },
307
+ });
308
+
309
+ expect(result.decision).toBe('text_principle_only');
310
+ expect(result.ruleArtifactId).toBeNull();
311
+ // Scribe ran, so a principle artifact should exist.
312
+ expect(result.principleArtifactId).not.toBeNull();
313
+ expect(result.degradationReason).toContain('code_rule_capability_off');
314
+ // Adversarial loop stage must still be reported, with status 'skipped'.
315
+ const advStage = result.stages.find((s) => s.name === 'adversarial_loop');
316
+ expect(advStage?.status).toBe('skipped');
317
+ }, 60_000);
318
+
319
+ // ── Test 3: Capability ON + evaluator rejected → generation_rejected ──
320
+ it('capability ON + evaluator rejected → generation_rejected', async () => {
321
+ tmpDir = makeTmpDir();
322
+ const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
323
+ await sm.initialize();
324
+ await seedDreamerWithId(sm, 'dreamer-seeded-003', 'pain-test-001');
325
+ await sm.close();
326
+
327
+ const adapter = makeAdapter({ evaluator: evaluatorRejected });
328
+ const capability: CodeRuleCapability = { enabled: true, artificerAdapter: adapter };
329
+ const result = await runRuleHostPipeline({
330
+ workspaceDir: tmpDir, painId: 'pain-test-001', runtimeAdapter: adapter,
331
+ channel: 'code_tool_hook', pollIntervalMs: 5, timeoutMs: 1000,
332
+ codeRuleCapability: capability,
333
+ onStoreReady: (store) => { adapter.artifactStore = store; },
334
+ });
335
+
336
+ expect(result.decision).toBe('generation_rejected');
337
+ expect(result.ruleArtifactId).toBeNull();
338
+ }, 60_000);
339
+
340
+ // ── Test 4: Capability OFF by default (not provided) → text_principle_only ──
341
+ it('capability OFF by default (not provided) → text_principle_only', async () => {
342
+ tmpDir = makeTmpDir();
343
+ const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
344
+ await sm.initialize();
345
+ await seedDreamerWithId(sm, 'dreamer-seeded-004', 'pain-test-001');
346
+ await sm.close();
347
+
348
+ const adapter = makeAdapter();
349
+ // No codeRuleCapability provided — defaults to OFF.
350
+ const result = await runRuleHostPipeline({
351
+ workspaceDir: tmpDir, painId: 'pain-test-001', runtimeAdapter: adapter,
352
+ channel: 'code_tool_hook', pollIntervalMs: 5, timeoutMs: 1000,
353
+ onStoreReady: (store) => { adapter.artifactStore = store; },
354
+ });
355
+
356
+ expect(result.decision).toBe('text_principle_only');
357
+ expect(result.ruleArtifactId).toBeNull();
358
+ expect(result.degradationReason).toContain('code_rule_capability');
359
+ }, 60_000);
360
+
361
+ // ── Test 5: Exact sourcePainId match — pain-1 must NOT match pain-10 ──
362
+ it('exact sourcePainId match: pain-1 does NOT match pain-10', async () => {
363
+ tmpDir = makeTmpDir();
364
+ const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
365
+ await sm.initialize();
366
+ // Seed dreamer for pain-10 but query for pain-1 — must NOT match.
367
+ await seedDreamerWithId(sm, 'dreamer-seeded-pain-10', 'pain-10');
368
+ await sm.close();
369
+
370
+ const adapter = makeAdapter();
371
+ const result = await runRuleHostPipeline({
372
+ workspaceDir: tmpDir, painId: 'pain-1', runtimeAdapter: adapter,
373
+ channel: 'code_tool_hook', pollIntervalMs: 5, timeoutMs: 1000,
374
+ onStoreReady: (store) => { adapter.artifactStore = store; },
375
+ });
376
+
377
+ expect(result.decision).toBe('generation_rejected');
378
+ expect(result.degradationReason).toContain('no_dreamer_task_seeded');
379
+ }, 60_000);
380
+
381
+ // ── Test 6: No dreamer task at all → generation_rejected ──
382
+ it('no dreamer task seeded for pain → generation_rejected', async () => {
383
+ tmpDir = makeTmpDir();
384
+ const adapter = makeAdapter();
385
+ const result = await runRuleHostPipeline({
386
+ workspaceDir: tmpDir, painId: 'pain-nonexistent', runtimeAdapter: adapter,
387
+ pollIntervalMs: 5, timeoutMs: 1000,
388
+ });
389
+
390
+ expect(result.decision).toBe('generation_rejected');
391
+ expect(result.degradationReason).toContain('no_dreamer_task_seeded');
392
+ expect(result.stages[0]!.name).toBe('pain_lookup');
393
+ expect(result.stages[0]!.status).toBe('failed');
394
+ });
395
+
396
+ // ── Test 7: Missing or wrong-type sourcePainId → no match ──
397
+ it('missing or wrong-type sourcePainId in diagnosticJson → no match', async () => {
398
+ tmpDir = makeTmpDir();
399
+ const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
400
+ await sm.initialize();
401
+ // Seed a dreamer task WITHOUT sourcePainId (only pi_metadata).
402
+ await seedDreamerRaw(sm, 'dreamer-no-painid-001', JSON.stringify({ pi_metadata: { channel: 'prompt' } }));
403
+ // Seed a dreamer task with WRONG-TYPE sourcePainId (number, not string).
404
+ await seedDreamerRaw(sm, 'dreamer-wrong-type-001', JSON.stringify({ pi_metadata: { channel: 'prompt' }, sourcePainId: 12345 }));
405
+ await sm.close();
406
+
407
+ const adapter = makeAdapter();
408
+ const result = await runRuleHostPipeline({
409
+ workspaceDir: tmpDir, painId: '12345', runtimeAdapter: adapter,
410
+ pollIntervalMs: 5, timeoutMs: 1000,
411
+ });
412
+
413
+ // Neither task should match — missing sourcePainId and wrong-type sourcePainId
414
+ // are both skipped (not silently matched).
415
+ expect(result.decision).toBe('generation_rejected');
416
+ expect(result.degradationReason).toContain('no_dreamer_task_seeded');
417
+ }, 60_000);
418
+
419
+ it('rejects ambiguous lineage when multiple runnable Dreamer tasks have the same sourcePainId', async () => {
420
+ tmpDir = makeTmpDir();
421
+ const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
422
+ await sm.initialize();
423
+ await seedDreamerWithId(sm, 'dreamer-ambiguous-a', 'pain-ambiguous');
424
+ await seedDreamerWithId(sm, 'dreamer-ambiguous-b', 'pain-ambiguous');
425
+ await sm.close();
426
+
427
+ const result = await runRuleHostPipeline({
428
+ workspaceDir: tmpDir,
429
+ painId: 'pain-ambiguous',
430
+ runtimeAdapter: makeAdapter(),
431
+ pollIntervalMs: 5,
432
+ timeoutMs: 1000,
433
+ });
434
+
435
+ expect(result.decision).toBe('generation_rejected');
436
+ expect(result.degradationReason).toContain('ambiguous_dreamer_tasks_for_pain');
437
+ expect(result.stages[0]?.status).toBe('failed');
438
+ }, 60_000);
439
+
440
+ // ── Test 8 (E fix): retried status is NOT terminal — bounded retry succeeds ──
441
+ it('retried status triggers bounded retry and eventually succeeds (E fix)', async () => {
442
+ tmpDir = makeTmpDir();
443
+ const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
444
+ await sm.initialize();
445
+ await seedDreamerWithId(sm, 'dreamer-retry-001', 'pain-retry-001');
446
+ await sm.close();
447
+
448
+ // Adapter whose dreamer factory throws on the first call, then succeeds.
449
+ let dreamerCallCount = 0;
450
+ const adapter = new ScriptedAdapter({
451
+ dreamer: (taskId) => {
452
+ dreamerCallCount++;
453
+ if (dreamerCallCount === 1) {
454
+ // First call: throw to simulate a transient failure. The assertions
455
+ // below expect the runner to start a fresh attempt rather than treat
456
+ // the stage as terminally failed.
457
+ throw new Error('transient LLM error');
458
+ }
459
+ return dreamerOut(taskId, 'pain-retry-001');
460
+ },
461
+ philosopher: philosopherOut,
462
+ scribe: scribeOut,
463
+ artificer: artificerV2,
464
+ evaluator: evaluatorApproved,
465
+ });
466
+ const capability: CodeRuleCapability = { enabled: true, artificerAdapter: adapter };
467
+
468
+ const result = await runRuleHostPipeline({
469
+ workspaceDir: tmpDir, painId: 'pain-retry-001', runtimeAdapter: adapter,
470
+ channel: 'code_tool_hook', pollIntervalMs: 5, timeoutMs: 1000,
471
+ maxStageRetries: 2,
472
+ codeRuleCapability: capability,
473
+ onStoreReady: (store) => { adapter.artifactStore = store; },
474
+ });
475
+
476
+ // The retry contract is exact: one transient failure, one fresh successful
477
+ // attempt, then the full pipeline succeeds.
478
+ const dreamerStage = result.stages.find((s) => s.name === 'dreamer');
479
+ expect(dreamerCallCount).toBe(2);
480
+ expect(dreamerStage?.status).toBe('succeeded');
481
+ expect(result.decision).toBe('candidate_ready_for_owner_review');
482
+ }, 60_000);
483
+
484
+ // ── Test 9 (E fix): retried status exhausted → stage marked 'degraded' ──
485
+ it('retried status exhausted marks stage as degraded, not failed (E fix)', async () => {
486
+ tmpDir = makeTmpDir();
487
+ const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
488
+ await sm.initialize();
489
+ await seedDreamerWithId(sm, 'dreamer-exhaust-001', 'pain-exhaust-001');
490
+ await sm.close();
491
+
492
+ // Adapter that always throws on dreamer — simulating persistent transient
493
+ // errors. The runner will mark the task as retried each time until
494
+ // maxStageRetries is exhausted.
495
+ const adapter = new ScriptedAdapter({
496
+ dreamer: () => { throw new Error('persistent transient error'); },
497
+ philosopher: philosopherOut,
498
+ scribe: scribeOut,
499
+ artificer: artificerV2,
500
+ evaluator: evaluatorApproved,
501
+ });
502
+
503
+ const result = await runRuleHostPipeline({
504
+ workspaceDir: tmpDir, painId: 'pain-exhaust-001', runtimeAdapter: adapter,
505
+ channel: 'code_tool_hook', pollIntervalMs: 5, timeoutMs: 1000,
506
+ maxStageRetries: 1, // Only 1 retry to keep the test fast
507
+ });
508
+
509
+ // Pipeline must reject — dreamer never succeeded.
510
+ expect(result.decision).toBe('generation_rejected');
511
+ expect(result.degradationReason).toContain('dreamer_failed');
512
+ // The stage must be precisely degraded after the initial attempt + one retry.
513
+ const dreamerStage = result.stages.find((s) => s.name === 'dreamer');
514
+ expect(adapter.startRunCalls.filter((call) => call.taskId === 'dreamer-exhaust-001')).toHaveLength(2);
515
+ expect(dreamerStage?.status).toBe('degraded');
516
+ // The reason must be present (Runtime Contract Rule 9: no silent degradation).
517
+ expect(dreamerStage?.reason).toBeTruthy();
518
+ }, 60_000);
519
+ });