@principles/pd-cli 1.119.0 → 1.121.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/dist/commands/__tests__/legacy-cleanup.test.d.ts +18 -0
  2. package/dist/commands/__tests__/legacy-cleanup.test.d.ts.map +1 -0
  3. package/dist/commands/__tests__/legacy-cleanup.test.js +459 -0
  4. package/dist/commands/__tests__/legacy-cleanup.test.js.map +1 -0
  5. package/dist/commands/__tests__/mvp-smoke.test.js +19 -2
  6. package/dist/commands/__tests__/mvp-smoke.test.js.map +1 -1
  7. package/dist/commands/__tests__/rulecode-flag-wiring.test.d.ts +21 -0
  8. package/dist/commands/__tests__/rulecode-flag-wiring.test.d.ts.map +1 -0
  9. package/dist/commands/__tests__/rulecode-flag-wiring.test.js +179 -0
  10. package/dist/commands/__tests__/rulecode-flag-wiring.test.js.map +1 -0
  11. package/dist/commands/__tests__/rulecode-handler.test.d.ts +16 -0
  12. package/dist/commands/__tests__/rulecode-handler.test.d.ts.map +1 -0
  13. package/dist/commands/__tests__/rulecode-handler.test.js +285 -0
  14. package/dist/commands/__tests__/rulecode-handler.test.js.map +1 -0
  15. package/dist/commands/__tests__/runtime-probe-config.test.js +1 -1
  16. package/dist/commands/__tests__/runtime-probe-config.test.js.map +1 -1
  17. package/dist/commands/legacy-cleanup.d.ts +72 -6
  18. package/dist/commands/legacy-cleanup.d.ts.map +1 -1
  19. package/dist/commands/legacy-cleanup.js +243 -23
  20. package/dist/commands/legacy-cleanup.js.map +1 -1
  21. package/dist/commands/rulecode.d.ts +85 -0
  22. package/dist/commands/rulecode.d.ts.map +1 -0
  23. package/dist/commands/rulecode.js +356 -0
  24. package/dist/commands/rulecode.js.map +1 -0
  25. package/dist/commands/runtime-internalization-run-rulehost.d.ts.map +1 -1
  26. package/dist/commands/runtime-internalization-run-rulehost.js +4 -7
  27. package/dist/commands/runtime-internalization-run-rulehost.js.map +1 -1
  28. package/dist/index.js +30 -9
  29. package/dist/index.js.map +1 -1
  30. package/dist/utils/production-workspace-guard.test.js +9 -2
  31. package/dist/utils/production-workspace-guard.test.js.map +1 -1
  32. package/package.json +1 -1
  33. package/scripts/llm-dogfood.ts +8 -12
  34. package/src/commands/__tests__/legacy-cleanup.test.ts +596 -0
  35. package/src/commands/__tests__/mvp-smoke.test.ts +18 -2
  36. package/src/commands/__tests__/rulecode-flag-wiring.test.ts +230 -0
  37. package/src/commands/__tests__/rulecode-handler.test.ts +369 -0
  38. package/src/commands/__tests__/runtime-probe-config.test.ts +1 -1
  39. package/src/commands/legacy-cleanup.ts +335 -27
  40. package/src/commands/rulecode.ts +434 -0
  41. package/src/commands/runtime-internalization-run-rulehost.ts +3 -8
  42. package/src/index.ts +31 -9
  43. package/src/utils/production-workspace-guard.test.ts +9 -2
  44. package/tests/commands/cli-command-tree.test.ts +57 -7
  45. package/tests/commands/console-open.test.ts +19 -13
  46. package/tests/e2e/cli-full-flow.test.ts +198 -0
  47. package/tests/e2e/cross-package-acceptance.test.ts +1 -0
  48. package/tests/services/rulehost-pipeline-runner.test.ts +51 -30
@@ -552,11 +552,13 @@ describe('CLI command wiring (pd console open)', () => {
552
552
  });
553
553
 
554
554
  it('pd console open --json (port free) returns a structured JSON object with required fields', () => {
555
- const out = runPd(['console', 'open', '--workspace', tmp, '--json', '--no-browser'], workspaceRoot);
556
- // The CLI will attempt to start the fake server.js; since the file is a stub, the
557
- // child will exit early we should get a structured "failed" JSON, not a crash.
558
- // The required-field contract (status, url, port, host, workspaceDir, reason, nextAction, reused, browserOpened) is
559
- // what we assert.
555
+ // The real CLI spawns a long-lived Console server process and then prints
556
+ // JSON *without exiting* (the child keeps running). execFileSync would
557
+ // block forever waiting for the child to exit. Use a short timeout so the
558
+ // test reads stdout before the process is killed.
559
+ // Vitest timeout (10s) must exceed execFileSync timeout (8s) so the child
560
+ // is killed by execFileSync first, allowing stdout to be read.
561
+ const out = runPd(['console', 'open', '--workspace', tmp, '--json', '--no-browser'], workspaceRoot, 8_000);
560
562
  const parsed = JSON.parse(out);
561
563
  expect(parsed).toHaveProperty('status');
562
564
  expect(['started', 'reused', 'failed', 'refused']).toContain(parsed.status);
@@ -565,7 +567,7 @@ describe('CLI command wiring (pd console open)', () => {
565
567
  expect(parsed).toHaveProperty('workspaceDir');
566
568
  expect(parsed).toHaveProperty('reused');
567
569
  expect(parsed).toHaveProperty('browserOpened');
568
- });
570
+ }, 10_000);
569
571
 
570
572
  it('pd console open --port 99999 --json returns a structured failure (invalid port)', () => {
571
573
  const out = runPd(['console', 'open', '--workspace', tmp, '--port', '99999', '--json', '--no-browser'], workspaceRoot);
@@ -589,18 +591,20 @@ describe('CLI command wiring (pd console open)', () => {
589
591
  });
590
592
 
591
593
  it('pd console open --json with --no-auth and --no-browser parses options correctly', () => {
592
- const out = runPd(['console', 'open', '--workspace', tmp, '--json', '--no-auth', '--no-browser'], workspaceRoot);
594
+ // Same as port-free test: CLI spawns a long-lived server, use timeout.
595
+ const out = runPd(['console', 'open', '--workspace', tmp, '--json', '--no-auth', '--no-browser'], workspaceRoot, 8_000);
593
596
  const parsed = JSON.parse(out);
594
597
  expect(parsed).toHaveProperty('status');
595
598
  expect(parsed.browserOpened).toBe(false);
596
- });
599
+ }, 10_000);
597
600
 
598
601
  it('pd console --no-auth --json legacy path parses --no-auth correctly', () => {
599
- const out = runPd(['console', '--workspace', tmp, '--json', '--no-auth'], workspaceRoot);
602
+ // Same: may spawn a long-lived server, use timeout.
603
+ const out = runPd(['console', '--workspace', tmp, '--json', '--no-auth'], workspaceRoot, 8_000);
600
604
  expect(out.trim()).not.toBe('');
601
605
  const parsed = JSON.parse(out);
602
606
  expect(parsed).toBeDefined();
603
- });
607
+ }, 10_000);
604
608
 
605
609
  describe('openBrowser', () => {
606
610
  afterEach(() => {
@@ -746,17 +750,18 @@ describe('CLI command wiring (pd console open)', () => {
746
750
  });
747
751
 
748
752
  it('[::1] is accepted and normalized to ::1 (not refused)', () => {
749
- const out = runPd(['console', 'open', '--workspace', tmp, '--host', '[::1]', '--json', '--no-browser'], workspaceRoot);
753
+ // May spawn a long-lived server, use timeout.
754
+ const out = runPd(['console', 'open', '--workspace', tmp, '--host', '[::1]', '--json', '--no-browser'], workspaceRoot, 8_000);
750
755
  const parsed = JSON.parse(out);
751
756
  // Should NOT be refused — [::1] is loopback
752
757
  expect(parsed.status).not.toBe('refused');
753
758
  // Host should be normalized to ::1 (without brackets)
754
759
  expect(parsed.host).toBe('::1');
755
- });
760
+ }, 10_000);
756
761
  });
757
762
  });
758
763
 
759
- function runPd(args: string[], cwd: string): string {
764
+ function runPd(args: string[], cwd: string, timeoutMs?: number): string {
760
765
  try {
761
766
  const env: Record<string, string> = { ...process.env };
762
767
  if (!args.includes('--workspace') && !args.includes('--help') && !args.includes('-h')) {
@@ -770,6 +775,7 @@ function runPd(args: string[], cwd: string): string {
770
775
  encoding: 'utf8',
771
776
  cwd,
772
777
  env,
778
+ timeout: timeoutMs,
773
779
  });
774
780
  } catch (err: unknown) {
775
781
  if (err && typeof err === 'object' && Object.hasOwn(err, 'stdout')) {
@@ -0,0 +1,198 @@ package/tests/e2e/cli-full-flow.test.ts (new file)
1
+ /**
2
+ * CLI Full-Flow E2E — drives the real compiled pd binary.
3
+ *
4
+ * Every other test in this package imports handler functions directly. This
5
+ * file validates the actual user path: spawning `node dist/index.js <args>`
6
+ * via child_process.execFile and asserting on exit code / stdout / stderr.
7
+ *
8
+ * Uses Node.js built-in child_process (not execa) to avoid vitest forks-pool
9
+ * compatibility issues.
10
+ *
11
+ * ERR checklist:
12
+ * - EP-04 (CLI and Operator Contract): verifies --json emits exactly one
13
+ * parseable JSON object (Rule 1), invalid commands exit non-zero (Rule 2),
14
+ * and JSON-mode stdout is not polluted with banners.
15
+ * - EP-09 (Test Reality Gap): drives the real compiled binary, not imported
16
+ * helpers — proves the Commander wiring, dependency loading (including
17
+ * better-sqlite3), and workspace resolution all work end-to-end.
18
+ * - EP-02 (Production Path Wiring): exercises the production entry point
19
+ * (dist/index.js), confirming commands are registered and reachable.
20
+ * - ERR-001: error fields from execFile are validated with type guards, not `as`.
21
+ * - ERR-071: all temp workspaces are tracked and cleaned in afterEach.
22
+ *
23
+ * Note: pd-cli has no `pd init` command. `runtime activation list` is the
24
+ * closest production path that bootstraps `.pd/state.db` on a fresh workspace
25
+ * (SqliteConnection constructor creates the directory and DB file).
26
+ */
27
+
28
+ import { describe, it, expect, afterEach, beforeAll } from 'vitest';
29
+ import { execFile } from 'node:child_process';
30
+ import { promisify } from 'node:util';
31
+ import * as path from 'node:path';
32
+ import * as fs from 'node:fs';
33
+ import * as os from 'node:os';
34
+ import { getBuiltPdCliPath } from '../helpers/pd-cli-path.js';
35
+
36
+ const execFileAsync = promisify(execFile);
37
+ const PD_BIN = getBuiltPdCliPath();
38
+
39
+ const WORKSPACES: string[] = [];
40
+
41
+ afterEach(() => {
42
+ while (WORKSPACES.length > 0) {
43
+ const ws = WORKSPACES.pop();
44
+ if (ws) {
45
+ try {
46
+ fs.rmSync(ws, { recursive: true, force: true });
47
+ } catch (err) {
48
+ console.warn(
49
+ `[afterEach] Failed to clean ${ws}: ${err instanceof Error ? err.message : String(err)}`,
50
+ );
51
+ }
52
+ }
53
+ }
54
+ });
55
+
56
+ beforeAll(() => {
57
+ // ERR-009/ERR-010: fail loud if the compiled binary is missing.
58
+ if (!fs.existsSync(PD_BIN)) {
59
+ throw new Error(
60
+ `[cli-full-flow] Compiled binary not found at ${PD_BIN}. ` +
61
+ `Run "npm run build --workspace=@principles/pd-cli" before running this test.`,
62
+ );
63
+ }
64
+ });
65
+
66
+ function makeWorkspace(): string {
67
+ const ws = fs.mkdtempSync(path.join(os.tmpdir(), 'pd-cli-e2e-'));
68
+ WORKSPACES.push(ws);
69
+ return ws;
70
+ }
71
+
72
+ interface RunResult {
73
+ stdout: string;
74
+ stderr: string;
75
+ exitCode: number;
76
+ }
77
+
78
+ function requireRecord(value: unknown, label: string): Record<string, unknown> {
79
+ if (typeof value !== 'object' || value === null || Array.isArray(value)) {
80
+ throw new Error(`${label} must be a JSON object`);
81
+ }
82
+ return value;
83
+ }
84
+
85
+ /**
86
+ * Type guard: extract a string field from an unknown error object.
87
+ * ERR-001: no `as` casts on untrusted error data — use Reflect.get + typeof.
88
+ */
89
+ function readStringField(obj: unknown, field: string): string {
90
+ if (typeof obj !== 'object' || obj === null) return '';
91
+ if (!Object.hasOwn(obj, field)) return '';
92
+ const value: unknown = Reflect.get(obj, field);
93
+ return typeof value === 'string' ? value : '';
94
+ }
95
+
96
+ /**
97
+ * Type guard: extract a numeric exit code from an unknown error object.
98
+ * ERR-001: no `as` casts on untrusted error data — use Reflect.get + typeof.
99
+ */
100
+ function readExitCode(obj: unknown): number {
101
+ if (typeof obj !== 'object' || obj === null) return 1;
102
+ if (!Object.hasOwn(obj, 'code')) return 1;
103
+ const code: unknown = Reflect.get(obj, 'code');
104
+ return typeof code === 'number' ? code : 1;
105
+ }
106
+
107
+ async function runPd(
108
+ args: string[],
109
+ options: { timeout?: number } = {},
110
+ ): Promise<RunResult> {
111
+ try {
112
+ const { stdout, stderr } = await execFileAsync('node', [PD_BIN, ...args], {
113
+ timeout: options.timeout ?? 30_000,
114
+ maxBuffer: 10 * 1024 * 1024,
115
+ });
116
+ return { stdout, stderr, exitCode: 0 };
117
+ } catch (err: unknown) {
118
+ return {
119
+ stdout: readStringField(err, 'stdout'),
120
+ stderr: readStringField(err, 'stderr'),
121
+ exitCode: readExitCode(err),
122
+ };
123
+ }
124
+ }
125
+
126
+ describe('CLI full flow', () => {
127
+ it('pd --help exits 0 and lists main commands', async () => {
128
+ const { stdout, exitCode } = await runPd(['--help']);
129
+ expect(exitCode).toBe(0);
130
+ expect(stdout).toContain('Usage: pd');
131
+ // Verify key command groups are registered and visible to users
132
+ expect(stdout).toContain('pain');
133
+ expect(stdout).toContain('runtime');
134
+ expect(stdout).toContain('diagnose');
135
+ expect(stdout).toContain('candidate');
136
+ });
137
+
138
+ it('pd --version exits 0 and prints version', async () => {
139
+ const { stdout, exitCode } = await runPd(['--version']);
140
+ expect(exitCode).toBe(0);
141
+ expect(stdout.trim()).toMatch(/^\d+\.\d+\.\d+/);
142
+ });
143
+
144
+ it('pd runtime features --json on fresh workspace returns valid JSON with defaults', async () => {
145
+ const workspace = makeWorkspace();
146
+ const { stdout, exitCode } = await runPd([
147
+ 'runtime',
148
+ 'features',
149
+ '--workspace',
150
+ workspace,
151
+ '--json',
152
+ ]);
153
+ expect(exitCode).toBe(0);
154
+ // EP-04 Rule 1: --json stdout must be exactly one parseable JSON object
155
+ const parsed: unknown = JSON.parse(stdout);
156
+ const result = requireRecord(parsed, 'runtime features output');
157
+ expect(result).toHaveProperty('status');
158
+ expect(result).toHaveProperty('source');
159
+ expect(result).toHaveProperty('features');
160
+ expect(Array.isArray(result.features)).toBe(true);
161
+ expect(result).toHaveProperty('enabledMvpChannels');
162
+ expect(Array.isArray(result.enabledMvpChannels)).toBe(true);
163
+ // Fresh workspace with no .pd/config.yaml falls back to defaults
164
+ expect(result.source).toBe('defaults');
165
+ });
166
+
167
+ it('pd runtime activation list --json on fresh workspace returns empty list and bootstraps .pd/', async () => {
168
+ const workspace = makeWorkspace();
169
+ const { stdout, exitCode } = await runPd([
170
+ 'runtime',
171
+ 'activation',
172
+ 'list',
173
+ '--workspace',
174
+ workspace,
175
+ '--json',
176
+ ]);
177
+ expect(exitCode).toBe(0);
178
+ // EP-04 Rule 1: --json stdout must be exactly one parseable JSON object
179
+ const parsed: unknown = JSON.parse(stdout);
180
+ const result = requireRecord(parsed, 'activation list output');
181
+ expect(result).toHaveProperty('activations');
182
+ const activations = result.activations;
183
+ expect(Array.isArray(activations)).toBe(true);
184
+ expect(activations).toEqual([]);
185
+ // SqliteConnection constructor creates .pd/ and state.db on first access
186
+ expect(fs.existsSync(path.join(workspace, '.pd'))).toBe(true);
187
+ expect(fs.existsSync(path.join(workspace, '.pd', 'state.db'))).toBe(true);
188
+ });
189
+
190
+ it('pd <invalid-command> exits non-zero with non-empty stderr', async () => {
191
+ const { stderr, exitCode } = await runPd([
192
+ 'this-command-does-not-exist',
193
+ ]);
194
+ expect(exitCode).not.toBe(0);
195
+ expect(stderr.length).toBeGreaterThan(0);
196
+ expect(stderr).toContain('unknown command');
197
+ });
198
+ });
@@ -174,6 +174,7 @@ function artificerV2(taskId: string, priorId?: string): unknown {
174
174
  taskId, sourceScribeArtifactId: requireLineage(priorId, 'sourceScribeArtifactId'),
175
175
  implementationPlan: { summary: 'Block /etc writes', targetSurface: 'rule-host', changes: ['matcher'], tests: ['unit'], rolloutNotes: ['shadow'], confidence: 0.85 },
176
176
  implementationCode: 'function evaluate(input, helpers) { const p = String(input?.action?.paramsSummary?.path ?? input?.action?.normalizedPath ?? ""); return p.startsWith("/etc") ? { decision: "block", matched: true, reason: "system path" } : { decision: "allow", matched: false, reason: "ok" }; }',
177
+ implementationSummary: 'Block system path writes',
177
178
  goldenTraceCases: [
178
179
  { caseId: 'pos-1', kind: 'positive', toolName: 'write_file', params: { path: '/project/f.txt' }, expectedDecision: 'allow' },
179
180
  { caseId: 'neg-1', kind: 'negative', toolName: 'write_file', params: { path: '/etc/passwd' }, expectedDecision: 'block' },
@@ -16,10 +16,10 @@ import { describe, it, expect, afterEach, vi } from 'vitest';
16
16
  import * as os from 'node:os';
17
17
  import * as path from 'node:path';
18
18
  import * as fs from 'node:fs';
19
- import { createSandboxGateDeps, runRuleHostPipeline } from '../../src/services/rulehost-pipeline-runner.js';
19
+ import { runRuleHostPipeline } from '../../src/services/rulehost-pipeline-runner.js';
20
20
  import type { CodeRuleCapability } from '../../src/services/rulehost-pipeline-runner.js';
21
21
  import type { PDRuntimeAdapter, RunHandle, RunStatus, PIArtifactStore, RuntimeCapabilities, RuntimeHealth, RuntimeArtifactRef, ContextItem, StructuredRunOutput, StartRunInput } from '@principles/core/runtime-v2';
22
- import { ArtificerL2Adapter, DefaultArtificerValidator, RuntimeStateManager, createPITaskDiagnosticJson } from '@principles/core/runtime-v2';
22
+ import { RuntimeStateManager, createPITaskDiagnosticJson } from '@principles/core/runtime-v2';
23
23
 
24
24
  type StageFactory = (taskId: string, priorArtifactId?: string) => unknown;
25
25
  type EvaluatorFactory = (taskId: string, artificerArtifactId: string) => unknown;
@@ -138,6 +138,7 @@ function artificerV2(taskId: string, priorId?: string): unknown {
138
138
  taskId, sourceScribeArtifactId: requireLineage(priorId, 'sourceScribeArtifactId'),
139
139
  implementationPlan: { summary: 'Block /etc writes', targetSurface: 'rule-host', changes: ['matcher'], tests: ['unit'], rolloutNotes: ['shadow'], confidence: 0.85 },
140
140
  implementationCode: 'function evaluate(input, helpers) { const p = String(input?.action?.paramsSummary?.path ?? input?.action?.normalizedPath ?? ""); return p.startsWith("/etc") ? { decision: "block", matched: true, reason: "system path" } : { decision: "allow", matched: false, reason: "ok" }; }',
141
+ implementationSummary: 'Block system path writes',
141
142
  goldenTraceCases: [
142
143
  { caseId: 'pos-1', kind: 'positive', toolName: 'write_file', params: { path: '/project/f.txt' }, expectedDecision: 'allow' },
143
144
  { caseId: 'neg-1', kind: 'negative', toolName: 'write_file', params: { path: '/etc/passwd' }, expectedDecision: 'block' },
@@ -172,6 +173,18 @@ function evaluatorRejected(taskId: string, artificerArtifactId: string): unknown
172
173
  };
173
174
  }
174
175
 
176
+ function evaluatorNeedsRevision(taskId: string, artificerArtifactId: string): unknown {
177
+ return {
178
+ taskId, sourceArtificerArtifactId: artificerArtifactId,
179
+ evaluation: { decision: 'needs_revision', summary: 'needs revision: adversarial replay failed', score: 0.4, strengths: [], concerns: ['adversarial case failed'], requiredChanges: ['fix matcher'] },
180
+ sourceTrace: { artificerArtifactId },
181
+ risks: [], generatedAt: new Date().toISOString(),
182
+ codeReview: { intentConsistency: { aligned: false, explanation: 'misses system paths' }, scopePrecision: { verdict: 'too_narrow' as const, explanation: 'narrow' }, traceCoverage: { sufficient: false, gaps: [], explanation: 'missing' } },
183
+ adversarialCases: [{ caseId: 'adv-1', attackType: 'boundary' as const, toolName: 'write_file', params: { path: '/etc/shadow' }, expectedDecision: 'block' as const, rationale: 'system path' }],
184
+ adversarialResult: { passed: false, failedCases: [{ caseId: 'adv-1', attackType: 'boundary' as const, actualDecision: 'allow', expectedDecision: 'block', rationale: 'system path' }] },
185
+ };
186
+ }
187
+
175
188
  // ── Helpers ──────────────────────────────────────────────────────────────────
176
189
 
177
190
  let tmpDir = '';
@@ -247,51 +260,59 @@ describe('runRuleHostPipeline (PRI-429) — atomic capability + exact pain match
247
260
  expect(result.approvalId).not.toBeNull();
248
261
  }, 60_000);
249
262
 
250
- it('runs the real ArtificerL2Adapter through fail-feedback-fix before creating a candidate', async () => {
263
+ it('adversarial feedback loop drives a second artificer round before creating a candidate', async () => {
251
264
  tmpDir = makeTmpDir();
252
265
  const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
253
266
  await sm.initialize();
254
- await seedDreamerWithId(sm, 'dreamer-l2-001', 'pain-l2-001');
267
+ await seedDreamerWithId(sm, 'dreamer-feedback-001', 'pain-feedback-001');
255
268
  await sm.close();
256
269
 
257
- const baseAdapter = makeAdapter();
258
- const prompts: string[] = [];
259
- let sourceScribeArtifactId: string | null = null;
260
- const l2Adapter = new ArtificerL2Adapter({
261
- validator: new DefaultArtificerValidator(),
262
- gateDeps: createSandboxGateDeps(),
263
- generateCode: async (prompt) => {
264
- prompts.push(prompt);
265
- if (sourceScribeArtifactId === null) {
266
- const parsed: unknown = JSON.parse(prompt);
267
- if (parsed === null || typeof parsed !== 'object') throw new Error('Artificer prompt must be an object');
268
- const sourceId = Reflect.get(parsed, 'sourceScribeArtifactId');
269
- if (typeof sourceId !== 'string') throw new Error('sourceScribeArtifactId missing from prompt');
270
- sourceScribeArtifactId = sourceId;
270
+ let adapter: ScriptedAdapter;
271
+ let artificerCallCount = 0;
272
+ const artificerPrompts: string[] = [];
273
+ adapter = new ScriptedAdapter({
274
+ dreamer: (taskId) => dreamerOut(taskId, 'pain-feedback-001'),
275
+ philosopher: philosopherOut,
276
+ scribe: scribeOut,
277
+ artificer: (taskId, priorId) => {
278
+ artificerCallCount++;
279
+ const runId = `run-${taskId}`;
280
+ const inputPayload = adapter.startRunInputs.get(runId)?.inputPayload;
281
+ if (typeof inputPayload === 'string') artificerPrompts.push(inputPayload);
282
+
283
+ const base = artificerV2(taskId, priorId);
284
+ if (artificerCallCount === 1) {
285
+ // Round 1: code fails the adversarial replay, forcing needs_revision.
286
+ return {
287
+ ...base,
288
+ implementationCode: 'function evaluate() { return { decision: "allow", matched: false, reason: "bug" }; }',
289
+ };
271
290
  }
272
- const candidate = artificerV2('', sourceScribeArtifactId);
273
- if (candidate === null || typeof candidate !== 'object') throw new Error('candidate fixture invalid');
274
- Reflect.deleteProperty(candidate, 'taskId');
275
- if (prompts.length === 1) {
276
- Reflect.set(candidate, 'implementationCode', 'function evaluate() { return { decision: "allow", matched: false, reason: "bug" }; }');
291
+ // Round 2: fixed code passes the evaluator.
292
+ return base;
293
+ },
294
+ evaluator: (taskId, artificerArtifactId) => {
295
+ if (artificerCallCount === 1) {
296
+ return evaluatorNeedsRevision(taskId, artificerArtifactId);
277
297
  }
278
- return candidate;
298
+ return evaluatorApproved(taskId, artificerArtifactId);
279
299
  },
280
300
  });
281
301
 
282
302
  const result = await runRuleHostPipeline({
283
303
  workspaceDir: tmpDir,
284
- painId: 'pain-l2-001',
285
- runtimeAdapter: baseAdapter,
286
- codeRuleCapability: { enabled: true, artificerAdapter: l2Adapter },
304
+ painId: 'pain-feedback-001',
305
+ runtimeAdapter: adapter,
306
+ codeRuleCapability: { enabled: true, artificerAdapter: adapter },
287
307
  channel: 'code_tool_hook',
288
308
  pollIntervalMs: 5,
289
309
  timeoutMs: 1000,
290
- onStoreReady: (store) => { baseAdapter.artifactStore = store; },
310
+ onStoreReady: (store) => { adapter.artifactStore = store; },
291
311
  });
292
312
 
293
- expect(prompts).toHaveLength(2);
294
- expect(prompts[1]).toContain('Previous sandbox replay failures');
313
+ expect(artificerCallCount).toBe(2);
314
+ expect(artificerPrompts).toHaveLength(2);
315
+ expect(artificerPrompts[1]).toContain('Prior adversarial replay failures');
295
316
  expect(result.decision, JSON.stringify(result)).toBe('candidate_ready_for_owner_review');
296
317
  expect(result.ruleArtifactId).toMatch(/^pi-rule-/);
297
318
  // P1 #1 fix: candidate should be auto-enqueued into the ApprovalQueue