@principles/pd-cli 1.119.0 → 1.121.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/__tests__/legacy-cleanup.test.d.ts +18 -0
- package/dist/commands/__tests__/legacy-cleanup.test.d.ts.map +1 -0
- package/dist/commands/__tests__/legacy-cleanup.test.js +459 -0
- package/dist/commands/__tests__/legacy-cleanup.test.js.map +1 -0
- package/dist/commands/__tests__/mvp-smoke.test.js +19 -2
- package/dist/commands/__tests__/mvp-smoke.test.js.map +1 -1
- package/dist/commands/__tests__/rulecode-flag-wiring.test.d.ts +21 -0
- package/dist/commands/__tests__/rulecode-flag-wiring.test.d.ts.map +1 -0
- package/dist/commands/__tests__/rulecode-flag-wiring.test.js +179 -0
- package/dist/commands/__tests__/rulecode-flag-wiring.test.js.map +1 -0
- package/dist/commands/__tests__/rulecode-handler.test.d.ts +16 -0
- package/dist/commands/__tests__/rulecode-handler.test.d.ts.map +1 -0
- package/dist/commands/__tests__/rulecode-handler.test.js +285 -0
- package/dist/commands/__tests__/rulecode-handler.test.js.map +1 -0
- package/dist/commands/__tests__/runtime-probe-config.test.js +1 -1
- package/dist/commands/__tests__/runtime-probe-config.test.js.map +1 -1
- package/dist/commands/legacy-cleanup.d.ts +72 -6
- package/dist/commands/legacy-cleanup.d.ts.map +1 -1
- package/dist/commands/legacy-cleanup.js +243 -23
- package/dist/commands/legacy-cleanup.js.map +1 -1
- package/dist/commands/rulecode.d.ts +85 -0
- package/dist/commands/rulecode.d.ts.map +1 -0
- package/dist/commands/rulecode.js +356 -0
- package/dist/commands/rulecode.js.map +1 -0
- package/dist/commands/runtime-internalization-run-rulehost.d.ts.map +1 -1
- package/dist/commands/runtime-internalization-run-rulehost.js +4 -7
- package/dist/commands/runtime-internalization-run-rulehost.js.map +1 -1
- package/dist/index.js +30 -9
- package/dist/index.js.map +1 -1
- package/dist/utils/production-workspace-guard.test.js +9 -2
- package/dist/utils/production-workspace-guard.test.js.map +1 -1
- package/package.json +1 -1
- package/scripts/llm-dogfood.ts +8 -12
- package/src/commands/__tests__/legacy-cleanup.test.ts +596 -0
- package/src/commands/__tests__/mvp-smoke.test.ts +18 -2
- package/src/commands/__tests__/rulecode-flag-wiring.test.ts +230 -0
- package/src/commands/__tests__/rulecode-handler.test.ts +369 -0
- package/src/commands/__tests__/runtime-probe-config.test.ts +1 -1
- package/src/commands/legacy-cleanup.ts +335 -27
- package/src/commands/rulecode.ts +434 -0
- package/src/commands/runtime-internalization-run-rulehost.ts +3 -8
- package/src/index.ts +31 -9
- package/src/utils/production-workspace-guard.test.ts +9 -2
- package/tests/commands/cli-command-tree.test.ts +57 -7
- package/tests/commands/console-open.test.ts +19 -13
- package/tests/e2e/cli-full-flow.test.ts +198 -0
- package/tests/e2e/cross-package-acceptance.test.ts +1 -0
- package/tests/services/rulehost-pipeline-runner.test.ts +51 -30
|
@@ -552,11 +552,13 @@ describe('CLI command wiring (pd console open)', () => {
|
|
|
552
552
|
});
|
|
553
553
|
|
|
554
554
|
it('pd console open --json (port free) returns a structured JSON object with required fields', () => {
|
|
555
|
-
|
|
556
|
-
//
|
|
557
|
-
//
|
|
558
|
-
//
|
|
559
|
-
//
|
|
555
|
+
// The real CLI spawns a long-lived Console server process and then prints
|
|
556
|
+
// JSON *without exiting* (the child keeps running). execFileSync would
|
|
557
|
+
// block forever waiting for the child to exit. Use a short timeout so the
|
|
558
|
+
// test reads stdout before the process is killed.
|
|
559
|
+
// Vitest timeout (10s) must exceed execFileSync timeout (8s) so the child
|
|
560
|
+
// is killed by execFileSync first, allowing stdout to be read.
|
|
561
|
+
const out = runPd(['console', 'open', '--workspace', tmp, '--json', '--no-browser'], workspaceRoot, 8_000);
|
|
560
562
|
const parsed = JSON.parse(out);
|
|
561
563
|
expect(parsed).toHaveProperty('status');
|
|
562
564
|
expect(['started', 'reused', 'failed', 'refused']).toContain(parsed.status);
|
|
@@ -565,7 +567,7 @@ describe('CLI command wiring (pd console open)', () => {
|
|
|
565
567
|
expect(parsed).toHaveProperty('workspaceDir');
|
|
566
568
|
expect(parsed).toHaveProperty('reused');
|
|
567
569
|
expect(parsed).toHaveProperty('browserOpened');
|
|
568
|
-
});
|
|
570
|
+
}, 10_000);
|
|
569
571
|
|
|
570
572
|
it('pd console open --port 99999 --json returns a structured failure (invalid port)', () => {
|
|
571
573
|
const out = runPd(['console', 'open', '--workspace', tmp, '--port', '99999', '--json', '--no-browser'], workspaceRoot);
|
|
@@ -589,18 +591,20 @@ describe('CLI command wiring (pd console open)', () => {
|
|
|
589
591
|
});
|
|
590
592
|
|
|
591
593
|
it('pd console open --json with --no-auth and --no-browser parses options correctly', () => {
|
|
592
|
-
|
|
594
|
+
// Same as port-free test: CLI spawns a long-lived server, use timeout.
|
|
595
|
+
const out = runPd(['console', 'open', '--workspace', tmp, '--json', '--no-auth', '--no-browser'], workspaceRoot, 8_000);
|
|
593
596
|
const parsed = JSON.parse(out);
|
|
594
597
|
expect(parsed).toHaveProperty('status');
|
|
595
598
|
expect(parsed.browserOpened).toBe(false);
|
|
596
|
-
});
|
|
599
|
+
}, 10_000);
|
|
597
600
|
|
|
598
601
|
it('pd console --no-auth --json legacy path parses --no-auth correctly', () => {
|
|
599
|
-
|
|
602
|
+
// Same: may spawn a long-lived server, use timeout.
|
|
603
|
+
const out = runPd(['console', '--workspace', tmp, '--json', '--no-auth'], workspaceRoot, 8_000);
|
|
600
604
|
expect(out.trim()).not.toBe('');
|
|
601
605
|
const parsed = JSON.parse(out);
|
|
602
606
|
expect(parsed).toBeDefined();
|
|
603
|
-
});
|
|
607
|
+
}, 10_000);
|
|
604
608
|
|
|
605
609
|
describe('openBrowser', () => {
|
|
606
610
|
afterEach(() => {
|
|
@@ -746,17 +750,18 @@ describe('CLI command wiring (pd console open)', () => {
|
|
|
746
750
|
});
|
|
747
751
|
|
|
748
752
|
it('[::1] is accepted and normalized to ::1 (not refused)', () => {
|
|
749
|
-
|
|
753
|
+
// May spawn a long-lived server, use timeout.
|
|
754
|
+
const out = runPd(['console', 'open', '--workspace', tmp, '--host', '[::1]', '--json', '--no-browser'], workspaceRoot, 8_000);
|
|
750
755
|
const parsed = JSON.parse(out);
|
|
751
756
|
// Should NOT be refused — [::1] is loopback
|
|
752
757
|
expect(parsed.status).not.toBe('refused');
|
|
753
758
|
// Host should be normalized to ::1 (without brackets)
|
|
754
759
|
expect(parsed.host).toBe('::1');
|
|
755
|
-
});
|
|
760
|
+
}, 10_000);
|
|
756
761
|
});
|
|
757
762
|
});
|
|
758
763
|
|
|
759
|
-
function runPd(args: string[], cwd: string): string {
|
|
764
|
+
function runPd(args: string[], cwd: string, timeoutMs?: number): string {
|
|
760
765
|
try {
|
|
761
766
|
const env: Record<string, string> = { ...process.env };
|
|
762
767
|
if (!args.includes('--workspace') && !args.includes('--help') && !args.includes('-h')) {
|
|
@@ -770,6 +775,7 @@ function runPd(args: string[], cwd: string): string {
|
|
|
770
775
|
encoding: 'utf8',
|
|
771
776
|
cwd,
|
|
772
777
|
env,
|
|
778
|
+
timeout: timeoutMs,
|
|
773
779
|
});
|
|
774
780
|
} catch (err: unknown) {
|
|
775
781
|
if (err && typeof err === 'object' && Object.hasOwn(err, 'stdout')) {
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI Full-Flow E2E — drives the real compiled pd binary.
|
|
3
|
+
*
|
|
4
|
+
* Every other test in this package imports handler functions directly. This
|
|
5
|
+
* file validates the actual user path: spawning `node dist/index.js <args>`
|
|
6
|
+
* via child_process.execFile and asserting on exit code / stdout / stderr.
|
|
7
|
+
*
|
|
8
|
+
* Uses Node.js built-in child_process (not execa) to avoid vitest forks-pool
|
|
9
|
+
* compatibility issues.
|
|
10
|
+
*
|
|
11
|
+
* ERR checklist:
|
|
12
|
+
* - EP-04 (CLI and Operator Contract): verifies --json emits exactly one
|
|
13
|
+
* parseable JSON object (Rule 1), invalid commands exit non-zero (Rule 2),
|
|
14
|
+
* and JSON-mode stdout is not polluted with banners.
|
|
15
|
+
* - EP-09 (Test Reality Gap): drives the real compiled binary, not imported
|
|
16
|
+
* helpers — proves the Commander wiring, dependency loading (including
|
|
17
|
+
* better-sqlite3), and workspace resolution all work end-to-end.
|
|
18
|
+
* - EP-02 (Production Path Wiring): exercises the production entry point
|
|
19
|
+
* (dist/index.js), confirming commands are registered and reachable.
|
|
20
|
+
* - ERR-001: error fields from execFile are validated with type guards, not `as`.
|
|
21
|
+
* - ERR-071: all temp workspaces are tracked and cleaned in afterEach.
|
|
22
|
+
*
|
|
23
|
+
* Note: pd-cli has no `pd init` command. `runtime activation list` is the
|
|
24
|
+
* closest production path that bootstraps `.pd/state.db` on a fresh workspace
|
|
25
|
+
* (SqliteConnection constructor creates the directory and DB file).
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import { describe, it, expect, afterEach, beforeAll } from 'vitest';
|
|
29
|
+
import { execFile } from 'node:child_process';
|
|
30
|
+
import { promisify } from 'node:util';
|
|
31
|
+
import * as path from 'node:path';
|
|
32
|
+
import * as fs from 'node:fs';
|
|
33
|
+
import * as os from 'node:os';
|
|
34
|
+
import { getBuiltPdCliPath } from '../helpers/pd-cli-path.js';
|
|
35
|
+
|
|
36
|
+
const execFileAsync = promisify(execFile);
|
|
37
|
+
const PD_BIN = getBuiltPdCliPath();
|
|
38
|
+
|
|
39
|
+
const WORKSPACES: string[] = [];
|
|
40
|
+
|
|
41
|
+
/**
 * Per-test cleanup: delete every temp workspace registered in WORKSPACES.
 * Removal is best-effort — a failure is reported with console.warn and does
 * not fail the test.
 */
afterEach(() => {
  // Empty the registry up front, then walk it newest-first (same order as pop()).
  const pending = WORKSPACES.splice(0).reverse();
  for (const dir of pending) {
    if (!dir) continue;
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`[afterEach] Failed to clean ${dir}: ${reason}`);
    }
  }
});
|
|
55
|
+
|
|
56
|
+
/**
 * Suite precondition: the compiled CLI entry point must exist on disk.
 * Throws with a build hint when it does not.
 */
beforeAll(() => {
  // ERR-009/ERR-010: fail loud if the compiled binary is missing.
  if (fs.existsSync(PD_BIN)) return;

  throw new Error(
    `[cli-full-flow] Compiled binary not found at ${PD_BIN}. Run "npm run build --workspace=@principles/pd-cli" before running this test.`,
  );
});
|
|
65
|
+
|
|
66
|
+
/**
 * Create a fresh temp directory (prefix `pd-cli-e2e-`) under the OS temp dir
 * and register it in WORKSPACES.
 *
 * @returns Absolute path of the new workspace directory.
 */
function makeWorkspace(): string {
  const prefix = path.join(os.tmpdir(), 'pd-cli-e2e-');
  const created = fs.mkdtempSync(prefix);
  WORKSPACES.push(created);
  return created;
}
|
|
71
|
+
|
|
72
|
+
/** Outcome of one CLI invocation as captured by runPd. */
interface RunResult {
  /** Text the child wrote to stdout ('' when it could not be read from the error). */
  stdout: string;
  /** Text the child wrote to stderr ('' when it could not be read from the error). */
  stderr: string;
  /** 0 when the invocation resolved; otherwise the code read from the rejection. */
  exitCode: number;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Assert that a parsed JSON value is a plain (non-null, non-array) object.
 *
 * @param value - Value to check, typically the result of JSON.parse.
 * @param label - Human-readable name used as the prefix of the error message.
 * @returns The same value, typed as a string-keyed record.
 * @throws Error when value is a primitive, null, or an array.
 */
function requireRecord(value: unknown, label: string): Record<string, unknown> {
  const message = `${label} must be a JSON object`;
  if (value === null || typeof value !== 'object') {
    throw new Error(message);
  }
  if (Array.isArray(value)) {
    throw new Error(message);
  }
  return value;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Safely read an own string-valued property from a value of unknown shape.
 * ERR-001: no `as` casts on untrusted error data — use Reflect.get + typeof.
 *
 * @param obj - Any value, typically a caught error.
 * @param field - Name of the property to read.
 * @returns The property value when obj is a non-null object that owns `field`
 *          and the value is a string; otherwise the empty string.
 */
function readStringField(obj: unknown, field: string): string {
  if (obj !== null && typeof obj === 'object' && Object.hasOwn(obj, field)) {
    const candidate: unknown = Reflect.get(obj, field);
    if (typeof candidate === 'string') {
      return candidate;
    }
  }
  return '';
}
|
|
95
|
+
|
|
96
|
+
/**
 * Safely read a numeric `code` property from a value of unknown shape.
 * ERR-001: no `as` casts on untrusted error data — use Reflect.get + typeof.
 *
 * @param obj - Any value, typically a caught error.
 * @returns The own `code` property when it is a number; otherwise 1 (obj is
 *          not an object, has no own `code`, or `code` is not a number).
 */
function readExitCode(obj: unknown): number {
  if (obj !== null && typeof obj === 'object' && Object.hasOwn(obj, 'code')) {
    const candidate: unknown = Reflect.get(obj, 'code');
    if (typeof candidate === 'number') {
      return candidate;
    }
  }
  return 1;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Run the compiled pd CLI as a child process and capture its outcome.
 * Never rejects: a failed invocation is reported through the returned fields.
 *
 * @param args - CLI arguments placed after the binary path.
 * @param options - Optional overrides; `timeout` is the child-process timeout
 *                  in milliseconds (defaults to 30_000).
 * @returns stdout/stderr text and an exit code. exitCode is 0 when the child
 *          resolved; otherwise it is the numeric `code` on the rejection, or 1
 *          when no numeric code is present.
 */
async function runPd(
  args: string[],
  options: { timeout?: number } = {},
): Promise<RunResult> {
  try {
    // Spawn the interpreter that is running this test instead of whichever
    // `node` comes first on PATH: a PATH lookup can resolve to a different
    // Node.js install than the one the suite runs on, or fail outright when
    // `node` is not on PATH.
    const { stdout, stderr } = await execFileAsync(process.execPath, [PD_BIN, ...args], {
      timeout: options.timeout ?? 30_000,
      maxBuffer: 10 * 1024 * 1024,
    });
    return { stdout, stderr, exitCode: 0 };
  } catch (err: unknown) {
    // The rejection is untrusted data; read each field defensively.
    return {
      stdout: readStringField(err, 'stdout'),
      stderr: readStringField(err, 'stderr'),
      exitCode: readExitCode(err),
    };
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * End-to-end cases: each one invokes the CLI through runPd and asserts on the
 * captured exit code, stdout and stderr.
 */
describe('CLI full flow', () => {
  it('pd --help exits 0 and lists main commands', async () => {
    const help = await runPd(['--help']);
    expect(help.exitCode).toBe(0);
    expect(help.stdout).toContain('Usage: pd');
    // Verify key command groups are registered and visible to users
    for (const group of ['pain', 'runtime', 'diagnose', 'candidate']) {
      expect(help.stdout).toContain(group);
    }
  });

  it('pd --version exits 0 and prints version', async () => {
    const version = await runPd(['--version']);
    expect(version.exitCode).toBe(0);
    expect(version.stdout.trim()).toMatch(/^\d+\.\d+\.\d+/);
  });

  it('pd runtime features --json on fresh workspace returns valid JSON with defaults', async () => {
    const workspace = makeWorkspace();
    const argv = ['runtime', 'features', '--workspace', workspace, '--json'];
    const run = await runPd(argv);
    expect(run.exitCode).toBe(0);

    // EP-04 Rule 1: --json stdout must be exactly one parseable JSON object
    const payload: unknown = JSON.parse(run.stdout);
    const report = requireRecord(payload, 'runtime features output');
    expect(report).toHaveProperty('status');
    expect(report).toHaveProperty('source');
    expect(report).toHaveProperty('features');
    expect(Array.isArray(report.features)).toBe(true);
    expect(report).toHaveProperty('enabledMvpChannels');
    expect(Array.isArray(report.enabledMvpChannels)).toBe(true);
    // Fresh workspace with no .pd/config.yaml falls back to defaults
    expect(report.source).toBe('defaults');
  });

  it('pd runtime activation list --json on fresh workspace returns empty list and bootstraps .pd/', async () => {
    const workspace = makeWorkspace();
    const argv = ['runtime', 'activation', 'list', '--workspace', workspace, '--json'];
    const run = await runPd(argv);
    expect(run.exitCode).toBe(0);

    // EP-04 Rule 1: --json stdout must be exactly one parseable JSON object
    const payload: unknown = JSON.parse(run.stdout);
    const listing = requireRecord(payload, 'activation list output');
    expect(listing).toHaveProperty('activations');
    const { activations } = listing;
    expect(Array.isArray(activations)).toBe(true);
    expect(activations).toEqual([]);

    // SqliteConnection constructor creates .pd/ and state.db on first access
    const stateDir = path.join(workspace, '.pd');
    expect(fs.existsSync(stateDir)).toBe(true);
    expect(fs.existsSync(path.join(workspace, '.pd', 'state.db'))).toBe(true);
  });

  it('pd <invalid-command> exits non-zero with non-empty stderr', async () => {
    const failure = await runPd(['this-command-does-not-exist']);
    expect(failure.exitCode).not.toBe(0);
    expect(failure.stderr.length).toBeGreaterThan(0);
    expect(failure.stderr).toContain('unknown command');
  });
});
|
|
@@ -174,6 +174,7 @@ function artificerV2(taskId: string, priorId?: string): unknown {
|
|
|
174
174
|
taskId, sourceScribeArtifactId: requireLineage(priorId, 'sourceScribeArtifactId'),
|
|
175
175
|
implementationPlan: { summary: 'Block /etc writes', targetSurface: 'rule-host', changes: ['matcher'], tests: ['unit'], rolloutNotes: ['shadow'], confidence: 0.85 },
|
|
176
176
|
implementationCode: 'function evaluate(input, helpers) { const p = String(input?.action?.paramsSummary?.path ?? input?.action?.normalizedPath ?? ""); return p.startsWith("/etc") ? { decision: "block", matched: true, reason: "system path" } : { decision: "allow", matched: false, reason: "ok" }; }',
|
|
177
|
+
implementationSummary: 'Block system path writes',
|
|
177
178
|
goldenTraceCases: [
|
|
178
179
|
{ caseId: 'pos-1', kind: 'positive', toolName: 'write_file', params: { path: '/project/f.txt' }, expectedDecision: 'allow' },
|
|
179
180
|
{ caseId: 'neg-1', kind: 'negative', toolName: 'write_file', params: { path: '/etc/passwd' }, expectedDecision: 'block' },
|
|
@@ -16,10 +16,10 @@ import { describe, it, expect, afterEach, vi } from 'vitest';
|
|
|
16
16
|
import * as os from 'node:os';
|
|
17
17
|
import * as path from 'node:path';
|
|
18
18
|
import * as fs from 'node:fs';
|
|
19
|
-
import {
|
|
19
|
+
import { runRuleHostPipeline } from '../../src/services/rulehost-pipeline-runner.js';
|
|
20
20
|
import type { CodeRuleCapability } from '../../src/services/rulehost-pipeline-runner.js';
|
|
21
21
|
import type { PDRuntimeAdapter, RunHandle, RunStatus, PIArtifactStore, RuntimeCapabilities, RuntimeHealth, RuntimeArtifactRef, ContextItem, StructuredRunOutput, StartRunInput } from '@principles/core/runtime-v2';
|
|
22
|
-
import {
|
|
22
|
+
import { RuntimeStateManager, createPITaskDiagnosticJson } from '@principles/core/runtime-v2';
|
|
23
23
|
|
|
24
24
|
type StageFactory = (taskId: string, priorArtifactId?: string) => unknown;
|
|
25
25
|
type EvaluatorFactory = (taskId: string, artificerArtifactId: string) => unknown;
|
|
@@ -138,6 +138,7 @@ function artificerV2(taskId: string, priorId?: string): unknown {
|
|
|
138
138
|
taskId, sourceScribeArtifactId: requireLineage(priorId, 'sourceScribeArtifactId'),
|
|
139
139
|
implementationPlan: { summary: 'Block /etc writes', targetSurface: 'rule-host', changes: ['matcher'], tests: ['unit'], rolloutNotes: ['shadow'], confidence: 0.85 },
|
|
140
140
|
implementationCode: 'function evaluate(input, helpers) { const p = String(input?.action?.paramsSummary?.path ?? input?.action?.normalizedPath ?? ""); return p.startsWith("/etc") ? { decision: "block", matched: true, reason: "system path" } : { decision: "allow", matched: false, reason: "ok" }; }',
|
|
141
|
+
implementationSummary: 'Block system path writes',
|
|
141
142
|
goldenTraceCases: [
|
|
142
143
|
{ caseId: 'pos-1', kind: 'positive', toolName: 'write_file', params: { path: '/project/f.txt' }, expectedDecision: 'allow' },
|
|
143
144
|
{ caseId: 'neg-1', kind: 'negative', toolName: 'write_file', params: { path: '/etc/passwd' }, expectedDecision: 'block' },
|
|
@@ -172,6 +173,18 @@ function evaluatorRejected(taskId: string, artificerArtifactId: string): unknown
|
|
|
172
173
|
};
|
|
173
174
|
}
|
|
174
175
|
|
|
176
|
+
/**
 * Test fixture: build an evaluator-stage output whose decision is
 * `needs_revision`, carrying one adversarial case (`adv-1`) that is reported
 * as failed (expected `block`, actual `allow`).
 *
 * @param taskId - Task id copied into the payload.
 * @param artificerArtifactId - Artificer artifact id recorded as the source
 *                              (both top-level and inside `sourceTrace`).
 * @returns The fixture object; `generatedAt` is the current time in ISO form.
 */
function evaluatorNeedsRevision(taskId: string, artificerArtifactId: string): unknown {
  const evaluation = {
    decision: 'needs_revision',
    summary: 'needs revision: adversarial replay failed',
    score: 0.4,
    strengths: [],
    concerns: ['adversarial case failed'],
    requiredChanges: ['fix matcher'],
  };

  const codeReview = {
    intentConsistency: { aligned: false, explanation: 'misses system paths' },
    scopePrecision: { verdict: 'too_narrow' as const, explanation: 'narrow' },
    traceCoverage: { sufficient: false, gaps: [], explanation: 'missing' },
  };

  const adversarialCase = {
    caseId: 'adv-1',
    attackType: 'boundary' as const,
    toolName: 'write_file',
    params: { path: '/etc/shadow' },
    expectedDecision: 'block' as const,
    rationale: 'system path',
  };

  const failedCase = {
    caseId: 'adv-1',
    attackType: 'boundary' as const,
    actualDecision: 'allow',
    expectedDecision: 'block',
    rationale: 'system path',
  };

  // Key order is kept identical to the serialized shape consumers see.
  return {
    taskId,
    sourceArtificerArtifactId: artificerArtifactId,
    evaluation,
    sourceTrace: { artificerArtifactId },
    risks: [],
    generatedAt: new Date().toISOString(),
    codeReview,
    adversarialCases: [adversarialCase],
    adversarialResult: { passed: false, failedCases: [failedCase] },
  };
}
|
|
187
|
+
|
|
175
188
|
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
176
189
|
|
|
177
190
|
let tmpDir = '';
|
|
@@ -247,51 +260,59 @@ describe('runRuleHostPipeline (PRI-429) — atomic capability + exact pain match
|
|
|
247
260
|
expect(result.approvalId).not.toBeNull();
|
|
248
261
|
}, 60_000);
|
|
249
262
|
|
|
250
|
-
it('
|
|
263
|
+
it('adversarial feedback loop drives a second artificer round before creating a candidate', async () => {
|
|
251
264
|
tmpDir = makeTmpDir();
|
|
252
265
|
const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
|
|
253
266
|
await sm.initialize();
|
|
254
|
-
await seedDreamerWithId(sm, 'dreamer-
|
|
267
|
+
await seedDreamerWithId(sm, 'dreamer-feedback-001', 'pain-feedback-001');
|
|
255
268
|
await sm.close();
|
|
256
269
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
270
|
+
let adapter: ScriptedAdapter;
|
|
271
|
+
let artificerCallCount = 0;
|
|
272
|
+
const artificerPrompts: string[] = [];
|
|
273
|
+
adapter = new ScriptedAdapter({
|
|
274
|
+
dreamer: (taskId) => dreamerOut(taskId, 'pain-feedback-001'),
|
|
275
|
+
philosopher: philosopherOut,
|
|
276
|
+
scribe: scribeOut,
|
|
277
|
+
artificer: (taskId, priorId) => {
|
|
278
|
+
artificerCallCount++;
|
|
279
|
+
const runId = `run-${taskId}`;
|
|
280
|
+
const inputPayload = adapter.startRunInputs.get(runId)?.inputPayload;
|
|
281
|
+
if (typeof inputPayload === 'string') artificerPrompts.push(inputPayload);
|
|
282
|
+
|
|
283
|
+
const base = artificerV2(taskId, priorId);
|
|
284
|
+
if (artificerCallCount === 1) {
|
|
285
|
+
// Round 1: code fails the adversarial replay, forcing needs_revision.
|
|
286
|
+
return {
|
|
287
|
+
...base,
|
|
288
|
+
implementationCode: 'function evaluate() { return { decision: "allow", matched: false, reason: "bug" }; }',
|
|
289
|
+
};
|
|
271
290
|
}
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
291
|
+
// Round 2: fixed code passes the evaluator.
|
|
292
|
+
return base;
|
|
293
|
+
},
|
|
294
|
+
evaluator: (taskId, artificerArtifactId) => {
|
|
295
|
+
if (artificerCallCount === 1) {
|
|
296
|
+
return evaluatorNeedsRevision(taskId, artificerArtifactId);
|
|
277
297
|
}
|
|
278
|
-
return
|
|
298
|
+
return evaluatorApproved(taskId, artificerArtifactId);
|
|
279
299
|
},
|
|
280
300
|
});
|
|
281
301
|
|
|
282
302
|
const result = await runRuleHostPipeline({
|
|
283
303
|
workspaceDir: tmpDir,
|
|
284
|
-
painId: 'pain-
|
|
285
|
-
runtimeAdapter:
|
|
286
|
-
codeRuleCapability: { enabled: true, artificerAdapter:
|
|
304
|
+
painId: 'pain-feedback-001',
|
|
305
|
+
runtimeAdapter: adapter,
|
|
306
|
+
codeRuleCapability: { enabled: true, artificerAdapter: adapter },
|
|
287
307
|
channel: 'code_tool_hook',
|
|
288
308
|
pollIntervalMs: 5,
|
|
289
309
|
timeoutMs: 1000,
|
|
290
|
-
onStoreReady: (store) => {
|
|
310
|
+
onStoreReady: (store) => { adapter.artifactStore = store; },
|
|
291
311
|
});
|
|
292
312
|
|
|
293
|
-
expect(
|
|
294
|
-
expect(
|
|
313
|
+
expect(artificerCallCount).toBe(2);
|
|
314
|
+
expect(artificerPrompts).toHaveLength(2);
|
|
315
|
+
expect(artificerPrompts[1]).toContain('Prior adversarial replay failures');
|
|
295
316
|
expect(result.decision, JSON.stringify(result)).toBe('candidate_ready_for_owner_review');
|
|
296
317
|
expect(result.ruleArtifactId).toMatch(/^pi-rule-/);
|
|
297
318
|
// P1 #1 fix: candidate should be auto-enqueued into the ApprovalQueue
|