@principles/pd-cli 1.112.0 → 1.114.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/__tests__/run-rulehost-flag-wiring.test.d.ts +24 -0
- package/dist/commands/__tests__/run-rulehost-flag-wiring.test.d.ts.map +1 -0
- package/dist/commands/__tests__/run-rulehost-flag-wiring.test.js +223 -0
- package/dist/commands/__tests__/run-rulehost-flag-wiring.test.js.map +1 -0
- package/dist/commands/runtime-activation.d.ts +37 -0
- package/dist/commands/runtime-activation.d.ts.map +1 -1
- package/dist/commands/runtime-activation.js +416 -2
- package/dist/commands/runtime-activation.js.map +1 -1
- package/dist/commands/runtime-internalization-run-rulehost.d.ts +23 -0
- package/dist/commands/runtime-internalization-run-rulehost.d.ts.map +1 -0
- package/dist/commands/runtime-internalization-run-rulehost.js +364 -0
- package/dist/commands/runtime-internalization-run-rulehost.js.map +1 -0
- package/dist/index.js +56 -1
- package/dist/index.js.map +1 -1
- package/dist/services/demo-rule-compiler.d.ts +24 -0
- package/dist/services/demo-rule-compiler.d.ts.map +1 -0
- package/dist/services/demo-rule-compiler.js +53 -0
- package/dist/services/demo-rule-compiler.js.map +1 -0
- package/dist/services/rulehost-pipeline-runner.d.ts +132 -0
- package/dist/services/rulehost-pipeline-runner.d.ts.map +1 -0
- package/dist/services/rulehost-pipeline-runner.js +376 -0
- package/dist/services/rulehost-pipeline-runner.js.map +1 -0
- package/package.json +1 -1
- package/scripts/llm-dogfood.ts +419 -0
- package/src/commands/__tests__/run-rulehost-flag-wiring.test.ts +280 -0
- package/src/commands/runtime-activation.ts +459 -1
- package/src/commands/runtime-internalization-run-rulehost.ts +417 -0
- package/src/index.ts +60 -1
- package/src/services/demo-rule-compiler.ts +71 -0
- package/src/services/rulehost-pipeline-runner.ts +638 -0
- package/tests/commands/cli-command-tree.test.ts +14 -0
- package/tests/commands/runtime-activation.test.ts +553 -1
- package/tests/e2e/cross-package-acceptance.test.ts +549 -0
- package/tests/services/rulehost-pipeline-e2e.test.ts +477 -0
- package/tests/services/rulehost-pipeline-runner.test.ts +525 -0
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM Dogfood Script — PRI-408 (P1/P2 fixes validation)
|
|
3
|
+
*
|
|
4
|
+
* Runs the RuleHost pipeline with a REAL LLM (qwen3.6-27b-mtp via LM Studio)
|
|
5
|
+
* to validate the six-step value chain end-to-end:
|
|
6
|
+
*
|
|
7
|
+
* pain → dreamer → philosopher → scribe → artificer ↔ evaluator
|
|
8
|
+
* → candidate → auto-enqueue → owner approve → activate
|
|
9
|
+
* → before/after behavior comparison → deactivate → restore
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* npx tsx scripts/llm-dogfood.ts
|
|
13
|
+
*
|
|
14
|
+
* Prerequisites:
|
|
15
|
+
* - LM Studio running on http://localhost:12341 with qwen3.6-27b-mtp loaded
|
|
16
|
+
* - Set LMSTUDIO_API_KEY=lm-studio (or any non-empty string)
|
|
17
|
+
*
|
|
18
|
+
* Output:
|
|
19
|
+
* - Console log with each stage's result
|
|
20
|
+
 * - dogfood-output/dogfood-output.json (relative to the current working directory) with full structured results
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import * as os from 'node:os';
|
|
24
|
+
import * as path from 'node:path';
|
|
25
|
+
import * as fs from 'node:fs';
|
|
26
|
+
import {
|
|
27
|
+
PiAiRuntimeAdapter,
|
|
28
|
+
ArtificerL2Adapter,
|
|
29
|
+
buildArtificerL2GenerateCode,
|
|
30
|
+
DefaultArtificerValidator,
|
|
31
|
+
RuntimeStateManager,
|
|
32
|
+
SqliteApprovalQueueStore,
|
|
33
|
+
SqliteActivationStateStore,
|
|
34
|
+
SqlitePIArtifactStore,
|
|
35
|
+
ActivationDispatcher,
|
|
36
|
+
ApprovalCompletionService,
|
|
37
|
+
PromptWriter,
|
|
38
|
+
DeferArchiveWriter,
|
|
39
|
+
RuleHostWriter,
|
|
40
|
+
createProductionGateDeps,
|
|
41
|
+
makeIdempotencyKey,
|
|
42
|
+
createPITaskDiagnosticJson,
|
|
43
|
+
} from '@principles/core/runtime-v2';
|
|
44
|
+
import type { PIArtifactSnapshot, PIArtifactRecord } from '@principles/core/runtime-v2';
|
|
45
|
+
import { runRuleHostPipeline, createSandboxGateDeps } from '../src/services/rulehost-pipeline-runner.js';
|
|
46
|
+
import { compileDemoRule } from '../src/services/demo-rule-compiler.js';
|
|
47
|
+
import type { CodeRuleCapability } from '../src/services/rulehost-pipeline-runner.js';
|
|
48
|
+
|
|
49
|
+
// ── Config ───────────────────────────────────────────────────────────────────
|
|
50
|
+
|
|
51
|
+
/** OpenAI-compatible endpoint that LM Studio is expected to serve locally. */
const LM_STUDIO_BASE_URL = 'http://localhost:12341/v1';
/** Identifier of the model that must be loaded in LM Studio. */
const MODEL_ID = 'qwen3.6-27b-mtp';
/** Provider name handed to the runtime adapters. */
const PROVIDER = 'lmstudio';
/** Name of the environment variable the API key is read from. */
const API_KEY_ENV = 'LMSTUDIO_API_KEY';

// LM Studio itself does not need a real key, but PiAiRuntimeAdapter checks
// that process.env[apiKeyEnv] is non-empty, so fall back to a placeholder
// whenever the variable is missing or blank.
if ((process.env[API_KEY_ENV] ?? '') === '') {
  process.env[API_KEY_ENV] = 'lm-studio';
}
|
|
61
|
+
|
|
62
|
+
// ── Dogfood pain scenario ────────────────────────────────────────────────────
|
|
63
|
+
|
|
64
|
+
/**
 * The single pain scenario this script feeds into the pipeline.
 *
 * Key order matters: the object is serialised verbatim into the JSON output
 * files, so the fields are kept in their original sequence.
 */
const DOGFOOD_PAIN = {
  // Identifier under which the pain signal is seeded and later looked up.
  painId: 'pain-dogfood-001',
  // What happened.
  scenario: 'Agent attempted to write to /etc/passwd during a file operation',
  // What the agent did wrong.
  badDecision: 'Wrote directly to /etc/passwd without checking if it is a system path',
  // What the agent should have done instead.
  betterDecision: 'Block writes to system paths (/etc, /boot, /sys, /proc)',
  // Free-form description of the circumstances.
  context: 'File write operation targeting a system-critical path',
};
|
|
71
|
+
|
|
72
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
73
|
+
|
|
74
|
+
/**
 * Creates a fresh scratch workspace under the OS temp directory.
 *
 * The directory name keeps the `pd-dogfood-<epoch-ms>` prefix but is finished
 * by `fs.mkdtempSync`, which appends random characters and fails rather than
 * reusing an existing directory. Two calls within the same millisecond (or two
 * concurrent runs) therefore never end up sharing one workspace.
 *
 * @returns Path of the newly created directory.
 */
function makeTmpDir(): string {
  const prefix = path.join(os.tmpdir(), `pd-dogfood-${Date.now()}-`);
  return fs.mkdtempSync(prefix);
}
|
|
79
|
+
|
|
80
|
+
/**
 * Projects a stored artifact record onto the snapshot shape.
 *
 * Only the ten fields listed below are copied; any other property carried by
 * the record is left out of the result.
 *
 * @param record Artifact record as returned by the artifact store.
 * @returns A new object holding the copied fields.
 */
function toSnapshot(record: PIArtifactRecord): PIArtifactSnapshot {
  const {
    artifactId,
    artifactKind,
    sourceTaskId,
    sourcePrincipleId,
    sourceRuleId,
    lineageArtifactIds,
    validationStatus,
    contentJson,
    createdAt,
    updatedAt,
  } = record;

  return {
    artifactId,
    artifactKind,
    sourceTaskId,
    sourcePrincipleId,
    sourceRuleId,
    lineageArtifactIds,
    validationStatus,
    contentJson,
    createdAt,
    updatedAt,
  };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Seeds the workspace with the pending `dreamer` task that carries the dogfood
 * pain signal.
 *
 * The task's diagnostic JSON is the default metadata produced by
 * `createPITaskDiagnosticJson`, extended with the pain id and the scenario
 * texts from `DOGFOOD_PAIN`.
 *
 * @param sm     State manager used to create the task.
 * @param painId Identifier stored as `sourcePainId` in the diagnostic JSON.
 */
async function seedPainSignal(sm: RuntimeStateManager, painId: string): Promise<void> {
  const serializedDefaults = createPITaskDiagnosticJson({
    dependencyTaskIds: [],
    channel: 'code_tool_hook',
    timeoutMs: 300_000,
    inputArtifactRefs: [],
    outputArtifactRefs: [],
  });
  const defaults = JSON.parse(serializedDefaults) as Record<string, unknown>;

  // Pain-specific fields are spread last so they win over any default key.
  const painFields = {
    sourcePainId: painId,
    painSummary: DOGFOOD_PAIN.scenario,
    badDecision: DOGFOOD_PAIN.badDecision,
    betterDecision: DOGFOOD_PAIN.betterDecision,
  };
  const diagnosticJson = JSON.stringify({ ...defaults, ...painFields });

  await sm.createTask({
    taskId: 'dreamer-dogfood-001',
    taskKind: 'dreamer',
    status: 'pending',
    attemptCount: 0,
    maxAttempts: 3,
    diagnosticJson,
  });
}
|
|
115
|
+
|
|
116
|
+
/**
 * Prints one timestamped progress line, optionally followed by a JSON dump.
 *
 * @param stage   Short stage label shown in square brackets.
 * @param message Human-readable message.
 * @param detail  Optional payload; when anything other than `undefined` is
 *                passed it is pretty-printed on a second `console.log` call.
 */
function log(stage: string, message: string, detail?: unknown): void {
  const prefix = `[${new Date().toISOString()}] [${stage}]`;
  console.log(`${prefix} ${message}`);

  if (detail === undefined) {
    return;
  }
  console.log(JSON.stringify(detail, null, 2));
}
|
|
123
|
+
|
|
124
|
+
// ── Main dogfood ─────────────────────────────────────────────────────────────
|
|
125
|
+
|
|
126
|
+
/** Outcome of evaluating the generated rule against one behavior test case. */
interface BehaviorResult {
  name: string;
  expected: string;
  actual: string;
  passed: boolean;
}

/**
 * Writes an early-exit payload to `dogfood-partial.json` inside `outputDir`.
 *
 * @param outputDir Directory that receives the file (must already exist).
 * @param payload   Object serialised as pretty-printed JSON.
 * @returns Path of the written file.
 */
function savePartialResults(outputDir: string, payload: Record<string, unknown>): string {
  const partialPath = path.join(outputDir, 'dogfood-partial.json');
  fs.writeFileSync(partialPath, JSON.stringify(payload, null, 2));
  return partialPath;
}

/**
 * Compiles the generated rule with `compileDemoRule` and evaluates it against
 * two system-path writes (expected `block`) and two ordinary writes (expected
 * `allow`).
 *
 * A compilation failure is logged and yields an empty list; a failure while
 * evaluating a single case is recorded on that case as `error: <message>`.
 *
 * @param ruleCode Source text of the generated rule.
 * @returns One result per test case, or `[]` when compilation failed.
 */
function runBehaviorComparison(ruleCode: string): BehaviorResult[] {
  const testCases = [
    { name: 'system-path-write', input: { action: { paramsSummary: { path: '/etc/passwd' } } }, expected: 'block' },
    { name: 'system-path-write-boot', input: { action: { paramsSummary: { path: '/boot/grub.cfg' } } }, expected: 'block' },
    { name: 'normal-write', input: { action: { paramsSummary: { path: '/project/src/main.ts' } } }, expected: 'allow' },
    { name: 'normal-write-2', input: { action: { paramsSummary: { path: '/home/user/file.txt' } } }, expected: 'allow' },
  ];

  // P1 #1 fix: use the production vm sandbox (compileDemoRule) instead of
  // `new Function`, which bypasses the sandbox and has no timeout protection.
  // Also, `new Function('input', 'helpers', ruleCode)` would define `evaluate`
  // inside the function body but not call it, returning undefined — making
  // the behavior comparison unreliable.
  try {
    const evaluateFn = compileDemoRule(ruleCode, 'dogfood-behavior-test');
    return testCases.map((tc): BehaviorResult => {
      try {
        const result = evaluateFn(tc.input as never, {} as never);
        const actual = typeof result === 'object' && result !== null && 'decision' in result
          ? String((result as Record<string, unknown>).decision)
          : 'unknown';
        return { name: tc.name, expected: tc.expected, actual, passed: actual === tc.expected };
      } catch (err) {
        return {
          name: tc.name,
          expected: tc.expected,
          actual: `error: ${err instanceof Error ? err.message : String(err)}`,
          passed: false,
        };
      }
    });
  } catch (err) {
    log('STEP-6', 'Rule code compilation failed (vm sandbox)', { error: err instanceof Error ? err.message : String(err) });
    return [];
  }
}

/**
 * Runs the complete dogfood scenario against a real LLM:
 *
 *   1. seed the pain signal into a fresh temp workspace
 *   2. run the RuleHost pipeline
 *   3. extract the generated principle, rule code and evaluator judgment
 *   4. approve the candidate as the owner
 *   5. dispatch the activation
 *   6. compare rule behavior on system and non-system paths
 *   7. deactivate the rule and report the resulting state
 *
 * Results are written to `<cwd>/dogfood-output/`. When the pipeline yields no
 * candidate, or the candidate was not auto-enqueued, a `dogfood-partial.json`
 * is written instead and the function returns early.
 *
 * Every state manager that was successfully initialised is closed again even
 * when a later step throws. The temp workspace is removed only after a fully
 * successful run (unchanged from the previous behavior), so early exits and
 * failures leave it on disk.
 *
 * @throws Error when the API key variable is empty, when the pipeline reports
 *         a ready candidate without a rule artifact id, or when any pipeline,
 *         store or file-system call rejects.
 */
async function main(): Promise<void> {
  const outputDir = path.resolve(process.cwd(), 'dogfood-output');
  fs.mkdirSync(outputDir, { recursive: true });

  const tmpDir = makeTmpDir();
  log('SETUP', `Workspace: ${tmpDir}`);
  log('SETUP', `LLM: ${MODEL_ID} @ ${LM_STUDIO_BASE_URL}`);

  // ── Step 1: Seed pain signal ──────────────────────────────────────────────
  log('STEP-1', `Seeding pain: ${DOGFOOD_PAIN.scenario}`);
  const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
  await sm.initialize();
  try {
    await seedPainSignal(sm, DOGFOOD_PAIN.painId);
  } finally {
    // Release the connection even if seeding fails.
    await sm.close();
  }

  // ── Step 2: Run RuleHost pipeline with real LLM ──────────────────────────
  log('STEP-2', 'Starting RuleHost pipeline (dreamer → philosopher → scribe → artificer ↔ evaluator)');

  // Fail with a clear message instead of asserting the value is present.
  const apiKey = process.env[API_KEY_ENV];
  if (!apiKey) {
    throw new Error(`Environment variable ${API_KEY_ENV} must be set to a non-empty value`);
  }

  const adapter = new PiAiRuntimeAdapter({
    provider: PROVIDER,
    model: MODEL_ID,
    apiKeyEnv: API_KEY_ENV,
    baseUrl: LM_STUDIO_BASE_URL,
    maxRetries: 1,
    timeoutMs: 600_000,
    maxTokens: 8192,
    reasoning: false,
    workspace: tmpDir,
  });

  // Construct the ArtificerL2Adapter for the artificer stage.
  // This adapter uses buildArtificerL2GenerateCode to call the LLM directly
  // (via completeSimple) and runs sandbox replay to validate generated code.
  const generateCode = buildArtificerL2GenerateCode({
    provider: PROVIDER,
    model: MODEL_ID,
    apiKey,
    baseUrl: LM_STUDIO_BASE_URL,
    timeoutMs: 600_000,
  });

  const artificerAdapter = new ArtificerL2Adapter({
    generateCode,
    gateDeps: createSandboxGateDeps(),
    validator: new DefaultArtificerValidator(),
    maxAttempts: 3,
  });

  const capability: CodeRuleCapability = { enabled: true, artificerAdapter };

  let pipelineResult;
  try {
    pipelineResult = await runRuleHostPipeline({
      workspaceDir: tmpDir,
      painId: DOGFOOD_PAIN.painId,
      runtimeAdapter: adapter,
      channel: 'code_tool_hook',
      pollIntervalMs: 200,
      timeoutMs: 600_000,
      maxRounds: 2,
      codeRuleCapability: capability,
      onProgress: (stage, status, detail) => {
        log('PIPELINE', `${stage}: ${status}${detail ? ' — ' + detail : ''}`);
      },
    });
  } catch (err) {
    log('STEP-2', 'Pipeline threw', { error: err instanceof Error ? err.message : String(err) });
    throw err;
  }

  log('STEP-2', `Pipeline decision: ${pipelineResult.decision}`, {
    ruleArtifactId: pipelineResult.ruleArtifactId,
    principleArtifactId: pipelineResult.principleArtifactId,
    approvalId: pipelineResult.approvalId,
    degradationReason: pipelineResult.degradationReason,
  });

  if (pipelineResult.decision !== 'candidate_ready_for_owner_review') {
    log('STEP-2', 'Pipeline did not produce a candidate — saving partial results and exiting');
    const partialPath = savePartialResults(outputDir, {
      pain: DOGFOOD_PAIN,
      pipelineResult,
      timestamp: new Date().toISOString(),
    });
    console.log(`\nPartial results saved to ${partialPath}`);
    return;
  }

  // A ready candidate without a rule artifact id cannot be inspected or
  // deactivated; stop here rather than passing `undefined` to the stores.
  const { ruleArtifactId } = pipelineResult;
  if (!ruleArtifactId) {
    throw new Error('Pipeline reported candidate_ready_for_owner_review without a ruleArtifactId');
  }

  // ── Step 3: Extract generated principle + RuleCode + evaluator judgment ──
  log('STEP-3', 'Extracting generated artifacts');
  const sm2 = new RuntimeStateManager({ workspaceDir: tmpDir });
  await sm2.initialize();
  try {
    const artifactStore = new SqlitePIArtifactStore(sm2.connection);
    const approvalStore = new SqliteApprovalQueueStore(sm2.connection);
    const stateStore = new SqliteActivationStateStore(sm2.connection);

    const ruleArtifact = await artifactStore.getArtifactById(ruleArtifactId);
    const principleArtifact = pipelineResult.principleArtifactId
      ? await artifactStore.getArtifactById(pipelineResult.principleArtifactId)
      : null;

    let generatedPrinciple: unknown = null;
    let ruleCode: string | null = null;
    let evaluatorJudgment: unknown = null;

    if (principleArtifact) {
      try {
        generatedPrinciple = JSON.parse(principleArtifact.contentJson);
      } catch {
        // Not valid JSON: keep the raw text so it still appears in the output.
        generatedPrinciple = principleArtifact.contentJson;
      }
      log('STEP-3', 'Generated principle extracted', { artifactId: principleArtifact.artifactId });
    }

    if (ruleArtifact) {
      try {
        const ruleContent = JSON.parse(ruleArtifact.contentJson) as Record<string, unknown>;
        ruleCode = typeof ruleContent.implementationCode === 'string' ? ruleContent.implementationCode : null;
        evaluatorJudgment = ruleContent.adversarialResult ?? null;
        log('STEP-3', 'RuleCode extracted', { artifactId: ruleArtifact.artifactId, codeLength: ruleCode?.length ?? 0 });
        log('STEP-3', 'Evaluator judgment', evaluatorJudgment);
      } catch {
        log('STEP-3', 'Failed to parse rule artifact contentJson');
      }
    }

    // ── Step 4: Owner approves the candidate ───────────────────────────────
    log('STEP-4', 'Owner approving candidate');
    const approvalId = pipelineResult.approvalId;
    if (!approvalId) {
      log('STEP-4', 'No approvalId — candidate was not auto-enqueued', {
        degradationReason: pipelineResult.degradationReason,
      });
      savePartialResults(outputDir, {
        pain: DOGFOOD_PAIN,
        pipelineResult,
        timestamp: new Date().toISOString(),
        error: 'auto_enqueue_failed',
      });
      console.log(`\nPartial results saved (enqueue failed)`);
      return; // sm2 is closed by the finally block below
    }
    const approveResult = await approvalStore.approve(approvalId, 'owner-dogfood', 'Dogfood approval');
    log('STEP-4', `Approval result: ok=${approveResult.ok}`);

    // ── Step 5: Dispatch activation ────────────────────────────────────────
    log('STEP-5', 'Dispatching activation');
    const artifactReadModel = {
      getArtifactById: async (id: string): Promise<PIArtifactSnapshot | null> => {
        const rec = await artifactStore.getArtifactById(id);
        return rec ? toSnapshot(rec) : null;
      },
    };

    const dispatcher = new ActivationDispatcher(
      artifactReadModel,
      stateStore,
      {
        writers: [
          new PromptWriter(),
          new RuleHostWriter({ gateDeps: createProductionGateDeps() }),
          new DeferArchiveWriter(),
        ],
        approvalQueueStore: approvalStore,
      },
    );

    const completionService = new ApprovalCompletionService(
      approvalStore,
      dispatcher,
      stateStore,
    );

    const completionResult = await completionService.completeApproval({
      approvalId,
      actor: { kind: 'human', userId: 'owner-dogfood' },
      now: new Date().toISOString(),
    });

    log('STEP-5', `Activation result: ok=${completionResult.ok}`, completionResult);

    // ── Step 6: Before/after behavior comparison ───────────────────────────
    log('STEP-6', 'Before/after behavior comparison');
    const behaviorResults: BehaviorResult[] = ruleCode ? runBehaviorComparison(ruleCode) : [];
    log('STEP-6', 'Behavior results', behaviorResults);

    // ── Step 7: Deactivate and verify restoration ──────────────────────────
    log('STEP-7', 'Deactivating rule');
    const idempotencyKey = makeIdempotencyKey(ruleArtifactId, 'code_tool_hook');
    const activationRecord = await stateStore.getActivationStatus(idempotencyKey);
    if (activationRecord) {
      const deactivateResult = await stateStore.deactivateActivation(activationRecord.activationId, new Date().toISOString());
      log('STEP-7', `Deactivation: ${deactivateResult ? 'success' : 'failed'}`);

      // Verify the record is deactivated
      const afterDeactivate = await stateStore.getActivationStatus(idempotencyKey);
      log('STEP-7', `After deactivate: deactivatedAt=${afterDeactivate?.deactivatedAt ?? 'null'}`);
    } else {
      // Make the skipped step visible instead of passing over it silently.
      log('STEP-7', 'No activation record found — nothing to deactivate');
    }

    // ── Save full output ───────────────────────────────────────────────────
    const fullOutput = {
      timestamp: new Date().toISOString(),
      pain: DOGFOOD_PAIN,
      pipelineResult: {
        decision: pipelineResult.decision,
        ruleArtifactId: pipelineResult.ruleArtifactId,
        principleArtifactId: pipelineResult.principleArtifactId,
        approvalId: pipelineResult.approvalId,
        stages: pipelineResult.stages,
        degradationReason: pipelineResult.degradationReason,
      },
      generatedPrinciple,
      ruleCode,
      evaluatorJudgment,
      approval: { ok: approveResult.ok },
      activation: {
        ok: completionResult.ok,
        decision: completionResult.ok ? completionResult.decision : null,
        activationId: completionResult.ok ? completionResult.activationId : null,
      },
      behaviorResults,
      model: { provider: PROVIDER, model: MODEL_ID, baseUrl: LM_STUDIO_BASE_URL },
    };

    const outputPath = path.join(outputDir, 'dogfood-output.json');
    fs.writeFileSync(outputPath, JSON.stringify(fullOutput, null, 2));

    // Also save rule code separately for easy review
    if (ruleCode) {
      fs.writeFileSync(path.join(outputDir, 'generated-rule.js'), ruleCode);
    }

    log('DONE', `Results saved to ${outputDir}/`);
  } finally {
    // Always release the connection, including when a step above throws.
    await sm2.close();
  }

  // Cleanup tmp dir (only reached after a fully successful run, and only
  // after the connection above has been closed).
  try {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  } catch {
    // Best-effort cleanup: a leftover temp directory is not a failure.
  }
}
|
|
415
|
+
|
|
416
|
+
// Script entry point: report any unhandled failure and exit with status 1.
main().catch((failure: unknown) => {
  console.error('Dogfood failed:', failure);
  process.exit(1);
});
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parser-level tests for `pd runtime internalization run-rulehost` flags (PRI-429).
|
|
3
|
+
*
|
|
4
|
+
* CLI gate rule 7: "Test the real command wiring — when behavior depends on
|
|
5
|
+
* Commander options, add a command-registration or parser test that exercises
|
|
6
|
+
* the actual flags."
|
|
7
|
+
*
|
|
8
|
+
* Tests the real `registerRunRuleHostCommand` helper (single source of truth
|
|
9
|
+
* shared with `index.ts`). Flag typos in production surface here at
|
|
10
|
+
* parseAsync time, not at handler dispatch.
|
|
11
|
+
*
|
|
12
|
+
* Covers:
|
|
13
|
+
* - --dry-run and --confirm are registered
|
|
14
|
+
* - --dry-run and --confirm can both be parsed
|
|
15
|
+
* - --json is registered
|
|
16
|
+
* - --pain-id is required (Commander rejects missing required option)
|
|
17
|
+
* - --workspace / -w shorthand is registered
|
|
18
|
+
* - --no-dry-run / --no-confirm are NOT registered (no accidental negation)
|
|
19
|
+
* - mutual exclusivity is enforced at handler level (not parser level —
|
|
20
|
+
* Commander doesn't natively support .conflicts() for boolean flags
|
|
21
|
+
* without explicit registration; the handler validates this)
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { describe, it, expect } from 'vitest';
|
|
25
|
+
import { Command } from 'commander';
|
|
26
|
+
import { registerRunRuleHostCommand } from '../runtime-internalization-run-rulehost.js';
|
|
27
|
+
|
|
28
|
+
/** Loosely typed bag of option values handed to a Commander action. */
type ActionOptions = Record<string, unknown>;

/**
 * Mutable holder a test passes to `attachCapture`.
 * `opts` stays `null` until the captured action has run.
 */
interface CapturedAction {
  opts: null | ActionOptions;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Replaces the command's action with one that records the options it was
 * invoked with instead of running the real handler.
 *
 * The action arguments are scanned from last to first; the first non-null
 * object that is not a `Command` instance is stored in `state.opts`. When no
 * such argument exists, an empty object is stored.
 *
 * @param cmd   Command whose action is replaced.
 * @param state Holder that receives the captured options.
 */
function attachCapture(cmd: Command, state: CapturedAction): void {
  const isOptionsBag = (candidate: unknown): candidate is ActionOptions =>
    typeof candidate === 'object' && candidate !== null && !(candidate instanceof Command);

  cmd.action((...actionArgs: unknown[]): void => {
    // Reverse a copy so `find` returns the LAST matching argument.
    const lastBag = [...actionArgs].reverse().find(isOptionsBag);
    state.opts = lastBag ?? {};
  });
}
|
|
51
|
+
|
|
52
|
+
/**
 * Builds an isolated root command named `pd` with `exitOverride()` applied,
 * so Commander throws on parse errors instead of terminating the test process.
 */
function freshProgram(): Command {
  return new Command().name('pd').exitOverride();
}
|
|
57
|
+
|
|
58
|
+
describe('pd runtime internalization run-rulehost — flag wiring (CLI gate rule 7)', () => {
  /** argv prefix shared by every parser-level test. */
  const BASE_ARGV = ['node', 'pd', 'internalization', 'run-rulehost'];

  /** Registers the real command on a brand-new program. */
  const registerOnFreshProgram = () => {
    const program = freshProgram();
    const runCmd = registerRunRuleHostCommand(program.command('internalization'));
    return { program, runCmd };
  };

  /** Looks up a registered option by its long flag on a fresh registration. */
  const findLongOption = (flag: string) =>
    registerOnFreshProgram().runCmd.options.find((o) => o.long === flag);

  /** Parses `BASE_ARGV` plus `flags` and returns the options seen by the action. */
  const parseAndCapture = async (...flags: string[]): Promise<ActionOptions | null> => {
    const { program, runCmd } = registerOnFreshProgram();
    const captured: CapturedAction = { opts: null };
    attachCapture(runCmd, captured);
    await program.parseAsync([...BASE_ARGV, ...flags]);
    return captured.opts;
  };

  // ── Option metadata ──────────────────────────────────────────────────────

  it.each(['--dry-run', '--confirm', '--json'])('registers %s flag', (flag) => {
    const opt = findLongOption(flag);
    expect(opt).toBeDefined();
    expect(opt?.long).toBe(flag);
  });

  it('registers --pain-id as required option', () => {
    const opt = findLongOption('--pain-id');
    expect(opt).toBeDefined();
    expect(opt?.long).toBe('--pain-id');
    // requiredOption sets .required = true on the Option
    expect(opt?.required).toBe(true);
  });

  it('registers -w shorthand for --workspace', () => {
    const { runCmd } = registerOnFreshProgram();
    const opt = runCmd.options.find((o) => o.short === '-w');
    expect(opt).toBeDefined();
    expect(opt?.long).toBe('--workspace');
  });

  it.each(['--no-dry-run', '--no-confirm'])('does NOT register %s (no accidental negation)', (flag) => {
    expect(findLongOption(flag)).toBeUndefined();
  });

  // ── Parser-level tests (program.parseAsync) ───────────────────────────────

  it('parses --dry-run as true', async () => {
    const opts = await parseAndCapture('--pain-id', 'pain-1', '--dry-run');
    expect(opts).not.toBeNull();
    expect(opts?.dryRun).toBe(true);
  });

  it('parses --confirm as true', async () => {
    const opts = await parseAndCapture('--pain-id', 'pain-1', '--confirm');
    expect(opts).not.toBeNull();
    expect(opts?.confirm).toBe(true);
  });

  it('parses both --dry-run and --confirm (handler enforces mutual exclusivity)', async () => {
    // Commander parses both flags; the handler validates mutual exclusivity.
    // This test proves the parser accepts both — the handler test in
    // rulehost-pipeline-e2e.test.ts proves the handler rejects the combination.
    const opts = await parseAndCapture('--pain-id', 'pain-1', '--dry-run', '--confirm');
    expect(opts).not.toBeNull();
    expect(opts?.dryRun).toBe(true);
    expect(opts?.confirm).toBe(true);
  });

  it('defaults --dry-run and --confirm to undefined when neither is passed', async () => {
    const opts = await parseAndCapture('--pain-id', 'pain-1');
    expect(opts).not.toBeNull();
    expect(opts?.dryRun).toBeUndefined();
    expect(opts?.confirm).toBeUndefined();
  });

  it('parses --json as true', async () => {
    const opts = await parseAndCapture('--pain-id', 'pain-1', '--json');
    expect(opts).not.toBeNull();
    expect(opts?.json).toBe(true);
  });

  it('parses -w shorthand for --workspace', async () => {
    const opts = await parseAndCapture('--pain-id', 'pain-1', '-w', '/tmp/test');
    expect(opts).not.toBeNull();
    expect(opts?.workspace).toBe('/tmp/test');
  });

  it('rejects missing --pain-id (required option)', async () => {
    // No capture is attached here: Commander should throw on the missing
    // required option before any action runs.
    const { program } = registerOnFreshProgram();
    await expect(
      program.parseAsync([...BASE_ARGV, '--dry-run']),
    ).rejects.toThrow(/pain-id/);
  });

  it('parses --channel with custom value', async () => {
    const opts = await parseAndCapture('--pain-id', 'pain-1', '--channel', 'prompt');
    expect(opts).not.toBeNull();
    expect(opts?.channel).toBe('prompt');
  });

  it('defaults --channel to code_tool_hook', async () => {
    const opts = await parseAndCapture('--pain-id', 'pain-1');
    expect(opts).not.toBeNull();
    expect(opts?.channel).toBe('code_tool_hook');
  });

  it('parses --max-rounds as integer', async () => {
    const opts = await parseAndCapture('--pain-id', 'pain-1', '--max-rounds', '2');
    expect(opts).not.toBeNull();
    expect(opts?.maxRounds).toBe(2);
    expect(typeof opts?.maxRounds).toBe('number');
  });

  it('parses --timeout-ms as integer', async () => {
    const opts = await parseAndCapture('--pain-id', 'pain-1', '--timeout-ms', '600000');
    expect(opts).not.toBeNull();
    expect(opts?.timeoutMs).toBe(600000);
    expect(typeof opts?.timeoutMs).toBe('number');
  });
});
|