@principles/pd-cli 1.113.0 → 1.115.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/runtime-activation.d.ts +37 -0
- package/dist/commands/runtime-activation.d.ts.map +1 -1
- package/dist/commands/runtime-activation.js +416 -2
- package/dist/commands/runtime-activation.js.map +1 -1
- package/dist/index.js +54 -1
- package/dist/index.js.map +1 -1
- package/dist/services/demo-rule-compiler.d.ts.map +1 -1
- package/dist/services/demo-rule-compiler.js +30 -6
- package/dist/services/demo-rule-compiler.js.map +1 -1
- package/dist/services/rulehost-pipeline-runner.d.ts +8 -0
- package/dist/services/rulehost-pipeline-runner.d.ts.map +1 -1
- package/dist/services/rulehost-pipeline-runner.js +43 -1
- package/dist/services/rulehost-pipeline-runner.js.map +1 -1
- package/package.json +1 -1
- package/scripts/llm-dogfood.ts +419 -0
- package/src/commands/runtime-activation.ts +459 -1
- package/src/index.ts +57 -1
- package/src/services/demo-rule-compiler.ts +35 -15
- package/src/services/rulehost-pipeline-runner.ts +53 -0
- package/tests/commands/cli-command-tree.test.ts +14 -0
- package/tests/commands/run-rulehost-handler.test.ts +253 -0
- package/tests/commands/runtime-activation.test.ts +553 -1
- package/tests/e2e/cross-package-acceptance.test.ts +549 -0
- package/tests/services/demo-rule-compiler.test.ts +242 -0
- package/tests/services/rulehost-pipeline-runner.test.ts +6 -0
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* compileDemoRule unit tests (PRI-429).
|
|
3
|
+
*
|
|
4
|
+
* The demo rule compiler is the sandbox adapter for run-rulehost's
|
|
5
|
+
* adversarial loop. It parses TypeScript rule implementations, extracts
|
|
6
|
+
* evaluate(), and validates the returned rule host result shape.
|
|
7
|
+
*
|
|
8
|
+
* Missing coverage would allow silent regression in the
|
|
9
|
+
* RefinerSandbox contract (evaluate output shape, invalid rule bodies,
|
|
10
|
+
* meta export shapes, polluted globals, etc.).
|
|
11
|
+
*
|
|
12
|
+
* ERR refs:
|
|
13
|
+
* - ERR-021: vm.Script runInContext must not leak globals
|
|
14
|
+
* - ERR-025: Object.hasOwn for untrusted output shape validation
|
|
15
|
+
* - ERR-037: non-object evaluate() return must throw loudly
|
|
16
|
+
*/
|
|
17
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
18
|
+
import { compileDemoRule } from '../../src/services/demo-rule-compiler.js';
|
|
19
|
+
import { createRuleHostHelpers } from '@principles/core/runtime-v2';
|
|
20
|
+
import type { ReplayEvaluateFn, RuleHostInput, RuleHostResult } from '@principles/core/runtime-v2';
|
|
21
|
+
|
|
22
|
+
const VALID_RULE = `
|
|
23
|
+
export const meta = {
|
|
24
|
+
id: 'r1',
|
|
25
|
+
version: '1.0.0',
|
|
26
|
+
purpose: 'unit test',
|
|
27
|
+
};
|
|
28
|
+
export function evaluate(input, helpers) {
|
|
29
|
+
return { decision: 'allow', matched: false, reason: 'ok' };
|
|
30
|
+
}
|
|
31
|
+
`;
|
|
32
|
+
|
|
33
|
+
const NO_EVALUATE = `
|
|
34
|
+
export const meta = { id: 'r1' };
|
|
35
|
+
`;
|
|
36
|
+
|
|
37
|
+
const THROWING_EVALUATE = `
|
|
38
|
+
export function evaluate(input, helpers) {
|
|
39
|
+
throw new Error('boom');
|
|
40
|
+
}
|
|
41
|
+
`;
|
|
42
|
+
|
|
43
|
+
const INVALID_RETURN_WRONG_DECISION = `
|
|
44
|
+
export function evaluate() {
|
|
45
|
+
return { decision: 'accepted', matched: false, reason: 'wrong decision enum' };
|
|
46
|
+
}
|
|
47
|
+
`;
|
|
48
|
+
|
|
49
|
+
const INVALID_RETURN_NO_MATCHED = `
|
|
50
|
+
export function evaluate() {
|
|
51
|
+
return { decision: 'allow', reason: 'missing matched' };
|
|
52
|
+
}
|
|
53
|
+
`;
|
|
54
|
+
|
|
55
|
+
const INVALID_RETURN_NO_DECISION = `
|
|
56
|
+
export function evaluate(input, helpers) {
|
|
57
|
+
return { reason: 'no-decision-field' };
|
|
58
|
+
}
|
|
59
|
+
`;
|
|
60
|
+
|
|
61
|
+
const INVALID_RETURN_PRIMITIVE = `
|
|
62
|
+
export function evaluate(input, helpers) {
|
|
63
|
+
return 42;
|
|
64
|
+
}
|
|
65
|
+
`;
|
|
66
|
+
|
|
67
|
+
const INVALID_RETURN_NULL = `
|
|
68
|
+
export function evaluate(input, helpers) {
|
|
69
|
+
return null;
|
|
70
|
+
}
|
|
71
|
+
`;
|
|
72
|
+
|
|
73
|
+
const INVALID_RETURN_UNDEF = `
|
|
74
|
+
export function evaluate(input, helpers) {
|
|
75
|
+
return undefined;
|
|
76
|
+
}
|
|
77
|
+
`;
|
|
78
|
+
|
|
79
|
+
const INVALID_RETURN_STRING = `
|
|
80
|
+
export function evaluate(input, helpers) {
|
|
81
|
+
return 'accepted';
|
|
82
|
+
}
|
|
83
|
+
`;
|
|
84
|
+
|
|
85
|
+
const RULE_WITH_EVIDENCE = `
|
|
86
|
+
export const meta = { id: 'r-evidence' };
|
|
87
|
+
export function evaluate(input, helpers) {
|
|
88
|
+
const count = input.derived.estimatedLineChanges;
|
|
89
|
+
const matched = count > 0;
|
|
90
|
+
return { decision: matched ? 'block' : 'allow', matched, reason: 'based on changes', diagnostics: { count } };
|
|
91
|
+
}
|
|
92
|
+
`;
|
|
93
|
+
|
|
94
|
+
const RULE_WITH_HASOWN_POISON_PAYLOAD = `
|
|
95
|
+
export function evaluate(input, helpers) {
|
|
96
|
+
const poisoned = Object.create(null);
|
|
97
|
+
poisoned.decision = 'allow';
|
|
98
|
+
poisoned.matched = false;
|
|
99
|
+
poisoned.reason = 'ok';
|
|
100
|
+
return poisoned;
|
|
101
|
+
}
|
|
102
|
+
`;
|
|
103
|
+
|
|
104
|
+
/**
 * Builds the baseline RuleHostInput fixture shared by the tests in this file:
 * a `write_file` action on `/workspace/a.ts` with fixed workspace, session and
 * evolution fields.
 *
 * @param estimatedLineChanges - value stored in `derived.estimatedLineChanges`
 *   (defaults to 0).
 * @returns a newly constructed input object on every call.
 */
function makeRuleHostInput(estimatedLineChanges = 0): RuleHostInput {
  // Only the `derived` section varies between callers; everything else is constant.
  const derived: RuleHostInput['derived'] = { estimatedLineChanges, bashRisk: 'safe' };

  const input: RuleHostInput = {
    action: {
      toolName: 'write_file',
      normalizedPath: '/workspace/a.ts',
      paramsSummary: {},
    },
    workspace: { isRiskPath: false, planStatus: 'READY', hasPlanFile: true },
    session: { currentGfi: 0, recentThinking: true },
    evolution: { epTier: 0 },
    derived,
  };
  return input;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Invokes a compiled rule the way the tests need it: with an input and the
 * helpers that `createRuleHostHelpers` derives from that same input.
 *
 * @param evaluate - the function returned by `compileDemoRule`.
 * @param input - rule host input; defaults to `makeRuleHostInput()`.
 * @returns whatever `evaluate` returns; exceptions thrown by it are not caught.
 */
function evaluateRule(evaluate: ReplayEvaluateFn, input: RuleHostInput = makeRuleHostInput()): RuleHostResult {
  const helpers = createRuleHostHelpers(input);
  return evaluate(input, helpers);
}
|
|
121
|
+
|
|
122
|
+
describe('compileDemoRule', () => {
|
|
123
|
+
// Compile-time behaviour of compileDemoRule: what it returns for a valid
// module and when it throws before any evaluate() call happens.
describe('source normalization', () => {
  it('compiles a syntactically valid rule module', () => {
    const compiled = compileDemoRule(VALID_RULE, 'valid-rule.ts');

    expect(compiled).toBeTypeOf('function');
  });

  it('throws on code missing evaluate function', () => {
    const compileWithoutEvaluate = () => compileDemoRule(NO_EVALUATE, 'no-evaluate.ts');

    expect(compileWithoutEvaluate).toThrow(/evaluate/);
  });

  it('propagates syntax errors from vm.Script', () => {
    const compileGarbage = () => compileDemoRule('this is not valid {{{', 'bad.ts');

    // Any thrown error is accepted; the message is not pinned here.
    expect(compileGarbage).toThrow();
  });
});
|
|
137
|
+
|
|
138
|
+
// Runtime validation of what a compiled evaluate() hands back.
describe('evaluate output shape validation', () => {
  // [test title, rule source, source label] for every rule whose evaluate()
  // result must be rejected with an "invalid RuleHostResult" error.
  const rejectedOutputs: ReadonlyArray<readonly [string, string, string]> = [
    [
      'throws when evaluate returns an object without a decision field (ERR-037)',
      INVALID_RETURN_NO_DECISION,
      'no-decision.ts',
    ],
    ['rejects objects using a non-RuleHost decision enum', INVALID_RETURN_WRONG_DECISION, 'wrong-decision.ts'],
    ['rejects objects missing the required matched flag', INVALID_RETURN_NO_MATCHED, 'missing-matched.ts'],
    ['throws when evaluate returns a number (non-object)', INVALID_RETURN_PRIMITIVE, 'primitive.ts'],
    ['throws when evaluate returns null', INVALID_RETURN_NULL, 'null-return.ts'],
    ['throws when evaluate returns undefined', INVALID_RETURN_UNDEF, 'undef-return.ts'],
    ['throws when evaluate returns a string', INVALID_RETURN_STRING, 'string-return.ts'],
  ];

  it('returns a fully validated RuleHostResult', () => {
    const compiled = compileDemoRule(VALID_RULE, 'valid-rule.ts');

    expect(evaluateRule(compiled)).toEqual({ decision: 'allow', matched: false, reason: 'ok' });
  });

  // Compilation itself must succeed for these sources; only the later
  // evaluate() call is expected to throw.
  for (const [title, source, label] of rejectedOutputs) {
    it(title, () => {
      const compiled = compileDemoRule(source, label);

      expect(() => evaluateRule(compiled)).toThrow(/invalid RuleHostResult/);
    });
  }

  it('handles Object.create(null) output (no prototype) via Object.hasOwn (ERR-025)', () => {
    const compiled = compileDemoRule(RULE_WITH_HASOWN_POISON_PAYLOAD, 'hasown-poison.ts');
    const { decision } = evaluateRule(compiled);

    expect(decision).toBe('allow');
  });
});
|
|
190
|
+
|
|
191
|
+
// How the compiled evaluate() behaves when it is actually executed.
describe('evaluate behaviour', () => {
  it('propagates evaluate() exceptions to the caller (fail loud)', () => {
    const compiled = compileDemoRule(THROWING_EVALUATE, 'throwing.ts');
    const run = () => evaluateRule(compiled);

    // The rule throws Error('boom'); the message must reach the caller.
    expect(run).toThrow(/boom/);
  });

  it('reads real RuleHostInput fields and returns different decisions', () => {
    const compiled = compileDemoRule(RULE_WITH_EVIDENCE, 'evidence-rule.ts');

    // The rule blocks when derived.estimatedLineChanges > 0 and allows otherwise.
    const { decision: decisionWithChanges } = evaluateRule(compiled, makeRuleHostInput(3));
    const { decision: decisionWithoutChanges } = evaluateRule(compiled);

    expect(decisionWithChanges).toBe('block');
    expect(decisionWithoutChanges).toBe('allow');
  });
});
|
|
205
|
+
|
|
206
|
+
// Checks that running a compiled rule does not write to the host process's
// globalThis, and that one compilation does not influence the next.
describe('vm sandbox isolation', () => {
  // Property the polluting rule below tries to set on globalThis.
  const LEAK_KEY = '__pd_leaked_test';

  // If the sandbox ever does leak, remove the property again so that a single
  // failure does not also break the precondition check on later runs in the
  // same process (watch mode, retries) or bleed into unrelated tests.
  afterEach(() => {
    Reflect.deleteProperty(globalThis, LEAK_KEY);
  });

  it('does not pollute Node.js globalThis between invocations (ERR-021)', () => {
    const polluter = `
export function evaluate() {
globalThis.__pd_leaked_test = 1;
return { decision: 'block', matched: true, reason: 'polluting' };
}
`;
    // Precondition: the host global must be clean before the rule runs.
    expect(Reflect.get(globalThis, LEAK_KEY)).toBeUndefined();

    const fn = compileDemoRule(polluter, 'polluter.ts');
    evaluateRule(fn);

    // The sandboxed __pdRuleModule temporary assignment must not leak
    // arbitrary user-defined globals.
    const leakedValue = Reflect.get(globalThis, LEAK_KEY);
    expect(leakedValue).toBeUndefined();
  });

  it('removes the __pdRuleModule helper from the sandbox after compilation', () => {
    // This indirectly asserts the cleanup path: a second compilation
    // that does not export evaluate still throws rather than returning
    // a stale value from the first run.
    compileDemoRule(VALID_RULE, 'first.ts');

    expect(() => compileDemoRule(NO_EVALUATE, 'second.ts')).toThrow(/evaluate/);
  });
});
|
|
231
|
+
|
|
232
|
+
// The second argument of compileDemoRule must show up in the error text, both
// for failures at compile time and for failures when evaluate() is run.
describe('sourceLabel is threaded into error messages', () => {
  it('includes sourceLabel when evaluate() returns invalid output', () => {
    const compiled = compileDemoRule(INVALID_RETURN_PRIMITIVE, 'labeled-42.ts');
    const run = () => evaluateRule(compiled);

    expect(run).toThrow(/labeled-42\.ts/);
  });

  it('includes sourceLabel when evaluate export is missing', () => {
    const compile = () => compileDemoRule(NO_EVALUATE, 'no-eval-source-label.ts');

    expect(compile).toThrow(/no-eval-source-label\.ts/);
  });
});
|
|
242
|
+
});
|
|
@@ -238,6 +238,8 @@ describe('runRuleHostPipeline (PRI-429) — atomic capability + exact pain match
|
|
|
238
238
|
expect(result.decision, JSON.stringify(result)).toBe('candidate_ready_for_owner_review');
|
|
239
239
|
expect(result.stages.map((s) => s.name)).toEqual(['pain_lookup', 'dreamer', 'philosopher', 'scribe', 'adversarial_loop']);
|
|
240
240
|
expect(result.ruleArtifactId).not.toBeNull();
|
|
241
|
+
// P1 #1 fix: candidate should be auto-enqueued into the ApprovalQueue
|
|
242
|
+
expect(result.approvalId).not.toBeNull();
|
|
241
243
|
}, 60_000);
|
|
242
244
|
|
|
243
245
|
it('runs the real ArtificerL2Adapter through fail-feedback-fix before creating a candidate', async () => {
|
|
@@ -287,6 +289,8 @@ describe('runRuleHostPipeline (PRI-429) — atomic capability + exact pain match
|
|
|
287
289
|
expect(prompts[1]).toContain('Previous sandbox replay failures');
|
|
288
290
|
expect(result.decision, JSON.stringify(result)).toBe('candidate_ready_for_owner_review');
|
|
289
291
|
expect(result.ruleArtifactId).toMatch(/^pi-rule-/);
|
|
292
|
+
// P1 #1 fix: candidate should be auto-enqueued into the ApprovalQueue
|
|
293
|
+
expect(result.approvalId).not.toBeNull();
|
|
290
294
|
}, 60_000);
|
|
291
295
|
|
|
292
296
|
// ── Test 2: Capability OFF (explicitly disabled) → text_principle_only ──
|
|
@@ -479,6 +483,8 @@ describe('runRuleHostPipeline (PRI-429) — atomic capability + exact pain match
|
|
|
479
483
|
expect(dreamerCallCount).toBe(2);
|
|
480
484
|
expect(dreamerStage?.status).toBe('succeeded');
|
|
481
485
|
expect(result.decision).toBe('candidate_ready_for_owner_review');
|
|
486
|
+
// P1 #1 fix: candidate should be auto-enqueued into the ApprovalQueue
|
|
487
|
+
expect(result.approvalId).not.toBeNull();
|
|
482
488
|
}, 60_000);
|
|
483
489
|
|
|
484
490
|
// ── Test 9 (E fix): retried status exhausted → stage marked 'degraded' ──
|