principles-disciple 1.124.0 → 1.125.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/core/event-log.ts +13 -0
- package/src/core/rule-host.ts +69 -8
- package/src/core/rule-implementation-runtime.ts +65 -3
- package/src/types/event-types.ts +1 -0
- package/tests/core/rule-host-adversarial-output.test.ts +242 -0
- package/tests/core/rule-host-autocorrect-vm.test.ts +163 -0
- package/tests/core/rule-host-resource-bounds.test.ts +231 -0
- package/tests/core/rule-host-unhealthy-visibility.test.ts +234 -0
- package/tests/core/rule-host-validation.test.ts +315 -0
- package/tests/hooks/gate-rule-host-real-pipeline.test.ts +190 -0
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "principles-disciple",
|
|
3
3
|
"name": "Principles Disciple",
|
|
4
4
|
"description": "Evolutionary programming agent framework with strategic guardrails and reflection loops.",
|
|
5
|
-
"version": "1.124.0",
|
|
5
|
+
"version": "1.125.0",
|
|
6
6
|
"activation": {
|
|
7
7
|
"onCapabilities": [
|
|
8
8
|
"hook"
|
package/package.json
CHANGED
package/src/core/event-log.ts
CHANGED
|
@@ -28,6 +28,7 @@ import type {
|
|
|
28
28
|
RuleHostAutoCorrectProposedEventData,
|
|
29
29
|
RuleHostAutoCorrectAppliedEventData,
|
|
30
30
|
RuntimeV2PromptActivationsInjectedEventData,
|
|
31
|
+
RuleHostUnhealthyEventData,
|
|
31
32
|
} from '../types/event-types.js';
|
|
32
33
|
import { createEmptyDailyStats } from '../types/event-types.js';
|
|
33
34
|
import { atomicWriteFileSync } from '../utils/io.js';
|
|
@@ -210,6 +211,18 @@ export class EventLog {
|
|
|
210
211
|
this.record('runtime_v2_prompt_activations_injected', 'injected', data.sessionId, data);
|
|
211
212
|
}
|
|
212
213
|
|
|
214
|
+
/**
|
|
215
|
+
* PRI-437: Record that an approved rule failed to compile or load.
|
|
216
|
+
*
|
|
217
|
+
* This is NOT just a logger.warn — the unhealthy state is persisted to EventLog
|
|
218
|
+
* so it's visible to CLI (pd runtime health) and Console API.
|
|
219
|
+
*
|
|
220
|
+
* ERR-002: degradation includes a reason and nextAction (not silent).
|
|
221
|
+
*/
|
|
222
|
+
recordRuleHostUnhealthy(data: RuleHostUnhealthyEventData): void {
|
|
223
|
+
this.record('rulehost_unhealthy', 'failure', undefined, data);
|
|
224
|
+
}
|
|
225
|
+
|
|
213
226
|
/**
|
|
214
227
|
* Redact telemetry-sensitive string values in event data before persistence.
|
|
215
228
|
* Applies redactTelemetryString to known high-risk fields (filePath, command,
|
package/src/core/rule-host.ts
CHANGED
|
@@ -26,8 +26,10 @@
|
|
|
26
26
|
|
|
27
27
|
import { createRuleHostHelpers } from '@principles/core/runtime-v2';
|
|
28
28
|
import { mergeDecisions } from '@principles/core/runtime-v2';
|
|
29
|
+
import { validateRuleHostResult } from '@principles/core/runtime-v2';
|
|
29
30
|
import { SqliteConnection } from '@principles/core/runtime-v2';
|
|
30
31
|
import { loadRuleImplementationModule } from './rule-implementation-runtime.js';
|
|
32
|
+
import { EventLogService } from './event-log.js';
|
|
31
33
|
import type {
|
|
32
34
|
RuleHostInput,
|
|
33
35
|
RuleHostResult,
|
|
@@ -241,10 +243,13 @@ export class RuleHost {
|
|
|
241
243
|
const implId = `act-impl-${activationId}`;
|
|
242
244
|
const moduleExports = loadRuleImplementationModule(implementationCode, implId);
|
|
243
245
|
|
|
244
|
-
if (!moduleExports || typeof moduleExports.evaluate !== 'function') {
|
|
246
|
+
if (!moduleExports || typeof moduleExports.callEvaluate !== 'function') {
|
|
247
|
+
const reason = 'compiled module has no evaluate function';
|
|
245
248
|
this.logger.warn?.(
|
|
246
|
-
`[RuleHost] Activation ${activationId}:
|
|
249
|
+
`[RuleHost] Activation ${activationId}: ${reason}, skipping`
|
|
247
250
|
);
|
|
251
|
+
this._recordUnhealthy(activationId, artifactId, ruleId, reason,
|
|
252
|
+
'Fix the RuleCode to export an evaluate(input, helpers) function, then re-activate');
|
|
248
253
|
continue;
|
|
249
254
|
}
|
|
250
255
|
|
|
@@ -258,10 +263,10 @@ export class RuleHost {
|
|
|
258
263
|
? moduleExports.meta
|
|
259
264
|
: fallbackMeta;
|
|
260
265
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
266
|
+
// PRI-437: Use callEvaluate (vm-context-bounded) instead of raw evaluate.
|
|
267
|
+
// callEvaluate runs the invocation INSIDE the vm context with a time
|
|
268
|
+
// boundary, terminating infinite loops and excessive computation.
|
|
269
|
+
const boundedCallEvaluate = moduleExports.callEvaluate;
|
|
265
270
|
|
|
266
271
|
loaded.push({
|
|
267
272
|
implId,
|
|
@@ -269,7 +274,18 @@ export class RuleHost {
|
|
|
269
274
|
meta,
|
|
270
275
|
evaluate: (input: RuleHostInput): RuleHostResult => {
|
|
271
276
|
const frozenHelpers = createRuleHostHelpers(input);
|
|
272
|
-
|
|
277
|
+
// PRI-437: Execute inside vm context with timeout boundary.
|
|
278
|
+
// If the RuleCode infinite-loops or exceeds the time budget,
|
|
279
|
+
// vm throws an error that is caught by the caller (mergeDecisions
|
|
280
|
+
// try/catch), resulting in conservative degradation (undefined).
|
|
281
|
+
const rawResult = boundedCallEvaluate(input, frozenHelpers);
|
|
282
|
+
const validation = validateRuleHostResult(rawResult);
|
|
283
|
+
if (!validation.valid) {
|
|
284
|
+
throw new Error(
|
|
285
|
+
`[RuleHost] Activation ${activationId} returned invalid RuleHostResult: ${validation.errors.join('; ')}`
|
|
286
|
+
);
|
|
287
|
+
}
|
|
288
|
+
const result = rawResult as RuleHostResult;
|
|
273
289
|
if (result.matched && (result.decision === 'block' || result.decision === 'requireApproval')) {
|
|
274
290
|
result.ruleId = ruleId;
|
|
275
291
|
result.principleId = meta.ruleId ?? ruleId;
|
|
@@ -278,9 +294,16 @@ export class RuleHost {
|
|
|
278
294
|
},
|
|
279
295
|
});
|
|
280
296
|
} catch (loadError: unknown) {
|
|
297
|
+
const reason = `compilation failed: ${String(loadError)}`;
|
|
281
298
|
this.logger.warn?.(
|
|
282
|
-
`[RuleHost] Failed to load activation ${activationId}: ${
|
|
299
|
+
`[RuleHost] Failed to load activation ${activationId}: ${reason}`
|
|
283
300
|
);
|
|
301
|
+
// ruleId is declared inside the try block and may not be assigned yet;
|
|
302
|
+
// fall back to sourceRuleId or artifactId (both available in scope)
|
|
303
|
+
this._recordUnhealthy(activationId, artifactId,
|
|
304
|
+
sourceRuleId ?? artifactId,
|
|
305
|
+
reason,
|
|
306
|
+
'Fix the RuleCode syntax/compilation error, then re-activate the rule');
|
|
284
307
|
}
|
|
285
308
|
}
|
|
286
309
|
|
|
@@ -293,4 +316,42 @@ export class RuleHost {
|
|
|
293
316
|
}
|
|
294
317
|
}
|
|
295
318
|
}
|
|
319
|
+
|
|
320
|
+
/**
|
|
321
|
+
* PRI-437: Record an unhealthy activation state to EventLog.
|
|
322
|
+
*
|
|
323
|
+
* This makes compile/load failures visible to CLI (pd runtime health) and
|
|
324
|
+
* Console API — NOT just a logger.warn that's silently skipped.
|
|
325
|
+
*
|
|
326
|
+
* ERR-002: degradation includes a reason and nextAction (not silent).
|
|
327
|
+
* Failures in EventLog recording are caught and logged (never throw).
|
|
328
|
+
*/
|
|
329
|
+
private _recordUnhealthy(
|
|
330
|
+
activationId: string,
|
|
331
|
+
artifactId: string,
|
|
332
|
+
ruleId: string,
|
|
333
|
+
reason: string,
|
|
334
|
+
nextAction: string,
|
|
335
|
+
): void {
|
|
336
|
+
try {
|
|
337
|
+
// Pass undefined as logger: RuleHostLogger only has warn(), but EventLog
|
|
338
|
+
// calls this.logger.error() without optional chaining. Passing the
|
|
339
|
+
// RuleHostLogger directly would cause TypeError if EventLog tried to
|
|
340
|
+
// log an internal error. EventLog's logger is optional; RuleHost already
|
|
341
|
+
// logs its own warnings for the unhealthy event.
|
|
342
|
+
const eventLog = EventLogService.get(this.stateDir);
|
|
343
|
+
eventLog.recordRuleHostUnhealthy({
|
|
344
|
+
activationId,
|
|
345
|
+
artifactId,
|
|
346
|
+
ruleId,
|
|
347
|
+
reason,
|
|
348
|
+
nextAction,
|
|
349
|
+
});
|
|
350
|
+
} catch (recordError: unknown) {
|
|
351
|
+
// EventLog recording must never break RuleHost evaluation
|
|
352
|
+
this.logger.warn?.(
|
|
353
|
+
`[RuleHost] Failed to record unhealthy event for activation ${activationId}: ${String(recordError)}`
|
|
354
|
+
);
|
|
355
|
+
}
|
|
356
|
+
}
|
|
296
357
|
}
|
|
package/src/core/rule-implementation-runtime.ts
CHANGED
|
@@ -3,8 +3,27 @@ import { nodeVm } from '../utils/node-vm-polyfill.js';
|
|
|
3
3
|
export interface RuleImplementationModuleExports {
|
|
4
4
|
meta?: unknown;
|
|
5
5
|
evaluate?: unknown;
|
|
6
|
+
/**
|
|
7
|
+
* Call evaluate(input, helpers) INSIDE the vm context with a time boundary.
|
|
8
|
+
*
|
|
9
|
+
* PRI-437: The evaluate() function extracted from a vm context executes in
|
|
10
|
+
* the vm realm, but a direct host-realm call has NO timeout protection —
|
|
11
|
+
* an infinite loop in RuleCode would hang the host process forever.
|
|
12
|
+
*
|
|
13
|
+
* callEvaluate runs the invocation inside the vm context via
|
|
14
|
+
* Script.runInContext({ timeout }), which terminates infinite loops.
|
|
15
|
+
*
|
|
16
|
+
* Throws on timeout, compilation error, or if evaluate is missing.
|
|
17
|
+
*/
|
|
18
|
+
callEvaluate?: (input: unknown, helpers: unknown) => unknown;
|
|
6
19
|
}
|
|
7
20
|
|
|
21
|
+
/** Timeout (ms) for compiling RuleCode (defining evaluate + meta). */
|
|
22
|
+
const COMPILE_TIMEOUT_MS = 1000;
|
|
23
|
+
|
|
24
|
+
/** Timeout (ms) for executing evaluate(input, helpers) inside the vm. */
|
|
25
|
+
const EVALUATE_TIMEOUT_MS = 1000;
|
|
26
|
+
|
|
8
27
|
function normalizeImplementationSource(sourceCode: string): string {
|
|
9
28
|
const withoutExports = sourceCode
|
|
10
29
|
.replace(/export\s+const\s+meta\s*=/, 'const meta =')
|
|
@@ -26,13 +45,56 @@ export function loadRuleImplementationModule(
|
|
|
26
45
|
filename,
|
|
27
46
|
});
|
|
28
47
|
|
|
48
|
+
// Compile phase: define evaluate + meta (timeout-bounded)
|
|
29
49
|
script.runInContext(context, {
|
|
30
|
-
timeout:
|
|
50
|
+
timeout: COMPILE_TIMEOUT_MS,
|
|
31
51
|
displayErrors: true,
|
|
32
52
|
});
|
|
33
53
|
|
|
34
54
|
const moduleExports = (context as { __pdRuleModule?: RuleImplementationModuleExports }).__pdRuleModule;
|
|
35
|
-
|
|
55
|
+
// Note: keep __pdRuleModule on the context so callEvaluate can reference it.
|
|
56
|
+
// We do NOT delete it here — it's needed for subsequent evaluate calls.
|
|
57
|
+
|
|
58
|
+
const hasEvaluate = typeof moduleExports?.evaluate === 'function';
|
|
59
|
+
const meta = moduleExports?.meta;
|
|
60
|
+
|
|
61
|
+
if (!hasEvaluate) {
|
|
62
|
+
// No evaluate function — return early with no callEvaluate
|
|
63
|
+
return { meta, evaluate: moduleExports?.evaluate };
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// PRI-437: Create a context-aware caller that runs evaluate INSIDE the vm
|
|
67
|
+
// context with a time boundary. This terminates infinite loops and
|
|
68
|
+
// excessive computation that would otherwise hang the host process.
|
|
69
|
+
//
|
|
70
|
+
// The callEvaluate function:
|
|
71
|
+
// 1. Sets input and helpers as context globals (sandboxed)
|
|
72
|
+
// 2. Compiles a tiny call script
|
|
73
|
+
// 3. Runs it in the context with EVALUATE_TIMEOUT_MS
|
|
74
|
+
// 4. Cleans up globals
|
|
75
|
+
// 5. Returns the raw result (validation happens in the caller)
|
|
76
|
+
const callScript = new nodeVm.Script(
|
|
77
|
+
'__pdRuleModule.evaluate(__pdCallInput, __pdCallHelpers)',
|
|
78
|
+
{ filename: `${filename}.call` },
|
|
79
|
+
);
|
|
80
|
+
|
|
81
|
+
const callEvaluate = (input: unknown, helpers: unknown): unknown => {
|
|
82
|
+
(context as { __pdCallInput?: unknown }).__pdCallInput = input;
|
|
83
|
+
(context as { __pdCallHelpers?: unknown }).__pdCallHelpers = helpers;
|
|
84
|
+
try {
|
|
85
|
+
return callScript.runInContext(context, {
|
|
86
|
+
timeout: EVALUATE_TIMEOUT_MS,
|
|
87
|
+
displayErrors: true,
|
|
88
|
+
});
|
|
89
|
+
} finally {
|
|
90
|
+
try { delete (context as { __pdCallInput?: unknown }).__pdCallInput; } catch { /* noop */ }
|
|
91
|
+
try { delete (context as { __pdCallHelpers?: unknown }).__pdCallHelpers; } catch { /* noop */ }
|
|
92
|
+
}
|
|
93
|
+
};
|
|
36
94
|
|
|
37
|
-
return
|
|
95
|
+
return {
|
|
96
|
+
meta,
|
|
97
|
+
evaluate: moduleExports?.evaluate,
|
|
98
|
+
callEvaluate,
|
|
99
|
+
};
|
|
38
100
|
}
|
package/src/types/event-types.ts
CHANGED
|
package/tests/core/rule-host-adversarial-output.test.ts
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PRI-437 Slice 5: Invalid tier/adversarial diagnostics cannot corrupt output
|
|
3
|
+
*
|
|
4
|
+
* PURPOSE: Verify that adversarial RuleCode cannot corrupt the RuleHost output
|
|
5
|
+
* or merge logic through:
|
|
6
|
+
* 1. Prototype pollution in diagnostics field
|
|
7
|
+
* 2. Invalid tier in input (non-number epTier)
|
|
8
|
+
* 3. Adversarial correctionProposal with prototype pollution
|
|
9
|
+
*
|
|
10
|
+
* ERR risk mitigation:
|
|
11
|
+
* - ERR-001: no `as` bypass on untrusted VM output
|
|
12
|
+
* - ERR-013: Object.hasOwn for untrusted keys
|
|
13
|
+
* - ERR-005: validate array element types
|
|
14
|
+
*
|
|
15
|
+
* Test approach:
|
|
16
|
+
* - Real SQLite activation with adversarial RuleCode
|
|
17
|
+
* - Real RuleHost.evaluate() (public interface)
|
|
18
|
+
* - Verify output is either rejected (undefined) or safely contained
|
|
19
|
+
*/
|
|
20
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
21
|
+
import * as fs from 'fs';
|
|
22
|
+
import * as os from 'os';
|
|
23
|
+
import * as path from 'path';
|
|
24
|
+
import { SqliteConnection, SqliteActivationStateStore } from '@principles/core/runtime-v2';
|
|
25
|
+
import type { RuleHostInput } from '@principles/core/runtime-v2';
|
|
26
|
+
import { RuleHost } from '../../src/core/rule-host.js';
|
|
27
|
+
|
|
28
|
+
// ── Test helpers ───────────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
let tempWorkspaceDir: string;
|
|
31
|
+
let tempStateDir: string;
|
|
32
|
+
let sqliteConn: SqliteConnection;
|
|
33
|
+
|
|
34
|
+
function setupTempDirs(): void {
|
|
35
|
+
const baseTmp = os.tmpdir();
|
|
36
|
+
tempWorkspaceDir = fs.mkdtempSync(path.join(baseTmp, 'pd-rulehost-adversarial-'));
|
|
37
|
+
tempStateDir = path.join(tempWorkspaceDir, '.principles');
|
|
38
|
+
fs.mkdirSync(tempStateDir, { recursive: true });
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
function insertRuleArtifact(
|
|
42
|
+
artifactId: string,
|
|
43
|
+
ruleId: string,
|
|
44
|
+
sourceTaskId: string,
|
|
45
|
+
code: string,
|
|
46
|
+
): void {
|
|
47
|
+
const db = sqliteConn.getDb();
|
|
48
|
+
const now = new Date().toISOString();
|
|
49
|
+
const contentJson = JSON.stringify({
|
|
50
|
+
principleId: `P_${ruleId}`,
|
|
51
|
+
ruleId,
|
|
52
|
+
implementationCode: code,
|
|
53
|
+
goldenTrace: { traceId: `trace-${ruleId}`, cases: [], createdAt: now, version: 1 },
|
|
54
|
+
ruleHostGateDecision: 'accepted_shadow',
|
|
55
|
+
affectedTools: ['write_file'],
|
|
56
|
+
painReasonSummary: 'Test: adversarial',
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
db.prepare(`
|
|
60
|
+
INSERT INTO pi_artifacts (artifact_id, artifact_kind, source_task_id, source_principle_id, source_rule_id, lineage_artifact_ids, validation_status, content_json, created_at, updated_at)
|
|
61
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
62
|
+
`).run(
|
|
63
|
+
artifactId, 'rule', sourceTaskId, `P_${ruleId}`, ruleId,
|
|
64
|
+
'[]', 'validated', contentJson, now, now,
|
|
65
|
+
);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
async function insertActivation(
|
|
69
|
+
activationId: string,
|
|
70
|
+
artifactId: string,
|
|
71
|
+
ruleId: string,
|
|
72
|
+
): Promise<void> {
|
|
73
|
+
const store = new SqliteActivationStateStore(sqliteConn);
|
|
74
|
+
const now = new Date().toISOString();
|
|
75
|
+
await store.recordActivation({
|
|
76
|
+
activationId,
|
|
77
|
+
idempotencyKey: `${artifactId}::code_tool_hook`,
|
|
78
|
+
artifactId,
|
|
79
|
+
channel: 'code_tool_hook',
|
|
80
|
+
action: 'code_tool_hook_shadow_activate',
|
|
81
|
+
targetRef: `impl://${ruleId}`,
|
|
82
|
+
activatedAt: now,
|
|
83
|
+
deactivatedAt: null,
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
function makeInput(normalizedPath: string, epTier?: unknown): RuleHostInput {
|
|
88
|
+
return {
|
|
89
|
+
action: {
|
|
90
|
+
toolName: 'write_file',
|
|
91
|
+
normalizedPath,
|
|
92
|
+
paramsSummary: { path: normalizedPath },
|
|
93
|
+
},
|
|
94
|
+
workspace: { isRiskPath: false, planStatus: 'NONE' as const, hasPlanFile: false },
|
|
95
|
+
session: { sessionId: 'test-session-adversarial', currentGfi: 0, recentThinking: false },
|
|
96
|
+
evolution: { epTier: epTier as number },
|
|
97
|
+
derived: { estimatedLineChanges: 1, bashRisk: 'safe' as const },
|
|
98
|
+
};
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// ── Setup / Teardown ───────────────────────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
beforeEach(() => {
|
|
104
|
+
setupTempDirs();
|
|
105
|
+
sqliteConn = new SqliteConnection(tempWorkspaceDir);
|
|
106
|
+
sqliteConn.getDb();
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
afterEach(() => {
|
|
110
|
+
try { sqliteConn?.close(); } catch { /* best-effort */ }
|
|
111
|
+
try { fs.rmSync(tempWorkspaceDir, { recursive: true, force: true }); } catch { /* Windows */ }
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
// ── Slice 5: Adversarial diagnostics cannot corrupt output ─────────────────
|
|
115
|
+
|
|
116
|
+
describe('PRI-437 Slice 5: Invalid tier/adversarial diagnostics cannot corrupt output', () => {
|
|
117
|
+
it('prototype pollution in diagnostics field is rejected by validator', async () => {
|
|
118
|
+
const RULE_ID = 'R_TEST_PROTO_005';
|
|
119
|
+
const ARTIFACT_ID = 'art-proto-005';
|
|
120
|
+
const ACTIVATION_ID = `act_code_${RULE_ID}`;
|
|
121
|
+
|
|
122
|
+
// RuleCode that tries to inject __proto__ as an own property in diagnostics
|
|
123
|
+
const ADVERSARIAL_CODE = `
|
|
124
|
+
function evaluate(input, helpers) {
|
|
125
|
+
var diag = {};
|
|
126
|
+
Object.defineProperty(diag, '__proto__', { value: { polluted: true }, enumerable: true, configurable: true });
|
|
127
|
+
Object.defineProperty(diag, 'constructor', { value: { polluted: true }, enumerable: true, configurable: true });
|
|
128
|
+
return { decision: 'block', matched: true, reason: 'adversarial', diagnostics: diag };
|
|
129
|
+
}
|
|
130
|
+
var meta = { name: 'proto-rule', version: '1', ruleId: '${RULE_ID}', coversCondition: 'all' };
|
|
131
|
+
`;
|
|
132
|
+
|
|
133
|
+
insertRuleArtifact(ARTIFACT_ID, RULE_ID, 'task-proto-005', ADVERSARIAL_CODE);
|
|
134
|
+
await insertActivation(ACTIVATION_ID, ARTIFACT_ID, RULE_ID);
|
|
135
|
+
|
|
136
|
+
const warnCalls: string[] = [];
|
|
137
|
+
const spyLogger = {
|
|
138
|
+
warn: (message: string) => { warnCalls.push(message); },
|
|
139
|
+
error: () => {},
|
|
140
|
+
info: () => {},
|
|
141
|
+
};
|
|
142
|
+
|
|
143
|
+
const ruleHost = new RuleHost(tempStateDir, spyLogger, { workspaceDir: tempWorkspaceDir });
|
|
144
|
+
const result = ruleHost.evaluate(makeInput('/etc/passwd'));
|
|
145
|
+
|
|
146
|
+
// PRI-437 fail-closed contract: adversarial results must be rejected entirely,
|
|
147
|
+
// resulting in conservative degradation (undefined). Not "accepted but sanitized".
|
|
148
|
+
expect(result).toBeUndefined();
|
|
149
|
+
|
|
150
|
+
// Must emit warn evidence about the invalid result
|
|
151
|
+
expect(warnCalls.length).toBeGreaterThan(0);
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
it('invalid tier (string) in input does not corrupt output validation', async () => {
|
|
155
|
+
const RULE_ID = 'R_TEST_TIER_005';
|
|
156
|
+
const ARTIFACT_ID = 'art-tier-005';
|
|
157
|
+
const ACTIVATION_ID = `act_code_${RULE_ID}`;
|
|
158
|
+
|
|
159
|
+
// RuleCode that tries to use epTier as a string and produces invalid output
|
|
160
|
+
const ADVERSARIAL_CODE = `
|
|
161
|
+
function evaluate(input, helpers) {
|
|
162
|
+
var tier = input.evolution.epTier;
|
|
163
|
+
// If tier is a string, try to use it to bypass validation
|
|
164
|
+
if (typeof tier === 'string') {
|
|
165
|
+
return { decision: 'BLOCK', matched: 'yes', reason: 123 };
|
|
166
|
+
}
|
|
167
|
+
return { decision: 'block', matched: true, reason: 'valid block' };
|
|
168
|
+
}
|
|
169
|
+
var meta = { name: 'tier-rule', version: '1', ruleId: '${RULE_ID}', coversCondition: 'all' };
|
|
170
|
+
`;
|
|
171
|
+
|
|
172
|
+
insertRuleArtifact(ARTIFACT_ID, RULE_ID, 'task-tier-005', ADVERSARIAL_CODE);
|
|
173
|
+
await insertActivation(ACTIVATION_ID, ARTIFACT_ID, RULE_ID);
|
|
174
|
+
|
|
175
|
+
const warnCalls: string[] = [];
|
|
176
|
+
const spyLogger = {
|
|
177
|
+
warn: (message: string) => { warnCalls.push(message); },
|
|
178
|
+
error: () => {},
|
|
179
|
+
info: () => {},
|
|
180
|
+
};
|
|
181
|
+
|
|
182
|
+
const ruleHost = new RuleHost(tempStateDir, spyLogger, { workspaceDir: tempWorkspaceDir });
|
|
183
|
+
|
|
184
|
+
// Pass invalid tier (string instead of number)
|
|
185
|
+
const result = ruleHost.evaluate(makeInput('/etc/passwd', 'invalid_tier_string'));
|
|
186
|
+
|
|
187
|
+
// The invalid output (decision='BLOCK' instead of 'block', matched='yes' instead of boolean)
|
|
188
|
+
// must be rejected by the validator → undefined (conservative degradation)
|
|
189
|
+
expect(result).toBeUndefined();
|
|
190
|
+
|
|
191
|
+
// Must emit warn evidence about the invalid result
|
|
192
|
+
expect(warnCalls.length).toBeGreaterThan(0);
|
|
193
|
+
const invalidWarn = warnCalls.find(m =>
|
|
194
|
+
m.toLowerCase().includes('invalid') ||
|
|
195
|
+
m.toLowerCase().includes('evaluation failed')
|
|
196
|
+
);
|
|
197
|
+
expect(invalidWarn).toBeDefined();
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
it('adversarial correctionProposal with prototype pollution is rejected', async () => {
|
|
201
|
+
const RULE_ID = 'R_TEST_CORR_005';
|
|
202
|
+
const ARTIFACT_ID = 'art-corr-005';
|
|
203
|
+
const ACTIVATION_ID = `act_code_${RULE_ID}`;
|
|
204
|
+
|
|
205
|
+
// RuleCode that returns auto_correct with adversarial correctionProposal
|
|
206
|
+
const ADVERSARIAL_CODE = `
|
|
207
|
+
function evaluate(input, helpers) {
|
|
208
|
+
var proposal = {
|
|
209
|
+
ruleId: '${RULE_ID}',
|
|
210
|
+
correctedFields: [{ field: 'file_path', reason: 'x' }],
|
|
211
|
+
proposedParams: { file_path: '/safe/path' },
|
|
212
|
+
applicationMode: 'live',
|
|
213
|
+
confidence: 0.9,
|
|
214
|
+
};
|
|
215
|
+
// Try to inject __proto__ into the proposal
|
|
216
|
+
Object.defineProperty(proposal, '__proto__', { value: { polluted: true }, enumerable: true });
|
|
217
|
+
return { decision: 'auto_correct', matched: true, reason: 'adversarial correction', correctionProposal: proposal };
|
|
218
|
+
}
|
|
219
|
+
var meta = { name: 'corr-rule', version: '1', ruleId: '${RULE_ID}', coversCondition: 'all' };
|
|
220
|
+
`;
|
|
221
|
+
|
|
222
|
+
insertRuleArtifact(ARTIFACT_ID, RULE_ID, 'task-corr-005', ADVERSARIAL_CODE);
|
|
223
|
+
await insertActivation(ACTIVATION_ID, ARTIFACT_ID, RULE_ID);
|
|
224
|
+
|
|
225
|
+
const warnCalls: string[] = [];
|
|
226
|
+
const spyLogger = {
|
|
227
|
+
warn: (message: string) => { warnCalls.push(message); },
|
|
228
|
+
error: () => {},
|
|
229
|
+
info: () => {},
|
|
230
|
+
};
|
|
231
|
+
|
|
232
|
+
const ruleHost = new RuleHost(tempStateDir, spyLogger, { workspaceDir: tempWorkspaceDir });
|
|
233
|
+
const result = ruleHost.evaluate(makeInput('/etc/passwd'));
|
|
234
|
+
|
|
235
|
+
// PRI-437 fail-closed contract: adversarial correctionProposal must be rejected
|
|
236
|
+
// entirely, resulting in conservative degradation (undefined).
|
|
237
|
+
expect(result).toBeUndefined();
|
|
238
|
+
|
|
239
|
+
// Must emit warn evidence
|
|
240
|
+
expect(warnCalls.length).toBeGreaterThan(0);
|
|
241
|
+
});
|
|
242
|
+
});
|
|
package/tests/core/rule-host-autocorrect-vm.test.ts
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PRI-437 Adversarial Self-Review: Valid auto_correct from VM must be accepted
|
|
3
|
+
*
|
|
4
|
+
* PURPOSE: Verify that a valid auto_correct proposal created INSIDE the vm
|
|
5
|
+
* context is accepted by the validator (not rejected due to prototype realm
|
|
6
|
+
* mismatch).
|
|
7
|
+
*
|
|
8
|
+
* CONTEXT: validateCorrectionProposal uses isPlainObject which checks
|
|
9
|
+
* Object.getPrototypeOf() === Object.prototype. VM-created objects have
|
|
10
|
+
* prototypes from the VM realm, not the host realm, so isPlainObject
|
|
11
|
+
* returns false and rejects all VM-created proposals.
|
|
12
|
+
*
|
|
13
|
+
* ERR risk mitigation:
|
|
14
|
+
* - ERR-001: no `as` bypass on untrusted VM output
|
|
15
|
+
* - ERR-002: fail-closed with reason (not silent rejection)
|
|
16
|
+
*/
|
|
17
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
18
|
+
import * as fs from 'fs';
|
|
19
|
+
import * as os from 'os';
|
|
20
|
+
import * as path from 'path';
|
|
21
|
+
import { SqliteConnection, SqliteActivationStateStore } from '@principles/core/runtime-v2';
|
|
22
|
+
import type { RuleHostInput } from '@principles/core/runtime-v2';
|
|
23
|
+
import { RuleHost } from '../../src/core/rule-host.js';
|
|
24
|
+
|
|
25
|
+
let tempWorkspaceDir: string;
|
|
26
|
+
let tempStateDir: string;
|
|
27
|
+
let sqliteConn: SqliteConnection;
|
|
28
|
+
|
|
29
|
+
function setupTempDirs(): void {
|
|
30
|
+
const baseTmp = os.tmpdir();
|
|
31
|
+
tempWorkspaceDir = fs.mkdtempSync(path.join(baseTmp, 'pd-rulehost-autocorrect-'));
|
|
32
|
+
tempStateDir = path.join(tempWorkspaceDir, '.principles');
|
|
33
|
+
fs.mkdirSync(tempStateDir, { recursive: true });
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
function insertRuleArtifact(
|
|
37
|
+
artifactId: string,
|
|
38
|
+
ruleId: string,
|
|
39
|
+
sourceTaskId: string,
|
|
40
|
+
code: string,
|
|
41
|
+
): void {
|
|
42
|
+
const db = sqliteConn.getDb();
|
|
43
|
+
const now = new Date().toISOString();
|
|
44
|
+
const contentJson = JSON.stringify({
|
|
45
|
+
principleId: `P_${ruleId}`,
|
|
46
|
+
ruleId,
|
|
47
|
+
implementationCode: code,
|
|
48
|
+
goldenTrace: { traceId: `trace-${ruleId}`, cases: [], createdAt: now, version: 1 },
|
|
49
|
+
ruleHostGateDecision: 'accepted_shadow',
|
|
50
|
+
affectedTools: ['write_file'],
|
|
51
|
+
painReasonSummary: 'Test: auto_correct',
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
db.prepare(`
|
|
55
|
+
INSERT INTO pi_artifacts (artifact_id, artifact_kind, source_task_id, source_principle_id, source_rule_id, lineage_artifact_ids, validation_status, content_json, created_at, updated_at)
|
|
56
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
57
|
+
`).run(
|
|
58
|
+
artifactId, 'rule', sourceTaskId, `P_${ruleId}`, ruleId,
|
|
59
|
+
'[]', 'validated', contentJson, now, now,
|
|
60
|
+
);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
async function insertActivation(
|
|
64
|
+
activationId: string,
|
|
65
|
+
artifactId: string,
|
|
66
|
+
ruleId: string,
|
|
67
|
+
): Promise<void> {
|
|
68
|
+
const store = new SqliteActivationStateStore(sqliteConn);
|
|
69
|
+
const now = new Date().toISOString();
|
|
70
|
+
await store.recordActivation({
|
|
71
|
+
activationId,
|
|
72
|
+
idempotencyKey: `${artifactId}::code_tool_hook`,
|
|
73
|
+
artifactId,
|
|
74
|
+
channel: 'code_tool_hook',
|
|
75
|
+
action: 'code_tool_hook_shadow_activate',
|
|
76
|
+
targetRef: `impl://${ruleId}`,
|
|
77
|
+
activatedAt: now,
|
|
78
|
+
deactivatedAt: null,
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
function makeInput(normalizedPath: string): RuleHostInput {
|
|
83
|
+
return {
|
|
84
|
+
action: {
|
|
85
|
+
toolName: 'write_file',
|
|
86
|
+
normalizedPath,
|
|
87
|
+
paramsSummary: { path: normalizedPath },
|
|
88
|
+
},
|
|
89
|
+
workspace: { isRiskPath: false, planStatus: 'NONE' as const, hasPlanFile: false },
|
|
90
|
+
session: { sessionId: 'test-session-autocorrect', currentGfi: 0, recentThinking: false },
|
|
91
|
+
evolution: { epTier: 0 },
|
|
92
|
+
derived: { estimatedLineChanges: 1, bashRisk: 'safe' as const },
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
beforeEach(() => {
|
|
97
|
+
setupTempDirs();
|
|
98
|
+
sqliteConn = new SqliteConnection(tempWorkspaceDir);
|
|
99
|
+
sqliteConn.getDb();
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
afterEach(() => {
|
|
103
|
+
try { sqliteConn?.close(); } catch { /* best-effort */ }
|
|
104
|
+
try { fs.rmSync(tempWorkspaceDir, { recursive: true, force: true }); } catch { /* Windows */ }
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
describe('PRI-437 Adversarial Self-Review: Valid auto_correct from VM must be accepted', () => {
|
|
108
|
+
it('valid auto_correct proposal created inside VM is accepted (not rejected by isPlainObject)', async () => {
|
|
109
|
+
const RULE_ID = 'R_TEST_AUTOCORRECT_VALID';
|
|
110
|
+
const ARTIFACT_ID = 'art-autocorrect-valid';
|
|
111
|
+
const ACTIVATION_ID = `act_code_${RULE_ID}`;
|
|
112
|
+
|
|
113
|
+
// RuleCode that returns a valid auto_correct proposal.
|
|
114
|
+
// The proposal object is created INSIDE the vm context, so its prototype
|
|
115
|
+
// is the VM realm's Object.prototype, not the host's.
|
|
116
|
+
const VALID_AUTOCORRECT_CODE = `
|
|
117
|
+
function evaluate(input, helpers) {
|
|
118
|
+
return {
|
|
119
|
+
decision: 'auto_correct',
|
|
120
|
+
matched: true,
|
|
121
|
+
reason: 'path should be within workspace',
|
|
122
|
+
correctionProposal: {
|
|
123
|
+
ruleId: '${RULE_ID}',
|
|
124
|
+
correctedFields: [{ field: 'file_path', original: input.action.normalizedPath, proposed: '/safe/path', reason: 'redirect to safe path' }],
|
|
125
|
+
proposedParams: { file_path: '/safe/path' },
|
|
126
|
+
applicationMode: 'shadow',
|
|
127
|
+
confidence: 0.9,
|
|
128
|
+
notifyAgent: true
|
|
129
|
+
}
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
var meta = { name: 'autocorrect-rule', version: '1', ruleId: '${RULE_ID}', coversCondition: 'all' };
|
|
133
|
+
`;
|
|
134
|
+
|
|
135
|
+
insertRuleArtifact(ARTIFACT_ID, RULE_ID, 'task-autocorrect-valid', VALID_AUTOCORRECT_CODE);
|
|
136
|
+
await insertActivation(ACTIVATION_ID, ARTIFACT_ID, RULE_ID);
|
|
137
|
+
|
|
138
|
+
const warnCalls: string[] = [];
|
|
139
|
+
const spyLogger = {
|
|
140
|
+
warn: (message: string) => { warnCalls.push(message); },
|
|
141
|
+
error: () => {},
|
|
142
|
+
info: () => {},
|
|
143
|
+
};
|
|
144
|
+
|
|
145
|
+
const ruleHost = new RuleHost(tempStateDir, spyLogger, { workspaceDir: tempWorkspaceDir });
|
|
146
|
+
const result = ruleHost.evaluate(makeInput('/etc/passwd'));
|
|
147
|
+
|
|
148
|
+
// The valid auto_correct proposal MUST be accepted — not rejected due to
|
|
149
|
+
// VM prototype realm mismatch.
|
|
150
|
+
expect(result).toBeDefined();
|
|
151
|
+
expect(result?.decision).toBe('auto_correct');
|
|
152
|
+
expect(result?.matched).toBe(true);
|
|
153
|
+
expect(result?.correctionProposal).toBeDefined();
|
|
154
|
+
expect(result?.correctionProposal?.ruleId).toBe(RULE_ID);
|
|
155
|
+
|
|
156
|
+
// No warnings should be emitted about invalid results
|
|
157
|
+
const invalidWarn = warnCalls.find(m =>
|
|
158
|
+
m.toLowerCase().includes('invalid') ||
|
|
159
|
+
m.toLowerCase().includes('must be a plain object')
|
|
160
|
+
);
|
|
161
|
+
expect(invalidWarn).toBeUndefined();
|
|
162
|
+
});
|
|
163
|
+
});
|