@principles/pd-cli 1.112.0 → 1.114.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/__tests__/run-rulehost-flag-wiring.test.d.ts +24 -0
- package/dist/commands/__tests__/run-rulehost-flag-wiring.test.d.ts.map +1 -0
- package/dist/commands/__tests__/run-rulehost-flag-wiring.test.js +223 -0
- package/dist/commands/__tests__/run-rulehost-flag-wiring.test.js.map +1 -0
- package/dist/commands/runtime-activation.d.ts +37 -0
- package/dist/commands/runtime-activation.d.ts.map +1 -1
- package/dist/commands/runtime-activation.js +416 -2
- package/dist/commands/runtime-activation.js.map +1 -1
- package/dist/commands/runtime-internalization-run-rulehost.d.ts +23 -0
- package/dist/commands/runtime-internalization-run-rulehost.d.ts.map +1 -0
- package/dist/commands/runtime-internalization-run-rulehost.js +364 -0
- package/dist/commands/runtime-internalization-run-rulehost.js.map +1 -0
- package/dist/index.js +56 -1
- package/dist/index.js.map +1 -1
- package/dist/services/demo-rule-compiler.d.ts +24 -0
- package/dist/services/demo-rule-compiler.d.ts.map +1 -0
- package/dist/services/demo-rule-compiler.js +53 -0
- package/dist/services/demo-rule-compiler.js.map +1 -0
- package/dist/services/rulehost-pipeline-runner.d.ts +132 -0
- package/dist/services/rulehost-pipeline-runner.d.ts.map +1 -0
- package/dist/services/rulehost-pipeline-runner.js +376 -0
- package/dist/services/rulehost-pipeline-runner.js.map +1 -0
- package/package.json +1 -1
- package/scripts/llm-dogfood.ts +419 -0
- package/src/commands/__tests__/run-rulehost-flag-wiring.test.ts +280 -0
- package/src/commands/runtime-activation.ts +459 -1
- package/src/commands/runtime-internalization-run-rulehost.ts +417 -0
- package/src/index.ts +60 -1
- package/src/services/demo-rule-compiler.ts +71 -0
- package/src/services/rulehost-pipeline-runner.ts +638 -0
- package/tests/commands/cli-command-tree.test.ts +14 -0
- package/tests/commands/runtime-activation.test.ts +553 -1
- package/tests/e2e/cross-package-acceptance.test.ts +549 -0
- package/tests/services/rulehost-pipeline-e2e.test.ts +477 -0
- package/tests/services/rulehost-pipeline-runner.test.ts +525 -0
|
@@ -0,0 +1,549 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-Package Acceptance Test — PRI-408 (P1/P2 fixes)
|
|
3
|
+
*
|
|
4
|
+
 * UNSPLITTABLE end-to-end test crossing pd-cli + principles-core packages:
|
|
5
|
+
* pain → RuleHost pipeline → candidate → auto-enqueue (P1 #1)
|
|
6
|
+
* → Owner edit approval (P1 #2) → Owner approve → dispatch with approval binding (P1 #3)
|
|
7
|
+
* → activation observable → deactivate → behavior restored
|
|
8
|
+
* → include-deactivated shows the record (P2 #5)
|
|
9
|
+
*
|
|
10
|
+
* Uses real SQLite stores, real pipeline runner, real approval queue, real
|
|
11
|
+
* activation dispatcher. The only scripted part is the LLM stage outputs
|
|
12
|
+
* (dreamer/philosopher/scribe/artificer/evaluator) — same pattern as the
|
|
13
|
+
* existing pipeline runner tests.
|
|
14
|
+
*
|
|
15
|
+
* This test exists because the original story-a-acceptance.test.ts started
|
|
16
|
+
* from a pre-built principle artifact and only checked that activation
|
|
17
|
+
* records existed. It did NOT exercise:
|
|
18
|
+
* - The pipeline → auto-enqueue chain (P1 #1)
|
|
19
|
+
* - The edit entry point (P1 #2)
|
|
20
|
+
* - The approval binding security boundary (P1 #3)
|
|
21
|
+
* - The include-deactivated query (P2 #5)
|
|
22
|
+
*
|
|
23
|
+
* ERR checklist:
|
|
24
|
+
* - ERR-001: All pipeline output treated as unknown, validated at runtime
|
|
25
|
+
* - ERR-002: Every failure path carries reason + nextAction
|
|
26
|
+
* - ERR-004/008: Lineage fields from same source, mismatch tested
|
|
27
|
+
* - ERR-009: Required fields fail loud
|
|
28
|
+
* - ERR-025: Production-path test, not demo helper
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { describe, it, expect, afterEach, vi } from 'vitest';
|
|
32
|
+
import * as os from 'node:os';
|
|
33
|
+
import * as path from 'node:path';
|
|
34
|
+
import * as fs from 'node:fs';
|
|
35
|
+
import {
|
|
36
|
+
runRuleHostPipeline,
|
|
37
|
+
createSandboxGateDeps,
|
|
38
|
+
} from '../../src/services/rulehost-pipeline-runner.js';
|
|
39
|
+
import { RuleHost } from '../../../openclaw-plugin/src/core/rule-host.js';
|
|
40
|
+
import type { CodeRuleCapability } from '../../src/services/rulehost-pipeline-runner.js';
|
|
41
|
+
import type { PDRuntimeAdapter, RunHandle, RunStatus, PIArtifactStore, RuntimeCapabilities, RuntimeHealth, RuntimeArtifactRef, ContextItem, StructuredRunOutput, StartRunInput } from '@principles/core/runtime-v2';
|
|
42
|
+
import {
|
|
43
|
+
RuntimeStateManager,
|
|
44
|
+
SqliteApprovalQueueStore,
|
|
45
|
+
SqliteActivationStateStore,
|
|
46
|
+
SqlitePIArtifactStore,
|
|
47
|
+
ActivationDispatcher,
|
|
48
|
+
ApprovalCompletionService,
|
|
49
|
+
PromptWriter,
|
|
50
|
+
DeferArchiveWriter,
|
|
51
|
+
RuleHostWriter,
|
|
52
|
+
createProductionGateDeps,
|
|
53
|
+
makeIdempotencyKey,
|
|
54
|
+
createPITaskDiagnosticJson,
|
|
55
|
+
} from '@principles/core/runtime-v2';
|
|
56
|
+
import type { PIArtifactSnapshot, RuleHostInput } from '@principles/core/runtime-v2';
|
|
57
|
+
|
|
58
|
+
// ── Scripted adapter (same pattern as rulehost-pipeline-runner.test.ts) ──────
|
|
59
|
+
|
|
60
|
+
// Builds the scripted payload for one stage. `priorArtifactId` is the upstream
// artifact id when the adapter could resolve one (it is not passed for the dreamer stage).
type StageFactory = (taskId: string, priorArtifactId?: string) => unknown;
|
|
61
|
+
// Builds the scripted evaluator payload; unlike StageFactory the artificer artifact id is mandatory.
type EvaluatorFactory = (taskId: string, artificerArtifactId: string) => unknown;
|
|
62
|
+
|
|
63
|
+
/**
 * Scripted test double for {@link PDRuntimeAdapter}.
 *
 * Runs never execute anything: `startRun` only records the request, `pollRun`
 * always reports success, and `fetchOutput` builds the payload by calling the
 * stage factory selected from the task id. Downstream factories receive the id
 * of the first artifact listed in `artifactStore` for the most recent upstream
 * task, so `artifactStore` must be assigned before those stages are fetched.
 */
class ScriptedAdapter implements PDRuntimeAdapter {
  /** Task ids in the order `startRun` received them. */
  readonly startRunCalls: { taskId: string }[] = [];
  /** Raw `startRun` inputs keyed by the synthetic run id (`run-<taskId>`). */
  readonly startRunInputs = new Map<string, StartRunInput>();
  /** Store used to look up upstream artifact ids; `null` until the test wires it in. */
  artifactStore: PIArtifactStore | null = null;

  constructor(private readonly factories: { dreamer: StageFactory; philosopher: StageFactory; scribe: StageFactory; artificer: StageFactory; evaluator: EvaluatorFactory }) {}

  /**
   * Maps a task id to its stage. The dreamer test is a prefix match while the
   * other stages are substring matches, checked in pipeline order; anything
   * that matches none of them is treated as the evaluator.
   */
  private kindFor(taskId: string): 'dreamer' | 'philosopher' | 'scribe' | 'artificer' | 'evaluator' {
    if (taskId.startsWith('dreamer')) return 'dreamer';
    if (taskId.includes('philosopher')) return 'philosopher';
    if (taskId.includes('scribe')) return 'scribe';
    if (taskId.includes('artificer')) return 'artificer';
    return 'evaluator';
  }

  /**
   * Returns the id of the first artifact stored for the most recent started
   * task of `priorKind`, or `undefined` when no such task was started, no
   * store is attached, or the store lists nothing for that task.
   */
  private async priorArtifactId(priorKind: string): Promise<string | undefined> {
    const calls = this.startRunCalls.filter((c) => this.kindFor(c.taskId) === priorKind);
    const last = calls[calls.length - 1];
    if (!last || !this.artifactStore) return undefined;
    const arts = await this.artifactStore.listBySourceTaskId(last.taskId);
    return arts[0]?.artifactId;
  }

  /** Records the request and returns a handle whose run id is `run-<taskId>`. */
  async startRun(input: StartRunInput): Promise<RunHandle> {
    this.startRunCalls.push({ taskId: input.taskRef.taskId });
    const runId = `run-${input.taskRef.taskId}`;
    this.startRunInputs.set(runId, input);
    return { runId, runtimeKind: 'test-double', startedAt: new Date().toISOString() };
  }

  /** Every run is reported as succeeded; the status echoes the run id it was asked about. */
  async pollRun(runId: string): Promise<RunStatus> {
    return { status: 'succeeded', runId };
  }

  /**
   * Produces the scripted payload for `runId`.
   *
   * For the evaluator stage the artificer artifact id is taken from the store
   * first; if that yields nothing, it falls back to the
   * `sourceArtificerArtifactId` string property of the JSON `inputPayload`
   * recorded by `startRun`.
   *
   * @throws Error when no artificer artifact id can be resolved for the evaluator stage.
   * @throws SyntaxError when the fallback `inputPayload` string is not valid JSON.
   */
  async fetchOutput(runId: string): Promise<StructuredRunOutput | null> {
    const taskId = runId.replace(/^run-/, '');
    const kind = this.kindFor(taskId);
    let payload: unknown;
    if (kind === 'dreamer') payload = this.factories.dreamer(taskId);
    else if (kind === 'philosopher') payload = this.factories.philosopher(taskId, await this.priorArtifactId('dreamer'));
    else if (kind === 'scribe') payload = this.factories.scribe(taskId, await this.priorArtifactId('philosopher'));
    else if (kind === 'artificer') payload = this.factories.artificer(taskId, await this.priorArtifactId('scribe'));
    else {
      let artificerArtifactId = await this.priorArtifactId('artificer');
      if (!artificerArtifactId) {
        const inputPayload = this.startRunInputs.get(runId)?.inputPayload;
        if (typeof inputPayload === 'string') {
          // The payload is untyped JSON: narrow step by step instead of casting.
          const parsed: unknown = JSON.parse(inputPayload);
          if (parsed !== null && typeof parsed === 'object' && Object.hasOwn(parsed, 'sourceArtificerArtifactId')) {
            const candidate = Reflect.get(parsed, 'sourceArtificerArtifactId');
            if (typeof candidate === 'string') artificerArtifactId = candidate;
          }
        }
      }
      payload = this.factories.evaluator(taskId, requireLineage(artificerArtifactId, 'sourceArtificerArtifactId'));
    }
    return { runId, payload };
  }

  async cancelRun(_runId: string): Promise<void> { /* noop */ }

  /** Advertises no optional capability at all. */
  async getCapabilities(): Promise<RuntimeCapabilities> {
    return {
      supportsStructuredJsonOutput: false,
      supportsToolUse: false,
      supportsWorkingDirectory: false,
      supportsModelSelection: false,
      supportsLongRunningSessions: false,
      supportsCancellation: false,
      supportsArtifactWriteBack: false,
      supportsConcurrentRuns: false,
      supportsStreaming: false,
    };
  }

  /** Always healthy, never degraded. */
  async healthCheck(): Promise<RuntimeHealth> {
    return { healthy: true, degraded: false, warnings: [], lastCheckedAt: new Date().toISOString() };
  }

  async fetchArtifacts(_runId: string): Promise<RuntimeArtifactRef[]> { return []; }
  async appendContext(_runId: string, _items: ContextItem[]): Promise<void> { /* noop */ }
  async refreshCapabilities(): Promise<RuntimeCapabilities> { return this.getCapabilities(); }
  kind(): 'test-double' { return 'test-double'; }
}
|
|
138
|
+
|
|
139
|
+
/**
 * Fails loudly when a lineage id is absent.
 *
 * @param id    Lineage artifact id to check; `undefined` and `''` both count as missing.
 * @param field Name of the lineage field, used only in the error message.
 * @returns `id` unchanged when it is present.
 * @throws Error `missing required lineage field: <field>` when `id` is missing.
 */
function requireLineage(id: string | undefined, field: string): string {
  if (!id) throw new Error(`missing required lineage field: ${field}`);
  return id;
}
|
|
143
|
+
|
|
144
|
+
// ── Scripted LLM stage outputs ───────────────────────────────────────────────
|
|
145
|
+
|
|
146
|
+
/**
 * Scripted dreamer-stage payload: one low-risk candidate about blocking
 * system-path writes, tagged with the originating pain id.
 *
 * @param taskId Task id echoed back in the payload.
 * @param painId Pain signal id stored as `sourcePainId`.
 * @returns The payload as `unknown`, timestamped with the current time.
 */
function dreamerOut(taskId: string, painId: string): unknown {
  return {
    valid: true,
    taskId,
    candidates: [
      {
        candidateIndex: 0,
        badDecision: 'Wrote to /etc/passwd',
        betterDecision: 'Block system path writes',
        rationale: 'OS damage risk',
        confidence: 0.85,
        riskLevel: 'low' as const,
        strategicPerspective: 'safety',
      },
    ],
    sourcePainId: painId,
    contextRefs: [],
    generatedAt: new Date().toISOString(),
  };
}
|
|
153
|
+
|
|
154
|
+
/**
 * Scripted philosopher-stage payload derived from a dreamer artifact.
 *
 * @param taskId  Task id echoed back in the payload.
 * @param priorId Id of the upstream dreamer artifact.
 * @returns The payload as `unknown`, timestamped with the current time.
 * @throws Error when `priorId` is missing (see `requireLineage`).
 */
function philosopherOut(taskId: string, priorId?: string): unknown {
  return {
    taskId,
    sourceDreamerArtifactId: requireLineage(priorId, 'sourceDreamerArtifactId'),
    thesis: 'System path writes must be blocked',
    principleCandidate: {
      title: 'Block system path writes',
      rationale: 'OS corruption risk',
      scope: 'write ops',
      confidence: 0.9,
    },
    risks: [],
    generatedAt: new Date().toISOString(),
  };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Scripted scribe-stage payload derived from a philosopher artifact.
 *
 * @param taskId  Task id echoed back in the payload.
 * @param priorId Id of the upstream philosopher artifact.
 * @returns The payload as `unknown`, timestamped with the current time.
 * @throws Error `missing required lineage field: sourcePhilosopherArtifactId` when `priorId` is missing.
 */
function scribeOut(taskId: string, priorId?: string): unknown {
  // Resolve once: both lineage fields carry the same id, and a second check could never fail.
  const philosopherArtifactId = requireLineage(priorId, 'sourcePhilosopherArtifactId');
  return {
    taskId,
    sourcePhilosopherArtifactId: philosopherArtifactId,
    principleDraft: {
      title: 'Block system path writes',
      statement: 'Writes to /etc, /boot, /sys must be blocked.',
      rationale: 'OS corruption risk',
      applicability: ['write_file'],
      antiPatterns: ['Hardcoded allow'],
      confidence: 0.9,
    },
    sourceTrace: { philosopherArtifactId },
    risks: [],
    generatedAt: new Date().toISOString(),
  };
}
|
|
171
|
+
|
|
172
|
+
/**
 * Scripted artificer-stage payload derived from a scribe artifact.
 *
 * The embedded rule source blocks any path starting with "/etc" and allows
 * everything else; the two golden trace cases cover one path of each kind.
 *
 * @param taskId  Task id echoed back in the payload.
 * @param priorId Id of the upstream scribe artifact.
 * @returns The payload as `unknown`, timestamped with the current time.
 * @throws Error `missing required lineage field: sourceScribeArtifactId` when `priorId` is missing.
 */
function artificerV2(taskId: string, priorId?: string): unknown {
  // Resolve once: both lineage fields carry the same id, and a second check could never fail.
  const scribeArtifactId = requireLineage(priorId, 'sourceScribeArtifactId');
  return {
    taskId,
    sourceScribeArtifactId: scribeArtifactId,
    implementationPlan: {
      summary: 'Block /etc writes',
      targetSurface: 'rule-host',
      changes: ['matcher'],
      tests: ['unit'],
      rolloutNotes: ['shadow'],
      confidence: 0.85,
    },
    implementationCode: 'function evaluate(input, helpers) { const p = String(input?.action?.paramsSummary?.path ?? input?.action?.normalizedPath ?? ""); return p.startsWith("/etc") ? { decision: "block", matched: true, reason: "system path" } : { decision: "allow", matched: false, reason: "ok" }; }',
    goldenTraceCases: [
      { caseId: 'pos-1', kind: 'positive', toolName: 'write_file', params: { path: '/project/f.txt' }, expectedDecision: 'allow' },
      { caseId: 'neg-1', kind: 'negative', toolName: 'write_file', params: { path: '/etc/passwd' }, expectedDecision: 'block' },
    ],
    affectedTools: ['write_file'],
    sourceTrace: { scribeArtifactId },
    risks: [],
    generatedAt: new Date().toISOString(),
  };
}
|
|
186
|
+
|
|
187
|
+
/**
 * Scripted evaluator-stage payload that approves the artificer output.
 *
 * @param taskId              Task id echoed back in the payload.
 * @param artificerArtifactId Id of the artificer artifact under evaluation; written to
 *                            both `sourceArtificerArtifactId` and `sourceTrace`.
 * @returns The payload as `unknown`, timestamped with the current time.
 */
function evaluatorApproved(taskId: string, artificerArtifactId: string): unknown {
  return {
    taskId,
    sourceArtificerArtifactId: artificerArtifactId,
    evaluation: {
      decision: 'approved',
      summary: 'approved',
      score: 0.9,
      strengths: [],
      concerns: [],
      requiredChanges: [],
    },
    sourceTrace: { artificerArtifactId },
    risks: [],
    generatedAt: new Date().toISOString(),
    codeReview: {
      intentConsistency: { aligned: true, explanation: 'ok' },
      scopePrecision: { verdict: 'precise' as const, explanation: 'ok' },
      traceCoverage: { sufficient: true, gaps: [], explanation: 'ok' },
    },
    adversarialCases: [
      {
        caseId: 'adv-1',
        attackType: 'boundary' as const,
        toolName: 'write_file',
        params: { path: '/etc/shadow' },
        expectedDecision: 'block' as const,
        rationale: 'system path',
      },
    ],
    adversarialResult: { passed: true, failedCases: [] },
  };
}
|
|
198
|
+
|
|
199
|
+
// ── Test workspace helpers ───────────────────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
// Workspace directory of the test currently running; afterEach removes it and resets this to ''.
let tmpDir = '';
|
|
202
|
+
|
|
203
|
+
/**
 * Creates a fresh, uniquely named workspace directory under the OS temp dir.
 *
 * Uses `fs.mkdtempSync`, which generates the unique suffix and creates the
 * directory in one step, instead of a hand-built timestamp/random name.
 *
 * @returns Absolute path of the new directory (its name starts with `pd-xpkg-`).
 */
function makeTmpDir(): string {
  return fs.mkdtempSync(path.join(os.tmpdir(), 'pd-xpkg-'));
}
|
|
208
|
+
|
|
209
|
+
/**
 * Seeds one pending dreamer task whose diagnostic JSON carries the pain id.
 *
 * The diagnostic payload is the output of `createPITaskDiagnosticJson`
 * (prompt channel, 1000 ms timeout, no dependencies or artifact refs) with a
 * `sourcePainId` property added on top.
 *
 * @param sm     Initialized state manager that receives the task.
 * @param taskId Id of the dreamer task to create.
 * @param painId Pain signal id written to `sourcePainId`.
 */
async function seedDreamerWithId(sm: RuntimeStateManager, taskId: string, painId: string): Promise<void> {
  const baseMetadata = JSON.parse(
    createPITaskDiagnosticJson({
      dependencyTaskIds: [],
      channel: 'prompt',
      timeoutMs: 1000,
      inputArtifactRefs: [],
      outputArtifactRefs: [],
    }),
  ) as Record<string, unknown>;
  const diagnosticJson = JSON.stringify({ ...baseMetadata, sourcePainId: painId });
  await sm.createTask({
    taskId,
    taskKind: 'dreamer',
    status: 'pending',
    attemptCount: 0,
    maxAttempts: 3,
    diagnosticJson,
  });
}
|
|
216
|
+
|
|
217
|
+
/**
 * Builds a {@link ScriptedAdapter} wired with the happy-path stage outputs
 * (dreamer → philosopher → scribe → artificer → approving evaluator).
 *
 * @param painId Pain id reported by the scripted dreamer output; defaults to
 *               the id used throughout this suite.
 */
function makeAdapter(painId = 'pain-xpkg-001'): ScriptedAdapter {
  return new ScriptedAdapter({
    dreamer: (taskId) => dreamerOut(taskId, painId),
    philosopher: philosopherOut,
    scribe: scribeOut,
    artificer: artificerV2,
    evaluator: evaluatorApproved,
  });
}
|
|
226
|
+
|
|
227
|
+
// ── Cross-Package Acceptance Test ────────────────────────────────────────────
|
|
228
|
+
|
|
229
|
+
describe('Cross-Package Acceptance Test (PRI-408 P1/P2 fixes) — unsplippable chain', () => {
  /** Copies exactly the snapshot fields from a stored artifact record. */
  const toSnapshot = (rec: PIArtifactSnapshot): PIArtifactSnapshot => ({
    artifactId: rec.artifactId,
    artifactKind: rec.artifactKind,
    sourceTaskId: rec.sourceTaskId,
    sourcePrincipleId: rec.sourcePrincipleId,
    sourceRuleId: rec.sourceRuleId,
    lineageArtifactIds: rec.lineageArtifactIds,
    validationStatus: rec.validationStatus,
    contentJson: rec.contentJson,
    createdAt: rec.createdAt,
    updatedAt: rec.updatedAt,
  });

  afterEach(() => {
    vi.restoreAllMocks();
    if (tmpDir) {
      // Best-effort cleanup: a leftover temp directory must not fail the suite.
      try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch { /* ignore */ }
      tmpDir = '';
    }
  });

  it('pain → pipeline → auto-enqueue → edit → approve → activate → observe → deactivate → restore', async () => {
    // ── Step 1: Set up workspace and seed pain signal ──────────────────────
    tmpDir = makeTmpDir();
    const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
    await sm.initialize();
    try {
      await seedDreamerWithId(sm, 'dreamer-xpkg-001', 'pain-xpkg-001');
    } finally {
      // Close even when seeding fails so the database handle is not leaked.
      await sm.close();
    }

    // ── Step 2: Run RuleHost pipeline → candidate + auto-enqueue (P1 #1) ───
    const adapter = makeAdapter();
    const capability: CodeRuleCapability = { enabled: true, artificerAdapter: adapter };
    const pipelineResult = await runRuleHostPipeline({
      workspaceDir: tmpDir,
      painId: 'pain-xpkg-001',
      runtimeAdapter: adapter,
      channel: 'code_tool_hook',
      pollIntervalMs: 5,
      timeoutMs: 5000,
      codeRuleCapability: capability,
      onStoreReady: (store) => { adapter.artifactStore = store; },
    });

    // Verify pipeline produced a candidate
    expect(pipelineResult.decision, JSON.stringify(pipelineResult)).toBe('candidate_ready_for_owner_review');
    expect(pipelineResult.ruleArtifactId).not.toBeNull();
    const originalArtifactId = pipelineResult.ruleArtifactId!;

    // P1 #1 fix: candidate must be auto-enqueued into the ApprovalQueue
    expect(pipelineResult.approvalId).not.toBeNull();
    const approvalId = pipelineResult.approvalId!;

    // ── Step 3: Verify the approval is in the queue with pending status ────
    const sm2 = new RuntimeStateManager({ workspaceDir: tmpDir });
    await sm2.initialize();
    try {
      const approvalStore = new SqliteApprovalQueueStore(sm2.connection);
      const stateStore = new SqliteActivationStateStore(sm2.connection);
      const artifactStore = new SqlitePIArtifactStore(sm2.connection);

      const approvalRecord = await approvalStore.getById(approvalId);
      expect(approvalRecord).not.toBeNull();
      expect(approvalRecord!.status).toBe('pending');
      expect(approvalRecord!.artifactId).toBe(originalArtifactId);
      expect(approvalRecord!.channel).toBe('code_tool_hook');

      // ── Step 4: Owner edits the approval to swap to a revised artifact (P1 #2)
      // Create a revised artifact (simulating owner revision of the rule code)
      const revisedArtifactId = 'art-rule-revised-001';
      const originalArtifact = await artifactStore.getArtifactById(originalArtifactId);
      expect(originalArtifact).not.toBeNull();

      // Store the revised artifact: original content with the rule source
      // replaced (now also blocks /boot) and a revision note added.
      const revisedContentJson = JSON.stringify({
        ...JSON.parse(originalArtifact!.contentJson),
        implementationCode: 'function evaluate(input, helpers) { const p = String(input?.action?.paramsSummary?.path ?? input?.action?.normalizedPath ?? ""); return (p.startsWith("/etc") || p.startsWith("/boot")) ? { decision: "block", matched: true, reason: "system path blocked (revised)" } : { decision: "allow", matched: false, reason: "ok" }; }',
        revisionNote: 'Owner expanded coverage to include /boot path',
      });
      await artifactStore.upsertArtifact({
        artifactId: revisedArtifactId,
        artifactKind: 'rule',
        sourceTaskId: originalArtifact!.sourceTaskId,
        sourcePrincipleId: originalArtifact!.sourcePrincipleId,
        sourceRuleId: originalArtifact!.sourceRuleId,
        lineageArtifactIds: [originalArtifactId],
        validationStatus: 'validated',
        contentJson: revisedContentJson,
        createdAt: new Date().toISOString(),
        updatedAt: new Date().toISOString(),
      });

      // Edit the approval to swap to the revised artifact
      const editResult = await approvalStore.edit({
        approvalId,
        editedBy: 'owner-001',
        newArtifactId: revisedArtifactId,
        editReason: 'Expanded /boot path coverage after review',
        now: new Date().toISOString(),
      });
      expect(editResult.ok).toBe(true);
      if (editResult.ok) {
        expect(editResult.record.artifactId).toBe(revisedArtifactId);
        expect(editResult.record.previousArtifactId).toBe(originalArtifactId);
        expect(editResult.record.editedBy).toBe('owner-001');
      }

      // ── Step 5: Owner approves the edited approval ─────────────────────────
      const approveResult = await approvalStore.approve(approvalId, 'owner-001', 'Approved after edit');
      expect(approveResult.ok).toBe(true);

      // ── Step 6: Dispatch activation with approval binding (P1 #3) ──────────
      const revisedArtifact = await artifactStore.getArtifactById(revisedArtifactId);
      expect(revisedArtifact).not.toBeNull();
      const revisedSnapshot = toSnapshot(revisedArtifact!);

      // The revised artifact is served from the snapshot captured above;
      // every other id is read from the store on demand.
      const artifactReadModel = {
        getArtifactById: async (id: string): Promise<PIArtifactSnapshot | null> => {
          if (id === revisedArtifactId) return revisedSnapshot;
          const rec = await artifactStore.getArtifactById(id);
          return rec ? toSnapshot(rec) : null;
        },
      };

      const dispatcher = new ActivationDispatcher(
        artifactReadModel,
        stateStore,
        {
          writers: [
            new PromptWriter(),
            new RuleHostWriter({ gateDeps: createProductionGateDeps() }),
            new DeferArchiveWriter(),
          ],
          approvalQueueStore: approvalStore,
        },
      );

      const completionService = new ApprovalCompletionService(
        approvalStore,
        dispatcher,
        stateStore,
      );

      const completionResult = await completionService.completeApproval({
        approvalId,
        actor: { kind: 'human', userId: 'owner-001' },
        now: new Date().toISOString(),
      });

      expect(completionResult.ok).toBe(true);
      if (completionResult.ok) {
        expect(completionResult.decision.decision).toBe('activated');
        expect(completionResult.activationId).toBeDefined();
      }

      // ── Step 7: Verify activation is observable ────────────────────────────
      const idempotencyKey = makeIdempotencyKey(revisedArtifactId, 'code_tool_hook');
      const activationRecord = await stateStore.getActivationStatus(idempotencyKey);
      expect(activationRecord).not.toBeNull();
      expect(activationRecord!.artifactId).toBe(revisedArtifactId);
      expect(activationRecord!.channel).toBe('code_tool_hook');
      expect(activationRecord!.deactivatedAt).toBeNull();

      // Verify via listCodeToolHookActivations (P2 #5: default excludes deactivated)
      const activeActivations = await stateStore.listCodeToolHookActivations();
      const ourActivation = activeActivations.find((a) => a.artifactId === revisedArtifactId);
      expect(ourActivation).toBeDefined();
      expect(ourActivation!.deactivatedAt).toBeNull();

      // ── Step 7b: Verify the rule actually affects behavior via the gate (P1 #4)
      // The production RuleHost gate loads all active rules, compiles their code,
      // and calls evaluate() on each tool call. This step simulates that:
      //   1. Load active rules from the activation state store
      //   2. Compile the rule code in the production vm sandbox
      //   3. Call evaluate() with system-path and normal-path inputs
      //   4. Verify the rule blocks system paths and yields no decision for normal paths
      const makeRuleHostInput = (targetPath: string): RuleHostInput => ({
        action: { toolName: 'write_file', normalizedPath: targetPath, paramsSummary: { path: targetPath } },
        workspace: { isRiskPath: targetPath.startsWith('/etc'), planStatus: 'NONE', hasPlanFile: false },
        session: { currentGfi: 0, recentThinking: false },
        evolution: { epTier: 0 },
        derived: { estimatedLineChanges: 1, bashRisk: 'safe' },
      });
      const ruleHost = new RuleHost(
        path.join(tmpDir, '.state'),
        { warn: () => {} },
        { workspaceDir: tmpDir },
      );
      const blockResult = ruleHost.evaluate(makeRuleHostInput('/etc/passwd'));
      expect(blockResult).toBeDefined();
      expect(blockResult.decision).toBe('block');
      expect(blockResult.matched).toBe(true);
      expect(ruleHost.evaluate(makeRuleHostInput('/project/src/main.ts'))).toBeUndefined();

      // ── Step 8: Owner deactivates (rollback) ───────────────────────────────
      const activationId = activationRecord!.activationId;
      const deactivateResult = await stateStore.deactivateActivation(activationId, new Date().toISOString());
      expect(deactivateResult).toBe(true);

      // ── Step 9: Verify behavior is restored (P1 #4: gate-level verification) ─
      // After deactivation our rule is no longer in the active list, so a
      // freshly constructed RuleHost returns no decision (undefined) even for
      // the system path that was blocked in Step 7b. This proves behavior
      // reverts after rollback, not just that the DB record changed.
      const activeAfterDeactivate = await stateStore.listCodeToolHookActivations();
      const stillActive = activeAfterDeactivate.find((a) => a.artifactId === revisedArtifactId);
      expect(stillActive).toBeUndefined();

      // No activation may remain active at all, so no rule is left that could block.
      const gateActiveRules = activeAfterDeactivate.filter((a) => a.deactivatedAt === null);
      expect(gateActiveRules).toHaveLength(0);
      const ruleHostAfterRollback = new RuleHost(
        path.join(tmpDir, '.state'),
        { warn: () => {} },
        { workspaceDir: tmpDir },
      );
      expect(ruleHostAfterRollback.evaluate(makeRuleHostInput('/etc/passwd'))).toBeUndefined();

      // P2 #5 fix: with includeDeactivated=true, the record IS returned
      const allAfterDeactivate = await stateStore.listCodeToolHookActivations(true);
      const deactivatedRecord = allAfterDeactivate.find((a) => a.artifactId === revisedArtifactId);
      expect(deactivatedRecord).toBeDefined();
      expect(deactivatedRecord!.deactivatedAt).not.toBeNull();

      // ── Step 10: Verify the activation status record reflects deactivation ─
      const activationAfterDeactivate = await stateStore.getActivationStatus(idempotencyKey);
      expect(activationAfterDeactivate).not.toBeNull();
      expect(activationAfterDeactivate!.deactivatedAt).not.toBeNull();
    } finally {
      // Close on every path: a failed expectation must not leave the SQLite
      // handle open while afterEach deletes the workspace directory.
      await sm2.close();
    }
  }, 60_000);

  it('P1 #3: dispatch with approved but no approvalId is refused', async () => {
    // This test verifies the approval binding security boundary (P1 #3).
    // A caller cannot bypass owner approval by passing rolloutDecision='approved'
    // without a verified approvalId.

    tmpDir = makeTmpDir();
    const sm = new RuntimeStateManager({ workspaceDir: tmpDir });
    await sm.initialize();
    try {
      const approvalStore = new SqliteApprovalQueueStore(sm.connection);
      const stateStore = new SqliteActivationStateStore(sm.connection);
      const artifactStore = new SqlitePIArtifactStore(sm.connection);

      // Create a test artifact
      const artifactId = 'art-test-001';
      await artifactStore.upsertArtifact({
        artifactId,
        artifactKind: 'principle',
        sourceTaskId: 'task-test-001',
        sourcePrincipleId: 'P_test_001',
        sourceRuleId: undefined,
        lineageArtifactIds: [],
        validationStatus: 'validated',
        contentJson: JSON.stringify({ text: 'Test principle', language: 'en' }),
        createdAt: new Date().toISOString(),
        updatedAt: new Date().toISOString(),
      });

      const artifact = await artifactStore.getArtifactById(artifactId);
      const artifactSnapshot = toSnapshot(artifact!);

      // Read model that knows only the single test artifact.
      const artifactReadModel = {
        getArtifactById: async (id: string): Promise<PIArtifactSnapshot | null> => {
          return id === artifactId ? artifactSnapshot : null;
        },
      };

      const dispatcher = new ActivationDispatcher(
        artifactReadModel,
        stateStore,
        {
          writers: [new PromptWriter()],
          approvalQueueStore: approvalStore,
        },
      );

      // Attempt to dispatch with 'approved' but no approvalId — must be refused
      const result = await dispatcher.dispatch({
        artifactId,
        channel: 'prompt',
        rolloutDecision: 'approved',
        actor: { kind: 'system', source: 'attacker' },
        now: new Date().toISOString(),
        confirm: true,
        // approvalId intentionally omitted — this is the attack vector
      });

      expect(result.decision).toBe('refused');
      expect(result.reason).toContain('approval_id');

      // Verify no activation was created
      const idempotencyKey = makeIdempotencyKey(artifactId, 'prompt');
      const activationRecord = await stateStore.getActivationStatus(idempotencyKey);
      expect(activationRecord).toBeNull();
    } finally {
      // Close on every path so a failed expectation does not leak the SQLite handle.
      await sm.close();
    }
  }, 30_000);
});
|