principles-disciple 1.58.0 → 1.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/openclaw.plugin.json +4 -4
  2. package/package.json +1 -1
  3. package/src/commands/archive-impl.ts +2 -1
  4. package/src/commands/capabilities.ts +2 -1
  5. package/src/commands/context.ts +3 -5
  6. package/src/commands/disable-impl.ts +2 -1
  7. package/src/commands/evolution-status.ts +18 -1
  8. package/src/commands/export.ts +2 -1
  9. package/src/commands/focus.ts +2 -5
  10. package/src/commands/nocturnal-review.ts +2 -1
  11. package/src/commands/nocturnal-rollout.ts +2 -1
  12. package/src/commands/nocturnal-train.ts +2 -1
  13. package/src/commands/pain.ts +2 -1
  14. package/src/commands/pd-reflect.ts +5 -7
  15. package/src/commands/principle-rollback.ts +2 -1
  16. package/src/commands/promote-impl.ts +2 -1
  17. package/src/commands/rollback-impl.ts +2 -1
  18. package/src/commands/rollback.ts +2 -1
  19. package/src/commands/samples.ts +2 -1
  20. package/src/commands/strategy.ts +3 -2
  21. package/src/commands/thinking-os.ts +2 -1
  22. package/src/commands/workflow-debug.ts +2 -1
  23. package/src/core/event-log.ts +42 -3
  24. package/src/core/init.ts +2 -2
  25. package/src/core/principle-compiler/ledger-registrar.ts +11 -2
  26. package/src/core/rule-host-types.ts +4 -0
  27. package/src/core/rule-host.ts +7 -1
  28. package/src/hooks/gate.ts +15 -0
  29. package/src/hooks/prompt.ts +13 -0
  30. package/src/index.ts +13 -4
  31. package/src/service/evolution-worker.ts +30 -0
  32. package/src/service/runtime-summary-service.ts +38 -0
  33. package/src/tools/critique-prompt.ts +4 -5
  34. package/src/tools/deep-reflect.ts +4 -3
  35. package/src/types/event-types.ts +73 -3
  36. package/src/utils/workspace-resolver.ts +44 -3
  37. package/tests/commands/pd-reflect.test.ts +1 -1
  38. package/tests/core/bootstrap-rules.test.ts +14 -0
  39. package/tests/core/evolution-reducer.compilation-retry.test.ts +2 -1
  40. package/tests/core/ledger-registrar.test.ts +5 -2
  41. package/tests/core/principle-compiler.test.ts +4 -2
  42. package/tests/core/regression-v1-9-1.test.ts +2 -1
  43. package/tests/integration/gate-real-io.e2e.test.ts +5 -8
  44. package/tests/integration/pain-id-chain-e2e.test.ts +12 -6
  45. package/tests/integration/principle-compiler-e2e.test.ts +28 -9
  46. package/tests/integration/principle-lifecycle.e2e.test.ts +2 -1
package/src/index.ts CHANGED
@@ -262,8 +262,13 @@ const plugin = {
262
262
 
263
263
  (event: PluginHookSubagentSpawningEvent, _ctx: PluginHookSubagentContext): void | PluginHookSubagentSpawningResult => {
264
264
  try {
265
- // Resolve workspace via official API, falling back to PathResolver
266
- const workspaceDir = resolveWorkspaceDirFromApi(api, event.agentId) || '.';
265
+ // FIX (B): Never fall back to '.' — fail-fast with ERROR log if workspaceDir cannot be resolved.
266
+ // For subagent hooks, we use event.agentId as the target agent for workspace resolution.
267
+ const workspaceDir = resolveWorkspaceDirFromApi(api, event.agentId);
268
+ if (!workspaceDir) {
269
+ api.logger.error(`[PD] subagent_spawning: cannot resolve workspaceDir for agent "${event.agentId}" — skipping shadow routing`);
270
+ return { status: 'ok' };
271
+ }
267
272
  api.logger?.debug?.(`[PD] workspaceDir resolved for subagent_spawning: ${workspaceDir}`);
268
273
  const { agentId, childSessionKey } = event;
269
274
  // Only handle PD local worker profiles
@@ -301,8 +306,12 @@ const plugin = {
301
306
  'subagent_ended',
302
307
  (event: PluginHookSubagentEndedEvent, ctx: PluginHookSubagentContext): void => {
303
308
  try {
304
- // Resolve workspace via official API, falling back to PathResolver
305
- const workspaceDir = resolveWorkspaceDirFromApi(api, undefined) || '.';
309
+ // FIX (B): Never fall back to '.' — fail-fast with ERROR log if workspaceDir cannot be resolved.
310
+ const workspaceDir = resolveWorkspaceDirFromApi(api, undefined);
311
+ if (!workspaceDir) {
312
+ api.logger.error(`[PD] subagent_ended: cannot resolve workspaceDir — skipping shadow observation completion`);
313
+ return;
314
+ }
306
315
  api.logger?.debug?.(`[PD] workspaceDir resolved for subagent_ended: ${workspaceDir}`);
307
316
  // Complete any pending shadow observation for this subagent session
308
317
  const shadowObsId = pendingShadowObservations.get(event.targetSessionKey);
@@ -922,6 +922,8 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
922
922
  if (logger) logger.info(`[PD:EvolutionWorker] Task ${task.id} completed - marker file detected`);
923
923
 
924
924
  let principlesGenerated = 0;
925
+ // C: Track report success for event recording
926
+ let reportSuccess = false;
925
927
  // Create principle from the diagnostician's JSON report.
926
928
  const reportPath = path.join(wctx.stateDir, `.diagnostician_report_${task.id}.json`);
927
929
  if (fs.existsSync(reportPath)) {
@@ -1023,6 +1025,14 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1023
1025
  if (principleId) {
1024
1026
  logger.info(`[PD:EvolutionWorker] Created principle ${principleId} from marker fallback for task ${task.id}`);
1025
1027
  principlesGenerated = 1;
1028
+ // C: Record principle_candidate_created event for observability
1029
+ if (eventLog) {
1030
+ eventLog.recordPrincipleCandidate({
1031
+ principleId,
1032
+ taskId: task.id,
1033
+ source: 'diagnostician',
1034
+ });
1035
+ }
1026
1036
  } else {
1027
1037
  logger.warn(`[PD:EvolutionWorker] createPrincipleFromDiagnosis returned null for task ${task.id} (may be duplicate or blacklisted)`);
1028
1038
  }
@@ -1038,6 +1048,8 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1038
1048
  } catch (err) {
1039
1049
  logger.warn(`[PD:EvolutionWorker] Failed to parse diagnostician report for task ${task.id}: ${String(err)}`);
1040
1050
  }
1051
+ // C: Report was found and processed (try block succeeded or had non-fatal issues)
1052
+ reportSuccess = true;
1041
1053
  } else {
1042
1054
  logger.warn(`[PD:EvolutionWorker] No diagnostician report found for completed task ${task.id} (expected: .diagnostician_report_${task.id}.json)`);
1043
1055
  }
@@ -1059,6 +1071,15 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1059
1071
  // FIX (#187): Remove the task from the diagnostician task store
1060
1072
  await completeDiagnosticianTask(wctx.stateDir, task.id);
1061
1073
 
1074
+ // C: Record diagnostician_report event for observability
1075
+ if (eventLog) {
1076
+ eventLog.recordDiagnosticianReport({
1077
+ taskId: task.id,
1078
+ reportPath,
1079
+ success: reportSuccess,
1080
+ });
1081
+ }
1082
+
1062
1083
  // Log to EvolutionLogger
1063
1084
  const durationMs = task.started_at
1064
1085
  ? Date.now() - new Date(task.started_at).getTime()
@@ -1349,6 +1370,15 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
1349
1370
  await addDiagnosticianTask(wctx.stateDir, highestScoreTask.id, heartbeatContent);
1350
1371
  if (logger) logger.info(`[PD:EvolutionWorker] Wrote diagnostician task to diagnostician_tasks.json for task ${highestScoreTask.id}`);
1351
1372
 
1373
+ // C: Record diagnosis_task_written event for observability
1374
+ if (eventLog) {
1375
+ eventLog.recordDiagnosisTask({
1376
+ taskId: highestScoreTask.id,
1377
+ painEventId: highestScoreTask.painEventId !== undefined ? String(highestScoreTask.painEventId) : undefined,
1378
+ sessionId: highestScoreTask.session_id,
1379
+ });
1380
+ }
1381
+
1352
1382
  // Task store write succeeded, now mark task as in_progress
1353
1383
  highestScoreTask.task = taskDescription;
1354
1384
  highestScoreTask.status = 'in_progress';
@@ -5,6 +5,7 @@ import { listSessions } from '../core/session-tracker.js';
5
5
  import { WorkspaceContext } from '../core/workspace-context.js';
6
6
  import { evaluatePhase3Inputs } from './phase3-input-filter.js';
7
7
  import { TrajectoryRegistry } from '../core/trajectory.js';
8
+ import { getPendingDiagnosticianTasks } from '../core/diagnostician-task-store.js';
8
9
  import type { RuntimeTruth, AnalyticsTruth } from '../types/runtime-summary.js';
9
10
 
10
11
  export type RuntimeDataQuality = 'authoritative' | 'partial';
@@ -60,6 +61,19 @@ export interface RuntimeSummary {
60
61
  };
61
62
  dataQuality: RuntimeDataQuality;
62
63
  };
64
+ // D: Heartbeat Diagnostician chain — separate from evolution/nocturnal chain
65
+ heartbeatDiagnosis: {
66
+ /** Tasks pending in diagnostician_tasks.json (not yet processed by heartbeat) */
67
+ pendingTasks: number;
68
+ /** Total diagnosis tasks written by evolution worker (today from event log) */
69
+ tasksWrittenToday: number;
70
+ /** Total diagnostician reports written (today from event log) */
71
+ reportsWrittenToday: number;
72
+ /** Total principle candidates created from heartbeat chain (today from event log) */
73
+ candidatesCreatedToday: number;
74
+ /** Heartbeats that injected diagnostician tasks (today from event log) */
75
+ heartbeatsInjectedToday: number;
76
+ };
63
77
  phase3: {
64
78
  queueTruthReady: boolean;
65
79
  phase3ShadowEligible: boolean;
@@ -177,6 +191,14 @@ export class RuntimeSummaryService {
177
191
  toolCalls?: number;
178
192
  painSignals?: number;
179
193
  evolutionTasks?: number;
194
+ evolution?: {
195
+ diagnosisTasksWritten?: number;
196
+ diagnosticianReportsWritten?: number;
197
+ principleCandidatesCreated?: number;
198
+ heartbeatsInjected?: number;
199
+ [key: string]: unknown;
200
+ };
201
+ [key: string]: unknown;
180
202
  }>>(
181
203
  path.join(wctx.stateDir, 'logs', 'daily-stats.json'),
182
204
  warnings,
@@ -233,6 +255,20 @@ export class RuntimeSummaryService {
233
255
  const gfiSources = this.buildGfiSources(events, selectedSessionId);
234
256
  const gateStats = this.buildGateStats(events, selectedSessionId, warnings);
235
257
 
258
+ // D: Heartbeat Diagnostician chain — separate from evolution/nocturnal chain
259
+ // Read pending tasks from the diagnostician task store
260
+ const pendingDiagTasks = getPendingDiagnosticianTasks(wctx.stateDir);
261
+ // Read heartbeat diagnosis stats from daily event log
262
+ const todayStr = generatedAt.slice(0, 10);
263
+ const diagDailyStats = dailyStats?.[todayStr]?.evolution;
264
+ const heartbeatDiagnosis = {
265
+ pendingTasks: pendingDiagTasks.length,
266
+ tasksWrittenToday: diagDailyStats?.diagnosisTasksWritten ?? 0,
267
+ reportsWrittenToday: diagDailyStats?.diagnosticianReportsWritten ?? 0,
268
+ candidatesCreatedToday: diagDailyStats?.principleCandidatesCreated ?? 0,
269
+ heartbeatsInjectedToday: diagDailyStats?.heartbeatsInjected ?? 0,
270
+ };
271
+
236
272
  // Read trajectory analytics data (historical data, NOT runtime truth)
237
273
  const trajectoryStats = this.readTrajectoryStats(workspaceDir, warnings);
238
274
 
@@ -310,6 +346,8 @@ export class RuntimeSummaryService {
310
346
  lastSignal: lastPainSignal,
311
347
  },
312
348
  gate: gateStats,
349
+ // D: Heartbeat Diagnostician chain — separate from evolution/nocturnal
350
+ heartbeatDiagnosis,
313
351
  metadata: {
314
352
  generatedAt,
315
353
  workspaceDir,
@@ -29,11 +29,10 @@ export function buildCritiquePromptV2(
29
29
  ): string {
30
30
  const { context, depth = 2, workspaceDir, api } = params;
31
31
 
32
- // 1. 确定工作区目录 (优先级:显式传入 > api.config > official API)
33
- const effectiveWorkspaceDir = workspaceDir
34
- || (api?.config?.workspaceDir as string)
35
- || resolveWorkspaceDirFromApi(api);
36
-
32
+ // FIX (B): Priority: explicitly passed workspaceDir > official API resolution
33
+ // Do NOT chain through api.config?.workspaceDir which may be stale.
34
+ const effectiveWorkspaceDir = workspaceDir || resolveWorkspaceDirFromApi(api);
35
+
37
36
  if (!effectiveWorkspaceDir) {
38
37
  throw new Error('Workspace directory is required for deep reflection.');
39
38
  }
@@ -148,10 +148,11 @@ export function createDeepReflectTool(api: OpenClawPluginApi) {
148
148
  * Resolve workspace directory for deep reflection tool.
149
149
  */
150
150
  function resolveReflectionWorkspace(api: OpenClawPluginApi): string {
151
- const dir = (api.config?.workspaceDir as string)
152
- || resolveWorkspaceDirFromApi(api);
151
+ // FIX (B): Only use resolveWorkspaceDirFromApi — do not chain through api.config?.workspaceDir
152
+ // which may be stale. Fail-fast if workspace cannot be resolved.
153
+ const dir = resolveWorkspaceDirFromApi(api);
153
154
  if (!dir) {
154
- throw new WorkspaceNotFoundError('deep-reflect: workspace directory could not be resolved via API or config');
155
+ throw new WorkspaceNotFoundError('deep-reflect: workspace directory could not be resolved via API');
155
156
  }
156
157
  return dir;
157
158
  }
@@ -15,10 +15,15 @@ export type EventType =
15
15
  | 'plan_approval'
16
16
  | 'evolution_task'
17
17
  | 'deep_reflection'
18
-
19
18
  | 'empathy_rollback'
20
19
  | 'error'
21
- | 'warn';
20
+ | 'warn'
21
+ // C: Diagnostician heartbeat chain events
22
+ | 'diagnosis_task' // Diagnostician task written to task store
23
+ | 'heartbeat_diagnosis' // Heartbeat injected diagnostician tasks
24
+ | 'diagnostician_report' // Diagnostician completed and wrote report
25
+ | 'principle_candidate' // Principle candidate created from report
26
+ | 'rule_enforced'; // Rule enforced (matched) during tool call
22
27
 
23
28
  export type EventCategory =
24
29
  | 'success'
@@ -32,7 +37,12 @@ export type EventCategory =
32
37
  | 'promoted'
33
38
  | 'passed'
34
39
  | 'changed'
35
- | 'rolled_back';
40
+ | 'rolled_back'
41
+ // C: New categories for diagnostician heartbeat chain
42
+ | 'written'
43
+ | 'injected'
44
+ | 'created'
45
+ | 'matched';
36
46
 
37
47
  /**
38
48
  * Base event structure for JSONL logging.
@@ -174,6 +184,54 @@ export interface EmpathyRollbackEventData {
174
184
  triggeredBy: 'user_command' | 'natural_language' | 'system';
175
185
  }
176
186
 
187
+ /**
188
+ * C: New event data types for diagnostician heartbeat chain observability.
189
+ * Maps heartbeat_injected -> when prompt.ts injects diagnostician tasks into heartbeat
190
+ */
191
+ export interface HeartbeatDiagnosisEventData {
192
+ taskCount: number;
193
+ taskIds: string[];
194
+ trigger: 'heartbeat' | 'immediate';
195
+ }
196
+
197
+ /**
198
+ * Maps diagnosis_task_written -> when evolution-worker writes to diagnostician_tasks.json
199
+ */
200
+ export interface DiagnosisTaskEventData {
201
+ taskId: string;
202
+ painEventId?: string;
203
+ sessionId?: string;
204
+ }
205
+
206
+ /**
207
+ * Maps diagnostician_report_written -> when diagnostician completes and writes report
208
+ */
209
+ export interface DiagnosticianReportEventData {
210
+ taskId: string;
211
+ reportPath: string;
212
+ success: boolean;
213
+ }
214
+
215
+ /**
216
+ * Maps principle_candidate_created -> when evolution-worker extracts principle from report
217
+ */
218
+ export interface PrincipleCandidateEventData {
219
+ principleId: string;
220
+ taskId: string;
221
+ source: 'diagnostician' | 'nocturnal' | 'manual';
222
+ }
223
+
224
+ /**
225
+ * Maps rule_enforced -> when RuleHost evaluate() returns matched during tool call
226
+ */
227
+ export interface RuleEnforcedEventData {
228
+ ruleId: string;
229
+ principleId: string;
230
+ enforcement: 'warn' | 'block' | 'requireApproval';
231
+ toolName: string;
232
+ filePath: string;
233
+ }
234
+
177
235
  // ============== Daily Statistics ==============
178
236
 
179
237
  export interface ToolCallStats {
@@ -264,6 +322,12 @@ export interface EvolutionStats {
264
322
  tasksEnqueued: number;
265
323
  tasksCompleted: number;
266
324
  rulesPromoted: number;
325
+ // C: Diagnostician heartbeat chain counters
326
+ diagnosisTasksWritten: number;
327
+ heartbeatsInjected: number;
328
+ diagnosticianReportsWritten: number;
329
+ principleCandidatesCreated: number;
330
+ rulesEnforced: number;
267
331
  }
268
332
 
269
333
  export interface HookStats {
@@ -422,6 +486,12 @@ export function createEmptyDailyStats(date: string): DailyStats {
422
486
  tasksEnqueued: 0,
423
487
  tasksCompleted: 0,
424
488
  rulesPromoted: 0,
489
+ // C: Diagnostician heartbeat chain counters
490
+ diagnosisTasksWritten: 0,
491
+ heartbeatsInjected: 0,
492
+ diagnosticianReportsWritten: 0,
493
+ principleCandidatesCreated: 0,
494
+ rulesEnforced: 0,
425
495
  },
426
496
  hooks: {
427
497
  total: 0,
@@ -4,7 +4,7 @@
4
4
  * Shared helpers for resolving workspace directories across commands and hooks.
5
5
  */
6
6
 
7
- import type { OpenClawPluginApi } from '../openclaw-sdk.js';
7
+ import type { OpenClawPluginApi, PluginCommandContext } from '../openclaw-sdk.js';
8
8
  import { validateWorkspaceDir, type WorkspaceResolutionContext } from '../core/workspace-dir-validation.js';
9
9
  import { resolveWorkspaceDir } from '../core/workspace-dir-service.js';
10
10
  import { resolveWorkspaceDirFromApi } from '../core/path-resolver.js';
@@ -25,7 +25,10 @@ export function resolveCommandWorkspaceDir(
25
25
  if (ctx.workspaceDir) {
26
26
  const issue = validateWorkspaceDir(ctx.workspaceDir);
27
27
  if (!issue) return ctx.workspaceDir;
28
- api.logger.error(`[PD:Command] ctx.workspaceDir="${ctx.workspaceDir}" is invalid: ${issue}`);
28
+ // Validation failed — fail immediately, do not silently fall back
29
+ const errorMsg = `[PD:Command] ctx.workspaceDir="${ctx.workspaceDir}" is invalid: ${issue}`;
30
+ api.logger.error(errorMsg);
31
+ throw new Error(errorMsg);
29
32
  }
30
33
 
31
34
  // 2. Official OpenClaw API → env vars → config file
@@ -34,13 +37,51 @@ export function resolveCommandWorkspaceDir(
34
37
 
35
38
  // CRITICAL FAILURE: Cannot determine workspace directory
36
39
  const errorMsg = `[PD:Command] CRITICAL: Cannot resolve workspace directory. ` +
37
- `ctx.workspaceDir="${ctx.workspaceDir}" is invalid, and all fallbacks failed. ` +
40
+ `ctx.workspaceDir="${ctx.workspaceDir ?? ''}" is invalid, and all fallbacks failed. ` +
38
41
  `Commands will NOT execute to prevent data corruption.`;
39
42
  api.logger.error(errorMsg);
40
43
 
41
44
  throw new Error(errorMsg);
42
45
  }
43
46
 
47
+ /**
48
+ * Resolve workspace directory for plugin command execution.
49
+ *
50
+ * Chain: ctx.workspaceDir (canonical) → ctx.config.workspaceDir (dispatcher fallback)
51
+ *
52
+ * CRITICAL: Throws if workspaceDir cannot be resolved. Commands must NEVER silently
53
+ * fall back to process.cwd() as this masks configuration errors and can corrupt
54
+ * the wrong workspace.
55
+ *
56
+ * @param ctx - Plugin command context (has workspaceDir + config properties)
57
+ * @param source - Source label for error messages (e.g. 'evolution-status', 'pain')
58
+ */
59
+ export function resolvePluginCommandWorkspaceDir(
60
+ ctx: PluginCommandContext,
61
+ source: string,
62
+ ): string {
63
+ // 1. Canonical workspaceDir field (set by OpenClaw command dispatcher)
64
+ if (ctx.workspaceDir) {
65
+ const issue = validateWorkspaceDir(ctx.workspaceDir);
66
+ if (!issue) return ctx.workspaceDir;
67
+ throw new Error(`[PD:Command:${source}] ctx.workspaceDir="${ctx.workspaceDir}" is invalid: ${issue}`);
68
+ }
69
+
70
+ // 2. Dispatcher may also put workspaceDir in config (legacy/alternative path)
71
+ const configWorkspaceDir = ctx.config?.workspaceDir as string | undefined;
72
+ if (configWorkspaceDir) {
73
+ const issue = validateWorkspaceDir(configWorkspaceDir);
74
+ if (!issue) return configWorkspaceDir;
75
+ throw new Error(`[PD:Command:${source}] ctx.config.workspaceDir="${configWorkspaceDir}" is invalid: ${issue}`);
76
+ }
77
+
78
+ // CRITICAL FAILURE: No workspace directory available
79
+ throw new Error(
80
+ `[PD:Command:${source}] CRITICAL: workspaceDir is not set in ctx.workspaceDir or ctx.config.workspaceDir. ` +
81
+ `Commands cannot execute without a valid workspace. Set OPENCLAW_WORKSPACE_DIR env var or ensure the workspace is properly initialized.`,
82
+ );
83
+ }
84
+
44
85
  /**
45
86
  * Resolve workspace directory for tool hook execution (safe version).
46
87
  * Returns undefined instead of throwing if resolution fails.
@@ -20,7 +20,7 @@ describe('pd-reflect command', () => {
20
20
  it('requires an explicit resolved workspace directory', async () => {
21
21
  const result = await handlePdReflect.handler({} as any);
22
22
  expect(result.isError).toBe(true);
23
- expect(result.text).toContain('Cannot determine workspace directory');
23
+ expect(result.text).toContain('workspaceDir is not set');
24
24
  });
25
25
 
26
26
  it('enqueues into the provided active workspace', async () => {
@@ -235,6 +235,20 @@ describe('bootstrap-rules', () => {
235
235
  // Act & Assert: Should throw
236
236
  expect(() => selectPrinciplesForBootstrap(stateDir, 3)).toThrow('No deterministic principles');
237
237
  });
238
+
239
+ // Regression test for Issue #356
240
+ it('T-01..T-10 as deterministic — no crash on fresh workspace', () => {
241
+ const trainingStates = [
242
+ { principleId: 'T-01', evaluability: 'deterministic', applicableOpportunityCount: 0, observedViolationCount: 0, complianceRate: 1, violationTrend: 0, generatedSampleCount: 0, approvedSampleCount: 0, includedTrainRunIds: [], deployedCheckpointIds: [], internalizationStatus: 'needs_training' },
243
+ { principleId: 'T-02', evaluability: 'deterministic', applicableOpportunityCount: 0, observedViolationCount: 0, complianceRate: 1, violationTrend: 0, generatedSampleCount: 0, approvedSampleCount: 0, includedTrainRunIds: [], deployedCheckpointIds: [], internalizationStatus: 'needs_training' },
244
+ ];
245
+ const principles = trainingStates.map((s) => createLedgerPrinciple(s.principleId, { evaluability: s.evaluability }));
246
+ setupLedger(trainingStates, principles);
247
+ const selected = selectPrinciplesForBootstrap(stateDir, 3);
248
+ expect(selected).toHaveLength(2);
249
+ expect(selected).toContain('T-01');
250
+ expect(selected).toContain('T-02');
251
+ });
238
252
  });
239
253
 
240
254
  describe('bootstrapRules', () => {
@@ -4,6 +4,7 @@ import * as path from 'path';
4
4
  import { afterEach, describe, expect, it } from 'vitest';
5
5
  import { EvolutionReducerImpl } from '../../src/core/evolution-reducer.js';
6
6
  import { loadLedger } from '../../src/core/principle-tree-ledger.js';
7
+ import { safeRmDir } from '../test-utils.js';
7
8
 
8
9
  const tempDirs: string[] = [];
9
10
 
@@ -29,7 +30,7 @@ function makeStateDir(workspace: string): string {
29
30
 
30
31
  afterEach(() => {
31
32
  for (const dir of tempDirs.splice(0)) {
32
- fs.rmSync(dir, { recursive: true, force: true });
33
+ safeRmDir(dir);
33
34
  }
34
35
  });
35
36
 
@@ -106,7 +106,10 @@ describe('ledger-registrar', () => {
106
106
  expect(rule).toBeDefined();
107
107
  expect(rule.id).toBe('R_P_001_auto');
108
108
  expect(rule.type).toBe('gate');
109
- expect(rule.enforcement).toBe('block');
109
+ // FIX: Auto-generated rules default to 'warn' enforcement (not 'block')
110
+ // to prevent false positives like P_001 mis-blocking normal edits.
111
+ // They also start as 'candidate' lifecycle until replay evaluation passes.
112
+ expect(rule.enforcement).toBe('warn');
110
113
  expect(rule.status).toBe('proposed');
111
114
  expect(rule.principleId).toBe('P_001');
112
115
  expect(rule.implementationIds).toContain('IMPL_P_001_auto');
@@ -118,7 +121,7 @@ describe('ledger-registrar', () => {
118
121
  expect(impl.ruleId).toBe('R_P_001_auto');
119
122
  expect(impl.type).toBe('code');
120
123
  expect(impl.coversCondition).toBe('file_write');
121
- expect(impl.lifecycleState).toBe('active');
124
+ expect(impl.lifecycleState).toBe('candidate');
122
125
 
123
126
  // Verify principle linked to rule
124
127
  const principle = ledger.tree.principles['P_001'];
@@ -127,12 +127,14 @@ describe('PrincipleCompiler', () => {
127
127
  const rule = ledger.tree.rules['R_P_066_auto'];
128
128
  expect(rule).toBeDefined();
129
129
  expect(rule.type).toBe('gate');
130
- expect(rule.enforcement).toBe('block');
130
+ // FIX: Auto-generated rules default to 'warn' enforcement
131
+ expect(rule.enforcement).toBe('warn');
131
132
  expect(rule.status).toBe('proposed');
132
133
 
133
134
  const impl = ledger.tree.implementations['IMPL_P_066_auto'];
134
135
  expect(impl).toBeDefined();
135
- expect(impl.lifecycleState).toBe('active');
136
+ // FIX: Auto-generated implementations start as 'candidate' (not 'active')
137
+ expect(impl.lifecycleState).toBe('candidate');
136
138
  });
137
139
 
138
140
  // -----------------------------------------------------------------------
@@ -21,6 +21,7 @@ import {
21
21
  } from '../../src/core/principle-training-state.js';
22
22
  import { isExpectedSubagentError } from '../../src/service/subagent-workflow/subagent-error-utils.js';
23
23
  import { WorkspaceContext } from '../../src/core/workspace-context.js';
24
+ import { safeRmDir } from '../test-utils.js';
24
25
 
25
26
  const tempDirs: string[] = [];
26
27
 
@@ -32,7 +33,7 @@ function makeTempDir(): string {
32
33
 
33
34
  afterEach(() => {
34
35
  for (const dir of tempDirs.splice(0)) {
35
- fs.rmSync(dir, { recursive: true, force: true });
36
+ safeRmDir(dir);
36
37
  }
37
38
  });
38
39
 
@@ -16,6 +16,7 @@ import * as os from 'os';
16
16
  import * as path from 'path';
17
17
  import { TrajectoryDatabase } from '../../src/core/trajectory.js';
18
18
  import { EventLog } from '../../src/core/event-log.js';
19
+ import { safeRmDir } from '../test-utils.js';
19
20
 
20
21
  // ─────────────────────────────────────────────────────────────────────
21
22
  // Helper functions
@@ -83,12 +84,8 @@ function createTestWorkspace(): TestWorkspace {
83
84
 
84
85
  function cleanupWorkspace(ws: TestWorkspace | null): void {
85
86
  if (!ws) return;
86
- try {
87
- ws.trajectory?.dispose();
88
- fs.rmSync(ws.workspaceDir, { recursive: true, force: true });
89
- } catch {
90
- // ignore
91
- }
87
+ ws.trajectory?.dispose();
88
+ safeRmDir(ws.workspaceDir);
92
89
  }
93
90
 
94
91
  // ─────────────────────────────────────────────────────────────────────
@@ -237,8 +234,8 @@ describe('Gate: Resilience', () => {
237
234
 
238
235
  describe('RESILIENCE: Missing state directory', () => {
239
236
  it('EventLog MUST handle missing logs directory', () => {
240
- // Remove state directory
241
- fs.rmSync(ws!.stateDir, { recursive: true, force: true });
237
+ // Remove state directory (safeRmDir handles Windows EPERM from held handles)
238
+ safeRmDir(ws!.stateDir);
242
239
 
243
240
  // Attempt to create event log
244
241
  // Should recreate the directory
@@ -5,8 +5,9 @@
5
5
  * 1. recordPainEvent() returns AUTOINCREMENT row ID as number
6
6
  * 2. createPrincipleFromDiagnosis(painId: String(painEventId))
7
7
  * 3. derivedFromPainIds stores the stringified numeric ID
8
- * 4. PrincipleCompiler.compileOne() succeeds (registers active implementation)
9
- * 5. RuleHost.evaluate(matching input) → block
8
+ * 4. PrincipleCompiler.compileOne() succeeds (registers candidate implementation)
9
+ * 5. Promote to active
10
+ * 6. RuleHost.evaluate(matching input) → block
10
11
  * 7. RuleHost.evaluate(non-matching input) → undefined (passthrough)
11
12
  *
12
13
  * Pain ID chain fixed in commits 4b0dce59 and 0146bbb7:
@@ -25,7 +26,9 @@ import { RuleHost } from '../../src/core/rule-host.js';
25
26
  import { EvolutionReducerImpl } from '../../src/core/evolution-reducer.js';
26
27
  import {
27
28
  loadLedger,
29
+ transitionImplementationState,
28
30
  } from '../../src/core/principle-tree-ledger.js';
31
+ import { safeRmDir } from '../test-utils.js';
29
32
  import type { RuleHostInput } from '../../src/core/rule-host-types.js';
30
33
 
31
34
  // ---------------------------------------------------------------------------
@@ -52,7 +55,7 @@ function createTestWorkspace(): TestWorkspace {
52
55
 
53
56
  function disposeTestWorkspace(ws: TestWorkspace): void {
54
57
  ws.trajectory.dispose();
55
- fs.rmSync(ws.workspaceDir, { recursive: true, force: true });
58
+ safeRmDir(ws.workspaceDir);
56
59
  }
57
60
 
58
61
  // ---------------------------------------------------------------------------
@@ -131,12 +134,15 @@ describe('Pain ID Chain E2E: pain event → principle → compile → RuleHost',
131
134
  expect(compileResult.ruleId).toBeDefined();
132
135
  expect(compileResult.implementationId).toBeDefined();
133
136
 
134
- // Verify implementation is active
137
+ // Verify implementation is candidate (not active — must be promoted before enforcing)
135
138
  const updatedLedger = loadLedger(ws.stateDir);
136
139
  const impl = updatedLedger.tree.implementations[compileResult.implementationId!];
137
- expect(impl.lifecycleState).toBe('active');
140
+ expect(impl.lifecycleState).toBe('candidate');
138
141
 
139
- // ── Step 5: RuleHost.evaluate(matching input) → block ──
142
+ // ── Step 5: Promote to active so RuleHost will enforce ──
143
+ transitionImplementationState(ws.stateDir, compileResult.implementationId!, 'active');
144
+
145
+ // ── Step 6: RuleHost.evaluate(matching input) → block ──
140
146
  const host = new RuleHost(ws.stateDir, { warn: () => {} });
141
147
 
142
148
  const matchingInput: RuleHostInput = {
@@ -4,9 +4,11 @@
4
4
  * Tests the full chain:
5
5
  * 1. Set up principle in ledger with derivedFromPainIds
6
6
  * 2. Record tool call (bash, failure) and pain event in trajectory DB
7
- * 3. Compile principle via PrincipleCompiler (registers as active + persists code)
8
- * 4. RuleHost.evaluate(matching input) → block
9
- * 5. RuleHost.evaluate(non-matching input) → undefined (passthrough)
7
+ * 3. Compile principle via PrincipleCompiler (registers as 'candidate' NOT 'active')
8
+ * 4. RuleHost.evaluate(matching input) → NO block yet (candidate not loaded)
9
+ * 5. Promote implementation to 'active'
10
+ * 6. RuleHost.evaluate(matching input) → block
11
+ * 7. RuleHost.evaluate(non-matching input) → undefined (passthrough)
10
12
  */
11
13
 
12
14
  import { describe, it, expect, beforeEach, afterEach } from 'vitest';
@@ -19,6 +21,7 @@ import { RuleHost } from '../../src/core/rule-host.js';
19
21
  import {
20
22
  loadLedger,
21
23
  saveLedger,
24
+ transitionImplementationState,
22
25
  } from '../../src/core/principle-tree-ledger.js';
23
26
  import type { RuleHostInput } from '../../src/core/rule-host-types.js';
24
27
 
@@ -132,15 +135,14 @@ describe('Principle Compiler E2E: compile → promote → RuleHost blocks', () =
132
135
 
133
136
  const implId = result.implementationId!;
134
137
 
135
- // Verify the implementation was registered as active (not candidate)
138
+ // Verify the implementation was registered as 'candidate' (not 'active')
139
+ // FIX: Auto-generated implementations start as 'candidate' until explicitly promoted
140
+ // after replay evaluation and human approval. This prevents false-positive blocks.
136
141
  const ledger = loadLedger(ws.stateDir);
137
142
  const impl = ledger.tree.implementations[implId];
138
- expect(impl.lifecycleState).toBe('active');
143
+ expect(impl.lifecycleState).toBe('candidate');
139
144
 
140
- // ── Step 4: Create RuleHost and evaluate with matching input ──
141
- const host = new RuleHost(ws.stateDir, { warn: () => {} });
142
-
143
- // Matching input: bash tool with a heartbeat command
145
+ // Define matching input for RuleHost evaluation (used in both Step 4 and Step 6)
144
146
  const matchingInput: RuleHostInput = {
145
147
  action: {
146
148
  toolName: 'bash',
@@ -166,6 +168,23 @@ describe('Principle Compiler E2E: compile → promote → RuleHost blocks', () =
166
168
  },
167
169
  };
168
170
 
171
+ // ── Step 4: RuleHost should NOT block yet (candidate not loaded) ──
172
+ const hostBeforePromote = new RuleHost(ws.stateDir, { warn: () => {} });
173
+ const noBlockResult = hostBeforePromote.evaluate(matchingInput);
174
+ expect(noBlockResult).toBeUndefined(); // candidate not loaded → no block
175
+
176
+ // ── Step 5: Promote to 'active' so RuleHost will enforce ──
177
+ transitionImplementationState(ws.stateDir, implId, 'active');
178
+
179
+ // Verify promotion
180
+ const ledgerAfterPromote = loadLedger(ws.stateDir);
181
+ const implAfterPromote = ledgerAfterPromote.tree.implementations[implId];
182
+ expect(implAfterPromote.lifecycleState).toBe('active');
183
+
184
+ // ── Step 6: Create RuleHost and evaluate with matching input ──
185
+ const host = new RuleHost(ws.stateDir, { warn: () => {} });
186
+
187
+ // Matching input: bash tool with a heartbeat command (defined above, before Step 4)
169
188
  const blockResult = host.evaluate(matchingInput);
170
189
 
171
190
  // Verify RuleHost blocks the matching input