principles-disciple 1.7.6 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/dist/commands/context.js +5 -15
  2. package/dist/commands/evolution-status.js +2 -9
  3. package/dist/commands/export.js +61 -8
  4. package/dist/commands/nocturnal-review.d.ts +24 -0
  5. package/dist/commands/nocturnal-review.js +265 -0
  6. package/dist/commands/nocturnal-rollout.d.ts +27 -0
  7. package/dist/commands/nocturnal-rollout.js +671 -0
  8. package/dist/commands/nocturnal-train.d.ts +25 -0
  9. package/dist/commands/nocturnal-train.js +919 -0
  10. package/dist/commands/pain.js +8 -21
  11. package/dist/constants/tools.d.ts +2 -2
  12. package/dist/constants/tools.js +1 -1
  13. package/dist/core/adaptive-thresholds.d.ts +186 -0
  14. package/dist/core/adaptive-thresholds.js +300 -0
  15. package/dist/core/config.d.ts +2 -38
  16. package/dist/core/config.js +6 -61
  17. package/dist/core/event-log.d.ts +1 -2
  18. package/dist/core/event-log.js +0 -3
  19. package/dist/core/evolution-engine.js +1 -21
  20. package/dist/core/evolution-reducer.d.ts +7 -1
  21. package/dist/core/evolution-reducer.js +56 -4
  22. package/dist/core/evolution-types.d.ts +61 -9
  23. package/dist/core/evolution-types.js +31 -9
  24. package/dist/core/external-training-contract.d.ts +276 -0
  25. package/dist/core/external-training-contract.js +269 -0
  26. package/dist/core/local-worker-routing.d.ts +175 -0
  27. package/dist/core/local-worker-routing.js +525 -0
  28. package/dist/core/model-deployment-registry.d.ts +218 -0
  29. package/dist/core/model-deployment-registry.js +503 -0
  30. package/dist/core/model-training-registry.d.ts +295 -0
  31. package/dist/core/model-training-registry.js +475 -0
  32. package/dist/core/nocturnal-arbiter.d.ts +159 -0
  33. package/dist/core/nocturnal-arbiter.js +534 -0
  34. package/dist/core/nocturnal-candidate-scoring.d.ts +137 -0
  35. package/dist/core/nocturnal-candidate-scoring.js +266 -0
  36. package/dist/core/nocturnal-compliance.d.ts +175 -0
  37. package/dist/core/nocturnal-compliance.js +824 -0
  38. package/dist/core/nocturnal-dataset.d.ts +224 -0
  39. package/dist/core/nocturnal-dataset.js +443 -0
  40. package/dist/core/nocturnal-executability.d.ts +85 -0
  41. package/dist/core/nocturnal-executability.js +331 -0
  42. package/dist/core/nocturnal-export.d.ts +124 -0
  43. package/dist/core/nocturnal-export.js +275 -0
  44. package/dist/core/nocturnal-paths.d.ts +124 -0
  45. package/dist/core/nocturnal-paths.js +214 -0
  46. package/dist/core/nocturnal-trajectory-extractor.d.ts +242 -0
  47. package/dist/core/nocturnal-trajectory-extractor.js +307 -0
  48. package/dist/core/nocturnal-trinity.d.ts +311 -0
  49. package/dist/core/nocturnal-trinity.js +880 -0
  50. package/dist/core/paths.d.ts +6 -0
  51. package/dist/core/paths.js +6 -0
  52. package/dist/core/principle-training-state.d.ts +121 -0
  53. package/dist/core/principle-training-state.js +321 -0
  54. package/dist/core/promotion-gate.d.ts +238 -0
  55. package/dist/core/promotion-gate.js +529 -0
  56. package/dist/core/session-tracker.d.ts +10 -0
  57. package/dist/core/session-tracker.js +14 -0
  58. package/dist/core/shadow-observation-registry.d.ts +217 -0
  59. package/dist/core/shadow-observation-registry.js +308 -0
  60. package/dist/core/training-program.d.ts +233 -0
  61. package/dist/core/training-program.js +433 -0
  62. package/dist/core/trajectory.d.ts +95 -1
  63. package/dist/core/trajectory.js +220 -6
  64. package/dist/core/workspace-context.d.ts +0 -6
  65. package/dist/core/workspace-context.js +0 -12
  66. package/dist/hooks/bash-risk.d.ts +6 -6
  67. package/dist/hooks/bash-risk.js +8 -8
  68. package/dist/hooks/gate-block-helper.js +1 -1
  69. package/dist/hooks/gate.d.ts +1 -1
  70. package/dist/hooks/gate.js +2 -2
  71. package/dist/hooks/gfi-gate.d.ts +3 -3
  72. package/dist/hooks/gfi-gate.js +15 -14
  73. package/dist/hooks/pain.js +6 -9
  74. package/dist/hooks/progressive-trust-gate.d.ts +21 -49
  75. package/dist/hooks/progressive-trust-gate.js +51 -204
  76. package/dist/hooks/prompt.d.ts +11 -11
  77. package/dist/hooks/prompt.js +158 -72
  78. package/dist/hooks/subagent.js +43 -6
  79. package/dist/i18n/commands.js +8 -8
  80. package/dist/index.js +129 -28
  81. package/dist/service/evolution-worker.d.ts +42 -4
  82. package/dist/service/evolution-worker.js +321 -13
  83. package/dist/service/nocturnal-runtime.d.ts +183 -0
  84. package/dist/service/nocturnal-runtime.js +352 -0
  85. package/dist/service/nocturnal-service.d.ts +163 -0
  86. package/dist/service/nocturnal-service.js +787 -0
  87. package/dist/service/nocturnal-target-selector.d.ts +145 -0
  88. package/dist/service/nocturnal-target-selector.js +315 -0
  89. package/dist/service/phase3-input-filter.d.ts +2 -23
  90. package/dist/service/phase3-input-filter.js +3 -27
  91. package/dist/service/runtime-summary-service.d.ts +0 -10
  92. package/dist/service/runtime-summary-service.js +1 -54
  93. package/dist/tools/deep-reflect.js +2 -1
  94. package/dist/types/event-types.d.ts +2 -10
  95. package/dist/types/runtime-summary.d.ts +1 -8
  96. package/dist/types.d.ts +0 -3
  97. package/dist/types.js +0 -2
  98. package/openclaw.plugin.json +1 -1
  99. package/package.json +1 -1
  100. package/templates/langs/en/skills/pd-mentor/SKILL.md +5 -5
  101. package/templates/langs/zh/skills/pd-mentor/SKILL.md +5 -5
  102. package/templates/pain_settings.json +0 -6
  103. package/dist/commands/trust.d.ts +0 -4
  104. package/dist/commands/trust.js +0 -78
  105. package/dist/core/trust-engine.d.ts +0 -96
  106. package/dist/core/trust-engine.js +0 -286
@@ -0,0 +1,475 @@
1
+ /**
2
+ * Model Training Registry — Training Run, Checkpoint, and Eval Summary Lineage
3
+ * =============================================================================
4
+ *
5
+ * PURPOSE: Establish strict auditable lineage from training run → checkpoint → eval
6
+ * so that "deployable" is a controlled state, not a free-text field.
7
+ *
8
+ * ARCHITECTURE:
9
+ * - Registry file: {stateDir}/.state/nocturnal/training-registry.json
10
+ * - Three record types in one store: TrainingRun, Checkpoint, EvalSummary
11
+ * - File locking on all write operations
12
+ * - Family alignment enforced at every transition
13
+ *
14
+ * LINEAGE CHAIN (enforced):
15
+ * TrainingRun → Checkpoint → EvalSummary
16
+ * DatasetFingerprint → TrainingRun → Checkpoint → EvalSummary
17
+ *
18
+ * DEPLOYABILITY RULE:
19
+ * A Checkpoint can only be marked deployable if:
20
+ * 1. It has an attached EvalSummary
21
+ * 2. The EvalSummary has a verdict of 'pass' or 'compare_only' (not 'fail')
22
+ * 3. The EvalSummary's targetModelFamily matches the Checkpoint's targetModelFamily
23
+ * 4. The Checkpoint's trainRun is in 'completed' status
24
+ *
25
+ * DESIGN CONSTRAINTS:
26
+ * - No real training invocation (Phase 4 only)
27
+ * - No checkpoint deploy routing (Phase 5)
28
+ * - No automatic promotion
29
+ * - Registry is append-only for runs and checkpoints
30
+ * - EvalSummary attachment is the only mutable operation on a Checkpoint
31
+ */
32
+ import * as fs from 'fs';
33
+ import * as path from 'path';
34
+ import * as crypto from 'crypto';
35
+ import { withLock } from '../utils/file-lock.js';
36
+ // ---------------------------------------------------------------------------
37
+ // Constants
38
+ // ---------------------------------------------------------------------------
39
/** Location of the registry file, relative to the workspace state directory. */
const REGISTRY_FILE = '.state/nocturnal/training-registry.json';
// ---------------------------------------------------------------------------
// Registry Path
// ---------------------------------------------------------------------------
/**
 * Resolve the on-disk location of the training registry for a workspace.
 *
 * @param stateDir - Workspace state directory
 * @returns `stateDir` joined with the relative registry file path
 */
function getRegistryPath(stateDir) {
    const registryLocation = path.join(stateDir, REGISTRY_FILE);
    return registryLocation;
}
46
/**
 * Create the directory that holds the registry file when it does not exist yet.
 *
 * @param stateDir - Workspace state directory
 */
function ensureRegistryDir(stateDir) {
    const registryDir = path.dirname(getRegistryPath(stateDir));
    if (fs.existsSync(registryDir)) {
        return;
    }
    // recursive: intermediate directories are created as needed
    fs.mkdirSync(registryDir, { recursive: true });
}
56
+ // ---------------------------------------------------------------------------
57
+ // File Operations
58
+ // ---------------------------------------------------------------------------
59
+ /**
60
+ * Read the registry from disk. Returns empty registry if missing.
61
+ */
62
+ function readRegistry(stateDir) {
63
+ const registryPath = getRegistryPath(stateDir);
64
+ if (!fs.existsSync(registryPath)) {
65
+ return { trainingRuns: [], checkpoints: [], evalSummaries: [] };
66
+ }
67
+ try {
68
+ const content = fs.readFileSync(registryPath, 'utf-8');
69
+ return JSON.parse(content);
70
+ }
71
+ catch (err) {
72
+ console.warn(`[model-training-registry] Registry corrupted at ${registryPath}, recovering with empty state: ${String(err)}`);
73
+ return { trainingRuns: [], checkpoints: [], evalSummaries: [] };
74
+ }
75
+ }
76
/**
 * Persist the registry: serialize to a sibling `.tmp` file, then rename it over
 * the registry file so readers never observe a half-written document.
 * Caller must hold the registry lock.
 *
 * @param stateDir - Workspace state directory
 * @param registry - Registry contents to serialize (pretty-printed, 2 spaces)
 */
function writeRegistry(stateDir, registry) {
    ensureRegistryDir(stateDir);
    const target = getRegistryPath(stateDir);
    const staging = `${target}.tmp`;
    const serialized = JSON.stringify(registry, null, 2);
    fs.writeFileSync(staging, serialized, 'utf-8');
    fs.renameSync(staging, target);
}
87
/**
 * Run `fn` against a freshly read registry while holding the lock keyed on the
 * registry file path. The registry is read only after the lock callback starts,
 * and whatever `fn` returns is what the lock helper's callback returns.
 *
 * @param stateDir - Workspace state directory
 * @param fn - Callback receiving the current registry
 * @returns The value produced by `withLock` for the callback
 */
function withRegistryLock(stateDir, fn) {
    const lockTarget = getRegistryPath(stateDir);
    const runAgainstFreshRegistry = () => fn(readRegistry(stateDir));
    return withLock(lockTarget, runAgainstFreshRegistry);
}
97
+ // ---------------------------------------------------------------------------
98
+ // Training Run Operations
99
+ // ---------------------------------------------------------------------------
100
+ /**
101
+ * Valid training run status transitions, keyed by the current status.
+ * Each value lists the statuses a run may move to next:
102
+ * pending → running → completed | failed
103
+ * (no backward transitions; an empty list marks a terminal status)
104
+ */
105
+ const VALID_RUN_TRANSITIONS = {
106
+ pending: ['running'],
107
+ running: ['completed', 'failed'],
108
+ completed: [], // terminal
109
+ failed: [], // terminal
110
+ };
111
/**
 * Append a new training run to the registry.
 *
 * The run receives a freshly generated UUID, starts in `pending` status and
 * has no checkpoints. The registry is written back before returning.
 *
 * @param stateDir - Workspace state directory
 * @param params - Run parameters (experiment, family, dataset/config fingerprints, export id, sample count)
 * @returns The registered TrainingRun
 */
export function registerTrainingRun(stateDir, params) {
    const appendRun = (registry) => {
        const createdAt = new Date().toISOString();
        const trainRunId = crypto.randomUUID();
        const { experimentId, targetModelFamily, datasetFingerprint, exportId, sampleCount, configFingerprint } = params;
        const run = {
            trainRunId,
            experimentId,
            targetModelFamily,
            datasetFingerprint,
            exportId,
            sampleCount,
            configFingerprint,
            createdAt,
            status: 'pending',
            checkpointIds: [],
        };
        registry.trainingRuns.push(run);
        writeRegistry(stateDir, registry);
        return run;
    };
    return withRegistryLock(stateDir, appendRun);
}
139
+ /**
140
+ * Update a training run's status.
141
+ *
142
+ * @throws Error if run not found or transition is invalid
143
+ */
144
+ export function updateTrainingRunStatus(stateDir, trainRunId, newStatus, failureReason) {
145
+ return withRegistryLock(stateDir, (registry) => {
146
+ const idx = registry.trainingRuns.findIndex((r) => r.trainRunId === trainRunId);
147
+ if (idx === -1) {
148
+ throw new Error(`Training run not found: ${trainRunId}`);
149
+ }
150
+ const run = registry.trainingRuns[idx];
151
+ const allowed = VALID_RUN_TRANSITIONS[run.status];
152
+ if (!allowed.includes(newStatus)) {
153
+ throw new Error(`Invalid status transition for training run ${trainRunId}: ${run.status} → ${newStatus}. ` +
154
+ `Allowed transitions from ${run.status}: ${allowed.join(', ') || 'none'}`);
155
+ }
156
+ registry.trainingRuns[idx] = {
157
+ ...run,
158
+ status: newStatus,
159
+ completedAt: newStatus === 'completed' || newStatus === 'failed'
160
+ ? new Date().toISOString()
161
+ : undefined,
162
+ failureReason: newStatus === 'failed' ? failureReason : undefined,
163
+ };
164
+ writeRegistry(stateDir, registry);
165
+ return registry.trainingRuns[idx];
166
+ });
167
+ }
168
/**
 * Convenience wrapper: move a training run to `completed`.
 *
 * @returns The updated TrainingRun
 */
export function completeTrainingRun(stateDir, trainRunId) {
    const completedRun = updateTrainingRunStatus(stateDir, trainRunId, 'completed');
    return completedRun;
}
174
/**
 * Convenience wrapper: move a training run to `failed`, recording `reason`.
 *
 * @returns The updated TrainingRun
 */
export function failTrainingRun(stateDir, trainRunId, reason) {
    const failedRun = updateTrainingRunStatus(stateDir, trainRunId, 'failed', reason);
    return failedRun;
}
180
/**
 * Convenience wrapper: move a training run to `running`.
 *
 * @returns The updated TrainingRun
 */
export function startTrainingRun(stateDir, trainRunId) {
    const runningRun = updateTrainingRunStatus(stateDir, trainRunId, 'running');
    return runningRun;
}
186
/**
 * Look up a training run by ID (read-only; no lock is taken).
 *
 * @returns The matching TrainingRun, or null when none exists
 */
export function getTrainingRun(stateDir, trainRunId) {
    const { trainingRuns } = readRegistry(stateDir);
    const match = trainingRuns.find((candidate) => candidate.trainRunId === trainRunId);
    return match ?? null;
}
193
/**
 * List training runs, newest first (by `createdAt`).
 *
 * @param stateDir - Workspace state directory
 * @param filter - Optional `status` and/or `targetModelFamily`; falsy values are ignored
 * @returns Matching runs sorted by descending creation time
 */
export function listTrainingRuns(stateDir, filter) {
    const wantedStatus = filter?.status;
    const wantedFamily = filter?.targetModelFamily;
    const matching = readRegistry(stateDir).trainingRuns.filter((run) => {
        if (wantedStatus && run.status !== wantedStatus) {
            return false;
        }
        if (wantedFamily && run.targetModelFamily !== wantedFamily) {
            return false;
        }
        return true;
    });
    const createdMs = (run) => new Date(run.createdAt).getTime();
    return matching.sort((a, b) => createdMs(b) - createdMs(a));
}
207
+ // ---------------------------------------------------------------------------
208
+ // Checkpoint Operations
209
+ // ---------------------------------------------------------------------------
210
/**
 * Record a checkpoint produced by a training run and link it to that run.
 *
 * The checkpoint gets a fresh UUID and always starts with `deployable: false`.
 * The parent run's `checkpointIds` list is extended with the new ID.
 *
 * @param stateDir - Workspace state directory
 * @param params - `trainRunId`, `targetModelFamily` and `artifactPath` of the checkpoint
 * @returns The registered Checkpoint
 * @throws Error if the training run is not found
 * @throws Error if the targetModelFamily does not match the run's family
 */
export function registerCheckpoint(stateDir, params) {
    return withRegistryLock(stateDir, (registry) => {
        // Locate the parent run once; the index is reused for the update below
        const runIdx = registry.trainingRuns.findIndex((r) => r.trainRunId === params.trainRunId);
        const parentRun = runIdx === -1 ? undefined : registry.trainingRuns[runIdx];
        if (!parentRun) {
            throw new Error(`Training run not found: ${params.trainRunId}`);
        }
        // Family alignment between run and checkpoint
        const familiesMatch = parentRun.targetModelFamily === params.targetModelFamily;
        if (!familiesMatch) {
            throw new Error(`Target model family mismatch: checkpoint family "${params.targetModelFamily}" ` +
                `does not match training run family "${parentRun.targetModelFamily}"`);
        }
        const createdAt = new Date().toISOString();
        const checkpointId = crypto.randomUUID();
        const checkpoint = {
            checkpointId,
            trainRunId: params.trainRunId,
            targetModelFamily: params.targetModelFamily,
            artifactPath: params.artifactPath,
            createdAt,
            deployable: false, // never deployable at registration time
        };
        registry.checkpoints.push(checkpoint);
        // Link the checkpoint back to its run
        registry.trainingRuns[runIdx] = {
            ...parentRun,
            checkpointIds: [...parentRun.checkpointIds, checkpointId],
        };
        writeRegistry(stateDir, registry);
        return checkpoint;
    });
}
249
/**
 * Look up a checkpoint by ID (read-only; no lock is taken).
 *
 * @returns The matching Checkpoint, or null when none exists
 */
export function getCheckpoint(stateDir, checkpointId) {
    const { checkpoints } = readRegistry(stateDir);
    const match = checkpoints.find((candidate) => candidate.checkpointId === checkpointId);
    return match ?? null;
}
256
/**
 * List checkpoints, newest first (by `createdAt`).
 *
 * @param stateDir - Workspace state directory
 * @param filter - Optional `trainRunId`, `targetModelFamily` (falsy values ignored)
 *                 and `deployable` (applied whenever it is not undefined)
 * @returns Matching checkpoints sorted by descending creation time
 */
export function listCheckpoints(stateDir, filter) {
    const wantedRun = filter?.trainRunId;
    const wantedFamily = filter?.targetModelFamily;
    const wantedDeployable = filter?.deployable;
    const matching = readRegistry(stateDir).checkpoints.filter((checkpoint) => {
        if (wantedRun && checkpoint.trainRunId !== wantedRun) {
            return false;
        }
        if (wantedFamily && checkpoint.targetModelFamily !== wantedFamily) {
            return false;
        }
        if (wantedDeployable !== undefined && checkpoint.deployable !== wantedDeployable) {
            return false;
        }
        return true;
    });
    const createdMs = (checkpoint) => new Date(checkpoint.createdAt).getTime();
    return matching.sort((a, b) => createdMs(b) - createdMs(a));
}
273
/**
 * List the checkpoints of one target model family that are marked deployable.
 *
 * @returns Deployable checkpoints, newest first
 */
export function listDeployableCheckpoints(stateDir, targetModelFamily) {
    const deployableOnly = { targetModelFamily, deployable: true };
    return listCheckpoints(stateDir, deployableOnly);
}
282
+ // ---------------------------------------------------------------------------
283
+ // Eval Summary Operations
284
+ // ---------------------------------------------------------------------------
285
+ /**
286
+ * Attach an eval summary to a checkpoint.
287
+ *
288
+ * @param stateDir - Workspace state directory
289
+ * @param checkpointId - The checkpoint to attach to
290
+ * @param summary - The eval summary to attach
291
+ *
292
+ * @throws Error if checkpoint not found
293
+ * @throws Error if targetModelFamily mismatch between summary and checkpoint
294
+ */
295
+ export function attachEvalSummary(stateDir, checkpointId, summary) {
296
+ return withRegistryLock(stateDir, (registry) => {
297
+ const checkpointIdx = registry.checkpoints.findIndex((c) => c.checkpointId === checkpointId);
298
+ if (checkpointIdx === -1) {
299
+ throw new Error(`Checkpoint not found: ${checkpointId}`);
300
+ }
301
+ const checkpoint = registry.checkpoints[checkpointIdx];
302
+ // FAMILY ALIGNMENT — enforced fail-closed:
303
+ // An eval for a gpt-4 checkpoint cannot be attached to a claude-3 checkpoint
304
+ // (and vice versa), even if the eval verdict is 'pass'.
305
+ if (summary.targetModelFamily !== checkpoint.targetModelFamily) {
306
+ throw new Error(`Family mismatch: eval targets "${summary.targetModelFamily}" ` +
307
+ `but checkpoint "${checkpointId}" is "${checkpoint.targetModelFamily}". ` +
308
+ `EvalSummary.targetModelFamily must match the checkpoint's targetModelFamily.`);
309
+ }
310
+ const evalSummary = {
311
+ ...summary,
312
+ createdAt: new Date().toISOString(),
313
+ };
314
+ registry.evalSummaries.push(evalSummary);
315
+ // Update the checkpoint's lastEvalSummaryRef
316
+ registry.checkpoints[checkpointIdx] = {
317
+ ...checkpoint,
318
+ lastEvalSummaryRef: evalSummary.evalId,
319
+ };
320
+ writeRegistry(stateDir, registry);
321
+ return evalSummary;
322
+ });
323
+ }
324
/**
 * Look up an eval summary by ID (read-only; no lock is taken).
 *
 * @returns The first EvalSummary with that evalId, or null when none exists
 */
export function getEvalSummary(stateDir, evalId) {
    const { evalSummaries } = readRegistry(stateDir);
    const match = evalSummaries.find((candidate) => candidate.evalId === evalId);
    return match ?? null;
}
331
/**
 * List eval summaries, newest first (by `createdAt`).
 *
 * @param stateDir - Workspace state directory
 * @param filter - Optional `checkpointId`, `benchmarkId`, `verdict` and
 *                 `targetModelFamily`; falsy values are ignored
 * @returns Matching eval summaries sorted by descending creation time
 */
export function listEvalSummaries(stateDir, filter) {
    const wantedCheckpoint = filter?.checkpointId;
    const wantedBenchmark = filter?.benchmarkId;
    const wantedVerdict = filter?.verdict;
    const wantedFamily = filter?.targetModelFamily;
    const matching = readRegistry(stateDir).evalSummaries.filter((summary) => {
        if (wantedCheckpoint && summary.checkpointId !== wantedCheckpoint) {
            return false;
        }
        if (wantedBenchmark && summary.benchmarkId !== wantedBenchmark) {
            return false;
        }
        if (wantedVerdict && summary.verdict !== wantedVerdict) {
            return false;
        }
        if (wantedFamily && summary.targetModelFamily !== wantedFamily) {
            return false;
        }
        return true;
    });
    const createdMs = (summary) => new Date(summary.createdAt).getTime();
    return matching.sort((a, b) => createdMs(b) - createdMs(a));
}
351
+ // ---------------------------------------------------------------------------
352
+ // Deployability — Core Gating Logic
353
+ // ---------------------------------------------------------------------------
354
+ /**
355
+ * MARK the deployability status of a checkpoint.
356
+ *
357
+ * DEPLOYABILITY RULE (fail-closed):
358
+ * A checkpoint can only be marked deployable if ALL of:
359
+ * 1. It has an attached EvalSummary (lastEvalSummaryRef is set)
360
+ * 2. The attached EvalSummary has verdict 'pass' or 'compare_only' (not 'fail')
361
+ * 3. The EvalSummary's targetModelFamily matches the Checkpoint's targetModelFamily
362
+ * NOTE: This is enforced at attachEvalSummary() time (see attachEvalSummary).
363
+ * If a mismatched-family eval is attached, attachEvalSummary throws before
364
+ * the registry is modified, so no eval with wrong family can ever reach here.
365
+ * 4. The parent TrainingRun is in 'completed' status
366
+ *
367
+ * @param stateDir - Workspace state directory
368
+ * @param checkpointId - The checkpoint to mark
369
+ * @param deployable - true to mark as deployable; false to revoke
370
+ *
371
+ * @throws Error if checkpoint not found
372
+ * @throws Error if preconditions for deployable=true are not met
373
+ */
374
+ export function markCheckpointDeployable(stateDir, checkpointId, deployable) {
375
+ return withRegistryLock(stateDir, (registry) => {
376
+ const idx = registry.checkpoints.findIndex((c) => c.checkpointId === checkpointId);
377
+ if (idx === -1) {
378
+ throw new Error(`Checkpoint not found: ${checkpointId}`);
379
+ }
380
+ const checkpoint = registry.checkpoints[idx];
381
+ if (deployable) {
382
+ // FAIL-CLOSED: Verify all preconditions
383
+ // 1. Must have an attached eval summary
384
+ if (!checkpoint.lastEvalSummaryRef) {
385
+ throw new Error(`Cannot mark checkpoint ${checkpointId} as deployable: ` +
386
+ `no eval summary attached. Attach an EvalSummary first.`);
387
+ }
388
+ // 2. Find the eval summary
389
+ const evalSummary = registry.evalSummaries.find((e) => e.evalId === checkpoint.lastEvalSummaryRef);
390
+ if (!evalSummary) {
391
+ throw new Error(`Cannot mark checkpoint ${checkpointId} as deployable: ` +
392
+ `eval summary "${checkpoint.lastEvalSummaryRef}" not found`);
393
+ }
394
+ // 3. Verdict must be 'pass' or 'compare_only' (not 'fail')
395
+ if (evalSummary.verdict === 'fail') {
396
+ throw new Error(`Cannot mark checkpoint ${checkpointId} as deployable: ` +
397
+ `eval verdict is '${evalSummary.verdict}' (evalId: ${evalSummary.evalId}). ` +
398
+ `Only 'pass' or 'compare_only' verdicts allow deployment.`);
399
+ }
400
+ // 4. Parent training run must be completed
401
+ const run = registry.trainingRuns.find((r) => r.trainRunId === checkpoint.trainRunId);
402
+ if (!run) {
403
+ throw new Error(`Cannot mark checkpoint ${checkpointId} as deployable: ` +
404
+ `parent training run "${checkpoint.trainRunId}" not found`);
405
+ }
406
+ if (run.status !== 'completed') {
407
+ throw new Error(`Cannot mark checkpoint ${checkpointId} as deployable: ` +
408
+ `parent training run is in '${run.status}' status (must be 'completed')`);
409
+ }
410
+ }
411
+ // Apply the update (both marking deployable and revoking deployability)
412
+ registry.checkpoints[idx] = {
413
+ ...checkpoint,
414
+ deployable,
415
+ // If revoking deployability, also clear the eval ref
416
+ lastEvalSummaryRef: deployable ? checkpoint.lastEvalSummaryRef : undefined,
417
+ };
418
+ writeRegistry(stateDir, registry);
419
+ return registry.checkpoints[idx];
420
+ });
421
+ }
422
/**
 * Convenience: report whether a checkpoint is currently flagged deployable.
 *
 * @returns The checkpoint's `deployable` flag; false when the checkpoint does
 *          not exist or the flag is null/undefined
 */
export function isCheckpointDeployable(stateDir, checkpointId) {
    const checkpoint = getCheckpoint(stateDir, checkpointId);
    if (checkpoint === null || checkpoint === undefined) {
        return false;
    }
    return checkpoint.deployable ?? false;
}
429
+ // ---------------------------------------------------------------------------
430
+ // Registry-Level Queries
431
+ // ---------------------------------------------------------------------------
432
/**
 * Resolve the lineage chain of a checkpoint from a single registry read.
 *
 * @returns `{ run, checkpoint, eval }`, where `eval` is null when no eval is
 *          referenced or the referenced eval cannot be found; null when the
 *          checkpoint or its parent run does not exist
 */
export function getCheckpointLineage(stateDir, checkpointId) {
    const { trainingRuns, checkpoints, evalSummaries } = readRegistry(stateDir);
    const checkpoint = checkpoints.find((c) => c.checkpointId === checkpointId);
    if (!checkpoint) {
        return null;
    }
    const run = trainingRuns.find((r) => r.trainRunId === checkpoint.trainRunId);
    if (!run) {
        return null;
    }
    let attachedEval = null;
    if (checkpoint.lastEvalSummaryRef) {
        attachedEval = evalSummaries.find((e) => e.evalId === checkpoint.lastEvalSummaryRef) ?? null;
    }
    return { run, checkpoint, eval: attachedEval };
}
449
/**
 * Return the whole registry as currently stored on disk
 * (intended for debugging/admin use).
 */
export function getFullRegistry(stateDir) {
    const snapshot = readRegistry(stateDir);
    return snapshot;
}
455
/**
 * Summarize the registry as counters.
 *
 * Runs are counted per status, checkpoints by their truthy `deployable` flag,
 * and evals by verdict ('pass' and 'compare_only' both count as passing).
 *
 * @param stateDir - Workspace state directory
 * @returns Totals and per-category counts for runs, checkpoints and evals
 */
export function getTrainingRegistryStats(stateDir) {
    const { trainingRuns, checkpoints, evalSummaries } = readRegistry(stateDir);
    // Count the records satisfying a predicate
    const tally = (records, matches) => records.reduce((count, record) => (matches(record) ? count + 1 : count), 0);
    const runsWithStatus = (status) => tally(trainingRuns, (run) => run.status === status);
    return {
        totalRuns: trainingRuns.length,
        completedRuns: runsWithStatus('completed'),
        failedRuns: runsWithStatus('failed'),
        pendingRuns: runsWithStatus('pending'),
        runningRuns: runsWithStatus('running'),
        totalCheckpoints: checkpoints.length,
        deployableCheckpoints: tally(checkpoints, (checkpoint) => Boolean(checkpoint.deployable)),
        totalEvals: evalSummaries.length,
        passingEvals: tally(evalSummaries, (summary) => summary.verdict === 'pass' || summary.verdict === 'compare_only'),
        failingEvals: tally(evalSummaries, (summary) => summary.verdict === 'fail'),
    };
}
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Nocturnal Arbiter — Deterministic Validation of Reflection Artifacts
3
+ * ===================================================================
4
+ *
5
+ * PURPOSE: Validate that a reflection artifact passes all deterministic checks
6
+ * before being approved for persistence. This module is PURE FUNCTIONS —
7
+ * no side effects, no file I/O.
8
+ *
9
+ * VALIDATION RULES:
10
+ * 1. JSON is parseable and has required fields
11
+ * 2. principleId matches the target principle
12
+ * 3. sessionId matches the source snapshot
13
+ * 4. All required string fields are non-empty
14
+ * 5. No fields contain placeholder or dummy values
15
+ * 6. artifactId is a valid unique identifier
16
+ * 7. No raw/private content in any text field
17
+ *
18
+ * DESIGN CONSTRAINTS:
19
+ * - Pure functions only — no I/O, no side effects
20
+ * - Deterministic — same input always produces same output
21
+ * - Fail closed — invalid artifacts are rejected, never sanitized
22
+ * - No LLM involvement — all checks are algorithmic
23
+ */
24
+ /**
25
+ * A raw reflection artifact as generated by the reflector.
26
+ * This is the raw output before arbiter validation.
+ *
+ * Every field is optional and typed `unknown`, so nothing about the shape may
+ * be assumed until the value has been validated. `invalid` and `reason` have
+ * no counterpart on NocturnalArtifact — presumably the reflector's way of
+ * declining to produce an artifact; confirm against the arbiter implementation.
27
+ */
28
+ export interface RawReflectionArtifact {
29
+ artifactId?: unknown;
30
+ sessionId?: unknown;
31
+ principleId?: unknown;
32
+ sourceSnapshotRef?: unknown;
33
+ badDecision?: unknown;
34
+ betterDecision?: unknown;
35
+ rationale?: unknown;
36
+ createdAt?: unknown;
37
+ thinkingModelDelta?: unknown;
38
+ planningRatioGain?: unknown;
39
+ invalid?: unknown;
40
+ reason?: unknown;
41
+ }
42
+ /**
43
+ * A validated and approved reflection artifact.
44
+ * This is the output after arbiter validation passes.
+ *
+ * Mirrors RawReflectionArtifact with the eight core fields required and typed
+ * as strings; only the two numeric metrics remain optional.
45
+ */
46
+ export interface NocturnalArtifact {
47
+ artifactId: string;
48
+ sessionId: string;
49
+ principleId: string;
50
+ sourceSnapshotRef: string;
51
+ badDecision: string;
52
+ betterDecision: string;
53
+ rationale: string;
54
+ createdAt: string;
55
+ /** Design-alignment metric: delta in thinking model activation (-1 to 1) */
56
+ thinkingModelDelta?: number;
57
+ /** Design-alignment metric: gain in planning ratio (-1 to 1) */
58
+ planningRatioGain?: number;
59
+ }
60
+ /**
61
+ * Validation failure reason.
+ *
+ * `reason` describes the failure; `field`, when present, names the artifact
+ * field involved (presumably omitted for failures that concern the artifact
+ * as a whole — confirm against the arbiter implementation).
62
+ */
63
+ export interface ArbiterFailure {
64
+ reason: string;
65
+ field?: string;
66
+ }
67
+ /**
68
+ * Result of arbiter validation.
+ *
+ * `passed` selects which optional member is meaningful: `artifact` on success,
+ * `rawInput` on failure. `failures` is always present.
69
+ */
70
+ export interface ArbiterResult {
71
+ /** Whether the artifact passed validation */
72
+ passed: boolean;
73
+ /** The validated artifact (if passed) */
74
+ artifact?: NocturnalArtifact;
75
+ /** The raw input (if failed) */
76
+ rawInput?: RawReflectionArtifact;
77
+ /** Failure reasons (if failed); presumably empty when `passed` is true — confirm */
78
+ failures: ArbiterFailure[];
79
+ }
80
+ /**
81
+ * Validation result for a Trinity stage output.
+ *
+ * Returned by the Dreamer / Philosopher / draft validators declared in this
+ * file. `failures` carries plain-string messages (unlike ArbiterResult, which
+ * uses structured ArbiterFailure entries).
82
+ */
83
+ export interface TrinityStageValidationResult {
84
+ valid: boolean;
85
+ failures: string[];
86
+ }
87
+ /**
88
+ * Validate a Dreamer output contract.
89
+ * Ensures the output is well-formed before passing to Philosopher.
+ *
+ * @param output - Candidate Dreamer output of unknown shape (already parsed)
+ * @returns Validation verdict with the list of failure messages
90
+ */
91
+ export declare function validateDreamerOutput(output: unknown): TrinityStageValidationResult;
92
+ /**
93
+ * Validate a Philosopher output contract.
94
+ * Ensures the output is well-formed before passing to Scribe.
+ *
+ * @param output - Candidate Philosopher output of unknown shape (already parsed)
+ * @returns Validation verdict with the list of failure messages
95
+ */
96
+ export declare function validatePhilosopherOutput(output: unknown): TrinityStageValidationResult;
97
+ /**
98
+ * Validate a TrinityDraftArtifact contract.
99
+ * This is the final artifact before arbiter approval.
+ *
+ * @param draft - Candidate draft artifact of unknown shape (already parsed)
+ * @returns Validation verdict with the list of failure messages
100
+ */
101
+ export declare function validateTrinityDraft(draft: unknown): TrinityStageValidationResult;
102
+ /**
103
+ * Parse and validate a Dreamer output from JSON string.
+ *
+ * @param jsonString - JSON text expected to contain a Dreamer output
+ * @returns Validation verdict (how unparseable JSON is reported is not
+ *          visible from this declaration — presumably as a failure entry; confirm)
104
+ */
105
+ export declare function parseAndValidateDreamerOutput(jsonString: string): TrinityStageValidationResult;
106
+ /**
107
+ * Parse and validate a Philosopher output from JSON string.
+ *
+ * @param jsonString - JSON text expected to contain a Philosopher output
+ * @returns Validation verdict (how unparseable JSON is reported is not
+ *          visible from this declaration — presumably as a failure entry; confirm)
108
+ */
109
+ export declare function parseAndValidatePhilosopherOutput(jsonString: string): TrinityStageValidationResult;
110
+ /**
111
+ * Parse and validate a Trinity draft artifact from JSON string.
+ *
+ * @param jsonString - JSON text expected to contain a Trinity draft artifact
+ * @returns Validation verdict (how unparseable JSON is reported is not
+ *          visible from this declaration — presumably as a failure entry; confirm)
112
+ */
113
+ export declare function parseAndValidateTrinityDraft(jsonString: string): TrinityStageValidationResult;
114
/**
 * Optional cross-validation inputs accepted by validateArtifact() and
 * parseAndValidateArtifact(). Every member is optional; an omitted member
 * disables the corresponding check.
 */
+ export interface ArbiterOptions {
115
+ /**
116
+ * Expected principle ID (from target selection).
117
+ * If provided, the artifact's principleId must match this.
118
+ */
119
+ expectedPrincipleId?: string;
120
+ /**
121
+ * Expected session ID (from snapshot).
122
+ * If provided, the artifact's sessionId must match this.
123
+ */
124
+ expectedSessionId?: string;
125
+ /**
126
+ * Minimum quality thresholds for reflection metrics.
127
+ * If provided, artifacts failing these thresholds are rejected.
128
+ */
129
+ qualityThresholds?: {
130
+ /**
131
+ * Minimum thinkingModelDelta (delta in thinking model activation).
132
+ * Must be > 0 for artifact to pass (cognitive improvement required).
+ * NOTE(review): the field is optional with no default, so "> 0" reads as
+ * the recommended setting rather than an enforced bound — confirm.
133
+ * Default: undefined (no threshold — only range check [-1, 1])
134
+ */
135
+ thinkingModelDeltaMin?: number;
136
+ /**
137
+ * Minimum planningRatioGain.
138
+ * Must be >= -0.5 for artifact to pass (no catastrophic planning regression).
+ * NOTE(review): the field is optional with no default, so ">= -0.5" reads as
+ * the recommended setting rather than an enforced bound — confirm.
139
+ * Default: undefined (no threshold — only range check [-1, 1])
140
+ */
141
+ planningRatioGainMin?: number;
142
+ };
143
+ }
144
+ /**
145
+ * Validate a raw reflection artifact against all arbiter rules.
146
+ *
147
+ * @param raw - The raw artifact JSON (already parsed); typed `unknown`, so any value is accepted as input
148
+ * @param options - Expected values for cross-validation (principle ID, session ID, quality thresholds)
149
+ * @returns ArbiterResult with passed/failed status and details
150
+ */
151
+ export declare function validateArtifact(raw: unknown, options?: ArbiterOptions): ArbiterResult;
152
+ /**
153
+ * Parse and validate a JSON string as a reflection artifact.
154
+ *
155
+ * @param jsonString - Raw JSON string from reflector
156
+ * @param options - Expected values for cross-validation (principle ID, session ID, quality thresholds)
157
+ * @returns ArbiterResult (how unparseable JSON is reported is not visible
+ *          from this declaration — presumably as a failed result; confirm)
158
+ */
159
+ export declare function parseAndValidateArtifact(jsonString: string, options?: ArbiterOptions): ArbiterResult;