principles-disciple 1.7.6 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/dist/commands/context.js +5 -15
  2. package/dist/commands/evolution-status.js +2 -9
  3. package/dist/commands/export.js +61 -8
  4. package/dist/commands/nocturnal-review.d.ts +24 -0
  5. package/dist/commands/nocturnal-review.js +265 -0
  6. package/dist/commands/nocturnal-rollout.d.ts +27 -0
  7. package/dist/commands/nocturnal-rollout.js +671 -0
  8. package/dist/commands/nocturnal-train.d.ts +25 -0
  9. package/dist/commands/nocturnal-train.js +919 -0
  10. package/dist/commands/pain.js +8 -21
  11. package/dist/constants/tools.d.ts +2 -2
  12. package/dist/constants/tools.js +1 -1
  13. package/dist/core/adaptive-thresholds.d.ts +186 -0
  14. package/dist/core/adaptive-thresholds.js +300 -0
  15. package/dist/core/config.d.ts +2 -38
  16. package/dist/core/config.js +6 -61
  17. package/dist/core/event-log.d.ts +1 -2
  18. package/dist/core/event-log.js +0 -3
  19. package/dist/core/evolution-engine.js +1 -21
  20. package/dist/core/evolution-reducer.d.ts +7 -1
  21. package/dist/core/evolution-reducer.js +56 -4
  22. package/dist/core/evolution-types.d.ts +61 -9
  23. package/dist/core/evolution-types.js +31 -9
  24. package/dist/core/external-training-contract.d.ts +276 -0
  25. package/dist/core/external-training-contract.js +269 -0
  26. package/dist/core/local-worker-routing.d.ts +175 -0
  27. package/dist/core/local-worker-routing.js +525 -0
  28. package/dist/core/model-deployment-registry.d.ts +218 -0
  29. package/dist/core/model-deployment-registry.js +503 -0
  30. package/dist/core/model-training-registry.d.ts +295 -0
  31. package/dist/core/model-training-registry.js +475 -0
  32. package/dist/core/nocturnal-arbiter.d.ts +159 -0
  33. package/dist/core/nocturnal-arbiter.js +534 -0
  34. package/dist/core/nocturnal-candidate-scoring.d.ts +137 -0
  35. package/dist/core/nocturnal-candidate-scoring.js +266 -0
  36. package/dist/core/nocturnal-compliance.d.ts +175 -0
  37. package/dist/core/nocturnal-compliance.js +824 -0
  38. package/dist/core/nocturnal-dataset.d.ts +224 -0
  39. package/dist/core/nocturnal-dataset.js +443 -0
  40. package/dist/core/nocturnal-executability.d.ts +85 -0
  41. package/dist/core/nocturnal-executability.js +331 -0
  42. package/dist/core/nocturnal-export.d.ts +124 -0
  43. package/dist/core/nocturnal-export.js +275 -0
  44. package/dist/core/nocturnal-paths.d.ts +124 -0
  45. package/dist/core/nocturnal-paths.js +214 -0
  46. package/dist/core/nocturnal-trajectory-extractor.d.ts +242 -0
  47. package/dist/core/nocturnal-trajectory-extractor.js +307 -0
  48. package/dist/core/nocturnal-trinity.d.ts +311 -0
  49. package/dist/core/nocturnal-trinity.js +880 -0
  50. package/dist/core/paths.d.ts +6 -0
  51. package/dist/core/paths.js +6 -0
  52. package/dist/core/principle-training-state.d.ts +121 -0
  53. package/dist/core/principle-training-state.js +321 -0
  54. package/dist/core/promotion-gate.d.ts +238 -0
  55. package/dist/core/promotion-gate.js +529 -0
  56. package/dist/core/session-tracker.d.ts +10 -0
  57. package/dist/core/session-tracker.js +14 -0
  58. package/dist/core/shadow-observation-registry.d.ts +217 -0
  59. package/dist/core/shadow-observation-registry.js +308 -0
  60. package/dist/core/training-program.d.ts +233 -0
  61. package/dist/core/training-program.js +433 -0
  62. package/dist/core/trajectory.d.ts +95 -1
  63. package/dist/core/trajectory.js +220 -6
  64. package/dist/core/workspace-context.d.ts +0 -6
  65. package/dist/core/workspace-context.js +0 -12
  66. package/dist/hooks/bash-risk.d.ts +6 -6
  67. package/dist/hooks/bash-risk.js +8 -8
  68. package/dist/hooks/gate-block-helper.js +1 -1
  69. package/dist/hooks/gate.d.ts +1 -1
  70. package/dist/hooks/gate.js +2 -2
  71. package/dist/hooks/gfi-gate.d.ts +3 -3
  72. package/dist/hooks/gfi-gate.js +15 -14
  73. package/dist/hooks/pain.js +6 -9
  74. package/dist/hooks/progressive-trust-gate.d.ts +21 -49
  75. package/dist/hooks/progressive-trust-gate.js +51 -204
  76. package/dist/hooks/prompt.d.ts +11 -11
  77. package/dist/hooks/prompt.js +158 -72
  78. package/dist/hooks/subagent.js +43 -6
  79. package/dist/i18n/commands.js +8 -8
  80. package/dist/index.js +129 -28
  81. package/dist/service/evolution-worker.d.ts +42 -4
  82. package/dist/service/evolution-worker.js +321 -13
  83. package/dist/service/nocturnal-runtime.d.ts +183 -0
  84. package/dist/service/nocturnal-runtime.js +352 -0
  85. package/dist/service/nocturnal-service.d.ts +163 -0
  86. package/dist/service/nocturnal-service.js +787 -0
  87. package/dist/service/nocturnal-target-selector.d.ts +145 -0
  88. package/dist/service/nocturnal-target-selector.js +315 -0
  89. package/dist/service/phase3-input-filter.d.ts +2 -23
  90. package/dist/service/phase3-input-filter.js +3 -27
  91. package/dist/service/runtime-summary-service.d.ts +0 -10
  92. package/dist/service/runtime-summary-service.js +1 -54
  93. package/dist/tools/deep-reflect.js +2 -1
  94. package/dist/types/event-types.d.ts +2 -10
  95. package/dist/types/runtime-summary.d.ts +1 -8
  96. package/dist/types.d.ts +0 -3
  97. package/dist/types.js +0 -2
  98. package/openclaw.plugin.json +1 -1
  99. package/package.json +1 -1
  100. package/templates/langs/en/skills/pd-mentor/SKILL.md +5 -5
  101. package/templates/langs/zh/skills/pd-mentor/SKILL.md +5 -5
  102. package/templates/pain_settings.json +0 -6
  103. package/dist/commands/trust.d.ts +0 -4
  104. package/dist/commands/trust.js +0 -78
  105. package/dist/core/trust-engine.d.ts +0 -96
  106. package/dist/core/trust-engine.js +0 -286
@@ -0,0 +1,529 @@
1
+ /**
2
+ * Promotion Gate — Checkpoint Promotion State Machine and Gate Logic
3
+ * ==================================================================
4
+ *
5
+ * PURPOSE: Control when a checkpoint can advance from training → shadow → promotion.
6
+ * Training success alone is not enough — a checkpoint must prove it improves
7
+ * bounded worker behavior under the existing offline benchmark and does not
8
+ * regress runtime safety signals.
9
+ *
10
+ * PROMOTION STATES:
11
+ * - rejected: The checkpoint must not be routed
12
+ * - candidate_only: The checkpoint is valid but not yet ready for shadow
13
+ * - shadow_ready: The checkpoint may enter controlled shadow rollout
14
+ * - promotable: The checkpoint may replace the active checkpoint
15
+ *
16
+ * STATE TRANSITIONS:
17
+ * training_completed
18
+ * ↓
19
+ * candidate_only ←── (eval attached, lineage complete)
20
+ * ↓
21
+ * shadow_ready ←── (positive delta, safe constraints)
22
+ * ↓
23
+ * promotable ←── (shadow window passed, orchestrator review passed)
24
+ * ↓
25
+ * deployed
26
+ *
27
+ * PRIMARY OBJECTIVE:
28
+ * maximize reduced_prompt_holdout_delta
29
+ *
30
+ * CONSTRAINT METRICS (must all pass for promotion):
31
+ * - arbiterRejectRate <= baseline + allowedMargin
32
+ * - executabilityRejectRate <= baseline + allowedMargin
33
+ * - reviewedSubsetQuality >= baseline - allowedMargin
34
+ * - routingScopeNotExpanded == true
35
+ *
36
+ * DESIGN CONSTRAINTS:
37
+ * - No automatic promotion without explicit gate approval
38
+ * - Orchestrator review remains mandatory for all promotions
39
+ * - Rollback path must be always available
40
+ * - First rollout limited to `local-reader` only
41
+ */
42
+ import * as fs from 'fs';
43
+ import * as path from 'path';
44
+ import * as crypto from 'crypto';
45
+ import { withLock } from '../utils/file-lock.js';
46
+ import { getCheckpoint, getEvalSummary, } from './model-training-registry.js';
47
+ import { computeShadowStats, } from './shadow-observation-registry.js';
48
+ // ---------------------------------------------------------------------------
49
+ // Constants
50
+ // ---------------------------------------------------------------------------
51
/**
 * Smallest reduced-prompt holdout delta the gate accepts.
 * The gate compares with `>=`, so a delta equal to this value passes.
 */
export const DEFAULT_MIN_DELTA = 0.05;
/**
 * Default tolerance for constraint metrics: each metric may be worse than
 * its baseline by at most this amount and still pass.
 */
export const DEFAULT_ALLOWED_MARGIN = 0.05;
/**
 * Worker profiles eligible for the Phase 7 shadow rollout.
 * Bounded local workers only: local-reader first, local-editor deferred.
 */
const ALLOWED_ROLLOUT_PROFILES = ['local-reader'];
/**
 * File name (inside the state directory) that stores promotion records.
 */
const PROMOTION_REGISTRY_FILE = 'promotion-registry.json';
/**
 * Minimum time, in milliseconds, a checkpoint must spend in shadow_ready
 * before it may move on to promotable.
 *
 * Phase 7 default: 1 hour (3600000 ms), leaving room for real-world
 * feedback before full promotion.
 */
export const MIN_SHADOW_WINDOW_MS = 1000 * 60 * 60; // 1 hour
78
+ // ---------------------------------------------------------------------------
79
+ // Registry Path
80
+ // ---------------------------------------------------------------------------
81
/**
 * Build the full path of the promotion registry file inside `stateDir`.
 */
function getRegistryPath(stateDir) {
    const segments = [stateDir, PROMOTION_REGISTRY_FILE];
    return path.join(...segments);
}
84
/**
 * Create the directory that holds the registry file (including any missing
 * parents) when it is not already present.
 */
function ensureRegistryDir(stateDir) {
    const parentDir = path.dirname(getRegistryPath(stateDir));
    if (fs.existsSync(parentDir)) {
        return;
    }
    fs.mkdirSync(parentDir, { recursive: true });
}
94
+ // ---------------------------------------------------------------------------
95
+ // File Operations
96
+ // ---------------------------------------------------------------------------
97
/**
 * Read the promotion registry from disk.
 *
 * Returns an empty registry (`{ promotions: [] }`) when the file is missing,
 * cannot be read or parsed, or parses to something that is not an object
 * with a `promotions` array. The last two cases are logged as corruption.
 *
 * @param stateDir - Workspace state directory
 * @returns The registry object; `promotions` is always an array
 */
function readRegistry(stateDir) {
    const registryPath = getRegistryPath(stateDir);
    if (!fs.existsSync(registryPath)) {
        return { promotions: [] };
    }
    try {
        const content = fs.readFileSync(registryPath, 'utf-8');
        const parsed = JSON.parse(content);
        // Valid JSON is not necessarily a valid registry: callers index into
        // `promotions` immediately, so reject any other shape here and take
        // the same recovery path as a parse failure.
        if (parsed === null || typeof parsed !== 'object' || !Array.isArray(parsed.promotions)) {
            throw new Error('expected an object with a "promotions" array');
        }
        return parsed;
    }
    catch (err) {
        console.warn(`[promotion-gate] Registry corrupted at ${registryPath}, recovering with empty state: ${String(err)}`);
        return { promotions: [] };
    }
}
114
/**
 * Persist the registry: serialize it to a sibling `.tmp` file first, then
 * rename that file over the registry path so the final path is replaced in
 * a single step.
 */
function writeRegistry(stateDir, registry) {
    ensureRegistryDir(stateDir);
    const finalPath = getRegistryPath(stateDir);
    const stagingPath = finalPath + '.tmp';
    const serialized = JSON.stringify(registry, null, 2);
    fs.writeFileSync(stagingPath, serialized, 'utf-8');
    fs.renameSync(stagingPath, finalPath);
}
124
/**
 * Run `fn` under `withLock` keyed on the registry path.
 * The registry is re-read from disk inside the lock and handed to `fn`;
 * whatever `fn` returns is passed back through `withLock`.
 */
function withPromotionRegistryLock(stateDir, fn) {
    const lockTarget = getRegistryPath(stateDir);
    const runWithFreshRegistry = () => fn(readRegistry(stateDir));
    return withLock(lockTarget, runWithFreshRegistry);
}
134
/**
 * Evaluate whether a checkpoint passes the promotion gate.
 *
 * @param stateDir - Workspace state directory
 * @param params - Evaluation parameters: `checkpointId`, `targetProfile`,
 *   `baselineMetrics`, and optional `minDelta` / `allowedMargin` overrides
 * @returns PromotionGateResult with pass/fail, blockers, per-constraint
 *   checks and the delta check. `suggestedState` is set only when the
 *   evaluation ran to completion (not on the early returns below).
 *
 * FAIL-CLOSED: Returns { passes: false } if:
 * - Profile is not in the allowed rollout list
 * - Checkpoint is not found
 * - No eval attached to checkpoint, or the eval summary cannot be loaded
 * - Delta is missing or below threshold
 * - Any constraint metric regresses beyond allowed margin
 */
export function evaluatePromotionGate(stateDir, params) {
    const { checkpointId, targetProfile, baselineMetrics, minDelta = DEFAULT_MIN_DELTA, allowedMargin = DEFAULT_ALLOWED_MARGIN, } = params;
    const blockers = [];
    const constraintChecks = [];
    // Format a measured value for blocker messages without throwing when the
    // eval summary is missing a numeric field.
    const fmt = (value) => (typeof value === 'number' ? value.toFixed(4) : String(value));
    // Shared shape for the early exits taken when required data is missing.
    const failEarly = () => ({
        passes: false,
        blockers,
        constraintChecks: [],
        deltaCheck: { actual: 0, threshold: minDelta, passed: false },
    });
    // --- Check 0: Target profile must be in the allowed rollout list ---
    // Recorded as a blocker rather than an immediate return so the caller
    // still sees every other failing check in the same result.
    const profileAllowed = ALLOWED_ROLLOUT_PROFILES.includes(targetProfile);
    if (!profileAllowed) {
        blockers.push(`Target profile '${String(targetProfile)}' is not eligible for rollout. ` +
            `Allowed profiles: ${ALLOWED_ROLLOUT_PROFILES.join(', ')}.`);
    }
    // --- Check 1: Checkpoint exists ---
    const checkpoint = getCheckpoint(stateDir, checkpointId);
    if (!checkpoint) {
        blockers.push(`Checkpoint not found: ${checkpointId}`);
        return failEarly();
    }
    // --- Check 2: Has eval attached ---
    if (!checkpoint.lastEvalSummaryRef) {
        blockers.push(`Checkpoint ${checkpointId} has no eval summary attached. ` +
            `Run benchmark evaluation before promotion gate.`);
        return failEarly();
    }
    // --- Check 3: Get eval summary ---
    const evalSummary = getEvalSummary(stateDir, checkpoint.lastEvalSummaryRef);
    if (!evalSummary) {
        blockers.push(`Eval summary '${checkpoint.lastEvalSummaryRef}' not found. ` +
            `Cannot evaluate promotion gate without valid eval.`);
        return failEarly();
    }
    // --- Check 4: Delta must reach the threshold ---
    // A non-numeric delta fails the check instead of throwing on `.toFixed`.
    const delta = evalSummary.delta;
    const deltaCheck = {
        actual: delta,
        threshold: minDelta,
        passed: typeof delta === 'number' && delta >= minDelta,
    };
    if (!deltaCheck.passed) {
        blockers.push(`Reduced-prompt holdout delta (${fmt(delta)}) is below threshold (${minDelta}). ` +
            `Checkpoint must show positive improvement to be promoted.`);
    }
    // --- Check 5: Arbiter reject rate constraint ---
    // Prefer shadow statistics when they are flagged statistically
    // significant; otherwise fall back to the eval verdict as a coarse proxy
    // ('fail' maps to a 100% reject rate).
    const shadowStats = computeShadowStats(stateDir, { checkpointId });
    const useShadowEvidence = Boolean(shadowStats && shadowStats.isStatisticallySignificant);
    const sampleSuffix = shadowStats ? `, n=${shadowStats.totalCount}` : '';
    let arbiterRejectRate;
    let arbiterRejectSource;
    if (useShadowEvidence) {
        arbiterRejectRate = shadowStats.rejectRate;
        arbiterRejectSource = 'shadow';
    }
    else {
        arbiterRejectRate = evalSummary.verdict === 'fail' ? 1 : 0;
        arbiterRejectSource = 'eval-proxy';
    }
    const arbiterRejectCheck = {
        constraint: 'arbiterRejectRate',
        actual: arbiterRejectRate,
        baseline: baselineMetrics.arbiterRejectRate,
        threshold: baselineMetrics.arbiterRejectRate + allowedMargin,
        passed: arbiterRejectRate <= baselineMetrics.arbiterRejectRate + allowedMargin,
        source: arbiterRejectSource,
    };
    constraintChecks.push(arbiterRejectCheck);
    if (!arbiterRejectCheck.passed) {
        blockers.push(`arbiterRejectRate regressed: ${fmt(arbiterRejectRate)} > ${arbiterRejectCheck.threshold.toFixed(4)} ` +
            `(baseline: ${baselineMetrics.arbiterRejectRate.toFixed(4)}, margin: ${allowedMargin}) ` +
            `[source: ${arbiterRejectSource}${sampleSuffix}]`);
    }
    // --- Check 6: Executability reject rate constraint ---
    // Shadow evidence: escalation rate plus profile rejection rate.
    // Proxy: a 'fail' verdict maps to 0.1, anything else to 0.
    let executabilityRejectRate;
    let executabilityRejectSource;
    if (useShadowEvidence) {
        executabilityRejectRate = shadowStats.escalationRate + shadowStats.profileRejectedRate;
        executabilityRejectSource = 'shadow';
    }
    else {
        executabilityRejectRate = evalSummary.verdict === 'fail' ? 0.1 : 0;
        executabilityRejectSource = 'eval-proxy';
    }
    const executabilityRejectCheck = {
        constraint: 'executabilityRejectRate',
        actual: executabilityRejectRate,
        baseline: baselineMetrics.executabilityRejectRate,
        threshold: baselineMetrics.executabilityRejectRate + allowedMargin,
        passed: executabilityRejectRate <= baselineMetrics.executabilityRejectRate + allowedMargin,
        source: executabilityRejectSource,
    };
    constraintChecks.push(executabilityRejectCheck);
    if (!executabilityRejectCheck.passed) {
        blockers.push(`executabilityRejectRate regressed: ${fmt(executabilityRejectRate)} > ${executabilityRejectCheck.threshold.toFixed(4)} ` +
            `(baseline: ${baselineMetrics.executabilityRejectRate.toFixed(4)}, margin: ${allowedMargin}) ` +
            `[source: ${executabilityRejectSource}${sampleSuffix}]`);
    }
    // --- Check 7: Reviewed subset quality constraint ---
    // The eval candidate score stands in for reviewed-subset quality; it may
    // fall below the baseline by at most `allowedMargin`.
    const reviewedSubsetQuality = evalSummary.candidateScore;
    const qualityCheck = {
        constraint: 'reviewedSubsetQuality',
        actual: reviewedSubsetQuality,
        baseline: baselineMetrics.reviewedSubsetQuality,
        threshold: baselineMetrics.reviewedSubsetQuality - allowedMargin,
        passed: reviewedSubsetQuality >= baselineMetrics.reviewedSubsetQuality - allowedMargin,
    };
    constraintChecks.push(qualityCheck);
    if (!qualityCheck.passed) {
        blockers.push(`reviewedSubsetQuality regressed: ${fmt(reviewedSubsetQuality)} < ${qualityCheck.threshold.toFixed(4)} ` +
            `(baseline: ${baselineMetrics.reviewedSubsetQuality.toFixed(4)})`);
    }
    // --- Determine if passes ---
    const allPassed = profileAllowed &&
        deltaCheck.passed &&
        arbiterRejectCheck.passed &&
        executabilityRejectCheck.passed &&
        qualityCheck.passed;
    // --- Suggest state based on checks ---
    // A delta of at least twice the threshold is suggested for shadow_ready
    // directly; any other passing result is suggested as candidate_only.
    let suggestedState;
    if (!allPassed) {
        suggestedState = 'rejected';
    }
    else if (delta >= minDelta * 2) {
        suggestedState = 'shadow_ready';
    }
    else {
        suggestedState = 'candidate_only';
    }
    return {
        passes: allPassed,
        suggestedState,
        blockers,
        constraintChecks,
        deltaCheck,
    };
}
296
/**
 * Advance a checkpoint's promotion state and persist the resulting record.
 *
 * The gate is evaluated first; the registry is then updated under the
 * promotion registry lock. A failing gate does not throw here: it is
 * recorded as state `rejected`.
 *
 * STATE TRANSITION RULES:
 * - Any state → rejected: gate fails
 * - Any state → candidate_only: gate passes but orchestrator review has not
 * - shadow_ready → promotable: gate passes + review + shadow window elapsed
 *   (a record without `shadowStartedAt` is promoted immediately, for
 *   backward compatibility)
 * - promotable → promotable: gate passes + review; `promotableAt` is kept
 * - Everything else (new, candidate_only, rejected) → shadow_ready when the
 *   gate and the review both pass
 *
 * @param stateDir - Workspace state directory
 * @param params - Advancement parameters: `checkpointId`, `targetProfile`,
 *   `baselineMetrics`, optional `orchestratorReviewPassed` (default false),
 *   `reviewNote`, `minDelta` and `allowedMargin`
 * @returns The created or updated PromotionRecord
 */
export function advancePromotion(stateDir, params) {
    const { checkpointId, targetProfile, baselineMetrics, orchestratorReviewPassed = false, reviewNote, minDelta = DEFAULT_MIN_DELTA, allowedMargin = DEFAULT_ALLOWED_MARGIN, } = params;
    // First, evaluate the gate
    const gateResult = evaluatePromotionGate(stateDir, {
        checkpointId,
        targetProfile,
        baselineMetrics,
        minDelta,
        allowedMargin,
    });
    return withPromotionRegistryLock(stateDir, (registry) => {
        const now = new Date().toISOString();
        const existingIdx = registry.promotions.findIndex((p) => p.checkpointId === checkpointId);
        const existing = existingIdx >= 0 ? registry.promotions[existingIdx] : null;
        // A checkpoint with no record yet is treated as candidate_only.
        const currentState = existing ? existing.state : 'candidate_only';
        let targetState;
        if (!gateResult.passes) {
            targetState = 'rejected';
        }
        else if (!orchestratorReviewPassed) {
            // Review is ALWAYS required to reach shadow_ready, regardless of delta strength
            targetState = 'candidate_only';
        }
        else if (currentState === 'promotable') {
            // Already promotable and still passing: stay put. Without this
            // branch the record fell through to the shadow_ready case below
            // and flapped promotable → shadow_ready → promotable on repeat calls.
            targetState = 'promotable';
        }
        else if (currentState === 'shadow_ready') {
            // Check shadow window duration before allowing promotion
            const shadowStartedAt = existing?.shadowStartedAt;
            if (shadowStartedAt) {
                const shadowElapsed = Date.now() - new Date(shadowStartedAt).getTime();
                if (shadowElapsed < MIN_SHADOW_WINDOW_MS) {
                    // Shadow window not elapsed yet — stay at shadow_ready
                    targetState = 'shadow_ready';
                }
                else {
                    // Shadow window elapsed — allow promotion to promotable
                    targetState = 'promotable';
                }
            }
            else {
                // No shadowStartedAt, allow promotion (backward compat)
                targetState = 'promotable';
            }
        }
        else {
            // At candidate_only, rejected, or new: advance to shadow_ready
            targetState = 'shadow_ready';
        }
        // Get previous promotion ID for chain
        const previousPromotionId = existing ? existing.promotionId : undefined;
        // Get checkpoint info for lineage
        const checkpoint = getCheckpoint(stateDir, checkpointId);
        const evalSummary = checkpoint?.lastEvalSummaryRef
            ? getEvalSummary(stateDir, checkpoint.lastEvalSummaryRef)
            : null;
        // Get current delta
        const reducedPromptDelta = evalSummary?.delta ?? 0;
        // shadowStartedAt: keep the original start only while the checkpoint
        // stays on the shadow path (shadow_ready or promotable). Entering
        // the path after a demotion to candidate_only or rejected is a fresh
        // start. Off the path, the stored value is carried along unchanged.
        const enteringOrOnShadowPath = targetState === 'shadow_ready' || targetState === 'promotable';
        const wasOnShadowPath = existing !== null &&
            (existing.state === 'shadow_ready' || existing.state === 'promotable');
        let shadowStartedAt;
        if (enteringOrOnShadowPath) {
            shadowStartedAt = existing?.shadowStartedAt && wasOnShadowPath
                ? existing.shadowStartedAt
                : now;
        }
        else {
            shadowStartedAt = existing ? existing.shadowStartedAt : undefined;
        }
        // promotableAt: stamp the moment the checkpoint first becomes
        // promotable; a record that merely stays promotable keeps its stamp.
        let promotableAt;
        if (targetState === 'promotable') {
            promotableAt = currentState === 'promotable' && existing?.promotableAt
                ? existing.promotableAt
                : now;
        }
        else {
            promotableAt = existing ? existing.promotableAt : undefined;
        }
        // Create/update promotion record
        const promotion = {
            promotionId: existing ? existing.promotionId : crypto.randomUUID(),
            checkpointId,
            state: targetState,
            targetProfile,
            targetModelFamily: checkpoint?.targetModelFamily ?? 'unknown',
            reducedPromptDelta,
            constraintMetrics: {
                // Recorded values are always the eval-verdict proxies.
                arbiterRejectRate: evalSummary?.verdict === 'fail' ? 1 : 0,
                executabilityRejectRate: evalSummary?.verdict === 'fail' ? 0.1 : 0,
                reviewedSubsetQuality: evalSummary?.candidateScore ?? 0,
                routingScopeNotExpanded: true, // Always true in Phase 7
            },
            baselineMetrics,
            orchestratorReviewPassed,
            reviewNote,
            stateChangedAt: now,
            createdAt: existing ? existing.createdAt : now,
            shadowStartedAt,
            promotableAt,
            previousPromotionId,
        };
        if (existingIdx >= 0) {
            registry.promotions[existingIdx] = promotion;
        }
        else {
            registry.promotions.push(promotion);
        }
        writeRegistry(stateDir, registry);
        return promotion;
    });
}
433
+ // ---------------------------------------------------------------------------
434
+ // Promotion Queries
435
+ // ---------------------------------------------------------------------------
436
/**
 * Look up the promotion state recorded for a checkpoint.
 * Returns `null` when no record matches or the record carries no state.
 */
export function getPromotionState(stateDir, checkpointId) {
    const { promotions } = readRegistry(stateDir);
    for (const record of promotions) {
        if (record.checkpointId === checkpointId) {
            return record.state ?? null;
        }
    }
    return null;
}
444
/**
 * Fetch the full promotion record for a checkpoint, or `null` if the
 * registry has none.
 */
export function getPromotionRecord(stateDir, checkpointId) {
    const { promotions } = readRegistry(stateDir);
    const match = promotions.find((entry) => entry.checkpointId === checkpointId);
    return match ?? null;
}
451
/**
 * Collect every promotion record whose state equals `state`, in registry order.
 */
export function listPromotionsByState(stateDir, state) {
    const matches = [];
    for (const record of readRegistry(stateDir).promotions) {
        if (record.state === state) {
            matches.push(record);
        }
    }
    return matches;
}
458
/**
 * Collect every promotion record recorded against `targetProfile`, in
 * registry order.
 */
export function listPromotionsForProfile(stateDir, targetProfile) {
    const matches = [];
    for (const record of readRegistry(stateDir).promotions) {
        if (record.targetProfile === targetProfile) {
            matches.push(record);
        }
    }
    return matches;
}
465
+ // ---------------------------------------------------------------------------
466
+ // Rollback Support
467
+ // ---------------------------------------------------------------------------
468
/**
 * Reject a checkpoint, preventing it from being promoted.
 *
 * Writes (or overwrites) the checkpoint's promotion record with state
 * `rejected` under the promotion registry lock. When a record already
 * exists, its `promotionId`, `createdAt`, `targetProfile` and
 * `baselineMetrics` are kept so the rejection does not rewrite which profile
 * the checkpoint was targeting or what it was measured against. Constraint
 * metrics are set to worst-case values and the delta is reset to 0.
 *
 * @param stateDir - Workspace state directory
 * @param checkpointId - Checkpoint to reject
 * @param reason - Reason for rejection (stored as `reviewNote`)
 * @returns The updated PromotionRecord
 */
export function rejectCheckpoint(stateDir, checkpointId, reason) {
    return withPromotionRegistryLock(stateDir, (registry) => {
        const now = new Date().toISOString();
        const existingIdx = registry.promotions.findIndex((p) => p.checkpointId === checkpointId);
        const existing = existingIdx >= 0 ? registry.promotions[existingIdx] : null;
        const checkpoint = getCheckpoint(stateDir, checkpointId);
        const promotion = {
            promotionId: existing ? existing.promotionId : crypto.randomUUID(),
            checkpointId,
            state: 'rejected',
            // Fall back to 'local-reader' only when there is no prior record
            // to take the profile from.
            targetProfile: existing?.targetProfile ?? 'local-reader',
            targetModelFamily: checkpoint?.targetModelFamily ?? existing?.targetModelFamily ?? 'unknown',
            reducedPromptDelta: 0,
            constraintMetrics: {
                arbiterRejectRate: 1,
                executabilityRejectRate: 1,
                reviewedSubsetQuality: 0,
                routingScopeNotExpanded: true,
            },
            baselineMetrics: existing?.baselineMetrics ?? {
                arbiterRejectRate: 0,
                executabilityRejectRate: 0,
                reviewedSubsetQuality: 0,
            },
            orchestratorReviewPassed: false,
            reviewNote: reason,
            stateChangedAt: now,
            createdAt: existing ? existing.createdAt : now,
        };
        if (existingIdx >= 0) {
            registry.promotions[existingIdx] = promotion;
        }
        else {
            registry.promotions.push(promotion);
        }
        writeRegistry(stateDir, registry);
        return promotion;
    });
}
518
+ // ---------------------------------------------------------------------------
519
+ // Default Baseline Metrics
520
+ // ---------------------------------------------------------------------------
521
/**
 * Baseline metrics used by default in Phase 7.
 * New checkpoints are compared against these "acceptable" levels.
 */
export const DEFAULT_BASELINE_METRICS = {
    // 15% max arbiter rejection
    arbiterRejectRate: 0.15,
    // 10% max executability rejection
    executabilityRejectRate: 0.1,
    // 70% minimum quality score
    reviewedSubsetQuality: 0.7,
};
@@ -73,6 +73,16 @@ export declare function clearInjectedProbationIds(sessionId: string, workspaceDi
73
73
  export declare function getSession(sessionId: string): SessionState | undefined;
74
74
  export declare function listSessions(workspaceDir?: string): SessionState[];
75
75
  export declare function clearSession(sessionId: string): void;
76
+ /**
77
+ * Test-only helper: seed a session directly into SessionTracker.sessions.
78
+ * This bypasses the normal tool-call flow to set up test data for
79
+ * checkWorkspaceIdle without requiring full integration test setup.
80
+ *
81
+ * @param sessionId - ID of the session to create or update
82
+ * @param workspaceDir - Workspace directory (optional, for filtering)
83
+ * @param lastActivityAt - Unix timestamp in ms (default: now); also written to lastControlActivityAt
84
+ */
85
+ export declare function seedSessionForTest(sessionId: string, workspaceDir?: string, lastActivityAt?: number): void;
76
86
  export declare function garbageCollectSessions(): void;
77
87
  /**
78
88
  * Get daily statistics summary for a session.
@@ -325,6 +325,20 @@ export function clearSession(sessionId) {
325
325
  }
326
326
  sessions.delete(sessionId);
327
327
  }
328
/**
 * Test-only helper that plants a session in the tracker without going
 * through the normal tool-call flow, so checkWorkspaceIdle can be exercised
 * without a full integration setup.
 *
 * @param sessionId - Session ID
 * @param workspaceDir - Workspace directory (optional, for filtering)
 * @param lastActivityAt - Unix timestamp in ms (default: now); the same
 *   value is stored as the session's last control activity time
 */
export function seedSessionForTest(sessionId, workspaceDir, lastActivityAt) {
    const session = getOrCreateSession(sessionId, workspaceDir);
    const seededAt = lastActivityAt ?? Date.now();
    session.lastActivityAt = seededAt;
    session.lastControlActivityAt = seededAt;
}
328
342
  // Memory cleanup for abandoned sessions (older than 2 hours)
329
343
  export function garbageCollectSessions() {
330
344
  const twoHoursAgo = Date.now() - 2 * 60 * 60 * 1000;