principles-disciple 1.7.6 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/dist/commands/context.js +5 -15
  2. package/dist/commands/evolution-status.js +2 -9
  3. package/dist/commands/export.js +61 -8
  4. package/dist/commands/nocturnal-review.d.ts +24 -0
  5. package/dist/commands/nocturnal-review.js +265 -0
  6. package/dist/commands/nocturnal-rollout.d.ts +27 -0
  7. package/dist/commands/nocturnal-rollout.js +671 -0
  8. package/dist/commands/nocturnal-train.d.ts +25 -0
  9. package/dist/commands/nocturnal-train.js +919 -0
  10. package/dist/commands/pain.js +8 -21
  11. package/dist/constants/tools.d.ts +2 -2
  12. package/dist/constants/tools.js +1 -1
  13. package/dist/core/adaptive-thresholds.d.ts +186 -0
  14. package/dist/core/adaptive-thresholds.js +300 -0
  15. package/dist/core/config.d.ts +2 -38
  16. package/dist/core/config.js +6 -61
  17. package/dist/core/event-log.d.ts +1 -2
  18. package/dist/core/event-log.js +0 -3
  19. package/dist/core/evolution-engine.js +1 -21
  20. package/dist/core/evolution-reducer.d.ts +7 -1
  21. package/dist/core/evolution-reducer.js +56 -4
  22. package/dist/core/evolution-types.d.ts +61 -9
  23. package/dist/core/evolution-types.js +31 -9
  24. package/dist/core/external-training-contract.d.ts +276 -0
  25. package/dist/core/external-training-contract.js +269 -0
  26. package/dist/core/local-worker-routing.d.ts +175 -0
  27. package/dist/core/local-worker-routing.js +525 -0
  28. package/dist/core/model-deployment-registry.d.ts +218 -0
  29. package/dist/core/model-deployment-registry.js +503 -0
  30. package/dist/core/model-training-registry.d.ts +295 -0
  31. package/dist/core/model-training-registry.js +475 -0
  32. package/dist/core/nocturnal-arbiter.d.ts +159 -0
  33. package/dist/core/nocturnal-arbiter.js +534 -0
  34. package/dist/core/nocturnal-candidate-scoring.d.ts +137 -0
  35. package/dist/core/nocturnal-candidate-scoring.js +266 -0
  36. package/dist/core/nocturnal-compliance.d.ts +175 -0
  37. package/dist/core/nocturnal-compliance.js +824 -0
  38. package/dist/core/nocturnal-dataset.d.ts +224 -0
  39. package/dist/core/nocturnal-dataset.js +443 -0
  40. package/dist/core/nocturnal-executability.d.ts +85 -0
  41. package/dist/core/nocturnal-executability.js +331 -0
  42. package/dist/core/nocturnal-export.d.ts +124 -0
  43. package/dist/core/nocturnal-export.js +275 -0
  44. package/dist/core/nocturnal-paths.d.ts +124 -0
  45. package/dist/core/nocturnal-paths.js +214 -0
  46. package/dist/core/nocturnal-trajectory-extractor.d.ts +242 -0
  47. package/dist/core/nocturnal-trajectory-extractor.js +307 -0
  48. package/dist/core/nocturnal-trinity.d.ts +311 -0
  49. package/dist/core/nocturnal-trinity.js +880 -0
  50. package/dist/core/paths.d.ts +6 -0
  51. package/dist/core/paths.js +6 -0
  52. package/dist/core/principle-training-state.d.ts +121 -0
  53. package/dist/core/principle-training-state.js +321 -0
  54. package/dist/core/promotion-gate.d.ts +238 -0
  55. package/dist/core/promotion-gate.js +529 -0
  56. package/dist/core/session-tracker.d.ts +10 -0
  57. package/dist/core/session-tracker.js +14 -0
  58. package/dist/core/shadow-observation-registry.d.ts +217 -0
  59. package/dist/core/shadow-observation-registry.js +308 -0
  60. package/dist/core/training-program.d.ts +233 -0
  61. package/dist/core/training-program.js +433 -0
  62. package/dist/core/trajectory.d.ts +95 -1
  63. package/dist/core/trajectory.js +220 -6
  64. package/dist/core/workspace-context.d.ts +0 -6
  65. package/dist/core/workspace-context.js +0 -12
  66. package/dist/hooks/bash-risk.d.ts +6 -6
  67. package/dist/hooks/bash-risk.js +8 -8
  68. package/dist/hooks/gate-block-helper.js +1 -1
  69. package/dist/hooks/gate.d.ts +1 -1
  70. package/dist/hooks/gate.js +2 -2
  71. package/dist/hooks/gfi-gate.d.ts +3 -3
  72. package/dist/hooks/gfi-gate.js +15 -14
  73. package/dist/hooks/pain.js +6 -9
  74. package/dist/hooks/progressive-trust-gate.d.ts +21 -49
  75. package/dist/hooks/progressive-trust-gate.js +51 -204
  76. package/dist/hooks/prompt.d.ts +11 -11
  77. package/dist/hooks/prompt.js +158 -72
  78. package/dist/hooks/subagent.js +43 -6
  79. package/dist/i18n/commands.js +8 -8
  80. package/dist/index.js +129 -28
  81. package/dist/service/evolution-worker.d.ts +42 -4
  82. package/dist/service/evolution-worker.js +321 -13
  83. package/dist/service/nocturnal-runtime.d.ts +183 -0
  84. package/dist/service/nocturnal-runtime.js +352 -0
  85. package/dist/service/nocturnal-service.d.ts +163 -0
  86. package/dist/service/nocturnal-service.js +787 -0
  87. package/dist/service/nocturnal-target-selector.d.ts +145 -0
  88. package/dist/service/nocturnal-target-selector.js +315 -0
  89. package/dist/service/phase3-input-filter.d.ts +2 -23
  90. package/dist/service/phase3-input-filter.js +3 -27
  91. package/dist/service/runtime-summary-service.d.ts +0 -10
  92. package/dist/service/runtime-summary-service.js +1 -54
  93. package/dist/tools/deep-reflect.js +2 -1
  94. package/dist/types/event-types.d.ts +2 -10
  95. package/dist/types/runtime-summary.d.ts +1 -8
  96. package/dist/types.d.ts +0 -3
  97. package/dist/types.js +0 -2
  98. package/openclaw.plugin.json +1 -1
  99. package/package.json +1 -1
  100. package/templates/langs/en/skills/pd-mentor/SKILL.md +5 -5
  101. package/templates/langs/zh/skills/pd-mentor/SKILL.md +5 -5
  102. package/templates/pain_settings.json +0 -6
  103. package/dist/commands/trust.d.ts +0 -4
  104. package/dist/commands/trust.js +0 -78
  105. package/dist/core/trust-engine.d.ts +0 -96
  106. package/dist/core/trust-engine.js +0 -286
@@ -0,0 +1,224 @@
1
+ /**
2
+ * Nocturnal Dataset — Sample Lineage Store and Review State Registry
3
+ * =================================================================
4
+ *
5
+ * PURPOSE: Establish each approved nocturnal sample as a first-class auditable
6
+ * data asset with fingerprint, lineage, review state, and model family binding.
7
+ *
8
+ * ARCHITECTURE:
9
+ * - Registry file: {stateDir}/.state/nocturnal/dataset-registry.json
10
+ * - One JSON array of NocturnalDatasetRecord
11
+ * - Each record is immutable except for reviewStatus, reviewReason, targetModelFamily, and the updatedAt timestamp
12
+ * - sampleFingerprint is the primary key (deterministic: SHA-256 of artifactId+principleId+sessionId)
13
+ *
14
+ * RELATIONSHIP TO NOCTURNAL ARTIFACTS:
15
+ * - Artifacts live in: .state/nocturnal/samples/{artifactId}.json
16
+ * - Dataset records reference artifacts via artifactId and artifactPath
17
+ * - Artifacts are NOT modified by dataset operations
18
+ *
19
+ * DESIGN CONSTRAINTS:
20
+ * - No training run registry (Phase 4)
21
+ * - No checkpoint registry (Phase 4)
22
+ * - No worker routing changes
23
+ * - No JSONL export (that's Task 3.2)
24
+ * - Lineage is append-only for approved records
25
+ * - reviewStatus transitions are the only state mutations allowed
26
+ */
27
+ import type { NocturnalArtifact } from './nocturnal-arbiter.js';
28
+ /**
29
+ * Review status for a nocturnal dataset sample.
30
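+ * Follows the lifecycle: pending_review → approved_for_training | rejected | superseded; approved_for_training → superseded; rejected → pending_review | superseded; superseded is terminal.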
31
+ */
32
+ export type NocturnalReviewStatus = 'pending_review' | 'approved_for_training' | 'rejected' | 'superseded';
33
+ /**
34
+ * A nocturnal dataset record — the immutable lineage entry for one sample.
35
+ *
36
+ * PRIMARY KEY: sampleFingerprint (deterministic SHA-256)
37
+ * MUTABLE FIELDS: reviewStatus, reviewReason, targetModelFamily (via updateTargetModelFamily), and updatedAt
38
+ * IMMUTABLE FIELDS: all others
39
+ */
40
+ export interface NocturnalDatasetRecord {
41
+ /**
42
+ * Deterministic fingerprint: SHA-256(artifactId + principleId + sessionId).
43
+ * Primary key for dataset operations.
44
+ */
45
+ sampleFingerprint: string;
46
+ /** Reference to the original artifact */
47
+ artifactId: string;
48
+ /** Source session */
49
+ sessionId: string;
50
+ /** Target principle that generated this sample */
51
+ principleId: string;
52
+ /** Reference to the trajectory snapshot used */
53
+ sourceSnapshotRef: string;
54
+ /**
55
+ * Current review state.
56
+ * Allowed transitions: pending_review → approved_for_training | rejected | superseded; approved_for_training → superseded; rejected → pending_review | superseded; superseded is terminal.
57
+ */
58
+ reviewStatus: NocturnalReviewStatus;
59
+ /**
60
+ * Human-provided reason for the review decision.
61
+ * Required for approved_for_training and rejected; optional for superseded.
62
+ */
63
+ reviewReason?: string;
64
+ /**
65
+ * Target model family this sample is bound to.
66
+ * REQUIRED for export-ready samples.
67
+ * NULL means "not yet assigned" (pending_review defaults to null).
68
+ */
69
+ targetModelFamily: string | null;
70
+ /**
71
+ * When this sample was first registered in the dataset.
72
+ */
73
+ createdAt: string;
74
+ /**
75
+ * Last time reviewStatus or reviewReason was updated.
76
+ */
77
+ updatedAt: string;
78
+ /**
79
+ * Absolute path to the artifact file.
80
+ */
81
+ artifactPath: string;
82
+ }
83
+ /**
84
+ * Filter options for listing dataset records.
85
+ */
86
+ export interface DatasetFilterOptions {
87
+ /**
88
+ * Filter by review status.
89
+ */
90
+ reviewStatus?: NocturnalReviewStatus | NocturnalReviewStatus[];
91
+ /**
92
+ * Filter by target model family.
93
+ * NULL selects only unassigned records (targetModelFamily === null); omit this field to match any family.
94
+ */
95
+ targetModelFamily?: string | null;
96
+ /**
97
+ * Include only export-ready records.
98
+ * An export-ready record must have:
99
+ * - reviewStatus === 'approved_for_training'
100
+ * - targetModelFamily !== null
101
+ * - artifactPath points to an existing file
102
+ */
103
+ exportReadyOnly?: boolean;
104
+ }
105
+ /**
106
+ * Result of registering a sample.
107
+ */
108
+ export interface RegisterSampleResult {
109
+ /** The registered record */
110
+ record: NocturnalDatasetRecord;
111
+ /** Whether this was a new registration (true) or duplicate link (false) */
112
+ isNew: boolean;
113
+ /**
114
+ * If isNew === false, this points to the existing record.
115
+ */
116
+ existingRecord?: NocturnalDatasetRecord;
117
+ }
118
+ /**
119
+ * Generate a deterministic sample fingerprint from an artifact.
120
+ *
121
+ * FINGERPRINT = SHA-256(artifactId || principleId || sessionId)
122
+ *
123
+ * The fingerprint is deterministic so the same sample always produces
124
+ * the same fingerprint, enabling duplicate detection.
125
+ */
126
+ export declare function generateSampleFingerprint(artifactId: string, principleId: string, sessionId: string): string;
127
+ /**
128
+ * Generate a fingerprint from an existing NocturnalArtifact.
129
+ */
130
+ export declare function generateFingerprintFromArtifact(artifact: NocturnalArtifact): string;
131
+ /**
132
+ * Register an approved nocturnal artifact in the dataset registry.
133
+ *
134
+ * DUPLICATE HANDLING:
135
+ * - If a record with the same sampleFingerprint already exists, returns
136
+ * existingRecord (isNew === false) instead of creating a duplicate.
137
+ * - The original artifact file is never modified.
138
+ *
139
+ * @param workspaceDir - Workspace directory
140
+ * @param artifact - The approved NocturnalArtifact
141
+ * @param artifactPath - Absolute path where the artifact file is stored
142
+ * @param targetModelFamily - Model family binding (required for export-ready)
143
+ * @returns RegisterSampleResult
144
+ */
145
+ export declare function registerSample(workspaceDir: string, artifact: NocturnalArtifact, artifactPath: string, targetModelFamily?: string | null): RegisterSampleResult;
146
+ /**
147
+ * Get a dataset record by fingerprint.
148
+ */
149
+ export declare function getDatasetRecord(workspaceDir: string, sampleFingerprint: string): NocturnalDatasetRecord | null;
150
+ /**
151
+ * Get a dataset record by artifactId.
152
+ */
153
+ export declare function getDatasetRecordByArtifactId(workspaceDir: string, artifactId: string): NocturnalDatasetRecord | null;
154
+ /**
155
+ * List dataset records with optional filtering.
156
+ *
157
+ * @param workspaceDir - Workspace directory
158
+ * @param filter - Optional filter criteria
159
+ * @returns Filtered records sorted by createdAt descending
160
+ */
161
+ export declare function listDatasetRecords(workspaceDir: string, filter?: DatasetFilterOptions): NocturnalDatasetRecord[];
162
+ /**
163
+ * Update the review status of a dataset record.
164
+ *
165
+ * @param workspaceDir - Workspace directory
166
+ * @param sampleFingerprint - The fingerprint of the record to update
167
+ * @param newStatus - The new review status
168
+ * @param reason - Optional reason (required for approved/rejected per spec)
169
+ * @returns Updated record
170
+ * @throws Error if the record is not found, the transition is invalid, or a required reason is missing
171
+ */
172
+ export declare function updateReviewStatus(workspaceDir: string, sampleFingerprint: string, newStatus: NocturnalReviewStatus, reason?: string): NocturnalDatasetRecord;
173
+ /**
174
+ * Update the target model family binding.
175
+ */
176
+ export declare function updateTargetModelFamily(workspaceDir: string, sampleFingerprint: string, targetModelFamily: string | null): NocturnalDatasetRecord;
177
+ /**
178
+ * Check if a sample is export-ready.
179
+ *
180
+ * EXPORT-READY means:
181
+ * - reviewStatus === 'approved_for_training'
182
+ * - targetModelFamily !== null
183
+ * - artifact file exists
184
+ * - lineage fields are expected to be complete (this is not separately verified by the check)
185
+ */
186
+ export declare function isExportReady(workspaceDir: string, sampleFingerprint: string): boolean;
187
+ /**
188
+ * List all export-ready records for a specific target model family.
189
+ */
190
+ export declare function listExportReadyRecords(workspaceDir: string, targetModelFamily?: string | null): NocturnalDatasetRecord[];
191
+ /**
192
+ * Get the artifact path for a dataset record.
193
+ * Verifies the file exists before returning.
194
+ */
195
+ export declare function getArtifactPath(workspaceDir: string, sampleFingerprint: string): string | null;
196
+ /**
197
+ * Read the artifact file for a dataset record.
198
+ * @throws Error if record not found, artifact file missing, or unreadable
199
+ */
200
+ export declare function readDatasetArtifact(workspaceDir: string, sampleFingerprint: string): NocturnalArtifact;
201
+ /**
202
+ * Count records by status for dashboard purposes.
203
+ */
204
+ export declare function getDatasetStats(workspaceDir: string): {
205
+ total: number;
206
+ pendingReview: number;
207
+ approvedForTraining: number;
208
+ rejected: number;
209
+ superseded: number;
210
+ exportReadyByFamily: Record<string, number>;
211
+ };
212
+ /**
213
+ * Scan the samples directory and register any approved artifacts
214
+ * that are not yet in the dataset registry.
215
+ *
216
+ * This is used for:
217
+ * 1. Initial migration of Phase 2 artifacts to Phase 3 dataset
218
+ * 2. Recovering from registry corruption
219
+ *
220
+ * @param workspaceDir - Workspace directory
221
+ * @param targetModelFamily - Default target family for migrated samples
222
+ * @returns Number of newly registered samples
223
+ */
224
+ export declare function migrateSampleArtifacts(workspaceDir: string, targetModelFamily?: string | null): number;
@@ -0,0 +1,443 @@
1
+ /**
2
+ * Nocturnal Dataset — Sample Lineage Store and Review State Registry
3
+ * =================================================================
4
+ *
5
+ * PURPOSE: Establish each approved nocturnal sample as a first-class auditable
6
+ * data asset with fingerprint, lineage, review state, and model family binding.
7
+ *
8
+ * ARCHITECTURE:
9
+ * - Registry file: {stateDir}/.state/nocturnal/dataset-registry.json
10
+ * - One JSON array of NocturnalDatasetRecord
11
+ * - Each record is immutable except for reviewStatus, reviewReason, targetModelFamily, and the updatedAt timestamp
12
+ * - sampleFingerprint is the primary key (deterministic: SHA-256 of artifactId+principleId+sessionId)
13
+ *
14
+ * RELATIONSHIP TO NOCTURNAL ARTIFACTS:
15
+ * - Artifacts live in: .state/nocturnal/samples/{artifactId}.json
16
+ * - Dataset records reference artifacts via artifactId and artifactPath
17
+ * - Artifacts are NOT modified by dataset operations
18
+ *
19
+ * DESIGN CONSTRAINTS:
20
+ * - No training run registry (Phase 4)
21
+ * - No checkpoint registry (Phase 4)
22
+ * - No worker routing changes
23
+ * - No JSONL export (that's Task 3.2)
24
+ * - Lineage is append-only for approved records
25
+ * - reviewStatus transitions are the only state mutations allowed
26
+ */
27
+ import * as fs from 'fs';
28
+ import * as path from 'path';
29
+ import * as crypto from 'crypto';
30
+ import { NocturnalPathResolver, resolveNocturnalDir } from './nocturnal-paths.js';
31
+ import { withLock } from '../utils/file-lock.js';
32
+ // ---------------------------------------------------------------------------
33
+ // Fingerprint Generation
34
+ // ---------------------------------------------------------------------------
35
/**
 * Compute the stable identity hash for a dataset sample.
 *
 * The three identifiers are joined with a `|` separator and the resulting
 * string is hashed with SHA-256. Identical inputs always yield the same
 * digest, which is what makes duplicate detection possible.
 *
 * @param {string} artifactId - Identifier of the nocturnal artifact
 * @param {string} principleId - Identifier of the principle the sample targets
 * @param {string} sessionId - Identifier of the originating session
 * @returns {string} Hex-encoded SHA-256 digest
 */
export function generateSampleFingerprint(artifactId, principleId, sessionId) {
    const hasher = crypto.createHash('sha256');
    hasher.update(`${artifactId}|${principleId}|${sessionId}`, 'utf8');
    return hasher.digest('hex');
}
47
/**
 * Derive the sample fingerprint for an artifact object.
 *
 * Reads `artifactId`, `principleId` and `sessionId` from the artifact and
 * forwards them to generateSampleFingerprint.
 *
 * @param {object} artifact - Artifact carrying the three identifying fields
 * @returns {string} Fingerprint produced by generateSampleFingerprint
 */
export function generateFingerprintFromArtifact(artifact) {
    const { artifactId, principleId, sessionId } = artifact;
    return generateSampleFingerprint(artifactId, principleId, sessionId);
}
53
+ // ---------------------------------------------------------------------------
54
+ // Registry Path
55
+ // ---------------------------------------------------------------------------
56
/**
 * Build the location of the dataset registry file for a workspace.
 *
 * The file is named `dataset-registry.json` and sits directly inside the
 * directory that resolveNocturnalDir returns for the 'ROOT' key.
 *
 * @param {string} workspaceDir - Workspace directory
 * @returns {string} Path to the registry file
 */
function getRegistryPath(workspaceDir) {
    return path.join(resolveNocturnalDir(workspaceDir, 'ROOT'), 'dataset-registry.json');
}
64
/**
 * Create the directory that holds the registry file when it is absent.
 *
 * Missing parent directories are created as well (recursive mkdir).
 *
 * @param {string} workspaceDir - Workspace directory
 */
function ensureRegistryDir(workspaceDir) {
    const parentDir = path.dirname(getRegistryPath(workspaceDir));
    if (fs.existsSync(parentDir)) {
        return;
    }
    fs.mkdirSync(parentDir, { recursive: true });
}
74
/**
 * Load the dataset registry from disk.
 *
 * - A missing registry file yields an empty array (nothing registered yet).
 * - An unreadable file, invalid JSON, or JSON whose root is not an array is
 *   treated as corruption: a warning is logged and an empty array is returned
 *   so callers always receive something they can iterate and push to.
 *
 * @param {string} workspaceDir - Workspace directory
 * @returns {Array<object>} The parsed registry records, or [] on absence/corruption
 */
function readRegistry(workspaceDir) {
    const registryPath = getRegistryPath(workspaceDir);
    try {
        // Read directly instead of exists-then-read so a file removed in
        // between is reported as "missing" rather than as corruption.
        const parsed = JSON.parse(fs.readFileSync(registryPath, 'utf-8'));
        if (!Array.isArray(parsed)) {
            // Valid JSON of the wrong shape would otherwise crash every caller.
            throw new TypeError('registry root is not a JSON array');
        }
        return parsed;
    }
    catch (err) {
        if (err?.code === 'ENOENT') {
            return [];
        }
        // Corrupted registry — fail-safe to empty array, but log the problem
        console.warn(`[nocturnal-dataset] Registry corrupted at ${registryPath}, recovering with empty state: ${String(err)}`);
        return [];
    }
}
92
/**
 * Persist the full record list to the registry file.
 *
 * The JSON is first written to a sibling `<registry>.tmp` file and then
 * renamed over the real registry, so readers never observe a half-written
 * file. The staging name is fixed, so callers are expected to serialize
 * writes (see withRegistryLock).
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {Array<object>} records - Complete list of records to store
 */
function writeRegistry(workspaceDir, records) {
    ensureRegistryDir(workspaceDir);
    const finalPath = getRegistryPath(workspaceDir);
    const stagingPath = `${finalPath}.tmp`;
    const serialized = JSON.stringify(records, null, 2);
    fs.writeFileSync(stagingPath, serialized, 'utf-8');
    fs.renameSync(stagingPath, finalPath);
}
103
+ // ---------------------------------------------------------------------------
104
+ // Core Operations
105
+ // ---------------------------------------------------------------------------
106
/**
 * Run a callback against a freshly read registry while holding the lock
 * that withLock provides for the registry path.
 *
 * The registry is read inside the locked section, so the callback sees the
 * state as of lock acquisition. Writing back is the callback's job.
 * NOTE(review): exclusivity and sync/async behavior come from withLock in
 * ../utils/file-lock.js, which is not visible here — confirm there.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {(records: Array<object>) => *} fn - Work to perform on the records
 * @returns {*} Whatever withLock returns for the callback's result
 */
function withRegistryLock(workspaceDir, fn) {
    const lockTarget = getRegistryPath(workspaceDir);
    const criticalSection = () => fn(readRegistry(workspaceDir));
    return withLock(lockTarget, criticalSection);
}
117
/**
 * Add a nocturnal artifact to the dataset registry.
 *
 * New records always start in the 'pending_review' state. When a record with
 * the same fingerprint is already present, nothing is written and that
 * record is handed back with `isNew === false`. The artifact file itself is
 * never touched.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {object} artifact - Artifact providing artifactId, sessionId, principleId and sourceSnapshotRef
 * @param {string} artifactPath - Location of the artifact file (stored normalized)
 * @param {string|null} [targetModelFamily=null] - Model family binding, null when unassigned
 * @returns {{record: object, isNew: boolean, existingRecord?: object}} Registration outcome
 */
export function registerSample(workspaceDir, artifact, artifactPath, targetModelFamily = null) {
    const sampleFingerprint = generateFingerprintFromArtifact(artifact);
    const timestamp = new Date().toISOString();
    const insertIfAbsent = (records) => {
        const duplicate = records.find((entry) => entry.sampleFingerprint === sampleFingerprint);
        if (duplicate) {
            return { record: duplicate, isNew: false, existingRecord: duplicate };
        }
        const { artifactId, sessionId, principleId, sourceSnapshotRef } = artifact;
        const record = {
            sampleFingerprint,
            artifactId,
            sessionId,
            principleId,
            sourceSnapshotRef,
            reviewStatus: 'pending_review',
            reviewReason: undefined,
            targetModelFamily,
            createdAt: timestamp,
            updatedAt: timestamp,
            artifactPath: path.normalize(artifactPath),
        };
        records.push(record);
        writeRegistry(workspaceDir, records);
        return { record, isNew: true };
    };
    return withRegistryLock(workspaceDir, insertIfAbsent);
}
161
/**
 * Look up a single dataset record by its fingerprint.
 *
 * Reads the registry without taking the registry lock.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {string} sampleFingerprint - Fingerprint to search for
 * @returns {object|null} The first matching record, or null when none matches
 */
export function getDatasetRecord(workspaceDir, sampleFingerprint) {
    const hasFingerprint = (entry) => entry.sampleFingerprint === sampleFingerprint;
    const match = readRegistry(workspaceDir).find(hasFingerprint);
    return match ?? null;
}
168
/**
 * Look up a single dataset record by the artifact it references.
 *
 * Reads the registry without taking the registry lock.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {string} artifactId - Artifact identifier to search for
 * @returns {object|null} The first matching record, or null when none matches
 */
export function getDatasetRecordByArtifactId(workspaceDir, artifactId) {
    const referencesArtifact = (entry) => entry.artifactId === artifactId;
    const match = readRegistry(workspaceDir).find(referencesArtifact);
    return match ?? null;
}
175
/**
 * Return dataset records, newest first, optionally narrowed by a filter.
 *
 * Filter semantics:
 * - `reviewStatus`: a single status or an array of accepted statuses.
 * - `targetModelFamily`: a string matches that family exactly; `null`
 *   matches only unassigned records; leaving it undefined matches any family.
 * - `exportReadyOnly === true`: keeps records that are approved for
 *   training, have a non-null family, and whose artifact file exists.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {object} [filter] - Optional filter criteria
 * @returns {Array<object>} Matching records sorted by createdAt descending
 */
export function listDatasetRecords(workspaceDir, filter) {
    const newestFirst = (left, right) => new Date(right.createdAt).getTime() - new Date(left.createdAt).getTime();
    const allRecords = readRegistry(workspaceDir);
    if (!filter) {
        return allRecords.sort(newestFirst);
    }
    // Collect the active criteria in the order they must be evaluated; the
    // file-existence probe comes last so it only runs for surviving records.
    const criteria = [];
    if (filter.reviewStatus !== undefined) {
        const accepted = Array.isArray(filter.reviewStatus)
            ? filter.reviewStatus
            : [filter.reviewStatus];
        criteria.push((entry) => accepted.includes(entry.reviewStatus));
    }
    if (filter.targetModelFamily !== undefined) {
        const wantedFamily = filter.targetModelFamily;
        // null selects unassigned records; a string selects that exact family
        criteria.push((entry) => entry.targetModelFamily === wantedFamily);
    }
    if (filter.exportReadyOnly === true) {
        criteria.push((entry) => entry.reviewStatus === 'approved_for_training');
        criteria.push((entry) => entry.targetModelFamily !== null);
        criteria.push((entry) => fs.existsSync(entry.artifactPath));
    }
    return allRecords
        .filter((entry) => criteria.every((accepts) => accepts(entry)))
        .sort(newestFirst);
}
219
/**
 * Review-status state machine: maps each current status to the statuses it
 * may move to.
 *
 * - pending_review        → approved_for_training | rejected | superseded
 * - approved_for_training → superseded (a better sample replaces it)
 * - rejected              → pending_review (re-review requested) | superseded
 * - superseded            → nothing; terminal state
 */
const VALID_TRANSITIONS = {
    pending_review: [
        'approved_for_training',
        'rejected',
        'superseded',
    ],
    approved_for_training: [
        'superseded',
    ],
    rejected: [
        'pending_review',
        'superseded',
    ],
    // terminal
    superseded: [],
};
232
/**
 * Move a dataset record to a new review status.
 *
 * The change is applied inside the registry lock: the record is located by
 * fingerprint, the transition is checked against VALID_TRANSITIONS, and the
 * registry is rewritten with the new status, reason and updatedAt timestamp.
 * When no reason is supplied (allowed for statuses other than approved /
 * rejected), the previously stored reason is kept.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {string} sampleFingerprint - The fingerprint of the record to update
 * @param {string} newStatus - The new review status
 * @param {string} [reason] - Reason for the decision; must be non-blank when
 *   moving to 'approved_for_training' or 'rejected'
 * @returns {object} The updated record
 * @throws {Error} If the record is not found, the transition is not allowed,
 *   or a required reason is missing or blank
 */
export function updateReviewStatus(workspaceDir, sampleFingerprint, newStatus, reason) {
    return withRegistryLock(workspaceDir, (records) => {
        const idx = records.findIndex((r) => r.sampleFingerprint === sampleFingerprint);
        if (idx === -1) {
            throw new Error(`Dataset record not found: ${sampleFingerprint}`);
        }
        const record = records[idx];
        // Validate transition. A stored status that is not a known state
        // (e.g. a hand-edited registry) has no allowed transitions; report
        // that through the normal error below instead of a TypeError.
        const allowed = Object.prototype.hasOwnProperty.call(VALID_TRANSITIONS, record.reviewStatus)
            ? VALID_TRANSITIONS[record.reviewStatus]
            : [];
        if (!allowed.includes(newStatus)) {
            throw new Error(`Invalid review status transition: ${record.reviewStatus} → ${newStatus}. ` +
                `Allowed transitions from ${record.reviewStatus}: ${allowed.join(', ') || 'none'}`);
        }
        // Enforce reason requirement for approved/rejected. A whitespace-only
        // string carries no information, so it counts as missing.
        const reasonMissing = !reason || (typeof reason === 'string' && reason.trim() === '');
        if ((newStatus === 'approved_for_training' || newStatus === 'rejected') &&
            reasonMissing) {
            throw new Error(`reviewReason is required when transitioning to ${newStatus}`);
        }
        // Apply update
        records[idx] = {
            ...record,
            reviewStatus: newStatus,
            reviewReason: reason ?? record.reviewReason,
            updatedAt: new Date().toISOString(),
        };
        writeRegistry(workspaceDir, records);
        return records[idx];
    });
}
271
/**
 * Rebind a dataset record to a different target model family.
 *
 * Runs inside the registry lock, replaces the record's targetModelFamily,
 * refreshes updatedAt, and rewrites the registry. The review status is not
 * consulted, so the binding can be changed in any state.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {string} sampleFingerprint - Fingerprint of the record to update
 * @param {string|null} targetModelFamily - New family, or null to unassign
 * @returns {object} The updated record
 * @throws {Error} If no record has the given fingerprint
 */
export function updateTargetModelFamily(workspaceDir, sampleFingerprint, targetModelFamily) {
    const rebind = (records) => {
        const position = records.findIndex((entry) => entry.sampleFingerprint === sampleFingerprint);
        if (position === -1) {
            throw new Error(`Dataset record not found: ${sampleFingerprint}`);
        }
        const updated = {
            ...records[position],
            targetModelFamily,
            updatedAt: new Date().toISOString(),
        };
        records[position] = updated;
        writeRegistry(workspaceDir, records);
        return updated;
    };
    return withRegistryLock(workspaceDir, rebind);
}
289
/**
 * Tell whether a sample can be exported for training.
 *
 * A sample qualifies when all of the following hold:
 * - a record with the fingerprint exists
 * - its reviewStatus is 'approved_for_training'
 * - its targetModelFamily is not null
 * - the file at its artifactPath exists
 *
 * Lineage-field completeness is not verified separately here.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {string} sampleFingerprint - Fingerprint of the sample to check
 * @returns {boolean} true when every condition above is met
 */
export function isExportReady(workspaceDir, sampleFingerprint) {
    const record = getDatasetRecord(workspaceDir, sampleFingerprint);
    return Boolean(record)
        && record.reviewStatus === 'approved_for_training'
        && record.targetModelFamily !== null
        && fs.existsSync(record.artifactPath);
}
310
/**
 * List export-ready records, optionally restricted to one model family.
 *
 * A null or omitted family means "no family restriction": the value is
 * passed on as undefined so listDatasetRecords does not apply its
 * unassigned-only (null) family filter.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {string|null} [targetModelFamily] - Family to restrict to, if any
 * @returns {Array<object>} Export-ready records as ordered by listDatasetRecords
 */
export function listExportReadyRecords(workspaceDir, targetModelFamily) {
    const familyFilter = targetModelFamily === null || targetModelFamily === undefined
        ? undefined
        : targetModelFamily;
    const filter = { exportReadyOnly: true, targetModelFamily: familyFilter };
    return listDatasetRecords(workspaceDir, filter);
}
319
/**
 * Resolve the artifact file path stored on a dataset record.
 *
 * The path is only returned when the record exists and the file is present
 * on disk at the time of the call.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {string} sampleFingerprint - Fingerprint of the record
 * @returns {string|null} The stored artifact path, or null if the record or file is missing
 */
export function getArtifactPath(workspaceDir, sampleFingerprint) {
    const record = getDatasetRecord(workspaceDir, sampleFingerprint);
    const usable = Boolean(record) && fs.existsSync(record.artifactPath);
    return usable ? record.artifactPath : null;
}
331
/**
 * Load the artifact that a dataset record points to.
 *
 * The file is parsed as JSON and reduced to the eight artifact fields; any
 * additional properties stored in the sample file are left out.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {string} sampleFingerprint - Fingerprint of the record
 * @returns {object} Object with artifactId, sessionId, principleId,
 *   sourceSnapshotRef, badDecision, betterDecision, rationale and createdAt
 * @throws {Error} If the record or its artifact file is missing, or the file
 *   cannot be read or parsed
 */
export function readDatasetArtifact(workspaceDir, sampleFingerprint) {
    const artifactPath = getArtifactPath(workspaceDir, sampleFingerprint);
    if (!artifactPath) {
        throw new Error(`Artifact file not found for sample ${sampleFingerprint}`);
    }
    const stored = JSON.parse(fs.readFileSync(artifactPath, 'utf-8'));
    // Project the stored sample down to the artifact fields only
    const {
        artifactId,
        sessionId,
        principleId,
        sourceSnapshotRef,
        badDecision,
        betterDecision,
        rationale,
        createdAt,
    } = stored;
    return {
        artifactId,
        sessionId,
        principleId,
        sourceSnapshotRef,
        badDecision,
        betterDecision,
        rationale,
        createdAt,
    };
}
354
/**
 * Summarize the registry for dashboard purposes.
 *
 * Counts records per review status and, for export-ready records (approved,
 * non-null family, artifact file present), counts them per target model
 * family. Records with an unrecognized status only contribute to `total`.
 *
 * @param {string} workspaceDir - Workspace directory
 * @returns {{total: number, pendingReview: number, approvedForTraining: number,
 *   rejected: number, superseded: number, exportReadyByFamily: Object<string, number>}}
 */
export function getDatasetStats(workspaceDir) {
    const records = readRegistry(workspaceDir);
    const counts = {
        total: records.length,
        pendingReview: 0,
        approvedForTraining: 0,
        rejected: 0,
        superseded: 0,
        exportReadyByFamily: {},
    };
    // Tally families in a Map so a family named like an Object.prototype
    // member ('constructor', 'toString', '__proto__') is counted as a number
    // instead of picking up the inherited property.
    const readyPerFamily = new Map();
    for (const record of records) {
        switch (record.reviewStatus) {
            case 'pending_review':
                counts.pendingReview++;
                break;
            case 'approved_for_training':
                counts.approvedForTraining++;
                break;
            case 'rejected':
                counts.rejected++;
                break;
            case 'superseded':
                counts.superseded++;
                break;
        }
        // Count export-ready by family
        if (record.reviewStatus === 'approved_for_training' &&
            record.targetModelFamily !== null &&
            fs.existsSync(record.artifactPath)) {
            // String() keeps the keys identical to plain-object property keys
            const family = String(record.targetModelFamily);
            readyPerFamily.set(family, (readyPerFamily.get(family) ?? 0) + 1);
        }
    }
    // fromEntries defines own data properties, so every key is preserved
    counts.exportReadyByFamily = Object.fromEntries(readyPerFamily);
    return counts;
}
392
+ // ---------------------------------------------------------------------------
393
+ // Auto-registration from persisted samples
394
+ // ---------------------------------------------------------------------------
395
/**
 * Scan the sample files reported by NocturnalPathResolver.listSamples and
 * register every approved sample that is not yet in the dataset registry.
 *
 * This is used for:
 * 1. Initial migration of Phase 2 artifacts to Phase 3 dataset
 * 2. Recovering from registry corruption
 *
 * A sample is considered only when its `status` is 'approved' and it carries
 * artifactId, sessionId and principleId. Files that cannot be read, parsed or
 * registered are skipped with a warning; the scan always continues.
 *
 * @param {string} workspaceDir - Workspace directory
 * @param {string|null} [targetModelFamily=null] - Default target family for migrated samples
 * @returns {number} Number of records actually created by this call
 */
export function migrateSampleArtifacts(workspaceDir, targetModelFamily = null) {
    const samplePaths = NocturnalPathResolver.listSamples(workspaceDir);
    let newCount = 0;
    for (const samplePath of samplePaths) {
        try {
            const content = fs.readFileSync(samplePath, 'utf-8');
            const sample = JSON.parse(content);
            // Only process approved samples
            if (sample.status !== 'approved') {
                continue;
            }
            if (!sample.artifactId || !sample.sessionId || !sample.principleId) {
                continue;
            }
            // Lock-free fast path: skip samples that are already registered
            const fingerprint = generateSampleFingerprint(sample.artifactId, sample.principleId, sample.sessionId);
            if (getDatasetRecord(workspaceDir, fingerprint)) {
                continue;
            }
            // Register the artifact
            const artifact = {
                artifactId: sample.artifactId,
                sessionId: sample.sessionId,
                principleId: sample.principleId,
                sourceSnapshotRef: sample.sourceSnapshotRef || '',
                badDecision: sample.badDecision || '',
                betterDecision: sample.betterDecision || '',
                rationale: sample.rationale || '',
                createdAt: sample.createdAt || new Date().toISOString(),
            };
            const { isNew } = registerSample(workspaceDir, artifact, samplePath, targetModelFamily);
            // Another writer may have registered the sample between the
            // fast-path check and the locked insert; count only real inserts.
            if (isNew) {
                newCount++;
            }
        }
        catch (err) {
            // Best-effort scan: report the skipped file and keep going
            console.warn(`[nocturnal-dataset] Skipping sample ${samplePath} during migration: ${String(err)}`);
        }
    }
    return newCount;
}