principles-disciple 1.7.6 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/dist/commands/context.js +5 -15
  2. package/dist/commands/evolution-status.js +2 -9
  3. package/dist/commands/export.js +61 -8
  4. package/dist/commands/nocturnal-review.d.ts +24 -0
  5. package/dist/commands/nocturnal-review.js +265 -0
  6. package/dist/commands/nocturnal-rollout.d.ts +27 -0
  7. package/dist/commands/nocturnal-rollout.js +671 -0
  8. package/dist/commands/nocturnal-train.d.ts +25 -0
  9. package/dist/commands/nocturnal-train.js +919 -0
  10. package/dist/commands/pain.js +8 -21
  11. package/dist/constants/tools.d.ts +2 -2
  12. package/dist/constants/tools.js +1 -1
  13. package/dist/core/adaptive-thresholds.d.ts +186 -0
  14. package/dist/core/adaptive-thresholds.js +300 -0
  15. package/dist/core/config.d.ts +2 -38
  16. package/dist/core/config.js +6 -61
  17. package/dist/core/event-log.d.ts +1 -2
  18. package/dist/core/event-log.js +0 -3
  19. package/dist/core/evolution-engine.js +1 -21
  20. package/dist/core/evolution-reducer.d.ts +7 -1
  21. package/dist/core/evolution-reducer.js +56 -4
  22. package/dist/core/evolution-types.d.ts +61 -9
  23. package/dist/core/evolution-types.js +31 -9
  24. package/dist/core/external-training-contract.d.ts +276 -0
  25. package/dist/core/external-training-contract.js +269 -0
  26. package/dist/core/local-worker-routing.d.ts +175 -0
  27. package/dist/core/local-worker-routing.js +525 -0
  28. package/dist/core/model-deployment-registry.d.ts +218 -0
  29. package/dist/core/model-deployment-registry.js +503 -0
  30. package/dist/core/model-training-registry.d.ts +295 -0
  31. package/dist/core/model-training-registry.js +475 -0
  32. package/dist/core/nocturnal-arbiter.d.ts +159 -0
  33. package/dist/core/nocturnal-arbiter.js +534 -0
  34. package/dist/core/nocturnal-candidate-scoring.d.ts +137 -0
  35. package/dist/core/nocturnal-candidate-scoring.js +266 -0
  36. package/dist/core/nocturnal-compliance.d.ts +175 -0
  37. package/dist/core/nocturnal-compliance.js +824 -0
  38. package/dist/core/nocturnal-dataset.d.ts +224 -0
  39. package/dist/core/nocturnal-dataset.js +443 -0
  40. package/dist/core/nocturnal-executability.d.ts +85 -0
  41. package/dist/core/nocturnal-executability.js +331 -0
  42. package/dist/core/nocturnal-export.d.ts +124 -0
  43. package/dist/core/nocturnal-export.js +275 -0
  44. package/dist/core/nocturnal-paths.d.ts +124 -0
  45. package/dist/core/nocturnal-paths.js +214 -0
  46. package/dist/core/nocturnal-trajectory-extractor.d.ts +242 -0
  47. package/dist/core/nocturnal-trajectory-extractor.js +307 -0
  48. package/dist/core/nocturnal-trinity.d.ts +311 -0
  49. package/dist/core/nocturnal-trinity.js +880 -0
  50. package/dist/core/paths.d.ts +6 -0
  51. package/dist/core/paths.js +6 -0
  52. package/dist/core/principle-training-state.d.ts +121 -0
  53. package/dist/core/principle-training-state.js +321 -0
  54. package/dist/core/promotion-gate.d.ts +238 -0
  55. package/dist/core/promotion-gate.js +529 -0
  56. package/dist/core/session-tracker.d.ts +10 -0
  57. package/dist/core/session-tracker.js +14 -0
  58. package/dist/core/shadow-observation-registry.d.ts +217 -0
  59. package/dist/core/shadow-observation-registry.js +308 -0
  60. package/dist/core/training-program.d.ts +233 -0
  61. package/dist/core/training-program.js +433 -0
  62. package/dist/core/trajectory.d.ts +95 -1
  63. package/dist/core/trajectory.js +220 -6
  64. package/dist/core/workspace-context.d.ts +0 -6
  65. package/dist/core/workspace-context.js +0 -12
  66. package/dist/hooks/bash-risk.d.ts +6 -6
  67. package/dist/hooks/bash-risk.js +8 -8
  68. package/dist/hooks/gate-block-helper.js +1 -1
  69. package/dist/hooks/gate.d.ts +1 -1
  70. package/dist/hooks/gate.js +2 -2
  71. package/dist/hooks/gfi-gate.d.ts +3 -3
  72. package/dist/hooks/gfi-gate.js +15 -14
  73. package/dist/hooks/pain.js +6 -9
  74. package/dist/hooks/progressive-trust-gate.d.ts +21 -49
  75. package/dist/hooks/progressive-trust-gate.js +51 -204
  76. package/dist/hooks/prompt.d.ts +11 -11
  77. package/dist/hooks/prompt.js +158 -72
  78. package/dist/hooks/subagent.js +43 -6
  79. package/dist/i18n/commands.js +8 -8
  80. package/dist/index.js +129 -28
  81. package/dist/service/evolution-worker.d.ts +42 -4
  82. package/dist/service/evolution-worker.js +321 -13
  83. package/dist/service/nocturnal-runtime.d.ts +183 -0
  84. package/dist/service/nocturnal-runtime.js +352 -0
  85. package/dist/service/nocturnal-service.d.ts +163 -0
  86. package/dist/service/nocturnal-service.js +787 -0
  87. package/dist/service/nocturnal-target-selector.d.ts +145 -0
  88. package/dist/service/nocturnal-target-selector.js +315 -0
  89. package/dist/service/phase3-input-filter.d.ts +2 -23
  90. package/dist/service/phase3-input-filter.js +3 -27
  91. package/dist/service/runtime-summary-service.d.ts +0 -10
  92. package/dist/service/runtime-summary-service.js +1 -54
  93. package/dist/tools/deep-reflect.js +2 -1
  94. package/dist/types/event-types.d.ts +2 -10
  95. package/dist/types/runtime-summary.d.ts +1 -8
  96. package/dist/types.d.ts +0 -3
  97. package/dist/types.js +0 -2
  98. package/openclaw.plugin.json +1 -1
  99. package/package.json +1 -1
  100. package/templates/langs/en/skills/pd-mentor/SKILL.md +5 -5
  101. package/templates/langs/zh/skills/pd-mentor/SKILL.md +5 -5
  102. package/templates/pain_settings.json +0 -6
  103. package/dist/commands/trust.d.ts +0 -4
  104. package/dist/commands/trust.js +0 -78
  105. package/dist/core/trust-engine.d.ts +0 -96
  106. package/dist/core/trust-engine.js +0 -286
@@ -0,0 +1,233 @@
1
+ /**
2
+ * Training Program — Orchestrates External Training Execution and Lineage
3
+ * =========================================================================
4
+ *
5
+ * PURPOSE: Coordinate the training workflow from experiment spec creation
6
+ * through checkpoint registration and eval attachment.
7
+ *
8
+ * ARCHITECTURE:
9
+ * - TrainingProgram orchestrates the workflow but does NOT execute training itself
10
+ * - External trainer backends (Python scripts) do the actual training
11
+ * - Training run, checkpoint, and eval lineage is registered in model-training-registry
12
+ * - Promotion gate (promotion-gate.ts) controls deployment readiness
13
+ *
14
+ * WORKFLOW:
15
+ * 1. Create experiment spec (TrainingProgram.createExperiment)
16
+ * 2. Execute external trainer (TrainingProgram.executeTrainer)
17
+ * 3. Validate trainer result against spec (validateTrainerResult)
18
+ * 4. Register training run (model-training-registry)
19
+ * 5. Register checkpoint (model-training-registry)
20
+ * 6. Attach eval summary after benchmark (model-training-registry)
21
+ * 7. Promotion gate controls deployment readiness
22
+ *
23
+ * DESIGN CONSTRAINTS:
24
+ * - ORPO-first: only 'orpo' training mode
25
+ * - No training inside the plugin runtime
26
+ * - No direct deployment binding from trainer output
27
+ * - Trainer backends are fire-and-forget (plugin does not poll trainer)
28
+ * - All lineage must be traceable through model-training-registry
29
+ */
30
+ import { type TrainingExperimentSpec, type TrainingExperimentResult, type TrainerBackendKind, type TrainableWorkerProfile, type HardwareTier, type TrainingHyperparameters, type TrainingBudget } from './external-training-contract.js';
31
+ /**
32
+ * Default hyperparameters for ORPO training.
33
+ * These are conservative defaults for consumer GPU (RTX 4090 24GB).
34
+ */
35
+ export declare const DEFAULT_ORPO_HYPERPARAMETERS: TrainingHyperparameters;
36
+ /**
37
+ * Default budget for training runs.
38
+ */
39
+ export declare const DEFAULT_BUDGET: TrainingBudget;
40
+ /**
41
+ * Parameters for creating a training experiment.
42
+ */
43
+ export interface CreateExperimentParams {
44
+ /** Target worker profile */
45
+ targetWorkerProfile: TrainableWorkerProfile;
46
+ /** Target model family */
47
+ targetModelFamily: string;
48
+ /** Hardware tier */
49
+ hardwareTier?: HardwareTier;
50
+ /** Backend to use */
51
+ backend: TrainerBackendKind;
52
+ /** Dataset export ID */
53
+ datasetExportId: string;
54
+ /** Dataset export path */
55
+ datasetExportPath: string;
56
+ /** Dataset fingerprint */
57
+ datasetFingerprint: string;
58
+ /** Benchmark export ID */
59
+ benchmarkExportId: string;
60
+ /** Output directory for checkpoints */
61
+ outputDir: string;
62
+ /** Custom hyperparameters (optional) */
63
+ hyperparameters?: Partial<TrainingHyperparameters>;
64
+ /** Custom budget (optional) */
65
+ budget?: Partial<TrainingBudget>;
66
+ }
67
+ /**
68
+ * Result of creating an experiment.
69
+ */
70
+ export interface CreateExperimentResult {
71
+ /** The experiment specification */
72
+ spec: TrainingExperimentSpec;
73
+ /** The registered training run ID */
74
+ trainRunId: string;
75
+ }
76
+ /**
77
+ * Create a new training experiment.
78
+ *
79
+ * This creates the experiment spec and registers a training run in the registry.
80
+ *
81
+ * @param stateDir - Workspace state directory
82
+ * @param params - Experiment parameters
83
+ * @returns Created experiment spec and registered training run ID
84
+ *
85
+ * @throws Error if worker profile is not allowed (local-editor not yet enabled)
86
+ * @throws Error if model family is not valid for the worker profile
87
+ * @throws Error if hardware tier is not valid for the backend
88
+ */
89
+ export declare function createExperiment(stateDir: string, params: CreateExperimentParams): CreateExperimentResult;
90
+ /**
91
+ * Parameters for executing an external trainer.
92
+ */
93
+ export interface ExecuteTrainerParams {
94
+ /** The experiment specification */
95
+ spec: TrainingExperimentSpec;
96
+ /** Path to the trainer scripts directory */
97
+ scriptsDir?: string;
98
+ }
99
+ /**
100
+ * Execute an external trainer backend.
101
+ *
102
+ * This function:
103
+ * 1. Validates the trainer script exists
104
+ * 2. Serializes the experiment spec to JSON
105
+ * 3. Invokes the Python backend
106
+ * 4. Returns the trainer's parsed result
107
+ *
108
+ * The trainer protocol:
109
+ * - stdout: MUST contain only the machine-readable JSON result (TrainingExperimentResult)
110
+ * - stderr: Contains training progress logs (ignored by plugin)
111
+ * - result file: Written to output dir as backup if stdout parsing fails
112
+ *
113
+ * NOTE: This is a fire-and-forget execution. The plugin does not poll
114
+ * the trainer. For Phase 7, trainer execution is assumed to be synchronous
115
+ * or to complete before this function returns.
116
+ *
117
+ * @param spec - The experiment specification
118
+ * @param scriptsDir - Override for the scripts directory
119
+ * @returns The trainer's result as parsed JSON object
120
+ *
121
+ * @throws Error if the trainer script is not found
122
+ * @throws Error if trainer execution fails
123
+ * @throws Error if result cannot be parsed
124
+ */
125
+ export declare function executeTrainer(spec: TrainingExperimentSpec, scriptsDir?: string): Promise<import('./external-training-contract.js').TrainingExperimentResult>;
126
+ /**
127
+ * Parameters for processing a trainer result.
128
+ */
129
+ export interface ProcessTrainerResultParams {
130
+ /** The original experiment specification */
131
+ spec: TrainingExperimentSpec;
132
+ /** The training run ID from registry */
133
+ trainRunId: string;
134
+ /** The trainer's result (parsed) */
135
+ result: TrainingExperimentResult;
136
+ /** Workspace state directory */
137
+ stateDir: string;
138
+ }
139
+ /**
140
+ * Process a trainer result:
141
+ * 1. Validate result against spec
142
+ * 2. Register checkpoint in training registry
143
+ * 3. Return checkpoint for eval attachment
144
+ *
145
+ * @param params - Processing parameters
146
+ * @returns The registered checkpoint, or null for dry_run (no checkpoint produced)
147
+ *
148
+ * @throws Error if validation fails
149
+ * @throws Error if checkpoint registration fails
150
+ */
151
+ export declare function processTrainerResult(params: ProcessTrainerResultParams): {
152
+ checkpointId: string;
153
+ checkpointRef: string;
154
+ } | null;
155
+ /**
156
+ * The TrainingProgram class orchestrates the complete training workflow.
157
+ *
158
+ * Usage:
159
+ * ```typescript
160
+ * const program = new TrainingProgram(stateDir);
161
+ *
162
+ * // Create experiment
163
+ * const { spec, trainRunId } = program.createExperiment({
164
+ * backend: 'peft-trl-orpo',
165
+ * targetWorkerProfile: 'local-reader',
166
+ * targetModelFamily: 'qwen2.5-7b-reader',
167
+ * datasetExportId: 'export-123',
168
+ * datasetExportPath: '.state/exports/orpo/export-123.jsonl',
169
+ * datasetFingerprint: 'abc123',
170
+ * benchmarkExportId: 'benchmark-456',
171
+ * outputDir: '.state/nocturnal/checkpoints',
172
+ * });
173
+ *
174
+ * // Execute trainer (external)
175
+ * const trainerOutput = await executeTrainer(spec);
176
+ *
177
+ * // Process result
178
+ * const { checkpointId } = program.processResult({
179
+ * spec,
180
+ * trainRunId,
181
+ * result: JSON.parse(trainerOutput),
182
+ * });
183
+ *
184
+ * // Attach eval (after benchmark runs)
185
+ * program.attachEval(checkpointId, evalSummary);
186
+ * ```
187
+ */
188
+ export declare class TrainingProgram {
189
+ private readonly stateDir;
190
+ constructor(stateDir: string);
191
+ /**
192
+ * Create a new training experiment.
193
+ */
194
+ createExperiment(params: CreateExperimentParams): CreateExperimentResult;
195
+ /**
196
+ * Process a trainer result and register the checkpoint.
197
+ * Returns null for dry_run (no checkpoint produced).
198
+ */
199
+ processResult(params: {
200
+ spec: TrainingExperimentSpec;
201
+ trainRunId: string;
202
+ result: TrainingExperimentResult;
203
+ }): {
204
+ checkpointId: string;
205
+ checkpointRef: string;
206
+ } | null;
207
+ /**
208
+ * Attach an eval summary to a checkpoint and mark it deployable if eval passes.
209
+ *
210
+ * @param checkpointId - The checkpoint to attach eval to
211
+ * @param evalSummary - The eval summary (from benchmark run)
212
+ * @returns The updated checkpoint
213
+ */
214
+ attachEvalAndMarkDeployable(checkpointId: string, evalSummary: {
215
+ evalId: string;
216
+ checkpointId: string;
217
+ benchmarkId: string;
218
+ targetModelFamily: string;
219
+ mode: 'prompt_assisted' | 'reduced_prompt';
220
+ baselineScore: number;
221
+ candidateScore: number;
222
+ delta: number;
223
+ verdict: 'pass' | 'fail' | 'compare_only';
224
+ }): void;
225
+ /**
226
+ * Get checkpoint lineage for audit.
227
+ */
228
+ getCheckpointLineage(checkpointId: string): {
229
+ run: import("./model-training-registry.js").TrainingRun;
230
+ checkpoint: import("./model-training-registry.js").Checkpoint;
231
+ eval: import("./model-training-registry.js").EvalSummary | null;
232
+ } | null;
233
+ }
@@ -0,0 +1,433 @@
1
+ /**
2
+ * Training Program — Orchestrates External Training Execution and Lineage
3
+ * =========================================================================
4
+ *
5
+ * PURPOSE: Coordinate the training workflow from experiment spec creation
6
+ * through checkpoint registration and eval attachment.
7
+ *
8
+ * ARCHITECTURE:
9
+ * - TrainingProgram orchestrates the workflow but does NOT execute training itself
10
+ * - External trainer backends (Python scripts) do the actual training
11
+ * - Training run, checkpoint, and eval lineage is registered in model-training-registry
12
+ * - Promotion gate (promotion-gate.ts) controls deployment readiness
13
+ *
14
+ * WORKFLOW:
15
+ * 1. Create experiment spec (TrainingProgram.createExperiment)
16
+ * 2. Execute external trainer (TrainingProgram.executeTrainer)
17
+ * 3. Validate trainer result against spec (validateTrainerResult)
18
+ * 4. Register training run (model-training-registry)
19
+ * 5. Register checkpoint (model-training-registry)
20
+ * 6. Attach eval summary after benchmark (model-training-registry)
21
+ * 7. Promotion gate controls deployment readiness
22
+ *
23
+ * DESIGN CONSTRAINTS:
24
+ * - ORPO-first: only 'orpo' training mode
25
+ * - No training inside the plugin runtime
26
+ * - No direct deployment binding from trainer output
27
+ * - Trainer backends are fire-and-forget (plugin does not poll trainer)
28
+ * - All lineage must be traceable through model-training-registry
29
+ */
30
+ import * as fs from 'fs';
31
+ import * as path from 'path';
32
+ import { fileURLToPath } from 'url';
33
+ import { validateTrainerResult, computeConfigFingerprint, computeCodeHash, generateExperimentId, validateHardwareTier, getDefaultHardwareTier, isValidModelFamilyForProfile, LOCAL_EDITOR_ENABLED, } from './external-training-contract.js';
34
+ import { registerTrainingRun, startTrainingRun, completeTrainingRun, failTrainingRun, registerCheckpoint, attachEvalSummary, markCheckpointDeployable, getCheckpointLineage, } from './model-training-registry.js';
35
+ // ---------------------------------------------------------------------------
36
+ // Constants
37
+ // ---------------------------------------------------------------------------
38
+ /**
39
+ * Path to the external trainer scripts directory.
40
+ */
41
+ const TRAINER_SCRIPTS_DIR = 'scripts/nocturnal/trainer';
42
+ const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
43
+ const REPO_ROOT = path.resolve(MODULE_DIR, '..', '..', '..', '..');
44
+ /**
45
+ * Default hyperparameters for ORPO training.
46
+ * These are conservative defaults for consumer GPU (RTX 4090 24GB).
47
+ */
48
+ export const DEFAULT_ORPO_HYPERPARAMETERS = {
49
+ learningRate: 3e-4,
50
+ batchSize: 2,
51
+ gradientAccumulation: 8,
52
+ loraRank: 16,
53
+ loraAlpha: 32,
54
+ loraDropout: 0.05,
55
+ warmupRatio: 0.1,
56
+ maxSteps: 1000,
57
+ maxSeqLength: 2048,
58
+ };
59
+ /**
60
+ * Default budget for training runs.
61
+ */
62
+ export const DEFAULT_BUDGET = {
63
+ maxWallClockMinutes: 240, // 4 hours
64
+ maxTrainTokens: 2_000_000,
65
+ };
66
+ /**
67
+ * Create a new training experiment.
68
+ *
69
+ * This creates the experiment spec and registers a training run in the registry.
70
+ *
71
+ * @param stateDir - Workspace state directory
72
+ * @param params - Experiment parameters
73
+ * @returns Created experiment spec and registered training run ID
74
+ *
75
+ * @throws Error if worker profile is not allowed (local-editor not yet enabled)
76
+ * @throws Error if model family is not valid for the worker profile
77
+ * @throws Error if hardware tier is not valid for the backend
78
+ */
79
+ export function createExperiment(stateDir, params) {
80
+ // --- Validate worker profile ---
81
+ if (params.targetWorkerProfile === 'local-editor' && !LOCAL_EDITOR_ENABLED) {
82
+ throw new Error(`Training for 'local-editor' is not yet enabled. ` +
83
+ `Phase 7 first rollout is limited to 'local-reader'. ` +
84
+ `To enable local-editor training, set LOCAL_EDITOR_ENABLED = true ` +
85
+ `in external-training-contract.ts after appropriate review.`);
86
+ }
87
+ // --- Validate model family for profile ---
88
+ if (!isValidModelFamilyForProfile(params.targetModelFamily, params.targetWorkerProfile)) {
89
+ throw new Error(`Model family '${params.targetModelFamily}' is not valid for profile ` +
90
+ `'${params.targetWorkerProfile}'. ` +
91
+ `Ensure the model family name contains an appropriate keyword.`);
92
+ }
93
+ // --- Validate hardware tier ---
94
+ const tier = params.hardwareTier ?? getDefaultHardwareTier(params.backend);
95
+ validateHardwareTier(params.backend, tier);
96
+ // --- Merge hyperparameters ---
97
+ const hyperparameters = {
98
+ ...DEFAULT_ORPO_HYPERPARAMETERS,
99
+ ...params.hyperparameters,
100
+ };
101
+ // --- Compute fingerprints ---
102
+ const configFingerprint = computeConfigFingerprint(hyperparameters);
103
+ const codeHash = computeCodeHash();
104
+ // --- Create experiment spec ---
105
+ const spec = {
106
+ experimentId: generateExperimentId(),
107
+ backend: params.backend,
108
+ trainingMode: 'orpo',
109
+ targetWorkerProfile: params.targetWorkerProfile,
110
+ targetModelFamily: params.targetModelFamily,
111
+ hardwareTier: tier,
112
+ datasetExportId: params.datasetExportId,
113
+ datasetExportPath: params.datasetExportPath,
114
+ datasetFingerprint: params.datasetFingerprint,
115
+ benchmarkExportId: params.benchmarkExportId,
116
+ outputDir: params.outputDir,
117
+ configFingerprint,
118
+ codeHash,
119
+ hyperparameters,
120
+ budget: { ...DEFAULT_BUDGET, ...params.budget },
121
+ expectedArtifact: {
122
+ checkpointName: `checkpoint-${params.targetModelFamily}-${Date.now()}`,
123
+ adapterFormat: 'peft-adapter',
124
+ },
125
+ };
126
+ // --- Register training run in registry ---
127
+ const trainRun = registerTrainingRun(stateDir, {
128
+ experimentId: spec.experimentId,
129
+ targetModelFamily: spec.targetModelFamily,
130
+ datasetFingerprint: spec.datasetFingerprint,
131
+ exportId: spec.datasetExportId,
132
+ sampleCount: 0, // Will be updated when result is registered
133
+ configFingerprint: spec.configFingerprint,
134
+ });
135
+ return { spec, trainRunId: trainRun.trainRunId };
136
+ }
137
+ /**
138
+ * Execute an external trainer backend.
139
+ *
140
+ * This function:
141
+ * 1. Validates the trainer script exists
142
+ * 2. Serializes the experiment spec to JSON
143
+ * 3. Invokes the Python backend
144
+ * 4. Returns the trainer's parsed result
145
+ *
146
+ * The trainer protocol:
147
+ * - stdout: MUST contain only the machine-readable JSON result (TrainingExperimentResult)
148
+ * - stderr: Contains training progress logs (ignored by plugin)
149
+ * - result file: Written to output dir as backup if stdout parsing fails
150
+ *
151
+ * NOTE: This is a fire-and-forget execution. The plugin does not poll
152
+ * the trainer. For Phase 7, trainer execution is assumed to be synchronous
153
+ * or to complete before this function returns.
154
+ *
155
+ * @param spec - The experiment specification
156
+ * @param scriptsDir - Override for the scripts directory
157
+ * @returns The trainer's result as parsed JSON object
158
+ *
159
+ * @throws Error if the trainer script is not found
160
+ * @throws Error if trainer execution fails
161
+ * @throws Error if result cannot be parsed
162
+ */
163
+ export async function executeTrainer(spec, scriptsDir) {
164
+ const baseDir = scriptsDir ?? path.join(REPO_ROOT, TRAINER_SCRIPTS_DIR);
165
+ // Map backend to script name
166
+ const scriptMap = {
167
+ 'peft-trl-orpo': 'main.py',
168
+ 'unsloth-orpo': 'main.py',
169
+ 'dry-run': 'main.py',
170
+ };
171
+ const scriptName = scriptMap[spec.backend];
172
+ const scriptPath = path.join(baseDir, scriptName);
173
+ // Check if script exists (for dry-run, we allow missing scripts in development)
174
+ if (spec.backend !== 'dry-run' && !fs.existsSync(scriptPath)) {
175
+ throw new Error(`Trainer script not found: ${scriptPath}. ` +
176
+ `Ensure the external trainer backends are installed at ${baseDir}.`);
177
+ }
178
+ // Serialize spec to JSON for passing to trainer
179
+ const specPath = path.join(baseDir, `experiment-${spec.experimentId}.json`);
180
+ const specJson = JSON.stringify(spec, null, 2);
181
+ // Write spec to file for trainer to read
182
+ const specDir = path.dirname(specPath);
183
+ if (!fs.existsSync(specDir)) {
184
+ fs.mkdirSync(specDir, { recursive: true });
185
+ }
186
+ fs.writeFileSync(specPath, specJson, 'utf-8');
187
+ // Result file path (written by trainer to output dir)
188
+ const resultFilePath = path.join(spec.outputDir, `result-${spec.experimentId}.json`);
189
+ try {
190
+ if (spec.backend === 'dry-run') {
191
+ // For dry-run, simulate a successful dry-run result
192
+ // No actual Python script execution needed - dry-run just validates spec
193
+ return {
194
+ experimentId: spec.experimentId,
195
+ backend: 'dry-run',
196
+ status: 'dry_run',
197
+ targetWorkerProfile: spec.targetWorkerProfile,
198
+ targetModelFamily: spec.targetModelFamily,
199
+ datasetFingerprint: spec.datasetFingerprint,
200
+ configFingerprint: spec.configFingerprint,
201
+ codeHash: spec.codeHash,
202
+ createdAt: new Date().toISOString(),
203
+ };
204
+ }
205
+ // Execute the Python trainer using spawn (streaming).
206
+ const { spawn } = await import('child_process');
207
+ // - stdout is collected into a fixed-size buffer (1MB max) to prevent OOM from training logs
208
+ // - stderr is piped directly to parent stderr so it never accumulates in memory
209
+ // - Non-zero exit codes are handled with clear error messages
210
+ const timeoutMs = (spec.budget.maxWallClockMinutes * 60 * 1000) + 30000;
211
+ const pythonExecutable = process.platform === 'win32' ? 'python' : 'python3';
212
+ const MAX_STDOUT_BUFFER = 1 * 1024 * 1024; // 1MB cap
213
+ const trainerResult = await new Promise((resolve, reject) => {
214
+ const proc = spawn(pythonExecutable, [scriptPath, '--spec', specPath, '--output-dir', spec.outputDir]);
215
+ // Collect stdout with size cap to prevent OOM from huge log output
216
+ const stdoutChunks = [];
217
+ let stdoutSize = 0;
218
+ proc.stdout.on('data', (chunk) => {
219
+ const remaining = MAX_STDOUT_BUFFER - stdoutSize;
220
+ if (remaining > 0) {
221
+ stdoutChunks.push(chunk.slice(0, remaining));
222
+ stdoutSize += Math.min(chunk.length, remaining);
223
+ }
224
+ });
225
+ // Pipe stderr directly — training logs must NOT accumulate in memory
226
+ proc.stderr.pipe(process.stderr);
227
+ const timer = setTimeout(() => {
228
+ proc.kill();
229
+ reject(new Error(`Trainer timed out after ${timeoutMs}ms`));
230
+ }, timeoutMs);
231
+ proc.on('close', (code) => {
232
+ clearTimeout(timer);
233
+ if (code === 0) {
234
+ const stdout = Buffer.concat(stdoutChunks).toString('utf-8');
235
+ const trimmed = stdout.trim();
236
+ if (trimmed) {
237
+ try {
238
+ resolve(JSON.parse(trimmed));
239
+ return;
240
+ }
241
+ catch {
242
+ // fall through to result file
243
+ }
244
+ }
245
+ // Fallback: try result file
246
+ if (fs.existsSync(resultFilePath)) {
247
+ try {
248
+ const content = fs.readFileSync(resultFilePath, 'utf-8');
249
+ resolve(JSON.parse(content));
250
+ return;
251
+ }
252
+ catch {
253
+ // fall through to error
254
+ }
255
+ }
256
+ reject(new Error(`Trainer stdout was not valid JSON and result file also invalid. ` +
257
+ `result file: ${resultFilePath}`));
258
+ }
259
+ else {
260
+ // Non-zero exit — try result file as last resort
261
+ if (fs.existsSync(resultFilePath)) {
262
+ try {
263
+ const content = fs.readFileSync(resultFilePath, 'utf-8');
264
+ resolve(JSON.parse(content));
265
+ }
266
+ catch {
267
+ reject(new Error(`Trainer exited with code ${code} and result file was invalid: ${resultFilePath}`));
268
+ }
269
+ }
270
+ else {
271
+ reject(new Error(`Trainer exited with code ${code} and no result file found at: ${resultFilePath}`));
272
+ }
273
+ }
274
+ });
275
+ proc.on('error', (err) => {
276
+ clearTimeout(timer);
277
+ reject(new Error(`Trainer spawn failed: ${err.message}`));
278
+ });
279
+ });
280
+ return trainerResult;
281
+ }
282
+ finally {
283
+ // Clean up spec file after execution
284
+ if (fs.existsSync(specPath)) {
285
+ fs.unlinkSync(specPath);
286
+ }
287
+ }
288
+ }
289
+ /**
290
+ * Process a trainer result:
291
+ * 1. Validate result against spec
292
+ * 2. Register checkpoint in training registry
293
+ * 3. Return checkpoint for eval attachment
294
+ *
295
+ * @param params - Processing parameters
296
+ * @returns The registered checkpoint, or null for dry_run (no checkpoint produced)
297
+ *
298
+ * @throws Error if validation fails
299
+ * @throws Error if checkpoint registration fails
300
+ */
301
+ export function processTrainerResult(params) {
302
+ const { spec, trainRunId, result, stateDir } = params;
303
+ // --- Handle dry_run BEFORE validation (it has no checkpoint and should not be validated) ---
304
+ if (result.status === 'dry_run') {
305
+ // Dry-run: mark completed (no checkpoint expected) and return null.
306
+ // This is a supported non-error outcome — upper layers distinguish it from
307
+ // completed (which has a checkpoint) by checking the return value.
308
+ startTrainingRun(stateDir, trainRunId);
309
+ completeTrainingRun(stateDir, trainRunId);
310
+ return null;
311
+ }
312
+ // --- Transition pending -> running first ---
313
+ // Must happen before any validation or failure path so that
314
+ // failTrainingRun has a valid transition (running → failed).
315
+ startTrainingRun(stateDir, trainRunId);
316
+ // --- Validate result against spec (fail-closed) ---
317
+ const validation = validateTrainerResult(spec, result);
318
+ if (!validation.valid) {
319
+ const errorMessages = validation.errors
320
+ .map((e) => ` - ${e.field}: ${e.reason} (expected: ${e.expected}, got: ${e.actual})`)
321
+ .join('\n');
322
+ // Fail the training run in registry (running → failed is valid)
323
+ failTrainingRun(stateDir, trainRunId, `Validation failed:\n${errorMessages}`);
324
+ throw new Error(`Trainer result validation failed (${validation.errors.length} errors):\n${errorMessages}\n` +
325
+ `The trainer result does not match the experiment spec. ` +
326
+ `This checkpoint will not be registered.`);
327
+ }
328
+ // --- Update training run status ---
329
+ // Already transitioned to 'running' above
330
+ if (result.status === 'failed') {
331
+ failTrainingRun(stateDir, trainRunId, result.failureReason ?? 'Unknown failure');
332
+ throw new Error(`Training failed: ${result.failureReason}`);
333
+ }
334
+ // result.status === 'completed' (or any other non-failed/dry_run) — proceed to checkpoint
335
+ if (!result.checkpointId || !result.artifact) {
336
+ // Mark run failed since it didn't produce a checkpoint (run is in 'running' state)
337
+ failTrainingRun(stateDir, trainRunId, 'Trainer result is marked completed but missing checkpointId or artifact');
338
+ throw new Error(`Trainer result is marked 'completed' but missing checkpointId or artifact.`);
339
+ }
340
+ // --- Register checkpoint BEFORE marking run completed ---
341
+ // Ordering matters: if registerCheckpoint throws, run stays in 'running' state
342
+ // (not 'completed'), making the failure visible in registry audits.
343
+ const checkpoint = registerCheckpoint(stateDir, {
344
+ trainRunId,
345
+ targetModelFamily: spec.targetModelFamily,
346
+ artifactPath: result.artifact.artifactPath,
347
+ });
348
+ // Checkpoint registered successfully — now mark run completed
349
+ completeTrainingRun(stateDir, trainRunId);
350
+ return {
351
+ checkpointId: checkpoint.checkpointId,
352
+ checkpointRef: result.checkpointRef ?? checkpoint.checkpointId,
353
+ };
354
+ }
355
+ // ---------------------------------------------------------------------------
356
+ // Training Program Orchestration
357
+ // ---------------------------------------------------------------------------
358
+ /**
359
+ * The TrainingProgram class orchestrates the complete training workflow.
360
+ *
361
+ * Usage:
362
+ * ```typescript
363
+ * const program = new TrainingProgram(stateDir);
364
+ *
365
+ * // Create experiment
366
+ * const { spec, trainRunId } = program.createExperiment({
367
+ * backend: 'peft-trl-orpo',
368
+ * targetWorkerProfile: 'local-reader',
369
+ * targetModelFamily: 'qwen2.5-7b-reader',
370
+ * datasetExportId: 'export-123',
371
+ * datasetExportPath: '.state/exports/orpo/export-123.jsonl',
372
+ * datasetFingerprint: 'abc123',
373
+ * benchmarkExportId: 'benchmark-456',
374
+ * outputDir: '.state/nocturnal/checkpoints',
375
+ * });
376
+ *
377
+ * // Execute trainer (external)
378
+ * const trainerOutput = await executeTrainer(spec);
379
+ *
380
+ * // Process result
381
+ * const { checkpointId } = program.processResult({
382
+ * spec,
383
+ * trainRunId,
384
+ * result: JSON.parse(trainerOutput),
385
+ * });
386
+ *
387
+ * // Attach eval (after benchmark runs)
388
+ * program.attachEval(checkpointId, evalSummary);
389
+ * ```
390
+ */
391
+ export class TrainingProgram {
392
+ stateDir;
393
+ constructor(stateDir) {
394
+ this.stateDir = stateDir;
395
+ }
396
+ /**
397
+ * Create a new training experiment.
398
+ */
399
+ createExperiment(params) {
400
+ return createExperiment(this.stateDir, params);
401
+ }
402
+ /**
403
+ * Process a trainer result and register the checkpoint.
404
+ * Returns null for dry_run (no checkpoint produced).
405
+ */
406
+ processResult(params) {
407
+ return processTrainerResult({
408
+ ...params,
409
+ stateDir: this.stateDir,
410
+ });
411
+ }
412
+ /**
413
+ * Attach an eval summary to a checkpoint and mark it deployable if eval passes.
414
+ *
415
+ * @param checkpointId - The checkpoint to attach eval to
416
+ * @param evalSummary - The eval summary (from benchmark run)
417
+ * @returns The updated checkpoint
418
+ */
419
+ attachEvalAndMarkDeployable(checkpointId, evalSummary) {
420
+ // Attach eval summary
421
+ attachEvalSummary(this.stateDir, checkpointId, evalSummary);
422
+ // Mark deployable if verdict is pass or compare_only
423
+ if (evalSummary.verdict === 'pass' || evalSummary.verdict === 'compare_only') {
424
+ markCheckpointDeployable(this.stateDir, checkpointId, true);
425
+ }
426
+ }
427
+ /**
428
+ * Get checkpoint lineage for audit.
429
+ */
430
+ getCheckpointLineage(checkpointId) {
431
+ return getCheckpointLineage(this.stateDir, checkpointId);
432
+ }
433
+ }