principles-disciple 1.107.0 → 1.109.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/openclaw.plugin.json +1 -1
  2. package/package.json +2 -2
  3. package/src/core/init.ts +3 -1
  4. package/src/core/workspace-dir-validation.ts +3 -3
  5. package/src/service/evolution-worker.ts +1 -1
  6. package/templates/langs/en/skills/pd-runtime-v2/SKILL.md +1 -1
  7. package/templates/langs/zh/skills/pd-runtime-v2/SKILL.md +1 -1
  8. package/tests/core-anti-growth.test.ts +0 -13
  9. package/tests/hooks/prompt-characterization.test.ts +1 -11
  10. package/tests/hooks/prompt-diet.test.ts +3 -11
  11. package/tests/hooks/prompt-size-guard.test.ts +0 -10
  12. package/tests/hooks/runtime-v2-prompt-activation.test.ts +0 -10
  13. package/tests/index.test.ts +1 -1
  14. package/tests/runtime-v2-discovery-guard.test.ts +1 -2
  15. package/vitest.config.ts +2 -3
  16. package/vitest.unit.config.ts +12 -0
  17. package/src/core/evolution-hook.ts +0 -74
  18. package/src/core/file-storage-adapter.ts +0 -203
  19. package/src/core/merge-gate-audit.ts +0 -314
  20. package/src/core/pain-context-extractor.ts +0 -306
  21. package/src/core/pain-lifecycle.ts +0 -38
  22. package/src/core/pain-signal-adapter.ts +0 -42
  23. package/src/core/pain-signal.ts +0 -22
  24. package/src/core/principle-injector.ts +0 -84
  25. package/src/core/principle-tree-migration.ts +0 -196
  26. package/src/core/storage-adapter.ts +0 -65
  27. package/src/core/telemetry-event.ts +0 -109
  28. package/src/core/training-program.ts +0 -632
  29. package/src/core/workspace-dir-service.ts +0 -119
  30. package/src/hooks/lifecycle-routing.ts +0 -125
  31. package/src/service/event-log-auditor.ts +0 -284
  32. package/src/service/evolution-queue-lock.ts +0 -47
  33. package/src/service/failure-classifier.ts +0 -79
  34. package/src/service/internalization-trigger-adapter.ts +0 -302
  35. package/src/service/monitoring-query-service.ts +0 -67
  36. package/src/service/subagent-workflow/index.ts +0 -17
  37. package/src/tools/critique-prompt.ts +0 -1
  38. package/src/tools/model-index.ts +0 -1
  39. package/src/types/event-payload.ts +0 -16
  40. package/src/utils/glob-match.ts +0 -50
  41. package/src/utils/nlp.ts +0 -25
  42. package/src/utils/plugin-logger.ts +0 -97
  43. package/src/utils/subagent-probe.ts +0 -81
  44. package/tests/core/evolution-hook.test.ts +0 -123
  45. package/tests/core/file-storage-adapter.test.ts +0 -285
  46. package/tests/core/merge-gate-audit.test.ts +0 -117
  47. package/tests/core/pain-context-extractor.test.ts +0 -279
  48. package/tests/core/pain-lifecycle.test.ts +0 -38
  49. package/tests/core/pain-signal-adapter.test.ts +0 -116
  50. package/tests/core/pain-signal.test.ts +0 -190
  51. package/tests/core/principle-injector.test.ts +0 -90
  52. package/tests/core/principle-tree-migration.test.ts +0 -77
  53. package/tests/core/storage-conformance.test.ts +0 -429
  54. package/tests/core/telemetry-event.test.ts +0 -119
  55. package/tests/core/training-program.test.ts +0 -472
  56. package/tests/core/workspace-dir-service.test.ts +0 -68
  57. package/tests/core/workspace-dir-validation.test.ts +0 -143
  58. package/tests/integration/internalization-trigger-guard.test.ts +0 -69
  59. package/tests/integration/pain-lifecycle-e2e.test.ts +0 -75
  60. package/tests/integration/tool-hooks-workspace-dir.e2e.test.ts +0 -209
  61. package/tests/service/failure-classifier.test.ts +0 -171
  62. package/tests/service/internalization-trigger-adapter.test.ts +0 -251
  63. package/tests/service/monitoring-query-service.test.ts +0 -67
  64. package/tests/utils/nlp.test.ts +0 -35
  65. package/tests/utils/plugin-logger.test.ts +0 -156
  66. package/tests/utils/subagent-probe.test.ts +0 -79
@@ -1,632 +0,0 @@
1
- /**
2
- * Training Program — Orchestrates External Training Execution and Lineage
3
- * =========================================================================
4
- *
5
- * PURPOSE: Coordinate the training workflow from experiment spec creation
6
- * through checkpoint registration and eval attachment.
7
- *
8
- * ARCHITECTURE:
9
- * - TrainingProgram orchestrates the workflow but does NOT execute training itself
10
- * - External trainer backends (Python scripts) do the actual training
11
- * - Training run, checkpoint, and eval lineage is registered in model-training-registry
12
- * - Promotion gate (promotion-gate.ts) controls deployment readiness
13
- *
14
- * WORKFLOW:
15
- * 1. Create experiment spec (TrainingProgram.createExperiment)
16
- * 2. Execute external trainer (TrainingProgram.executeTrainer)
17
- * 3. Validate trainer result against spec (validateTrainerResult)
18
- * 4. Register training run (model-training-registry)
19
- * 5. Register checkpoint (model-training-registry)
20
- * 6. Attach eval summary after benchmark (model-training-registry)
21
- * 7. Promotion gate controls deployment readiness
22
- *
23
- * DESIGN CONSTRAINTS:
24
- * - ORPO-first: only 'orpo' training mode
25
- * - No training inside the plugin runtime
26
- * - No direct deployment binding from trainer output
27
- * - Trainer backends run as external processes: the plugin awaits their exit but does not poll training progress
28
- * - All lineage must be traceable through model-training-registry
29
- */
30
-
31
- import * as fs from 'fs';
32
- import * as path from 'path';
33
- import { fileURLToPath } from 'url';
34
- import { atomicWriteFileSync } from '../utils/io.js';
35
- import {
36
- type TrainingExperimentSpec,
37
- type TrainingExperimentResult,
38
- type TrainerBackendKind,
39
- type TrainableWorkerProfile,
40
- type HardwareTier,
41
- type TrainingHyperparameters,
42
- type TrainingBudget,
43
- validateTrainerResult,
44
- computeConfigFingerprint,
45
- computeCodeHash,
46
- generateExperimentId,
47
- validateHardwareTier,
48
- getDefaultHardwareTier,
49
- isValidModelFamilyForProfile,
50
- LOCAL_EDITOR_ENABLED,
51
- } from './external-training-contract.js';
52
- import {
53
- registerTrainingRun,
54
- startTrainingRun,
55
- completeTrainingRun,
56
- failTrainingRun,
57
- registerCheckpoint,
58
- attachEvalSummary,
59
- markCheckpointDeployable,
60
- getCheckpointLineage,
61
- } from './model-training-registry.js';
62
-
63
- // ---------------------------------------------------------------------------
64
- // Constants
65
- // ---------------------------------------------------------------------------
66
-
67
- /**
68
- * Path to the external trainer scripts directory.
69
- */
70
- const TRAINER_SCRIPTS_DIR = 'scripts/nocturnal/trainer';
71
- const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
72
- const REPO_ROOT = path.resolve(MODULE_DIR, '..', '..', '..', '..');
73
-
74
- /**
75
- * Default hyperparameters for ORPO training.
76
- * These are conservative defaults for a consumer GPU (RTX 4090 24GB).
77
- */
78
- export const DEFAULT_ORPO_HYPERPARAMETERS: TrainingHyperparameters = {
79
- learningRate: 3e-4,
80
- batchSize: 2,
81
- gradientAccumulation: 8,
82
- loraRank: 16,
83
- loraAlpha: 32,
84
- loraDropout: 0.05,
85
- warmupRatio: 0.1,
86
- maxSteps: 1000,
87
- maxSeqLength: 2048,
88
- };
89
-
90
- /**
91
- * Default budget for training runs.
92
- */
93
- export const DEFAULT_BUDGET: TrainingBudget = {
94
- maxWallClockMinutes: 240, // 4 hours
95
- maxTrainTokens: 2_000_000,
96
- };
97
-
98
- // ---------------------------------------------------------------------------
99
- // Experiment Spec Creation
100
- // ---------------------------------------------------------------------------
101
-
102
- /**
103
- * Parameters for creating a training experiment.
104
- */
105
- export interface CreateExperimentParams {
106
- /** Target worker profile */
107
- targetWorkerProfile: TrainableWorkerProfile;
108
-
109
- /** Target model family */
110
- targetModelFamily: string;
111
-
112
- /** Hardware tier */
113
- hardwareTier?: HardwareTier;
114
-
115
- /** Backend to use */
116
- backend: TrainerBackendKind;
117
-
118
- /** Dataset export ID */
119
- datasetExportId: string;
120
-
121
- /** Dataset export path */
122
- datasetExportPath: string;
123
-
124
- /** Dataset fingerprint */
125
- datasetFingerprint: string;
126
-
127
- /** Benchmark export ID */
128
- benchmarkExportId: string;
129
-
130
- /** Output directory for checkpoints */
131
- outputDir: string;
132
-
133
- /** Custom hyperparameters (optional) */
134
- hyperparameters?: Partial<TrainingHyperparameters>;
135
-
136
- /** Custom budget (optional) */
137
- budget?: Partial<TrainingBudget>;
138
- }
139
-
140
- /**
141
- * Result of creating an experiment.
142
- */
143
- export interface CreateExperimentResult {
144
- /** The experiment specification */
145
- spec: TrainingExperimentSpec;
146
-
147
- /** The registered training run ID */
148
- trainRunId: string;
149
- }
150
-
151
- /**
152
- * Create a new training experiment.
153
- *
154
- * This creates the experiment spec and registers a training run in the registry.
155
- *
156
- * @param stateDir - Workspace state directory
157
- * @param params - Experiment parameters
158
- * @returns Created experiment spec and registered training run ID
159
- *
160
- * @throws Error if worker profile is not allowed (local-editor not yet enabled)
161
- * @throws Error if model family is not valid for the worker profile
162
- * @throws Error if hardware tier is not valid for the backend
163
- */
164
- export function createExperiment(
165
- stateDir: string,
166
- params: CreateExperimentParams
167
- ): CreateExperimentResult {
168
- // --- Validate worker profile ---
169
- if (params.targetWorkerProfile === 'local-editor' && !LOCAL_EDITOR_ENABLED) {
170
- throw new Error(
171
- `Training for 'local-editor' is not yet enabled. ` +
172
- `Phase 7 first rollout is limited to 'local-reader'. ` +
173
- `To enable local-editor training, set LOCAL_EDITOR_ENABLED = true ` +
174
- `in external-training-contract.ts after appropriate review.`
175
- );
176
- }
177
-
178
- // --- Validate model family for profile ---
179
- if (!isValidModelFamilyForProfile(params.targetModelFamily, params.targetWorkerProfile)) {
180
- throw new Error(
181
- `Model family '${params.targetModelFamily}' is not valid for profile ` +
182
- `'${params.targetWorkerProfile}'. ` +
183
- `Ensure the model family name contains an appropriate keyword.`
184
- );
185
- }
186
-
187
- // --- Validate hardware tier ---
188
- const tier = params.hardwareTier ?? getDefaultHardwareTier(params.backend);
189
- validateHardwareTier(params.backend, tier);
190
-
191
- // --- Merge hyperparameters ---
192
- const hyperparameters: TrainingHyperparameters = {
193
- ...DEFAULT_ORPO_HYPERPARAMETERS,
194
- ...params.hyperparameters,
195
- };
196
-
197
- // --- Compute fingerprints ---
198
- const configFingerprint = computeConfigFingerprint(hyperparameters);
199
- const codeHash = computeCodeHash();
200
-
201
- // --- Create experiment spec ---
202
- const spec: TrainingExperimentSpec = {
203
- experimentId: generateExperimentId(),
204
- backend: params.backend,
205
- trainingMode: 'orpo',
206
- targetWorkerProfile: params.targetWorkerProfile,
207
- targetModelFamily: params.targetModelFamily,
208
- hardwareTier: tier,
209
- datasetExportId: params.datasetExportId,
210
- datasetExportPath: params.datasetExportPath,
211
- datasetFingerprint: params.datasetFingerprint,
212
- benchmarkExportId: params.benchmarkExportId,
213
- outputDir: params.outputDir,
214
- configFingerprint,
215
- codeHash,
216
- hyperparameters,
217
- budget: { ...DEFAULT_BUDGET, ...params.budget },
218
- expectedArtifact: {
219
- checkpointName: `checkpoint-${params.targetModelFamily}-${Date.now()}`,
220
- adapterFormat: 'peft-adapter',
221
- },
222
- };
223
-
224
- // --- Register training run in registry ---
225
- const trainRun = registerTrainingRun(stateDir, {
226
- experimentId: spec.experimentId,
227
- targetModelFamily: spec.targetModelFamily,
228
- datasetFingerprint: spec.datasetFingerprint,
229
- exportId: spec.datasetExportId,
230
- sampleCount: 0, // Will be updated when result is registered
231
- configFingerprint: spec.configFingerprint,
232
- });
233
-
234
- return { spec, trainRunId: trainRun.trainRunId };
235
- }
236
-
237
- // ---------------------------------------------------------------------------
238
- // Trainer Execution
239
- // ---------------------------------------------------------------------------
240
-
241
- /**
242
- * Parameters for executing an external trainer.
243
- */
244
- export interface ExecuteTrainerParams {
245
- /** The experiment specification */
246
- spec: TrainingExperimentSpec;
247
-
248
- /** Path to the trainer scripts directory */
249
- scriptsDir?: string;
250
- }
251
-
252
- /**
253
- * Execute an external trainer backend.
254
- *
255
- * This function:
256
- * 1. Validates the trainer script exists
257
- * 2. Serializes the experiment spec to JSON
258
- * 3. Invokes the Python backend
259
- * 4. Returns the trainer's parsed result
260
- *
261
- * The trainer protocol:
262
- * - stdout: MUST contain only the machine-readable JSON result (TrainingExperimentResult)
263
- * - stderr: Contains training progress logs (forwarded to the plugin's stderr, never parsed)
264
- * - result file: Written to output dir; read as a fallback when stdout is not valid JSON or the trainer exits non-zero
265
- *
266
- * NOTE: The plugin does not poll the trainer for progress. For non-dry-run
267
- * backends it awaits the child process's exit, bounded by the wall-clock budget
268
- * plus a 30-second grace period, before parsing the result.
269
- *
270
- * @param spec - The experiment specification
271
- * @param scriptsDir - Override for the scripts directory
272
- * @returns The trainer's result as a parsed JSON object
273
- *
274
- * @throws Error if the trainer script is not found
275
- * @throws Error if trainer execution fails
276
- * @throws Error if result cannot be parsed
277
- */
278
- export async function executeTrainer(
279
- spec: TrainingExperimentSpec,
280
- scriptsDir?: string
281
- ): Promise<TrainingExperimentResult> {
282
- const baseDir = scriptsDir ?? path.join(REPO_ROOT, TRAINER_SCRIPTS_DIR);
283
-
284
- // Map backend to script name
285
- const scriptMap: Record<TrainerBackendKind, string> = {
286
- 'peft-trl-orpo': 'main.py',
287
- 'unsloth-orpo': 'main.py',
288
- 'dry-run': 'main.py',
289
- };
290
-
291
- const scriptName = scriptMap[spec.backend];
292
- const scriptPath = path.join(baseDir, scriptName);
293
-
294
- // Check if script exists (for dry-run, we allow missing scripts in development)
295
- if (spec.backend !== 'dry-run' && !fs.existsSync(scriptPath)) {
296
- throw new Error(
297
- `Trainer script not found: ${scriptPath}. ` +
298
- `Ensure the external trainer backends are installed at ${baseDir}.`
299
- );
300
- }
301
-
302
- // Serialize spec to JSON for passing to trainer
303
- const specPath = path.join(baseDir, `experiment-${spec.experimentId}.json`);
304
- const specJson = JSON.stringify(spec, null, 2);
305
-
306
- // Write spec to file for trainer to read
307
- const specDir = path.dirname(specPath);
308
- if (!fs.existsSync(specDir)) {
309
- fs.mkdirSync(specDir, { recursive: true });
310
- }
311
- atomicWriteFileSync(specPath, specJson);
312
-
313
- // Result file path (written by trainer to output dir)
314
- const resultFilePath = path.join(spec.outputDir, `result-${spec.experimentId}.json`);
315
-
316
- try {
317
- if (spec.backend === 'dry-run') {
318
- // For dry-run, simulate a successful dry-run result
319
- // No actual Python script execution needed - dry-run just validates spec
320
- return {
321
- experimentId: spec.experimentId,
322
- backend: 'dry-run',
323
- status: 'dry_run' as const,
324
- targetWorkerProfile: spec.targetWorkerProfile,
325
- targetModelFamily: spec.targetModelFamily,
326
- datasetFingerprint: spec.datasetFingerprint,
327
- configFingerprint: spec.configFingerprint,
328
- codeHash: spec.codeHash,
329
- createdAt: new Date().toISOString(),
330
- };
331
- }
332
-
333
- // Execute the Python trainer using spawn (streaming).
334
- const { spawn } = await import('child_process');
335
- // - stdout is collected into a fixed-size buffer (1MB max) to prevent OOM from training logs
336
- // - stderr is piped directly to parent stderr so it never accumulates in memory
337
- // - Non-zero exit codes are handled with clear error messages
338
- const timeoutMs = (spec.budget.maxWallClockMinutes * 60 * 1000) + 30000;
339
- const pythonExecutable = process.platform === 'win32' ? 'python' : 'python3';
340
- const MAX_STDOUT_BUFFER = 1 * 1024 * 1024; // 1MB cap
341
-
342
- const trainerResult = await new Promise<
343
- TrainingExperimentResult
344
- >((resolve, reject) => {
345
- const proc = spawn(pythonExecutable, [scriptPath, '--spec', specPath, '--output-dir', spec.outputDir]);
346
-
347
- // Collect stdout with size cap to prevent OOM from huge log output
348
- const stdoutChunks: Buffer[] = [];
349
- let stdoutSize = 0;
350
-
351
- proc.stdout.on('data', (chunk: Buffer) => {
352
- const remaining = MAX_STDOUT_BUFFER - stdoutSize;
353
- if (remaining > 0) {
354
- stdoutChunks.push(chunk.slice(0, remaining));
355
- stdoutSize += Math.min(chunk.length, remaining);
356
- }
357
- });
358
-
359
- // Pipe stderr directly — training logs must NOT accumulate in memory
360
- proc.stderr.pipe(process.stderr);
361
-
362
- const timer = setTimeout(() => {
363
- proc.kill();
364
- reject(new Error(`Trainer timed out after ${timeoutMs}ms`));
365
- }, timeoutMs);
366
- timer.unref(); // Don't keep process alive for timeout
367
-
368
- proc.on('close', (code) => {
369
- clearTimeout(timer);
370
- if (code === 0) {
371
- const stdout = Buffer.concat(stdoutChunks).toString('utf-8');
372
- const trimmed = stdout.trim();
373
- if (trimmed) {
374
- try {
375
- resolve(JSON.parse(trimmed) as TrainingExperimentResult);
376
- return;
377
- } catch {
378
- // fall through to result file
379
- }
380
- }
381
- // Fallback: try result file
382
- if (fs.existsSync(resultFilePath)) {
383
- try {
384
- const content = fs.readFileSync(resultFilePath, 'utf-8');
385
- resolve(JSON.parse(content) as TrainingExperimentResult);
386
- return;
387
- } catch {
388
- // fall through to error
389
- }
390
- }
391
- reject(
392
- new Error(
393
- `Trainer stdout was not valid JSON and result file also invalid. ` +
394
- `result file: ${resultFilePath}`
395
- )
396
- );
397
- } else {
398
- // Non-zero exit — try result file as last resort
399
- if (fs.existsSync(resultFilePath)) {
400
- try {
401
- const content = fs.readFileSync(resultFilePath, 'utf-8');
402
- resolve(JSON.parse(content) as TrainingExperimentResult);
403
- } catch {
404
- reject(new Error(`Trainer exited with code ${code} and result file was invalid: ${resultFilePath}`));
405
- }
406
- } else {
407
- reject(new Error(`Trainer exited with code ${code} and no result file found at: ${resultFilePath}`));
408
- }
409
- }
410
- });
411
-
412
- proc.on('error', (err) => {
413
- clearTimeout(timer);
414
- reject(new Error(`Trainer spawn failed: ${err.message}`));
415
- });
416
- });
417
-
418
- return trainerResult;
419
- } finally {
420
- // Clean up spec file after execution
421
- if (fs.existsSync(specPath)) {
422
- fs.unlinkSync(specPath);
423
- }
424
- }
425
- }
426
-
427
- // ---------------------------------------------------------------------------
428
- // Result Processing
429
- // ---------------------------------------------------------------------------
430
-
431
- /**
432
- * Parameters for processing a trainer result.
433
- */
434
- export interface ProcessTrainerResultParams {
435
- /** The original experiment specification */
436
- spec: TrainingExperimentSpec;
437
-
438
- /** The training run ID from registry */
439
- trainRunId: string;
440
-
441
- /** The trainer's result (parsed) */
442
- result: TrainingExperimentResult;
443
-
444
- /** Workspace state directory */
445
- stateDir: string;
446
- }
447
-
448
- /**
449
- * Process a trainer result:
450
- * 1. Validate result against spec
451
- * 2. Register checkpoint in training registry
452
- * 3. Return checkpoint for eval attachment
453
- *
454
- * @param params - Processing parameters
455
- * @returns The registered checkpoint, or null for dry_run (no checkpoint produced)
456
- *
457
- * @throws Error if validation fails
458
- * @throws Error if checkpoint registration fails
459
- */
460
- export function processTrainerResult(
461
- params: ProcessTrainerResultParams
462
- ): { checkpointId: string; checkpointRef: string } | null {
463
- const { spec, trainRunId, result, stateDir } = params;
464
-
465
- // --- Handle dry_run BEFORE validation (it has no checkpoint and should not be validated) ---
466
- if (result.status === 'dry_run') {
467
- // Dry-run: mark completed (no checkpoint expected) and return null.
468
- // This is a supported non-error outcome — upper layers distinguish it from
469
- // completed (which has a checkpoint) by checking the return value.
470
- startTrainingRun(stateDir, trainRunId);
471
- completeTrainingRun(stateDir, trainRunId);
472
- return null;
473
- }
474
-
475
- // --- Transition pending -> running first ---
476
- // Must happen before any validation or failure path so that
477
- // failTrainingRun has a valid transition (running → failed).
478
- startTrainingRun(stateDir, trainRunId);
479
-
480
- // --- Validate result against spec (fail-closed) ---
481
- const validation = validateTrainerResult(spec, result);
482
- if (!validation.valid) {
483
- const errorMessages = validation.errors
484
- .map((e) => ` - ${e.field}: ${e.reason} (expected: ${e.expected}, got: ${e.actual})`)
485
- .join('\n');
486
-
487
- // Fail the training run in registry (running → failed is valid)
488
- failTrainingRun(stateDir, trainRunId, `Validation failed:\n${errorMessages}`);
489
-
490
- throw new Error(
491
- `Trainer result validation failed (${validation.errors.length} errors):\n${errorMessages}\n` +
492
- `The trainer result does not match the experiment spec. ` +
493
- `This checkpoint will not be registered.`
494
- );
495
- }
496
-
497
- // --- Update training run status ---
498
- // Already transitioned to 'running' above
499
-
500
- if (result.status === 'failed') {
501
- failTrainingRun(stateDir, trainRunId, result.failureReason ?? 'Unknown failure');
502
- throw new Error(`Training failed: ${result.failureReason}`);
503
- }
504
-
505
- // result.status === 'completed' (or any other non-failed/dry_run) — proceed to checkpoint
506
- if (!result.checkpointId || !result.artifact) {
507
- // Mark run failed since it didn't produce a checkpoint (run is in 'running' state)
508
- failTrainingRun(stateDir, trainRunId, 'Trainer result is marked completed but missing checkpointId or artifact');
509
- throw new Error(
510
- `Trainer result is marked 'completed' but missing checkpointId or artifact.`
511
- );
512
- }
513
-
514
- // --- Register checkpoint BEFORE marking run completed ---
515
- // Ordering matters: if registerCheckpoint throws, run stays in 'running' state
516
- // (not 'completed'), making the failure visible in registry audits.
517
- const checkpoint = registerCheckpoint(stateDir, {
518
- trainRunId,
519
- targetModelFamily: spec.targetModelFamily,
520
- artifactPath: result.artifact.artifactPath,
521
- });
522
-
523
- // Checkpoint registered successfully — now mark run completed
524
- completeTrainingRun(stateDir, trainRunId);
525
-
526
- return {
527
- checkpointId: checkpoint.checkpointId,
528
- checkpointRef: result.checkpointRef ?? checkpoint.checkpointId,
529
- };
530
- }
531
-
532
- // ---------------------------------------------------------------------------
533
- // Training Program Orchestration
534
- // ---------------------------------------------------------------------------
535
-
536
- /**
537
- * The TrainingProgram class orchestrates the complete training workflow.
538
- *
539
- * Usage:
540
- * ```typescript
541
- * const program = new TrainingProgram(stateDir);
542
- *
543
- * // Create experiment
544
- * const { spec, trainRunId } = program.createExperiment({
545
- * backend: 'peft-trl-orpo',
546
- * targetWorkerProfile: 'local-reader',
547
- * targetModelFamily: 'qwen2.5-7b-reader',
548
- * datasetExportId: 'export-123',
549
- * datasetExportPath: '.state/exports/orpo/export-123.jsonl',
550
- * datasetFingerprint: 'abc123',
551
- * benchmarkExportId: 'benchmark-456',
552
- * outputDir: '.state/nocturnal/checkpoints',
553
- * });
554
- *
555
- * // Execute trainer (external)
556
- * const trainerOutput = await executeTrainer(spec);
557
- *
558
- * // Process result
559
- * const checkpoint = program.processResult({
560
- * spec,
561
- * trainRunId,
562
- * result: trainerOutput, // already parsed by executeTrainer
563
- * });
564
- *
565
- * // Attach eval (after benchmark runs); processResult returns null for dry-run
566
- * if (checkpoint) program.attachEvalAndMarkDeployable(checkpoint.checkpointId, evalSummary);
567
- * ```
568
- */
569
- export class TrainingProgram {
570
-
571
- constructor(private readonly stateDir: string) {}
572
-
573
-
574
- /**
575
- * Create a new training experiment.
576
- */
577
- createExperiment(params: CreateExperimentParams): CreateExperimentResult {
578
- return createExperiment(this.stateDir, params);
579
- }
580
-
581
- /**
582
- * Process a trainer result and register the checkpoint.
583
- * Returns null for dry_run (no checkpoint produced).
584
- */
585
- processResult(params: {
586
- spec: TrainingExperimentSpec;
587
- trainRunId: string;
588
- result: TrainingExperimentResult;
589
- }): { checkpointId: string; checkpointRef: string } | null {
590
- return processTrainerResult({
591
- ...params,
592
- stateDir: this.stateDir,
593
- });
594
- }
595
-
596
- /**
597
- * Attach an eval summary to a checkpoint and mark it deployable when the verdict is 'pass' or 'compare_only'.
598
- *
599
- * @param checkpointId - The checkpoint to attach eval to
600
- * @param evalSummary - The eval summary (from benchmark run)
601
- * @returns Nothing (the method is declared void).
602
- */
603
- attachEvalAndMarkDeployable(
604
- checkpointId: string,
605
- evalSummary: {
606
- evalId: string;
607
- checkpointId: string;
608
- benchmarkId: string;
609
- targetModelFamily: string;
610
- mode: 'prompt_assisted' | 'reduced_prompt';
611
- baselineScore: number;
612
- candidateScore: number;
613
- delta: number;
614
- verdict: 'pass' | 'fail' | 'compare_only';
615
- }
616
- ): void {
617
- // Attach eval summary
618
- attachEvalSummary(this.stateDir, checkpointId, evalSummary);
619
-
620
- // Mark deployable if verdict is pass or compare_only
621
- if (evalSummary.verdict === 'pass' || evalSummary.verdict === 'compare_only') {
622
- markCheckpointDeployable(this.stateDir, checkpointId, true);
623
- }
624
- }
625
-
626
- /**
627
- * Get checkpoint lineage for audit.
628
- */
629
- getCheckpointLineage(checkpointId: string) {
630
- return getCheckpointLineage(this.stateDir, checkpointId);
631
- }
632
- }