@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/package.json +2 -2
  2. package/research-output/training-runs/training-run-1773726941205.json +38 -0
  3. package/scripts/rank_trajectories.ts +0 -1
  4. package/scripts/run_task_benchmark.ts +4 -11
  5. package/src/adapter.ts +96 -49
  6. package/src/archetypes/ArchetypeConfigService.ts +188 -185
  7. package/src/archetypes/derive-archetype.ts +47 -47
  8. package/src/archetypes/index.ts +2 -2
  9. package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
  10. package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
  11. package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
  12. package/src/benchmark/BenchmarkDataViewer.ts +32 -30
  13. package/src/benchmark/BenchmarkHistoryService.ts +13 -12
  14. package/src/benchmark/BenchmarkRunner.ts +87 -83
  15. package/src/benchmark/BenchmarkValidator.ts +48 -46
  16. package/src/benchmark/FastEvalRunner.ts +17 -16
  17. package/src/benchmark/MetricsValidator.ts +20 -21
  18. package/src/benchmark/MetricsVisualizer.ts +92 -85
  19. package/src/benchmark/ModelBenchmarkService.ts +90 -82
  20. package/src/benchmark/ModelRegistry.ts +44 -44
  21. package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
  22. package/src/benchmark/SimulationA2AInterface.ts +118 -118
  23. package/src/benchmark/SimulationEngine.ts +51 -51
  24. package/src/benchmark/TaskRunner.ts +87 -79
  25. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
  26. package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
  27. package/src/benchmark/index.ts +27 -27
  28. package/src/benchmark/parseSimulationMetrics.ts +32 -32
  29. package/src/benchmark/simulation-types.ts +10 -10
  30. package/src/dependencies.ts +34 -34
  31. package/src/generation/TrajectoryGenerator.ts +39 -37
  32. package/src/generation/index.ts +1 -1
  33. package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
  34. package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
  35. package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
  36. package/src/huggingface/index.ts +6 -6
  37. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
  38. package/src/index.ts +27 -27
  39. package/src/init-training.ts +6 -6
  40. package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
  41. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
  42. package/src/metrics/index.ts +2 -2
  43. package/src/rubrics/__tests__/index.test.ts +73 -73
  44. package/src/rubrics/ass-kisser.ts +6 -6
  45. package/src/rubrics/degen.ts +6 -6
  46. package/src/rubrics/goody-twoshoes.ts +6 -6
  47. package/src/rubrics/index.ts +50 -50
  48. package/src/rubrics/information-trader.ts +6 -6
  49. package/src/rubrics/infosec.ts +6 -6
  50. package/src/rubrics/liar.ts +6 -6
  51. package/src/rubrics/perps-trader.ts +6 -6
  52. package/src/rubrics/researcher.ts +6 -6
  53. package/src/rubrics/scammer.ts +6 -6
  54. package/src/rubrics/social-butterfly.ts +7 -7
  55. package/src/rubrics/super-predictor.ts +6 -6
  56. package/src/rubrics/trader.ts +5 -5
  57. package/src/scoring/ArchetypeScoringService.ts +56 -54
  58. package/src/scoring/JudgePromptBuilder.ts +96 -96
  59. package/src/scoring/LLMJudgeCache.ts +26 -23
  60. package/src/scoring/index.ts +3 -3
  61. package/src/training/AutomationPipeline.ts +149 -140
  62. package/src/training/BenchmarkService.ts +49 -45
  63. package/src/training/ConfigValidator.ts +38 -32
  64. package/src/training/MarketOutcomesTracker.ts +22 -12
  65. package/src/training/ModelDeployer.ts +15 -15
  66. package/src/training/ModelFetcher.ts +7 -7
  67. package/src/training/ModelSelectionService.ts +32 -32
  68. package/src/training/ModelUsageVerifier.ts +31 -24
  69. package/src/training/MultiModelOrchestrator.ts +44 -44
  70. package/src/training/RLModelConfig.ts +57 -57
  71. package/src/training/RewardBackpropagationService.ts +18 -17
  72. package/src/training/RulerScoringService.ts +73 -72
  73. package/src/training/TrainingMonitor.ts +29 -29
  74. package/src/training/TrajectoryRecorder.ts +25 -27
  75. package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
  76. package/src/training/index.ts +36 -36
  77. package/src/training/logRLConfig.ts +7 -7
  78. package/src/training/pipeline.ts +13 -16
  79. package/src/training/storage/ModelStorageService.ts +32 -32
  80. package/src/training/storage/TrainingDataArchiver.ts +21 -21
  81. package/src/training/storage/index.ts +2 -2
  82. package/src/training/types.ts +6 -6
  83. package/src/training/window-utils.ts +14 -14
  84. package/src/utils/index.ts +7 -7
  85. package/src/utils/logger.ts +5 -5
  86. package/src/utils/snowflake.ts +1 -1
  87. package/src/utils/synthetic-detector.ts +7 -7
@@ -11,18 +11,16 @@
11
11
  * 7. Monitor performance
12
12
  */
13
13
 
14
- import fs from 'node:fs/promises';
15
- import path from 'node:path';
16
- import { spawn } from 'child_process';
17
- import { getTrainingDataAdapter, getMarketDataAdapter } from '../adapter';
18
- import type { TrajectoryRecord } from '../adapter';
19
- import { getExportGroupedForGRPO } from '../dependencies';
20
- import { logger } from '../utils/logger';
21
- import { benchmarkService } from './BenchmarkService';
22
- import { MarketOutcomesTracker } from './MarketOutcomesTracker';
23
- import { modelSelectionService } from './ModelSelectionService';
24
- import { rewardBackpropagationService } from './RewardBackpropagationService';
25
- import { rulerScoringService } from './RulerScoringService';
14
+ import { spawn } from "node:child_process";
15
+ import fs from "node:fs/promises";
16
+ import path from "node:path";
17
+ import { getMarketDataAdapter, getTrainingDataAdapter } from "../adapter";
18
+ import { getExportGroupedForGRPO } from "../dependencies";
19
+ import { logger } from "../utils/logger";
20
+ import { benchmarkService } from "./BenchmarkService";
21
+ import { modelSelectionService } from "./ModelSelectionService";
22
+ import { rewardBackpropagationService } from "./RewardBackpropagationService";
23
+ import { rulerScoringService } from "./RulerScoringService";
26
24
  import type {
27
25
  AutomationConfig,
28
26
  AutomationStatus,
@@ -31,8 +29,8 @@ import type {
31
29
  TrainingTriggerOptions,
32
30
  TrainingTriggerResult,
33
31
  TrajectoryStep,
34
- } from './types';
35
- import { getCurrentWindowId, getPreviousWindowId } from './window-utils';
32
+ } from "./types";
33
+ import { getCurrentWindowId, getPreviousWindowId } from "./window-utils";
36
34
 
37
35
  export type { AutomationConfig };
38
36
 
@@ -42,12 +40,12 @@ export class AutomationPipeline {
42
40
 
43
41
  constructor(config: Partial<AutomationConfig> = {}) {
44
42
  const envMinTrajectories = parseInt(
45
- process.env.TRAINING_MIN_TRAJECTORIES ?? '',
46
- 10
43
+ process.env.TRAINING_MIN_TRAJECTORIES ?? "",
44
+ 10,
47
45
  );
48
46
  const envMinGroupSize = parseInt(
49
- process.env.TRAINING_MIN_GROUP_SIZE ?? '',
50
- 10
47
+ process.env.TRAINING_MIN_GROUP_SIZE ?? "",
48
+ 10,
51
49
  );
52
50
 
53
51
  this.config = {
@@ -64,39 +62,41 @@ export class AutomationPipeline {
64
62
  dataQualityThreshold: config.dataQualityThreshold ?? 0.95,
65
63
  autoTriggerTraining: config.autoTriggerTraining !== false,
66
64
  trainingInterval: config.trainingInterval || 24, // Daily by default
67
- baseModel: config.baseModel || 'unsloth/Qwen3-4B-128K', // 4B params, 128K context - ideal for fine-tuning
68
- modelNamePrefix: config.modelNamePrefix || 'eliza-agent',
65
+ baseModel: config.baseModel || "unsloth/Qwen3-4B-128K", // 4B params, 128K context - ideal for fine-tuning
66
+ modelNamePrefix: config.modelNamePrefix || "eliza-agent",
69
67
  modelIdPrefix:
70
68
  config.modelIdPrefix ||
71
69
  process.env.TRAINING_MODEL_ID_PREFIX ||
72
70
  config.modelNamePrefix ||
73
- 'eliza-agent',
71
+ "eliza-agent",
74
72
  modelStoragePath:
75
73
  config.modelStoragePath ||
76
- path.resolve(process.cwd(), 'storage/models'),
74
+ path.resolve(process.cwd(), "storage/models"),
77
75
  dataStoragePath:
78
76
  config.dataStoragePath ||
79
- path.resolve(process.cwd(), 'storage/training-data'),
77
+ path.resolve(process.cwd(), "storage/training-data"),
80
78
  pythonProjectRoot:
81
79
  config.pythonProjectRoot ||
82
80
  process.env.TRAINING_PYTHON_ROOT ||
83
- path.resolve(process.cwd(), 'packages/training/python'),
81
+ path.resolve(process.cwd(), "packages/training/python"),
84
82
  trainerScriptPath:
85
- config.trainerScriptPath || process.env.TRAINING_SCRIPT_PATH || undefined,
83
+ config.trainerScriptPath ||
84
+ process.env.TRAINING_SCRIPT_PATH ||
85
+ undefined,
86
86
  trainerPythonExecutable:
87
87
  config.trainerPythonExecutable ||
88
88
  process.env.TRAINING_PYTHON_EXECUTABLE ||
89
- (process.platform === 'win32' ? 'python' : 'python3'),
89
+ (process.platform === "win32" ? "python" : "python3"),
90
90
  trainingMode:
91
91
  config.trainingMode ||
92
- (process.env.TRAINING_MODE as 'atropos' | 'tinker') ||
93
- 'atropos',
92
+ (process.env.TRAINING_MODE as "atropos" | "tinker") ||
93
+ "atropos",
94
94
  atroposApiUrl:
95
95
  config.atroposApiUrl ||
96
96
  process.env.ATROPOS_API_URL ||
97
- 'http://localhost:8000',
97
+ "http://localhost:8000",
98
98
  vllmPort:
99
- config.vllmPort || parseInt(process.env.VLLM_PORT || '9001', 10),
99
+ config.vllmPort || parseInt(process.env.VLLM_PORT || "9001", 10),
100
100
  };
101
101
  }
102
102
 
@@ -108,7 +108,9 @@ export class AutomationPipeline {
108
108
 
109
109
  const scoredAndReady = await adapter.countScoredTrajectoriesReady();
110
110
  const unscored = await adapter.countUnscoredTrajectories();
111
- const scenarioGroups = await adapter.getScenarioGroups(this.config.minGroupSize);
111
+ const scenarioGroups = await adapter.getScenarioGroups(
112
+ this.config.minGroupSize,
113
+ );
112
114
  const quality = await this.calculateDataQuality();
113
115
 
114
116
  const stats = {
@@ -144,7 +146,7 @@ export class AutomationPipeline {
144
146
 
145
147
  return {
146
148
  ready: true,
147
- reason: 'Ready to train!',
149
+ reason: "Ready to train!",
148
150
  stats,
149
151
  };
150
152
  }
@@ -165,14 +167,14 @@ export class AutomationPipeline {
165
167
  // Validate stepsJson exists and is valid before parsing
166
168
  if (
167
169
  !traj.stepsJson ||
168
- traj.stepsJson === 'null' ||
169
- traj.stepsJson === '[]'
170
+ traj.stepsJson === "null" ||
171
+ traj.stepsJson === "[]"
170
172
  ) {
171
173
  continue; // Skip invalid trajectories
172
174
  }
173
175
 
174
176
  const steps: TrajectoryStep[] = JSON.parse(
175
- traj.stepsJson
177
+ traj.stepsJson,
176
178
  ) as TrajectoryStep[];
177
179
 
178
180
  if (!Array.isArray(steps)) {
@@ -186,7 +188,7 @@ export class AutomationPipeline {
186
188
  // Check 2: Steps have LLM calls
187
189
  totalChecks++;
188
190
  const hasLLMCalls = steps.every(
189
- (s) => s.llmCalls && Array.isArray(s.llmCalls) && s.llmCalls.length > 0
191
+ (s) => s.llmCalls && Array.isArray(s.llmCalls) && s.llmCalls.length > 0,
190
192
  );
191
193
  if (hasLLMCalls) qualityScore++;
192
194
 
@@ -200,8 +202,8 @@ export class AutomationPipeline {
200
202
  llm.systemPrompt &&
201
203
  llm.systemPrompt.length > 50 &&
202
204
  llm.userPrompt &&
203
- llm.userPrompt.length > 100
204
- )
205
+ llm.userPrompt.length > 100,
206
+ ),
205
207
  );
206
208
  if (hasGoodPrompts) qualityScore++;
207
209
 
@@ -211,14 +213,14 @@ export class AutomationPipeline {
211
213
  (s) =>
212
214
  s.providerAccesses &&
213
215
  Array.isArray(s.providerAccesses) &&
214
- s.providerAccesses.length > 0
216
+ s.providerAccesses.length > 0,
215
217
  );
216
218
  if (hasProviders) qualityScore++;
217
219
 
218
220
  // Check 5: Actions have results
219
221
  totalChecks++;
220
222
  const hasResults = steps.every(
221
- (s) => s.action && (s.action.result || s.action.error)
223
+ (s) => s.action && (s.action.result || s.action.error),
222
224
  );
223
225
  if (hasResults) qualityScore++;
224
226
  }
@@ -230,7 +232,7 @@ export class AutomationPipeline {
230
232
  * Trigger training job
231
233
  */
232
234
  async triggerTraining(
233
- options: TrainingTriggerOptions = {}
235
+ options: TrainingTriggerOptions = {},
234
236
  ): Promise<TrainingTriggerResult> {
235
237
  // Check readiness
236
238
  const readiness = await this.checkTrainingReadiness();
@@ -249,11 +251,11 @@ export class AutomationPipeline {
249
251
  readiness.stats.unscoredTrajectories > 0
250
252
  ) {
251
253
  logger.info(
252
- 'Force mode: Attempting to score unscored trajectories first',
254
+ "Force mode: Attempting to score unscored trajectories first",
253
255
  {
254
256
  unscored: readiness.stats.unscoredTrajectories,
255
257
  },
256
- 'AutomationPipeline'
258
+ "AutomationPipeline",
257
259
  );
258
260
 
259
261
  // Score recent trajectories
@@ -267,19 +269,19 @@ export class AutomationPipeline {
267
269
  // Re-check readiness after scoring
268
270
  const newReadiness = await this.checkTrainingReadiness();
269
271
  logger.info(
270
- 'After scoring',
272
+ "After scoring",
271
273
  {
272
274
  scored: newReadiness.stats.totalTrajectories,
273
275
  stillUnscored: newReadiness.stats.unscoredTrajectories,
274
276
  },
275
- 'AutomationPipeline'
277
+ "AutomationPipeline",
276
278
  );
277
279
  }
278
280
 
279
281
  // Use ModelSelectionService for smart model selection
280
282
  const modelSelection = await modelSelectionService.selectBaseModel();
281
283
 
282
- logger.info('Model selection for training', {
284
+ logger.info("Model selection for training", {
283
285
  strategy: modelSelection.strategy,
284
286
  modelPath: modelSelection.modelPath,
285
287
  bundleCount: modelSelection.metadata?.bundleCount,
@@ -289,7 +291,7 @@ export class AutomationPipeline {
289
291
  const dataLimit = await modelSelectionService.getTrainingDataLimit();
290
292
 
291
293
  // Prepare data
292
- logger.info('Preparing training data...', {
294
+ logger.info("Preparing training data...", {
293
295
  ...readiness.stats,
294
296
  selectedModel: modelSelection.modelPath,
295
297
  strategy: modelSelection.strategy,
@@ -314,14 +316,15 @@ export class AutomationPipeline {
314
316
  if (!exportResult.success) {
315
317
  return {
316
318
  success: false,
317
- error: 'Export failed: ' + exportResult.error,
319
+ error: `Export failed: ${exportResult.error}`,
318
320
  };
319
321
  }
320
322
 
321
323
  // Create training batch record
322
324
  const adapterForBatch = getTrainingDataAdapter();
323
325
  const nextVersion = await this.getNextModelVersion();
324
- const trajectoryIds = await adapterForBatch.getTrajectoryIdsForTraining(maxTrajectories);
326
+ const trajectoryIds =
327
+ await adapterForBatch.getTrajectoryIdsForTraining(maxTrajectories);
325
328
 
326
329
  const insertedBatchId = await adapterForBatch.insertBatch({
327
330
  id: batchId,
@@ -334,19 +337,22 @@ export class AutomationPipeline {
334
337
  rewardsJson: JSON.stringify([]),
335
338
  trainingLoss: null,
336
339
  policyImprovement: null,
337
- status: 'pending',
340
+ status: "pending",
338
341
  error: null,
339
342
  createdAt: new Date(),
340
343
  });
341
344
 
342
345
  const batch = await adapterForBatch.getBatchById(insertedBatchId);
343
346
  if (!batch) {
344
- return { success: false, error: 'Failed to create training batch record' };
345
- };
347
+ return {
348
+ success: false,
349
+ error: "Failed to create training batch record",
350
+ };
351
+ }
346
352
 
347
353
  // Determine training mode: 'tinker' for cloud-based or 'atropos' for local vLLM
348
- const trainingMode = this.config.trainingMode || 'atropos';
349
- const useTinker = trainingMode.toLowerCase() === 'tinker';
354
+ const trainingMode = this.config.trainingMode || "atropos";
355
+ const useTinker = trainingMode.toLowerCase() === "tinker";
350
356
 
351
357
  // Trigger appropriate Python training script based on mode.
352
358
  // Allow explicit override for packaged/runtime deployments.
@@ -354,10 +360,10 @@ export class AutomationPipeline {
354
360
  this.config.trainerScriptPath ||
355
361
  path.resolve(
356
362
  this.config.pythonProjectRoot ||
357
- path.resolve(process.cwd(), 'packages/training/python'),
358
- 'src',
359
- 'training',
360
- useTinker ? 'tinker_trainer.py' : 'atropos_trainer.py'
363
+ path.resolve(process.cwd(), "packages/training/python"),
364
+ "src",
365
+ "training",
366
+ useTinker ? "tinker_trainer.py" : "atropos_trainer.py",
361
367
  );
362
368
 
363
369
  try {
@@ -372,24 +378,24 @@ export class AutomationPipeline {
372
378
  // Set environment variables for Python script
373
379
  const env = {
374
380
  ...process.env,
375
- MODE: 'single',
381
+ MODE: "single",
376
382
  BATCH_ID: batchId,
377
383
  MODEL_VERSION: nextVersion,
378
384
  WINDOW_ID: windowId,
379
385
  BASE_MODEL: modelSelection.modelPath,
380
- MAX_EXAMPLES: dataLimit ? dataLimit.toString() : '2000',
381
- DATABASE_URL: process.env.DATABASE_URL || '',
382
- ATROPOS_API_URL: this.config.atroposApiUrl || 'http://localhost:8000',
386
+ MAX_EXAMPLES: dataLimit ? dataLimit.toString() : "2000",
387
+ DATABASE_URL: process.env.DATABASE_URL || "",
388
+ ATROPOS_API_URL: this.config.atroposApiUrl || "http://localhost:8000",
383
389
  VLLM_PORT: String(this.config.vllmPort || 9001),
384
- FORCE_TRAINING: options.force ? 'true' : 'false',
385
- MIN_AGENTS_PER_WINDOW: '1',
390
+ FORCE_TRAINING: options.force ? "true" : "false",
391
+ MIN_AGENTS_PER_WINDOW: "1",
386
392
  TRAINING_MODE: trainingMode,
387
393
  };
388
394
 
389
395
  logger.info(
390
396
  useTinker
391
- ? 'Training will use Tinker cloud-based GRPO'
392
- : 'Training will use Atropos GRPO with vLLM',
397
+ ? "Training will use Tinker cloud-based GRPO"
398
+ : "Training will use Atropos GRPO with vLLM",
393
399
  {
394
400
  trainingMode,
395
401
  ...(useTinker
@@ -400,36 +406,40 @@ export class AutomationPipeline {
400
406
  model: env.BASE_MODEL,
401
407
  }),
402
408
  },
403
- 'AutomationPipeline'
409
+ "AutomationPipeline",
404
410
  );
405
411
 
406
412
  const pythonCmd =
407
413
  this.config.trainerPythonExecutable ||
408
- (process.platform === 'win32' ? 'python' : 'python3');
414
+ (process.platform === "win32" ? "python" : "python3");
409
415
 
410
416
  const trainingProcess = spawn(pythonCmd, [pythonScript], {
411
417
  detached: false,
412
- stdio: ['ignore', 'pipe', 'pipe'],
418
+ stdio: ["ignore", "pipe", "pipe"],
413
419
  env,
414
420
  });
415
421
 
416
422
  // Capture and log training process output
417
- trainingProcess.stdout?.on('data', (data: Buffer) => {
418
- logger.info('Training stdout', { output: data.toString().trim() });
423
+ trainingProcess.stdout?.on("data", (data: Buffer) => {
424
+ logger.info("Training stdout", { output: data.toString().trim() });
419
425
  });
420
426
 
421
- trainingProcess.stderr?.on('data', (data: Buffer) => {
422
- logger.warn('Training stderr', { output: data.toString().trim() });
427
+ trainingProcess.stderr?.on("data", (data: Buffer) => {
428
+ logger.warn("Training stderr", { output: data.toString().trim() });
423
429
  });
424
430
 
425
- trainingProcess.on('error', (error: Error) => {
426
- logger.error('Training process error', { error: error.message });
431
+ trainingProcess.on("error", (error: Error) => {
432
+ logger.error("Training process error", { error: error.message });
427
433
  getTrainingDataAdapter()
428
- .updateBatchStatus(batchId, 'failed', `Process spawn failed: ${error.message}`)
434
+ .updateBatchStatus(
435
+ batchId,
436
+ "failed",
437
+ `Process spawn failed: ${error.message}`,
438
+ )
429
439
  .catch((err: unknown) =>
430
- logger.error('Failed to update batch status', {
440
+ logger.error("Failed to update batch status", {
431
441
  error: err instanceof Error ? err : String(err),
432
- })
442
+ }),
433
443
  );
434
444
  });
435
445
 
@@ -437,7 +447,7 @@ export class AutomationPipeline {
437
447
 
438
448
  this.currentTrainingJob = batch.id;
439
449
 
440
- logger.info('Training job triggered', {
450
+ logger.info("Training job triggered", {
441
451
  batchId: batch.id,
442
452
  version: nextVersion,
443
453
  trajectories: exportResult.trajectoriesExported,
@@ -456,24 +466,17 @@ export class AutomationPipeline {
456
466
  const latestModel = await getTrainingDataAdapter().getLatestModel();
457
467
 
458
468
  if (!latestModel) {
459
- return 'v1.0.0';
469
+ return "v1.0.0";
460
470
  }
461
471
 
462
472
  // Increment patch version
463
473
  const [major, minor, patch] = latestModel.version
464
474
  .substring(1)
465
- .split('.')
475
+ .split(".")
466
476
  .map(Number);
467
477
  return `v${major}.${minor}.${patch! + 1}`;
468
478
  }
469
479
 
470
- /**
471
- * Get trajectory IDs for training
472
- */
473
- private async getTrajectoryIds(limit?: number): Promise<string[]> {
474
- return getTrainingDataAdapter().getTrajectoryIdsForTraining(limit);
475
- }
476
-
477
480
  /**
478
481
  * Monitor training job.
479
482
  *
@@ -484,18 +487,22 @@ export class AutomationPipeline {
484
487
  const batch = await getTrainingDataAdapter().getBatchById(batchId);
485
488
 
486
489
  if (!batch) {
487
- return { status: 'not_found' };
490
+ return { status: "not_found" };
488
491
  }
489
492
 
490
493
  // Terminal states – return immediately
491
- if (batch.status === 'completed') {
492
- return { status: 'completed', progress: 1.0, error: undefined };
494
+ if (batch.status === "completed") {
495
+ return { status: "completed", progress: 1.0, error: undefined };
493
496
  }
494
- if (batch.status === 'failed') {
495
- return { status: 'failed', progress: 0, error: batch.error || 'Training failed' };
497
+ if (batch.status === "failed") {
498
+ return {
499
+ status: "failed",
500
+ progress: 0,
501
+ error: batch.error || "Training failed",
502
+ };
496
503
  }
497
- if (batch.status === 'pending') {
498
- return { status: 'pending', progress: 0 };
504
+ if (batch.status === "pending") {
505
+ return { status: "pending", progress: 0 };
499
506
  }
500
507
 
501
508
  // For 'training' status, attempt to read the metrics log written by
@@ -506,12 +513,12 @@ export class AutomationPipeline {
506
513
  const metricsLogPath = path.resolve(
507
514
  this.config.dataStoragePath,
508
515
  batchId,
509
- 'training_metrics.jsonl'
516
+ "training_metrics.jsonl",
510
517
  );
511
518
 
512
519
  try {
513
- const logContent = await fs.readFile(metricsLogPath, 'utf-8');
514
- const lines = logContent.trim().split('\n').filter(Boolean);
520
+ const logContent = await fs.readFile(metricsLogPath, "utf-8");
521
+ const lines = logContent.trim().split("\n").filter(Boolean);
515
522
  if (lines.length > 0) {
516
523
  const lastLine = lines[lines.length - 1]!;
517
524
  const lastMetric = JSON.parse(lastLine) as {
@@ -520,13 +527,13 @@ export class AutomationPipeline {
520
527
  elapsed_ms?: number;
521
528
  };
522
529
  if (
523
- typeof lastMetric.step === 'number' &&
524
- typeof lastMetric.total_steps === 'number' &&
530
+ typeof lastMetric.step === "number" &&
531
+ typeof lastMetric.total_steps === "number" &&
525
532
  lastMetric.total_steps > 0
526
533
  ) {
527
534
  progress = lastMetric.step / lastMetric.total_steps;
528
535
  // Estimate remaining time from elapsed
529
- if (typeof lastMetric.elapsed_ms === 'number' && progress > 0) {
536
+ if (typeof lastMetric.elapsed_ms === "number" && progress > 0) {
530
537
  const totalEstimatedMs = lastMetric.elapsed_ms / progress;
531
538
  eta = Math.max(0, totalEstimatedMs - lastMetric.elapsed_ms);
532
539
  }
@@ -568,12 +575,12 @@ export class AutomationPipeline {
568
575
  }
569
576
  await fs.rmdir(batchDir);
570
577
  logger.info(
571
- 'Cleaned up export files',
578
+ "Cleaned up export files",
572
579
  { batchId, filesRemoved: files.length, dir: batchDir },
573
- 'AutomationPipeline'
580
+ "AutomationPipeline",
574
581
  );
575
582
  } catch (err) {
576
- logger.warn('Failed to clean up export files', {
583
+ logger.warn("Failed to clean up export files", {
577
584
  batchId,
578
585
  dir: batchDir,
579
586
  error: err instanceof Error ? err.message : String(err),
@@ -585,17 +592,17 @@ export class AutomationPipeline {
585
592
  * Automation loop (called by cron)
586
593
  */
587
594
  async runAutomationCycle(): Promise<void> {
588
- logger.info('Running automation cycle');
595
+ logger.info("Running automation cycle");
589
596
 
590
597
  // Check if training is already running
591
598
  if (this.currentTrainingJob) {
592
599
  const status = await this.monitorTraining(this.currentTrainingJob);
593
- if (status.status === 'completed') {
600
+ if (status.status === "completed") {
594
601
  await this.deployModel(this.currentTrainingJob);
595
602
  await this.cleanupExportFiles(this.currentTrainingJob);
596
603
  this.currentTrainingJob = null;
597
- } else if (status.status === 'failed') {
598
- logger.error('Training job failed', {
604
+ } else if (status.status === "failed") {
605
+ logger.error("Training job failed", {
599
606
  batchId: this.currentTrainingJob,
600
607
  });
601
608
  await this.cleanupExportFiles(this.currentTrainingJob);
@@ -612,11 +619,11 @@ export class AutomationPipeline {
612
619
  if (newlyCompleted) {
613
620
  const alreadyDeployed = await da.getModelByBatchAndStatus(
614
621
  newlyCompleted.batchId,
615
- 'deployed'
622
+ "deployed",
616
623
  );
617
624
 
618
625
  if (!alreadyDeployed) {
619
- logger.info('Found newly completed training batch', {
626
+ logger.info("Found newly completed training batch", {
620
627
  batchId: newlyCompleted.batchId,
621
628
  });
622
629
  await this.deployModel(newlyCompleted.batchId);
@@ -633,7 +640,7 @@ export class AutomationPipeline {
633
640
  : 999;
634
641
 
635
642
  if (hoursSinceLastTraining >= this.config.trainingInterval) {
636
- logger.info('Triggering automatic training', readiness.stats);
643
+ logger.info("Triggering automatic training", readiness.stats);
637
644
  await this.triggerTraining();
638
645
  }
639
646
  }
@@ -641,11 +648,13 @@ export class AutomationPipeline {
641
648
  // Track market outcomes for recent windows (optional — only if market adapter registered)
642
649
  const marketAdapter = getMarketDataAdapter();
643
650
  if (marketAdapter) {
644
- const { MarketOutcomesTracker: MOT } = await import('./MarketOutcomesTracker');
651
+ const { MarketOutcomesTracker: MOT } = await import(
652
+ "./MarketOutcomesTracker"
653
+ );
645
654
  const outcomesTracker = new MOT();
646
655
  const synced = await outcomesTracker.syncRecentWindows(24);
647
656
  if (synced > 0) {
648
- logger.info('Synced market outcomes for windows', {
657
+ logger.info("Synced market outcomes for windows", {
649
658
  windowsSynced: synced,
650
659
  });
651
660
  }
@@ -653,7 +662,7 @@ export class AutomationPipeline {
653
662
  const processed =
654
663
  await rewardBackpropagationService.processPendingWindows();
655
664
  if (processed > 0) {
656
- logger.info('Updated rewards for trajectories', {
665
+ logger.info("Updated rewards for trajectories", {
657
666
  windowsProcessed: processed,
658
667
  });
659
668
  }
@@ -664,7 +673,7 @@ export class AutomationPipeline {
664
673
  const windowId = getPreviousWindowId(hoursAgo);
665
674
  const scored = await rulerScoringService.scoreWindow(windowId);
666
675
  if (scored > 0) {
667
- logger.info('Scored trajectories with RULER', { windowId, scored });
676
+ logger.info("Scored trajectories with RULER", { windowId, scored });
668
677
  }
669
678
  }
670
679
 
@@ -682,18 +691,18 @@ export class AutomationPipeline {
682
691
  const batch = await da.getBatchById(batchId);
683
692
 
684
693
  if (!batch) {
685
- logger.warn('Batch not found for deployment', { batchId });
694
+ logger.warn("Batch not found for deployment", { batchId });
686
695
  return;
687
696
  }
688
697
 
689
- const model = await da.getModelByBatchAndStatus(batch.id, 'ready');
698
+ const model = await da.getModelByBatchAndStatus(batch.id, "ready");
690
699
 
691
700
  if (!model) {
692
- logger.warn('Model not found for batch', { batchId });
701
+ logger.warn("Model not found for batch", { batchId });
693
702
  return;
694
703
  }
695
704
 
696
- logger.info('Deploying model', {
705
+ logger.info("Deploying model", {
697
706
  version: batch.modelVersion,
698
707
  modelId: model.modelId,
699
708
  batchId,
@@ -703,17 +712,17 @@ export class AutomationPipeline {
703
712
  let trajectoryIds: string[];
704
713
  if (
705
714
  !batch.trajectoryIds ||
706
- batch.trajectoryIds === 'null' ||
707
- batch.trajectoryIds === '[]'
715
+ batch.trajectoryIds === "null" ||
716
+ batch.trajectoryIds === "[]"
708
717
  ) {
709
- logger.warn('Training batch has invalid trajectoryIds', {
718
+ logger.warn("Training batch has invalid trajectoryIds", {
710
719
  batchId: batch.id,
711
720
  });
712
721
  trajectoryIds = [];
713
722
  } else {
714
723
  trajectoryIds = JSON.parse(batch.trajectoryIds) as string[];
715
724
  if (!Array.isArray(trajectoryIds)) {
716
- logger.warn('Training batch trajectoryIds is not an array', {
725
+ logger.warn("Training batch trajectoryIds is not an array", {
717
726
  batchId: batch.id,
718
727
  });
719
728
  trajectoryIds = [];
@@ -724,11 +733,11 @@ export class AutomationPipeline {
724
733
  await da.markTrajectoriesAsUsed(trajectoryIds, batch.id);
725
734
  }
726
735
 
727
- await da.updateModelStatus(model.modelId, 'deployed', {
736
+ await da.updateModelStatus(model.modelId, "deployed", {
728
737
  deployedAt: new Date(),
729
738
  });
730
739
 
731
- logger.info('Model deployed', {
740
+ logger.info("Model deployed", {
732
741
  version: batch.modelVersion,
733
742
  modelId: model.modelId,
734
743
  });
@@ -740,7 +749,7 @@ export class AutomationPipeline {
740
749
  */
741
750
  async benchmarkAndDeploy(
742
751
  batchId: string,
743
- autoDeploy = true
752
+ autoDeploy = true,
744
753
  ): Promise<{
745
754
  benchmarked: boolean;
746
755
  deployed: boolean;
@@ -750,37 +759,37 @@ export class AutomationPipeline {
750
759
  const batch = await da.getBatchById(batchId);
751
760
 
752
761
  if (!batch) {
753
- return { benchmarked: false, deployed: false, reason: 'Batch not found' };
762
+ return { benchmarked: false, deployed: false, reason: "Batch not found" };
754
763
  }
755
764
 
756
- const model = await da.getModelByBatchAndStatus(batch.id, 'ready');
765
+ const model = await da.getModelByBatchAndStatus(batch.id, "ready");
757
766
 
758
767
  if (!model) {
759
- return { benchmarked: false, deployed: false, reason: 'Model not found' };
768
+ return { benchmarked: false, deployed: false, reason: "Model not found" };
760
769
  }
761
770
 
762
771
  // Benchmark the model
763
772
  logger.info(
764
- 'Benchmarking model...',
773
+ "Benchmarking model...",
765
774
  { modelId: model.modelId },
766
- 'AutomationPipeline'
775
+ "AutomationPipeline",
767
776
  );
768
777
  const benchmarkResults = await benchmarkService.benchmarkModel(
769
- model.modelId
778
+ model.modelId,
770
779
  );
771
780
 
772
781
  // Compare with previous models
773
782
  const comparison = await benchmarkService.compareModels(model.modelId);
774
783
 
775
784
  logger.info(
776
- 'Benchmark complete',
785
+ "Benchmark complete",
777
786
  {
778
787
  modelId: model.modelId,
779
788
  score: benchmarkResults.benchmarkScore,
780
789
  shouldDeploy: comparison.shouldDeploy,
781
790
  reason: comparison.reason,
782
791
  },
783
- 'AutomationPipeline'
792
+ "AutomationPipeline",
784
793
  );
785
794
 
786
795
  // Deploy if performance is good enough (and autoDeploy is enabled)
@@ -796,7 +805,7 @@ export class AutomationPipeline {
796
805
  return {
797
806
  benchmarked: true,
798
807
  deployed: false,
799
- reason: comparison.reason || 'Performance below threshold',
808
+ reason: comparison.reason || "Performance below threshold",
800
809
  };
801
810
  }
802
811
 
@@ -821,13 +830,13 @@ export class AutomationPipeline {
821
830
  const da = getTrainingDataAdapter();
822
831
  const dbOk = await da.healthCheck();
823
832
  if (!dbOk) {
824
- logger.warn('Health check: database unreachable');
833
+ logger.warn("Health check: database unreachable");
825
834
  }
826
835
 
827
836
  const oneHourAgo = new Date(Date.now() - 60 * 60 * 1000);
828
837
  const last1h = await da.countTrajectoriesSince(oneHourAgo);
829
838
  if (last1h < 1) {
830
- logger.warn('Low data collection rate', { trajectoriesLastHour: last1h });
839
+ logger.warn("Low data collection rate", { trajectoriesLastHour: last1h });
831
840
  }
832
841
 
833
842
  // Ensure storage directories exist
@@ -891,10 +900,10 @@ export class AutomationPipeline {
891
900
  training: {
892
901
  currentJob: this.currentTrainingJob,
893
902
  lastCompleted: lastCompleted?.completedAt || null,
894
- nextScheduled: lastCompleted
903
+ nextScheduled: lastCompleted?.completedAt
895
904
  ? new Date(
896
- lastCompleted.completedAt!.getTime() +
897
- this.config.trainingInterval * 60 * 60 * 1000
905
+ lastCompleted.completedAt.getTime() +
906
+ this.config.trainingInterval * 60 * 60 * 1000,
898
907
  )
899
908
  : null,
900
909
  },