@elizaos/training 2.0.0-alpha.21 → 2.0.0-alpha.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/.turbo/turbo-lint.log +2 -0
  2. package/.turbo/turbo-typecheck.log +1 -0
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/adapter.js +59 -0
  5. package/dist/archetypes/ArchetypeConfigService.js +510 -0
  6. package/dist/archetypes/derive-archetype.js +196 -0
  7. package/dist/archetypes/index.js +7 -0
  8. package/dist/benchmark/ArchetypeMatchupBenchmark.js +547 -0
  9. package/dist/benchmark/BenchmarkChartGenerator.js +632 -0
  10. package/dist/benchmark/BenchmarkDataGenerator.js +825 -0
  11. package/dist/benchmark/BenchmarkDataViewer.js +197 -0
  12. package/dist/benchmark/BenchmarkHistoryService.js +135 -0
  13. package/dist/benchmark/BenchmarkRunner.js +483 -0
  14. package/dist/benchmark/BenchmarkValidator.js +158 -0
  15. package/dist/benchmark/FastEvalRunner.js +133 -0
  16. package/dist/benchmark/MetricsValidator.js +104 -0
  17. package/dist/benchmark/MetricsVisualizer.js +775 -0
  18. package/dist/benchmark/ModelBenchmarkService.js +433 -0
  19. package/dist/benchmark/ModelRegistry.js +122 -0
  20. package/dist/benchmark/RulerBenchmarkIntegration.js +168 -0
  21. package/dist/benchmark/SimulationA2AInterface.js +683 -0
  22. package/dist/benchmark/SimulationEngine.js +522 -0
  23. package/dist/benchmark/TaskRunner.js +60 -0
  24. package/dist/benchmark/__tests__/BenchmarkRunner.test.js +409 -0
  25. package/dist/benchmark/__tests__/HeadToHead.test.js +105 -0
  26. package/dist/benchmark/index.js +23 -0
  27. package/dist/benchmark/parseSimulationMetrics.js +86 -0
  28. package/dist/benchmark/simulation-types.js +1 -0
  29. package/dist/dependencies.js +197 -0
  30. package/dist/generation/TrajectoryGenerator.js +244 -0
  31. package/dist/generation/index.js +6 -0
  32. package/dist/huggingface/HuggingFaceDatasetUploader.js +463 -0
  33. package/dist/huggingface/HuggingFaceIntegrationService.js +272 -0
  34. package/dist/huggingface/HuggingFaceModelUploader.js +385 -0
  35. package/dist/huggingface/index.js +9 -0
  36. package/dist/huggingface/shared/HuggingFaceUploadUtil.js +144 -0
  37. package/dist/index.js +41 -0
  38. package/dist/init-training.js +43 -0
  39. package/dist/metrics/TrajectoryMetricsExtractor.js +523 -0
  40. package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +628 -0
  41. package/dist/metrics/index.js +7 -0
  42. package/dist/metrics/types.js +21 -0
  43. package/dist/rubrics/__tests__/index.test.js +150 -0
  44. package/dist/rubrics/ass-kisser.js +83 -0
  45. package/dist/rubrics/degen.js +78 -0
  46. package/dist/rubrics/goody-twoshoes.js +82 -0
  47. package/dist/rubrics/index.js +184 -0
  48. package/dist/rubrics/information-trader.js +82 -0
  49. package/dist/rubrics/infosec.js +99 -0
  50. package/dist/rubrics/liar.js +102 -0
  51. package/dist/rubrics/perps-trader.js +85 -0
  52. package/dist/rubrics/researcher.js +79 -0
  53. package/dist/rubrics/scammer.js +80 -0
  54. package/dist/rubrics/social-butterfly.js +71 -0
  55. package/dist/rubrics/super-predictor.js +95 -0
  56. package/dist/rubrics/trader.js +65 -0
  57. package/dist/scoring/ArchetypeScoringService.js +301 -0
  58. package/dist/scoring/JudgePromptBuilder.js +401 -0
  59. package/dist/scoring/LLMJudgeCache.js +263 -0
  60. package/dist/scoring/index.js +8 -0
  61. package/dist/training/AutomationPipeline.js +714 -0
  62. package/dist/training/BenchmarkService.js +370 -0
  63. package/dist/training/ConfigValidator.js +153 -0
  64. package/dist/training/MarketOutcomesTracker.js +142 -0
  65. package/dist/training/ModelDeployer.js +128 -0
  66. package/dist/training/ModelFetcher.js +48 -0
  67. package/dist/training/ModelSelectionService.js +248 -0
  68. package/dist/training/ModelUsageVerifier.js +106 -0
  69. package/dist/training/MultiModelOrchestrator.js +349 -0
  70. package/dist/training/RLModelConfig.js +295 -0
  71. package/dist/training/RewardBackpropagationService.js +117 -0
  72. package/dist/training/RulerScoringService.js +450 -0
  73. package/dist/training/TrainingMonitor.js +108 -0
  74. package/dist/training/TrajectoryRecorder.js +281 -0
  75. package/dist/training/__tests__/TrajectoryRecorder.test.js +363 -0
  76. package/dist/training/index.js +30 -0
  77. package/dist/training/logRLConfig.js +29 -0
  78. package/dist/training/pipeline.js +80 -0
  79. package/dist/training/storage/ModelStorageService.js +190 -0
  80. package/dist/training/storage/TrainingDataArchiver.js +136 -0
  81. package/dist/training/storage/index.js +7 -0
  82. package/dist/training/types.js +6 -0
  83. package/dist/training/window-utils.js +100 -0
  84. package/dist/utils/index.js +73 -0
  85. package/dist/utils/logger.js +55 -0
  86. package/dist/utils/snowflake.js +15 -0
  87. package/dist/utils/synthetic-detector.js +67 -0
  88. package/package.json +2 -2
  89. package/research-output/training-runs/training-run-1773742857616.json +38 -0
  90. package/research-output/training-runs/training-run-1773742946977.json +38 -0
  91. package/research-output/training-runs/training-run-1773743278891.json +38 -0
  92. package/research-output/training-runs/training-run-1773743409754.json +38 -0
  93. package/research-output/training-runs/training-run-1773743651086.json +38 -0
  94. package/research-output/training-runs/training-run-1773743782883.json +38 -0
@@ -0,0 +1,714 @@
1
+ /**
2
+ * Training Automation Pipeline
3
+ *
4
+ * Fully automated RL training pipeline:
5
+ * 1. Monitor data collection
6
+ * 2. Trigger training when ready
7
+ * 3. Score with RULER
8
+ * 4. Export data
9
+ * 5. Train model
10
+ * 6. Deploy new version
11
+ * 7. Monitor performance
12
+ */
13
+ import { spawn } from "node:child_process";
14
+ import fs from "node:fs/promises";
15
+ import path from "node:path";
16
+ import { getMarketDataAdapter, getTrainingDataAdapter } from "../adapter";
17
+ import { getExportGroupedForGRPO } from "../dependencies";
18
+ import { logger } from "../utils/logger";
19
+ import { benchmarkService } from "./BenchmarkService";
20
+ import { modelSelectionService } from "./ModelSelectionService";
21
+ import { rewardBackpropagationService } from "./RewardBackpropagationService";
22
+ import { rulerScoringService } from "./RulerScoringService";
23
+ import { getCurrentWindowId, getPreviousWindowId } from "./window-utils";
24
/** Number of milliseconds in one hour. */
const MS_PER_HOUR = 60 * 60 * 1000;

/** Upper bound on how long the Atropos health probe may take. */
const ATROPOS_HEALTH_TIMEOUT_MS = 3000;

/**
 * Parse a strictly positive base-10 integer from an optional string.
 *
 * @param {string | undefined | null} raw - Raw value, e.g. an environment variable.
 * @returns {number | undefined} The parsed integer, or `undefined` when the
 *   value is missing, non-numeric, zero or negative.
 */
function parsePositiveInt(raw) {
    const parsed = Number.parseInt(raw ?? "", 10);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
}

/**
 * Convert a timestamp-like value to epoch milliseconds.
 *
 * Accepts anything the `Date` constructor accepts, so a `Date` instance and
 * an ISO string are handled the same way.
 *
 * @param {unknown} value - Timestamp-like value.
 * @returns {number | null} Epoch milliseconds, or `null` when absent/invalid.
 */
function toEpochMs(value) {
    if (value === null || value === undefined) {
        return null;
    }
    const ms = new Date(value).getTime();
    return Number.isNaN(ms) ? null : ms;
}

/**
 * Parse a serialized steps array.
 *
 * @param {unknown} raw - Serialized JSON expected to hold a non-empty array.
 * @returns {Array | null} The parsed array, or `null` when the value is
 *   missing, the literal "null"/"[]", malformed JSON, or not an array.
 */
function parseStepsJson(raw) {
    if (!raw || raw === "null" || raw === "[]") {
        return null;
    }
    try {
        const parsed = JSON.parse(raw);
        return Array.isArray(parsed) ? parsed : null;
    }
    catch {
        // Malformed JSON is treated like any other invalid trajectory.
        return null;
    }
}

/**
 * Quality checks applied to each trajectory's steps array. Every check
 * contributes one point to the score and one to the total.
 */
const QUALITY_CHECKS = [
    // Check 1: Has steps
    (steps) => steps.length > 0,
    // Check 2: Steps have LLM calls
    (steps) => steps.every((s) => Array.isArray(s?.llmCalls) && s.llmCalls.length > 0),
    // Check 3: LLM calls have substantial prompts
    (steps) => steps.every((s) => Array.isArray(s?.llmCalls) &&
        s.llmCalls.every((llm) => Boolean(llm?.systemPrompt) &&
            llm.systemPrompt.length > 50 &&
            Boolean(llm.userPrompt) &&
            llm.userPrompt.length > 100)),
    // Check 4: Has provider accesses
    (steps) => steps.some((s) => Array.isArray(s?.providerAccesses) && s.providerAccesses.length > 0),
    // Check 5: Actions have results
    (steps) => steps.every((s) => Boolean(s?.action && (s.action.result || s.action.error))),
];

/**
 * Read the most recent parsable JSON object from a JSONL metrics log.
 *
 * Lines are scanned from the end so that a trailing, partially written line
 * does not hide the last complete record.
 *
 * @param {string} logPath - Path of the JSONL file.
 * @returns {Promise<object | null>} The latest record, or `null` when the file
 *   cannot be read or contains no parsable object.
 */
async function readLatestMetric(logPath) {
    let content;
    try {
        content = await fs.readFile(logPath, "utf-8");
    }
    catch {
        // Log file doesn't exist yet or is unreadable.
        return null;
    }
    const lines = content.split("\n");
    for (let i = lines.length - 1; i >= 0; i--) {
        const line = lines[i].trim();
        if (!line) {
            continue;
        }
        try {
            const metric = JSON.parse(line);
            if (metric !== null && typeof metric === "object") {
                return metric;
            }
        }
        catch {
            // Incomplete or corrupt line – fall back to the previous one.
        }
    }
    return null;
}

/**
 * Decode the trajectory id list stored on a training batch record.
 *
 * @param {{ id: string, trajectoryIds?: string | null }} batch - Batch record.
 * @returns {Array} The decoded ids, or an empty array (with a warning logged)
 *   when the stored value is missing, malformed or not an array.
 */
function parseTrajectoryIds(batch) {
    const raw = batch.trajectoryIds;
    if (!raw || raw === "null" || raw === "[]") {
        logger.warn("Training batch has invalid trajectoryIds", {
            batchId: batch.id,
        });
        return [];
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        logger.warn("Training batch has invalid trajectoryIds", {
            batchId: batch.id,
        });
        return [];
    }
    if (!Array.isArray(parsed)) {
        logger.warn("Training batch trajectoryIds is not an array", {
            batchId: batch.id,
        });
        return [];
    }
    return parsed;
}

/**
 * Orchestrates the training loop: readiness checks, data export, launching
 * the Python trainer, progress monitoring, deployment bookkeeping and health
 * reporting.
 */
export class AutomationPipeline {
    config;
    currentTrainingJob = null;
    /**
     * @param {object} [config] - Optional overrides. Unset values fall back to
     *   environment variables (where one is consulted below) and then to
     *   built-in defaults. `minScenarioGroups` (default 10) is the minimum
     *   number of scenario groups required before training is considered ready.
     */
    constructor(config = {}) {
        this.config = {
            minTrajectoriesForTraining: config.minTrajectoriesForTraining ??
                parsePositiveInt(process.env.TRAINING_MIN_TRAJECTORIES) ??
                1,
            // Keep at 1 for flexibility
            minGroupSize: config.minGroupSize ??
                parsePositiveInt(process.env.TRAINING_MIN_GROUP_SIZE) ??
                1,
            minScenarioGroups: config.minScenarioGroups ?? 10,
            dataQualityThreshold: config.dataQualityThreshold ?? 0.95,
            autoTriggerTraining: config.autoTriggerTraining !== false,
            trainingInterval: config.trainingInterval || 24, // Daily by default
            baseModel: config.baseModel || "unsloth/Qwen3-4B-128K", // 4B params, 128K context - ideal for fine-tuning
            modelNamePrefix: config.modelNamePrefix || "eliza-agent",
            modelIdPrefix: config.modelIdPrefix ||
                process.env.TRAINING_MODEL_ID_PREFIX ||
                config.modelNamePrefix ||
                "eliza-agent",
            modelStoragePath: config.modelStoragePath ||
                path.resolve(process.cwd(), "storage/models"),
            dataStoragePath: config.dataStoragePath ||
                path.resolve(process.cwd(), "storage/training-data"),
            pythonProjectRoot: config.pythonProjectRoot ||
                process.env.TRAINING_PYTHON_ROOT ||
                path.resolve(process.cwd(), "packages/training/python"),
            trainerScriptPath: config.trainerScriptPath ||
                process.env.TRAINING_SCRIPT_PATH ||
                undefined,
            trainerPythonExecutable: config.trainerPythonExecutable ||
                process.env.TRAINING_PYTHON_EXECUTABLE ||
                (process.platform === "win32" ? "python" : "python3"),
            trainingMode: config.trainingMode ||
                process.env.TRAINING_MODE ||
                "atropos",
            atroposApiUrl: config.atroposApiUrl ||
                process.env.ATROPOS_API_URL ||
                "http://localhost:8000",
            // A non-numeric VLLM_PORT falls back to the default instead of NaN.
            vllmPort: config.vllmPort ||
                parsePositiveInt(process.env.VLLM_PORT) ||
                9001,
        };
    }
    /**
     * Check if we're ready to train.
     *
     * @returns {Promise<{ready: boolean, reason: string, stats: object}>}
     *   `ready` is true only when the scored-trajectory count, the scenario
     *   group count and the sampled data quality all meet their configured
     *   minimums; `reason` names the first unmet requirement.
     */
    async checkTrainingReadiness() {
        const adapter = getTrainingDataAdapter();
        // The four reads are independent of each other, so run them together.
        const [scoredAndReady, unscored, scenarioGroups, quality] = await Promise.all([
            adapter.countScoredTrajectoriesReady(),
            adapter.countUnscoredTrajectories(),
            adapter.getScenarioGroups(this.config.minGroupSize),
            this.calculateDataQuality(),
        ]);
        const stats = {
            totalTrajectories: scoredAndReady,
            unscoredTrajectories: unscored,
            scenarioGroups: scenarioGroups.length,
            dataQuality: quality,
        };
        const notReady = (reason) => ({ ready: false, reason, stats });
        if (scoredAndReady < this.config.minTrajectoriesForTraining) {
            return notReady(`Need ${this.config.minTrajectoriesForTraining - scoredAndReady} more trajectories`);
        }
        const minScenarioGroups = this.config.minScenarioGroups ?? 10;
        if (scenarioGroups.length < minScenarioGroups) {
            return notReady(`Need more scenario groups (${scenarioGroups.length}/${minScenarioGroups} minimum)`);
        }
        if (quality < this.config.dataQualityThreshold) {
            // Both sides use toFixed so float noise never leaks into the message.
            return notReady(`Data quality too low (${(quality * 100).toFixed(1)}% < ${(this.config.dataQualityThreshold * 100).toFixed(1)}%)`);
        }
        return {
            ready: true,
            reason: "Ready to train!",
            stats,
        };
    }
    /**
     * Calculate data quality score.
     *
     * Samples up to 50 recent trajectories and runs every entry of
     * `QUALITY_CHECKS` against each one whose `stepsJson` decodes to a
     * non-empty array. Trajectories that do not decode are skipped.
     *
     * @returns {Promise<number>} Fraction of passed checks in [0, 1]; 0 when
     *   the sample is empty or no sampled trajectory could be evaluated.
     */
    async calculateDataQuality() {
        const adapter = getTrainingDataAdapter();
        const sample = await adapter.sampleRecentTrajectories(50);
        let qualityScore = 0;
        let totalChecks = 0;
        for (const traj of sample) {
            const steps = parseStepsJson(traj?.stepsJson);
            if (steps === null) {
                continue; // Skip invalid trajectories
            }
            totalChecks += QUALITY_CHECKS.length;
            qualityScore += QUALITY_CHECKS.filter((check) => check(steps)).length;
        }
        // Guard against 0/0: NaN would compare false against the threshold
        // and silently pass the quality gate.
        return totalChecks === 0 ? 0 : qualityScore / totalChecks;
    }
    /**
     * Trigger training job.
     *
     * @param {{force?: boolean, batchSize?: number}} [options] - `force`
     *   bypasses the readiness gate; `batchSize` caps the number of
     *   trajectories when the model selection service reports no data limit.
     * @returns {Promise<{success: boolean, jobId?: string, error?: string}>}
     */
    async triggerTraining(options = {}) {
        // Check readiness
        let readiness = await this.checkTrainingReadiness();
        if (!readiness.ready && !options.force) {
            return {
                success: false,
                error: readiness.reason,
            };
        }
        // If forcing but no trajectories at all, try to score some first
        if (options.force &&
            readiness.stats.totalTrajectories === 0 &&
            readiness.stats.unscoredTrajectories > 0) {
            logger.info("Force mode: Attempting to score unscored trajectories first", {
                unscored: readiness.stats.unscoredTrajectories,
            }, "AutomationPipeline");
            // Score recent trajectories
            const recentWindowIds = await getTrainingDataAdapter().getUnscoredWindowIds(5);
            for (const windowId of recentWindowIds) {
                await rulerScoringService.scoreWindow(windowId);
            }
            // Re-check readiness after scoring and keep the refreshed stats so
            // the export below is sized from the post-scoring counts.
            readiness = await this.checkTrainingReadiness();
            logger.info("After scoring", {
                scored: readiness.stats.totalTrajectories,
                stillUnscored: readiness.stats.unscoredTrajectories,
            }, "AutomationPipeline");
        }
        // Determine training mode: 'tinker' for cloud-based or 'atropos' for local vLLM
        const trainingMode = this.config.trainingMode || "atropos";
        const useTinker = trainingMode.toLowerCase() === "tinker";
        // Resolve the Python training script based on mode.
        // Allow explicit override for packaged/runtime deployments.
        const pythonScript = this.config.trainerScriptPath ||
            path.resolve(this.config.pythonProjectRoot ||
                path.resolve(process.cwd(), "packages/training/python"), "src", "training", useTinker ? "tinker_trainer.py" : "atropos_trainer.py");
        // Verify the script before exporting data or inserting a batch record,
        // so a missing script leaves no orphaned export or pending batch behind.
        try {
            await fs.access(pythonScript);
        }
        catch {
            return {
                success: false,
                error: `Training script not found: ${pythonScript}`,
            };
        }
        // Use ModelSelectionService for smart model selection
        const modelSelection = await modelSelectionService.selectBaseModel();
        logger.info("Model selection for training", {
            strategy: modelSelection.strategy,
            modelPath: modelSelection.modelPath,
            bundleCount: modelSelection.metadata?.bundleCount,
        });
        // Get data limit based on bundle count
        const dataLimit = await modelSelectionService.getTrainingDataLimit();
        // Prepare data
        logger.info("Preparing training data...", {
            ...readiness.stats,
            selectedModel: modelSelection.modelPath,
            strategy: modelSelection.strategy,
            dataLimit,
        });
        const batchId = `batch-${Date.now()}`;
        // Use standardized window ID format (YYYY-MM-DDTHH:00)
        const windowId = getCurrentWindowId();
        // Export trajectories with data limit
        const maxTrajectories = dataLimit || options.batchSize || readiness.stats.totalTrajectories;
        const exportGroupedForGRPO = getExportGroupedForGRPO();
        const exportResult = await exportGroupedForGRPO({
            outputPath: path.join(this.config.dataStoragePath, batchId),
            minTrajectoriesPerGroup: this.config.minGroupSize,
            maxGroupSize: maxTrajectories,
        });
        if (!exportResult.success) {
            return {
                success: false,
                error: `Export failed: ${exportResult.error}`,
            };
        }
        // Create training batch record
        const adapterForBatch = getTrainingDataAdapter();
        const nextVersion = await this.getNextModelVersion();
        const trajectoryIds = await adapterForBatch.getTrajectoryIdsForTraining(maxTrajectories);
        const insertedBatchId = await adapterForBatch.insertBatch({
            id: batchId,
            batchId,
            scenarioId: windowId,
            baseModel: modelSelection.modelPath,
            modelVersion: nextVersion,
            trajectoryIds: JSON.stringify(trajectoryIds),
            rankingsJson: null,
            rewardsJson: JSON.stringify([]),
            trainingLoss: null,
            policyImprovement: null,
            status: "pending",
            error: null,
            createdAt: new Date(),
        });
        const batch = await adapterForBatch.getBatchById(insertedBatchId);
        if (!batch) {
            return {
                success: false,
                error: "Failed to create training batch record",
            };
        }
        // Set environment variables for Python script
        const env = {
            ...process.env,
            MODE: "single",
            BATCH_ID: batchId,
            MODEL_VERSION: nextVersion,
            WINDOW_ID: windowId,
            BASE_MODEL: modelSelection.modelPath,
            MAX_EXAMPLES: dataLimit ? dataLimit.toString() : "2000",
            DATABASE_URL: process.env.DATABASE_URL || "",
            ATROPOS_API_URL: this.config.atroposApiUrl || "http://localhost:8000",
            VLLM_PORT: String(this.config.vllmPort || 9001),
            FORCE_TRAINING: options.force ? "true" : "false",
            MIN_AGENTS_PER_WINDOW: "1",
            TRAINING_MODE: trainingMode,
        };
        logger.info(useTinker
            ? "Training will use Tinker cloud-based GRPO"
            : "Training will use Atropos GRPO with vLLM", {
            trainingMode,
            ...(useTinker
                ? { model: env.BASE_MODEL }
                : {
                    atroposUrl: env.ATROPOS_API_URL,
                    vllmPort: env.VLLM_PORT,
                    model: env.BASE_MODEL,
                }),
        }, "AutomationPipeline");
        const pythonCmd = this.config.trainerPythonExecutable ||
            (process.platform === "win32" ? "python" : "python3");
        const trainingProcess = spawn(pythonCmd, [pythonScript], {
            detached: false,
            stdio: ["ignore", "pipe", "pipe"],
            env,
        });
        // Mark the batch failed unless it already reached a terminal state, so
        // a status written by the trainer itself is never overwritten.
        const failIfUnfinished = async (reason) => {
            try {
                const adapter = getTrainingDataAdapter();
                const current = await adapter.getBatchById(batchId);
                if (current?.status === "completed" || current?.status === "failed") {
                    return;
                }
                await adapter.updateBatchStatus(batchId, "failed", reason);
            }
            catch (err) {
                logger.error("Failed to update batch status", {
                    error: err instanceof Error ? err : String(err),
                });
            }
        };
        // Capture and log training process output
        trainingProcess.stdout?.on("data", (data) => {
            logger.info("Training stdout", { output: data.toString().trim() });
        });
        trainingProcess.stderr?.on("data", (data) => {
            logger.warn("Training stderr", { output: data.toString().trim() });
        });
        trainingProcess.on("error", (error) => {
            logger.error("Training process error", { error: error.message });
            void failIfUnfinished(`Process spawn failed: ${error.message}`);
        });
        // Without this handler a crashed trainer would leave the batch in
        // "pending" forever and block every later automation cycle.
        trainingProcess.on("exit", (code, signal) => {
            if (code === 0) {
                logger.info("Training process exited", { batchId, code });
                return;
            }
            logger.error("Training process exited abnormally", {
                batchId,
                code,
                signal,
            });
            void failIfUnfinished(`Training process exited abnormally (code=${code}, signal=${signal})`);
        });
        trainingProcess.unref();
        this.currentTrainingJob = batch.id;
        logger.info("Training job triggered", {
            batchId: batch.id,
            version: nextVersion,
            trajectories: exportResult.trajectoriesExported,
        });
        return {
            success: true,
            jobId: batch.id,
        };
    }
    /**
     * Get next model version.
     *
     * Increments the patch component of the latest model's version. Accepts
     * versions with or without a leading "v"; missing minor/patch components
     * count as 0 and any suffix after the numeric part is ignored.
     *
     * @returns {Promise<string>} `"v1.0.0"` when no model exists yet or the
     *   latest version has no numeric prefix, otherwise `v<major>.<minor>.<patch + 1>`.
     */
    async getNextModelVersion() {
        const latestModel = await getTrainingDataAdapter().getLatestModel();
        if (!latestModel) {
            return "v1.0.0";
        }
        const match = /^v?(\d+)(?:\.(\d+))?(?:\.(\d+))?/.exec(String(latestModel.version ?? "").trim());
        if (!match) {
            logger.warn("Latest model version is not parseable; restarting at v1.0.0", {
                version: latestModel.version,
            });
            return "v1.0.0";
        }
        const major = Number(match[1]);
        const minor = Number(match[2] ?? 0);
        const patch = Number(match[3] ?? 0);
        // Increment patch version
        return `v${major}.${minor}.${patch + 1}`;
    }
    /**
     * Monitor training job.
     *
     * For batches that are neither completed, failed nor pending, reads
     * `training_metrics.jsonl` from the batch's export directory and derives
     * progress from the latest parsable record's `step` / `total_steps`.
     *
     * @param {string} batchId - Training batch id.
     * @returns {Promise<{status: string, progress?: number, eta?: number, error?: string}>}
     *   `progress` is clamped to [0, 1]; `eta` is the estimated remaining
     *   milliseconds and is only set when `elapsed_ms` is available.
     */
    async monitorTraining(batchId) {
        const batch = await getTrainingDataAdapter().getBatchById(batchId);
        if (!batch) {
            return { status: "not_found" };
        }
        // Terminal states – return immediately
        if (batch.status === "completed") {
            return { status: "completed", progress: 1.0, error: undefined };
        }
        if (batch.status === "failed") {
            return {
                status: "failed",
                progress: 0,
                error: batch.error || "Training failed",
            };
        }
        if (batch.status === "pending") {
            return { status: "pending", progress: 0 };
        }
        let progress = 0;
        let eta;
        const metricsLogPath = path.resolve(this.config.dataStoragePath, batchId, "training_metrics.jsonl");
        // A missing log yields null: training may have just started, so report
        // zero progress instead of faking a value.
        const lastMetric = await readLatestMetric(metricsLogPath);
        if (lastMetric &&
            typeof lastMetric.step === "number" &&
            typeof lastMetric.total_steps === "number" &&
            lastMetric.total_steps > 0) {
            const ratio = lastMetric.step / lastMetric.total_steps;
            progress = Math.min(1, Math.max(0, ratio));
            // Estimate remaining time from elapsed
            if (typeof lastMetric.elapsed_ms === "number" && progress > 0) {
                const totalEstimatedMs = lastMetric.elapsed_ms / progress;
                eta = Math.max(0, totalEstimatedMs - lastMetric.elapsed_ms);
            }
        }
        return {
            status: batch.status,
            progress,
            eta,
            error: batch.error || undefined,
        };
    }
    /**
     * Clean up export files for a specific batch to prevent disk accumulation.
     *
     * Only removes the batch-specific subdirectory, not the entire export
     * root: a batch id that resolves to the root itself or to a location
     * outside it is refused.
     *
     * @param {string} batchId - Training batch id (also the directory name).
     * @returns {Promise<void>} Never rejects; failures are logged as warnings.
     */
    async cleanupExportFiles(batchId) {
        const exportRoot = path.resolve(this.config.dataStoragePath);
        const batchDir = path.resolve(exportRoot, String(batchId ?? ""));
        const relative = path.relative(exportRoot, batchDir);
        const escapesRoot = relative === "" ||
            relative === ".." ||
            relative.startsWith(`..${path.sep}`) ||
            path.isAbsolute(relative);
        if (escapesRoot) {
            logger.warn("Refusing to clean up path outside the export root", {
                batchId,
                dir: batchDir,
            });
            return;
        }
        try {
            await fs.access(batchDir);
        }
        catch {
            // Directory doesn't exist – nothing to clean
            return;
        }
        try {
            const files = await fs.readdir(batchDir);
            // Recursive removal also handles nested directories.
            await fs.rm(batchDir, { recursive: true, force: true });
            logger.info("Cleaned up export files", { batchId, filesRemoved: files.length, dir: batchDir }, "AutomationPipeline");
        }
        catch (err) {
            logger.warn("Failed to clean up export files", {
                batchId,
                dir: batchDir,
                error: err instanceof Error ? err.message : String(err),
            });
        }
    }
    /**
     * Automation loop (called by cron).
     *
     * While a job started by this instance is tracked, the cycle only
     * advances that job and returns. Otherwise it deploys a newly completed
     * batch if one exists, triggers training when due, syncs market outcomes
     * (when a market adapter is registered), scores the last 24 windows and
     * runs health checks.
     *
     * @returns {Promise<void>}
     */
    async runAutomationCycle() {
        logger.info("Running automation cycle");
        // Check if training is already running
        if (this.currentTrainingJob) {
            const jobId = this.currentTrainingJob;
            const { status } = await this.monitorTraining(jobId);
            if (status === "completed") {
                await this.deployModel(jobId);
                await this.cleanupExportFiles(jobId);
                this.currentTrainingJob = null;
            }
            else if (status === "failed") {
                logger.error("Training job failed", {
                    batchId: jobId,
                });
                await this.cleanupExportFiles(jobId);
                this.currentTrainingJob = null;
            }
            else if (status === "not_found") {
                // The batch record is gone; stop tracking it so later cycles
                // are not blocked forever.
                logger.warn("Tracked training job no longer exists", {
                    batchId: jobId,
                });
                await this.cleanupExportFiles(jobId);
                this.currentTrainingJob = null;
            }
            return;
        }
        // Check for newly completed batches (Python script may have completed)
        const da = getTrainingDataAdapter();
        const recentBatches = await da.getRecentlyCompletedBatches(24);
        const newlyCompleted = recentBatches[0];
        if (newlyCompleted) {
            const alreadyDeployed = await da.getModelByBatchAndStatus(newlyCompleted.batchId, "deployed");
            if (!alreadyDeployed) {
                logger.info("Found newly completed training batch", {
                    batchId: newlyCompleted.batchId,
                });
                await this.deployModel(newlyCompleted.batchId);
            }
        }
        // Check if we should trigger training
        const readiness = await this.checkTrainingReadiness();
        if (readiness.ready && this.config.autoTriggerTraining) {
            const lastCompleted = await da.getLastCompletedBatch();
            const completedAtMs = toEpochMs(lastCompleted?.completedAt);
            // No previous run counts as "infinitely long ago".
            const hoursSinceLastTraining = completedAtMs === null
                ? Number.POSITIVE_INFINITY
                : (Date.now() - completedAtMs) / MS_PER_HOUR;
            if (hoursSinceLastTraining >= this.config.trainingInterval) {
                logger.info("Triggering automatic training", readiness.stats);
                await this.triggerTraining();
            }
        }
        // Track market outcomes for recent windows (optional — only if market adapter registered)
        const marketAdapter = getMarketDataAdapter();
        if (marketAdapter) {
            const { MarketOutcomesTracker: MOT } = await import("./MarketOutcomesTracker");
            const outcomesTracker = new MOT();
            const synced = await outcomesTracker.syncRecentWindows(24);
            if (synced > 0) {
                logger.info("Synced market outcomes for windows", {
                    windowsSynced: synced,
                });
            }
            const processed = await rewardBackpropagationService.processPendingWindows();
            if (processed > 0) {
                logger.info("Updated rewards for trajectories", {
                    windowsProcessed: processed,
                });
            }
        }
        // Score trajectories using RULER framework
        for (let hoursAgo = 0; hoursAgo < 24; hoursAgo++) {
            const windowId = getPreviousWindowId(hoursAgo);
            const scored = await rulerScoringService.scoreWindow(windowId);
            if (scored > 0) {
                logger.info("Scored trajectories with RULER", { windowId, scored });
            }
        }
        await this.runHealthChecks();
    }
    /**
     * Deploy trained model.
     *
     * Looks up the batch and its model in "ready" status, marks the batch's
     * trajectories as used and sets the model status to "deployed". Logs a
     * warning and returns when the batch or the ready model cannot be found.
     *
     * @param {string} batchId - Training batch id.
     * @returns {Promise<void>}
     */
    async deployModel(batchId) {
        const da = getTrainingDataAdapter();
        const batch = await da.getBatchById(batchId);
        if (!batch) {
            logger.warn("Batch not found for deployment", { batchId });
            return;
        }
        const model = await da.getModelByBatchAndStatus(batch.id, "ready");
        if (!model) {
            logger.warn("Model not found for batch", { batchId });
            return;
        }
        logger.info("Deploying model", {
            version: batch.modelVersion,
            modelId: model.modelId,
            batchId,
        });
        // Mark trajectories as used
        const trajectoryIds = parseTrajectoryIds(batch);
        if (trajectoryIds.length > 0) {
            await da.markTrajectoriesAsUsed(trajectoryIds, batch.id);
        }
        await da.updateModelStatus(model.modelId, "deployed", {
            deployedAt: new Date(),
        });
        logger.info("Model deployed", {
            version: batch.modelVersion,
            modelId: model.modelId,
        });
    }
    /**
     * Benchmark and conditionally deploy trained model.
     * Only deploys if the benchmark comparison recommends it.
     *
     * @param {string} batchId - Training batch id.
     * @param {boolean} [autoDeploy=true] - When false, benchmark only.
     * @returns {Promise<{benchmarked: boolean, deployed: boolean, reason: string}>}
     */
    async benchmarkAndDeploy(batchId, autoDeploy = true) {
        const da = getTrainingDataAdapter();
        const batch = await da.getBatchById(batchId);
        if (!batch) {
            return { benchmarked: false, deployed: false, reason: "Batch not found" };
        }
        const model = await da.getModelByBatchAndStatus(batch.id, "ready");
        if (!model) {
            return { benchmarked: false, deployed: false, reason: "Model not found" };
        }
        // Benchmark the model
        logger.info("Benchmarking model...", { modelId: model.modelId }, "AutomationPipeline");
        const benchmarkResults = await benchmarkService.benchmarkModel(model.modelId);
        // Compare with previous models
        const comparison = await benchmarkService.compareModels(model.modelId);
        logger.info("Benchmark complete", {
            modelId: model.modelId,
            score: benchmarkResults.benchmarkScore,
            shouldDeploy: comparison.shouldDeploy,
            reason: comparison.reason,
        }, "AutomationPipeline");
        // Deploy if performance is good enough (and autoDeploy is enabled)
        if (comparison.shouldDeploy && autoDeploy) {
            await this.deployModel(batchId);
            return {
                benchmarked: true,
                deployed: true,
                reason: comparison.reason,
            };
        }
        return {
            benchmarked: true,
            deployed: false,
            reason: comparison.reason || "Performance below threshold",
        };
    }
    /**
     * Get model selection info for next training.
     *
     * @returns {Promise<{success: boolean, selection: object, summary: object}>}
     */
    async getModelSelectionInfo() {
        const selection = await modelSelectionService.selectBaseModel();
        const summary = await modelSelectionService.getSelectionSummary();
        return {
            success: true,
            selection,
            summary,
        };
    }
    /**
     * Run health checks.
     *
     * Logs a warning when the adapter health check fails or when no
     * trajectory was recorded during the last hour, then ensures the model
     * and data storage directories exist.
     *
     * @returns {Promise<void>}
     */
    async runHealthChecks() {
        const da = getTrainingDataAdapter();
        const dbOk = await da.healthCheck();
        if (!dbOk) {
            logger.warn("Health check: database unreachable");
        }
        const oneHourAgo = new Date(Date.now() - MS_PER_HOUR);
        const last1h = await da.countTrajectoriesSince(oneHourAgo);
        if (last1h < 1) {
            logger.warn("Low data collection rate", { trajectoriesLastHour: last1h });
        }
        // Ensure storage directories exist
        await fs.mkdir(this.config.modelStoragePath, { recursive: true });
        await fs.mkdir(this.config.dataStoragePath, { recursive: true });
    }
    /**
     * Get automation status.
     *
     * @returns {Promise<object>} Snapshot with `dataCollection`, `training`,
     *   `models` and `health` sections.
     */
    async getStatus() {
        const da = getTrainingDataAdapter();
        const now = Date.now();
        const twentyFourHoursAgo = new Date(now - 24 * MS_PER_HOUR);
        const sevenDaysAgo = new Date(now - 7 * 24 * MS_PER_HOUR);
        // These reads are independent of each other, so run them together.
        const [last24h, last7d, lastCompleted, latestModel, deployedCount, trainingCount, dbHealthy,] = await Promise.all([
            da.countTrajectoriesSince(twentyFourHoursAgo),
            da.countTrajectoriesSince(sevenDaysAgo),
            da.getLastCompletedBatch(),
            da.getLatestModel(),
            da.countDeployedModels(),
            da.countTrainingBatches(),
            da.healthCheck(),
        ]);
        let storageHealthy = false;
        try {
            await fs.access(this.config.modelStoragePath);
            storageHealthy = true;
        }
        catch {
            // Missing directory: storage is still healthy if it can be created.
            try {
                await fs.mkdir(this.config.modelStoragePath, { recursive: true });
                storageHealthy = true;
            }
            catch {
                storageHealthy = false;
            }
        }
        let atroposHealthy = false;
        if (this.config.atroposApiUrl) {
            const controller = new AbortController();
            const timeout = setTimeout(() => controller.abort(), ATROPOS_HEALTH_TIMEOUT_MS);
            try {
                const resp = await fetch(`${this.config.atroposApiUrl}/health`, {
                    signal: controller.signal,
                });
                atroposHealthy = resp.ok;
            }
            catch {
                atroposHealthy = false;
            }
            finally {
                // Always release the timer, including when fetch rejects.
                clearTimeout(timeout);
            }
        }
        const completedAtMs = toEpochMs(lastCompleted?.completedAt);
        return {
            dataCollection: {
                last24h,
                last7d,
                ratePerHour: last24h / 24,
            },
            training: {
                currentJob: this.currentTrainingJob,
                lastCompleted: lastCompleted?.completedAt || null,
                nextScheduled: completedAtMs === null
                    ? null
                    : new Date(completedAtMs + this.config.trainingInterval * MS_PER_HOUR),
            },
            models: {
                latest: latestModel?.version || null,
                deployed: deployedCount,
                training: trainingCount,
            },
            health: {
                database: dbHealthy,
                storage: storageHealthy,
                atropos: atroposHealthy,
            },
        };
    }
}
// Singleton: shared instance built from environment variables and defaults.
export const automationPipeline = new AutomationPipeline();