@elizaos/training 2.0.0-alpha.21 → 2.0.0-alpha.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-lint.log +2 -0
- package/.turbo/turbo-typecheck.log +1 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/adapter.js +59 -0
- package/dist/archetypes/ArchetypeConfigService.js +510 -0
- package/dist/archetypes/derive-archetype.js +196 -0
- package/dist/archetypes/index.js +7 -0
- package/dist/benchmark/ArchetypeMatchupBenchmark.js +547 -0
- package/dist/benchmark/BenchmarkChartGenerator.js +632 -0
- package/dist/benchmark/BenchmarkDataGenerator.js +825 -0
- package/dist/benchmark/BenchmarkDataViewer.js +197 -0
- package/dist/benchmark/BenchmarkHistoryService.js +135 -0
- package/dist/benchmark/BenchmarkRunner.js +483 -0
- package/dist/benchmark/BenchmarkValidator.js +158 -0
- package/dist/benchmark/FastEvalRunner.js +133 -0
- package/dist/benchmark/MetricsValidator.js +104 -0
- package/dist/benchmark/MetricsVisualizer.js +775 -0
- package/dist/benchmark/ModelBenchmarkService.js +433 -0
- package/dist/benchmark/ModelRegistry.js +122 -0
- package/dist/benchmark/RulerBenchmarkIntegration.js +168 -0
- package/dist/benchmark/SimulationA2AInterface.js +683 -0
- package/dist/benchmark/SimulationEngine.js +522 -0
- package/dist/benchmark/TaskRunner.js +60 -0
- package/dist/benchmark/__tests__/BenchmarkRunner.test.js +409 -0
- package/dist/benchmark/__tests__/HeadToHead.test.js +105 -0
- package/dist/benchmark/index.js +23 -0
- package/dist/benchmark/parseSimulationMetrics.js +86 -0
- package/dist/benchmark/simulation-types.js +1 -0
- package/dist/dependencies.js +197 -0
- package/dist/generation/TrajectoryGenerator.js +244 -0
- package/dist/generation/index.js +6 -0
- package/dist/huggingface/HuggingFaceDatasetUploader.js +463 -0
- package/dist/huggingface/HuggingFaceIntegrationService.js +272 -0
- package/dist/huggingface/HuggingFaceModelUploader.js +385 -0
- package/dist/huggingface/index.js +9 -0
- package/dist/huggingface/shared/HuggingFaceUploadUtil.js +144 -0
- package/dist/index.js +41 -0
- package/dist/init-training.js +43 -0
- package/dist/metrics/TrajectoryMetricsExtractor.js +523 -0
- package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +628 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/types.js +21 -0
- package/dist/rubrics/__tests__/index.test.js +150 -0
- package/dist/rubrics/ass-kisser.js +83 -0
- package/dist/rubrics/degen.js +78 -0
- package/dist/rubrics/goody-twoshoes.js +82 -0
- package/dist/rubrics/index.js +184 -0
- package/dist/rubrics/information-trader.js +82 -0
- package/dist/rubrics/infosec.js +99 -0
- package/dist/rubrics/liar.js +102 -0
- package/dist/rubrics/perps-trader.js +85 -0
- package/dist/rubrics/researcher.js +79 -0
- package/dist/rubrics/scammer.js +80 -0
- package/dist/rubrics/social-butterfly.js +71 -0
- package/dist/rubrics/super-predictor.js +95 -0
- package/dist/rubrics/trader.js +65 -0
- package/dist/scoring/ArchetypeScoringService.js +301 -0
- package/dist/scoring/JudgePromptBuilder.js +401 -0
- package/dist/scoring/LLMJudgeCache.js +263 -0
- package/dist/scoring/index.js +8 -0
- package/dist/training/AutomationPipeline.js +714 -0
- package/dist/training/BenchmarkService.js +370 -0
- package/dist/training/ConfigValidator.js +153 -0
- package/dist/training/MarketOutcomesTracker.js +142 -0
- package/dist/training/ModelDeployer.js +128 -0
- package/dist/training/ModelFetcher.js +48 -0
- package/dist/training/ModelSelectionService.js +248 -0
- package/dist/training/ModelUsageVerifier.js +106 -0
- package/dist/training/MultiModelOrchestrator.js +349 -0
- package/dist/training/RLModelConfig.js +295 -0
- package/dist/training/RewardBackpropagationService.js +117 -0
- package/dist/training/RulerScoringService.js +450 -0
- package/dist/training/TrainingMonitor.js +108 -0
- package/dist/training/TrajectoryRecorder.js +281 -0
- package/dist/training/__tests__/TrajectoryRecorder.test.js +363 -0
- package/dist/training/index.js +30 -0
- package/dist/training/logRLConfig.js +29 -0
- package/dist/training/pipeline.js +80 -0
- package/dist/training/storage/ModelStorageService.js +190 -0
- package/dist/training/storage/TrainingDataArchiver.js +136 -0
- package/dist/training/storage/index.js +7 -0
- package/dist/training/types.js +6 -0
- package/dist/training/window-utils.js +100 -0
- package/dist/utils/index.js +73 -0
- package/dist/utils/logger.js +55 -0
- package/dist/utils/snowflake.js +15 -0
- package/dist/utils/synthetic-detector.js +67 -0
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773742857616.json +38 -0
- package/research-output/training-runs/training-run-1773742946977.json +38 -0
- package/research-output/training-runs/training-run-1773743278891.json +38 -0
- package/research-output/training-runs/training-run-1773743409754.json +38 -0
- package/research-output/training-runs/training-run-1773743651086.json +38 -0
- package/research-output/training-runs/training-run-1773743782883.json +38 -0
|
@@ -0,0 +1,714 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Training Automation Pipeline
|
|
3
|
+
*
|
|
4
|
+
* Fully automated RL training pipeline:
|
|
5
|
+
* 1. Monitor data collection
|
|
6
|
+
* 2. Trigger training when ready
|
|
7
|
+
* 3. Score with RULER
|
|
8
|
+
* 4. Export data
|
|
9
|
+
* 5. Train model
|
|
10
|
+
* 6. Deploy new version
|
|
11
|
+
* 7. Monitor performance
|
|
12
|
+
*/
|
|
13
|
+
import { spawn } from "node:child_process";
|
|
14
|
+
import fs from "node:fs/promises";
|
|
15
|
+
import path from "node:path";
|
|
16
|
+
import { getMarketDataAdapter, getTrainingDataAdapter } from "../adapter";
|
|
17
|
+
import { getExportGroupedForGRPO } from "../dependencies";
|
|
18
|
+
import { logger } from "../utils/logger";
|
|
19
|
+
import { benchmarkService } from "./BenchmarkService";
|
|
20
|
+
import { modelSelectionService } from "./ModelSelectionService";
|
|
21
|
+
import { rewardBackpropagationService } from "./RewardBackpropagationService";
|
|
22
|
+
import { rulerScoringService } from "./RulerScoringService";
|
|
23
|
+
import { getCurrentWindowId, getPreviousWindowId } from "./window-utils";
|
|
24
|
+
/**
 * Drives the automated RL training loop: readiness checks, RULER scoring,
 * GRPO export, launching the Python trainer, progress monitoring,
 * deployment bookkeeping and health reporting.
 *
 * All persistence goes through the adapter returned by
 * `getTrainingDataAdapter()`.
 */
export class AutomationPipeline {
    /** Effective settings: constructor overrides first, then environment variables, then built-in defaults. */
    config;
    /** Id of the batch whose trainer this instance launched and is still tracking; `null` when idle. */
    currentTrainingJob = null;
    /**
     * @param {object} [config] Partial overrides. Any key left unset falls
     *   back to the matching environment variable (where one exists) and then
     *   to a built-in default. `minScenarioGroups` (default 10) is the number
     *   of scenario groups required before training is considered ready.
     */
    constructor(config = {}) {
        const envMinTrajectories = parseInt(process.env.TRAINING_MIN_TRAJECTORIES ?? "", 10);
        const envMinGroupSize = parseInt(process.env.TRAINING_MIN_GROUP_SIZE ?? "", 10);
        // Environment values only count when they parse to a positive number.
        const positiveOr = (value, fallback) => Number.isFinite(value) && value > 0 ? value : fallback;
        this.config = {
            minTrajectoriesForTraining: config.minTrajectoriesForTraining ?? positiveOr(envMinTrajectories, 1),
            minGroupSize: config.minGroupSize ?? positiveOr(envMinGroupSize, 1), // Keep at 1 for flexibility
            minScenarioGroups: config.minScenarioGroups ?? 10,
            dataQualityThreshold: config.dataQualityThreshold ?? 0.95,
            autoTriggerTraining: config.autoTriggerTraining !== false,
            trainingInterval: config.trainingInterval || 24, // Hours between runs; daily by default
            baseModel: config.baseModel || "unsloth/Qwen3-4B-128K", // 4B params, 128K context - ideal for fine-tuning
            modelNamePrefix: config.modelNamePrefix || "eliza-agent",
            modelIdPrefix: config.modelIdPrefix ||
                process.env.TRAINING_MODEL_ID_PREFIX ||
                config.modelNamePrefix ||
                "eliza-agent",
            modelStoragePath: config.modelStoragePath ||
                path.resolve(process.cwd(), "storage/models"),
            dataStoragePath: config.dataStoragePath ||
                path.resolve(process.cwd(), "storage/training-data"),
            pythonProjectRoot: config.pythonProjectRoot ||
                process.env.TRAINING_PYTHON_ROOT ||
                path.resolve(process.cwd(), "packages/training/python"),
            trainerScriptPath: config.trainerScriptPath ||
                process.env.TRAINING_SCRIPT_PATH ||
                undefined,
            trainerPythonExecutable: config.trainerPythonExecutable ||
                process.env.TRAINING_PYTHON_EXECUTABLE ||
                (process.platform === "win32" ? "python" : "python3"),
            trainingMode: config.trainingMode ||
                process.env.TRAINING_MODE ||
                "atropos",
            atroposApiUrl: config.atroposApiUrl ||
                process.env.ATROPOS_API_URL ||
                "http://localhost:8000",
            vllmPort: config.vllmPort || parseInt(process.env.VLLM_PORT || "9001", 10),
        };
    }
    /**
     * Check if we're ready to train.
     *
     * Gates are evaluated in order: scored-trajectory count, scenario-group
     * count, then sampled data quality. The first failing gate supplies
     * `reason`.
     *
     * @returns {Promise<{ready: boolean, reason: string, stats: object}>}
     */
    async checkTrainingReadiness() {
        const adapter = getTrainingDataAdapter();
        const scoredAndReady = await adapter.countScoredTrajectoriesReady();
        const unscored = await adapter.countUnscoredTrajectories();
        const scenarioGroups = await adapter.getScenarioGroups(this.config.minGroupSize);
        const quality = await this.calculateDataQuality();
        const stats = {
            totalTrajectories: scoredAndReady,
            unscoredTrajectories: unscored,
            scenarioGroups: scenarioGroups.length,
            dataQuality: quality,
        };
        if (scoredAndReady < this.config.minTrajectoriesForTraining) {
            return {
                ready: false,
                reason: `Need ${this.config.minTrajectoriesForTraining - scoredAndReady} more trajectories`,
                stats,
            };
        }
        // Fall back to 10 if `config` was replaced by an object lacking the key.
        const requiredGroups = this.config.minScenarioGroups ?? 10;
        if (scenarioGroups.length < requiredGroups) {
            return {
                ready: false,
                reason: `Need more scenario groups (${scenarioGroups.length}/${requiredGroups} minimum)`,
                stats,
            };
        }
        if (quality < this.config.dataQualityThreshold) {
            return {
                ready: false,
                reason: `Data quality too low (${(quality * 100).toFixed(1)}% < ${this.config.dataQualityThreshold * 100}%)`,
                stats,
            };
        }
        return {
            ready: true,
            reason: "Ready to train!",
            stats,
        };
    }
    /**
     * Calculate data quality score.
     *
     * Samples up to 50 recent trajectories and runs five structural checks on
     * each usable one. Trajectories whose `stepsJson` is missing, `"null"`,
     * `"[]"`, malformed JSON or not an array are skipped.
     *
     * @returns {Promise<number>} Fraction of passed checks in [0, 1]; 0 when
     *   the sample is empty or contains no usable trajectory (never NaN, so
     *   the readiness threshold comparison stays meaningful).
     */
    async calculateDataQuality() {
        const adapter = getTrainingDataAdapter();
        const sample = await adapter.sampleRecentTrajectories(50);
        if (sample.length === 0) {
            return 0;
        }
        let qualityScore = 0;
        let totalChecks = 0;
        for (const traj of sample) {
            // Validate stepsJson exists and is valid before parsing
            if (!traj.stepsJson ||
                traj.stepsJson === "null" ||
                traj.stepsJson === "[]") {
                continue; // Skip invalid trajectories
            }
            let steps;
            try {
                steps = JSON.parse(traj.stepsJson);
            }
            catch {
                continue; // Malformed JSON is treated like any other unusable trajectory
            }
            if (!Array.isArray(steps)) {
                continue; // Skip if not an array
            }
            // Optional chaining below makes a null/primitive step fail the
            // check instead of throwing and aborting the whole readiness check.
            const checks = [
                // Check 1: Has steps
                steps.length > 0,
                // Check 2: Steps have LLM calls
                steps.every((s) => Array.isArray(s?.llmCalls) && s.llmCalls.length > 0),
                // Check 3: LLM calls have substantial prompts
                steps.every((s) => Array.isArray(s?.llmCalls) &&
                    s.llmCalls.every((llm) => Boolean(llm?.systemPrompt &&
                        llm.systemPrompt.length > 50 &&
                        llm.userPrompt &&
                        llm.userPrompt.length > 100))),
                // Check 4: Has provider accesses
                steps.some((s) => Array.isArray(s?.providerAccesses) && s.providerAccesses.length > 0),
                // Check 5: Actions have results
                steps.every((s) => Boolean(s?.action && (s.action.result || s.action.error))),
            ];
            totalChecks += checks.length;
            qualityScore += checks.filter(Boolean).length;
        }
        // Every sampled trajectory may have been skipped; avoid 0/0.
        return totalChecks === 0 ? 0 : qualityScore / totalChecks;
    }
    /**
     * Trigger training job.
     *
     * Exports grouped trajectories, records a `pending` batch, then spawns
     * the Python trainer with its parameters passed through environment
     * variables. Returns as soon as the process is spawned.
     *
     * @param {{force?: boolean, batchSize?: number}} [options] `force` skips
     *   the readiness gate; `batchSize` caps the export when the model
     *   selection service supplies no data limit.
     * @returns {Promise<{success: boolean, jobId?: string, error?: string}>}
     */
    async triggerTraining(options = {}) {
        // Check readiness
        let readiness = await this.checkTrainingReadiness();
        if (!readiness.ready && !options.force) {
            return {
                success: false,
                error: readiness.reason,
            };
        }
        // If forcing but no trajectories at all, try to score some first
        if (options.force &&
            readiness.stats.totalTrajectories === 0 &&
            readiness.stats.unscoredTrajectories > 0) {
            logger.info("Force mode: Attempting to score unscored trajectories first", {
                unscored: readiness.stats.unscoredTrajectories,
            }, "AutomationPipeline");
            // Score recent trajectories
            const adapter = getTrainingDataAdapter();
            const recentWindowIds = await adapter.getUnscoredWindowIds(5);
            for (const windowId of recentWindowIds) {
                await rulerScoringService.scoreWindow(windowId);
            }
            // Re-check readiness after scoring and keep the fresh result, so
            // the export below is sized from the post-scoring counts rather
            // than the stale zero that led into this branch.
            readiness = await this.checkTrainingReadiness();
            logger.info("After scoring", {
                scored: readiness.stats.totalTrajectories,
                stillUnscored: readiness.stats.unscoredTrajectories,
            }, "AutomationPipeline");
        }
        // Use ModelSelectionService for smart model selection
        const modelSelection = await modelSelectionService.selectBaseModel();
        logger.info("Model selection for training", {
            strategy: modelSelection.strategy,
            modelPath: modelSelection.modelPath,
            bundleCount: modelSelection.metadata?.bundleCount,
        });
        // Get data limit based on bundle count
        const dataLimit = await modelSelectionService.getTrainingDataLimit();
        // Prepare data
        logger.info("Preparing training data...", {
            ...readiness.stats,
            selectedModel: modelSelection.modelPath,
            strategy: modelSelection.strategy,
            dataLimit,
        });
        const batchId = `batch-${Date.now()}`;
        // Use standardized window ID format (YYYY-MM-DDTHH:00)
        const windowId = getCurrentWindowId();
        // Export trajectories with data limit
        const maxTrajectories = dataLimit || options.batchSize || readiness.stats.totalTrajectories;
        const exportGroupedForGRPO = getExportGroupedForGRPO();
        const exportResult = await exportGroupedForGRPO({
            outputPath: `${this.config.dataStoragePath}/${batchId}`,
            minTrajectoriesPerGroup: this.config.minGroupSize,
            maxGroupSize: maxTrajectories,
        });
        if (!exportResult.success) {
            return {
                success: false,
                error: `Export failed: ${exportResult.error}`,
            };
        }
        // Create training batch record
        const adapterForBatch = getTrainingDataAdapter();
        const nextVersion = await this.getNextModelVersion();
        const trajectoryIds = await adapterForBatch.getTrajectoryIdsForTraining(maxTrajectories);
        const insertedBatchId = await adapterForBatch.insertBatch({
            id: batchId,
            batchId,
            scenarioId: windowId,
            baseModel: modelSelection.modelPath,
            modelVersion: nextVersion,
            trajectoryIds: JSON.stringify(trajectoryIds),
            rankingsJson: null,
            rewardsJson: JSON.stringify([]),
            trainingLoss: null,
            policyImprovement: null,
            status: "pending",
            error: null,
            createdAt: new Date(),
        });
        const batch = await adapterForBatch.getBatchById(insertedBatchId);
        if (!batch) {
            return {
                success: false,
                error: "Failed to create training batch record",
            };
        }
        // Determine training mode: 'tinker' for cloud-based or 'atropos' for local vLLM
        const trainingMode = this.config.trainingMode || "atropos";
        const useTinker = trainingMode.toLowerCase() === "tinker";
        // Trigger appropriate Python training script based on mode.
        // Allow explicit override for packaged/runtime deployments.
        const pythonScript = this.config.trainerScriptPath ||
            path.resolve(this.config.pythonProjectRoot ||
                path.resolve(process.cwd(), "packages/training/python"), "src", "training", useTinker ? "tinker_trainer.py" : "atropos_trainer.py");
        try {
            await fs.access(pythonScript);
        }
        catch {
            const scriptError = `Training script not found: ${pythonScript}`;
            // The batch row already exists; mark it failed (best effort) so
            // it does not sit in "pending" forever with no trainer behind it.
            try {
                await adapterForBatch.updateBatchStatus(batchId, "failed", scriptError);
            }
            catch (err) {
                logger.error("Failed to update batch status", {
                    error: err instanceof Error ? err : String(err),
                });
            }
            return {
                success: false,
                error: scriptError,
            };
        }
        // Set environment variables for Python script
        const env = {
            ...process.env,
            MODE: "single",
            BATCH_ID: batchId,
            MODEL_VERSION: nextVersion,
            WINDOW_ID: windowId,
            BASE_MODEL: modelSelection.modelPath,
            MAX_EXAMPLES: dataLimit ? dataLimit.toString() : "2000",
            DATABASE_URL: process.env.DATABASE_URL || "",
            ATROPOS_API_URL: this.config.atroposApiUrl || "http://localhost:8000",
            VLLM_PORT: String(this.config.vllmPort || 9001),
            FORCE_TRAINING: options.force ? "true" : "false",
            MIN_AGENTS_PER_WINDOW: "1",
            TRAINING_MODE: trainingMode,
        };
        logger.info(useTinker
            ? "Training will use Tinker cloud-based GRPO"
            : "Training will use Atropos GRPO with vLLM", {
            trainingMode,
            ...(useTinker
                ? { model: env.BASE_MODEL }
                : {
                    atroposUrl: env.ATROPOS_API_URL,
                    vllmPort: env.VLLM_PORT,
                    model: env.BASE_MODEL,
                }),
        }, "AutomationPipeline");
        const pythonCmd = this.config.trainerPythonExecutable ||
            (process.platform === "win32" ? "python" : "python3");
        const trainingProcess = spawn(pythonCmd, [pythonScript], {
            detached: false,
            stdio: ["ignore", "pipe", "pipe"],
            env,
        });
        // Capture and log training process output
        trainingProcess.stdout?.on("data", (data) => {
            logger.info("Training stdout", { output: data.toString().trim() });
        });
        trainingProcess.stderr?.on("data", (data) => {
            logger.warn("Training stderr", { output: data.toString().trim() });
        });
        // Emitted when the process could not be spawned (or killed); record
        // the failure on the batch without letting a DB error go unhandled.
        trainingProcess.on("error", (error) => {
            logger.error("Training process error", { error: error.message });
            getTrainingDataAdapter()
                .updateBatchStatus(batchId, "failed", `Process spawn failed: ${error.message}`)
                .catch((err) => logger.error("Failed to update batch status", {
                error: err instanceof Error ? err : String(err),
            }));
        });
        // Do not let the child process handle keep this process's event loop alive.
        trainingProcess.unref();
        this.currentTrainingJob = batch.id;
        logger.info("Training job triggered", {
            batchId: batch.id,
            version: nextVersion,
            trajectories: exportResult.trajectoriesExported,
        });
        return {
            success: true,
            jobId: batch.id,
        };
    }
    /**
     * Get next model version.
     *
     * Bumps the patch component of the latest model's version. Accepts an
     * optional single-character prefix (e.g. `v`), missing minor/patch
     * components (treated as 0) and trailing suffixes (ignored).
     *
     * @returns {Promise<string>} `"v1.0.0"` when no model exists yet or the
     *   stored version cannot be parsed; otherwise `v<major>.<minor>.<patch+1>`.
     */
    async getNextModelVersion() {
        const latestModel = await getTrainingDataAdapter().getLatestModel();
        if (!latestModel) {
            return "v1.0.0";
        }
        const parts = /^\D?(\d+)(?:\.(\d+))?(?:\.(\d+))?/.exec(String(latestModel.version ?? "").trim());
        if (!parts) {
            logger.warn("Unparseable model version, restarting at v1.0.0", {
                version: latestModel.version,
            }, "AutomationPipeline");
            return "v1.0.0";
        }
        const major = Number(parts[1]);
        const minor = Number(parts[2] ?? 0);
        const patch = Number(parts[3] ?? 0);
        // Increment patch version
        return `v${major}.${minor}.${patch + 1}`;
    }
    /**
     * Monitor training job.
     *
     * Reads the training metrics log file written by the Python trainer to
     * derive real progress instead of returning hardcoded values.
     *
     * @param {string} batchId Batch to inspect.
     * @returns {Promise<{status: string, progress?: number, eta?: number, error?: string}>}
     *   `progress` is clamped to [0, 1]; `eta` is the estimated remaining
     *   milliseconds and is only set when the log provides `elapsed_ms`.
     */
    async monitorTraining(batchId) {
        const batch = await getTrainingDataAdapter().getBatchById(batchId);
        if (!batch) {
            return { status: "not_found" };
        }
        // Terminal states – return immediately
        if (batch.status === "completed") {
            return { status: "completed", progress: 1.0, error: undefined };
        }
        if (batch.status === "failed") {
            return {
                status: "failed",
                progress: 0,
                error: batch.error || "Training failed",
            };
        }
        if (batch.status === "pending") {
            return { status: "pending", progress: 0 };
        }
        // For 'training' status, attempt to read the metrics log written by
        // atropos_trainer.py / tinker_trainer.py to get real step counts.
        let progress = 0;
        let eta;
        const metricsLogPath = path.resolve(this.config.dataStoragePath, batchId, "training_metrics.jsonl");
        try {
            const logContent = await fs.readFile(metricsLogPath, "utf-8");
            const lines = logContent.trim().split("\n").filter(Boolean);
            const lastLine = lines[lines.length - 1];
            if (lastLine) {
                const lastMetric = JSON.parse(lastLine);
                if (typeof lastMetric.step === "number" &&
                    typeof lastMetric.total_steps === "number" &&
                    lastMetric.total_steps > 0) {
                    // Clamp so an overshooting or negative step counter never
                    // reports progress outside [0, 1].
                    progress = Math.min(1, Math.max(0, lastMetric.step / lastMetric.total_steps));
                    // Estimate remaining time from elapsed
                    if (typeof lastMetric.elapsed_ms === "number" && progress > 0) {
                        const totalEstimatedMs = lastMetric.elapsed_ms / progress;
                        eta = Math.max(0, totalEstimatedMs - lastMetric.elapsed_ms);
                    }
                }
            }
        }
        catch {
            // Log file doesn't exist yet or is unreadable – training may have
            // just started. Return an honest "unknown progress" instead of faking.
            progress = 0;
        }
        return {
            status: batch.status,
            progress,
            eta,
            error: batch.error || undefined,
        };
    }
    /**
     * Clean up export files for a specific batch to prevent disk accumulation.
     *
     * Only removes the batch-specific subdirectory, not the entire export
     * root: a batch id that is empty, not a string, or resolves to the root
     * itself or to a path outside it is refused. Nested directories are
     * removed recursively. Failures are logged, never thrown.
     *
     * @param {string} batchId Batch whose export directory should be removed.
     * @returns {Promise<void>}
     */
    async cleanupExportFiles(batchId) {
        const exportRoot = path.resolve(this.config.dataStoragePath);
        const batchDir = typeof batchId === "string" ? path.resolve(exportRoot, batchId) : exportRoot;
        const relative = path.relative(exportRoot, batchDir);
        const escapesRoot = relative === "" ||
            relative === ".." ||
            relative.startsWith(`..${path.sep}`) ||
            path.isAbsolute(relative);
        if (escapesRoot) {
            logger.warn("Refusing to clean up export files outside a batch directory", {
                batchId,
                dir: batchDir,
            }, "AutomationPipeline");
            return;
        }
        try {
            await fs.access(batchDir);
        }
        catch {
            // Directory doesn't exist – nothing to clean
            return;
        }
        try {
            // Listed only so the log can report how many top-level entries went away.
            const files = await fs.readdir(batchDir);
            await fs.rm(batchDir, { recursive: true, force: true });
            logger.info("Cleaned up export files", { batchId, filesRemoved: files.length, dir: batchDir }, "AutomationPipeline");
        }
        catch (err) {
            logger.warn("Failed to clean up export files", {
                batchId,
                dir: batchDir,
                error: err instanceof Error ? err.message : String(err),
            });
        }
    }
    /**
     * Automation loop (called by cron).
     *
     * While a job started by this instance is tracked, the cycle only
     * advances that job and returns. Otherwise it deploys a newly completed
     * batch (if any), triggers training when due, syncs market outcomes and
     * rewards when a market adapter is registered, scores the last 24 hourly
     * windows with RULER, and finishes with health checks.
     *
     * @returns {Promise<void>}
     */
    async runAutomationCycle() {
        logger.info("Running automation cycle");
        // Check if training is already running
        if (this.currentTrainingJob) {
            const jobId = this.currentTrainingJob;
            const status = await this.monitorTraining(jobId);
            if (status.status === "completed") {
                await this.deployModel(jobId);
                await this.cleanupExportFiles(jobId);
                this.currentTrainingJob = null;
            }
            else if (status.status === "failed") {
                logger.error("Training job failed", {
                    batchId: jobId,
                });
                await this.cleanupExportFiles(jobId);
                this.currentTrainingJob = null;
            }
            else if (status.status === "not_found") {
                // The batch record is gone; keeping the id would make every
                // later cycle return here and stall the pipeline for good.
                logger.warn("Tracked training batch no longer exists, clearing job", {
                    batchId: jobId,
                });
                this.currentTrainingJob = null;
            }
            return;
        }
        // Check for newly completed batches (Python script may have completed)
        const da = getTrainingDataAdapter();
        const recentBatches = await da.getRecentlyCompletedBatches(24);
        const newlyCompleted = recentBatches[0];
        if (newlyCompleted) {
            const alreadyDeployed = await da.getModelByBatchAndStatus(newlyCompleted.batchId, "deployed");
            if (!alreadyDeployed) {
                logger.info("Found newly completed training batch", {
                    batchId: newlyCompleted.batchId,
                });
                await this.deployModel(newlyCompleted.batchId);
            }
        }
        // Check if we should trigger training
        const readiness = await this.checkTrainingReadiness();
        if (readiness.ready && this.config.autoTriggerTraining) {
            const lastCompleted = await da.getLastCompletedBatch();
            // No completed batch yet: use a large sentinel so training is due immediately.
            const hoursSinceLastTraining = lastCompleted?.completedAt
                ? (Date.now() - lastCompleted.completedAt.getTime()) / (1000 * 60 * 60)
                : 999;
            if (hoursSinceLastTraining >= this.config.trainingInterval) {
                logger.info("Triggering automatic training", readiness.stats);
                await this.triggerTraining();
            }
        }
        // Track market outcomes for recent windows (optional — only if market adapter registered)
        const marketAdapter = getMarketDataAdapter();
        if (marketAdapter) {
            const { MarketOutcomesTracker: MOT } = await import("./MarketOutcomesTracker");
            const outcomesTracker = new MOT();
            const synced = await outcomesTracker.syncRecentWindows(24);
            if (synced > 0) {
                logger.info("Synced market outcomes for windows", {
                    windowsSynced: synced,
                });
            }
            const processed = await rewardBackpropagationService.processPendingWindows();
            if (processed > 0) {
                logger.info("Updated rewards for trajectories", {
                    windowsProcessed: processed,
                });
            }
        }
        // Score trajectories using RULER framework
        for (let hoursAgo = 0; hoursAgo < 24; hoursAgo++) {
            const windowId = getPreviousWindowId(hoursAgo);
            const scored = await rulerScoringService.scoreWindow(windowId);
            if (scored > 0) {
                logger.info("Scored trajectories with RULER", { windowId, scored });
            }
        }
        await this.runHealthChecks();
    }
    /**
     * Deploy trained model.
     *
     * The model is created by the Python training script. This method marks
     * the batch's trajectories as used and flips the batch's `ready` model to
     * `deployed`. A missing batch or model is logged and ignored. Missing,
     * malformed or non-array `trajectoryIds` is logged and treated as an
     * empty list, so the model is still deployed.
     *
     * @param {string} batchId Batch whose model should be deployed.
     * @returns {Promise<void>}
     */
    async deployModel(batchId) {
        const da = getTrainingDataAdapter();
        const batch = await da.getBatchById(batchId);
        if (!batch) {
            logger.warn("Batch not found for deployment", { batchId });
            return;
        }
        const model = await da.getModelByBatchAndStatus(batch.id, "ready");
        if (!model) {
            logger.warn("Model not found for batch", { batchId });
            return;
        }
        logger.info("Deploying model", {
            version: batch.modelVersion,
            modelId: model.modelId,
            batchId,
        });
        // Mark trajectories as used
        let trajectoryIds = [];
        if (!batch.trajectoryIds ||
            batch.trajectoryIds === "null" ||
            batch.trajectoryIds === "[]") {
            logger.warn("Training batch has invalid trajectoryIds", {
                batchId: batch.id,
            });
        }
        else {
            let parsedIds;
            try {
                parsedIds = JSON.parse(batch.trajectoryIds);
            }
            catch {
                // Malformed JSON falls through to the "not an array" warning.
                parsedIds = undefined;
            }
            if (Array.isArray(parsedIds)) {
                trajectoryIds = parsedIds;
            }
            else {
                logger.warn("Training batch trajectoryIds is not an array", {
                    batchId: batch.id,
                });
            }
        }
        if (trajectoryIds.length > 0) {
            await da.markTrajectoriesAsUsed(trajectoryIds, batch.id);
        }
        await da.updateModelStatus(model.modelId, "deployed", {
            deployedAt: new Date(),
        });
        logger.info("Model deployed", {
            version: batch.modelVersion,
            modelId: model.modelId,
        });
    }
    /**
     * Benchmark and conditionally deploy trained model.
     * Only deploys if the benchmark comparison recommends it and `autoDeploy`
     * is enabled.
     *
     * @param {string} batchId Batch whose `ready` model is benchmarked.
     * @param {boolean} [autoDeploy=true] Deploy automatically on a positive comparison.
     * @returns {Promise<{benchmarked: boolean, deployed: boolean, reason: string}>}
     */
    async benchmarkAndDeploy(batchId, autoDeploy = true) {
        const da = getTrainingDataAdapter();
        const batch = await da.getBatchById(batchId);
        if (!batch) {
            return { benchmarked: false, deployed: false, reason: "Batch not found" };
        }
        const model = await da.getModelByBatchAndStatus(batch.id, "ready");
        if (!model) {
            return { benchmarked: false, deployed: false, reason: "Model not found" };
        }
        // Benchmark the model
        logger.info("Benchmarking model...", { modelId: model.modelId }, "AutomationPipeline");
        const benchmarkResults = await benchmarkService.benchmarkModel(model.modelId);
        // Compare with previous models
        const comparison = await benchmarkService.compareModels(model.modelId);
        logger.info("Benchmark complete", {
            modelId: model.modelId,
            score: benchmarkResults.benchmarkScore,
            shouldDeploy: comparison.shouldDeploy,
            reason: comparison.reason,
        }, "AutomationPipeline");
        // Deploy if performance is good enough (and autoDeploy is enabled)
        if (comparison.shouldDeploy && autoDeploy) {
            await this.deployModel(batchId);
            return {
                benchmarked: true,
                deployed: true,
                reason: comparison.reason,
            };
        }
        return {
            benchmarked: true,
            deployed: false,
            reason: comparison.reason || "Performance below threshold",
        };
    }
    /**
     * Get model selection info for next training.
     *
     * @returns {Promise<{success: true, selection: object, summary: object}>}
     */
    async getModelSelectionInfo() {
        const selection = await modelSelectionService.selectBaseModel();
        const summary = await modelSelectionService.getSelectionSummary();
        return {
            success: true,
            selection,
            summary,
        };
    }
    /**
     * Run health checks.
     *
     * Logs a warning when the adapter health check fails or when no
     * trajectory was collected in the last hour, then ensures both storage
     * directories exist.
     *
     * @returns {Promise<void>}
     */
    async runHealthChecks() {
        const da = getTrainingDataAdapter();
        const dbOk = await da.healthCheck();
        if (!dbOk) {
            logger.warn("Health check: database unreachable");
        }
        const oneHourAgo = new Date(Date.now() - 60 * 60 * 1000);
        const last1h = await da.countTrajectoriesSince(oneHourAgo);
        if (last1h < 1) {
            logger.warn("Low data collection rate", { trajectoriesLastHour: last1h });
        }
        // Ensure storage directories exist
        await fs.mkdir(this.config.modelStoragePath, { recursive: true });
        await fs.mkdir(this.config.dataStoragePath, { recursive: true });
    }
    /**
     * Get automation status.
     *
     * @returns {Promise<object>} Snapshot with `dataCollection`, `training`,
     *   `models` and `health` sections. `health.storage` is true when the
     *   model storage path exists or could be created; `health.atropos`
     *   reflects a `/health` request that times out after 3 seconds.
     */
    async getStatus() {
        const da = getTrainingDataAdapter();
        const twentyFourHoursAgo = new Date(Date.now() - 24 * 60 * 60 * 1000);
        const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
        const last24h = await da.countTrajectoriesSince(twentyFourHoursAgo);
        const last7d = await da.countTrajectoriesSince(sevenDaysAgo);
        const lastCompleted = await da.getLastCompletedBatch();
        const latestModel = await da.getLatestModel();
        const deployedCount = await da.countDeployedModels();
        const trainingCount = await da.countTrainingBatches();
        const dbHealthy = await da.healthCheck();
        let storageHealthy = false;
        try {
            await fs.access(this.config.modelStoragePath);
            storageHealthy = true;
        }
        catch {
            // Path is missing or inaccessible: storage still counts as
            // healthy if the directory can be created now.
            try {
                await fs.mkdir(this.config.modelStoragePath, { recursive: true });
                storageHealthy = true;
            }
            catch {
                storageHealthy = false;
            }
        }
        let atroposHealthy = false;
        if (this.config.atroposApiUrl) {
            const controller = new AbortController();
            const timeout = setTimeout(() => controller.abort(), 3000);
            try {
                const resp = await fetch(`${this.config.atroposApiUrl}/health`, {
                    signal: controller.signal,
                });
                atroposHealthy = resp.ok;
            }
            catch {
                atroposHealthy = false;
            }
            finally {
                // Always disarm the timer, including when fetch rejects early
                // (e.g. connection refused), so it cannot linger for 3 seconds.
                clearTimeout(timeout);
            }
        }
        return {
            dataCollection: {
                last24h,
                last7d,
                ratePerHour: last24h / 24,
            },
            training: {
                currentJob: this.currentTrainingJob,
                lastCompleted: lastCompleted?.completedAt || null,
                nextScheduled: lastCompleted?.completedAt
                    ? new Date(lastCompleted.completedAt.getTime() +
                        this.config.trainingInterval * 60 * 60 * 1000)
                    : null,
            },
            models: {
                latest: latestModel?.version || null,
                deployed: deployedCount,
                training: trainingCount,
            },
            health: {
                database: dbHealthy,
                storage: storageHealthy,
                atropos: atroposHealthy,
            },
        };
    }
}
|
|
713
|
+
// Singleton
|
|
714
|
+
/** Shared module-level pipeline instance, constructed with no overrides so environment variables and built-in defaults apply. */
export const automationPipeline = new AutomationPipeline();
|