@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/package.json +2 -2
  2. package/research-output/training-runs/training-run-1773726941205.json +38 -0
  3. package/scripts/rank_trajectories.ts +0 -1
  4. package/scripts/run_task_benchmark.ts +4 -11
  5. package/src/adapter.ts +96 -49
  6. package/src/archetypes/ArchetypeConfigService.ts +188 -185
  7. package/src/archetypes/derive-archetype.ts +47 -47
  8. package/src/archetypes/index.ts +2 -2
  9. package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
  10. package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
  11. package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
  12. package/src/benchmark/BenchmarkDataViewer.ts +32 -30
  13. package/src/benchmark/BenchmarkHistoryService.ts +13 -12
  14. package/src/benchmark/BenchmarkRunner.ts +87 -83
  15. package/src/benchmark/BenchmarkValidator.ts +48 -46
  16. package/src/benchmark/FastEvalRunner.ts +17 -16
  17. package/src/benchmark/MetricsValidator.ts +20 -21
  18. package/src/benchmark/MetricsVisualizer.ts +92 -85
  19. package/src/benchmark/ModelBenchmarkService.ts +90 -82
  20. package/src/benchmark/ModelRegistry.ts +44 -44
  21. package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
  22. package/src/benchmark/SimulationA2AInterface.ts +118 -118
  23. package/src/benchmark/SimulationEngine.ts +51 -51
  24. package/src/benchmark/TaskRunner.ts +87 -79
  25. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
  26. package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
  27. package/src/benchmark/index.ts +27 -27
  28. package/src/benchmark/parseSimulationMetrics.ts +32 -32
  29. package/src/benchmark/simulation-types.ts +10 -10
  30. package/src/dependencies.ts +34 -34
  31. package/src/generation/TrajectoryGenerator.ts +39 -37
  32. package/src/generation/index.ts +1 -1
  33. package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
  34. package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
  35. package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
  36. package/src/huggingface/index.ts +6 -6
  37. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
  38. package/src/index.ts +27 -27
  39. package/src/init-training.ts +6 -6
  40. package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
  41. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
  42. package/src/metrics/index.ts +2 -2
  43. package/src/rubrics/__tests__/index.test.ts +73 -73
  44. package/src/rubrics/ass-kisser.ts +6 -6
  45. package/src/rubrics/degen.ts +6 -6
  46. package/src/rubrics/goody-twoshoes.ts +6 -6
  47. package/src/rubrics/index.ts +50 -50
  48. package/src/rubrics/information-trader.ts +6 -6
  49. package/src/rubrics/infosec.ts +6 -6
  50. package/src/rubrics/liar.ts +6 -6
  51. package/src/rubrics/perps-trader.ts +6 -6
  52. package/src/rubrics/researcher.ts +6 -6
  53. package/src/rubrics/scammer.ts +6 -6
  54. package/src/rubrics/social-butterfly.ts +7 -7
  55. package/src/rubrics/super-predictor.ts +6 -6
  56. package/src/rubrics/trader.ts +5 -5
  57. package/src/scoring/ArchetypeScoringService.ts +56 -54
  58. package/src/scoring/JudgePromptBuilder.ts +96 -96
  59. package/src/scoring/LLMJudgeCache.ts +26 -23
  60. package/src/scoring/index.ts +3 -3
  61. package/src/training/AutomationPipeline.ts +149 -140
  62. package/src/training/BenchmarkService.ts +49 -45
  63. package/src/training/ConfigValidator.ts +38 -32
  64. package/src/training/MarketOutcomesTracker.ts +22 -12
  65. package/src/training/ModelDeployer.ts +15 -15
  66. package/src/training/ModelFetcher.ts +7 -7
  67. package/src/training/ModelSelectionService.ts +32 -32
  68. package/src/training/ModelUsageVerifier.ts +31 -24
  69. package/src/training/MultiModelOrchestrator.ts +44 -44
  70. package/src/training/RLModelConfig.ts +57 -57
  71. package/src/training/RewardBackpropagationService.ts +18 -17
  72. package/src/training/RulerScoringService.ts +73 -72
  73. package/src/training/TrainingMonitor.ts +29 -29
  74. package/src/training/TrajectoryRecorder.ts +25 -27
  75. package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
  76. package/src/training/index.ts +36 -36
  77. package/src/training/logRLConfig.ts +7 -7
  78. package/src/training/pipeline.ts +13 -16
  79. package/src/training/storage/ModelStorageService.ts +32 -32
  80. package/src/training/storage/TrainingDataArchiver.ts +21 -21
  81. package/src/training/storage/index.ts +2 -2
  82. package/src/training/types.ts +6 -6
  83. package/src/training/window-utils.ts +14 -14
  84. package/src/utils/index.ts +7 -7
  85. package/src/utils/logger.ts +5 -5
  86. package/src/utils/snowflake.ts +1 -1
  87. package/src/utils/synthetic-detector.ts +7 -7
@@ -13,12 +13,12 @@
13
13
  * @see ModelBenchmarkService - For HuggingFace upload evaluation
14
14
  */
15
15
 
16
- import fs from 'node:fs/promises';
17
- import path from 'node:path';
18
- import { getTrainingDataAdapter, type JsonValue } from '../adapter';
19
- import { BenchmarkRunner } from '../benchmark/BenchmarkRunner';
20
- import { getAgentRuntimeManager } from '../dependencies';
21
- import { logger } from '../utils/logger';
16
+ import fs from "node:fs/promises";
17
+ import path from "node:path";
18
+ import { getTrainingDataAdapter, type JsonValue } from "../adapter";
19
+ import { BenchmarkRunner } from "../benchmark/BenchmarkRunner";
20
+ import { getAgentRuntimeManager } from "../dependencies";
21
+ import { logger } from "../utils/logger";
22
22
 
23
23
  export interface BenchmarkResults {
24
24
  modelId: string;
@@ -48,11 +48,11 @@ export class BenchmarkService {
48
48
  // Use the 1-week benchmark we generated for comprehensive evaluation
49
49
  private readonly DEFAULT_BENCHMARK_PATH = path.resolve(
50
50
  process.cwd(),
51
- 'benchmarks/benchmark-week-10080-60-10-5-8-12345.json'
51
+ "benchmarks/benchmark-week-10080-60-10-5-8-12345.json",
52
52
  );
53
53
  private readonly RESULTS_DIR = path.resolve(
54
54
  process.cwd(),
55
- 'benchmark-results/models'
55
+ "benchmark-results/models",
56
56
  );
57
57
 
58
58
  /**
@@ -71,10 +71,10 @@ export class BenchmarkService {
71
71
  return this.DEFAULT_BENCHMARK_PATH;
72
72
  } catch {
73
73
  // Fallback: find any benchmark file
74
- const benchmarkDir = path.resolve(process.cwd(), 'benchmarks');
74
+ const benchmarkDir = path.resolve(process.cwd(), "benchmarks");
75
75
  const files = await fs.readdir(benchmarkDir);
76
76
  const benchmarkFiles = files.filter(
77
- (f) => f.startsWith('benchmark-') && f.endsWith('.json')
77
+ (f) => f.startsWith("benchmark-") && f.endsWith(".json"),
78
78
  );
79
79
 
80
80
  if (benchmarkFiles.length > 0) {
@@ -82,14 +82,14 @@ export class BenchmarkService {
82
82
  logger.warn(
83
83
  `Default benchmark not found, using: ${fallbackPath}`,
84
84
  undefined,
85
- 'BenchmarkService'
85
+ "BenchmarkService",
86
86
  );
87
87
  return fallbackPath;
88
88
  }
89
89
  }
90
90
 
91
91
  throw new Error(
92
- 'No benchmark files found. Generate benchmark data before running evaluation.'
92
+ "No benchmark files found. Generate benchmark data before running evaluation.",
93
93
  );
94
94
  }
95
95
 
@@ -113,12 +113,12 @@ export class BenchmarkService {
113
113
  */
114
114
  async benchmarkModel(
115
115
  modelId: string,
116
- benchmarkPath?: string
116
+ benchmarkPath?: string,
117
117
  ): Promise<BenchmarkResults> {
118
118
  logger.info(
119
119
  `Benchmarking model: ${modelId}`,
120
120
  undefined,
121
- 'BenchmarkService'
121
+ "BenchmarkService",
122
122
  );
123
123
 
124
124
  const startTime = Date.now();
@@ -133,7 +133,7 @@ export class BenchmarkService {
133
133
  const outputDir = path.join(
134
134
  this.RESULTS_DIR,
135
135
  modelId,
136
- Date.now().toString()
136
+ Date.now().toString(),
137
137
  );
138
138
  await fs.mkdir(outputDir, { recursive: true });
139
139
 
@@ -153,13 +153,13 @@ export class BenchmarkService {
153
153
 
154
154
  // Run benchmark
155
155
  logger.info(
156
- 'Running benchmark...',
156
+ "Running benchmark...",
157
157
  {
158
158
  modelId,
159
159
  modelIdentifier,
160
160
  agent: agent.username,
161
161
  },
162
- 'BenchmarkService'
162
+ "BenchmarkService",
163
163
  );
164
164
 
165
165
  const result = await BenchmarkRunner.runSingle({
@@ -195,17 +195,16 @@ export class BenchmarkService {
195
195
  };
196
196
 
197
197
  logger.info(
198
- 'Benchmark complete',
198
+ "Benchmark complete",
199
199
  {
200
200
  modelId,
201
201
  score: benchmarkScore.toFixed(3),
202
202
  pnl: result.metrics.totalPnl.toFixed(2),
203
- accuracy:
204
- (result.metrics.predictionMetrics.accuracy * 100).toFixed(1) + '%',
205
- optimality: result.metrics.optimalityScore.toFixed(1) + '%',
203
+ accuracy: `${(result.metrics.predictionMetrics.accuracy * 100).toFixed(1)}%`,
204
+ optimality: `${result.metrics.optimalityScore.toFixed(1)}%`,
206
205
  duration: `${(duration / 1000).toFixed(1)}s`,
207
206
  },
208
- 'BenchmarkService'
207
+ "BenchmarkService",
209
208
  );
210
209
 
211
210
  // Store results
@@ -235,12 +234,12 @@ export class BenchmarkService {
235
234
  */
236
235
  async compareModels(
237
236
  newModelId: string,
238
- threshold: number = this.DEPLOYMENT_THRESHOLD
237
+ threshold: number = this.DEPLOYMENT_THRESHOLD,
239
238
  ): Promise<ComparisonResults> {
240
239
  logger.info(
241
240
  `Comparing model: ${newModelId}`,
242
241
  undefined,
243
- 'BenchmarkService'
242
+ "BenchmarkService",
244
243
  );
245
244
 
246
245
  // Get new model's benchmark results
@@ -257,14 +256,15 @@ export class BenchmarkService {
257
256
  const newScore = newModel.benchmarkScore;
258
257
 
259
258
  // Get previous best model (excluding the new one)
260
- const previousBest = await getTrainingDataAdapter().getBestBenchmarkedModel(newModelId);
259
+ const previousBest =
260
+ await getTrainingDataAdapter().getBestBenchmarkedModel(newModelId);
261
261
 
262
262
  // If no previous model, always deploy
263
263
  if (!previousBest) {
264
264
  logger.info(
265
- 'No previous model to compare - will deploy',
265
+ "No previous model to compare - will deploy",
266
266
  { newScore },
267
- 'BenchmarkService'
267
+ "BenchmarkService",
268
268
  );
269
269
  return {
270
270
  newModel: newModelId,
@@ -273,7 +273,7 @@ export class BenchmarkService {
273
273
  previousScore: null,
274
274
  improvement: null,
275
275
  shouldDeploy: true,
276
- reason: 'First model - no comparison available',
276
+ reason: "First model - no comparison available",
277
277
  };
278
278
  }
279
279
 
@@ -282,7 +282,7 @@ export class BenchmarkService {
282
282
  const thresholdScore = previousScore * threshold;
283
283
  const shouldDeploy = newScore >= thresholdScore;
284
284
 
285
- let reason = '';
285
+ let reason = "";
286
286
  if (shouldDeploy) {
287
287
  if (newScore > previousScore) {
288
288
  reason = `Improved by ${improvement.toFixed(1)}% (${newScore.toFixed(3)} > ${previousScore.toFixed(3)})`;
@@ -294,17 +294,17 @@ export class BenchmarkService {
294
294
  }
295
295
 
296
296
  logger.info(
297
- 'Model comparison complete',
297
+ "Model comparison complete",
298
298
  {
299
299
  newModel: newModelId,
300
300
  newScore: newScore.toFixed(3),
301
301
  previousModel: previousBest.modelId,
302
302
  previousScore: previousScore.toFixed(3),
303
- improvement: improvement.toFixed(1) + '%',
303
+ improvement: `${improvement.toFixed(1)}%`,
304
304
  shouldDeploy,
305
305
  reason,
306
306
  },
307
- 'BenchmarkService'
307
+ "BenchmarkService",
308
308
  );
309
309
 
310
310
  return {
@@ -330,7 +330,7 @@ export class BenchmarkService {
330
330
  */
331
331
  async storeBenchmarkResults(
332
332
  modelId: string,
333
- results: BenchmarkResults
333
+ results: BenchmarkResults,
334
334
  ): Promise<void> {
335
335
  await getTrainingDataAdapter().updateModelBenchmarkResults(modelId, {
336
336
  benchmarkScore: results.benchmarkScore,
@@ -348,9 +348,9 @@ export class BenchmarkService {
348
348
  });
349
349
 
350
350
  logger.info(
351
- 'Stored benchmark results',
351
+ "Stored benchmark results",
352
352
  { modelId, score: results.benchmarkScore },
353
- 'BenchmarkService'
353
+ "BenchmarkService",
354
354
  );
355
355
  }
356
356
 
@@ -367,7 +367,7 @@ export class BenchmarkService {
367
367
  */
368
368
  async shouldDeploy(
369
369
  modelId: string,
370
- threshold: number = this.DEPLOYMENT_THRESHOLD
370
+ threshold: number = this.DEPLOYMENT_THRESHOLD,
371
371
  ): Promise<boolean> {
372
372
  const comparison = await this.compareModels(modelId, threshold);
373
373
  return comparison.shouldDeploy;
@@ -401,7 +401,7 @@ export class BenchmarkService {
401
401
 
402
402
  if (storagePath && storagePath.trim().length > 0) {
403
403
  // Check if it looks like a valid model ID
404
- if (storagePath.includes('/') || storagePath.includes(':')) {
404
+ if (storagePath.includes("/") || storagePath.includes(":")) {
405
405
  return storagePath;
406
406
  }
407
407
 
@@ -409,12 +409,12 @@ export class BenchmarkService {
409
409
  logger.warn(
410
410
  `Invalid storagePath format: ${storagePath}, falling back to modelId`,
411
411
  { modelId: model.modelId },
412
- 'BenchmarkService'
412
+ "BenchmarkService",
413
413
  );
414
414
  }
415
415
 
416
416
  // Fallback to base model if modelId also doesn't look valid
417
- if (model.modelId.includes('/')) {
417
+ if (model.modelId.includes("/")) {
418
418
  return model.modelId;
419
419
  }
420
420
 
@@ -422,7 +422,7 @@ export class BenchmarkService {
422
422
  logger.warn(
423
423
  `No valid model identifier found, using baseModel`,
424
424
  { modelId: model.modelId, baseModel: model.baseModel },
425
- 'BenchmarkService'
425
+ "BenchmarkService",
426
426
  );
427
427
  return model.baseModel;
428
428
  }
@@ -441,9 +441,13 @@ export class BenchmarkService {
441
441
  const allAgents = await adapter.getAgentUsers();
442
442
 
443
443
  // Try to find a specific test agent
444
- const preferredUsernames = ['trader-aggressive', 'test-agent', 'benchmark-agent'];
444
+ const preferredUsernames = [
445
+ "trader-aggressive",
446
+ "test-agent",
447
+ "benchmark-agent",
448
+ ];
445
449
  let agent = allAgents.find(
446
- (a) => a.username && preferredUsernames.includes(a.username)
450
+ (a) => a.username && preferredUsernames.includes(a.username),
447
451
  );
448
452
 
449
453
  // Fall back to any agent
@@ -452,7 +456,7 @@ export class BenchmarkService {
452
456
  }
453
457
 
454
458
  if (!agent) {
455
- throw new Error('No test agent available for benchmarking');
459
+ throw new Error("No test agent available for benchmarking");
456
460
  }
457
461
 
458
462
  return agent;
@@ -490,7 +494,7 @@ export class BenchmarkService {
490
494
  recentModels: summary
491
495
  .sort(
492
496
  (a: (typeof summary)[number], b: (typeof summary)[number]) =>
493
- b.createdAt.getTime() - a.createdAt.getTime()
497
+ b.createdAt.getTime() - a.createdAt.getTime(),
494
498
  )
495
499
  .slice(0, 5),
496
500
  };
@@ -501,7 +505,7 @@ export class BenchmarkService {
501
505
  */
502
506
  async benchmarkMultipleModels(
503
507
  modelIds: string[],
504
- benchmarkPath?: string
508
+ benchmarkPath?: string,
505
509
  ): Promise<Record<string, BenchmarkResults>> {
506
510
  const results: Record<string, BenchmarkResults> = {};
507
511
 
@@ -4,8 +4,8 @@
4
4
  * Validates RL pipeline configuration before execution.
5
5
  */
6
6
 
7
- import type { BenchmarkConfig } from '../benchmark/BenchmarkDataGenerator';
8
- import { logger } from '../utils/logger';
7
+ import type { BenchmarkConfig } from "../benchmark/BenchmarkDataGenerator";
8
+ import { logger } from "../utils/logger";
9
9
 
10
10
  export interface TrainingConfig {
11
11
  min_trajectories_per_batch: number;
@@ -37,56 +37,56 @@ export class ConfigValidator {
37
37
 
38
38
  // Validate batch size
39
39
  if (config.batch_size <= 0) {
40
- errors.push('batch_size must be greater than 0');
40
+ errors.push("batch_size must be greater than 0");
41
41
  }
42
42
  if (config.batch_size > 64) {
43
- warnings.push('batch_size > 64 may cause memory issues');
43
+ warnings.push("batch_size > 64 may cause memory issues");
44
44
  }
45
45
 
46
46
  // Validate learning rate
47
47
  if (config.learning_rate <= 0) {
48
- errors.push('learning_rate must be greater than 0');
48
+ errors.push("learning_rate must be greater than 0");
49
49
  }
50
50
  if (config.learning_rate > 1e-3) {
51
- warnings.push('learning_rate > 1e-3 may cause training instability');
51
+ warnings.push("learning_rate > 1e-3 may cause training instability");
52
52
  }
53
53
  if (config.learning_rate < 1e-8) {
54
54
  warnings.push(
55
- 'learning_rate < 1e-8 may be too small for effective learning'
55
+ "learning_rate < 1e-8 may be too small for effective learning",
56
56
  );
57
57
  }
58
58
 
59
59
  // Validate KL penalty
60
60
  if (config.kl_penalty < 0) {
61
- errors.push('kl_penalty must be non-negative');
61
+ errors.push("kl_penalty must be non-negative");
62
62
  }
63
63
  if (config.kl_penalty > 1.0) {
64
- warnings.push('kl_penalty > 1.0 may be too high');
64
+ warnings.push("kl_penalty > 1.0 may be too high");
65
65
  }
66
66
 
67
67
  // Validate iterations
68
68
  if (config.iterations_per_window <= 0) {
69
- errors.push('iterations_per_window must be greater than 0');
69
+ errors.push("iterations_per_window must be greater than 0");
70
70
  }
71
71
 
72
72
  // Validate warmup steps
73
73
  if (config.warmup_steps < 0) {
74
- errors.push('warmup_steps must be non-negative');
74
+ errors.push("warmup_steps must be non-negative");
75
75
  }
76
76
 
77
77
  // Validate max grad norm
78
78
  if (config.max_grad_norm <= 0) {
79
- errors.push('max_grad_norm must be greater than 0');
79
+ errors.push("max_grad_norm must be greater than 0");
80
80
  }
81
81
 
82
82
  // Validate gamma
83
83
  if (config.gamma < 0 || config.gamma > 1) {
84
- errors.push('gamma must be between 0 and 1');
84
+ errors.push("gamma must be between 0 and 1");
85
85
  }
86
86
 
87
87
  // Validate min trajectories
88
88
  if (config.min_trajectories_per_batch <= 0) {
89
- errors.push('min_trajectories_per_batch must be greater than 0');
89
+ errors.push("min_trajectories_per_batch must be greater than 0");
90
90
  }
91
91
 
92
92
  return {
@@ -109,24 +109,24 @@ export class ConfigValidator {
109
109
  const warnings: string[] = [];
110
110
 
111
111
  if (config.duration_minutes <= 0) {
112
- errors.push('duration_minutes must be greater than 0');
112
+ errors.push("duration_minutes must be greater than 0");
113
113
  }
114
114
  if (config.duration_minutes > 10080) {
115
115
  warnings.push(
116
- 'duration_minutes > 10080 (1 week) may take a long time to generate'
116
+ "duration_minutes > 10080 (1 week) may take a long time to generate",
117
117
  );
118
118
  }
119
119
 
120
120
  if (config.tick_interval_seconds <= 0) {
121
- errors.push('tick_interval_seconds must be greater than 0');
121
+ errors.push("tick_interval_seconds must be greater than 0");
122
122
  }
123
123
 
124
124
  if (config.num_prediction_markets <= 0) {
125
- errors.push('num_prediction_markets must be greater than 0');
125
+ errors.push("num_prediction_markets must be greater than 0");
126
126
  }
127
127
 
128
128
  if (config.num_perpetual_markets <= 0) {
129
- errors.push('num_perpetual_markets must be greater than 0');
129
+ errors.push("num_perpetual_markets must be greater than 0");
130
130
  }
131
131
 
132
132
  return {
@@ -149,7 +149,7 @@ export class ConfigValidator {
149
149
 
150
150
  // Validate benchmark config
151
151
  if (config.benchmark) {
152
- const benchmarkResult = this.validateBenchmarkConfig({
152
+ const benchmarkResult = ConfigValidator.validateBenchmarkConfig({
153
153
  duration_minutes: config.benchmark.durationMinutes,
154
154
  tick_interval_seconds: config.benchmark.tickInterval,
155
155
  num_prediction_markets: config.benchmark.numPredictionMarkets,
@@ -161,17 +161,19 @@ export class ConfigValidator {
161
161
 
162
162
  // Validate training config
163
163
  if (config.training) {
164
- const trainingResult = this.validateTrainingConfig(config.training);
164
+ const trainingResult = ConfigValidator.validateTrainingConfig(
165
+ config.training,
166
+ );
165
167
  errors.push(...trainingResult.errors);
166
168
  warnings.push(...trainingResult.warnings);
167
169
  }
168
170
 
169
171
  // Validate agent config
170
172
  if (config.agents.test_agent_count <= 0) {
171
- errors.push('test_agent_count must be greater than 0');
173
+ errors.push("test_agent_count must be greater than 0");
172
174
  }
173
175
  if (config.agents.test_agent_count > 10) {
174
- warnings.push('test_agent_count > 10 may be slow');
176
+ warnings.push("test_agent_count > 10 may be slow");
175
177
  }
176
178
 
177
179
  return {
@@ -189,31 +191,35 @@ export class ConfigValidator {
189
191
  training: TrainingConfig;
190
192
  agents: { test_agent_count: number };
191
193
  }): boolean {
192
- const result = this.validatePipelineConfig(config);
194
+ const result = ConfigValidator.validatePipelineConfig(config);
193
195
 
194
196
  if (result.warnings.length > 0) {
195
197
  logger.warn(
196
- 'Configuration warnings',
198
+ "Configuration warnings",
197
199
  { warnings: result.warnings },
198
- 'ConfigValidator'
200
+ "ConfigValidator",
199
201
  );
200
- result.warnings.forEach((w) => console.log(` ⚠️ ${w}`));
202
+ result.warnings.forEach((w) => {
203
+ console.log(` ⚠️ ${w}`);
204
+ });
201
205
  }
202
206
 
203
207
  if (result.errors.length > 0) {
204
208
  logger.error(
205
- 'Configuration errors',
209
+ "Configuration errors",
206
210
  { errors: result.errors },
207
- 'ConfigValidator'
211
+ "ConfigValidator",
208
212
  );
209
- result.errors.forEach((e) => console.error(` ❌ ${e}`));
213
+ result.errors.forEach((e) => {
214
+ console.error(` ❌ ${e}`);
215
+ });
210
216
  return false;
211
217
  }
212
218
 
213
219
  logger.info(
214
- 'Configuration validation passed',
220
+ "Configuration validation passed",
215
221
  undefined,
216
- 'ConfigValidator'
222
+ "ConfigValidator",
217
223
  );
218
224
  return true;
219
225
  }
@@ -5,9 +5,9 @@
5
5
  * This gives RULER the ground truth to evaluate agent decisions.
6
6
  */
7
7
 
8
- import { getMarketDataAdapter } from '../adapter';
9
- import { generateSnowflakeId, logger } from '../utils';
10
- import { getPreviousWindowId } from './window-utils';
8
+ import { getMarketDataAdapter } from "../adapter";
9
+ import { generateSnowflakeId, logger } from "../utils";
10
+ import { getPreviousWindowId } from "./window-utils";
11
11
 
12
12
  export interface WindowOutcomes {
13
13
  windowId: string;
@@ -36,7 +36,9 @@ export class MarketOutcomesTracker {
36
36
 
37
37
  const marketAdapter = getMarketDataAdapter();
38
38
  if (!marketAdapter) {
39
- logger.warn('Market data adapter not available, skipping outcome tracking');
39
+ logger.warn(
40
+ "Market data adapter not available, skipping outcome tracking",
41
+ );
40
42
  return;
41
43
  }
42
44
 
@@ -44,7 +46,10 @@ export class MarketOutcomesTracker {
44
46
  const windowEnd = new Date(windowStart.getTime() + 60 * 60 * 1000);
45
47
 
46
48
  // Get stock price movements from perpetual positions
47
- const perpTrades = await marketAdapter.getPerpPositionsForWindow(windowStart, windowEnd);
49
+ const perpTrades = await marketAdapter.getPerpPositionsForWindow(
50
+ windowStart,
51
+ windowEnd,
52
+ );
48
53
 
49
54
  // Group by ticker and calculate movements
50
55
  const stockMovements = new Map<
@@ -56,7 +61,9 @@ export class MarketOutcomesTracker {
56
61
  if (!trade.ticker) continue;
57
62
 
58
63
  const existing = stockMovements.get(trade.ticker);
59
- const endPrice = Number(trade.currentPrice ?? trade.exitPrice ?? trade.entryPrice);
64
+ const endPrice = Number(
65
+ trade.currentPrice ?? trade.exitPrice ?? trade.entryPrice,
66
+ );
60
67
  if (!existing) {
61
68
  stockMovements.set(trade.ticker, {
62
69
  start: Number(trade.entryPrice),
@@ -81,12 +88,15 @@ export class MarketOutcomesTracker {
81
88
  startPrice: String(data.start),
82
89
  endPrice: String(data.end),
83
90
  changePercent: String(changePercent),
84
- sentiment: changePercent > 0 ? 'BULLISH' : 'BEARISH',
91
+ sentiment: changePercent > 0 ? "BULLISH" : "BEARISH",
85
92
  });
86
93
  }
87
94
 
88
95
  // Get prediction market resolutions
89
- const resolvedMarkets = await marketAdapter.getResolvedMarketsForWindow(windowStart, windowEnd);
96
+ const resolvedMarkets = await marketAdapter.getResolvedMarketsForWindow(
97
+ windowStart,
98
+ windowEnd,
99
+ );
90
100
 
91
101
  // Save prediction outcomes
92
102
  for (const market of resolvedMarkets) {
@@ -95,7 +105,7 @@ export class MarketOutcomesTracker {
95
105
  windowId,
96
106
  predictionMarketId: market.id,
97
107
  question: market.question,
98
- outcome: market.outcome ? 'YES' : 'NO',
108
+ outcome: market.outcome ? "YES" : "NO",
99
109
  finalProbability: String(market.finalProbability ?? 0.5),
100
110
  });
101
111
  }
@@ -114,7 +124,7 @@ export class MarketOutcomesTracker {
114
124
 
115
125
  const marketAdapter = getMarketDataAdapter();
116
126
  if (!marketAdapter) {
117
- logger.warn('Market data adapter not available');
127
+ logger.warn("Market data adapter not available");
118
128
  return 0;
119
129
  }
120
130
 
@@ -172,8 +182,8 @@ export class MarketOutcomesTracker {
172
182
  const r = o as Record<string, unknown>;
173
183
  return {
174
184
  marketId: r.predictionMarketId as string,
175
- question: (r.question as string) || '',
176
- outcome: (r.outcome as string) || 'UNRESOLVED',
185
+ question: (r.question as string) || "",
186
+ outcome: (r.outcome as string) || "UNRESOLVED",
177
187
  finalProbability: Number(r.finalProbability || 0),
178
188
  };
179
189
  });
@@ -5,13 +5,13 @@
5
5
  * Handles gradual rollout and rollback if needed.
6
6
  */
7
7
 
8
- import { getTrainingDataAdapter } from '../adapter';
9
- import { getAgentRuntimeManager } from '../dependencies';
10
- import { logger } from '../utils/logger';
8
+ import { getTrainingDataAdapter } from "../adapter";
9
+ import { getAgentRuntimeManager } from "../dependencies";
10
+ import { logger } from "../utils/logger";
11
11
 
12
12
  export interface DeploymentOptions {
13
13
  modelVersion: string;
14
- strategy: 'immediate' | 'gradual' | 'test';
14
+ strategy: "immediate" | "gradual" | "test";
15
15
  rolloutPercentage?: number;
16
16
  testAgentIds?: string[];
17
17
  }
@@ -26,7 +26,7 @@ export interface DeploymentResult {
26
26
  interface DeploymentStatusRecord {
27
27
  deploymentId: string;
28
28
  modelVersion: string;
29
- status: 'in_progress' | 'deployed' | 'degraded' | 'failed';
29
+ status: "in_progress" | "deployed" | "degraded" | "failed";
30
30
  agentsUpdated: number;
31
31
  agentsFailed: number;
32
32
  performance: {
@@ -47,7 +47,7 @@ export class ModelDeployer {
47
47
  async deploy(options: DeploymentOptions): Promise<DeploymentResult> {
48
48
  const da = getTrainingDataAdapter();
49
49
 
50
- logger.info('Starting model deployment', {
50
+ logger.info("Starting model deployment", {
51
51
  version: options.modelVersion,
52
52
  strategy: options.strategy,
53
53
  });
@@ -59,7 +59,7 @@ export class ModelDeployer {
59
59
  }
60
60
 
61
61
  const strategy =
62
- options.strategy === 'immediate' ? 'all' : options.strategy;
62
+ options.strategy === "immediate" ? "all" : options.strategy;
63
63
 
64
64
  const targetAgents = await da.getAgentUsers({
65
65
  strategy,
@@ -73,7 +73,7 @@ export class ModelDeployer {
73
73
  this.deploymentStatus.set(deploymentId, {
74
74
  deploymentId,
75
75
  modelVersion: options.modelVersion,
76
- status: 'in_progress',
76
+ status: "in_progress",
77
77
  agentsUpdated: 0,
78
78
  agentsFailed: 0,
79
79
  performance: {
@@ -84,7 +84,7 @@ export class ModelDeployer {
84
84
  completedAt: null,
85
85
  });
86
86
 
87
- await da.updateModelStatus(model.modelId, 'deployed', {
87
+ await da.updateModelStatus(model.modelId, "deployed", {
88
88
  deployedAt: new Date(),
89
89
  agentsUsing: targetAgents.length,
90
90
  });
@@ -99,14 +99,14 @@ export class ModelDeployer {
99
99
  runtimesReset++;
100
100
  } catch (err) {
101
101
  runtimeResetFailures++;
102
- logger.warn('Failed to reset runtime for agent', {
102
+ logger.warn("Failed to reset runtime for agent", {
103
103
  agentId: agent.id,
104
104
  error: err instanceof Error ? err.message : String(err),
105
105
  });
106
106
  }
107
107
  }
108
108
 
109
- logger.info('Model deployed successfully', {
109
+ logger.info("Model deployed successfully", {
110
110
  version: options.modelVersion,
111
111
  agentsUpdated: targetAgents.length,
112
112
  deploymentId,
@@ -118,7 +118,7 @@ export class ModelDeployer {
118
118
  this.deploymentStatus.set(deploymentId, {
119
119
  deploymentId,
120
120
  modelVersion: options.modelVersion,
121
- status: runtimeResetFailures > 0 ? 'degraded' : 'deployed',
121
+ status: runtimeResetFailures > 0 ? "degraded" : "deployed",
122
122
  agentsUpdated: runtimesReset,
123
123
  agentsFailed: runtimeResetFailures,
124
124
  performance: {
@@ -146,16 +146,16 @@ export class ModelDeployer {
146
146
  */
147
147
  async rollback(
148
148
  currentVersion: string,
149
- targetVersion: string
149
+ targetVersion: string,
150
150
  ): Promise<DeploymentResult> {
151
- logger.info('Rolling back model', {
151
+ logger.info("Rolling back model", {
152
152
  from: currentVersion,
153
153
  to: targetVersion,
154
154
  });
155
155
 
156
156
  return await this.deploy({
157
157
  modelVersion: targetVersion,
158
- strategy: 'immediate',
158
+ strategy: "immediate",
159
159
  });
160
160
  }
161
161