@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/package.json +2 -2
  2. package/research-output/training-runs/training-run-1773726941205.json +38 -0
  3. package/scripts/rank_trajectories.ts +0 -1
  4. package/scripts/run_task_benchmark.ts +4 -11
  5. package/src/adapter.ts +96 -49
  6. package/src/archetypes/ArchetypeConfigService.ts +188 -185
  7. package/src/archetypes/derive-archetype.ts +47 -47
  8. package/src/archetypes/index.ts +2 -2
  9. package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
  10. package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
  11. package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
  12. package/src/benchmark/BenchmarkDataViewer.ts +32 -30
  13. package/src/benchmark/BenchmarkHistoryService.ts +13 -12
  14. package/src/benchmark/BenchmarkRunner.ts +87 -83
  15. package/src/benchmark/BenchmarkValidator.ts +48 -46
  16. package/src/benchmark/FastEvalRunner.ts +17 -16
  17. package/src/benchmark/MetricsValidator.ts +20 -21
  18. package/src/benchmark/MetricsVisualizer.ts +92 -85
  19. package/src/benchmark/ModelBenchmarkService.ts +90 -82
  20. package/src/benchmark/ModelRegistry.ts +44 -44
  21. package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
  22. package/src/benchmark/SimulationA2AInterface.ts +118 -118
  23. package/src/benchmark/SimulationEngine.ts +51 -51
  24. package/src/benchmark/TaskRunner.ts +87 -79
  25. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
  26. package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
  27. package/src/benchmark/index.ts +27 -27
  28. package/src/benchmark/parseSimulationMetrics.ts +32 -32
  29. package/src/benchmark/simulation-types.ts +10 -10
  30. package/src/dependencies.ts +34 -34
  31. package/src/generation/TrajectoryGenerator.ts +39 -37
  32. package/src/generation/index.ts +1 -1
  33. package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
  34. package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
  35. package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
  36. package/src/huggingface/index.ts +6 -6
  37. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
  38. package/src/index.ts +27 -27
  39. package/src/init-training.ts +6 -6
  40. package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
  41. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
  42. package/src/metrics/index.ts +2 -2
  43. package/src/rubrics/__tests__/index.test.ts +73 -73
  44. package/src/rubrics/ass-kisser.ts +6 -6
  45. package/src/rubrics/degen.ts +6 -6
  46. package/src/rubrics/goody-twoshoes.ts +6 -6
  47. package/src/rubrics/index.ts +50 -50
  48. package/src/rubrics/information-trader.ts +6 -6
  49. package/src/rubrics/infosec.ts +6 -6
  50. package/src/rubrics/liar.ts +6 -6
  51. package/src/rubrics/perps-trader.ts +6 -6
  52. package/src/rubrics/researcher.ts +6 -6
  53. package/src/rubrics/scammer.ts +6 -6
  54. package/src/rubrics/social-butterfly.ts +7 -7
  55. package/src/rubrics/super-predictor.ts +6 -6
  56. package/src/rubrics/trader.ts +5 -5
  57. package/src/scoring/ArchetypeScoringService.ts +56 -54
  58. package/src/scoring/JudgePromptBuilder.ts +96 -96
  59. package/src/scoring/LLMJudgeCache.ts +26 -23
  60. package/src/scoring/index.ts +3 -3
  61. package/src/training/AutomationPipeline.ts +149 -140
  62. package/src/training/BenchmarkService.ts +49 -45
  63. package/src/training/ConfigValidator.ts +38 -32
  64. package/src/training/MarketOutcomesTracker.ts +22 -12
  65. package/src/training/ModelDeployer.ts +15 -15
  66. package/src/training/ModelFetcher.ts +7 -7
  67. package/src/training/ModelSelectionService.ts +32 -32
  68. package/src/training/ModelUsageVerifier.ts +31 -24
  69. package/src/training/MultiModelOrchestrator.ts +44 -44
  70. package/src/training/RLModelConfig.ts +57 -57
  71. package/src/training/RewardBackpropagationService.ts +18 -17
  72. package/src/training/RulerScoringService.ts +73 -72
  73. package/src/training/TrainingMonitor.ts +29 -29
  74. package/src/training/TrajectoryRecorder.ts +25 -27
  75. package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
  76. package/src/training/index.ts +36 -36
  77. package/src/training/logRLConfig.ts +7 -7
  78. package/src/training/pipeline.ts +13 -16
  79. package/src/training/storage/ModelStorageService.ts +32 -32
  80. package/src/training/storage/TrainingDataArchiver.ts +21 -21
  81. package/src/training/storage/index.ts +2 -2
  82. package/src/training/types.ts +6 -6
  83. package/src/training/window-utils.ts +14 -14
  84. package/src/utils/index.ts +7 -7
  85. package/src/utils/logger.ts +5 -5
  86. package/src/utils/snowflake.ts +1 -1
  87. package/src/utils/synthetic-detector.ts +7 -7
@@ -11,24 +11,24 @@
11
11
  * Can run multiple agents and compare their performance.
12
12
  */
13
13
 
14
- import type { IAgentRuntimeLike } from '../dependencies';
15
- import { promises as fs } from 'fs';
16
- import * as path from 'path';
17
- import { getAutonomousCoordinator } from '../dependencies';
18
- import { TrajectoryRecorder } from '../training/TrajectoryRecorder';
19
- import { logger } from '../utils/logger';
14
+ import { promises as fs } from "node:fs";
15
+ import * as path from "node:path";
16
+ import type { IAgentRuntimeLike } from "../dependencies";
17
+ import { getAutonomousCoordinator } from "../dependencies";
18
+ import { TrajectoryRecorder } from "../training/TrajectoryRecorder";
19
+ import { logger } from "../utils/logger";
20
20
  import {
21
21
  type BenchmarkConfig,
22
22
  BenchmarkDataGenerator,
23
23
  type BenchmarkGameSnapshot,
24
24
  SeededRandom,
25
- } from './BenchmarkDataGenerator';
26
- import { SimulationA2AInterface } from './SimulationA2AInterface';
25
+ } from "./BenchmarkDataGenerator";
26
+ import { SimulationA2AInterface } from "./SimulationA2AInterface";
27
27
  import {
28
28
  type SimulationConfig,
29
29
  SimulationEngine,
30
30
  type SimulationResult,
31
- } from './SimulationEngine';
31
+ } from "./SimulationEngine";
32
32
 
33
33
  export interface BenchmarkRunConfig {
34
34
  /** Path to benchmark snapshot file (or will generate new one) */
@@ -53,7 +53,7 @@ export interface BenchmarkRunConfig {
53
53
  forceModel?: string;
54
54
 
55
55
  /** Force a baseline strategy (overrides agent behavior) */
56
- forceStrategy?: 'random' | 'momentum';
56
+ forceStrategy?: "random" | "momentum";
57
57
  }
58
58
 
59
59
  export interface BenchmarkComparisonResult {
@@ -104,18 +104,18 @@ export class BenchmarkRunner {
104
104
  * ```
105
105
  */
106
106
  static async runSingle(
107
- config: BenchmarkRunConfig
107
+ config: BenchmarkRunConfig,
108
108
  ): Promise<SimulationResult> {
109
- logger.info('Starting benchmark run', {
109
+ logger.info("Starting benchmark run", {
110
110
  agentUserId: config.agentUserId,
111
111
  benchmarkPath: config.benchmarkPath,
112
- strategy: config.forceStrategy || 'agent-driven',
112
+ strategy: config.forceStrategy || "agent-driven",
113
113
  });
114
114
 
115
115
  // 1. Load or generate benchmark
116
116
  const snapshot = config.benchmarkPath
117
- ? await this.loadBenchmark(config.benchmarkPath)
118
- : await this.generateBenchmark(config.generatorConfig!);
117
+ ? await BenchmarkRunner.loadBenchmark(config.benchmarkPath)
118
+ : await BenchmarkRunner.generateBenchmark(config.generatorConfig!);
119
119
 
120
120
  // 2. Create simulation engine
121
121
  const simConfig: SimulationConfig = {
@@ -132,12 +132,16 @@ export class BenchmarkRunner {
132
132
 
133
133
  // Inject A2A interface into agent runtime (if using real agent and not forcing strategy)
134
134
  if (!config.forceStrategy) {
135
- (config.agentRuntime as IAgentRuntimeLike & { a2aClient?: SimulationA2AInterface }).a2aClient = a2aInterface;
135
+ (
136
+ config.agentRuntime as IAgentRuntimeLike & {
137
+ a2aClient?: SimulationA2AInterface;
138
+ }
139
+ ).a2aClient = a2aInterface;
136
140
  }
137
141
 
138
142
  // Force model if specified (for baseline testing)
139
143
  if (config.forceModel) {
140
- logger.info('Forcing model for benchmark', {
144
+ logger.info("Forcing model for benchmark", {
141
145
  agentUserId: config.agentUserId,
142
146
  forcedModel: config.forceModel,
143
147
  });
@@ -155,8 +159,8 @@ export class BenchmarkRunner {
155
159
  }
156
160
 
157
161
  if (runtime.setSetting) {
158
- runtime.setSetting('GROQ_LARGE_MODEL', config.forceModel);
159
- runtime.setSetting('GROQ_SMALL_MODEL', config.forceModel);
162
+ runtime.setSetting("GROQ_LARGE_MODEL", config.forceModel);
163
+ runtime.setSetting("GROQ_SMALL_MODEL", config.forceModel);
160
164
  }
161
165
  }
162
166
 
@@ -170,14 +174,14 @@ export class BenchmarkRunner {
170
174
  agentId: config.agentUserId,
171
175
  scenarioId: `benchmark-${snapshot.id}`,
172
176
  });
173
- logger.info('Trajectory recording started', { trajectoryId });
177
+ logger.info("Trajectory recording started", { trajectoryId });
174
178
  }
175
179
 
176
180
  // 5. Initialize simulation
177
181
  engine.initialize();
178
182
 
179
183
  // 6. Run simulation loop
180
- logger.info('Starting simulation loop', {
184
+ logger.info("Starting simulation loop", {
181
185
  agentUserId: config.agentUserId,
182
186
  totalTicks: snapshot.ticks.length,
183
187
  });
@@ -191,7 +195,7 @@ export class BenchmarkRunner {
191
195
  // Create seeded RNG for baseline strategies (reproducibility)
192
196
  // Use snapshot ID hash as seed for deterministic behavior across runs
193
197
  const baselineSeed = config.forceStrategy
194
- ? snapshot.id.split('').reduce((acc, c) => acc + c.charCodeAt(0), 0)
198
+ ? snapshot.id.split("").reduce((acc, c) => acc + c.charCodeAt(0), 0)
195
199
  : 0;
196
200
  const baselineRng = config.forceStrategy
197
201
  ? new SeededRandom(baselineSeed)
@@ -208,21 +212,21 @@ export class BenchmarkRunner {
208
212
  `Benchmark progress: ${currentTick}/${snapshot.ticks.length} ticks`,
209
213
  {
210
214
  agentUserId: config.agentUserId,
211
- }
215
+ },
212
216
  );
213
217
  }
214
218
 
215
219
  if (config.forceStrategy && baselineRng) {
216
220
  // Execute baseline strategy directly on engine (bypassing LLM)
217
- await this.executeBaselineStrategy(
221
+ await BenchmarkRunner.executeBaselineStrategy(
218
222
  config.forceStrategy,
219
223
  engine,
220
- baselineRng
224
+ baselineRng,
221
225
  );
222
226
  } else {
223
227
  if (!coordinator) {
224
228
  throw new Error(
225
- 'AutonomousCoordinator required for agent-driven benchmark but not configured.'
229
+ "AutonomousCoordinator required for agent-driven benchmark but not configured.",
226
230
  );
227
231
  }
228
232
 
@@ -230,7 +234,7 @@ export class BenchmarkRunner {
230
234
  // Fail fast - don't catch errors, let them propagate
231
235
  const tickResult = await coordinator.executeAutonomousTick(
232
236
  config.agentUserId,
233
- config.agentRuntime
237
+ config.agentRuntime,
234
238
  );
235
239
 
236
240
  if (tickResult.success && tickResult.actionsExecuted) {
@@ -243,7 +247,7 @@ export class BenchmarkRunner {
243
247
  tickResult.actionsExecuted.engagements;
244
248
 
245
249
  if (totalActions > 0) {
246
- logger.debug('Agent took actions', {
250
+ logger.debug("Agent took actions", {
247
251
  tick: currentTick,
248
252
  actions: tickResult.actionsExecuted,
249
253
  });
@@ -259,7 +263,7 @@ export class BenchmarkRunner {
259
263
  await new Promise((resolve) => setTimeout(resolve, 5));
260
264
  }
261
265
 
262
- logger.info('Simulation loop complete', {
266
+ logger.info("Simulation loop complete", {
263
267
  agentUserId: config.agentUserId,
264
268
  ticksCompleted,
265
269
  totalTicks: snapshot.ticks.length,
@@ -270,11 +274,11 @@ export class BenchmarkRunner {
270
274
 
271
275
  // 8. Validate results - ensure agent actually did something
272
276
  if (result.ticksProcessed === 0) {
273
- throw new Error('Benchmark failed: No ticks were processed');
277
+ throw new Error("Benchmark failed: No ticks were processed");
274
278
  }
275
279
 
276
280
  if (result.actions.length === 0) {
277
- logger.warn('Benchmark completed but agent took no actions', {
281
+ logger.warn("Benchmark completed but agent took no actions", {
278
282
  agentUserId: config.agentUserId,
279
283
  ticksProcessed: result.ticksProcessed,
280
284
  });
@@ -286,13 +290,13 @@ export class BenchmarkRunner {
286
290
  finalPnL: result.metrics.totalPnl,
287
291
  finalBalance: undefined, // Let recorder calculate from state
288
292
  });
289
- logger.info('Trajectory recording saved', { trajectoryId });
293
+ logger.info("Trajectory recording saved", { trajectoryId });
290
294
  }
291
295
 
292
296
  // 10. Save results
293
- await this.saveResult(result, config.outputDir);
297
+ await BenchmarkRunner.saveResult(result, config.outputDir);
294
298
 
295
- logger.info('Benchmark run completed', {
299
+ logger.info("Benchmark run completed", {
296
300
  agentUserId: config.agentUserId,
297
301
  totalPnl: result.metrics.totalPnl,
298
302
  accuracy: result.metrics.predictionMetrics.accuracy,
@@ -308,31 +312,31 @@ export class BenchmarkRunner {
308
312
  * Uses seeded RNG for reproducibility across benchmark runs.
309
313
  */
310
314
  private static async executeBaselineStrategy(
311
- strategy: 'random' | 'momentum',
315
+ strategy: "random" | "momentum",
312
316
  engine: SimulationEngine,
313
- rng: SeededRandom
317
+ rng: SeededRandom,
314
318
  ): Promise<void> {
315
319
  const state = engine.getGameState();
316
320
 
317
321
  // Rate limiting: Only trade in ~10% of ticks to simulate realistic frequency
318
322
  if (rng.next() > 0.1) return;
319
323
 
320
- if (strategy === 'random') {
324
+ if (strategy === "random") {
321
325
  // Random strategy: Buy prediction shares or open perps randomly
322
- const actionType = rng.next() > 0.5 ? 'prediction' : 'perp';
326
+ const actionType = rng.next() > 0.5 ? "prediction" : "perp";
323
327
 
324
- if (actionType === 'prediction' && state.predictionMarkets.length > 0) {
328
+ if (actionType === "prediction" && state.predictionMarkets.length > 0) {
325
329
  const marketIndex = Math.floor(
326
- rng.next() * state.predictionMarkets.length
330
+ rng.next() * state.predictionMarkets.length,
327
331
  );
328
332
  const market = state.predictionMarkets[marketIndex];
329
333
 
330
334
  if (market) {
331
- const outcome = rng.next() > 0.5 ? 'YES' : 'NO';
335
+ const outcome = rng.next() > 0.5 ? "YES" : "NO";
332
336
  // Random amount between 10 and 100
333
337
  const amount = 10 + rng.next() * 90;
334
338
 
335
- await engine.performAction('buy_prediction', {
339
+ await engine.performAction("buy_prediction", {
336
340
  marketId: market.id,
337
341
  outcome,
338
342
  amount,
@@ -340,13 +344,13 @@ export class BenchmarkRunner {
340
344
  }
341
345
  } else if (state.perpetualMarkets.length > 0) {
342
346
  const perpIndex = Math.floor(
343
- rng.next() * state.perpetualMarkets.length
347
+ rng.next() * state.perpetualMarkets.length,
344
348
  );
345
349
  const perp = state.perpetualMarkets[perpIndex];
346
350
 
347
351
  if (perp) {
348
- const side = rng.next() > 0.5 ? 'LONG' : 'SHORT';
349
- await engine.performAction('open_perp', {
352
+ const side = rng.next() > 0.5 ? "LONG" : "SHORT";
353
+ await engine.performAction("open_perp", {
350
354
  ticker: perp.ticker,
351
355
  side,
352
356
  size: 10,
@@ -354,11 +358,11 @@ export class BenchmarkRunner {
354
358
  });
355
359
  }
356
360
  }
357
- } else if (strategy === 'momentum') {
361
+ } else if (strategy === "momentum") {
358
362
  // Momentum strategy: Follow price trends
359
363
  if (state.perpetualMarkets.length > 0) {
360
364
  const perpIndex = Math.floor(
361
- rng.next() * state.perpetualMarkets.length
365
+ rng.next() * state.perpetualMarkets.length,
362
366
  );
363
367
  const perp = state.perpetualMarkets[perpIndex];
364
368
 
@@ -366,16 +370,16 @@ export class BenchmarkRunner {
366
370
  // If price up > 0.5% in 24h, go LONG. If down > 0.5%, go SHORT.
367
371
  // If relatively flat, do nothing (hold).
368
372
  if (perp.priceChange24h > 0.5) {
369
- await engine.performAction('open_perp', {
373
+ await engine.performAction("open_perp", {
370
374
  ticker: perp.ticker,
371
- side: 'LONG',
375
+ side: "LONG",
372
376
  size: 20,
373
377
  leverage: 2,
374
378
  });
375
379
  } else if (perp.priceChange24h < -0.5) {
376
- await engine.performAction('open_perp', {
380
+ await engine.performAction("open_perp", {
377
381
  ticker: perp.ticker,
378
- side: 'SHORT',
382
+ side: "SHORT",
379
383
  size: 20,
380
384
  leverage: 2,
381
385
  });
@@ -410,7 +414,7 @@ export class BenchmarkRunner {
410
414
  */
411
415
  static async runMultiple(
412
416
  config: BenchmarkRunConfig,
413
- numRuns: number
417
+ numRuns: number,
414
418
  ): Promise<BenchmarkComparisonResult> {
415
419
  logger.info(`Running ${numRuns} benchmark iterations`, {
416
420
  agentUserId: config.agentUserId,
@@ -422,7 +426,7 @@ export class BenchmarkRunner {
422
426
  for (let i = 0; i < numRuns; i++) {
423
427
  logger.info(`Starting run ${i + 1}/${numRuns}`);
424
428
 
425
- const result = await this.runSingle({
429
+ const result = await BenchmarkRunner.runSingle({
426
430
  ...config,
427
431
  outputDir: path.join(config.outputDir, `run-${i + 1}`),
428
432
  });
@@ -431,7 +435,7 @@ export class BenchmarkRunner {
431
435
 
432
436
  if (config.saveTrajectory) {
433
437
  trajectoryPaths.push(
434
- path.join(config.outputDir, `run-${i + 1}`, 'trajectory.json')
438
+ path.join(config.outputDir, `run-${i + 1}`, "trajectory.json"),
435
439
  );
436
440
  }
437
441
 
@@ -449,11 +453,11 @@ export class BenchmarkRunner {
449
453
  runs.reduce((sum, r) => sum + r.metrics.optimalityScore, 0) / runs.length;
450
454
 
451
455
  const bestRun = runs.reduce((best, current) =>
452
- current.metrics.totalPnl > best.metrics.totalPnl ? current : best
456
+ current.metrics.totalPnl > best.metrics.totalPnl ? current : best,
453
457
  );
454
458
 
455
459
  const worstRun = runs.reduce((worst, current) =>
456
- current.metrics.totalPnl < worst.metrics.totalPnl ? current : worst
460
+ current.metrics.totalPnl < worst.metrics.totalPnl ? current : worst,
457
461
  );
458
462
 
459
463
  const comparison = {
@@ -465,16 +469,16 @@ export class BenchmarkRunner {
465
469
  };
466
470
 
467
471
  // Save comparison report
468
- await this.saveComparison(
472
+ await BenchmarkRunner.saveComparison(
469
473
  {
470
474
  runs,
471
475
  comparison,
472
476
  trajectories: config.saveTrajectory ? trajectoryPaths : undefined,
473
477
  },
474
- config.outputDir
478
+ config.outputDir,
475
479
  );
476
480
 
477
- logger.info('Multiple benchmarks completed', comparison);
481
+ logger.info("Multiple benchmarks completed", comparison);
478
482
 
479
483
  return {
480
484
  runs,
@@ -513,7 +517,7 @@ export class BenchmarkRunner {
513
517
  static async compareAgents(
514
518
  agent1Config: BenchmarkRunConfig,
515
519
  agent2Config: BenchmarkRunConfig,
516
- benchmarkPath: string
520
+ benchmarkPath: string,
517
521
  ): Promise<{
518
522
  agent1: SimulationResult;
519
523
  agent2: SimulationResult;
@@ -524,7 +528,7 @@ export class BenchmarkRunner {
524
528
  optimality: number;
525
529
  };
526
530
  }> {
527
- logger.info('Comparing two agents', {
531
+ logger.info("Comparing two agents", {
528
532
  agent1: agent1Config.agentUserId,
529
533
  agent2: agent2Config.agentUserId,
530
534
  benchmark: benchmarkPath,
@@ -532,8 +536,8 @@ export class BenchmarkRunner {
532
536
 
533
537
  // Run both agents on same benchmark (concurrently)
534
538
  const [result1, result2] = await Promise.all([
535
- this.runSingle({ ...agent1Config, benchmarkPath }),
536
- this.runSingle({ ...agent2Config, benchmarkPath }),
539
+ BenchmarkRunner.runSingle({ ...agent1Config, benchmarkPath }),
540
+ BenchmarkRunner.runSingle({ ...agent2Config, benchmarkPath }),
537
541
  ]);
538
542
 
539
543
  const winner =
@@ -550,7 +554,7 @@ export class BenchmarkRunner {
550
554
  result1.metrics.optimalityScore - result2.metrics.optimalityScore,
551
555
  };
552
556
 
553
- logger.info('Agent comparison completed', {
557
+ logger.info("Agent comparison completed", {
554
558
  winner,
555
559
  delta,
556
560
  });
@@ -571,16 +575,16 @@ export class BenchmarkRunner {
571
575
  * @throws Error if file cannot be read or parsed
572
576
  */
573
577
  private static async loadBenchmark(
574
- benchmarkPath: string
578
+ benchmarkPath: string,
575
579
  ): Promise<BenchmarkGameSnapshot> {
576
580
  try {
577
- const data = await fs.readFile(benchmarkPath, 'utf-8');
581
+ const data = await fs.readFile(benchmarkPath, "utf-8");
578
582
  const parsed = JSON.parse(data) as BenchmarkGameSnapshot;
579
583
 
580
584
  // Validate basic structure
581
585
  if (!parsed.id || !parsed.initialState || !parsed.groundTruth) {
582
586
  throw new Error(
583
- `Invalid benchmark file: missing required fields (id, initialState, or groundTruth)`
587
+ `Invalid benchmark file: missing required fields (id, initialState, or groundTruth)`,
584
588
  );
585
589
  }
586
590
 
@@ -588,10 +592,10 @@ export class BenchmarkRunner {
588
592
  } catch (error) {
589
593
  if (error instanceof SyntaxError) {
590
594
  throw new Error(
591
- `Failed to parse benchmark JSON file: ${error.message}`
595
+ `Failed to parse benchmark JSON file: ${error.message}`,
592
596
  );
593
597
  }
594
- if ((error as { code?: string })?.code === 'ENOENT') {
598
+ if ((error as { code?: string })?.code === "ENOENT") {
595
599
  throw new Error(`Benchmark file not found: ${benchmarkPath}`);
596
600
  }
597
601
  throw error;
@@ -609,9 +613,9 @@ export class BenchmarkRunner {
609
613
  * @throws Error if generation fails
610
614
  */
611
615
  private static async generateBenchmark(
612
- config: BenchmarkConfig
616
+ config: BenchmarkConfig,
613
617
  ): Promise<BenchmarkGameSnapshot> {
614
- logger.info('Generating new benchmark', config);
618
+ logger.info("Generating new benchmark", config);
615
619
 
616
620
  const generator = new BenchmarkDataGenerator(config);
617
621
  const snapshot = await generator.generate();
@@ -619,13 +623,13 @@ export class BenchmarkRunner {
619
623
  // Save for reuse
620
624
  const outputPath = path.join(
621
625
  process.cwd(),
622
- 'benchmarks',
623
- `benchmark-${snapshot.id}.json`
626
+ "benchmarks",
627
+ `benchmark-${snapshot.id}.json`,
624
628
  );
625
629
  await fs.mkdir(path.dirname(outputPath), { recursive: true });
626
630
  await fs.writeFile(outputPath, JSON.stringify(snapshot, null, 2));
627
631
 
628
- logger.info('Benchmark generated and saved', { path: outputPath });
632
+ logger.info("Benchmark generated and saved", { path: outputPath });
629
633
 
630
634
  return snapshot;
631
635
  }
@@ -641,26 +645,26 @@ export class BenchmarkRunner {
641
645
  */
642
646
  private static async saveResult(
643
647
  result: SimulationResult,
644
- outputDir: string
648
+ outputDir: string,
645
649
  ): Promise<void> {
646
650
  await fs.mkdir(outputDir, { recursive: true });
647
651
 
648
652
  // Save full result
649
- const resultPath = path.join(outputDir, 'result.json');
653
+ const resultPath = path.join(outputDir, "result.json");
650
654
  await fs.writeFile(resultPath, JSON.stringify(result, null, 2));
651
655
 
652
656
  // Save metrics summary
653
- const metricsPath = path.join(outputDir, 'metrics.json');
657
+ const metricsPath = path.join(outputDir, "metrics.json");
654
658
  await fs.writeFile(metricsPath, JSON.stringify(result.metrics, null, 2));
655
659
 
656
660
  // Save trajectory
657
- const trajectoryPath = path.join(outputDir, 'trajectory.json');
661
+ const trajectoryPath = path.join(outputDir, "trajectory.json");
658
662
  await fs.writeFile(
659
663
  trajectoryPath,
660
- JSON.stringify(result.trajectory, null, 2)
664
+ JSON.stringify(result.trajectory, null, 2),
661
665
  );
662
666
 
663
- logger.debug('Results saved', { outputDir });
667
+ logger.debug("Results saved", { outputDir });
664
668
  }
665
669
 
666
670
  /**
@@ -673,13 +677,13 @@ export class BenchmarkRunner {
673
677
  */
674
678
  private static async saveComparison(
675
679
  comparison: BenchmarkComparisonResult,
676
- outputDir: string
680
+ outputDir: string,
677
681
  ): Promise<void> {
678
682
  await fs.mkdir(outputDir, { recursive: true });
679
683
 
680
- const comparisonPath = path.join(outputDir, 'comparison.json');
684
+ const comparisonPath = path.join(outputDir, "comparison.json");
681
685
  await fs.writeFile(comparisonPath, JSON.stringify(comparison, null, 2));
682
686
 
683
- logger.debug('Comparison saved', { outputDir });
687
+ logger.debug("Comparison saved", { outputDir });
684
688
  }
685
689
  }