@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/package.json +2 -2
  2. package/research-output/training-runs/training-run-1773726941205.json +38 -0
  3. package/scripts/rank_trajectories.ts +0 -1
  4. package/scripts/run_task_benchmark.ts +4 -11
  5. package/src/adapter.ts +96 -49
  6. package/src/archetypes/ArchetypeConfigService.ts +188 -185
  7. package/src/archetypes/derive-archetype.ts +47 -47
  8. package/src/archetypes/index.ts +2 -2
  9. package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
  10. package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
  11. package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
  12. package/src/benchmark/BenchmarkDataViewer.ts +32 -30
  13. package/src/benchmark/BenchmarkHistoryService.ts +13 -12
  14. package/src/benchmark/BenchmarkRunner.ts +87 -83
  15. package/src/benchmark/BenchmarkValidator.ts +48 -46
  16. package/src/benchmark/FastEvalRunner.ts +17 -16
  17. package/src/benchmark/MetricsValidator.ts +20 -21
  18. package/src/benchmark/MetricsVisualizer.ts +92 -85
  19. package/src/benchmark/ModelBenchmarkService.ts +90 -82
  20. package/src/benchmark/ModelRegistry.ts +44 -44
  21. package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
  22. package/src/benchmark/SimulationA2AInterface.ts +118 -118
  23. package/src/benchmark/SimulationEngine.ts +51 -51
  24. package/src/benchmark/TaskRunner.ts +87 -79
  25. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
  26. package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
  27. package/src/benchmark/index.ts +27 -27
  28. package/src/benchmark/parseSimulationMetrics.ts +32 -32
  29. package/src/benchmark/simulation-types.ts +10 -10
  30. package/src/dependencies.ts +34 -34
  31. package/src/generation/TrajectoryGenerator.ts +39 -37
  32. package/src/generation/index.ts +1 -1
  33. package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
  34. package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
  35. package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
  36. package/src/huggingface/index.ts +6 -6
  37. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
  38. package/src/index.ts +27 -27
  39. package/src/init-training.ts +6 -6
  40. package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
  41. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
  42. package/src/metrics/index.ts +2 -2
  43. package/src/rubrics/__tests__/index.test.ts +73 -73
  44. package/src/rubrics/ass-kisser.ts +6 -6
  45. package/src/rubrics/degen.ts +6 -6
  46. package/src/rubrics/goody-twoshoes.ts +6 -6
  47. package/src/rubrics/index.ts +50 -50
  48. package/src/rubrics/information-trader.ts +6 -6
  49. package/src/rubrics/infosec.ts +6 -6
  50. package/src/rubrics/liar.ts +6 -6
  51. package/src/rubrics/perps-trader.ts +6 -6
  52. package/src/rubrics/researcher.ts +6 -6
  53. package/src/rubrics/scammer.ts +6 -6
  54. package/src/rubrics/social-butterfly.ts +7 -7
  55. package/src/rubrics/super-predictor.ts +6 -6
  56. package/src/rubrics/trader.ts +5 -5
  57. package/src/scoring/ArchetypeScoringService.ts +56 -54
  58. package/src/scoring/JudgePromptBuilder.ts +96 -96
  59. package/src/scoring/LLMJudgeCache.ts +26 -23
  60. package/src/scoring/index.ts +3 -3
  61. package/src/training/AutomationPipeline.ts +149 -140
  62. package/src/training/BenchmarkService.ts +49 -45
  63. package/src/training/ConfigValidator.ts +38 -32
  64. package/src/training/MarketOutcomesTracker.ts +22 -12
  65. package/src/training/ModelDeployer.ts +15 -15
  66. package/src/training/ModelFetcher.ts +7 -7
  67. package/src/training/ModelSelectionService.ts +32 -32
  68. package/src/training/ModelUsageVerifier.ts +31 -24
  69. package/src/training/MultiModelOrchestrator.ts +44 -44
  70. package/src/training/RLModelConfig.ts +57 -57
  71. package/src/training/RewardBackpropagationService.ts +18 -17
  72. package/src/training/RulerScoringService.ts +73 -72
  73. package/src/training/TrainingMonitor.ts +29 -29
  74. package/src/training/TrajectoryRecorder.ts +25 -27
  75. package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
  76. package/src/training/index.ts +36 -36
  77. package/src/training/logRLConfig.ts +7 -7
  78. package/src/training/pipeline.ts +13 -16
  79. package/src/training/storage/ModelStorageService.ts +32 -32
  80. package/src/training/storage/TrainingDataArchiver.ts +21 -21
  81. package/src/training/storage/index.ts +2 -2
  82. package/src/training/types.ts +6 -6
  83. package/src/training/window-utils.ts +14 -14
  84. package/src/utils/index.ts +7 -7
  85. package/src/utils/logger.ts +5 -5
  86. package/src/utils/snowflake.ts +1 -1
  87. package/src/utils/synthetic-detector.ts +7 -7
@@ -7,7 +7,7 @@
7
7
  * @packageDocumentation
8
8
  */
9
9
 
10
- import { getAvailableArchetypes, normalizeArchetype } from '../rubrics';
10
+ import { getAvailableArchetypes, normalizeArchetype } from "../rubrics";
11
11
 
12
12
  /**
13
13
  * NPC characteristics used for archetype derivation
@@ -29,28 +29,28 @@ export interface NPCCharacteristics {
29
29
  */
30
30
  const ROLE_TO_ARCHETYPE: Record<string, string> = {
31
31
  // High-reliability roles → ethical archetypes
32
- insider: 'information-trader',
33
- expert: 'researcher',
34
- whistleblower: 'goody-twoshoes',
35
- analyst: 'researcher',
32
+ insider: "information-trader",
33
+ expert: "researcher",
34
+ whistleblower: "goody-twoshoes",
35
+ analyst: "researcher",
36
36
 
37
37
  // Media/content roles
38
- journalist: 'social-butterfly',
39
- reporter: 'social-butterfly',
40
- influencer: 'social-butterfly',
38
+ journalist: "social-butterfly",
39
+ reporter: "social-butterfly",
40
+ influencer: "social-butterfly",
41
41
 
42
42
  // Low-reliability roles → deceptive archetypes
43
- deceiver: 'scammer',
44
- politician: 'liar',
45
- conspiracy: 'liar',
43
+ deceiver: "scammer",
44
+ politician: "liar",
45
+ conspiracy: "liar",
46
46
 
47
47
  // Trading-focused roles
48
- trader: 'trader',
49
- investor: 'trader',
50
- speculator: 'degen',
48
+ trader: "trader",
49
+ investor: "trader",
50
+ speculator: "degen",
51
51
 
52
52
  // Default fallback
53
- unknown: 'trader',
53
+ unknown: "trader",
54
54
  };
55
55
 
56
56
  /**
@@ -64,57 +64,57 @@ const PERSONALITY_KEYWORDS: Array<{
64
64
  }> = [
65
65
  // High priority - distinctive personalities
66
66
  {
67
- keywords: ['manipulative', 'deceptive', 'cunning', 'unethical'],
68
- archetype: 'scammer',
67
+ keywords: ["manipulative", "deceptive", "cunning", "unethical"],
68
+ archetype: "scammer",
69
69
  priority: 10,
70
70
  },
71
71
  {
72
- keywords: ['reckless', 'impulsive', 'yolo', 'fomo', 'aggressive'],
73
- archetype: 'degen',
72
+ keywords: ["reckless", "impulsive", "yolo", "fomo", "aggressive"],
73
+ archetype: "degen",
74
74
  priority: 10,
75
75
  },
76
76
  {
77
- keywords: ['honest', 'ethical', 'helpful', 'transparent', 'altruistic'],
78
- archetype: 'goody-twoshoes',
77
+ keywords: ["honest", "ethical", "helpful", "transparent", "altruistic"],
78
+ archetype: "goody-twoshoes",
79
79
  priority: 10,
80
80
  },
81
81
  {
82
- keywords: ['thorough', 'meticulous', 'analytical', 'data-driven'],
83
- archetype: 'researcher',
82
+ keywords: ["thorough", "meticulous", "analytical", "data-driven"],
83
+ archetype: "researcher",
84
84
  priority: 8,
85
85
  },
86
86
 
87
87
  // Medium priority - trading styles
88
88
  {
89
- keywords: ['disciplined', 'methodical', 'patient', 'risk-averse'],
90
- archetype: 'trader',
89
+ keywords: ["disciplined", "methodical", "patient", "risk-averse"],
90
+ archetype: "trader",
91
91
  priority: 5,
92
92
  },
93
93
  {
94
- keywords: ['social', 'networker', 'outgoing', 'community'],
95
- archetype: 'social-butterfly',
94
+ keywords: ["social", "networker", "outgoing", "community"],
95
+ archetype: "social-butterfly",
96
96
  priority: 5,
97
97
  },
98
98
  {
99
- keywords: ['flattering', 'agreeable', 'sycophantic', 'pleasing'],
100
- archetype: 'ass-kisser',
99
+ keywords: ["flattering", "agreeable", "sycophantic", "pleasing"],
100
+ archetype: "ass-kisser",
101
101
  priority: 5,
102
102
  },
103
103
 
104
104
  // Low priority - general
105
105
  {
106
- keywords: ['suspicious', 'secretive', 'paranoid', 'security'],
107
- archetype: 'infosec',
106
+ keywords: ["suspicious", "secretive", "paranoid", "security"],
107
+ archetype: "infosec",
108
108
  priority: 3,
109
109
  },
110
110
  {
111
- keywords: ['leverage', 'perpetual', 'futures', 'derivatives'],
112
- archetype: 'perps-trader',
111
+ keywords: ["leverage", "perpetual", "futures", "derivatives"],
112
+ archetype: "perps-trader",
113
113
  priority: 3,
114
114
  },
115
115
  {
116
- keywords: ['prediction', 'forecast', 'oracle', 'prophet'],
117
- archetype: 'super-predictor',
116
+ keywords: ["prediction", "forecast", "oracle", "prophet"],
117
+ archetype: "super-predictor",
118
118
  priority: 3,
119
119
  },
120
120
  ];
@@ -155,7 +155,7 @@ export function deriveArchetype(npc: NPCCharacteristics): string {
155
155
  npc.willingToLie === true
156
156
  ) {
157
157
  // Confirmed deceptive: low reliability + actively willing to lie
158
- return 'scammer';
158
+ return "scammer";
159
159
  }
160
160
 
161
161
  // Note: High reliability is factored into personality analysis below, not used as an override.
@@ -168,7 +168,7 @@ export function deriveArchetype(npc: NPCCharacteristics): string {
168
168
 
169
169
  for (const mapping of PERSONALITY_KEYWORDS) {
170
170
  const matchCount = mapping.keywords.filter((keyword) =>
171
- personalityLower.includes(keyword)
171
+ personalityLower.includes(keyword),
172
172
  ).length;
173
173
 
174
174
  if (matchCount > 0) {
@@ -190,19 +190,19 @@ export function deriveArchetype(npc: NPCCharacteristics): string {
190
190
  // 4. Check domain for trading specialization
191
191
  if (npc.domain && npc.domain.length > 0) {
192
192
  const domains = npc.domain.map((d) => d.toLowerCase());
193
- if (domains.includes('trading') || domains.includes('finance')) {
194
- return 'trader';
193
+ if (domains.includes("trading") || domains.includes("finance")) {
194
+ return "trader";
195
195
  }
196
- if (domains.includes('technology') || domains.includes('tech')) {
197
- return 'researcher';
196
+ if (domains.includes("technology") || domains.includes("tech")) {
197
+ return "researcher";
198
198
  }
199
- if (domains.includes('media') || domains.includes('social')) {
200
- return 'social-butterfly';
199
+ if (domains.includes("media") || domains.includes("social")) {
200
+ return "social-butterfly";
201
201
  }
202
202
  }
203
203
 
204
204
  // 5. Default fallback
205
- return 'trader';
205
+ return "trader";
206
206
  }
207
207
 
208
208
  /**
@@ -218,7 +218,7 @@ export type ArchetypeResolver = (npcId: string) => string;
218
218
  * @returns Function that resolves archetype from NPC ID
219
219
  */
220
220
  export function createArchetypeResolver(
221
- npcs: NPCCharacteristics[]
221
+ npcs: NPCCharacteristics[],
222
222
  ): ArchetypeResolver {
223
223
  const archetypeMap = new Map<string, string>();
224
224
 
@@ -227,7 +227,7 @@ export function createArchetypeResolver(
227
227
  }
228
228
 
229
229
  return (npcId: string): string => {
230
- return archetypeMap.get(npcId) ?? 'trader';
230
+ return archetypeMap.get(npcId) ?? "trader";
231
231
  };
232
232
  }
233
233
 
@@ -237,7 +237,7 @@ export function createArchetypeResolver(
237
237
  */
238
238
  export function getRoleArchetype(role: string): string {
239
239
  const normalized = role.toLowerCase().trim();
240
- return ROLE_TO_ARCHETYPE[normalized] ?? 'trader';
240
+ return ROLE_TO_ARCHETYPE[normalized] ?? "trader";
241
241
  }
242
242
 
243
243
  /**
@@ -10,7 +10,7 @@ export {
10
10
  ArchetypeConfigService,
11
11
  type ArchetypeTraits,
12
12
  archetypeConfigService,
13
- } from './ArchetypeConfigService';
13
+ } from "./ArchetypeConfigService";
14
14
 
15
15
  export {
16
16
  type ArchetypeResolver,
@@ -19,4 +19,4 @@ export {
19
19
  getRoleArchetype,
20
20
  getValidArchetypes,
21
21
  type NPCCharacteristics,
22
- } from './derive-archetype';
22
+ } from "./derive-archetype";
@@ -12,18 +12,18 @@
12
12
  import {
13
13
  type ArchetypeConfig,
14
14
  ArchetypeConfigService,
15
- } from '../archetypes/ArchetypeConfigService';
15
+ } from "../archetypes/ArchetypeConfigService";
16
16
  import {
17
17
  createMultiModelOrchestrator,
18
18
  type MultiModelOrchestrator,
19
- } from '../training/MultiModelOrchestrator';
20
- import { logger } from '../utils/logger';
19
+ } from "../training/MultiModelOrchestrator";
20
+ import { logger } from "../utils/logger";
21
21
  import {
22
22
  type BenchmarkConfig,
23
23
  BenchmarkDataGenerator,
24
24
  type BenchmarkGameSnapshot,
25
25
  type Tick,
26
- } from './BenchmarkDataGenerator';
26
+ } from "./BenchmarkDataGenerator";
27
27
 
28
28
  /**
29
29
  * Individual agent in the matchup simulation
@@ -95,7 +95,7 @@ export interface MatchupBenchmarkResult {
95
95
  headToHead: ArchetypeVsResult[];
96
96
 
97
97
  /** Market condition during benchmark */
98
- marketCondition: 'bull' | 'bear' | 'volatile' | 'stable';
98
+ marketCondition: "bull" | "bear" | "volatile" | "stable";
99
99
 
100
100
  /** Insights derived from the matchup */
101
101
  insights: string[];
@@ -106,7 +106,7 @@ export interface MatchupBenchmarkResult {
106
106
  */
107
107
  export interface MatchupBenchmarkConfig {
108
108
  /** Archetypes to include in matchup (or 'all' for all archetypes) */
109
- archetypes: string[] | 'all';
109
+ archetypes: string[] | "all";
110
110
 
111
111
  /** Number of agents per archetype */
112
112
  agentsPerArchetype: number;
@@ -118,7 +118,7 @@ export interface MatchupBenchmarkConfig {
118
118
  ticksPerRound: number;
119
119
 
120
120
  /** Market conditions to test */
121
- marketConditions: Array<'bull' | 'bear' | 'volatile' | 'stable'>;
121
+ marketConditions: Array<"bull" | "bear" | "volatile" | "stable">;
122
122
 
123
123
  /** Available VRAM for model loading */
124
124
  availableVramGb: number;
@@ -140,7 +140,7 @@ export class ArchetypeMatchupBenchmark {
140
140
  * Get all archetypes to benchmark
141
141
  */
142
142
  private getArchetypes(): string[] {
143
- if (this.config.archetypes === 'all') {
143
+ if (this.config.archetypes === "all") {
144
144
  return ArchetypeConfigService.getAvailableArchetypes();
145
145
  }
146
146
  return this.config.archetypes;
@@ -173,7 +173,7 @@ export class ArchetypeMatchupBenchmark {
173
173
  * Market condition affects seed to create different scenarios
174
174
  */
175
175
  private async generateBenchmarkData(
176
- condition: 'bull' | 'bear' | 'volatile' | 'stable'
176
+ condition: "bull" | "bear" | "volatile" | "stable",
177
177
  ): Promise<BenchmarkGameSnapshot> {
178
178
  // Convert ticks to duration minutes (assuming 1 tick per second)
179
179
  const durationMinutes = Math.ceil(this.config.ticksPerRound / 60);
@@ -190,8 +190,8 @@ export class ArchetypeMatchupBenchmark {
190
190
  const benchmarkConfig: BenchmarkConfig = {
191
191
  durationMinutes,
192
192
  tickInterval: 1,
193
- numPredictionMarkets: condition === 'volatile' ? 8 : 5,
194
- numPerpetualMarkets: condition === 'volatile' ? 5 : 3,
193
+ numPredictionMarkets: condition === "volatile" ? 8 : 5,
194
+ numPerpetualMarkets: condition === "volatile" ? 5 : 3,
195
195
  numAgents: 10,
196
196
  seed: baseSeed + (Date.now() % 1000), // Semi-reproducible
197
197
  };
@@ -206,18 +206,18 @@ export class ArchetypeMatchupBenchmark {
206
206
  private async simulateRound(
207
207
  agents: MatchupAgent[],
208
208
  snapshot: BenchmarkGameSnapshot,
209
- roundNumber: number
209
+ roundNumber: number,
210
210
  ): Promise<MatchupAgentResult[]> {
211
211
  const results: MatchupAgentResult[] = [];
212
212
 
213
213
  logger.info(
214
214
  `Simulating round ${roundNumber} with ${agents.length} agents`,
215
215
  { archetypes: [...new Set(agents.map((a) => a.archetype))] },
216
- 'ArchetypeMatchupBenchmark'
216
+ "ArchetypeMatchupBenchmark",
217
217
  );
218
218
 
219
219
  // Check if we should use real inference or simulation
220
- const useRealInference = process.env.USE_REAL_INFERENCE === 'true';
220
+ const useRealInference = process.env.USE_REAL_INFERENCE === "true";
221
221
 
222
222
  if (useRealInference) {
223
223
  // Use real model inference via the orchestrator
@@ -247,7 +247,7 @@ export class ArchetypeMatchupBenchmark {
247
247
  */
248
248
  private async runAgentWithRealModel(
249
249
  agent: MatchupAgent,
250
- snapshot: BenchmarkGameSnapshot
250
+ snapshot: BenchmarkGameSnapshot,
251
251
  ): Promise<MatchupAgentResult> {
252
252
  let totalPnl = 0;
253
253
  let totalTrades = 0;
@@ -275,20 +275,20 @@ export class ArchetypeMatchupBenchmark {
275
275
  // Parse the decision and simulate outcome
276
276
  const decision = this.parseAgentDecision(response.response);
277
277
 
278
- if (decision.action === 'trade') {
278
+ if (decision.action === "trade") {
279
279
  totalTrades++;
280
280
  // Simulate trade outcome based on market conditions
281
281
  const marketTrend = this.getMarketTrend(tick);
282
282
  const isCorrectDirection =
283
- (decision.direction === 'long' && marketTrend > 0) ||
284
- (decision.direction === 'short' && marketTrend < 0);
283
+ (decision.direction === "long" && marketTrend > 0) ||
284
+ (decision.direction === "short" && marketTrend < 0);
285
285
  if (isCorrectDirection) {
286
286
  wins++;
287
287
  totalPnl += Math.abs(marketTrend) * 100 * (decision.confidence || 1);
288
288
  } else {
289
289
  totalPnl -= Math.abs(marketTrend) * 50 * (decision.confidence || 1);
290
290
  }
291
- } else if (decision.action === 'post') {
291
+ } else if (decision.action === "post") {
292
292
  postsCreated++;
293
293
  }
294
294
  }
@@ -326,7 +326,7 @@ export class ArchetypeMatchupBenchmark {
326
326
 
327
327
  // Extract market prices from perpetual markets
328
328
  const marketPrices = Object.fromEntries(
329
- state.perpetualMarkets.map((m) => [m.ticker, m.price])
329
+ state.perpetualMarkets.map((m) => [m.ticker, m.price]),
330
330
  );
331
331
 
332
332
  // Recent posts can serve as "news"
@@ -352,8 +352,8 @@ Respond with a JSON object containing:
352
352
  * Parse agent decision from model response
353
353
  */
354
354
  private parseAgentDecision(response: string): {
355
- action: 'trade' | 'post' | 'observe';
356
- direction?: 'long' | 'short';
355
+ action: "trade" | "post" | "observe";
356
+ direction?: "long" | "short";
357
357
  confidence?: number;
358
358
  } {
359
359
  try {
@@ -362,7 +362,7 @@ Respond with a JSON object containing:
362
362
  if (jsonMatch) {
363
363
  const parsed = JSON.parse(jsonMatch[0]);
364
364
  return {
365
- action: parsed.action || 'observe',
365
+ action: parsed.action || "observe",
366
366
  direction: parsed.direction,
367
367
  confidence: parsed.confidence || 0.5,
368
368
  };
@@ -373,25 +373,25 @@ Respond with a JSON object containing:
373
373
 
374
374
  // Default behavior based on response content
375
375
  if (
376
- response.toLowerCase().includes('trade') ||
377
- response.toLowerCase().includes('buy') ||
378
- response.toLowerCase().includes('sell')
376
+ response.toLowerCase().includes("trade") ||
377
+ response.toLowerCase().includes("buy") ||
378
+ response.toLowerCase().includes("sell")
379
379
  ) {
380
380
  return {
381
- action: 'trade',
382
- direction: response.toLowerCase().includes('short') ? 'short' : 'long',
381
+ action: "trade",
382
+ direction: response.toLowerCase().includes("short") ? "short" : "long",
383
383
  confidence: 0.5,
384
384
  };
385
385
  }
386
386
 
387
387
  if (
388
- response.toLowerCase().includes('post') ||
389
- response.toLowerCase().includes('share')
388
+ response.toLowerCase().includes("post") ||
389
+ response.toLowerCase().includes("share")
390
390
  ) {
391
- return { action: 'post' };
391
+ return { action: "post" };
392
392
  }
393
393
 
394
- return { action: 'observe' };
394
+ return { action: "observe" };
395
395
  }
396
396
 
397
397
  /**
@@ -403,7 +403,7 @@ Respond with a JSON object containing:
403
403
  if (state.perpetualMarkets.length === 0) return 0;
404
404
 
405
405
  const prices = Object.fromEntries(
406
- state.perpetualMarkets.map((m) => [m.ticker, m.price])
406
+ state.perpetualMarkets.map((m) => [m.ticker, m.price]),
407
407
  );
408
408
 
409
409
  // Calculate average price change
@@ -422,7 +422,7 @@ Respond with a JSON object containing:
422
422
  */
423
423
  private simulateAgentPerformance(
424
424
  agent: MatchupAgent,
425
- snapshot: BenchmarkGameSnapshot
425
+ snapshot: BenchmarkGameSnapshot,
426
426
  ): MatchupAgentResult {
427
427
  const config = agent.config;
428
428
  const tickCount = snapshot.ticks.length;
@@ -465,7 +465,7 @@ Respond with a JSON object containing:
465
465
  * Calculate head-to-head results between archetypes
466
466
  */
467
467
  private calculateHeadToHead(
468
- allResults: MatchupAgentResult[][]
468
+ allResults: MatchupAgentResult[][],
469
469
  ): ArchetypeVsResult[] {
470
470
  const archetypes = this.getArchetypes();
471
471
  const headToHead: ArchetypeVsResult[] = [];
@@ -484,10 +484,10 @@ Respond with a JSON object containing:
484
484
  // Compare performance in each round
485
485
  for (const roundResults of allResults) {
486
486
  const arch1Results = roundResults.filter(
487
- (r) => r.archetype === arch1
487
+ (r) => r.archetype === arch1,
488
488
  );
489
489
  const arch2Results = roundResults.filter(
490
- (r) => r.archetype === arch2
490
+ (r) => r.archetype === arch2,
491
491
  );
492
492
 
493
493
  if (arch1Results.length === 0 || arch2Results.length === 0) continue;
@@ -532,8 +532,8 @@ Respond with a JSON object containing:
532
532
  * Calculate overall archetype rankings
533
533
  */
534
534
  private calculateRankings(
535
- allResults: MatchupAgentResult[][]
536
- ): MatchupBenchmarkResult['archetypeRankings'] {
535
+ allResults: MatchupAgentResult[][],
536
+ ): MatchupBenchmarkResult["archetypeRankings"] {
537
537
  const archetypes = this.getArchetypes();
538
538
  const rankings: Map<
539
539
  string,
@@ -593,9 +593,9 @@ Respond with a JSON object containing:
593
593
  * Generate insights from the matchup results
594
594
  */
595
595
  private generateInsights(
596
- rankings: MatchupBenchmarkResult['archetypeRankings'],
596
+ rankings: MatchupBenchmarkResult["archetypeRankings"],
597
597
  headToHead: ArchetypeVsResult[],
598
- marketCondition: string
598
+ marketCondition: string,
599
599
  ): string[] {
600
600
  const insights: string[] = [];
601
601
 
@@ -603,7 +603,7 @@ Respond with a JSON object containing:
603
603
  const topRanking = rankings[0];
604
604
  if (topRanking) {
605
605
  insights.push(
606
- `${topRanking.archetype} performed best in ${marketCondition} conditions with avg rank ${topRanking.avgRank.toFixed(2)}`
606
+ `${topRanking.archetype} performed best in ${marketCondition} conditions with avg rank ${topRanking.avgRank.toFixed(2)}`,
607
607
  );
608
608
  }
609
609
 
@@ -611,11 +611,11 @@ Respond with a JSON object containing:
611
611
  for (const h2h of headToHead) {
612
612
  if (h2h.winRate1 >= 0.7) {
613
613
  insights.push(
614
- `${h2h.archetype1} dominates ${h2h.archetype2} (${(h2h.winRate1 * 100).toFixed(0)}% win rate)`
614
+ `${h2h.archetype1} dominates ${h2h.archetype2} (${(h2h.winRate1 * 100).toFixed(0)}% win rate)`,
615
615
  );
616
616
  } else if (h2h.winRate2 >= 0.7) {
617
617
  insights.push(
618
- `${h2h.archetype2} dominates ${h2h.archetype1} (${(h2h.winRate2 * 100).toFixed(0)}% win rate)`
618
+ `${h2h.archetype2} dominates ${h2h.archetype1} (${(h2h.winRate2 * 100).toFixed(0)}% win rate)`,
619
619
  );
620
620
  }
621
621
  }
@@ -657,9 +657,9 @@ Respond with a JSON object containing:
657
657
  if (bWins) {
658
658
  for (const c of bWins) {
659
659
  const cWins = wins.get(c);
660
- if (cWins && cWins.has(a)) {
660
+ if (cWins?.has(a)) {
661
661
  insights.push(
662
- `Counter triangle found: ${a} → ${b} → ${c} → ${a}`
662
+ `Counter triangle found: ${a} → ${b} → ${c} → ${a}`,
663
663
  );
664
664
  }
665
665
  }
@@ -678,14 +678,14 @@ Respond with a JSON object containing:
678
678
  const results: MatchupBenchmarkResult[] = [];
679
679
 
680
680
  logger.info(
681
- 'Starting Archetype Matchup Benchmark',
681
+ "Starting Archetype Matchup Benchmark",
682
682
  {
683
683
  archetypes: this.getArchetypes(),
684
684
  agentsPerArchetype: this.config.agentsPerArchetype,
685
685
  rounds: this.config.rounds,
686
686
  conditions: this.config.marketConditions,
687
687
  },
688
- 'ArchetypeMatchupBenchmark'
688
+ "ArchetypeMatchupBenchmark",
689
689
  );
690
690
 
691
691
  const agents = this.createAgents();
@@ -694,7 +694,7 @@ Respond with a JSON object containing:
694
694
  logger.info(
695
695
  `Testing in ${condition} market conditions`,
696
696
  {},
697
- 'ArchetypeMatchupBenchmark'
697
+ "ArchetypeMatchupBenchmark",
698
698
  );
699
699
 
700
700
  const allRoundResults: MatchupAgentResult[][] = [];
@@ -704,7 +704,7 @@ Respond with a JSON object containing:
704
704
  const roundResults = await this.simulateRound(
705
705
  agents,
706
706
  snapshot,
707
- round + 1
707
+ round + 1,
708
708
  );
709
709
  allRoundResults.push(roundResults);
710
710
  }
@@ -734,7 +734,7 @@ Respond with a JSON object containing:
734
734
  topArchetype: rankings[0]?.archetype,
735
735
  avgPnl: rankings[0]?.avgPnl.toFixed(2),
736
736
  },
737
- 'ArchetypeMatchupBenchmark'
737
+ "ArchetypeMatchupBenchmark",
738
738
  );
739
739
  }
740
740
 
@@ -743,13 +743,13 @@ Respond with a JSON object containing:
743
743
 
744
744
  const totalDuration = Date.now() - startTime;
745
745
  logger.info(
746
- 'Archetype Matchup Benchmark complete',
746
+ "Archetype Matchup Benchmark complete",
747
747
  {
748
748
  totalDurationMs: totalDuration,
749
749
  conditionsTested: this.config.marketConditions.length,
750
750
  totalRounds: this.config.rounds * this.config.marketConditions.length,
751
751
  },
752
- 'ArchetypeMatchupBenchmark'
752
+ "ArchetypeMatchupBenchmark",
753
753
  );
754
754
 
755
755
  return results;
@@ -760,49 +760,49 @@ Respond with a JSON object containing:
760
760
  */
761
761
  static generateReport(results: MatchupBenchmarkResult[]): string {
762
762
  const lines: string[] = [];
763
- lines.push('# Archetype Matchup Benchmark Report\n');
763
+ lines.push("# Archetype Matchup Benchmark Report\n");
764
764
 
765
765
  for (const result of results) {
766
766
  lines.push(
767
- `## ${result.marketCondition.toUpperCase()} Market Conditions\n`
767
+ `## ${result.marketCondition.toUpperCase()} Market Conditions\n`,
768
768
  );
769
769
 
770
770
  // Rankings table
771
- lines.push('### Overall Rankings\n');
772
- lines.push('| Rank | Archetype | Avg PnL | Win Rate |');
773
- lines.push('|------|-----------|---------|----------|');
771
+ lines.push("### Overall Rankings\n");
772
+ lines.push("| Rank | Archetype | Avg PnL | Win Rate |");
773
+ lines.push("|------|-----------|---------|----------|");
774
774
  for (const ranking of result.archetypeRankings) {
775
775
  lines.push(
776
- `| ${ranking.avgRank.toFixed(1)} | ${ranking.archetype} | ${ranking.avgPnl.toFixed(2)} | ${(ranking.winRate * 100).toFixed(1)}% |`
776
+ `| ${ranking.avgRank.toFixed(1)} | ${ranking.archetype} | ${ranking.avgPnl.toFixed(2)} | ${(ranking.winRate * 100).toFixed(1)}% |`,
777
777
  );
778
778
  }
779
- lines.push('');
779
+ lines.push("");
780
780
 
781
781
  // Head-to-head table
782
- lines.push('### Head-to-Head Results\n');
783
- lines.push('| Matchup | Winner | Win Rate |');
784
- lines.push('|---------|--------|----------|');
782
+ lines.push("### Head-to-Head Results\n");
783
+ lines.push("| Matchup | Winner | Win Rate |");
784
+ lines.push("|---------|--------|----------|");
785
785
  for (const h2h of result.headToHead) {
786
786
  const winner =
787
787
  h2h.winRate1 > h2h.winRate2 ? h2h.archetype1 : h2h.archetype2;
788
788
  const winRate = Math.max(h2h.winRate1, h2h.winRate2);
789
789
  lines.push(
790
- `| ${h2h.archetype1} vs ${h2h.archetype2} | ${winner} | ${(winRate * 100).toFixed(1)}% |`
790
+ `| ${h2h.archetype1} vs ${h2h.archetype2} | ${winner} | ${(winRate * 100).toFixed(1)}% |`,
791
791
  );
792
792
  }
793
- lines.push('');
793
+ lines.push("");
794
794
 
795
795
  // Insights
796
796
  if (result.insights.length > 0) {
797
- lines.push('### Key Insights\n');
797
+ lines.push("### Key Insights\n");
798
798
  for (const insight of result.insights) {
799
799
  lines.push(`- ${insight}`);
800
800
  }
801
- lines.push('');
801
+ lines.push("");
802
802
  }
803
803
  }
804
804
 
805
- return lines.join('\n');
805
+ return lines.join("\n");
806
806
  }
807
807
  }
808
808
 
@@ -813,11 +813,11 @@ export async function runQuickMatchupBenchmark(): Promise<
813
813
  MatchupBenchmarkResult[]
814
814
  > {
815
815
  const benchmark = new ArchetypeMatchupBenchmark({
816
- archetypes: 'all',
816
+ archetypes: "all",
817
817
  agentsPerArchetype: 2,
818
818
  rounds: 5,
819
819
  ticksPerRound: 100,
820
- marketConditions: ['bull', 'bear', 'volatile', 'stable'],
820
+ marketConditions: ["bull", "bear", "volatile", "stable"],
821
821
  availableVramGb: 16,
822
822
  });
823
823