@elizaos/training 2.0.0-alpha.76 → 2.0.0-alpha.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/package.json +2 -2
  2. package/.turbo/turbo-lint.log +0 -3
  3. package/.turbo/turbo-typecheck.log +0 -1
  4. package/dist/.tsbuildinfo +0 -1
  5. package/dist/adapter.js +0 -59
  6. package/dist/archetypes/ArchetypeConfigService.js +0 -510
  7. package/dist/archetypes/derive-archetype.js +0 -196
  8. package/dist/archetypes/index.js +0 -7
  9. package/dist/benchmark/ArchetypeMatchupBenchmark.js +0 -547
  10. package/dist/benchmark/BenchmarkChartGenerator.js +0 -632
  11. package/dist/benchmark/BenchmarkDataGenerator.js +0 -825
  12. package/dist/benchmark/BenchmarkDataViewer.js +0 -197
  13. package/dist/benchmark/BenchmarkHistoryService.js +0 -135
  14. package/dist/benchmark/BenchmarkRunner.js +0 -483
  15. package/dist/benchmark/BenchmarkValidator.js +0 -158
  16. package/dist/benchmark/FastEvalRunner.js +0 -133
  17. package/dist/benchmark/MetricsValidator.js +0 -104
  18. package/dist/benchmark/MetricsVisualizer.js +0 -775
  19. package/dist/benchmark/ModelBenchmarkService.js +0 -433
  20. package/dist/benchmark/ModelRegistry.js +0 -122
  21. package/dist/benchmark/RulerBenchmarkIntegration.js +0 -168
  22. package/dist/benchmark/SimulationA2AInterface.js +0 -683
  23. package/dist/benchmark/SimulationEngine.js +0 -522
  24. package/dist/benchmark/TaskRunner.js +0 -60
  25. package/dist/benchmark/__tests__/BenchmarkRunner.test.js +0 -409
  26. package/dist/benchmark/__tests__/HeadToHead.test.js +0 -105
  27. package/dist/benchmark/index.js +0 -23
  28. package/dist/benchmark/parseSimulationMetrics.js +0 -86
  29. package/dist/benchmark/simulation-types.js +0 -1
  30. package/dist/dependencies.js +0 -197
  31. package/dist/generation/TrajectoryGenerator.js +0 -244
  32. package/dist/generation/index.js +0 -6
  33. package/dist/huggingface/HuggingFaceDatasetUploader.js +0 -463
  34. package/dist/huggingface/HuggingFaceIntegrationService.js +0 -272
  35. package/dist/huggingface/HuggingFaceModelUploader.js +0 -385
  36. package/dist/huggingface/index.js +0 -9
  37. package/dist/huggingface/shared/HuggingFaceUploadUtil.js +0 -144
  38. package/dist/index.js +0 -41
  39. package/dist/init-training.js +0 -43
  40. package/dist/metrics/TrajectoryMetricsExtractor.js +0 -523
  41. package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +0 -628
  42. package/dist/metrics/index.js +0 -7
  43. package/dist/metrics/types.js +0 -21
  44. package/dist/rubrics/__tests__/index.test.js +0 -150
  45. package/dist/rubrics/ass-kisser.js +0 -83
  46. package/dist/rubrics/degen.js +0 -78
  47. package/dist/rubrics/goody-twoshoes.js +0 -82
  48. package/dist/rubrics/index.js +0 -184
  49. package/dist/rubrics/information-trader.js +0 -82
  50. package/dist/rubrics/infosec.js +0 -99
  51. package/dist/rubrics/liar.js +0 -102
  52. package/dist/rubrics/perps-trader.js +0 -85
  53. package/dist/rubrics/researcher.js +0 -79
  54. package/dist/rubrics/scammer.js +0 -80
  55. package/dist/rubrics/social-butterfly.js +0 -71
  56. package/dist/rubrics/super-predictor.js +0 -95
  57. package/dist/rubrics/trader.js +0 -65
  58. package/dist/scoring/ArchetypeScoringService.js +0 -301
  59. package/dist/scoring/JudgePromptBuilder.js +0 -401
  60. package/dist/scoring/LLMJudgeCache.js +0 -263
  61. package/dist/scoring/index.js +0 -8
  62. package/dist/training/AutomationPipeline.js +0 -714
  63. package/dist/training/BenchmarkService.js +0 -370
  64. package/dist/training/ConfigValidator.js +0 -153
  65. package/dist/training/MarketOutcomesTracker.js +0 -142
  66. package/dist/training/ModelDeployer.js +0 -128
  67. package/dist/training/ModelFetcher.js +0 -48
  68. package/dist/training/ModelSelectionService.js +0 -248
  69. package/dist/training/ModelUsageVerifier.js +0 -106
  70. package/dist/training/MultiModelOrchestrator.js +0 -349
  71. package/dist/training/RLModelConfig.js +0 -295
  72. package/dist/training/RewardBackpropagationService.js +0 -117
  73. package/dist/training/RulerScoringService.js +0 -450
  74. package/dist/training/TrainingMonitor.js +0 -108
  75. package/dist/training/TrajectoryRecorder.js +0 -281
  76. package/dist/training/__tests__/TrajectoryRecorder.test.js +0 -363
  77. package/dist/training/index.js +0 -30
  78. package/dist/training/logRLConfig.js +0 -29
  79. package/dist/training/pipeline.js +0 -80
  80. package/dist/training/storage/ModelStorageService.js +0 -190
  81. package/dist/training/storage/TrainingDataArchiver.js +0 -136
  82. package/dist/training/storage/index.js +0 -7
  83. package/dist/training/types.js +0 -6
  84. package/dist/training/window-utils.js +0 -100
  85. package/dist/utils/index.js +0 -73
  86. package/dist/utils/logger.js +0 -55
  87. package/dist/utils/snowflake.js +0 -15
  88. package/dist/utils/synthetic-detector.js +0 -67
  89. package/vitest.config.ts +0 -8
@@ -1,547 +0,0 @@
1
- /**
2
- * Archetype Matchup Benchmark
3
- *
4
- * Simulates multiple archetypes competing against each other to understand:
5
- * - Which archetypes perform best in different market conditions
6
- * - How archetypes interact (trader vs scammer, social-butterfly vs contrarian, etc.)
7
- * - Relative strengths and weaknesses
8
- *
9
- * Uses the multi-model orchestrator to efficiently run multiple archetype models.
10
- */
11
- import { ArchetypeConfigService, } from "../archetypes/ArchetypeConfigService";
12
- import { createMultiModelOrchestrator, } from "../training/MultiModelOrchestrator";
13
- import { logger } from "../utils/logger";
14
- import { BenchmarkDataGenerator, } from "./BenchmarkDataGenerator";
15
/**
 * Runs multi-archetype benchmark simulations.
 *
 * For every configured market condition the benchmark generates benchmark
 * data, lets one or more agents per archetype act on it (either through real
 * model inference or through a randomized simulation), and aggregates the
 * per-round results into rankings, head-to-head statistics and textual
 * insights.
 */
export class ArchetypeMatchupBenchmark {
    /** Benchmark configuration as passed to the constructor. */
    config;
    /** Orchestrator created from `config.availableVramGb`; used for inference and cleanup. */
    orchestrator;
    /**
     * @param {object} config - Benchmark settings. The class reads `archetypes`
     *   (`"all"` or a list), `agentsPerArchetype`, `rounds`, `ticksPerRound`,
     *   `marketConditions` and `availableVramGb`.
     */
    constructor(config) {
        this.config = config;
        this.orchestrator = createMultiModelOrchestrator(config.availableVramGb);
    }
    /**
     * Get all archetypes to benchmark.
     *
     * @returns {string[]} Every available archetype when the config says
     *   `"all"`, otherwise the configured list itself (not a copy).
     */
    getArchetypes() {
        if (this.config.archetypes === "all") {
            return ArchetypeConfigService.getAvailableArchetypes();
        }
        return this.config.archetypes;
    }
    /**
     * Create agents for the matchup.
     *
     * @returns {Array<{id: string, archetype: string, config: object}>}
     *   `agentsPerArchetype` agents per archetype, with ids of the form
     *   `<archetype>-<n>` where `n` starts at 1.
     */
    createAgents() {
        const agents = [];
        for (const archetype of this.getArchetypes()) {
            const archetypeConfig = ArchetypeConfigService.getConfig(archetype);
            for (let i = 0; i < this.config.agentsPerArchetype; i++) {
                agents.push({
                    id: `${archetype}-${i + 1}`,
                    archetype,
                    config: archetypeConfig,
                });
            }
        }
        return agents;
    }
    /**
     * Generate benchmark data for a market condition.
     * The market condition selects the base seed and the number of markets.
     *
     * @param {string} condition - Market condition name ("bull", "bear",
     *   "volatile" or "stable"); unknown names fall back to base seed 1000.
     * @returns {Promise<object>} Whatever `BenchmarkDataGenerator#generate` yields.
     */
    async generateBenchmarkData(condition) {
        // Convert ticks to duration minutes (assuming 1 tick per second)
        const durationMinutes = Math.ceil(this.config.ticksPerRound / 60);
        // A Map (rather than a plain object) keeps inherited keys such as
        // "toString" from being mistaken for a seed.
        const conditionSeeds = new Map([
            ["bull", 1001],
            ["bear", 2002],
            ["volatile", 3003],
            ["stable", 4004],
        ]);
        const baseSeed = conditionSeeds.get(condition) ?? 1000;
        const isVolatile = condition === "volatile";
        const benchmarkConfig = {
            durationMinutes,
            tickInterval: 1,
            numPredictionMarkets: isVolatile ? 8 : 5,
            numPerpetualMarkets: isVolatile ? 5 : 3,
            numAgents: 10,
            // Semi-reproducible: the wall clock adds an offset in [0, 999], so
            // two calls for the same condition usually get different seeds.
            seed: baseSeed + (Date.now() % 1000),
        };
        const generator = new BenchmarkDataGenerator(benchmarkConfig);
        return generator.generate();
    }
    /**
     * Simulate a single round of the matchup.
     *
     * Real model inference is used only when the environment variable
     * `USE_REAL_INFERENCE` equals "true"; otherwise performance is simulated.
     *
     * @param {Array<object>} agents - Agents produced by `createAgents`.
     * @param {object} snapshot - Benchmark data; must expose a `ticks` array.
     * @param {number} roundNumber - 1-based round number (used for logging only).
     * @returns {Promise<Array<object>>} Results sorted by PnL descending, with
     *   `rank` set to the 1-based position in that order.
     */
    async simulateRound(agents, snapshot, roundNumber) {
        const results = [];
        logger.info(`Simulating round ${roundNumber} with ${agents.length} agents`, { archetypes: [...new Set(agents.map((a) => a.archetype))] }, "ArchetypeMatchupBenchmark");
        // Check if we should use real inference or simulation
        const useRealInference = process.env.USE_REAL_INFERENCE === "true";
        if (useRealInference) {
            // Agents are run one after another, each awaiting its own inference calls.
            for (const agent of agents) {
                results.push(await this.runAgentWithRealModel(agent, snapshot));
            }
        }
        else {
            // Use simulated performance based on archetype characteristics
            for (const agent of agents) {
                results.push(this.simulateAgentPerformance(agent, snapshot));
            }
        }
        // Assign ranks: highest PnL gets rank 1
        results.sort((a, b) => b.pnl - a.pnl);
        results.forEach((r, i) => {
            r.rank = i + 1;
        });
        return results;
    }
    /**
     * Run an agent with real model inference.
     *
     * Only every 10th tick is considered, capped at 10 ticks. For each one the
     * model is asked for a decision; trades are scored against
     * `getMarketTrend`, posts are merely counted.
     *
     * @param {object} agent - Agent with `id`, `archetype` and `config.system`.
     * @param {object} snapshot - Benchmark data; must expose a `ticks` array.
     * @returns {Promise<object>} Result record with `rank` preset to 0.
     */
    async runAgentWithRealModel(agent, snapshot) {
        let totalPnl = 0;
        let totalTrades = 0;
        let wins = 0;
        let postsCreated = 0;
        // Process a subset of ticks (every 10th tick to speed up)
        const ticksToProcess = snapshot.ticks
            .filter((_, i) => i % 10 === 0)
            .slice(0, 10);
        for (const tick of ticksToProcess) {
            // Build a prompt with the current game state
            const prompt = this.buildDecisionPrompt(agent, tick);
            // Get decision from model
            const response = await this.orchestrator.inference({
                archetype: agent.archetype,
                prompt,
                systemPrompt: agent.config.system,
                maxTokens: 256,
                temperature: 0.7,
            });
            // Parse the decision and simulate outcome
            const decision = this.parseAgentDecision(response.response);
            if (decision.action === "trade") {
                totalTrades++;
                // `??` (not `||`) so an explicit confidence of 0 really
                // scales the outcome to zero instead of becoming 1.
                const confidence = decision.confidence ?? 1;
                const marketTrend = this.getMarketTrend(tick);
                const isCorrectDirection = (decision.direction === "long" && marketTrend > 0) ||
                    (decision.direction === "short" && marketTrend < 0);
                if (isCorrectDirection) {
                    wins++;
                    totalPnl += Math.abs(marketTrend) * 100 * confidence;
                }
                else {
                    // Wrong calls cost half of what correct calls earn.
                    totalPnl -= Math.abs(marketTrend) * 50 * confidence;
                }
            }
            else if (decision.action === "post") {
                postsCreated++;
            }
        }
        const winRate = totalTrades > 0 ? wins / totalTrades : 0;
        return {
            agentId: agent.id,
            archetype: agent.archetype,
            pnl: totalPnl,
            tradingMetrics: {
                totalTrades,
                winRate,
                avgPnlPerTrade: totalTrades > 0 ? totalPnl / totalTrades : 0,
            },
            socialMetrics: {
                postsCreated,
                engagementReceived: postsCreated * 5,
                reputationGained: postsCreated * 10 + wins * 5,
            },
            actions: totalTrades + postsCreated,
            rank: 0,
        };
    }
    /**
     * Build a decision prompt for the agent.
     *
     * @param {object} agent - Agent with `id` and `archetype`.
     * @param {object} tick - Tick with `timestamp` and `state` (`agents`,
     *   `perpetualMarkets` and optionally `posts`).
     * @returns {string} Prompt text asking for a JSON decision.
     */
    buildDecisionPrompt(agent, tick) {
        const state = tick.state;
        // Balance is 1000 plus the agent's PnL in the tick state; it stays at
        // 1000 when no state entry with this agent's id exists.
        const agentState = state.agents.find((a) => a.id === agent.id);
        const agentBalance = agentState?.totalPnl !== undefined ? 1000 + agentState.totalPnl : 1000;
        // Extract market prices from perpetual markets
        const marketPrices = Object.fromEntries(state.perpetualMarkets.map((m) => [m.ticker, m.price]));
        // The five most recent posts serve as "news"
        const recentNews = state.posts?.slice(-5).map((p) => p.content) || [];
        return `
Current game state:
- Timestamp: ${tick.timestamp}
- Your balance: ${agentBalance}
- Market prices: ${JSON.stringify(marketPrices)}
- Recent news: ${JSON.stringify(recentNews)}

As a ${agent.archetype} agent, what action would you take?
Respond with a JSON object containing:
- action: "trade" | "post" | "observe"
- direction: "long" | "short" (if trading)
- confidence: 0.0 to 1.0
- reasoning: brief explanation
`;
    }
    /**
     * Parse agent decision from model response.
     *
     * A JSON object embedded in the response wins; if none can be parsed the
     * text is scanned for keywords. Anything else means "observe".
     *
     * @param {string} response - Raw model output; non-strings are treated as empty.
     * @returns {{action: string, direction?: string, confidence?: number}}
     *   For JSON decisions `confidence` is clamped to [0, 1] and defaults to
     *   0.5 when missing or not numeric.
     */
    parseAgentDecision(response) {
        // Guard against a missing/non-string model response instead of throwing.
        const text = typeof response === "string" ? response : "";
        try {
            // Greedy match: from the first "{" to the last "}" in the response
            const jsonMatch = text.match(/\{[\s\S]*\}/);
            if (jsonMatch) {
                const parsed = JSON.parse(jsonMatch[0]);
                const rawConfidence = typeof parsed.confidence === "string"
                    ? Number.parseFloat(parsed.confidence)
                    : parsed.confidence;
                // Keep an explicit 0, and keep the value inside the 0..1
                // range the prompt asks for.
                const confidence = typeof rawConfidence === "number" && Number.isFinite(rawConfidence)
                    ? Math.min(1, Math.max(0, rawConfidence))
                    : 0.5;
                return {
                    action: parsed.action || "observe",
                    direction: parsed.direction,
                    confidence,
                };
            }
        }
        catch {
            // Deliberate best effort: malformed JSON falls through to the
            // keyword heuristics below.
        }
        // Default behavior based on response content
        const lowered = text.toLowerCase();
        if (lowered.includes("trade") ||
            lowered.includes("buy") ||
            lowered.includes("sell")) {
            return {
                action: "trade",
                direction: lowered.includes("short") ? "short" : "long",
                confidence: 0.5,
            };
        }
        if (lowered.includes("post") || lowered.includes("share")) {
            return { action: "post" };
        }
        return { action: "observe" };
    }
    /**
     * Get market trend from tick data.
     *
     * @param {object} tick - Tick whose `state.perpetualMarkets` carry `ticker` and `price`.
     * @returns {number} `(average price - 100) / 100`, or 0 without markets.
     *   NOTE(review): this treats 100 as the baseline price; the value exceeds
     *   1 once the average price is above 200 - confirm the generator's range.
     */
    getMarketTrend(tick) {
        const state = tick.state;
        if (state.perpetualMarkets.length === 0)
            return 0;
        // Keyed by ticker, so a repeated ticker counts once (last price wins).
        const prices = Object.fromEntries(state.perpetualMarkets.map((m) => [m.ticker, m.price]));
        const priceValues = Object.values(prices);
        if (priceValues.length === 0)
            return 0;
        const avgPrice = priceValues.reduce((a, b) => a + b, 0) / priceValues.length;
        return (avgPrice - 100) / 100;
    }
    /**
     * Simulate agent performance based on archetype characteristics.
     * Used when real model inference is not enabled. Results are random
     * (`Math.random`) and therefore differ between calls.
     *
     * @param {object} agent - Agent whose `config` provides `riskTolerance`
     *   and `actionWeights.trade` / `actionWeights.post`.
     * @param {object} snapshot - Benchmark data; only `ticks.length` is used.
     * @returns {object} Result record with `rank` preset to 0.
     */
    simulateAgentPerformance(agent, snapshot) {
        const config = agent.config;
        const tickCount = snapshot.ticks.length;
        // Higher risk tolerance = higher variance in PnL
        const riskFactor = config.riskTolerance;
        const basePnl = (Math.random() - 0.5) * 1000 * riskFactor;
        // Trading-focused archetypes trade more
        const totalTrades = Math.floor(tickCount * config.actionWeights.trade * 0.1);
        const winRate = 0.45 + (config.riskTolerance < 0.5 ? 0.15 : -0.05) + Math.random() * 0.1;
        // Social-focused archetypes post more
        const postsCreated = Math.floor(tickCount * config.actionWeights.post * 0.05);
        // Winning records get a random bonus of up to 100, losing ones a penalty.
        const pnl = basePnl + (winRate > 0.5 ? 100 : -100) * Math.random();
        return {
            agentId: agent.id,
            archetype: agent.archetype,
            pnl,
            tradingMetrics: {
                totalTrades,
                winRate,
                // Derived from the reported PnL (and 0 without trades) so the
                // figure agrees with `pnl` and with runAgentWithRealModel.
                avgPnlPerTrade: totalTrades > 0 ? pnl / totalTrades : 0,
            },
            socialMetrics: {
                postsCreated,
                engagementReceived: postsCreated * (2 + Math.random() * 5),
                reputationGained: postsCreated * 10,
            },
            actions: totalTrades + postsCreated,
            rank: 0, // Set after sorting
        };
    }
    /**
     * Calculate head-to-head results between archetypes.
     *
     * For every unordered pair of archetypes, each round is one "game": the
     * archetype with the higher average PnL across its agents wins it. Rounds
     * in which either archetype has no results are skipped.
     *
     * @param {Array<Array<object>>} allResults - One result list per round.
     * @returns {Array<object>} One record per archetype pair.
     */
    calculateHeadToHead(allResults) {
        const archetypes = this.getArchetypes();
        const headToHead = [];
        const averagePnl = (entries) => entries.reduce((sum, r) => sum + r.pnl, 0) / entries.length;
        for (let i = 0; i < archetypes.length; i++) {
            for (let j = i + 1; j < archetypes.length; j++) {
                const arch1 = archetypes[i];
                const arch2 = archetypes[j];
                let wins1 = 0;
                let wins2 = 0;
                let ties = 0;
                let margin1Total = 0;
                let margin2Total = 0;
                // Compare performance in each round
                for (const roundResults of allResults) {
                    const arch1Results = roundResults.filter((r) => r.archetype === arch1);
                    const arch2Results = roundResults.filter((r) => r.archetype === arch2);
                    if (arch1Results.length === 0 || arch2Results.length === 0)
                        continue;
                    const avgPnl1 = averagePnl(arch1Results);
                    const avgPnl2 = averagePnl(arch2Results);
                    if (avgPnl1 > avgPnl2) {
                        wins1++;
                        margin1Total += avgPnl1 - avgPnl2;
                    }
                    else if (avgPnl2 > avgPnl1) {
                        wins2++;
                        margin2Total += avgPnl2 - avgPnl1;
                    }
                    else {
                        ties++;
                    }
                }
                const totalGames = wins1 + wins2 + ties;
                headToHead.push({
                    archetype1: arch1,
                    archetype2: arch2,
                    archetype1Wins: wins1,
                    archetype2Wins: wins2,
                    ties,
                    // Average margin over the rounds that archetype won
                    archetype1AvgMargin: wins1 > 0 ? margin1Total / wins1 : 0,
                    archetype2AvgMargin: wins2 > 0 ? margin2Total / wins2 : 0,
                    winRate1: totalGames > 0 ? wins1 / totalGames : 0,
                    winRate2: totalGames > 0 ? wins2 / totalGames : 0,
                });
            }
        }
        return headToHead;
    }
    /**
     * Calculate overall archetype rankings.
     *
     * Statistics are accumulated per agent result: a "win" is a result with
     * rank 1, a "loss" is a result ranked last in its round, and `winRate`
     * is wins divided by the number of agent results of that archetype.
     *
     * @param {Array<Array<object>>} allResults - One ranked result list per round.
     * @returns {Array<object>} Rankings sorted by average rank, best first.
     */
    calculateRankings(allResults) {
        const rankings = new Map();
        // Initialize
        for (const arch of this.getArchetypes()) {
            rankings.set(arch, {
                totalRank: 0,
                totalPnl: 0,
                wins: 0,
                losses: 0,
                count: 0,
            });
        }
        // Aggregate results; results of archetypes not being benchmarked are ignored
        for (const roundResults of allResults) {
            for (const result of roundResults) {
                const stats = rankings.get(result.archetype);
                if (!stats)
                    continue;
                stats.totalRank += result.rank;
                stats.totalPnl += result.pnl;
                stats.count++;
                if (result.rank === 1)
                    stats.wins++;
                if (result.rank === roundResults.length)
                    stats.losses++;
            }
        }
        return Array.from(rankings.entries())
            .map(([archetype, stats]) => ({
            archetype,
            avgRank: stats.count > 0 ? stats.totalRank / stats.count : 0,
            avgPnl: stats.count > 0 ? stats.totalPnl / stats.count : 0,
            totalWins: stats.wins,
            totalLosses: stats.losses,
            winRate: stats.count > 0 ? stats.wins / stats.count : 0,
        }))
            .sort((a, b) => a.avgRank - b.avgRank);
    }
    /**
     * Generate insights from the matchup results.
     *
     * @param {Array<object>} rankings - Output of `calculateRankings`.
     * @param {Array<object>} headToHead - Output of `calculateHeadToHead`.
     * @param {string} marketCondition - Condition name used in the wording.
     * @returns {string[]} Human-readable insight sentences.
     */
    generateInsights(rankings, headToHead, marketCondition) {
        const insights = [];
        // Top performer insight
        const topRanking = rankings[0];
        if (topRanking) {
            insights.push(`${topRanking.archetype} performed best in ${marketCondition} conditions with avg rank ${topRanking.avgRank.toFixed(2)}`);
        }
        // Dominant matchups: a head-to-head win rate of at least 70%
        for (const h2h of headToHead) {
            if (h2h.winRate1 >= 0.7) {
                insights.push(`${h2h.archetype1} dominates ${h2h.archetype2} (${(h2h.winRate1 * 100).toFixed(0)}% win rate)`);
            }
            else if (h2h.winRate2 >= 0.7) {
                insights.push(`${h2h.archetype2} dominates ${h2h.archetype1} (${(h2h.winRate2 * 100).toFixed(0)}% win rate)`);
            }
        }
        // Rock-paper-scissors patterns
        insights.push(...this.findCounterArchetypes(headToHead));
        return insights;
    }
    /**
     * Find archetype counter relationships (A beats B, B beats C, C beats A).
     *
     * "Beats" means a head-to-head win rate above 60%. Each triangle is
     * reported once, regardless of which member it is discovered from.
     *
     * @param {Array<object>} headToHead - Output of `calculateHeadToHead`.
     * @returns {string[]} One insight sentence per distinct triangle.
     */
    findCounterArchetypes(headToHead) {
        const insights = [];
        const wins = new Map();
        const recordWin = (winner, loser) => {
            const beaten = wins.get(winner) || new Set();
            beaten.add(loser);
            wins.set(winner, beaten);
        };
        // Build win graph
        for (const h2h of headToHead) {
            if (h2h.winRate1 > 0.6)
                recordWin(h2h.archetype1, h2h.archetype2);
            if (h2h.winRate2 > 0.6)
                recordWin(h2h.archetype2, h2h.archetype1);
        }
        // The same cycle is reachable from each of its three members, so
        // remember the member set of every triangle already reported.
        const reported = new Set();
        for (const [a, aWins] of wins) {
            for (const b of aWins) {
                const bWins = wins.get(b);
                if (!bWins)
                    continue;
                for (const c of bWins) {
                    if (!wins.get(c)?.has(a))
                        continue;
                    const key = JSON.stringify([a, b, c].sort());
                    if (reported.has(key))
                        continue;
                    reported.add(key);
                    insights.push(`Counter triangle found: ${a} → ${b} → ${c} → ${a}`);
                }
            }
        }
        return insights;
    }
    /**
     * Run the complete matchup benchmark.
     *
     * For each configured market condition, `config.rounds` rounds are
     * simulated and aggregated into one result record. The orchestrator is
     * asked to unload its models afterwards, even if a round throws.
     *
     * @returns {Promise<Array<object>>} One result per market condition. Its
     *   `duration` is the time elapsed since `run` started, not the time
     *   spent on that condition alone.
     */
    async run() {
        const startTime = Date.now();
        const results = [];
        logger.info("Starting Archetype Matchup Benchmark", {
            archetypes: this.getArchetypes(),
            agentsPerArchetype: this.config.agentsPerArchetype,
            rounds: this.config.rounds,
            conditions: this.config.marketConditions,
        }, "ArchetypeMatchupBenchmark");
        try {
            const agents = this.createAgents();
            for (const condition of this.config.marketConditions) {
                logger.info(`Testing in ${condition} market conditions`, {}, "ArchetypeMatchupBenchmark");
                const allRoundResults = [];
                for (let round = 0; round < this.config.rounds; round++) {
                    const snapshot = await this.generateBenchmarkData(condition);
                    const roundResults = await this.simulateRound(agents, snapshot, round + 1);
                    allRoundResults.push(roundResults);
                }
                // Calculate aggregated results
                const headToHead = this.calculateHeadToHead(allRoundResults);
                const rankings = this.calculateRankings(allRoundResults);
                const insights = this.generateInsights(rankings, headToHead, condition);
                results.push({
                    benchmarkId: `matchup-${condition}-${Date.now()}`,
                    timestamp: Date.now(),
                    duration: Date.now() - startTime,
                    // Flattened agent results of all rounds for this condition
                    agents: allRoundResults.flat(),
                    archetypeRankings: rankings,
                    headToHead,
                    marketCondition: condition,
                    insights,
                });
                logger.info(`Completed ${condition} market benchmark`, {
                    topArchetype: rankings[0]?.archetype,
                    avgPnl: rankings[0]?.avgPnl.toFixed(2),
                }, "ArchetypeMatchupBenchmark");
            }
        }
        finally {
            // Cleanup must happen on failure too, otherwise loaded models
            // stay resident. `await` is harmless if unloadAll is synchronous.
            await this.orchestrator.unloadAll();
        }
        const totalDuration = Date.now() - startTime;
        logger.info("Archetype Matchup Benchmark complete", {
            totalDurationMs: totalDuration,
            conditionsTested: this.config.marketConditions.length,
            totalRounds: this.config.rounds * this.config.marketConditions.length,
        }, "ArchetypeMatchupBenchmark");
        return results;
    }
    /**
     * Generate a summary report of the matchup results.
     *
     * @param {Array<object>} results - Output of `run`.
     * @returns {string} Markdown with, per market condition, a rankings table,
     *   a head-to-head table and (if any) the key insights.
     */
    static generateReport(results) {
        const lines = [];
        lines.push("# Archetype Matchup Benchmark Report\n");
        for (const result of results) {
            lines.push(`## ${result.marketCondition.toUpperCase()} Market Conditions\n`);
            // Rankings table
            lines.push("### Overall Rankings\n");
            lines.push("| Rank | Archetype | Avg PnL | Win Rate |");
            lines.push("|------|-----------|---------|----------|");
            for (const ranking of result.archetypeRankings) {
                lines.push(`| ${ranking.avgRank.toFixed(1)} | ${ranking.archetype} | ${ranking.avgPnl.toFixed(2)} | ${(ranking.winRate * 100).toFixed(1)}% |`);
            }
            lines.push("");
            // Head-to-head table
            lines.push("### Head-to-Head Results\n");
            lines.push("| Matchup | Winner | Win Rate |");
            lines.push("|---------|--------|----------|");
            for (const h2h of result.headToHead) {
                // Equal win rates are a tie; do not credit either archetype.
                let winner = "Tie";
                if (h2h.winRate1 > h2h.winRate2) {
                    winner = h2h.archetype1;
                }
                else if (h2h.winRate2 > h2h.winRate1) {
                    winner = h2h.archetype2;
                }
                const winRate = Math.max(h2h.winRate1, h2h.winRate2);
                lines.push(`| ${h2h.archetype1} vs ${h2h.archetype2} | ${winner} | ${(winRate * 100).toFixed(1)}% |`);
            }
            lines.push("");
            // Insights
            if (result.insights.length > 0) {
                lines.push("### Key Insights\n");
                for (const insight of result.insights) {
                    lines.push(`- ${insight}`);
                }
                lines.push("");
            }
        }
        return lines.join("\n");
    }
}
534
/**
 * Run a quick matchup benchmark with sensible defaults.
 *
 * Defaults: all archetypes, 2 agents per archetype, 5 rounds of 100 ticks,
 * the four market conditions "bull", "bear", "volatile" and "stable", and
 * 16 GB of available VRAM.
 *
 * @param {object} [overrides={}] - Optional config fields that replace the
 *   corresponding defaults (shallow merge), e.g. `{ rounds: 1 }`.
 * @returns {Promise<Array<object>>} The results of `ArchetypeMatchupBenchmark#run`.
 */
export async function runQuickMatchupBenchmark(overrides = {}) {
    const benchmark = new ArchetypeMatchupBenchmark({
        archetypes: "all",
        agentsPerArchetype: 2,
        rounds: 5,
        ticksPerRound: 100,
        marketConditions: ["bull", "bear", "volatile", "stable"],
        availableVramGb: 16,
        // Caller-supplied fields win over the defaults above.
        ...overrides,
    });
    return benchmark.run();
}