@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/package.json +2 -2
  2. package/research-output/training-runs/training-run-1773726941205.json +38 -0
  3. package/scripts/rank_trajectories.ts +0 -1
  4. package/scripts/run_task_benchmark.ts +4 -11
  5. package/src/adapter.ts +96 -49
  6. package/src/archetypes/ArchetypeConfigService.ts +188 -185
  7. package/src/archetypes/derive-archetype.ts +47 -47
  8. package/src/archetypes/index.ts +2 -2
  9. package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
  10. package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
  11. package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
  12. package/src/benchmark/BenchmarkDataViewer.ts +32 -30
  13. package/src/benchmark/BenchmarkHistoryService.ts +13 -12
  14. package/src/benchmark/BenchmarkRunner.ts +87 -83
  15. package/src/benchmark/BenchmarkValidator.ts +48 -46
  16. package/src/benchmark/FastEvalRunner.ts +17 -16
  17. package/src/benchmark/MetricsValidator.ts +20 -21
  18. package/src/benchmark/MetricsVisualizer.ts +92 -85
  19. package/src/benchmark/ModelBenchmarkService.ts +90 -82
  20. package/src/benchmark/ModelRegistry.ts +44 -44
  21. package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
  22. package/src/benchmark/SimulationA2AInterface.ts +118 -118
  23. package/src/benchmark/SimulationEngine.ts +51 -51
  24. package/src/benchmark/TaskRunner.ts +87 -79
  25. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
  26. package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
  27. package/src/benchmark/index.ts +27 -27
  28. package/src/benchmark/parseSimulationMetrics.ts +32 -32
  29. package/src/benchmark/simulation-types.ts +10 -10
  30. package/src/dependencies.ts +34 -34
  31. package/src/generation/TrajectoryGenerator.ts +39 -37
  32. package/src/generation/index.ts +1 -1
  33. package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
  34. package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
  35. package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
  36. package/src/huggingface/index.ts +6 -6
  37. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
  38. package/src/index.ts +27 -27
  39. package/src/init-training.ts +6 -6
  40. package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
  41. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
  42. package/src/metrics/index.ts +2 -2
  43. package/src/rubrics/__tests__/index.test.ts +73 -73
  44. package/src/rubrics/ass-kisser.ts +6 -6
  45. package/src/rubrics/degen.ts +6 -6
  46. package/src/rubrics/goody-twoshoes.ts +6 -6
  47. package/src/rubrics/index.ts +50 -50
  48. package/src/rubrics/information-trader.ts +6 -6
  49. package/src/rubrics/infosec.ts +6 -6
  50. package/src/rubrics/liar.ts +6 -6
  51. package/src/rubrics/perps-trader.ts +6 -6
  52. package/src/rubrics/researcher.ts +6 -6
  53. package/src/rubrics/scammer.ts +6 -6
  54. package/src/rubrics/social-butterfly.ts +7 -7
  55. package/src/rubrics/super-predictor.ts +6 -6
  56. package/src/rubrics/trader.ts +5 -5
  57. package/src/scoring/ArchetypeScoringService.ts +56 -54
  58. package/src/scoring/JudgePromptBuilder.ts +96 -96
  59. package/src/scoring/LLMJudgeCache.ts +26 -23
  60. package/src/scoring/index.ts +3 -3
  61. package/src/training/AutomationPipeline.ts +149 -140
  62. package/src/training/BenchmarkService.ts +49 -45
  63. package/src/training/ConfigValidator.ts +38 -32
  64. package/src/training/MarketOutcomesTracker.ts +22 -12
  65. package/src/training/ModelDeployer.ts +15 -15
  66. package/src/training/ModelFetcher.ts +7 -7
  67. package/src/training/ModelSelectionService.ts +32 -32
  68. package/src/training/ModelUsageVerifier.ts +31 -24
  69. package/src/training/MultiModelOrchestrator.ts +44 -44
  70. package/src/training/RLModelConfig.ts +57 -57
  71. package/src/training/RewardBackpropagationService.ts +18 -17
  72. package/src/training/RulerScoringService.ts +73 -72
  73. package/src/training/TrainingMonitor.ts +29 -29
  74. package/src/training/TrajectoryRecorder.ts +25 -27
  75. package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
  76. package/src/training/index.ts +36 -36
  77. package/src/training/logRLConfig.ts +7 -7
  78. package/src/training/pipeline.ts +13 -16
  79. package/src/training/storage/ModelStorageService.ts +32 -32
  80. package/src/training/storage/TrainingDataArchiver.ts +21 -21
  81. package/src/training/storage/index.ts +2 -2
  82. package/src/training/types.ts +6 -6
  83. package/src/training/window-utils.ts +14 -14
  84. package/src/utils/index.ts +7 -7
  85. package/src/utils/logger.ts +5 -5
  86. package/src/utils/snowflake.ts +1 -1
  87. package/src/utils/synthetic-detector.ts +7 -7
@@ -9,16 +9,16 @@
9
9
  * @packageDocumentation
10
10
  */
11
11
 
12
- import { getTrainingDataAdapter } from '../adapter';
13
- import type { IAgentRuntimeLike, UserLike } from '../dependencies';
14
- import { ArchetypeConfigService } from '../archetypes/ArchetypeConfigService';
12
+ import { getTrainingDataAdapter } from "../adapter";
13
+ import { ArchetypeConfigService } from "../archetypes/ArchetypeConfigService";
14
+ import type { IAgentRuntimeLike, UserLike } from "../dependencies";
15
15
  import {
16
16
  areAgentDependenciesConfigured,
17
17
  getAgentRuntimeManager,
18
18
  getAgentService,
19
19
  getAutonomousCoordinator,
20
- } from '../dependencies';
21
- import { logger } from '../utils/logger';
20
+ } from "../dependencies";
21
+ import { logger } from "../utils/logger";
22
22
 
23
23
  export interface ParallelGenerationConfig {
24
24
  // Agent configuration
@@ -58,7 +58,7 @@ export interface ParallelGenerationResult {
58
58
  function ensureDependencies(): void {
59
59
  if (!areAgentDependenciesConfigured()) {
60
60
  throw new Error(
61
- 'Training dependencies not configured. Call configureTrainingDependencies() with agentService, agentRuntimeManager, and autonomousCoordinator first.'
61
+ "Training dependencies not configured. Call configureTrainingDependencies() with agentService, agentRuntimeManager, and autonomousCoordinator first.",
62
62
  );
63
63
  }
64
64
  }
@@ -68,7 +68,8 @@ function ensureDependencies(): void {
68
68
  */
69
69
  export class TrajectoryGenerator {
70
70
  private config: ParallelGenerationConfig;
71
- private agents: Map<string, { user: UserLike; archetype: string }> = new Map();
71
+ private agents: Map<string, { user: UserLike; archetype: string }> =
72
+ new Map();
72
73
 
73
74
  constructor(config: ParallelGenerationConfig) {
74
75
  this.config = {
@@ -86,12 +87,12 @@ export class TrajectoryGenerator {
86
87
  const agentService = getAgentService();
87
88
 
88
89
  logger.info(
89
- 'Creating archetype-based agents...',
90
+ "Creating archetype-based agents...",
90
91
  {
91
92
  archetypes: this.config.archetypes,
92
93
  perArchetype: this.config.agentsPerArchetype,
93
94
  },
94
- 'TrajectoryGenerator'
95
+ "TrajectoryGenerator",
95
96
  );
96
97
 
97
98
  for (const archetype of this.config.archetypes) {
@@ -115,10 +116,10 @@ export class TrajectoryGenerator {
115
116
  // Disable A2A to allow offline training without localhost server
116
117
  await getTrainingDataAdapter().updateAgentConfig(agent.id, {
117
118
  autonomousTrading: archetypeConfig.actionWeights.trade > 0.3,
118
- autonomousPosting: archetypeConfig.postFrequency !== 'low',
119
+ autonomousPosting: archetypeConfig.postFrequency !== "low",
119
120
  autonomousCommenting:
120
- archetypeConfig.engagementStyle === 'helpful' ||
121
- archetypeConfig.engagementStyle === 'analytical',
121
+ archetypeConfig.engagementStyle === "helpful" ||
122
+ archetypeConfig.engagementStyle === "analytical",
122
123
  autonomousDMs: archetypeConfig.dmActivity,
123
124
  autonomousGroupChats: archetypeConfig.groupChatActivity,
124
125
  maxActionsPerTick: 5,
@@ -131,7 +132,7 @@ export class TrajectoryGenerator {
131
132
  logger.info(
132
133
  `Created ${archetype} agent: ${agent.username}`,
133
134
  {},
134
- 'TrajectoryGenerator'
135
+ "TrajectoryGenerator",
135
136
  );
136
137
  }
137
138
  }
@@ -139,7 +140,7 @@ export class TrajectoryGenerator {
139
140
  logger.info(
140
141
  `Created ${this.agents.size} agents total`,
141
142
  {},
142
- 'TrajectoryGenerator'
143
+ "TrajectoryGenerator",
143
144
  );
144
145
  }
145
146
 
@@ -171,7 +172,7 @@ export class TrajectoryGenerator {
171
172
  logger.warn(
172
173
  `Runtime creation returned null for ${agentId}, skipping`,
173
174
  {},
174
- 'TrajectoryGenerator'
175
+ "TrajectoryGenerator",
175
176
  );
176
177
  return;
177
178
  }
@@ -179,44 +180,44 @@ export class TrajectoryGenerator {
179
180
 
180
181
  // Apply archetype configuration to runtime character if available
181
182
  const archetypeConfig = ArchetypeConfigService.getConfig(
182
- agentInfo.archetype
183
+ agentInfo.archetype,
183
184
  );
184
185
  const character = runtime.character as
185
186
  | { name?: string; bio?: string | string[]; topics?: string[] }
186
187
  | undefined;
187
188
  if (character) {
188
189
  character.name = archetypeConfig.name;
189
- character.bio = archetypeConfig.bio.join(' ');
190
+ character.bio = archetypeConfig.bio.join(" ");
190
191
  if (!character.topics) {
191
192
  character.topics = [];
192
193
  }
193
194
 
194
195
  // Add archetype-specific topics
195
- if (archetypeConfig.preferredMarkets.includes('perpetual')) {
196
- character.topics.push('perpetual_trading', 'leverage');
196
+ if (archetypeConfig.preferredMarkets.includes("perpetual")) {
197
+ character.topics.push("perpetual_trading", "leverage");
197
198
  }
198
- if (archetypeConfig.preferredMarkets.includes('prediction')) {
199
- character.topics.push('prediction_markets', 'forecasting');
199
+ if (archetypeConfig.preferredMarkets.includes("prediction")) {
200
+ character.topics.push("prediction_markets", "forecasting");
200
201
  }
201
202
  }
202
203
 
203
204
  // Run ticks for this agent
204
205
  for (let tick = 0; tick < this.config.ticksPerAgent; tick++) {
205
206
  logger.debug(
206
- `Agent ${agentInfo.user.username} - Tick ${tick + 1}/${this.config.ticksPerAgent}`
207
+ `Agent ${agentInfo.user.username} - Tick ${tick + 1}/${this.config.ticksPerAgent}`,
207
208
  );
208
209
 
209
210
  // Execute autonomous tick with trajectory recording
210
211
  const result = await autonomousCoordinator.executeAutonomousTick(
211
212
  agentId,
212
213
  runtime,
213
- true // Enable trajectory recording
214
+ true, // Enable trajectory recording
214
215
  );
215
216
 
216
217
  if (result.trajectoryId) {
217
218
  trajectoryIds.push(result.trajectoryId);
218
219
  logger.debug(
219
- `Recorded trajectory ${result.trajectoryId} for ${agentInfo.user.username}`
220
+ `Recorded trajectory ${result.trajectoryId} for ${agentInfo.user.username}`,
220
221
  );
221
222
  }
222
223
 
@@ -230,19 +231,19 @@ export class TrajectoryGenerator {
230
231
  trajectories: trajectoryIds.length,
231
232
  archetype: agentInfo.archetype,
232
233
  },
233
- 'TrajectoryGenerator'
234
+ "TrajectoryGenerator",
234
235
  );
235
236
  });
236
237
 
237
238
  // Wait for all agents in batch to complete
238
239
  await Promise.allSettled(promises).then((results) => {
239
240
  for (const result of results) {
240
- if (result.status === 'rejected') {
241
+ if (result.status === "rejected") {
241
242
  const errorMsg = `Agent batch error: ${result.reason instanceof Error ? result.reason.message : String(result.reason)}`;
242
243
  logger.error(
243
244
  errorMsg,
244
245
  { error: result.reason },
245
- 'TrajectoryGenerator'
246
+ "TrajectoryGenerator",
246
247
  );
247
248
  errors.push(errorMsg);
248
249
  }
@@ -288,15 +289,15 @@ export class TrajectoryGenerator {
288
289
  }
289
290
 
290
291
  logger.info(
291
- 'Starting parallel trajectory generation',
292
+ "Starting parallel trajectory generation",
292
293
  {
293
294
  totalAgents: this.agents.size,
294
295
  parallelBatches: Math.ceil(
295
- this.agents.size / this.config.parallelAgents
296
+ this.agents.size / this.config.parallelAgents,
296
297
  ),
297
298
  ticksPerAgent: this.config.ticksPerAgent,
298
299
  },
299
- 'TrajectoryGenerator'
300
+ "TrajectoryGenerator",
300
301
  );
301
302
 
302
303
  // Process agents in parallel batches
@@ -309,7 +310,7 @@ export class TrajectoryGenerator {
309
310
  {
310
311
  agents: batch.length,
311
312
  },
312
- 'TrajectoryGenerator'
313
+ "TrajectoryGenerator",
313
314
  );
314
315
 
315
316
  const batchResult = await this.runParallelBatch(batch);
@@ -321,7 +322,8 @@ export class TrajectoryGenerator {
321
322
  // Calculate stats
322
323
  for (const trajId of result.trajectoryIds) {
323
324
  // Get trajectory to determine archetype
324
- const trajectory = await getTrainingDataAdapter().getTrajectoryById(trajId);
325
+ const trajectory =
326
+ await getTrainingDataAdapter().getTrajectoryById(trajId);
325
327
 
326
328
  if (trajectory) {
327
329
  const agentInfo = this.agents.get(trajectory.agentId);
@@ -344,7 +346,7 @@ export class TrajectoryGenerator {
344
346
  result.duration = Date.now() - startTime;
345
347
 
346
348
  logger.info(
347
- 'Parallel generation complete',
349
+ "Parallel generation complete",
348
350
  {
349
351
  agents: result.agentsCreated.length,
350
352
  trajectories: result.trajectoryIds.length,
@@ -352,7 +354,7 @@ export class TrajectoryGenerator {
352
354
  durationSeconds: result.duration / 1000,
353
355
  errors: result.errors.length,
354
356
  },
355
- 'TrajectoryGenerator'
357
+ "TrajectoryGenerator",
356
358
  );
357
359
 
358
360
  return result;
@@ -365,7 +367,7 @@ export class TrajectoryGenerator {
365
367
  logger.info(
366
368
  `Cleaning up ${this.agents.size} agents...`,
367
369
  {},
368
- 'TrajectoryGenerator'
370
+ "TrajectoryGenerator",
369
371
  );
370
372
 
371
373
  const adapter = getTrainingDataAdapter();
@@ -373,7 +375,7 @@ export class TrajectoryGenerator {
373
375
  await adapter.deleteUser(agentId);
374
376
  }
375
377
 
376
- logger.info('Cleanup complete', {}, 'TrajectoryGenerator');
378
+ logger.info("Cleanup complete", {}, "TrajectoryGenerator");
377
379
  }
378
380
  }
379
381
 
@@ -381,7 +383,7 @@ export class TrajectoryGenerator {
381
383
  * Factory function for creating parallel generator
382
384
  */
383
385
  export async function createParallelGenerator(
384
- config: ParallelGenerationConfig
386
+ config: ParallelGenerationConfig,
385
387
  ): Promise<TrajectoryGenerator> {
386
388
  return new TrajectoryGenerator(config);
387
389
  }
@@ -9,4 +9,4 @@ export {
9
9
  type ParallelGenerationConfig,
10
10
  type ParallelGenerationResult,
11
11
  TrajectoryGenerator,
12
- } from './TrajectoryGenerator';
12
+ } from "./TrajectoryGenerator";
@@ -5,15 +5,15 @@
5
5
  * Creates dataset cards with visualizations, metrics, and usage examples.
6
6
  */
7
7
 
8
- import { promises as fs } from 'fs';
9
- import * as path from 'path';
10
- import type { SimulationMetrics } from '../benchmark/SimulationEngine';
11
- import { calculateArrayStats, logger } from '../utils';
8
+ import { promises as fs } from "node:fs";
9
+ import * as path from "node:path";
10
+ import type { SimulationMetrics } from "../benchmark/SimulationEngine";
11
+ import { calculateArrayStats, logger } from "../utils";
12
12
  import {
13
13
  getHuggingFaceToken,
14
14
  HuggingFaceUploadUtil,
15
15
  requireHuggingFaceToken,
16
- } from './shared/HuggingFaceUploadUtil';
16
+ } from "./shared/HuggingFaceUploadUtil";
17
17
 
18
18
  export interface BenchmarkRecord {
19
19
  benchmarkId: string;
@@ -71,7 +71,7 @@ export class HuggingFaceDatasetUploader {
71
71
  */
72
72
  async uploadDataset(options: UploadOptions): Promise<UploadResult> {
73
73
  try {
74
- logger.info('Starting HuggingFace dataset upload', {
74
+ logger.info("Starting HuggingFace dataset upload", {
75
75
  datasetName: options.datasetName,
76
76
  });
77
77
 
@@ -82,57 +82,57 @@ export class HuggingFaceDatasetUploader {
82
82
  // Set defaults
83
83
  const version = options.version || this.generateVersion();
84
84
  const benchmarkDir =
85
- options.benchmarkDir || path.join(process.cwd(), 'benchmarks');
85
+ options.benchmarkDir || path.join(process.cwd(), "benchmarks");
86
86
  const outputDir =
87
87
  options.outputDir ||
88
- path.join(process.cwd(), 'exports', 'huggingface', version);
88
+ path.join(process.cwd(), "exports", "huggingface", version);
89
89
 
90
90
  // Step 1: Collect benchmark data
91
- logger.info('Collecting benchmark data', { benchmarkDir });
91
+ logger.info("Collecting benchmark data", { benchmarkDir });
92
92
  const benchmarks = await this.collectBenchmarkData(benchmarkDir);
93
93
  logger.info(`Collected ${benchmarks.length} benchmark records`);
94
94
 
95
95
  if (benchmarks.length === 0) {
96
- throw new Error('No benchmark data found to upload');
96
+ throw new Error("No benchmark data found to upload");
97
97
  }
98
98
 
99
99
  // Step 2: Prepare dataset files
100
- logger.info('Preparing dataset files', { outputDir });
100
+ logger.info("Preparing dataset files", { outputDir });
101
101
  await fs.mkdir(outputDir, { recursive: true });
102
102
 
103
103
  const metadata = await this.prepareDatasetFiles(benchmarks, outputDir, {
104
104
  datasetName: options.datasetName,
105
105
  version,
106
106
  description:
107
- options.description || 'Autonomous agent benchmark results',
107
+ options.description || "Autonomous agent benchmark results",
108
108
  });
109
109
 
110
110
  // Step 3: Generate dataset card
111
- logger.info('Generating dataset card');
111
+ logger.info("Generating dataset card");
112
112
  await this.generateDatasetCard(metadata, benchmarks, outputDir);
113
113
 
114
114
  // Step 4: Create repository if it doesn't exist
115
- logger.info('Ensuring repository exists', {
115
+ logger.info("Ensuring repository exists", {
116
116
  datasetName: options.datasetName,
117
117
  });
118
118
  await this.ensureRepository(
119
119
  options.datasetName,
120
- options.private ?? false
120
+ options.private ?? false,
121
121
  );
122
122
 
123
123
  // Step 5: Upload to HuggingFace
124
- logger.info('Uploading to HuggingFace', {
124
+ logger.info("Uploading to HuggingFace", {
125
125
  datasetName: options.datasetName,
126
126
  });
127
127
  const filesUploaded = await this.uploadToHub(
128
128
  options.datasetName,
129
129
  outputDir,
130
- options.private ?? false
130
+ options.private ?? false,
131
131
  );
132
132
 
133
133
  const datasetUrl = `https://huggingface.co/datasets/${options.datasetName}`;
134
134
 
135
- logger.info('Dataset uploaded successfully', {
135
+ logger.info("Dataset uploaded successfully", {
136
136
  datasetUrl,
137
137
  filesUploaded,
138
138
  });
@@ -144,12 +144,12 @@ export class HuggingFaceDatasetUploader {
144
144
  filesUploaded,
145
145
  };
146
146
  } catch (error) {
147
- logger.error('Failed to upload dataset', { error });
147
+ logger.error("Failed to upload dataset", { error });
148
148
  return {
149
149
  success: false,
150
- version: options.version || 'unknown',
150
+ version: options.version || "unknown",
151
151
  filesUploaded: 0,
152
- error: error instanceof Error ? error.message : 'Unknown error',
152
+ error: error instanceof Error ? error.message : "Unknown error",
153
153
  };
154
154
  }
155
155
  }
@@ -158,22 +158,22 @@ export class HuggingFaceDatasetUploader {
158
158
  * Collect benchmark data from files
159
159
  */
160
160
  private async collectBenchmarkData(
161
- benchmarkDir: string
161
+ benchmarkDir: string,
162
162
  ): Promise<BenchmarkRecord[]> {
163
163
  const records: BenchmarkRecord[] = [];
164
164
 
165
165
  // Collect from model-comparison directory
166
- const comparisonDir = path.join(benchmarkDir, 'model-comparison');
166
+ const comparisonDir = path.join(benchmarkDir, "model-comparison");
167
167
  if (await this.fileExists(comparisonDir)) {
168
- const comparisonFile = path.join(comparisonDir, 'comparison.json');
168
+ const comparisonFile = path.join(comparisonDir, "comparison.json");
169
169
  if (await this.fileExists(comparisonFile)) {
170
- const data = JSON.parse(await fs.readFile(comparisonFile, 'utf-8'));
170
+ const data = JSON.parse(await fs.readFile(comparisonFile, "utf-8"));
171
171
  for (const result of data.results || []) {
172
172
  if (result.metrics) {
173
173
  records.push({
174
- benchmarkId: data.benchmark || 'comparison',
174
+ benchmarkId: data.benchmark || "comparison",
175
175
  modelId: result.model.modelId,
176
- modelVersion: 'baseline',
176
+ modelVersion: "baseline",
177
177
  modelName: result.model.displayName,
178
178
  runAt: data.runAt,
179
179
  metrics: result.metrics,
@@ -182,7 +182,7 @@ export class HuggingFaceDatasetUploader {
182
182
  tickInterval: 60,
183
183
  markets: 10,
184
184
  ticks: Math.floor(
185
- (result.metrics.timing?.totalDuration || 0) / 60
185
+ (result.metrics.timing?.totalDuration || 0) / 60,
186
186
  ),
187
187
  },
188
188
  });
@@ -192,13 +192,13 @@ export class HuggingFaceDatasetUploader {
192
192
  }
193
193
 
194
194
  // Collect from baselines directory
195
- const baselinesDir = path.join(benchmarkDir, 'baselines');
195
+ const baselinesDir = path.join(benchmarkDir, "baselines");
196
196
  if (await this.fileExists(baselinesDir)) {
197
197
  const files = await fs.readdir(baselinesDir);
198
198
  for (const file of files) {
199
- if (file.endsWith('.json') && file.startsWith('baseline-')) {
199
+ if (file.endsWith(".json") && file.startsWith("baseline-")) {
200
200
  const filePath = path.join(baselinesDir, file);
201
- const data = JSON.parse(await fs.readFile(filePath, 'utf-8'));
201
+ const data = JSON.parse(await fs.readFile(filePath, "utf-8"));
202
202
 
203
203
  // Skip if no metrics
204
204
  if (!data.metrics) continue;
@@ -207,13 +207,13 @@ export class HuggingFaceDatasetUploader {
207
207
  benchmarkId:
208
208
  data.benchmark?.id ||
209
209
  data.benchmark?.path ||
210
- file.replace('.json', ''),
211
- modelId: data.model?.modelId || 'unknown',
212
- modelVersion: data.model?.version || 'baseline',
210
+ file.replace(".json", ""),
211
+ modelId: data.model?.modelId || "unknown",
212
+ modelVersion: data.model?.version || "baseline",
213
213
  modelName:
214
214
  data.model?.displayName ||
215
215
  data.model?.name ||
216
- file.replace('.json', ''),
216
+ file.replace(".json", ""),
217
217
  runAt: data.runAt || new Date().toISOString(),
218
218
  metrics: data.metrics,
219
219
  benchmarkSnapshot: {
@@ -226,7 +226,7 @@ export class HuggingFaceDatasetUploader {
226
226
  ticks: Math.floor(
227
227
  (data.timing?.totalDuration ||
228
228
  data.metrics.timing?.totalDuration ||
229
- 0) / 60
229
+ 0) / 60,
230
230
  ),
231
231
  },
232
232
  });
@@ -235,21 +235,21 @@ export class HuggingFaceDatasetUploader {
235
235
  }
236
236
 
237
237
  // Collect from test-baselines directory
238
- const testBaselinesDir = path.join(benchmarkDir, 'test-baselines');
238
+ const testBaselinesDir = path.join(benchmarkDir, "test-baselines");
239
239
  if (await this.fileExists(testBaselinesDir)) {
240
240
  const subdirs = await fs.readdir(testBaselinesDir);
241
241
  for (const subdir of subdirs) {
242
- const metricsFile = path.join(testBaselinesDir, subdir, 'metrics.json');
242
+ const metricsFile = path.join(testBaselinesDir, subdir, "metrics.json");
243
243
  if (await this.fileExists(metricsFile)) {
244
- const data = JSON.parse(await fs.readFile(metricsFile, 'utf-8'));
244
+ const data = JSON.parse(await fs.readFile(metricsFile, "utf-8"));
245
245
 
246
246
  // Skip if no required fields
247
247
  if (!data.totalPnl && !data.predictionMetrics) continue;
248
248
 
249
249
  records.push({
250
- benchmarkId: data.benchmarkId || 'test-benchmark',
250
+ benchmarkId: data.benchmarkId || "test-benchmark",
251
251
  modelId: subdir,
252
- modelVersion: 'test-baseline',
252
+ modelVersion: "test-baseline",
253
253
  modelName: subdir,
254
254
  runAt: data.runAt || new Date().toISOString(),
255
255
  metrics: data,
@@ -273,11 +273,11 @@ export class HuggingFaceDatasetUploader {
273
273
  private async prepareDatasetFiles(
274
274
  benchmarks: BenchmarkRecord[],
275
275
  outputDir: string,
276
- options: { datasetName: string; version: string; description: string }
276
+ options: { datasetName: string; version: string; description: string },
277
277
  ): Promise<DatasetMetadata> {
278
278
  // Create data.jsonl with all benchmark records
279
- const jsonlPath = path.join(outputDir, 'data.jsonl');
280
- const jsonlLines = benchmarks.map((b) => JSON.stringify(b)).join('\n');
279
+ const jsonlPath = path.join(outputDir, "data.jsonl");
280
+ const jsonlLines = benchmarks.map((b) => JSON.stringify(b)).join("\n");
281
281
  await fs.writeFile(jsonlPath, jsonlLines);
282
282
 
283
283
  // Create metadata.json
@@ -289,15 +289,15 @@ export class HuggingFaceDatasetUploader {
289
289
  totalBenchmarks: benchmarks.length,
290
290
  models: Array.from(new Set(benchmarks.map((b) => b.modelName))),
291
291
  benchmarkTypes: Array.from(new Set(benchmarks.map((b) => b.benchmarkId))),
292
- license: 'MIT',
292
+ license: "MIT",
293
293
  };
294
294
 
295
- const metadataPath = path.join(outputDir, 'metadata.json');
295
+ const metadataPath = path.join(outputDir, "metadata.json");
296
296
  await fs.writeFile(metadataPath, JSON.stringify(metadata, null, 2));
297
297
 
298
298
  // Create summary statistics
299
299
  const summary = this.calculateSummaryStatistics(benchmarks);
300
- const summaryPath = path.join(outputDir, 'summary.json');
300
+ const summaryPath = path.join(outputDir, "summary.json");
301
301
  await fs.writeFile(summaryPath, JSON.stringify(summary, null, 2));
302
302
 
303
303
  return metadata;
@@ -309,14 +309,14 @@ export class HuggingFaceDatasetUploader {
309
309
  private async generateDatasetCard(
310
310
  metadata: DatasetMetadata,
311
311
  benchmarks: BenchmarkRecord[],
312
- outputDir: string
312
+ outputDir: string,
313
313
  ): Promise<void> {
314
314
  const summary = this.calculateSummaryStatistics(benchmarks);
315
- const brandName = process.env.TRAINING_BRAND_NAME || 'ElizaOS';
316
- const brandOrg = process.env.TRAINING_BRAND_ORG || 'ElizaOS Contributors';
315
+ const brandName = process.env.TRAINING_BRAND_NAME || "ElizaOS";
316
+ const brandOrg = process.env.TRAINING_BRAND_ORG || "ElizaOS Contributors";
317
317
  const platformName =
318
- process.env.TRAINING_PLATFORM_NAME || 'ElizaOS-compatible runtimes';
319
- const brandTag = brandName.toLowerCase().replace(/\s+/g, '-');
318
+ process.env.TRAINING_PLATFORM_NAME || "ElizaOS-compatible runtimes";
319
+ const brandTag = brandName.toLowerCase().replace(/\s+/g, "-");
320
320
 
321
321
  const card = `---
322
322
  license: ${metadata.license}
@@ -435,7 +435,7 @@ print(model_performance.sort_values('metrics.totalPnl', ascending=False))
435
435
  If you use this dataset in your research, please cite:
436
436
 
437
437
  \`\`\`bibtex
438
- @dataset{${brandTag}_benchmarks_${metadata.version.replace(/\./g, '_')},
438
+ @dataset{${brandTag}_benchmarks_${metadata.version.replace(/\./g, "_")},
439
439
  title = {${brandName} Agent Benchmarks},
440
440
  author = {${brandOrg}},
441
441
  year = {${new Date().getFullYear()}},
@@ -453,7 +453,7 @@ ${metadata.license}
453
453
  For questions or issues, please open an issue on the repository.
454
454
  `;
455
455
 
456
- const cardPath = path.join(outputDir, 'README.md');
456
+ const cardPath = path.join(outputDir, "README.md");
457
457
  await fs.writeFile(cardPath, card);
458
458
  }
459
459
 
@@ -494,8 +494,8 @@ For questions or issues, please open an issue on the repository.
494
494
  }))
495
495
  .sort((a, b) => b.avgPnl - a.avgPnl);
496
496
 
497
- let table = '| Rank | Model | Avg P&L | Accuracy | Optimality | Runs |\n';
498
- table += '|------|-------|---------|----------|------------|------|\n';
497
+ let table = "| Rank | Model | Avg P&L | Accuracy | Optimality | Runs |\n";
498
+ table += "|------|-------|---------|----------|------------|------|\n";
499
499
 
500
500
  leaderboard.forEach((entry, index) => {
501
501
  table += `| ${index + 1} | ${entry.model} | ${entry.avgPnl.toFixed(2)} | ${(entry.avgAccuracy * 100).toFixed(1)}% | ${entry.avgOptimality.toFixed(1)} | ${entry.runs} |\n`;
@@ -553,17 +553,17 @@ For questions or issues, please open an issue on the repository.
553
553
  */
554
554
  private async ensureRepository(
555
555
  datasetName: string,
556
- isPrivate: boolean
556
+ isPrivate: boolean,
557
557
  ): Promise<void> {
558
558
  if (!this.huggingFaceToken) {
559
- throw new Error('HuggingFace token not configured');
559
+ throw new Error("HuggingFace token not configured");
560
560
  }
561
561
 
562
562
  await HuggingFaceUploadUtil.ensureRepository(
563
563
  datasetName,
564
- 'dataset',
564
+ "dataset",
565
565
  this.huggingFaceToken,
566
- isPrivate
566
+ isPrivate,
567
567
  );
568
568
  }
569
569
 
@@ -574,38 +574,38 @@ For questions or issues, please open an issue on the repository.
574
574
  private async uploadToHub(
575
575
  datasetName: string,
576
576
  localDir: string,
577
- _isPrivate: boolean
577
+ _isPrivate: boolean,
578
578
  ): Promise<number> {
579
579
  if (!this.huggingFaceToken) {
580
- throw new Error('HuggingFace token not configured');
580
+ throw new Error("HuggingFace token not configured");
581
581
  }
582
582
 
583
583
  try {
584
584
  // Use shared upload utility
585
585
  const { HuggingFaceUploadUtil } = await import(
586
- './shared/HuggingFaceUploadUtil'
586
+ "./shared/HuggingFaceUploadUtil"
587
587
  );
588
588
 
589
589
  return await HuggingFaceUploadUtil.uploadDirectory(
590
590
  datasetName,
591
- 'dataset',
591
+ "dataset",
592
592
  localDir,
593
- this.huggingFaceToken
593
+ this.huggingFaceToken,
594
594
  );
595
595
  } catch (error) {
596
- logger.error('Failed to upload to HuggingFace Hub', { error });
596
+ logger.error("Failed to upload to HuggingFace Hub", { error });
597
597
 
598
598
  // Provide helpful manual upload instructions
599
599
  const { HuggingFaceUploadUtil } = await import(
600
- './shared/HuggingFaceUploadUtil'
600
+ "./shared/HuggingFaceUploadUtil"
601
601
  );
602
602
  const instructions = HuggingFaceUploadUtil.getManualUploadInstructions(
603
603
  datasetName,
604
- 'dataset',
605
- localDir
604
+ "dataset",
605
+ localDir,
606
606
  );
607
607
 
608
- logger.info('To upload manually:', { instructions });
608
+ logger.info("To upload manually:", { instructions });
609
609
 
610
610
  throw error;
611
611
  }
@@ -617,8 +617,8 @@ For questions or issues, please open an issue on the repository.
617
617
  private generateVersion(): string {
618
618
  const now = new Date();
619
619
  const year = now.getFullYear();
620
- const month = String(now.getMonth() + 1).padStart(2, '0');
621
- const day = String(now.getDate()).padStart(2, '0');
620
+ const month = String(now.getMonth() + 1).padStart(2, "0");
621
+ const day = String(now.getDate()).padStart(2, "0");
622
622
  return `${year}.${month}.${day}`;
623
623
  }
624
624