@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/package.json +2 -2
  2. package/research-output/training-runs/training-run-1773726941205.json +38 -0
  3. package/scripts/rank_trajectories.ts +0 -1
  4. package/scripts/run_task_benchmark.ts +4 -11
  5. package/src/adapter.ts +96 -49
  6. package/src/archetypes/ArchetypeConfigService.ts +188 -185
  7. package/src/archetypes/derive-archetype.ts +47 -47
  8. package/src/archetypes/index.ts +2 -2
  9. package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
  10. package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
  11. package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
  12. package/src/benchmark/BenchmarkDataViewer.ts +32 -30
  13. package/src/benchmark/BenchmarkHistoryService.ts +13 -12
  14. package/src/benchmark/BenchmarkRunner.ts +87 -83
  15. package/src/benchmark/BenchmarkValidator.ts +48 -46
  16. package/src/benchmark/FastEvalRunner.ts +17 -16
  17. package/src/benchmark/MetricsValidator.ts +20 -21
  18. package/src/benchmark/MetricsVisualizer.ts +92 -85
  19. package/src/benchmark/ModelBenchmarkService.ts +90 -82
  20. package/src/benchmark/ModelRegistry.ts +44 -44
  21. package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
  22. package/src/benchmark/SimulationA2AInterface.ts +118 -118
  23. package/src/benchmark/SimulationEngine.ts +51 -51
  24. package/src/benchmark/TaskRunner.ts +87 -79
  25. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
  26. package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
  27. package/src/benchmark/index.ts +27 -27
  28. package/src/benchmark/parseSimulationMetrics.ts +32 -32
  29. package/src/benchmark/simulation-types.ts +10 -10
  30. package/src/dependencies.ts +34 -34
  31. package/src/generation/TrajectoryGenerator.ts +39 -37
  32. package/src/generation/index.ts +1 -1
  33. package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
  34. package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
  35. package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
  36. package/src/huggingface/index.ts +6 -6
  37. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
  38. package/src/index.ts +27 -27
  39. package/src/init-training.ts +6 -6
  40. package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
  41. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
  42. package/src/metrics/index.ts +2 -2
  43. package/src/rubrics/__tests__/index.test.ts +73 -73
  44. package/src/rubrics/ass-kisser.ts +6 -6
  45. package/src/rubrics/degen.ts +6 -6
  46. package/src/rubrics/goody-twoshoes.ts +6 -6
  47. package/src/rubrics/index.ts +50 -50
  48. package/src/rubrics/information-trader.ts +6 -6
  49. package/src/rubrics/infosec.ts +6 -6
  50. package/src/rubrics/liar.ts +6 -6
  51. package/src/rubrics/perps-trader.ts +6 -6
  52. package/src/rubrics/researcher.ts +6 -6
  53. package/src/rubrics/scammer.ts +6 -6
  54. package/src/rubrics/social-butterfly.ts +7 -7
  55. package/src/rubrics/super-predictor.ts +6 -6
  56. package/src/rubrics/trader.ts +5 -5
  57. package/src/scoring/ArchetypeScoringService.ts +56 -54
  58. package/src/scoring/JudgePromptBuilder.ts +96 -96
  59. package/src/scoring/LLMJudgeCache.ts +26 -23
  60. package/src/scoring/index.ts +3 -3
  61. package/src/training/AutomationPipeline.ts +149 -140
  62. package/src/training/BenchmarkService.ts +49 -45
  63. package/src/training/ConfigValidator.ts +38 -32
  64. package/src/training/MarketOutcomesTracker.ts +22 -12
  65. package/src/training/ModelDeployer.ts +15 -15
  66. package/src/training/ModelFetcher.ts +7 -7
  67. package/src/training/ModelSelectionService.ts +32 -32
  68. package/src/training/ModelUsageVerifier.ts +31 -24
  69. package/src/training/MultiModelOrchestrator.ts +44 -44
  70. package/src/training/RLModelConfig.ts +57 -57
  71. package/src/training/RewardBackpropagationService.ts +18 -17
  72. package/src/training/RulerScoringService.ts +73 -72
  73. package/src/training/TrainingMonitor.ts +29 -29
  74. package/src/training/TrajectoryRecorder.ts +25 -27
  75. package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
  76. package/src/training/index.ts +36 -36
  77. package/src/training/logRLConfig.ts +7 -7
  78. package/src/training/pipeline.ts +13 -16
  79. package/src/training/storage/ModelStorageService.ts +32 -32
  80. package/src/training/storage/TrainingDataArchiver.ts +21 -21
  81. package/src/training/storage/index.ts +2 -2
  82. package/src/training/types.ts +6 -6
  83. package/src/training/window-utils.ts +14 -14
  84. package/src/utils/index.ts +7 -7
  85. package/src/utils/logger.ts +5 -5
  86. package/src/utils/snowflake.ts +1 -1
  87. package/src/utils/synthetic-detector.ts +7 -7
@@ -5,13 +5,13 @@
5
5
  * Main entry point for all HuggingFace operations.
6
6
  */
7
7
 
8
- import { getTrainingDataAdapter } from '../adapter';
9
- import { ModelBenchmarkService } from '../benchmark/ModelBenchmarkService';
10
- import { getExportToHuggingFace } from '../dependencies';
11
- import { logger } from '../utils';
12
- import { HuggingFaceDatasetUploader } from './HuggingFaceDatasetUploader';
13
- import { HuggingFaceModelUploader } from './HuggingFaceModelUploader';
14
- import { getHuggingFaceToken } from './shared/HuggingFaceUploadUtil';
8
+ import { getTrainingDataAdapter } from "../adapter";
9
+ import { ModelBenchmarkService } from "../benchmark/ModelBenchmarkService";
10
+ import { getExportToHuggingFace } from "../dependencies";
11
+ import { logger } from "../utils";
12
+ import { HuggingFaceDatasetUploader } from "./HuggingFaceDatasetUploader";
13
+ import { HuggingFaceModelUploader } from "./HuggingFaceModelUploader";
14
+ import { getHuggingFaceToken } from "./shared/HuggingFaceUploadUtil";
15
15
 
16
16
  export interface WeeklyUploadResult {
17
17
  success: boolean;
@@ -49,13 +49,13 @@ export class HuggingFaceIntegrationService {
49
49
  * Execute complete weekly upload pipeline
50
50
  */
51
51
  async executeWeeklyUpload(
52
- options: DatasetUploadOptions = {}
52
+ options: DatasetUploadOptions = {},
53
53
  ): Promise<WeeklyUploadResult> {
54
54
  const startTime = Date.now();
55
55
  logger.info(
56
- 'Starting weekly upload pipeline',
56
+ "Starting weekly upload pipeline",
57
57
  options,
58
- 'HuggingFaceIntegration'
58
+ "HuggingFaceIntegration",
59
59
  );
60
60
 
61
61
  const result: WeeklyUploadResult = {
@@ -77,17 +77,16 @@ export class HuggingFaceIntegrationService {
77
77
  // Step 1: Upload benchmark dataset
78
78
  if (!options.dryRun) {
79
79
  logger.info(
80
- 'Step 1: Uploading benchmark dataset',
80
+ "Step 1: Uploading benchmark dataset",
81
81
  undefined,
82
- 'HuggingFaceIntegration'
82
+ "HuggingFaceIntegration",
83
83
  );
84
84
  const benchmarkResult = await this.datasetUploader.uploadDataset({
85
85
  datasetName:
86
86
  options.datasetName ||
87
87
  process.env.HF_DATASET_NAME ||
88
- 'elizaos/agent-benchmarks',
89
- description:
90
- 'Weekly benchmark results for autonomous ElizaOS agents',
88
+ "elizaos/agent-benchmarks",
89
+ description: "Weekly benchmark results for autonomous ElizaOS agents",
91
90
  });
92
91
 
93
92
  result.datasets.benchmarks = {
@@ -98,14 +97,14 @@ export class HuggingFaceIntegrationService {
98
97
 
99
98
  if (!benchmarkResult.success) {
100
99
  result.errors.push(
101
- `Benchmark dataset upload: ${benchmarkResult.error}`
100
+ `Benchmark dataset upload: ${benchmarkResult.error}`,
102
101
  );
103
102
  }
104
103
  } else {
105
104
  logger.info(
106
- 'DRY RUN: Skipping benchmark dataset upload',
105
+ "DRY RUN: Skipping benchmark dataset upload",
107
106
  undefined,
108
- 'HuggingFaceIntegration'
107
+ "HuggingFaceIntegration",
109
108
  );
110
109
  result.datasets.benchmarks.success = true;
111
110
  }
@@ -113,17 +112,17 @@ export class HuggingFaceIntegrationService {
113
112
  // Step 2: Upload trajectory dataset
114
113
  if (!options.dryRun) {
115
114
  logger.info(
116
- 'Step 2: Uploading trajectory dataset',
115
+ "Step 2: Uploading trajectory dataset",
117
116
  undefined,
118
- 'HuggingFaceIntegration'
117
+ "HuggingFaceIntegration",
119
118
  );
120
119
  const exportToHuggingFace = getExportToHuggingFace();
121
120
  const trajectoryResult = await exportToHuggingFace({
122
121
  datasetName:
123
122
  options.trajectoryDatasetName ||
124
123
  process.env.HF_TRAJECTORY_DATASET_NAME ||
125
- 'elizaos/agent-trajectories',
126
- format: 'jsonl',
124
+ "elizaos/agent-trajectories",
125
+ format: "jsonl",
127
126
  });
128
127
 
129
128
  result.datasets.trajectories = {
@@ -134,14 +133,14 @@ export class HuggingFaceIntegrationService {
134
133
 
135
134
  if (!trajectoryResult.success) {
136
135
  result.errors.push(
137
- `Trajectory dataset upload: ${trajectoryResult.error}`
136
+ `Trajectory dataset upload: ${trajectoryResult.error}`,
138
137
  );
139
138
  }
140
139
  } else {
141
140
  logger.info(
142
- 'DRY RUN: Skipping trajectory dataset upload',
141
+ "DRY RUN: Skipping trajectory dataset upload",
143
142
  undefined,
144
- 'HuggingFaceIntegration'
143
+ "HuggingFaceIntegration",
145
144
  );
146
145
  result.datasets.trajectories.success = true;
147
146
  }
@@ -154,7 +153,7 @@ export class HuggingFaceIntegrationService {
154
153
  logger.info(
155
154
  `Step 3: Found ${unbenchmarkedModels.length} unbenchmarked models`,
156
155
  undefined,
157
- 'HuggingFaceIntegration'
156
+ "HuggingFaceIntegration",
158
157
  );
159
158
 
160
159
  if (unbenchmarkedModels.length > 0) {
@@ -162,8 +161,8 @@ export class HuggingFaceIntegrationService {
162
161
  await ModelBenchmarkService.getStandardBenchmarkPaths();
163
162
 
164
163
  if (standardBenchmarks.length === 0) {
165
- const error = 'No standard benchmarks available for model evaluation';
166
- logger.error(error, undefined, 'HuggingFaceIntegration');
164
+ const error = "No standard benchmarks available for model evaluation";
165
+ logger.error(error, undefined, "HuggingFaceIntegration");
167
166
  result.errors.push(error);
168
167
  } else {
169
168
  for (const modelId of unbenchmarkedModels) {
@@ -172,7 +171,7 @@ export class HuggingFaceIntegrationService {
172
171
  logger.info(
173
172
  `Benchmarking model: ${modelId}`,
174
173
  undefined,
175
- 'HuggingFaceIntegration'
174
+ "HuggingFaceIntegration",
176
175
  );
177
176
  await ModelBenchmarkService.benchmarkModel({
178
177
  modelId,
@@ -186,14 +185,15 @@ export class HuggingFaceIntegrationService {
186
185
  await ModelBenchmarkService.compareToBaseline(modelId);
187
186
 
188
187
  // Upload if improved
189
- if (comparison.recommendation === 'deploy' && !options.dryRun) {
188
+ if (comparison.recommendation === "deploy" && !options.dryRun) {
190
189
  logger.info(
191
190
  `Model ${modelId} improved, uploading`,
192
191
  undefined,
193
- 'HuggingFaceIntegration'
192
+ "HuggingFaceIntegration",
194
193
  );
195
194
 
196
- const model = await getTrainingDataAdapter().getModelById(modelId);
195
+ const model =
196
+ await getTrainingDataAdapter().getModelById(modelId);
197
197
 
198
198
  if (model) {
199
199
  const modelName = options.modelNamePrefix
@@ -205,7 +205,7 @@ export class HuggingFaceIntegrationService {
205
205
  const modelDescription =
206
206
  options.modelDescriptionPrefix ||
207
207
  process.env.HF_MODEL_DESCRIPTION_PREFIX ||
208
- 'Autonomous ElizaOS agent';
208
+ "Autonomous ElizaOS agent";
209
209
 
210
210
  const uploadResult = await this.modelUploader.uploadModel({
211
211
  modelId,
@@ -218,10 +218,13 @@ export class HuggingFaceIntegrationService {
218
218
  result.models.uploaded++;
219
219
 
220
220
  // Update model with HuggingFace repo
221
- await getTrainingDataAdapter().updateModelHuggingFaceRepo(modelId, modelName);
221
+ await getTrainingDataAdapter().updateModelHuggingFaceRepo(
222
+ modelId,
223
+ modelName,
224
+ );
222
225
  } else {
223
226
  result.errors.push(
224
- `Model upload ${modelId}: ${uploadResult.error}`
227
+ `Model upload ${modelId}: ${uploadResult.error}`,
225
228
  );
226
229
  }
227
230
  }
@@ -229,7 +232,7 @@ export class HuggingFaceIntegrationService {
229
232
  logger.info(
230
233
  `Model ${modelId} not ready for deployment: ${comparison.recommendation}`,
231
234
  undefined,
232
- 'HuggingFaceIntegration'
235
+ "HuggingFaceIntegration",
233
236
  );
234
237
  }
235
238
  } catch (error) {
@@ -238,7 +241,7 @@ export class HuggingFaceIntegrationService {
238
241
  logger.error(
239
242
  `Failed to process model ${modelId}`,
240
243
  { error },
241
- 'HuggingFaceIntegration'
244
+ "HuggingFaceIntegration",
242
245
  );
243
246
  result.errors.push(`Model ${modelId}: ${errorMsg}`);
244
247
  }
@@ -250,7 +253,7 @@ export class HuggingFaceIntegrationService {
250
253
  result.duration = Date.now() - startTime;
251
254
 
252
255
  logger.info(
253
- 'Weekly upload pipeline complete',
256
+ "Weekly upload pipeline complete",
254
257
  {
255
258
  success: result.success,
256
259
  benchmarkDataset: result.datasets.benchmarks.success,
@@ -261,19 +264,19 @@ export class HuggingFaceIntegrationService {
261
264
  errors: result.errors.length,
262
265
  duration: result.duration,
263
266
  },
264
- 'HuggingFaceIntegration'
267
+ "HuggingFaceIntegration",
265
268
  );
266
269
 
267
270
  return result;
268
271
  } catch (error) {
269
272
  result.duration = Date.now() - startTime;
270
273
  result.errors.push(
271
- error instanceof Error ? error.message : String(error)
274
+ error instanceof Error ? error.message : String(error),
272
275
  );
273
276
  logger.error(
274
- 'Weekly upload pipeline failed',
277
+ "Weekly upload pipeline failed",
275
278
  { error },
276
- 'HuggingFaceIntegration'
279
+ "HuggingFaceIntegration",
277
280
  );
278
281
  return result;
279
282
  }
@@ -295,13 +298,16 @@ export class HuggingFaceIntegrationService {
295
298
  const adapter = getTrainingDataAdapter();
296
299
 
297
300
  // Get last upload time from database
298
- const lastUploadTime = (await adapter.getLastDeployedModelDate()) || new Date(0);
301
+ const lastUploadTime =
302
+ (await adapter.getLastDeployedModelDate()) || new Date(0);
299
303
 
300
304
  // Check for new benchmarks since last upload
301
- const newBenchmarksCount = await adapter.countBenchmarksSince(lastUploadTime);
305
+ const newBenchmarksCount =
306
+ await adapter.countBenchmarksSince(lastUploadTime);
302
307
 
303
308
  // Check for new trajectories since last upload
304
- const newTrajectoriesCount = await adapter.countTrajectoriesSince(lastUploadTime);
309
+ const newTrajectoriesCount =
310
+ await adapter.countTrajectoriesSince(lastUploadTime);
305
311
 
306
312
  // Check for unbenchmarked models
307
313
  const unbenchmarkedModels =
@@ -333,7 +339,7 @@ export class HuggingFaceIntegrationService {
333
339
  // Check HuggingFace token
334
340
  if (!getHuggingFaceToken()) {
335
341
  issues.push(
336
- 'HUGGING_FACE_TOKEN or HF_TOKEN environment variable not set'
342
+ "HUGGING_FACE_TOKEN or HF_TOKEN environment variable not set",
337
343
  );
338
344
  }
339
345
 
@@ -343,10 +349,10 @@ export class HuggingFaceIntegrationService {
343
349
  try {
344
350
  const healthy = await adapter.healthCheck();
345
351
  if (!healthy) {
346
- issues.push('Cannot connect to database');
352
+ issues.push("Cannot connect to database");
347
353
  }
348
354
  } catch {
349
- issues.push('Cannot connect to database');
355
+ issues.push("Cannot connect to database");
350
356
  }
351
357
 
352
358
  // Check for standard benchmarks
@@ -354,7 +360,7 @@ export class HuggingFaceIntegrationService {
354
360
  await ModelBenchmarkService.getStandardBenchmarkPaths();
355
361
  if (standardBenchmarks.length === 0) {
356
362
  warnings.push(
357
- 'No standard benchmarks found. Generate benchmark fixtures before upload.'
363
+ "No standard benchmarks found. Generate benchmark fixtures before upload.",
358
364
  );
359
365
  }
360
366
 
@@ -364,21 +370,21 @@ export class HuggingFaceIntegrationService {
364
370
 
365
371
  if (stats.benchmarkCount === 0) {
366
372
  warnings.push(
367
- 'No benchmark results in database. Run some benchmarks first.'
373
+ "No benchmark results in database. Run some benchmarks first.",
368
374
  );
369
375
  }
370
376
 
371
377
  if (stats.trajectoryTraining === 0) {
372
378
  warnings.push(
373
- 'No training trajectories in database. Generate with agents or test data.'
379
+ "No training trajectories in database. Generate with agents or test data.",
374
380
  );
375
381
  }
376
382
 
377
383
  if (stats.modelTotal === 0) {
378
- warnings.push('No trained models in database.');
384
+ warnings.push("No trained models in database.");
379
385
  }
380
386
  } catch {
381
- issues.push('Could not retrieve training statistics');
387
+ issues.push("Could not retrieve training statistics");
382
388
  }
383
389
 
384
390
  return {
@@ -4,20 +4,20 @@
4
4
  * Uploads trained RL models to HuggingFace Hub with benchmark results and model cards.
5
5
  */
6
6
 
7
- import { getTrainingDataAdapter } from '../adapter';
8
- import { promises as fs } from 'fs';
9
- import * as path from 'path';
7
+ import { promises as fs } from "node:fs";
8
+ import * as path from "node:path";
9
+ import { getTrainingDataAdapter } from "../adapter";
10
10
  import {
11
11
  type JsonValue,
12
12
  parseSimulationMetrics,
13
- } from '../benchmark/parseSimulationMetrics';
14
- import type { SimulationMetrics } from '../benchmark/SimulationEngine';
15
- import { logger } from '../utils';
13
+ } from "../benchmark/parseSimulationMetrics";
14
+ import type { SimulationMetrics } from "../benchmark/SimulationEngine";
15
+ import { logger } from "../utils";
16
16
  import {
17
17
  getHuggingFaceToken,
18
18
  HuggingFaceUploadUtil,
19
19
  requireHuggingFaceToken,
20
- } from './shared/HuggingFaceUploadUtil';
20
+ } from "./shared/HuggingFaceUploadUtil";
21
21
 
22
22
  /**
23
23
  * Simplified benchmark result for HuggingFace model cards
@@ -76,7 +76,7 @@ export class HuggingFaceModelUploader {
76
76
  */
77
77
  async uploadModel(options: ModelUploadOptions): Promise<ModelUploadResult> {
78
78
  try {
79
- logger.info('Starting HuggingFace model upload', {
79
+ logger.info("Starting HuggingFace model upload", {
80
80
  modelId: options.modelId,
81
81
  });
82
82
 
@@ -93,11 +93,11 @@ export class HuggingFaceModelUploader {
93
93
  }
94
94
 
95
95
  // Step 2: Get benchmark results
96
- logger.info('Loading benchmark results', { modelId: options.modelId });
96
+ logger.info("Loading benchmark results", { modelId: options.modelId });
97
97
  const modelBenchmarks = await this.getBenchmarkResults(options.modelId);
98
98
 
99
99
  if (modelBenchmarks.length === 0) {
100
- logger.warn('No benchmark results found for model', {
100
+ logger.warn("No benchmark results found for model", {
101
101
  modelId: options.modelId,
102
102
  });
103
103
  }
@@ -117,15 +117,15 @@ export class HuggingFaceModelUploader {
117
117
  // Step 4: Create output directory
118
118
  const outputDir =
119
119
  options.outputDir ||
120
- path.join(process.cwd(), 'exports', 'models', model.version);
120
+ path.join(process.cwd(), "exports", "models", model.version);
121
121
  await fs.mkdir(outputDir, { recursive: true });
122
122
 
123
123
  // Step 5: Generate model card
124
- logger.info('Generating model card');
124
+ logger.info("Generating model card");
125
125
  await this.generateModelCard(cardData, outputDir);
126
126
 
127
127
  // Step 6: Save metadata
128
- const metadataPath = path.join(outputDir, 'model_metadata.json');
128
+ const metadataPath = path.join(outputDir, "model_metadata.json");
129
129
  await fs.writeFile(
130
130
  metadataPath,
131
131
  JSON.stringify(
@@ -141,42 +141,42 @@ export class HuggingFaceModelUploader {
141
141
  accuracy: model.accuracy,
142
142
  },
143
143
  null,
144
- 2
145
- )
144
+ 2,
145
+ ),
146
146
  );
147
147
 
148
148
  // Step 7: Save benchmark results
149
- const benchmarksPath = path.join(outputDir, 'benchmark_results.json');
149
+ const benchmarksPath = path.join(outputDir, "benchmark_results.json");
150
150
  await fs.writeFile(
151
151
  benchmarksPath,
152
- JSON.stringify(modelBenchmarks, null, 2)
152
+ JSON.stringify(modelBenchmarks, null, 2),
153
153
  );
154
154
 
155
155
  // Step 8: Upload to HuggingFace (if weights available and requested)
156
156
  let filesUploaded = 2; // README.md + metadata
157
157
 
158
158
  if (options.includeWeights && model.storagePath) {
159
- logger.info('Uploading model to HuggingFace', {
159
+ logger.info("Uploading model to HuggingFace", {
160
160
  modelName: options.modelName,
161
161
  });
162
162
  const uploadCount = await this.uploadToHub(
163
163
  options.modelName,
164
164
  outputDir,
165
- options.private ?? false
165
+ options.private ?? false,
166
166
  );
167
167
  filesUploaded = uploadCount;
168
168
  } else {
169
169
  logger.info(
170
- 'Skipping model weight upload (not requested or no weights available)'
170
+ "Skipping model weight upload (not requested or no weights available)",
171
171
  );
172
172
  }
173
173
 
174
174
  const modelUrl = `https://huggingface.co/${options.modelName}`;
175
175
 
176
- logger.info('Model uploaded successfully', { modelUrl, filesUploaded });
176
+ logger.info("Model uploaded successfully", { modelUrl, filesUploaded });
177
177
 
178
178
  // Update model status in database
179
- await adapter.updateModelStatus(options.modelId, 'deployed', {
179
+ await adapter.updateModelStatus(options.modelId, "deployed", {
180
180
  deployedAt: new Date(),
181
181
  });
182
182
 
@@ -187,12 +187,12 @@ export class HuggingFaceModelUploader {
187
187
  filesUploaded,
188
188
  };
189
189
  } catch (error) {
190
- logger.error('Failed to upload model', { error });
190
+ logger.error("Failed to upload model", { error });
191
191
  return {
192
192
  success: false,
193
193
  modelId: options.modelId,
194
194
  filesUploaded: 0,
195
- error: error instanceof Error ? error.message : 'Unknown error',
195
+ error: error instanceof Error ? error.message : "Unknown error",
196
196
  };
197
197
  }
198
198
  }
@@ -201,11 +201,12 @@ export class HuggingFaceModelUploader {
201
201
  * Get benchmark results for a model
202
202
  */
203
203
  private async getBenchmarkResults(
204
- modelId: string
204
+ modelId: string,
205
205
  ): Promise<ModelCardBenchmarkResult[]> {
206
206
  // Query benchmark results from database
207
207
  try {
208
- const results = await getTrainingDataAdapter().getBenchmarkResultsByModel(modelId);
208
+ const results =
209
+ await getTrainingDataAdapter().getBenchmarkResultsByModel(modelId);
209
210
 
210
211
  return results.map((r) => ({
211
212
  benchmarkId: r.benchmarkId,
@@ -214,7 +215,7 @@ export class HuggingFaceModelUploader {
214
215
  metrics: parseSimulationMetrics(r.detailedMetrics as JsonValue),
215
216
  }));
216
217
  } catch (error) {
217
- logger.warn('Could not load benchmark results from database', { error });
218
+ logger.warn("Could not load benchmark results from database", { error });
218
219
 
219
220
  // Fallback to files if database fails
220
221
  return await this.getBenchmarkResultsFromFiles(modelId);
@@ -225,18 +226,18 @@ export class HuggingFaceModelUploader {
225
226
  * Fallback: Get benchmark results from files
226
227
  */
227
228
  private async getBenchmarkResultsFromFiles(
228
- modelId: string
229
+ modelId: string,
229
230
  ): Promise<ModelCardBenchmarkResult[]> {
230
231
  const results: ModelCardBenchmarkResult[] = [];
231
232
 
232
233
  try {
233
- const benchmarksDir = path.join(process.cwd(), 'benchmarks');
234
+ const benchmarksDir = path.join(process.cwd(), "benchmarks");
234
235
  const files = await fs.readdir(benchmarksDir);
235
236
 
236
237
  for (const file of files) {
237
- if (file.endsWith('.json') && file.includes(modelId)) {
238
+ if (file.endsWith(".json") && file.includes(modelId)) {
238
239
  const filePath = path.join(benchmarksDir, file);
239
- const data = JSON.parse(await fs.readFile(filePath, 'utf-8'));
240
+ const data = JSON.parse(await fs.readFile(filePath, "utf-8"));
240
241
 
241
242
  if (data.metrics) {
242
243
  results.push({
@@ -248,7 +249,7 @@ export class HuggingFaceModelUploader {
248
249
  }
249
250
  }
250
251
  } catch (error) {
251
- logger.warn('Could not load benchmark results from files either', {
252
+ logger.warn("Could not load benchmark results from files either", {
252
253
  error,
253
254
  });
254
255
  }
@@ -260,7 +261,7 @@ export class HuggingFaceModelUploader {
260
261
  * Calculate average metrics across benchmarks
261
262
  */
262
263
  private calculateAverageMetrics(
263
- benchmarkResults: ModelCardBenchmarkResult[]
264
+ benchmarkResults: ModelCardBenchmarkResult[],
264
265
  ): {
265
266
  avgPnl: number;
266
267
  avgAccuracy: number;
@@ -278,15 +279,15 @@ export class HuggingFaceModelUploader {
278
279
 
279
280
  const totalPnl = benchmarkResults.reduce(
280
281
  (sum, r) => sum + r.metrics.totalPnl,
281
- 0
282
+ 0,
282
283
  );
283
284
  const totalAccuracy = benchmarkResults.reduce(
284
285
  (sum, r) => sum + r.metrics.predictionMetrics.accuracy,
285
- 0
286
+ 0,
286
287
  );
287
288
  const totalOptimality = benchmarkResults.reduce(
288
289
  (sum, r) => sum + r.metrics.optimalityScore,
289
- 0
290
+ 0,
290
291
  );
291
292
 
292
293
  return {
@@ -302,14 +303,14 @@ export class HuggingFaceModelUploader {
302
303
  */
303
304
  private async generateModelCard(
304
305
  data: ModelCardData,
305
- outputDir: string
306
+ outputDir: string,
306
307
  ): Promise<void> {
307
- const brandName = process.env.TRAINING_BRAND_NAME || 'ElizaOS';
308
- const brandOrg = process.env.TRAINING_BRAND_ORG || 'ElizaOS Contributors';
308
+ const brandName = process.env.TRAINING_BRAND_NAME || "ElizaOS";
309
+ const brandOrg = process.env.TRAINING_BRAND_ORG || "ElizaOS Contributors";
309
310
  const platformName =
310
- process.env.TRAINING_PLATFORM_NAME || 'ElizaOS-compatible runtimes';
311
- const brandTag = brandName.toLowerCase().replace(/\s+/g, '-');
312
- const citationKey = `${brandTag}_agent_${data.version.replace(/\./g, '_')}`;
311
+ process.env.TRAINING_PLATFORM_NAME || "ElizaOS-compatible runtimes";
312
+ const brandTag = brandName.toLowerCase().replace(/\s+/g, "-");
313
+ const citationKey = `${brandTag}_agent_${data.version.replace(/\./g, "_")}`;
313
314
 
314
315
  const card = `---
315
316
  license: mit
@@ -330,9 +331,9 @@ Autonomous agent trained with reinforcement learning for market-style decision m
330
331
 
331
332
  - **Version:** ${data.version}
332
333
  - **Base Model:** ${data.baseModel}
333
- - **Training Date:** ${data.trainedAt.toISOString().split('T')[0]}
334
+ - **Training Date:** ${data.trainedAt.toISOString().split("T")[0]}
334
335
  - **Model ID:** ${data.modelId}
335
- ${data.trainingRunId ? `- **Training Run:** ${data.trainingRunId}` : ''}
336
+ ${data.trainingRunId ? `- **Training Run:** ${data.trainingRunId}` : ""}
336
337
 
337
338
  ## Performance Metrics
338
339
 
@@ -351,7 +352,7 @@ ${
351
352
 
352
353
  ${this.generateBenchmarkTable(data.benchmarkResults)}
353
354
  `
354
- : 'No benchmark results available yet.'
355
+ : "No benchmark results available yet."
355
356
  }
356
357
 
357
358
  ## Training Details
@@ -374,7 +375,7 @@ This model was trained using Group Relative Policy Optimization (GRPO) via the A
374
375
 
375
376
  ### Compute Infrastructure
376
377
 
377
- - **Platform:** ${data.trainingRunId ? 'Atropos GRPO Training' : 'Local training'}
378
+ - **Platform:** ${data.trainingRunId ? "Atropos GRPO Training" : "Local training"}
378
379
  - **Training Time:** Continuous learning with hourly updates
379
380
 
380
381
  ## Intended Use
@@ -471,7 +472,7 @@ This model is part of a research project on autonomous agents in prediction mark
471
472
  For questions or issues, please open an issue on the repository.
472
473
  `;
473
474
 
474
- const cardPath = path.join(outputDir, 'README.md');
475
+ const cardPath = path.join(outputDir, "README.md");
475
476
  await fs.writeFile(cardPath, card);
476
477
  }
477
478
 
@@ -479,14 +480,14 @@ For questions or issues, please open an issue on the repository.
479
480
  * Generate benchmark results table
480
481
  */
481
482
  private generateBenchmarkTable(results: ModelCardBenchmarkResult[]): string {
482
- if (results.length === 0) return '';
483
+ if (results.length === 0) return "";
483
484
 
484
485
  let table =
485
- '| Benchmark | Date | P&L | Accuracy | Win Rate | Optimality |\n';
486
- table += '|-----------|------|-----|----------|----------|------------|\n';
486
+ "| Benchmark | Date | P&L | Accuracy | Win Rate | Optimality |\n";
487
+ table += "|-----------|------|-----|----------|----------|------------|\n";
487
488
 
488
489
  results.forEach((result) => {
489
- const date = new Date(result.runAt).toISOString().split('T')[0];
490
+ const date = new Date(result.runAt).toISOString().split("T")[0];
490
491
  table += `| ${result.benchmarkId.substring(0, 20)}... | ${date} | ${result.metrics.totalPnl.toFixed(2)} | ${(result.metrics.predictionMetrics.accuracy * 100).toFixed(1)}% | ${(result.metrics.perpMetrics.winRate * 100).toFixed(1)}% | ${result.metrics.optimalityScore.toFixed(1)} |\n`;
491
492
  });
492
493
 
@@ -500,31 +501,31 @@ For questions or issues, please open an issue on the repository.
500
501
  private async uploadToHub(
501
502
  modelName: string,
502
503
  localDir: string,
503
- _isPrivate: boolean
504
+ _isPrivate: boolean,
504
505
  ): Promise<number> {
505
506
  if (!this.huggingFaceToken) {
506
- throw new Error('HuggingFace token not configured');
507
+ throw new Error("HuggingFace token not configured");
507
508
  }
508
509
 
509
510
  try {
510
511
  // Use shared upload utility
511
512
  return await HuggingFaceUploadUtil.uploadDirectory(
512
513
  modelName,
513
- 'model',
514
+ "model",
514
515
  localDir,
515
- this.huggingFaceToken
516
+ this.huggingFaceToken,
516
517
  );
517
518
  } catch (error) {
518
- logger.error('Failed to upload to HuggingFace Hub', { error });
519
+ logger.error("Failed to upload to HuggingFace Hub", { error });
519
520
 
520
521
  // Provide helpful manual upload instructions
521
522
  const instructions = HuggingFaceUploadUtil.getManualUploadInstructions(
522
523
  modelName,
523
- 'model',
524
- localDir
524
+ "model",
525
+ localDir,
525
526
  );
526
527
 
527
- logger.info('To upload manually:', { instructions });
528
+ logger.info("To upload manually:", { instructions });
528
529
 
529
530
  throw error;
530
531
  }