@elizaos/training 2.0.0-alpha.21 → 2.0.0-alpha.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/.turbo/turbo-lint.log +2 -0
  2. package/.turbo/turbo-typecheck.log +1 -0
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/adapter.js +59 -0
  5. package/dist/archetypes/ArchetypeConfigService.js +510 -0
  6. package/dist/archetypes/derive-archetype.js +196 -0
  7. package/dist/archetypes/index.js +7 -0
  8. package/dist/benchmark/ArchetypeMatchupBenchmark.js +547 -0
  9. package/dist/benchmark/BenchmarkChartGenerator.js +632 -0
  10. package/dist/benchmark/BenchmarkDataGenerator.js +825 -0
  11. package/dist/benchmark/BenchmarkDataViewer.js +197 -0
  12. package/dist/benchmark/BenchmarkHistoryService.js +135 -0
  13. package/dist/benchmark/BenchmarkRunner.js +483 -0
  14. package/dist/benchmark/BenchmarkValidator.js +158 -0
  15. package/dist/benchmark/FastEvalRunner.js +133 -0
  16. package/dist/benchmark/MetricsValidator.js +104 -0
  17. package/dist/benchmark/MetricsVisualizer.js +775 -0
  18. package/dist/benchmark/ModelBenchmarkService.js +433 -0
  19. package/dist/benchmark/ModelRegistry.js +122 -0
  20. package/dist/benchmark/RulerBenchmarkIntegration.js +168 -0
  21. package/dist/benchmark/SimulationA2AInterface.js +683 -0
  22. package/dist/benchmark/SimulationEngine.js +522 -0
  23. package/dist/benchmark/TaskRunner.js +60 -0
  24. package/dist/benchmark/__tests__/BenchmarkRunner.test.js +409 -0
  25. package/dist/benchmark/__tests__/HeadToHead.test.js +105 -0
  26. package/dist/benchmark/index.js +23 -0
  27. package/dist/benchmark/parseSimulationMetrics.js +86 -0
  28. package/dist/benchmark/simulation-types.js +1 -0
  29. package/dist/dependencies.js +197 -0
  30. package/dist/generation/TrajectoryGenerator.js +244 -0
  31. package/dist/generation/index.js +6 -0
  32. package/dist/huggingface/HuggingFaceDatasetUploader.js +463 -0
  33. package/dist/huggingface/HuggingFaceIntegrationService.js +272 -0
  34. package/dist/huggingface/HuggingFaceModelUploader.js +385 -0
  35. package/dist/huggingface/index.js +9 -0
  36. package/dist/huggingface/shared/HuggingFaceUploadUtil.js +144 -0
  37. package/dist/index.js +41 -0
  38. package/dist/init-training.js +43 -0
  39. package/dist/metrics/TrajectoryMetricsExtractor.js +523 -0
  40. package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +628 -0
  41. package/dist/metrics/index.js +7 -0
  42. package/dist/metrics/types.js +21 -0
  43. package/dist/rubrics/__tests__/index.test.js +150 -0
  44. package/dist/rubrics/ass-kisser.js +83 -0
  45. package/dist/rubrics/degen.js +78 -0
  46. package/dist/rubrics/goody-twoshoes.js +82 -0
  47. package/dist/rubrics/index.js +184 -0
  48. package/dist/rubrics/information-trader.js +82 -0
  49. package/dist/rubrics/infosec.js +99 -0
  50. package/dist/rubrics/liar.js +102 -0
  51. package/dist/rubrics/perps-trader.js +85 -0
  52. package/dist/rubrics/researcher.js +79 -0
  53. package/dist/rubrics/scammer.js +80 -0
  54. package/dist/rubrics/social-butterfly.js +71 -0
  55. package/dist/rubrics/super-predictor.js +95 -0
  56. package/dist/rubrics/trader.js +65 -0
  57. package/dist/scoring/ArchetypeScoringService.js +301 -0
  58. package/dist/scoring/JudgePromptBuilder.js +401 -0
  59. package/dist/scoring/LLMJudgeCache.js +263 -0
  60. package/dist/scoring/index.js +8 -0
  61. package/dist/training/AutomationPipeline.js +714 -0
  62. package/dist/training/BenchmarkService.js +370 -0
  63. package/dist/training/ConfigValidator.js +153 -0
  64. package/dist/training/MarketOutcomesTracker.js +142 -0
  65. package/dist/training/ModelDeployer.js +128 -0
  66. package/dist/training/ModelFetcher.js +48 -0
  67. package/dist/training/ModelSelectionService.js +248 -0
  68. package/dist/training/ModelUsageVerifier.js +106 -0
  69. package/dist/training/MultiModelOrchestrator.js +349 -0
  70. package/dist/training/RLModelConfig.js +295 -0
  71. package/dist/training/RewardBackpropagationService.js +117 -0
  72. package/dist/training/RulerScoringService.js +450 -0
  73. package/dist/training/TrainingMonitor.js +108 -0
  74. package/dist/training/TrajectoryRecorder.js +281 -0
  75. package/dist/training/__tests__/TrajectoryRecorder.test.js +363 -0
  76. package/dist/training/index.js +30 -0
  77. package/dist/training/logRLConfig.js +29 -0
  78. package/dist/training/pipeline.js +80 -0
  79. package/dist/training/storage/ModelStorageService.js +190 -0
  80. package/dist/training/storage/TrainingDataArchiver.js +136 -0
  81. package/dist/training/storage/index.js +7 -0
  82. package/dist/training/types.js +6 -0
  83. package/dist/training/window-utils.js +100 -0
  84. package/dist/utils/index.js +73 -0
  85. package/dist/utils/logger.js +55 -0
  86. package/dist/utils/snowflake.js +15 -0
  87. package/dist/utils/synthetic-detector.js +67 -0
  88. package/package.json +2 -2
  89. package/research-output/training-runs/training-run-1773742857616.json +38 -0
  90. package/research-output/training-runs/training-run-1773742946977.json +38 -0
  91. package/research-output/training-runs/training-run-1773743278891.json +38 -0
  92. package/research-output/training-runs/training-run-1773743409754.json +38 -0
  93. package/research-output/training-runs/training-run-1773743651086.json +38 -0
  94. package/research-output/training-runs/training-run-1773743782883.json +38 -0
@@ -0,0 +1,272 @@
1
+ /**
2
+ * HuggingFace Integration Service
3
+ *
4
+ * Orchestrates the complete HuggingFace integration pipeline.
5
+ * Main entry point for all HuggingFace operations.
6
+ */
7
+ import { getTrainingDataAdapter } from "../adapter";
8
+ import { ModelBenchmarkService } from "../benchmark/ModelBenchmarkService";
9
+ import { getExportToHuggingFace } from "../dependencies";
10
+ import { logger } from "../utils";
11
+ import { HuggingFaceDatasetUploader } from "./HuggingFaceDatasetUploader";
12
+ import { HuggingFaceModelUploader } from "./HuggingFaceModelUploader";
13
+ import { getHuggingFaceToken } from "./shared/HuggingFaceUploadUtil";
14
/**
 * Orchestrates the HuggingFace publishing pipeline: dataset uploads, model
 * benchmarking, and upload of models whose baseline comparison recommends
 * deployment. Also exposes readiness checks and summary statistics.
 */
export class HuggingFaceIntegrationService {
    /** Uploader used for the benchmark dataset. */
    datasetUploader;
    /** Uploader used for model repositories. */
    modelUploader;
    constructor() {
        this.datasetUploader = new HuggingFaceDatasetUploader();
        this.modelUploader = new HuggingFaceModelUploader();
    }
    /**
     * Execute the complete weekly upload pipeline.
     *
     * Steps: (1) upload the benchmark dataset, (2) export the trajectory
     * dataset, (3) benchmark every unbenchmarked model and upload those whose
     * baseline comparison recommends "deploy".
     *
     * @param {object} [options]
     * @param {boolean} [options.dryRun] Skip every upload. Models are still
     *   benchmarked (with `saveResults: true`) during a dry run.
     * @param {string} [options.datasetName] Benchmark dataset repo; falls back
     *   to `HF_DATASET_NAME`, then "elizaos/agent-benchmarks".
     * @param {string} [options.trajectoryDatasetName] Trajectory dataset repo;
     *   falls back to `HF_TRAJECTORY_DATASET_NAME`, then "elizaos/agent-trajectories".
     * @param {string} [options.modelNamePrefix] Prefix for model repo names;
     *   falls back to `HF_MODEL_NAME`, then "elizaos/agent".
     * @param {string} [options.modelDescriptionPrefix] Prefix for model
     *   descriptions; falls back to `HF_MODEL_DESCRIPTION_PREFIX`.
     * @returns {Promise<object>} Result with `success`, `datasets`, `models`,
     *   `errors` and `duration` (ms). Never rejects: unexpected failures are
     *   appended to `errors` and `success` stays false.
     */
    async executeWeeklyUpload(options = {}) {
        const startTime = Date.now();
        logger.info("Starting weekly upload pipeline", options, "HuggingFaceIntegration");
        const result = {
            success: false,
            datasets: {
                benchmarks: { success: false },
                trajectories: { success: false },
            },
            models: {
                processed: 0,
                benchmarked: 0,
                uploaded: 0,
            },
            errors: [],
            duration: 0,
        };
        try {
            // Step 1: Upload benchmark dataset
            if (!options.dryRun) {
                logger.info("Step 1: Uploading benchmark dataset", undefined, "HuggingFaceIntegration");
                const benchmarkResult = await this.datasetUploader.uploadDataset({
                    datasetName: options.datasetName ||
                        process.env.HF_DATASET_NAME ||
                        "elizaos/agent-benchmarks",
                    description: "Weekly benchmark results for autonomous ElizaOS agents",
                });
                result.datasets.benchmarks = {
                    success: benchmarkResult.success,
                    url: benchmarkResult.datasetUrl,
                    error: benchmarkResult.error,
                };
                if (!benchmarkResult.success) {
                    result.errors.push(`Benchmark dataset upload: ${benchmarkResult.error}`);
                }
            }
            else {
                logger.info("DRY RUN: Skipping benchmark dataset upload", undefined, "HuggingFaceIntegration");
                result.datasets.benchmarks.success = true;
            }
            // Step 2: Upload trajectory dataset
            if (!options.dryRun) {
                logger.info("Step 2: Uploading trajectory dataset", undefined, "HuggingFaceIntegration");
                const exportToHuggingFace = getExportToHuggingFace();
                const trajectoryResult = await exportToHuggingFace({
                    datasetName: options.trajectoryDatasetName ||
                        process.env.HF_TRAJECTORY_DATASET_NAME ||
                        "elizaos/agent-trajectories",
                    format: "jsonl",
                });
                result.datasets.trajectories = {
                    success: trajectoryResult.success,
                    url: trajectoryResult.url,
                    error: trajectoryResult.error,
                };
                if (!trajectoryResult.success) {
                    result.errors.push(`Trajectory dataset upload: ${trajectoryResult.error}`);
                }
            }
            else {
                logger.info("DRY RUN: Skipping trajectory dataset upload", undefined, "HuggingFaceIntegration");
                result.datasets.trajectories.success = true;
            }
            // Step 3: Process models
            const unbenchmarkedModels = await ModelBenchmarkService.getUnbenchmarkedModels();
            result.models.processed = unbenchmarkedModels.length;
            logger.info(`Step 3: Found ${unbenchmarkedModels.length} unbenchmarked models`, undefined, "HuggingFaceIntegration");
            if (unbenchmarkedModels.length > 0) {
                const standardBenchmarks = await ModelBenchmarkService.getStandardBenchmarkPaths();
                if (standardBenchmarks.length === 0) {
                    const error = "No standard benchmarks available for model evaluation";
                    logger.error(error, undefined, "HuggingFaceIntegration");
                    result.errors.push(error);
                }
                else {
                    for (const modelId of unbenchmarkedModels) {
                        // Each model is isolated: a failure is recorded and the loop continues.
                        try {
                            logger.info(`Benchmarking model: ${modelId}`, undefined, "HuggingFaceIntegration");
                            await ModelBenchmarkService.benchmarkModel({
                                modelId,
                                benchmarkPaths: standardBenchmarks,
                                saveResults: true,
                            });
                            result.models.benchmarked++;
                            const comparison = await ModelBenchmarkService.compareToBaseline(modelId);
                            if (comparison.recommendation !== "deploy") {
                                logger.info(`Model ${modelId} not ready for deployment: ${comparison.recommendation}`, undefined, "HuggingFaceIntegration");
                                continue;
                            }
                            if (options.dryRun) {
                                // Separate branch so a deployable model is not reported as "not ready".
                                logger.info(`DRY RUN: Skipping upload of model ${modelId}`, undefined, "HuggingFaceIntegration");
                                continue;
                            }
                            logger.info(`Model ${modelId} improved, uploading`, undefined, "HuggingFaceIntegration");
                            const model = await getTrainingDataAdapter().getModelById(modelId);
                            if (!model) {
                                // Surface the skipped upload instead of dropping it silently.
                                const error = `Model ${modelId}: not found in database, upload skipped`;
                                logger.error(error, undefined, "HuggingFaceIntegration");
                                result.errors.push(error);
                                continue;
                            }
                            const namePrefix = options.modelNamePrefix || process.env.HF_MODEL_NAME;
                            const modelName = namePrefix
                                ? `${namePrefix}-${model.version}`
                                : `elizaos/agent-${model.version}`;
                            const modelDescription = options.modelDescriptionPrefix ||
                                process.env.HF_MODEL_DESCRIPTION_PREFIX ||
                                "Autonomous ElizaOS agent";
                            const uploadResult = await this.modelUploader.uploadModel({
                                modelId,
                                modelName,
                                description: `${modelDescription} - v${model.version}`,
                                includeWeights: true,
                            });
                            if (uploadResult.success) {
                                result.models.uploaded++;
                                // Record which HuggingFace repo now hosts this model.
                                await getTrainingDataAdapter().updateModelHuggingFaceRepo(modelId, modelName);
                            }
                            else {
                                result.errors.push(`Model upload ${modelId}: ${uploadResult.error}`);
                            }
                        }
                        catch (error) {
                            const errorMsg = error instanceof Error ? error.message : String(error);
                            logger.error(`Failed to process model ${modelId}`, { error }, "HuggingFaceIntegration");
                            result.errors.push(`Model ${modelId}: ${errorMsg}`);
                        }
                    }
                }
            }
            result.success = result.errors.length === 0;
            result.duration = Date.now() - startTime;
            logger.info("Weekly upload pipeline complete", {
                success: result.success,
                benchmarkDataset: result.datasets.benchmarks.success,
                trajectoryDataset: result.datasets.trajectories.success,
                modelsProcessed: result.models.processed,
                modelsBenchmarked: result.models.benchmarked,
                modelsUploaded: result.models.uploaded,
                errors: result.errors.length,
                duration: result.duration,
            }, "HuggingFaceIntegration");
            return result;
        }
        catch (error) {
            // Anything thrown outside the per-model loop aborts the remaining steps.
            result.duration = Date.now() - startTime;
            result.errors.push(error instanceof Error ? error.message : String(error));
            logger.error("Weekly upload pipeline failed", { error }, "HuggingFaceIntegration");
            return result;
        }
    }
    /**
     * Check if new data is available for upload.
     *
     * The reference point is the adapter's last deployed model date, or the
     * Unix epoch when there is none.
     *
     * @returns {Promise<object>} Flags for new benchmarks, new trajectories and
     *   unbenchmarked models, plus a `details` object with the reference date
     *   and counts.
     */
    async hasNewDataToUpload() {
        const adapter = getTrainingDataAdapter();
        const lastUploadTime = (await adapter.getLastDeployedModelDate()) || new Date(0);
        // The three lookups are independent of each other, so run them together.
        const [newBenchmarksCount, newTrajectoriesCount, unbenchmarkedModels] = await Promise.all([
            adapter.countBenchmarksSince(lastUploadTime),
            adapter.countTrajectoriesSince(lastUploadTime),
            ModelBenchmarkService.getUnbenchmarkedModels(),
        ]);
        return {
            hasNewBenchmarks: newBenchmarksCount > 0,
            hasNewTrajectories: newTrajectoriesCount > 0,
            hasUnbenchmarkedModels: unbenchmarkedModels.length > 0,
            details: {
                newBenchmarksSince: lastUploadTime,
                newTrajectoriesCount,
                unbenchmarkedModels: unbenchmarkedModels.length,
            },
        };
    }
    /**
     * Validate that the system is ready for HuggingFace operations.
     *
     * Blocking problems go to `issues`; non-blocking observations go to
     * `warnings`. Every individual check is guarded so that one failing check
     * is reported instead of aborting the whole validation.
     *
     * @returns {Promise<{ready: boolean, issues: string[], warnings: string[]}>}
     *   `ready` is true only when `issues` is empty.
     */
    async validateSystemReadiness() {
        const issues = [];
        const warnings = [];
        const describe = (error) => (error instanceof Error ? error.message : String(error));
        // Check HuggingFace token
        if (!getHuggingFaceToken()) {
            issues.push("HUGGING_FACE_TOKEN or HF_TOKEN environment variable not set");
        }
        const adapter = getTrainingDataAdapter();
        // Check database connection
        try {
            const healthy = await adapter.healthCheck();
            if (!healthy) {
                issues.push("Cannot connect to database");
            }
        }
        catch (error) {
            logger.warn("Database health check failed", { error }, "HuggingFaceIntegration");
            issues.push("Cannot connect to database");
        }
        // Check for standard benchmarks
        try {
            const standardBenchmarks = await ModelBenchmarkService.getStandardBenchmarkPaths();
            if (standardBenchmarks.length === 0) {
                warnings.push("No standard benchmarks found. Generate benchmark fixtures before upload.");
            }
        }
        catch (error) {
            logger.warn("Could not load standard benchmarks", { error }, "HuggingFaceIntegration");
            issues.push(`Could not load standard benchmarks: ${describe(error)}`);
        }
        // Check for data using training statistics
        try {
            const stats = await adapter.getTrainingStatistics();
            if (stats.benchmarkCount === 0) {
                warnings.push("No benchmark results in database. Run some benchmarks first.");
            }
            if (stats.trajectoryTraining === 0) {
                warnings.push("No training trajectories in database. Generate with agents or test data.");
            }
            if (stats.modelTotal === 0) {
                warnings.push("No trained models in database.");
            }
        }
        catch (error) {
            logger.warn("Could not retrieve training statistics", { error }, "HuggingFaceIntegration");
            issues.push("Could not retrieve training statistics");
        }
        return {
            ready: issues.length === 0,
            issues,
            warnings,
        };
    }
    /**
     * Get integration statistics derived from the adapter's training statistics.
     *
     * `huggingface.datasetsPublished` is an estimate: it counts one dataset when
     * any benchmark results exist and one when any training trajectories exist.
     *
     * @returns {Promise<object>} Totals grouped under `benchmarks`,
     *   `trajectories`, `models` and `huggingface`.
     */
    async getStatistics() {
        const stats = await getTrainingDataAdapter().getTrainingStatistics();
        return {
            benchmarks: {
                total: stats.benchmarkCount,
                lastUpload: stats.lastBenchmarkDate ?? undefined,
            },
            trajectories: {
                total: stats.trajectoryTotal,
                training: stats.trajectoryTraining,
            },
            models: {
                total: stats.modelTotal,
                benchmarked: stats.modelBenchmarked,
                deployed: stats.modelDeployed,
            },
            huggingface: {
                datasetsPublished: (stats.benchmarkCount > 0 ? 1 : 0) +
                    (stats.trajectoryTraining > 0 ? 1 : 0),
                modelsPublished: stats.publishedRepoCount,
            },
        };
    }
}
272
/** Module-level instance of HuggingFaceIntegrationService, constructed when this module is first loaded. */
+ export const huggingFaceIntegration = new HuggingFaceIntegrationService();
@@ -0,0 +1,385 @@
1
+ /**
2
+ * HuggingFace Model Uploader
3
+ *
4
+ * Uploads trained RL models to HuggingFace Hub with benchmark results and model cards.
5
+ */
6
+ import { promises as fs } from "node:fs";
7
+ import * as path from "node:path";
8
+ import { getTrainingDataAdapter } from "../adapter";
9
+ import { parseSimulationMetrics, } from "../benchmark/parseSimulationMetrics";
10
+ import { logger } from "../utils";
11
+ import { getHuggingFaceToken, HuggingFaceUploadUtil, requireHuggingFaceToken, } from "./shared/HuggingFaceUploadUtil";
12
/**
 * Coerce a metric to a finite number, substituting 0 for anything else.
 * Benchmark files loaded from disk are not validated, so nested metric fields
 * may be missing or malformed.
 */
const finiteOrZero = (value) => (typeof value === "number" && Number.isFinite(value) ? value : 0);
/**
 * Uploads trained models to HuggingFace Hub together with a generated model
 * card, a metadata file and the model's benchmark results.
 */
export class HuggingFaceModelUploader {
    /** HuggingFace API token; resolved lazily in `uploadModel` when not supplied. */
    huggingFaceToken;
    /**
     * @param {string} [huggingFaceToken] Explicit token; falls back to
     *   `getHuggingFaceToken()`.
     */
    constructor(huggingFaceToken) {
        this.huggingFaceToken = huggingFaceToken || getHuggingFaceToken();
    }
    /**
     * Generate the model card, metadata and benchmark files for a model and,
     * when requested and the model has a storage path, upload the output
     * directory to HuggingFace Hub.
     *
     * The model is marked "deployed" only after an actual upload. When the
     * upload is skipped the files are still written locally, `success` is true
     * and `filesUploaded` is 0.
     *
     * @param {object} options
     * @param {string} options.modelId Model to look up through the adapter.
     * @param {string} options.modelName Target HuggingFace repo name.
     * @param {boolean} [options.includeWeights] Upload is attempted only when
     *   this is truthy and the model has a `storagePath`.
     * @param {string} [options.outputDir] Where files are written; defaults to
     *   `<cwd>/exports/models/<version>`.
     * @param {boolean} [options.private] Requested repo visibility (see
     *   `uploadToHub`: currently not forwarded to the upload utility).
     * @returns {Promise<object>} `{ success, modelUrl?, modelId, filesUploaded, error? }`.
     *   Never rejects; failures are reported through `success: false` and `error`.
     */
    async uploadModel(options) {
        try {
            logger.info("Starting HuggingFace model upload", {
                modelId: options.modelId,
            });
            // Validate token (requireHuggingFaceToken throws if not set)
            const token = this.huggingFaceToken || requireHuggingFaceToken();
            this.huggingFaceToken = token;
            // Step 1: Load model from database
            const adapter = getTrainingDataAdapter();
            const model = await adapter.getModelById(options.modelId);
            if (!model) {
                throw new Error(`Model not found: ${options.modelId}`);
            }
            // Accept either a Date or a serialized timestamp from the adapter.
            const trainedAt = new Date(model.createdAt);
            // Step 2: Get benchmark results
            logger.info("Loading benchmark results", { modelId: options.modelId });
            const modelBenchmarks = await this.getBenchmarkResults(options.modelId);
            if (modelBenchmarks.length === 0) {
                logger.warn("No benchmark results found for model", {
                    modelId: options.modelId,
                });
            }
            // Step 3: Prepare model card data
            const cardData = {
                modelId: model.modelId,
                modelName: options.modelName,
                version: model.version,
                baseModel: model.baseModel,
                trainedAt,
                trainingRunId: model.trainingBatch || undefined,
                benchmarkResults: modelBenchmarks,
                metrics: this.calculateAverageMetrics(modelBenchmarks),
            };
            // Step 4: Create output directory
            const outputDir = options.outputDir ||
                path.join(process.cwd(), "exports", "models", model.version);
            await fs.mkdir(outputDir, { recursive: true });
            // Step 5: Generate model card (README.md)
            logger.info("Generating model card");
            await this.generateModelCard(cardData, outputDir);
            // Step 6: Save metadata
            const metadataPath = path.join(outputDir, "model_metadata.json");
            await fs.writeFile(metadataPath, JSON.stringify({
                modelId: model.modelId,
                version: model.version,
                baseModel: model.baseModel,
                storagePath: model.storagePath,
                trainingBatch: model.trainingBatch,
                trainedAt: trainedAt.toISOString(),
                benchmarkScore: model.benchmarkScore,
                avgReward: model.avgReward,
                accuracy: model.accuracy,
            }, null, 2));
            // Step 7: Save benchmark results
            const benchmarksPath = path.join(outputDir, "benchmark_results.json");
            await fs.writeFile(benchmarksPath, JSON.stringify(modelBenchmarks, null, 2));
            // Step 8: Upload to HuggingFace (if weights available and requested)
            const modelUrl = `https://huggingface.co/${options.modelName}`;
            let filesUploaded = 0;
            if (options.includeWeights && model.storagePath) {
                logger.info("Uploading model to HuggingFace", {
                    modelName: options.modelName,
                });
                filesUploaded = await this.uploadToHub(options.modelName, outputDir, options.private ?? false);
                logger.info("Model uploaded successfully", { modelUrl, filesUploaded });
                // Only a real upload may flip the model to "deployed".
                await adapter.updateModelStatus(options.modelId, "deployed", {
                    deployedAt: new Date(),
                });
            }
            else {
                logger.info("Skipping model weight upload (not requested or no weights available)");
                logger.info("Model files prepared locally; nothing was uploaded", { outputDir });
            }
            return {
                success: true,
                modelUrl,
                modelId: options.modelId,
                filesUploaded,
            };
        }
        catch (error) {
            logger.error("Failed to upload model", { error });
            return {
                success: false,
                modelId: options.modelId,
                filesUploaded: 0,
                error: error instanceof Error ? error.message : String(error),
            };
        }
    }
    /**
     * Get benchmark results for a model from the database, falling back to
     * benchmark files on disk when the database lookup fails.
     *
     * @param {string} modelId
     * @returns {Promise<Array<{benchmarkId: string, runAt: string, metrics: object}>>}
     */
    async getBenchmarkResults(modelId) {
        try {
            const results = await getTrainingDataAdapter().getBenchmarkResultsByModel(modelId);
            return results.map((r) => ({
                benchmarkId: r.benchmarkId,
                // Accept either a Date or a serialized timestamp.
                runAt: new Date(r.runAt).toISOString(),
                // detailedMetrics is stored as JSON in database, validate it matches SimulationMetrics
                metrics: parseSimulationMetrics(r.detailedMetrics),
            }));
        }
        catch (error) {
            logger.warn("Could not load benchmark results from database", { error });
            // Fallback to files if database fails
            return await this.getBenchmarkResultsFromFiles(modelId);
        }
    }
    /**
     * Fallback: read benchmark results from `<cwd>/benchmarks/*.json` files whose
     * name contains the model id. Files without a `metrics` field are ignored.
     * An unreadable or malformed file is logged and skipped so it cannot hide
     * the remaining results.
     *
     * @param {string} modelId
     * @returns {Promise<Array<{benchmarkId: string, runAt: string, metrics: object}>>}
     *   Possibly empty; never rejects.
     */
    async getBenchmarkResultsFromFiles(modelId) {
        const results = [];
        const benchmarksDir = path.join(process.cwd(), "benchmarks");
        let files;
        try {
            files = await fs.readdir(benchmarksDir);
        }
        catch (error) {
            logger.warn("Could not load benchmark results from files either", {
                error,
            });
            return results;
        }
        for (const file of files) {
            if (!file.endsWith(".json") || !file.includes(modelId)) {
                continue;
            }
            try {
                const filePath = path.join(benchmarksDir, file);
                const data = JSON.parse(await fs.readFile(filePath, "utf-8"));
                if (data?.metrics) {
                    results.push({
                        benchmarkId: data.benchmarkId || file,
                        runAt: data.runAt || new Date().toISOString(),
                        metrics: data.metrics,
                    });
                }
            }
            catch (error) {
                logger.warn("Skipping unreadable benchmark file", { file, error });
            }
        }
        return results;
    }
    /**
     * Calculate average metrics across benchmarks.
     *
     * Missing or non-finite metric values contribute 0 to the average.
     *
     * @param {Array<{metrics: object}>} benchmarkResults
     * @returns {{avgPnl: number, avgAccuracy: number, avgOptimality: number, benchmarkCount: number}}
     *   All zeros when the input is empty.
     */
    calculateAverageMetrics(benchmarkResults) {
        const count = benchmarkResults.length;
        if (count === 0) {
            return {
                avgPnl: 0,
                avgAccuracy: 0,
                avgOptimality: 0,
                benchmarkCount: 0,
            };
        }
        let totalPnl = 0;
        let totalAccuracy = 0;
        let totalOptimality = 0;
        for (const { metrics } of benchmarkResults) {
            totalPnl += finiteOrZero(metrics?.totalPnl);
            totalAccuracy += finiteOrZero(metrics?.predictionMetrics?.accuracy);
            totalOptimality += finiteOrZero(metrics?.optimalityScore);
        }
        return {
            avgPnl: totalPnl / count,
            avgAccuracy: totalAccuracy / count,
            avgOptimality: totalOptimality / count,
            benchmarkCount: count,
        };
    }
    /**
     * Generate the HuggingFace model card and write it to `<outputDir>/README.md`.
     *
     * Branding is read from `TRAINING_BRAND_NAME`, `TRAINING_BRAND_ORG` and
     * `TRAINING_PLATFORM_NAME`, each with an ElizaOS default.
     *
     * @param {object} data Card data assembled by `uploadModel`.
     * @param {string} outputDir Existing directory to write into.
     * @returns {Promise<void>}
     */
    async generateModelCard(data, outputDir) {
        const brandName = process.env.TRAINING_BRAND_NAME || "ElizaOS";
        const brandOrg = process.env.TRAINING_BRAND_ORG || "ElizaOS Contributors";
        const platformName = process.env.TRAINING_PLATFORM_NAME || "ElizaOS-compatible runtimes";
        const brandTag = brandName.toLowerCase().replace(/\s+/g, "-");
        const citationKey = `${brandTag}_agent_${data.version.replace(/\./g, "_")}`;
        // Accept either a Date or a serialized timestamp.
        const trainingDate = new Date(data.trainedAt).toISOString().split("T")[0];
        const card = `---
license: mit
library_name: transformers
tags:
- ${brandTag}
- reinforcement-learning
- trading-agent
- prediction-markets
base_model: ${data.baseModel}
---

# ${data.modelName}

Autonomous agent trained with reinforcement learning for market-style decision making.

## Model Details

- **Version:** ${data.version}
- **Base Model:** ${data.baseModel}
- **Training Date:** ${trainingDate}
- **Model ID:** ${data.modelId}
${data.trainingRunId ? `- **Training Run:** ${data.trainingRunId}` : ""}

## Performance Metrics

${data.benchmarkResults.length > 0
            ? `
### Benchmark Results (${data.benchmarkResults.length} runs)

| Metric | Value |
|--------|-------|
| Average P&L | ${data.metrics.avgPnl.toFixed(2)} |
| Average Accuracy | ${(data.metrics.avgAccuracy * 100).toFixed(1)}% |
| Average Optimality | ${data.metrics.avgOptimality.toFixed(1)} |

### Detailed Benchmark Results

${this.generateBenchmarkTable(data.benchmarkResults)}
`
            : "No benchmark results available yet."}

## Training Details

### Training Data

- **Source:** Autonomous agent trajectories
- **Collection Method:** Live agent gameplay on prediction markets
- **Training Framework:** Atropos GRPO
- **Base Model:** ${data.baseModel}

### Training Procedure

This model was trained using Group Relative Policy Optimization (GRPO) via the Atropos framework on trajectories collected from autonomous agents. The training process:

1. Agents generate trajectories through market interactions
2. Trajectories are scored using RLAIF with an LLM judge based on P&L, prediction accuracy, and decision quality
3. GRPO training optimizes policy to maximize expected rewards
4. Model checkpoints are evaluated on standardized benchmarks

### Compute Infrastructure

- **Platform:** ${data.trainingRunId ? "Atropos GRPO Training" : "Local training"}
- **Training Time:** Continuous learning with hourly updates

## Intended Use

This model is designed for:

- Autonomous market decision support and simulation
- Research on RL-based trading strategies
- Benchmarking agent decision-making
- Educational purposes

**Not intended for:**
- Production trading without human oversight
- Financial advice
- Real-money trading without risk management

## Evaluation

The model is evaluated on standardized benchmarks that include:

- **Prediction Market Trading:** Betting on binary outcomes with LMSR pricing
- **Perpetual Trading:** Long/short positions on crypto perps
- **Social Interaction:** Posts, group chats, and reputation building
- **Risk Management:** Position sizing and portfolio optimization

### Metrics

- **Total P&L:** Cumulative profit/loss across all positions
- **Prediction Accuracy:** Percentage of correct market predictions
- **Optimality Score:** Alignment with theoretically optimal actions (0-100)
- **Response Time:** Decision-making latency

## Usage

### Via ${platformName}

The model can be deployed in compatible runtimes and accessed via an agent API:

\`\`\`typescript
import { agentRuntimeManager } from '@elizaos/agents';

const runtime = await agentRuntimeManager.getRuntime(agentId);
const response = await runtime.chat({
messages: [{ role: 'user', content: 'Analyze this market...' }]
});
\`\`\`

### Direct Inference

If you have downloaded the model weights:

\`\`\`python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("${data.modelName}")
tokenizer = AutoTokenizer.from_pretrained("${data.modelName}")

# Use model for inference
inputs = tokenizer("Should I bet YES on this market?", return_tensors="pt")
outputs = model.generate(**inputs)
response = tokenizer.decode(outputs[0])
\`\`\`

## Limitations

- Trained on simulated market data; real-world performance may vary
- May not generalize to markets significantly different from training distribution
- Decision quality depends on market information quality
- No guarantees of profitability

## Ethical Considerations

This model is part of a research project on autonomous agents in prediction markets. Users should:

- Understand the risks of algorithmic trading
- Not rely solely on model decisions for financial outcomes
- Use appropriate risk management and position sizing
- Consider market impact and fairness implications

## Citation

\`\`\`bibtex
@model{${citationKey},
title = {${brandName} Trading Agent},
author = {${brandOrg}},
year = {${new Date().getFullYear()}},
version = {${data.version}},
url = {https://huggingface.co/${data.modelName}}
}
\`\`\`

## Model Card Contact

For questions or issues, please open an issue on the repository.
`;
        const cardPath = path.join(outputDir, "README.md");
        await fs.writeFile(cardPath, card);
    }
    /**
     * Generate the per-benchmark Markdown table used in the model card.
     *
     * Benchmark ids longer than 20 characters are truncated with "...".
     * Missing metric values render as 0 and an unparseable `runAt` renders as
     * "unknown", so one incomplete result cannot break the whole card.
     *
     * @param {Array<{benchmarkId: string, runAt: string, metrics: object}>} results
     * @returns {string} Markdown table (each row newline-terminated), or "" when
     *   there are no results.
     */
    generateBenchmarkTable(results) {
        if (results.length === 0) {
            return "";
        }
        const maxIdLength = 20;
        const header = "| Benchmark | Date | P&L | Accuracy | Win Rate | Optimality |\n" +
            "|-----------|------|-----|----------|----------|------------|\n";
        const rows = results.map((result) => {
            const id = String(result.benchmarkId);
            const label = id.length > maxIdLength ? `${id.substring(0, maxIdLength)}...` : id;
            const runAt = new Date(result.runAt);
            const date = Number.isNaN(runAt.getTime()) ? "unknown" : runAt.toISOString().split("T")[0];
            const metrics = result.metrics;
            const pnl = finiteOrZero(metrics?.totalPnl).toFixed(2);
            const accuracy = (finiteOrZero(metrics?.predictionMetrics?.accuracy) * 100).toFixed(1);
            const winRate = (finiteOrZero(metrics?.perpMetrics?.winRate) * 100).toFixed(1);
            const optimality = finiteOrZero(metrics?.optimalityScore).toFixed(1);
            return `| ${label} | ${date} | ${pnl} | ${accuracy}% | ${winRate}% | ${optimality} |\n`;
        });
        return header + rows.join("");
    }
    /**
     * Upload the contents of a local directory to a HuggingFace model repo via
     * the shared upload utility.
     *
     * @param {string} modelName Target repo name.
     * @param {string} localDir Directory whose files are uploaded.
     * @param {boolean} _isPrivate Requested visibility. It is NOT forwarded to
     *   the upload utility; a warning is logged when it is true.
     * @returns {Promise<number>} Value returned by `uploadDirectory`, which
     *   `uploadModel` reports as the uploaded file count.
     * @throws {Error} When no token is configured, or when the upload fails
     *   (after logging manual upload instructions).
     */
    async uploadToHub(modelName, localDir, _isPrivate) {
        if (!this.huggingFaceToken) {
            throw new Error("HuggingFace token not configured");
        }
        if (_isPrivate) {
            logger.warn("Private repository requested, but visibility is not forwarded to the upload utility; verify the repository visibility on HuggingFace", { modelName });
        }
        try {
            // Use shared upload utility
            return await HuggingFaceUploadUtil.uploadDirectory(modelName, "model", localDir, this.huggingFaceToken);
        }
        catch (error) {
            logger.error("Failed to upload to HuggingFace Hub", { error });
            // Provide helpful manual upload instructions
            const instructions = HuggingFaceUploadUtil.getManualUploadInstructions(modelName, "model", localDir);
            logger.info("To upload manually:", { instructions });
            throw error;
        }
    }
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * HuggingFace Integration Module
3
+ *
4
+ * Tools for uploading models and datasets to HuggingFace Hub.
5
+ */
6
+ export { HuggingFaceDatasetUploader } from "./HuggingFaceDatasetUploader";
7
+ export { HuggingFaceIntegrationService, huggingFaceIntegration, } from "./HuggingFaceIntegrationService";
8
+ export { HuggingFaceModelUploader } from "./HuggingFaceModelUploader";
9
+ export { getHuggingFaceToken, HuggingFaceUploadUtil, requireHuggingFaceToken, } from "./shared/HuggingFaceUploadUtil";