@elizaos/training 2.0.0-alpha.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. package/Dockerfile +75 -0
  2. package/Makefile +374 -0
  3. package/README.md +346 -0
  4. package/config/rubrics.json +137 -0
  5. package/data/.gitkeep +0 -0
  6. package/data/degen/.gitkeep +2 -0
  7. package/data/trader/.gitkeep +2 -0
  8. package/docker-compose.test.yml +57 -0
  9. package/package.json +58 -0
  10. package/python/config/babylon_atropos.yaml +90 -0
  11. package/python/config/profiles/12gb.json +11 -0
  12. package/python/config/profiles/16gb.json +10 -0
  13. package/python/config/profiles/24gb.json +10 -0
  14. package/python/config/profiles/48gb.json +10 -0
  15. package/python/config/profiles/cpu.json +11 -0
  16. package/python/config/profiles/l40-2gpu-safe.json +20 -0
  17. package/python/config/profiles/l40-2gpu.json +22 -0
  18. package/python/config/profiles/l40-4gpu.json +21 -0
  19. package/python/config/profiles/l40.json +17 -0
  20. package/python/config/tinker_training.yaml +143 -0
  21. package/python/curriculum_state.json +165 -0
  22. package/python/env.template +86 -0
  23. package/python/env.training.template +46 -0
  24. package/python/pyproject.toml +41 -0
  25. package/python/requirements-ci.txt +31 -0
  26. package/python/requirements.txt +87 -0
  27. package/python/scripts/__init__.py +4 -0
  28. package/python/scripts/import_json_trajectories.py +412 -0
  29. package/python/scripts/local-finetune/README.md +63 -0
  30. package/python/scripts/local-finetune/ingest_and_score.py +139 -0
  31. package/python/scripts/local-finetune/merge_model.py +32 -0
  32. package/python/scripts/local-finetune/test_adapter.py +91 -0
  33. package/python/scripts/local-finetune/train_from_csv.py +132 -0
  34. package/python/scripts/merge_trajectories.py +318 -0
  35. package/python/scripts/run_ab_test.py +143 -0
  36. package/python/scripts/run_full_pipeline.py +544 -0
  37. package/python/scripts/run_tinker_training.py +192 -0
  38. package/python/scripts/run_training.py +914 -0
  39. package/python/scripts/test_judge.py +155 -0
  40. package/python/scripts/test_pipeline.py +356 -0
  41. package/python/scripts/test_trained_model.py +380 -0
  42. package/python/scripts/train_local.py +528 -0
  43. package/python/setup.py +20 -0
  44. package/python/src/__init__.py +190 -0
  45. package/python/src/data_bridge/__init__.py +24 -0
  46. package/python/src/data_bridge/converter.py +435 -0
  47. package/python/src/data_bridge/reader.py +393 -0
  48. package/python/src/models.py +283 -0
  49. package/python/src/training/__init__.py +605 -0
  50. package/python/src/training/ab_testing.py +404 -0
  51. package/python/src/training/action_executor.py +621 -0
  52. package/python/src/training/archetype_trainer.py +347 -0
  53. package/python/src/training/atropos_trainer.py +980 -0
  54. package/python/src/training/babylon_env.py +1254 -0
  55. package/python/src/training/error_recovery.py +647 -0
  56. package/python/src/training/evaluation.py +856 -0
  57. package/python/src/training/fast_simulator.py +880 -0
  58. package/python/src/training/format_validator.py +584 -0
  59. package/python/src/training/hybrid_env.py +522 -0
  60. package/python/src/training/kl_controller.py +628 -0
  61. package/python/src/training/multi_prompt_dataset.py +883 -0
  62. package/python/src/training/multi_turn.py +656 -0
  63. package/python/src/training/online_env.py +1084 -0
  64. package/python/src/training/quality_scorer.py +391 -0
  65. package/python/src/training/quality_utils.py +633 -0
  66. package/python/src/training/rewards.py +1344 -0
  67. package/python/src/training/rlaif_env.py +17 -0
  68. package/python/src/training/rollout_generator.py +502 -0
  69. package/python/src/training/rubric_loader.py +198 -0
  70. package/python/src/training/scenario_pool.py +1072 -0
  71. package/python/src/training/schemas.py +481 -0
  72. package/python/src/training/service_manager.py +552 -0
  73. package/python/src/training/simulation_bridge.py +535 -0
  74. package/python/src/training/tick_reward_attribution.py +399 -0
  75. package/python/src/training/tinker_client.py +575 -0
  76. package/python/src/training/tinker_trainer.py +646 -0
  77. package/python/src/training/tokenization_utils.py +402 -0
  78. package/python/tests/e2e/__init__.py +13 -0
  79. package/python/tests/e2e/conftest.py +258 -0
  80. package/python/tests/e2e/test_full_pipeline.py +643 -0
  81. package/python/tests/e2e/test_online_training_e2e.py +365 -0
  82. package/python/tests/integration/__init__.py +12 -0
  83. package/python/tests/integration/conftest.py +383 -0
  84. package/python/tests/integration/test_db_integration.py +649 -0
  85. package/python/tests/integration/test_json_mode_integration.py +554 -0
  86. package/python/tests/test_action_executor.py +594 -0
  87. package/python/tests/test_archetype_scoring.py +1027 -0
  88. package/python/tests/test_atropos_integration.py +360 -0
  89. package/python/tests/test_evaluation.py +727 -0
  90. package/python/tests/test_format_validator.py +486 -0
  91. package/python/tests/test_kl_controller.py +432 -0
  92. package/python/tests/test_lr_scheduler.py +579 -0
  93. package/python/tests/test_multi_turn.py +590 -0
  94. package/python/tests/test_online_env.py +519 -0
  95. package/python/tests/test_quality_scorer.py +474 -0
  96. package/python/tests/test_scenario_pool.py +735 -0
  97. package/python/tests/test_service_manager.py +585 -0
  98. package/python/tests/test_simulation_rollout.py +581 -0
  99. package/python/tests/test_tokenization_utils.py +501 -0
  100. package/python/tests/test_training_orchestrator.py +497 -0
  101. package/python/tests/test_training_output_structure.py +661 -0
  102. package/research-output/training-runs/training-run-1770772042899.json +26 -0
  103. package/research-output/training-runs/training-run-1770930079670.json +32 -0
  104. package/research-output/training-runs/training-run-1770930143700.json +44 -0
  105. package/research-output/training-runs/training-run-1770930183638.json +38 -0
  106. package/research-output/training-runs/training-run-1770930442049.json +38 -0
  107. package/research-output/training-runs/training-run-1770930793243.json +38 -0
  108. package/scripts/assess-training-data.ts +422 -0
  109. package/scripts/e2e-training-test.ts +550 -0
  110. package/scripts/export-rubrics.ts +64 -0
  111. package/scripts/generate-research-report.ts +1523 -0
  112. package/scripts/generate_dataset.sh +173 -0
  113. package/scripts/json-mode-benchmark.ts +399 -0
  114. package/scripts/real-archetype-benchmark.ts +210 -0
  115. package/scripts/run-baseline-comparison.ts +116 -0
  116. package/scripts/run-full-pipeline.ts +272 -0
  117. package/scripts/runpod_setup.sh +137 -0
  118. package/scripts/runpod_validate.sh +147 -0
  119. package/scripts/test-model-in-game.ts +955 -0
  120. package/scripts/test-scoring.ts +73 -0
  121. package/scripts/test-trained-model.ts +209 -0
  122. package/scripts/train-and-test.ts +824 -0
  123. package/scripts/verify-final.ts +118 -0
  124. package/src/adapter.ts +516 -0
  125. package/src/archetypes/ArchetypeConfigService.ts +626 -0
  126. package/src/archetypes/derive-archetype.ts +249 -0
  127. package/src/archetypes/index.ts +22 -0
  128. package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
  129. package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
  130. package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
  131. package/src/benchmark/BenchmarkDataViewer.ts +324 -0
  132. package/src/benchmark/BenchmarkHistoryService.ts +221 -0
  133. package/src/benchmark/BenchmarkRunner.ts +685 -0
  134. package/src/benchmark/BenchmarkValidator.ts +206 -0
  135. package/src/benchmark/FastEvalRunner.ts +225 -0
  136. package/src/benchmark/MetricsValidator.ts +165 -0
  137. package/src/benchmark/MetricsVisualizer.ts +909 -0
  138. package/src/benchmark/ModelBenchmarkService.ts +611 -0
  139. package/src/benchmark/ModelRegistry.ts +158 -0
  140. package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
  141. package/src/benchmark/SimulationA2AInterface.ts +1169 -0
  142. package/src/benchmark/SimulationEngine.ts +832 -0
  143. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
  144. package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
  145. package/src/benchmark/index.ts +89 -0
  146. package/src/benchmark/parseSimulationMetrics.ts +124 -0
  147. package/src/benchmark/simulation-types.ts +78 -0
  148. package/src/dependencies.ts +439 -0
  149. package/src/generation/TrajectoryGenerator.ts +387 -0
  150. package/src/generation/index.ts +12 -0
  151. package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
  152. package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
  153. package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
  154. package/src/huggingface/index.ts +27 -0
  155. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
  156. package/src/index.ts +102 -0
  157. package/src/init-training.ts +53 -0
  158. package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
  159. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
  160. package/src/metrics/index.ts +8 -0
  161. package/src/metrics/types.ts +200 -0
  162. package/src/rubrics/__tests__/index.test.ts +184 -0
  163. package/src/rubrics/ass-kisser.ts +85 -0
  164. package/src/rubrics/degen.ts +80 -0
  165. package/src/rubrics/goody-twoshoes.ts +84 -0
  166. package/src/rubrics/index.ts +236 -0
  167. package/src/rubrics/information-trader.ts +84 -0
  168. package/src/rubrics/infosec.ts +101 -0
  169. package/src/rubrics/liar.ts +104 -0
  170. package/src/rubrics/perps-trader.ts +87 -0
  171. package/src/rubrics/researcher.ts +81 -0
  172. package/src/rubrics/scammer.ts +82 -0
  173. package/src/rubrics/social-butterfly.ts +73 -0
  174. package/src/rubrics/super-predictor.ts +97 -0
  175. package/src/rubrics/trader.ts +67 -0
  176. package/src/scoring/ArchetypeScoringService.ts +486 -0
  177. package/src/scoring/JudgePromptBuilder.ts +556 -0
  178. package/src/scoring/LLMJudgeCache.ts +401 -0
  179. package/src/scoring/index.ts +9 -0
  180. package/src/training/AutomationPipeline.ts +916 -0
  181. package/src/training/BenchmarkService.ts +518 -0
  182. package/src/training/ConfigValidator.ts +220 -0
  183. package/src/training/MarketOutcomesTracker.ts +187 -0
  184. package/src/training/ModelDeployer.ts +186 -0
  185. package/src/training/ModelFetcher.ts +76 -0
  186. package/src/training/ModelSelectionService.ts +341 -0
  187. package/src/training/ModelUsageVerifier.ts +160 -0
  188. package/src/training/MultiModelOrchestrator.ts +580 -0
  189. package/src/training/RLModelConfig.ts +407 -0
  190. package/src/training/RewardBackpropagationService.ts +149 -0
  191. package/src/training/RulerScoringService.ts +666 -0
  192. package/src/training/TrainingMonitor.ts +166 -0
  193. package/src/training/TrajectoryRecorder.ts +399 -0
  194. package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
  195. package/src/training/index.ts +100 -0
  196. package/src/training/logRLConfig.ts +34 -0
  197. package/src/training/pipeline.ts +129 -0
  198. package/src/training/storage/ModelStorageService.ts +279 -0
  199. package/src/training/storage/TrainingDataArchiver.ts +197 -0
  200. package/src/training/storage/index.ts +17 -0
  201. package/src/training/types.ts +207 -0
  202. package/src/training/window-utils.ts +138 -0
  203. package/src/utils/index.ts +101 -0
  204. package/src/utils/logger.ts +59 -0
  205. package/src/utils/snowflake.ts +17 -0
  206. package/src/utils/synthetic-detector.ts +111 -0
  207. package/tsconfig.json +20 -0
@@ -0,0 +1,532 @@
1
+ /**
2
+ * HuggingFace Model Uploader
3
+ *
4
+ * Uploads trained RL models to HuggingFace Hub with benchmark results and model cards.
5
+ */
6
+
7
+ import { getTrainingDataAdapter } from '../adapter';
8
+ import { promises as fs } from 'fs';
9
+ import * as path from 'path';
10
+ import {
11
+ type JsonValue,
12
+ parseSimulationMetrics,
13
+ } from '../benchmark/parseSimulationMetrics';
14
+ import type { SimulationMetrics } from '../benchmark/SimulationEngine';
15
+ import { logger } from '../utils';
16
+ import {
17
+ getHuggingFaceToken,
18
+ HuggingFaceUploadUtil,
19
+ requireHuggingFaceToken,
20
+ } from './shared/HuggingFaceUploadUtil';
21
+
22
/**
 * One benchmark run in the compact form embedded in HuggingFace model cards
 * and written to `benchmark_results.json`.
 *
 * The timestamp is kept as a string (rather than a `Date`) so the object
 * survives `JSON.stringify` / `JSON.parse` unchanged.
 */
export interface ModelCardBenchmarkResult {
  /** Identifier of the benchmark run. */
  benchmarkId: string;
  /** When the benchmark ran, as a date string (ISO-8601 when loaded from the database). */
  runAt: string;
  /** Simulation metrics recorded for the run. */
  metrics: SimulationMetrics;
}
31
+
32
/**
 * Options accepted by `HuggingFaceModelUploader.uploadModel`.
 */
export interface ModelUploadOptions {
  /** Database model ID */
  modelId: string;
  /** HuggingFace model name (e.g., 'elizaos/agent-v1') */
  modelName: string;
  /** Optional free-text description of the model. */
  description?: string;
  /** Whether the HuggingFace repository should be private (treated as `false` when omitted). */
  private?: boolean;
  /** Request the push to the Hub; the push only happens when the model also has a storage path. */
  includeWeights?: boolean;
  /** Directory for generated artifacts; defaults to `<cwd>/exports/models/<version>`. */
  outputDir?: string;
}
42
+
43
/**
 * Outcome of `HuggingFaceModelUploader.uploadModel`.
 *
 * Failures are reported through this object (`success: false` plus `error`)
 * rather than by throwing.
 */
export interface ModelUploadResult {
  /** `true` when the upload flow completed without an error. */
  success: boolean;
  /** `https://huggingface.co/<modelName>`; only set on success. */
  modelUrl?: string;
  /** Database model ID the request was made for. */
  modelId: string;
  /** Number of files reported as uploaded; `0` on failure. */
  filesUploaded: number;
  /** Error message; only set on failure. */
  error?: string;
}
50
+
51
/**
 * Everything needed to render the model card (`README.md`) for one model.
 */
export interface ModelCardData {
  /** Database model ID. */
  modelId: string;
  /** HuggingFace model name, used as the card title and in usage snippets. */
  modelName: string;
  /** Model version string. */
  version: string;
  /** Name of the base model this model was trained from. */
  baseModel: string;
  /** Timestamp shown as the training date on the card. */
  trainedAt: Date;
  /** Training run/batch identifier, when one is known. */
  trainingRunId?: string;
  /** Individual benchmark runs listed in the detailed results table. */
  benchmarkResults: ModelCardBenchmarkResult[];
  /** Aggregates over `benchmarkResults`. */
  metrics: {
    /** Mean total P&L across runs. */
    avgPnl: number;
    /** Mean prediction accuracy across runs (the card multiplies it by 100 to show a percentage). */
    avgAccuracy: number;
    /** Mean optimality score across runs. */
    avgOptimality: number;
    /** Number of runs the averages were computed over. */
    benchmarkCount: number;
  };
}
66
+
67
/**
 * Publishes a trained model's card, metadata and benchmark results to the
 * HuggingFace Hub.
 *
 * The flow is: load the model record, collect benchmark results (database
 * first, local `benchmarks/*.json` files as fallback), write `README.md`,
 * `model_metadata.json` and `benchmark_results.json` into an output directory,
 * and optionally push that directory to the Hub.
 */
export class HuggingFaceModelUploader {
  private huggingFaceToken: string | undefined;

  /**
   * @param huggingFaceToken - Explicit API token. When omitted (or empty) the
   *   token returned by `getHuggingFaceToken()` is used instead.
   */
  constructor(huggingFaceToken?: string) {
    this.huggingFaceToken = huggingFaceToken || getHuggingFaceToken();
  }

  /**
   * Upload model to HuggingFace with benchmarks and model card.
   *
   * Never throws: every failure is logged and reported as
   * `{ success: false, filesUploaded: 0, error }`.
   *
   * @param options - Model to publish and how to publish it.
   * @returns The upload outcome, including the model URL on success.
   */
  async uploadModel(options: ModelUploadOptions): Promise<ModelUploadResult> {
    try {
      logger.info('Starting HuggingFace model upload', {
        modelId: options.modelId,
      });

      // Validate token (throws if not set). Note this runs even when the Hub
      // push below ends up being skipped.
      const token = this.huggingFaceToken || requireHuggingFaceToken();
      this.huggingFaceToken = token;

      // Step 1: Load model from database
      const adapter = getTrainingDataAdapter();
      const model = await adapter.getModelById(options.modelId);

      if (!model) {
        throw new Error(`Model not found: ${options.modelId}`);
      }

      // Step 2: Get benchmark results
      logger.info('Loading benchmark results', { modelId: options.modelId });
      const modelBenchmarks = await this.getBenchmarkResults(options.modelId);

      if (modelBenchmarks.length === 0) {
        logger.warn('No benchmark results found for model', {
          modelId: options.modelId,
        });
      }

      // Step 3: Prepare model card data
      const cardData: ModelCardData = {
        modelId: model.modelId,
        modelName: options.modelName,
        version: model.version,
        baseModel: model.baseModel,
        trainedAt: model.createdAt,
        trainingRunId: model.trainingBatch || undefined,
        benchmarkResults: modelBenchmarks,
        metrics: this.calculateAverageMetrics(modelBenchmarks),
      };

      // Step 4: Create output directory
      const outputDir =
        options.outputDir ||
        path.join(process.cwd(), 'exports', 'models', model.version);
      await fs.mkdir(outputDir, { recursive: true });

      // Step 5: Generate model card
      logger.info('Generating model card');
      await this.generateModelCard(cardData, outputDir);

      // Step 6: Save metadata
      const metadataPath = path.join(outputDir, 'model_metadata.json');
      await fs.writeFile(
        metadataPath,
        JSON.stringify(
          {
            modelId: model.modelId,
            version: model.version,
            baseModel: model.baseModel,
            storagePath: model.storagePath,
            trainingBatch: model.trainingBatch,
            trainedAt: model.createdAt.toISOString(),
            benchmarkScore: model.benchmarkScore,
            avgReward: model.avgReward,
            accuracy: model.accuracy,
          },
          null,
          2
        )
      );

      // Step 7: Save benchmark results
      const benchmarksPath = path.join(outputDir, 'benchmark_results.json');
      await fs.writeFile(
        benchmarksPath,
        JSON.stringify(modelBenchmarks, null, 2)
      );

      // Step 8: Upload to HuggingFace (if weights available and requested)
      // NOTE(review): the value 2 is kept for backward compatibility with
      // existing callers; three files are actually generated above and none
      // are pushed when the upload is skipped.
      let filesUploaded = 2; // README.md + metadata
      let pushedToHub = false;

      if (options.includeWeights && model.storagePath) {
        logger.info('Uploading model to HuggingFace', {
          modelName: options.modelName,
        });
        // NOTE(review): only `outputDir` is pushed; nothing visible here
        // copies weights from `model.storagePath` into it — confirm.
        filesUploaded = await this.uploadToHub(
          options.modelName,
          outputDir,
          options.private ?? false
        );
        pushedToHub = true;
      } else {
        logger.info(
          'Skipping model weight upload (not requested or no weights available)'
        );
      }

      const modelUrl = `https://huggingface.co/${options.modelName}`;

      if (pushedToHub) {
        logger.info('Model uploaded successfully', { modelUrl, filesUploaded });
      } else {
        // Do not claim an upload that never happened.
        logger.info('Model artifacts generated locally; HuggingFace upload skipped', {
          outputDir,
          filesUploaded,
        });
      }

      // Update model status in database
      // NOTE(review): status becomes 'deployed' even when the push was
      // skipped; preserved as-is because callers may depend on it.
      await adapter.updateModelStatus(options.modelId, 'deployed', {
        deployedAt: new Date(),
      });

      return {
        success: true,
        modelUrl,
        modelId: options.modelId,
        filesUploaded,
      };
    } catch (error) {
      logger.error('Failed to upload model', { error });
      return {
        success: false,
        modelId: options.modelId,
        filesUploaded: 0,
        error: error instanceof Error ? error.message : 'Unknown error',
      };
    }
  }

  /**
   * Get benchmark results for a model.
   *
   * Reads from the database first; if that throws, falls back to JSON files
   * under `<cwd>/benchmarks`.
   */
  private async getBenchmarkResults(
    modelId: string
  ): Promise<ModelCardBenchmarkResult[]> {
    try {
      const rows = await getTrainingDataAdapter().getBenchmarkResultsByModel(modelId);

      return rows.map((row) => ({
        benchmarkId: row.benchmarkId,
        runAt: row.runAt.toISOString(),
        // detailedMetrics is stored as JSON in database, validate it matches SimulationMetrics
        metrics: parseSimulationMetrics(row.detailedMetrics as JsonValue),
      }));
    } catch (error) {
      logger.warn('Could not load benchmark results from database', { error });

      // Fallback to files if database fails
      return await this.getBenchmarkResultsFromFiles(modelId);
    }
  }

  /**
   * Fallback: get benchmark results from `<cwd>/benchmarks/*.json` files whose
   * name contains `modelId`.
   *
   * Each file is handled independently, so a single unreadable or malformed
   * file is skipped with a warning instead of discarding all later files.
   * Metrics go through `parseSimulationMetrics`, the same validation the
   * database path uses. Returns an empty list when the directory cannot be
   * read.
   */
  private async getBenchmarkResultsFromFiles(
    modelId: string
  ): Promise<ModelCardBenchmarkResult[]> {
    const benchmarksDir = path.join(process.cwd(), 'benchmarks');

    let fileNames: string[];
    try {
      fileNames = await fs.readdir(benchmarksDir);
    } catch (error) {
      logger.warn('Could not load benchmark results from files either', {
        error,
      });
      return [];
    }

    const results: ModelCardBenchmarkResult[] = [];

    for (const fileName of fileNames) {
      if (!fileName.endsWith('.json') || !fileName.includes(modelId)) {
        continue;
      }

      try {
        const parsed: unknown = JSON.parse(
          await fs.readFile(path.join(benchmarksDir, fileName), 'utf-8')
        );

        if (typeof parsed !== 'object' || parsed === null) {
          continue;
        }
        const record = parsed as Record<string, unknown>;
        if (!record.metrics) {
          continue;
        }

        results.push({
          // Fall back to the file name / current time when the file does not
          // carry a usable string value.
          benchmarkId:
            typeof record.benchmarkId === 'string' && record.benchmarkId
              ? record.benchmarkId
              : fileName,
          runAt:
            typeof record.runAt === 'string' && record.runAt
              ? record.runAt
              : new Date().toISOString(),
          metrics: parseSimulationMetrics(record.metrics as JsonValue),
        });
      } catch (error) {
        logger.warn('Skipping unreadable benchmark result file', {
          file: fileName,
          error,
        });
      }
    }

    return results;
  }

  /**
   * Calculate average metrics across benchmarks.
   *
   * @returns Arithmetic means of total P&L, prediction accuracy and
   *   optimality score, plus the run count. All zeros for an empty list.
   */
  private calculateAverageMetrics(
    benchmarkResults: ModelCardBenchmarkResult[]
  ): {
    avgPnl: number;
    avgAccuracy: number;
    avgOptimality: number;
    benchmarkCount: number;
  } {
    const benchmarkCount = benchmarkResults.length;

    if (benchmarkCount === 0) {
      return { avgPnl: 0, avgAccuracy: 0, avgOptimality: 0, benchmarkCount: 0 };
    }

    let totalPnl = 0;
    let totalAccuracy = 0;
    let totalOptimality = 0;

    for (const { metrics } of benchmarkResults) {
      totalPnl += metrics.totalPnl;
      totalAccuracy += metrics.predictionMetrics.accuracy;
      totalOptimality += metrics.optimalityScore;
    }

    return {
      avgPnl: totalPnl / benchmarkCount,
      avgAccuracy: totalAccuracy / benchmarkCount,
      avgOptimality: totalOptimality / benchmarkCount,
      benchmarkCount,
    };
  }

  /**
   * Generate model card for HuggingFace and write it to `<outputDir>/README.md`.
   *
   * Branding is taken from the `TRAINING_BRAND_NAME`, `TRAINING_BRAND_ORG` and
   * `TRAINING_PLATFORM_NAME` environment variables, with ElizaOS defaults.
   */
  private async generateModelCard(
    data: ModelCardData,
    outputDir: string
  ): Promise<void> {
    const brandName = process.env.TRAINING_BRAND_NAME || 'ElizaOS';
    const brandOrg = process.env.TRAINING_BRAND_ORG || 'ElizaOS Contributors';
    const platformName =
      process.env.TRAINING_PLATFORM_NAME || 'ElizaOS-compatible runtimes';
    // Tag form of the brand: lower-case with whitespace runs collapsed to '-'.
    const brandTag = brandName.toLowerCase().replace(/\s+/g, '-');
    // BibTeX keys should not contain dots, so the version's dots become '_'.
    const citationKey = `${brandTag}_agent_${data.version.replace(/\./g, '_')}`;

    const card = `---
license: mit
library_name: transformers
tags:
- ${brandTag}
- reinforcement-learning
- trading-agent
- prediction-markets
base_model: ${data.baseModel}
---

# ${data.modelName}

Autonomous agent trained with reinforcement learning for market-style decision making.

## Model Details

- **Version:** ${data.version}
- **Base Model:** ${data.baseModel}
- **Training Date:** ${data.trainedAt.toISOString().split('T')[0]}
- **Model ID:** ${data.modelId}
${data.trainingRunId ? `- **Training Run:** ${data.trainingRunId}` : ''}

## Performance Metrics

${
  data.benchmarkResults.length > 0
    ? `
### Benchmark Results (${data.benchmarkResults.length} runs)

| Metric | Value |
|--------|-------|
| Average P&L | ${data.metrics.avgPnl.toFixed(2)} |
| Average Accuracy | ${(data.metrics.avgAccuracy * 100).toFixed(1)}% |
| Average Optimality | ${data.metrics.avgOptimality.toFixed(1)} |

### Detailed Benchmark Results

${this.generateBenchmarkTable(data.benchmarkResults)}
`
    : 'No benchmark results available yet.'
}

## Training Details

### Training Data

- **Source:** Autonomous agent trajectories
- **Collection Method:** Live agent gameplay on prediction markets
- **Training Framework:** Atropos GRPO
- **Base Model:** ${data.baseModel}

### Training Procedure

This model was trained using Group Relative Policy Optimization (GRPO) via the Atropos framework on trajectories collected from autonomous agents. The training process:

1. Agents generate trajectories through market interactions
2. Trajectories are scored using RLAIF with an LLM judge based on P&L, prediction accuracy, and decision quality
3. GRPO training optimizes policy to maximize expected rewards
4. Model checkpoints are evaluated on standardized benchmarks

### Compute Infrastructure

- **Platform:** ${data.trainingRunId ? 'Atropos GRPO Training' : 'Local training'}
- **Training Time:** Continuous learning with hourly updates

## Intended Use

This model is designed for:

- Autonomous market decision support and simulation
- Research on RL-based trading strategies
- Benchmarking agent decision-making
- Educational purposes

**Not intended for:**
- Production trading without human oversight
- Financial advice
- Real-money trading without risk management

## Evaluation

The model is evaluated on standardized benchmarks that include:

- **Prediction Market Trading:** Betting on binary outcomes with LMSR pricing
- **Perpetual Trading:** Long/short positions on crypto perps
- **Social Interaction:** Posts, group chats, and reputation building
- **Risk Management:** Position sizing and portfolio optimization

### Metrics

- **Total P&L:** Cumulative profit/loss across all positions
- **Prediction Accuracy:** Percentage of correct market predictions
- **Optimality Score:** Alignment with theoretically optimal actions (0-100)
- **Response Time:** Decision-making latency

## Usage

### Via ${platformName}

The model can be deployed in compatible runtimes and accessed via an agent API:

\`\`\`typescript
import { agentRuntimeManager } from '@elizaos/agents';

const runtime = await agentRuntimeManager.getRuntime(agentId);
const response = await runtime.chat({
messages: [{ role: 'user', content: 'Analyze this market...' }]
});
\`\`\`

### Direct Inference

If you have downloaded the model weights:

\`\`\`python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("${data.modelName}")
tokenizer = AutoTokenizer.from_pretrained("${data.modelName}")

# Use model for inference
inputs = tokenizer("Should I bet YES on this market?", return_tensors="pt")
outputs = model.generate(**inputs)
response = tokenizer.decode(outputs[0])
\`\`\`

## Limitations

- Trained on simulated market data; real-world performance may vary
- May not generalize to markets significantly different from training distribution
- Decision quality depends on market information quality
- No guarantees of profitability

## Ethical Considerations

This model is part of a research project on autonomous agents in prediction markets. Users should:

- Understand the risks of algorithmic trading
- Not rely solely on model decisions for financial outcomes
- Use appropriate risk management and position sizing
- Consider market impact and fairness implications

## Citation

\`\`\`bibtex
@model{${citationKey},
title = {${brandName} Trading Agent},
author = {${brandOrg}},
year = {${new Date().getFullYear()}},
version = {${data.version}},
url = {https://huggingface.co/${data.modelName}}
}
\`\`\`

## Model Card Contact

For questions or issues, please open an issue on the repository.
`;

    const cardPath = path.join(outputDir, 'README.md');
    await fs.writeFile(cardPath, card);
  }

  /**
   * Generate benchmark results table (markdown), one row per run.
   *
   * - Benchmark IDs longer than 20 characters are truncated and suffixed with
   *   `...`; shorter IDs are shown unchanged.
   * - `|` in an ID is escaped so it cannot break the table layout.
   * - An unparseable `runAt` is rendered as `unknown` instead of throwing.
   *
   * @returns The table text, or an empty string when there are no results.
   */
  private generateBenchmarkTable(results: ModelCardBenchmarkResult[]): string {
    if (results.length === 0) return '';

    const maxIdLength = 20;
    const header =
      '| Benchmark | Date | P&L | Accuracy | Win Rate | Optimality |\n' +
      '|-----------|------|-----|----------|----------|------------|\n';

    const rows = results.map((result) => {
      const { benchmarkId, metrics } = result;

      const shownId =
        benchmarkId.length > maxIdLength
          ? `${benchmarkId.substring(0, maxIdLength)}...`
          : benchmarkId;
      const safeId = shownId.replace(/\|/g, '\\|');

      // toISOString() throws RangeError on an invalid Date, so check first.
      const runAt = new Date(result.runAt);
      const date = Number.isNaN(runAt.getTime())
        ? 'unknown'
        : runAt.toISOString().split('T')[0];

      const pnl = metrics.totalPnl.toFixed(2);
      const accuracy = (metrics.predictionMetrics.accuracy * 100).toFixed(1);
      const winRate = (metrics.perpMetrics.winRate * 100).toFixed(1);
      const optimality = metrics.optimalityScore.toFixed(1);

      return `| ${safeId} | ${date} | ${pnl} | ${accuracy}% | ${winRate}% | ${optimality} |\n`;
    });

    return header + rows.join('');
  }

  /**
   * Upload files to HuggingFace Hub.
   * Uses shared utility for consistent upload behavior.
   *
   * @param modelName - Target HuggingFace repository name.
   * @param localDir - Directory whose contents are pushed.
   * @param _isPrivate - Requested visibility. It is not forwarded to the
   *   upload utility, so a warning is logged when it is `true`.
   * @returns The number of files reported by the upload utility.
   * @throws Error when no token is configured; upload errors are logged
   *   together with manual upload instructions and then rethrown.
   */
  private async uploadToHub(
    modelName: string,
    localDir: string,
    _isPrivate: boolean
  ): Promise<number> {
    if (!this.huggingFaceToken) {
      throw new Error('HuggingFace token not configured');
    }

    if (_isPrivate) {
      // Surface the gap instead of silently ignoring a privacy request.
      logger.warn(
        'Private visibility was requested but is not applied by this uploader; verify the repository visibility on HuggingFace',
        { modelName }
      );
    }

    try {
      // Use shared upload utility
      return await HuggingFaceUploadUtil.uploadDirectory(
        modelName,
        'model',
        localDir,
        this.huggingFaceToken
      );
    } catch (error) {
      logger.error('Failed to upload to HuggingFace Hub', { error });

      // Provide helpful manual upload instructions
      const instructions = HuggingFaceUploadUtil.getManualUploadInstructions(
        modelName,
        'model',
        localDir
      );

      logger.info('To upload manually:', { instructions });

      throw error;
    }
  }
}
@@ -0,0 +1,27 @@
1
+ /**
2
+ * HuggingFace Integration Module
3
+ *
4
+ * Tools for uploading models and datasets to HuggingFace Hub.
5
+ */
6
+
7
+ export { HuggingFaceDatasetUploader } from './HuggingFaceDatasetUploader';
8
+ export type {
9
+ DatasetUploadOptions,
10
+ WeeklyUploadResult,
11
+ } from './HuggingFaceIntegrationService';
12
+ export {
13
+ HuggingFaceIntegrationService,
14
+ huggingFaceIntegration,
15
+ } from './HuggingFaceIntegrationService';
16
+ export type {
17
+ ModelCardBenchmarkResult,
18
+ ModelUploadOptions,
19
+ ModelUploadResult,
20
+ } from './HuggingFaceModelUploader';
21
+ export { HuggingFaceModelUploader } from './HuggingFaceModelUploader';
22
+
23
+ export {
24
+ getHuggingFaceToken,
25
+ HuggingFaceUploadUtil,
26
+ requireHuggingFaceToken,
27
+ } from './shared/HuggingFaceUploadUtil';