@elizaos/training 2.0.0-alpha.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +75 -0
- package/Makefile +374 -0
- package/README.md +346 -0
- package/config/rubrics.json +137 -0
- package/data/.gitkeep +0 -0
- package/data/degen/.gitkeep +2 -0
- package/data/trader/.gitkeep +2 -0
- package/docker-compose.test.yml +57 -0
- package/package.json +58 -0
- package/python/config/babylon_atropos.yaml +90 -0
- package/python/config/profiles/12gb.json +11 -0
- package/python/config/profiles/16gb.json +10 -0
- package/python/config/profiles/24gb.json +10 -0
- package/python/config/profiles/48gb.json +10 -0
- package/python/config/profiles/cpu.json +11 -0
- package/python/config/profiles/l40-2gpu-safe.json +20 -0
- package/python/config/profiles/l40-2gpu.json +22 -0
- package/python/config/profiles/l40-4gpu.json +21 -0
- package/python/config/profiles/l40.json +17 -0
- package/python/config/tinker_training.yaml +143 -0
- package/python/curriculum_state.json +165 -0
- package/python/env.template +86 -0
- package/python/env.training.template +46 -0
- package/python/pyproject.toml +41 -0
- package/python/requirements-ci.txt +31 -0
- package/python/requirements.txt +87 -0
- package/python/scripts/__init__.py +4 -0
- package/python/scripts/import_json_trajectories.py +412 -0
- package/python/scripts/local-finetune/README.md +63 -0
- package/python/scripts/local-finetune/ingest_and_score.py +139 -0
- package/python/scripts/local-finetune/merge_model.py +32 -0
- package/python/scripts/local-finetune/test_adapter.py +91 -0
- package/python/scripts/local-finetune/train_from_csv.py +132 -0
- package/python/scripts/merge_trajectories.py +318 -0
- package/python/scripts/run_ab_test.py +143 -0
- package/python/scripts/run_full_pipeline.py +544 -0
- package/python/scripts/run_tinker_training.py +192 -0
- package/python/scripts/run_training.py +914 -0
- package/python/scripts/test_judge.py +155 -0
- package/python/scripts/test_pipeline.py +356 -0
- package/python/scripts/test_trained_model.py +380 -0
- package/python/scripts/train_local.py +528 -0
- package/python/setup.py +20 -0
- package/python/src/__init__.py +190 -0
- package/python/src/data_bridge/__init__.py +24 -0
- package/python/src/data_bridge/converter.py +435 -0
- package/python/src/data_bridge/reader.py +393 -0
- package/python/src/models.py +283 -0
- package/python/src/training/__init__.py +605 -0
- package/python/src/training/ab_testing.py +404 -0
- package/python/src/training/action_executor.py +621 -0
- package/python/src/training/archetype_trainer.py +347 -0
- package/python/src/training/atropos_trainer.py +980 -0
- package/python/src/training/babylon_env.py +1254 -0
- package/python/src/training/error_recovery.py +647 -0
- package/python/src/training/evaluation.py +856 -0
- package/python/src/training/fast_simulator.py +880 -0
- package/python/src/training/format_validator.py +584 -0
- package/python/src/training/hybrid_env.py +522 -0
- package/python/src/training/kl_controller.py +628 -0
- package/python/src/training/multi_prompt_dataset.py +883 -0
- package/python/src/training/multi_turn.py +656 -0
- package/python/src/training/online_env.py +1084 -0
- package/python/src/training/quality_scorer.py +391 -0
- package/python/src/training/quality_utils.py +633 -0
- package/python/src/training/rewards.py +1344 -0
- package/python/src/training/rlaif_env.py +17 -0
- package/python/src/training/rollout_generator.py +502 -0
- package/python/src/training/rubric_loader.py +198 -0
- package/python/src/training/scenario_pool.py +1072 -0
- package/python/src/training/schemas.py +481 -0
- package/python/src/training/service_manager.py +552 -0
- package/python/src/training/simulation_bridge.py +535 -0
- package/python/src/training/tick_reward_attribution.py +399 -0
- package/python/src/training/tinker_client.py +575 -0
- package/python/src/training/tinker_trainer.py +646 -0
- package/python/src/training/tokenization_utils.py +402 -0
- package/python/tests/e2e/__init__.py +13 -0
- package/python/tests/e2e/conftest.py +258 -0
- package/python/tests/e2e/test_full_pipeline.py +643 -0
- package/python/tests/e2e/test_online_training_e2e.py +365 -0
- package/python/tests/integration/__init__.py +12 -0
- package/python/tests/integration/conftest.py +383 -0
- package/python/tests/integration/test_db_integration.py +649 -0
- package/python/tests/integration/test_json_mode_integration.py +554 -0
- package/python/tests/test_action_executor.py +594 -0
- package/python/tests/test_archetype_scoring.py +1027 -0
- package/python/tests/test_atropos_integration.py +360 -0
- package/python/tests/test_evaluation.py +727 -0
- package/python/tests/test_format_validator.py +486 -0
- package/python/tests/test_kl_controller.py +432 -0
- package/python/tests/test_lr_scheduler.py +579 -0
- package/python/tests/test_multi_turn.py +590 -0
- package/python/tests/test_online_env.py +519 -0
- package/python/tests/test_quality_scorer.py +474 -0
- package/python/tests/test_scenario_pool.py +735 -0
- package/python/tests/test_service_manager.py +585 -0
- package/python/tests/test_simulation_rollout.py +581 -0
- package/python/tests/test_tokenization_utils.py +501 -0
- package/python/tests/test_training_orchestrator.py +497 -0
- package/python/tests/test_training_output_structure.py +661 -0
- package/research-output/training-runs/training-run-1770772042899.json +26 -0
- package/research-output/training-runs/training-run-1770930079670.json +32 -0
- package/research-output/training-runs/training-run-1770930143700.json +44 -0
- package/research-output/training-runs/training-run-1770930183638.json +38 -0
- package/research-output/training-runs/training-run-1770930442049.json +38 -0
- package/research-output/training-runs/training-run-1770930793243.json +38 -0
- package/scripts/assess-training-data.ts +422 -0
- package/scripts/e2e-training-test.ts +550 -0
- package/scripts/export-rubrics.ts +64 -0
- package/scripts/generate-research-report.ts +1523 -0
- package/scripts/generate_dataset.sh +173 -0
- package/scripts/json-mode-benchmark.ts +399 -0
- package/scripts/real-archetype-benchmark.ts +210 -0
- package/scripts/run-baseline-comparison.ts +116 -0
- package/scripts/run-full-pipeline.ts +272 -0
- package/scripts/runpod_setup.sh +137 -0
- package/scripts/runpod_validate.sh +147 -0
- package/scripts/test-model-in-game.ts +955 -0
- package/scripts/test-scoring.ts +73 -0
- package/scripts/test-trained-model.ts +209 -0
- package/scripts/train-and-test.ts +824 -0
- package/scripts/verify-final.ts +118 -0
- package/src/adapter.ts +516 -0
- package/src/archetypes/ArchetypeConfigService.ts +626 -0
- package/src/archetypes/derive-archetype.ts +249 -0
- package/src/archetypes/index.ts +22 -0
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
- package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
- package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
- package/src/benchmark/BenchmarkDataViewer.ts +324 -0
- package/src/benchmark/BenchmarkHistoryService.ts +221 -0
- package/src/benchmark/BenchmarkRunner.ts +685 -0
- package/src/benchmark/BenchmarkValidator.ts +206 -0
- package/src/benchmark/FastEvalRunner.ts +225 -0
- package/src/benchmark/MetricsValidator.ts +165 -0
- package/src/benchmark/MetricsVisualizer.ts +909 -0
- package/src/benchmark/ModelBenchmarkService.ts +611 -0
- package/src/benchmark/ModelRegistry.ts +158 -0
- package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
- package/src/benchmark/SimulationA2AInterface.ts +1169 -0
- package/src/benchmark/SimulationEngine.ts +832 -0
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
- package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
- package/src/benchmark/index.ts +89 -0
- package/src/benchmark/parseSimulationMetrics.ts +124 -0
- package/src/benchmark/simulation-types.ts +78 -0
- package/src/dependencies.ts +439 -0
- package/src/generation/TrajectoryGenerator.ts +387 -0
- package/src/generation/index.ts +12 -0
- package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
- package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
- package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
- package/src/huggingface/index.ts +27 -0
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
- package/src/index.ts +102 -0
- package/src/init-training.ts +53 -0
- package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
- package/src/metrics/index.ts +8 -0
- package/src/metrics/types.ts +200 -0
- package/src/rubrics/__tests__/index.test.ts +184 -0
- package/src/rubrics/ass-kisser.ts +85 -0
- package/src/rubrics/degen.ts +80 -0
- package/src/rubrics/goody-twoshoes.ts +84 -0
- package/src/rubrics/index.ts +236 -0
- package/src/rubrics/information-trader.ts +84 -0
- package/src/rubrics/infosec.ts +101 -0
- package/src/rubrics/liar.ts +104 -0
- package/src/rubrics/perps-trader.ts +87 -0
- package/src/rubrics/researcher.ts +81 -0
- package/src/rubrics/scammer.ts +82 -0
- package/src/rubrics/social-butterfly.ts +73 -0
- package/src/rubrics/super-predictor.ts +97 -0
- package/src/rubrics/trader.ts +67 -0
- package/src/scoring/ArchetypeScoringService.ts +486 -0
- package/src/scoring/JudgePromptBuilder.ts +556 -0
- package/src/scoring/LLMJudgeCache.ts +401 -0
- package/src/scoring/index.ts +9 -0
- package/src/training/AutomationPipeline.ts +916 -0
- package/src/training/BenchmarkService.ts +518 -0
- package/src/training/ConfigValidator.ts +220 -0
- package/src/training/MarketOutcomesTracker.ts +187 -0
- package/src/training/ModelDeployer.ts +186 -0
- package/src/training/ModelFetcher.ts +76 -0
- package/src/training/ModelSelectionService.ts +341 -0
- package/src/training/ModelUsageVerifier.ts +160 -0
- package/src/training/MultiModelOrchestrator.ts +580 -0
- package/src/training/RLModelConfig.ts +407 -0
- package/src/training/RewardBackpropagationService.ts +149 -0
- package/src/training/RulerScoringService.ts +666 -0
- package/src/training/TrainingMonitor.ts +166 -0
- package/src/training/TrajectoryRecorder.ts +399 -0
- package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
- package/src/training/index.ts +100 -0
- package/src/training/logRLConfig.ts +34 -0
- package/src/training/pipeline.ts +129 -0
- package/src/training/storage/ModelStorageService.ts +279 -0
- package/src/training/storage/TrainingDataArchiver.ts +197 -0
- package/src/training/storage/index.ts +17 -0
- package/src/training/types.ts +207 -0
- package/src/training/window-utils.ts +138 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +59 -0
- package/src/utils/snowflake.ts +17 -0
- package/src/utils/synthetic-detector.ts +111 -0
- package/tsconfig.json +20 -0
|
@@ -0,0 +1,532 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HuggingFace Model Uploader
|
|
3
|
+
*
|
|
4
|
+
* Uploads trained RL models to HuggingFace Hub with benchmark results and model cards.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { getTrainingDataAdapter } from '../adapter';
|
|
8
|
+
import { promises as fs } from 'fs';
|
|
9
|
+
import * as path from 'path';
|
|
10
|
+
import {
|
|
11
|
+
type JsonValue,
|
|
12
|
+
parseSimulationMetrics,
|
|
13
|
+
} from '../benchmark/parseSimulationMetrics';
|
|
14
|
+
import type { SimulationMetrics } from '../benchmark/SimulationEngine';
|
|
15
|
+
import { logger } from '../utils';
|
|
16
|
+
import {
|
|
17
|
+
getHuggingFaceToken,
|
|
18
|
+
HuggingFaceUploadUtil,
|
|
19
|
+
requireHuggingFaceToken,
|
|
20
|
+
} from './shared/HuggingFaceUploadUtil';
|
|
21
|
+
|
|
22
|
+
/**
 * One benchmark run in the trimmed-down form embedded in a HuggingFace model
 * card and written to `benchmark_results.json`.
 *
 * The run timestamp is carried as a string (not a `Date`) so the object
 * survives `JSON.stringify` / `JSON.parse` unchanged.
 */
export interface ModelCardBenchmarkResult {
  /** Identifier of the benchmark run. */
  benchmarkId: string;
  /** Timestamp of the run as a string (ISO-8601 when sourced from the database). */
  runAt: string;
  /** Simulation metrics recorded for this run. */
  metrics: SimulationMetrics;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Options accepted by `HuggingFaceModelUploader.uploadModel`.
 */
export interface ModelUploadOptions {
  /** Database model ID used to look the model up through the training data adapter. */
  modelId: string;
  /**
   * HuggingFace model name (e.g., 'elizaos/agent-v1'). Also used to build the
   * resulting `https://huggingface.co/<modelName>` URL.
   */
  modelName: string;
  /**
   * Directory the model card, metadata and benchmark files are written to.
   * When omitted, `<cwd>/exports/models/<model version>` is used.
   */
  outputDir?: string;
  /**
   * When truthy and the model record has a storage path, the output directory
   * is pushed to the Hub; otherwise files are only generated locally.
   */
  includeWeights?: boolean;
  /**
   * Requested repository visibility (treated as `false` when omitted).
   * NOTE(review): the value is handed to the private upload step, but that step
   * does not pass it on to the shared upload utility — confirm the visibility
   * on the Hub after uploading.
   */
  private?: boolean;
  /**
   * Optional free-text description.
   * NOTE(review): not read by the uploader code in this file — confirm whether
   * it is meant to feed the model card.
   */
  description?: string;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Outcome of `HuggingFaceModelUploader.uploadModel`. The method resolves with
 * this object on both success and failure instead of rejecting.
 */
export interface ModelUploadResult {
  /** `true` when the whole upload flow finished without throwing. */
  success: boolean;
  /** Database model ID the request was made for. */
  modelId: string;
  /** File count reported by the upload flow; `0` on failure. */
  filesUploaded: number;
  /** `https://huggingface.co/<modelName>`; only set on success. */
  modelUrl?: string;
  /** Message of the error that aborted the flow; only set on failure. */
  error?: string;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Everything the model card template needs to render a README for one model.
 */
export interface ModelCardData {
  /** Model ID as stored on the database record. */
  modelId: string;
  /** HuggingFace model name; used as the card title and in the usage/citation snippets. */
  modelName: string;
  /** Model version string; also used to derive the citation key. */
  version: string;
  /** Name of the base model; emitted as `base_model` in the card front matter. */
  baseModel: string;
  /** Training timestamp (taken from the model record's creation date). */
  trainedAt: Date;
  /** Training run / batch identifier, when the model record has one. */
  trainingRunId?: string;
  /** Individual benchmark runs listed in the detailed results table. */
  benchmarkResults: ModelCardBenchmarkResult[];
  /** Aggregates over `benchmarkResults`; all zero when there are no runs. */
  metrics: {
    /** Mean of each run's total P&L. */
    avgPnl: number;
    /** Mean prediction accuracy; rendered on the card multiplied by 100 as a percentage. */
    avgAccuracy: number;
    /** Mean optimality score. */
    avgOptimality: number;
    /** Number of benchmark runs the averages were computed from. */
    benchmarkCount: number;
  };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Exports a trained model's card, metadata and benchmark results to a local
 * directory and, when requested, pushes that directory to the HuggingFace Hub.
 */
export class HuggingFaceModelUploader {
  private huggingFaceToken: string | undefined;

  /**
   * @param huggingFaceToken Explicit Hub token. When omitted (or empty) the
   *   token is looked up through `getHuggingFaceToken()`.
   */
  constructor(huggingFaceToken?: string) {
    this.huggingFaceToken = huggingFaceToken || getHuggingFaceToken();
  }

  /**
   * Upload model to HuggingFace with benchmarks and model card.
   *
   * Flow: resolve token -> load model record -> load benchmarks -> write
   * README.md, model_metadata.json and benchmark_results.json to the output
   * directory -> optionally push the directory to the Hub -> mark the model
   * as deployed.
   *
   * @param options Upload options; see {@link ModelUploadOptions}.
   * @returns A result object. Failures are logged and reported through
   *   `success: false` / `error`; this method does not reject.
   */
  async uploadModel(options: ModelUploadOptions): Promise<ModelUploadResult> {
    try {
      logger.info('Starting HuggingFace model upload', {
        modelId: options.modelId,
      });

      // Validate token (throws if not set)
      const token = this.huggingFaceToken || requireHuggingFaceToken();
      this.huggingFaceToken = token;

      // Step 1: Load model from database
      const adapter = getTrainingDataAdapter();
      const model = await adapter.getModelById(options.modelId);

      if (!model) {
        throw new Error(`Model not found: ${options.modelId}`);
      }

      // Step 2: Get benchmark results
      logger.info('Loading benchmark results', { modelId: options.modelId });
      const modelBenchmarks = await this.getBenchmarkResults(options.modelId);

      if (modelBenchmarks.length === 0) {
        logger.warn('No benchmark results found for model', {
          modelId: options.modelId,
        });
      }

      // Step 3: Prepare model card data
      const cardData: ModelCardData = {
        modelId: model.modelId,
        modelName: options.modelName,
        version: model.version,
        baseModel: model.baseModel,
        trainedAt: model.createdAt,
        trainingRunId: model.trainingBatch || undefined,
        benchmarkResults: modelBenchmarks,
        metrics: this.calculateAverageMetrics(modelBenchmarks),
      };

      // Step 4: Create output directory
      const outputDir =
        options.outputDir ||
        path.join(process.cwd(), 'exports', 'models', model.version);
      await fs.mkdir(outputDir, { recursive: true });

      // Step 5: Generate model card
      logger.info('Generating model card');
      await this.generateModelCard(cardData, outputDir);

      // Step 6: Save metadata
      const metadataPath = path.join(outputDir, 'model_metadata.json');
      await fs.writeFile(
        metadataPath,
        JSON.stringify(
          {
            modelId: model.modelId,
            version: model.version,
            baseModel: model.baseModel,
            storagePath: model.storagePath,
            trainingBatch: model.trainingBatch,
            trainedAt: model.createdAt.toISOString(),
            benchmarkScore: model.benchmarkScore,
            avgReward: model.avgReward,
            accuracy: model.accuracy,
          },
          null,
          2
        )
      );

      // Step 7: Save benchmark results
      const benchmarksPath = path.join(outputDir, 'benchmark_results.json');
      await fs.writeFile(
        benchmarksPath,
        JSON.stringify(modelBenchmarks, null, 2)
      );

      // Step 8: Upload to HuggingFace (if weights available and requested)
      // NOTE(review): when the Hub upload is skipped nothing has actually been
      // uploaded, yet the result still reports 2 files (three files are written
      // locally: README.md, model_metadata.json, benchmark_results.json). The
      // value is kept because callers may rely on it — confirm the intended
      // semantics of `filesUploaded`.
      let filesUploaded = 2; // README.md + metadata
      const shouldUpload = Boolean(options.includeWeights && model.storagePath);

      if (shouldUpload) {
        logger.info('Uploading model to HuggingFace', {
          modelName: options.modelName,
        });
        filesUploaded = await this.uploadToHub(
          options.modelName,
          outputDir,
          options.private ?? false
        );
      } else {
        logger.info(
          'Skipping model weight upload (not requested or no weights available)'
        );
      }

      const modelUrl = `https://huggingface.co/${options.modelName}`;

      // Only claim an upload happened when one actually did.
      logger.info(
        shouldUpload
          ? 'Model uploaded successfully'
          : 'Model export prepared locally; nothing was uploaded to HuggingFace',
        { modelUrl, filesUploaded }
      );

      // Update model status in database
      // NOTE(review): this also runs when the Hub upload was skipped — confirm
      // that 'deployed' is the intended status for a local-only export.
      await adapter.updateModelStatus(options.modelId, 'deployed', {
        deployedAt: new Date(),
      });

      return {
        success: true,
        modelUrl,
        modelId: options.modelId,
        filesUploaded,
      };
    } catch (error) {
      logger.error('Failed to upload model', { error });
      return {
        success: false,
        modelId: options.modelId,
        filesUploaded: 0,
        error: error instanceof Error ? error.message : 'Unknown error',
      };
    }
  }

  /**
   * Get benchmark results for a model.
   *
   * Reads them through the training data adapter; if that throws (including a
   * failure while parsing stored metrics) the file-based fallback is used.
   */
  private async getBenchmarkResults(
    modelId: string
  ): Promise<ModelCardBenchmarkResult[]> {
    // Query benchmark results from database
    try {
      const results =
        await getTrainingDataAdapter().getBenchmarkResultsByModel(modelId);

      return results.map((r) => ({
        benchmarkId: r.benchmarkId,
        runAt: r.runAt.toISOString(),
        // detailedMetrics is stored as JSON in database, validate it matches SimulationMetrics
        metrics: parseSimulationMetrics(r.detailedMetrics as JsonValue),
      }));
    } catch (error) {
      logger.warn('Could not load benchmark results from database', { error });

      // Fallback to files if database fails
      return await this.getBenchmarkResultsFromFiles(modelId);
    }
  }

  /**
   * Fallback: Get benchmark results from files.
   *
   * Scans `<cwd>/benchmarks` for `.json` files whose name contains the model
   * ID. Each file is handled independently: an unreadable, malformed or
   * invalid file is logged and skipped so it cannot hide the remaining ones.
   * Metrics go through `parseSimulationMetrics`, the same parser the database
   * path uses.
   *
   * @returns The results that could be read; empty when the directory cannot
   *   be listed.
   */
  private async getBenchmarkResultsFromFiles(
    modelId: string
  ): Promise<ModelCardBenchmarkResult[]> {
    const benchmarksDir = path.join(process.cwd(), 'benchmarks');

    let files: string[];
    try {
      files = await fs.readdir(benchmarksDir);
    } catch (error) {
      logger.warn('Could not load benchmark results from files either', {
        error,
      });
      return [];
    }

    const results: ModelCardBenchmarkResult[] = [];

    for (const file of files) {
      if (!file.endsWith('.json') || !file.includes(modelId)) continue;

      try {
        const filePath = path.join(benchmarksDir, file);
        const parsed: unknown = JSON.parse(await fs.readFile(filePath, 'utf-8'));

        // Only plain JSON objects can carry the fields we need.
        if (typeof parsed !== 'object' || parsed === null) continue;

        const { benchmarkId, runAt, metrics } = parsed as {
          benchmarkId?: unknown;
          runAt?: unknown;
          metrics?: unknown;
        };
        if (!metrics) continue;

        results.push({
          benchmarkId:
            typeof benchmarkId === 'string' && benchmarkId !== ''
              ? benchmarkId
              : file,
          runAt:
            typeof runAt === 'string' && runAt !== ''
              ? runAt
              : new Date().toISOString(),
          metrics: parseSimulationMetrics(metrics as JsonValue),
        });
      } catch (error) {
        logger.warn('Skipping unreadable benchmark result file', {
          file,
          error,
        });
      }
    }

    return results;
  }

  /**
   * Calculate average metrics across benchmarks.
   *
   * @returns Arithmetic means of total P&L, prediction accuracy and optimality
   *   score plus the run count; all zeros for an empty input.
   */
  private calculateAverageMetrics(
    benchmarkResults: ModelCardBenchmarkResult[]
  ): {
    avgPnl: number;
    avgAccuracy: number;
    avgOptimality: number;
    benchmarkCount: number;
  } {
    const benchmarkCount = benchmarkResults.length;

    // Guard against dividing by zero.
    if (benchmarkCount === 0) {
      return {
        avgPnl: 0,
        avgAccuracy: 0,
        avgOptimality: 0,
        benchmarkCount: 0,
      };
    }

    const totalPnl = benchmarkResults.reduce(
      (sum, r) => sum + r.metrics.totalPnl,
      0
    );
    const totalAccuracy = benchmarkResults.reduce(
      (sum, r) => sum + r.metrics.predictionMetrics.accuracy,
      0
    );
    const totalOptimality = benchmarkResults.reduce(
      (sum, r) => sum + r.metrics.optimalityScore,
      0
    );

    return {
      avgPnl: totalPnl / benchmarkCount,
      avgAccuracy: totalAccuracy / benchmarkCount,
      avgOptimality: totalOptimality / benchmarkCount,
      benchmarkCount,
    };
  }

  /**
   * Generate model card for HuggingFace.
   *
   * Renders the card markdown (YAML front matter + sections) and writes it to
   * `<outputDir>/README.md`. Branding comes from the `TRAINING_BRAND_NAME`,
   * `TRAINING_BRAND_ORG` and `TRAINING_PLATFORM_NAME` environment variables,
   * each with an ElizaOS default.
   */
  private async generateModelCard(
    data: ModelCardData,
    outputDir: string
  ): Promise<void> {
    const brandName = process.env.TRAINING_BRAND_NAME || 'ElizaOS';
    const brandOrg = process.env.TRAINING_BRAND_ORG || 'ElizaOS Contributors';
    const platformName =
      process.env.TRAINING_PLATFORM_NAME || 'ElizaOS-compatible runtimes';
    // Tag form of the brand: lower-cased, whitespace runs collapsed to '-'.
    const brandTag = brandName.toLowerCase().replace(/\s+/g, '-');
    // BibTeX keys should not contain dots, so the version uses underscores.
    const citationKey = `${brandTag}_agent_${data.version.replace(/\./g, '_')}`;

    const card = `---
license: mit
library_name: transformers
tags:
- ${brandTag}
- reinforcement-learning
- trading-agent
- prediction-markets
base_model: ${data.baseModel}
---

# ${data.modelName}

Autonomous agent trained with reinforcement learning for market-style decision making.

## Model Details

- **Version:** ${data.version}
- **Base Model:** ${data.baseModel}
- **Training Date:** ${data.trainedAt.toISOString().split('T')[0]}
- **Model ID:** ${data.modelId}
${data.trainingRunId ? `- **Training Run:** ${data.trainingRunId}` : ''}

## Performance Metrics

${
  data.benchmarkResults.length > 0
    ? `
### Benchmark Results (${data.benchmarkResults.length} runs)

| Metric | Value |
|--------|-------|
| Average P&L | ${data.metrics.avgPnl.toFixed(2)} |
| Average Accuracy | ${(data.metrics.avgAccuracy * 100).toFixed(1)}% |
| Average Optimality | ${data.metrics.avgOptimality.toFixed(1)} |

### Detailed Benchmark Results

${this.generateBenchmarkTable(data.benchmarkResults)}
`
    : 'No benchmark results available yet.'
}

## Training Details

### Training Data

- **Source:** Autonomous agent trajectories
- **Collection Method:** Live agent gameplay on prediction markets
- **Training Framework:** Atropos GRPO
- **Base Model:** ${data.baseModel}

### Training Procedure

This model was trained using Group Relative Policy Optimization (GRPO) via the Atropos framework on trajectories collected from autonomous agents. The training process:

1. Agents generate trajectories through market interactions
2. Trajectories are scored using RLAIF with an LLM judge based on P&L, prediction accuracy, and decision quality
3. GRPO training optimizes policy to maximize expected rewards
4. Model checkpoints are evaluated on standardized benchmarks

### Compute Infrastructure

- **Platform:** ${data.trainingRunId ? 'Atropos GRPO Training' : 'Local training'}
- **Training Time:** Continuous learning with hourly updates

## Intended Use

This model is designed for:

- Autonomous market decision support and simulation
- Research on RL-based trading strategies
- Benchmarking agent decision-making
- Educational purposes

**Not intended for:**
- Production trading without human oversight
- Financial advice
- Real-money trading without risk management

## Evaluation

The model is evaluated on standardized benchmarks that include:

- **Prediction Market Trading:** Betting on binary outcomes with LMSR pricing
- **Perpetual Trading:** Long/short positions on crypto perps
- **Social Interaction:** Posts, group chats, and reputation building
- **Risk Management:** Position sizing and portfolio optimization

### Metrics

- **Total P&L:** Cumulative profit/loss across all positions
- **Prediction Accuracy:** Percentage of correct market predictions
- **Optimality Score:** Alignment with theoretically optimal actions (0-100)
- **Response Time:** Decision-making latency

## Usage

### Via ${platformName}

The model can be deployed in compatible runtimes and accessed via an agent API:

\`\`\`typescript
import { agentRuntimeManager } from '@elizaos/agents';

const runtime = await agentRuntimeManager.getRuntime(agentId);
const response = await runtime.chat({
messages: [{ role: 'user', content: 'Analyze this market...' }]
});
\`\`\`

### Direct Inference

If you have downloaded the model weights:

\`\`\`python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("${data.modelName}")
tokenizer = AutoTokenizer.from_pretrained("${data.modelName}")

# Use model for inference
inputs = tokenizer("Should I bet YES on this market?", return_tensors="pt")
outputs = model.generate(**inputs)
response = tokenizer.decode(outputs[0])
\`\`\`

## Limitations

- Trained on simulated market data; real-world performance may vary
- May not generalize to markets significantly different from training distribution
- Decision quality depends on market information quality
- No guarantees of profitability

## Ethical Considerations

This model is part of a research project on autonomous agents in prediction markets. Users should:

- Understand the risks of algorithmic trading
- Not rely solely on model decisions for financial outcomes
- Use appropriate risk management and position sizing
- Consider market impact and fairness implications

## Citation

\`\`\`bibtex
@model{${citationKey},
title = {${brandName} Trading Agent},
author = {${brandOrg}},
year = {${new Date().getFullYear()}},
version = {${data.version}},
url = {https://huggingface.co/${data.modelName}}
}
\`\`\`

## Model Card Contact

For questions or issues, please open an issue on the repository.
`;

    const cardPath = path.join(outputDir, 'README.md');
    await fs.writeFile(cardPath, card);
  }

  /**
   * Generate benchmark results table.
   *
   * Produces a markdown table with one row per run. Benchmark IDs longer than
   * 20 characters are truncated with a trailing `...`; shorter IDs are shown
   * in full. Pipe characters in the ID are escaped so they cannot break the
   * table, and a run timestamp that cannot be parsed is rendered as `unknown`
   * instead of throwing.
   *
   * @returns The table markdown, or an empty string when there are no results.
   */
  private generateBenchmarkTable(results: ModelCardBenchmarkResult[]): string {
    if (results.length === 0) return '';

    const maxIdLength = 20;
    const header =
      '| Benchmark | Date | P&L | Accuracy | Win Rate | Optimality |\n' +
      '|-----------|------|-----|----------|----------|------------|\n';

    const rows = results.map((result) => {
      const shortId =
        result.benchmarkId.length > maxIdLength
          ? `${result.benchmarkId.substring(0, maxIdLength)}...`
          : result.benchmarkId;
      const benchmarkLabel = shortId.replace(/\|/g, '\\|');

      // toISOString() throws a RangeError on an invalid Date, so check first.
      const runDate = new Date(result.runAt);
      const date = Number.isNaN(runDate.getTime())
        ? 'unknown'
        : runDate.toISOString().split('T')[0];

      const { metrics } = result;
      return `| ${benchmarkLabel} | ${date} | ${metrics.totalPnl.toFixed(2)} | ${(metrics.predictionMetrics.accuracy * 100).toFixed(1)}% | ${(metrics.perpMetrics.winRate * 100).toFixed(1)}% | ${metrics.optimalityScore.toFixed(1)} |\n`;
    });

    return header + rows.join('');
  }

  /**
   * Upload files to HuggingFace Hub.
   * Uses shared utility for consistent upload behavior.
   *
   * @param modelName Hub repository name to upload to.
   * @param localDir Directory whose contents are uploaded.
   * @param _isPrivate Requested visibility. It is not forwarded to the shared
   *   upload utility; a warning is logged when it is `true` so the caller can
   *   verify the repository visibility.
   * @returns The value returned by `HuggingFaceUploadUtil.uploadDirectory`.
   * @throws When no token is configured, or rethrows the upload error after
   *   logging manual upload instructions.
   */
  private async uploadToHub(
    modelName: string,
    localDir: string,
    _isPrivate: boolean
  ): Promise<number> {
    if (!this.huggingFaceToken) {
      throw new Error('HuggingFace token not configured');
    }

    if (_isPrivate) {
      // Do not let a privacy request disappear silently.
      logger.warn(
        'Private visibility was requested but is not forwarded to the upload utility; verify the repository visibility on HuggingFace',
        { modelName }
      );
    }

    try {
      // Use shared upload utility
      return await HuggingFaceUploadUtil.uploadDirectory(
        modelName,
        'model',
        localDir,
        this.huggingFaceToken
      );
    } catch (error) {
      logger.error('Failed to upload to HuggingFace Hub', { error });

      // Provide helpful manual upload instructions
      const instructions = HuggingFaceUploadUtil.getManualUploadInstructions(
        modelName,
        'model',
        localDir
      );

      logger.info('To upload manually:', { instructions });

      throw error;
    }
  }
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HuggingFace Integration Module
|
|
3
|
+
*
|
|
4
|
+
* Tools for uploading models and datasets to HuggingFace Hub.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
// Dataset uploads
export { HuggingFaceDatasetUploader } from './HuggingFaceDatasetUploader';

// Integration service (values and their option/result types)
export {
  HuggingFaceIntegrationService,
  huggingFaceIntegration,
  type DatasetUploadOptions,
  type WeeklyUploadResult,
} from './HuggingFaceIntegrationService';

// Model uploads (uploader class and its public types)
export {
  HuggingFaceModelUploader,
  type ModelCardBenchmarkResult,
  type ModelUploadOptions,
  type ModelUploadResult,
} from './HuggingFaceModelUploader';

// Shared token helpers and upload utility
export {
  getHuggingFaceToken,
  HuggingFaceUploadUtil,
  requireHuggingFaceToken,
} from './shared/HuggingFaceUploadUtil';
|