@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773726941205.json +38 -0
- package/scripts/rank_trajectories.ts +0 -1
- package/scripts/run_task_benchmark.ts +4 -11
- package/src/adapter.ts +96 -49
- package/src/archetypes/ArchetypeConfigService.ts +188 -185
- package/src/archetypes/derive-archetype.ts +47 -47
- package/src/archetypes/index.ts +2 -2
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
- package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
- package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
- package/src/benchmark/BenchmarkDataViewer.ts +32 -30
- package/src/benchmark/BenchmarkHistoryService.ts +13 -12
- package/src/benchmark/BenchmarkRunner.ts +87 -83
- package/src/benchmark/BenchmarkValidator.ts +48 -46
- package/src/benchmark/FastEvalRunner.ts +17 -16
- package/src/benchmark/MetricsValidator.ts +20 -21
- package/src/benchmark/MetricsVisualizer.ts +92 -85
- package/src/benchmark/ModelBenchmarkService.ts +90 -82
- package/src/benchmark/ModelRegistry.ts +44 -44
- package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
- package/src/benchmark/SimulationA2AInterface.ts +118 -118
- package/src/benchmark/SimulationEngine.ts +51 -51
- package/src/benchmark/TaskRunner.ts +87 -79
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
- package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
- package/src/benchmark/index.ts +27 -27
- package/src/benchmark/parseSimulationMetrics.ts +32 -32
- package/src/benchmark/simulation-types.ts +10 -10
- package/src/dependencies.ts +34 -34
- package/src/generation/TrajectoryGenerator.ts +39 -37
- package/src/generation/index.ts +1 -1
- package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
- package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
- package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
- package/src/huggingface/index.ts +6 -6
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
- package/src/index.ts +27 -27
- package/src/init-training.ts +6 -6
- package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
- package/src/metrics/index.ts +2 -2
- package/src/rubrics/__tests__/index.test.ts +73 -73
- package/src/rubrics/ass-kisser.ts +6 -6
- package/src/rubrics/degen.ts +6 -6
- package/src/rubrics/goody-twoshoes.ts +6 -6
- package/src/rubrics/index.ts +50 -50
- package/src/rubrics/information-trader.ts +6 -6
- package/src/rubrics/infosec.ts +6 -6
- package/src/rubrics/liar.ts +6 -6
- package/src/rubrics/perps-trader.ts +6 -6
- package/src/rubrics/researcher.ts +6 -6
- package/src/rubrics/scammer.ts +6 -6
- package/src/rubrics/social-butterfly.ts +7 -7
- package/src/rubrics/super-predictor.ts +6 -6
- package/src/rubrics/trader.ts +5 -5
- package/src/scoring/ArchetypeScoringService.ts +56 -54
- package/src/scoring/JudgePromptBuilder.ts +96 -96
- package/src/scoring/LLMJudgeCache.ts +26 -23
- package/src/scoring/index.ts +3 -3
- package/src/training/AutomationPipeline.ts +149 -140
- package/src/training/BenchmarkService.ts +49 -45
- package/src/training/ConfigValidator.ts +38 -32
- package/src/training/MarketOutcomesTracker.ts +22 -12
- package/src/training/ModelDeployer.ts +15 -15
- package/src/training/ModelFetcher.ts +7 -7
- package/src/training/ModelSelectionService.ts +32 -32
- package/src/training/ModelUsageVerifier.ts +31 -24
- package/src/training/MultiModelOrchestrator.ts +44 -44
- package/src/training/RLModelConfig.ts +57 -57
- package/src/training/RewardBackpropagationService.ts +18 -17
- package/src/training/RulerScoringService.ts +73 -72
- package/src/training/TrainingMonitor.ts +29 -29
- package/src/training/TrajectoryRecorder.ts +25 -27
- package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
- package/src/training/index.ts +36 -36
- package/src/training/logRLConfig.ts +7 -7
- package/src/training/pipeline.ts +13 -16
- package/src/training/storage/ModelStorageService.ts +32 -32
- package/src/training/storage/TrainingDataArchiver.ts +21 -21
- package/src/training/storage/index.ts +2 -2
- package/src/training/types.ts +6 -6
- package/src/training/window-utils.ts +14 -14
- package/src/utils/index.ts +7 -7
- package/src/utils/logger.ts +5 -5
- package/src/utils/snowflake.ts +1 -1
- package/src/utils/synthetic-detector.ts +7 -7
|
@@ -5,13 +5,13 @@
|
|
|
5
5
|
* Main entry point for all HuggingFace operations.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import { getTrainingDataAdapter } from
|
|
9
|
-
import { ModelBenchmarkService } from
|
|
10
|
-
import { getExportToHuggingFace } from
|
|
11
|
-
import { logger } from
|
|
12
|
-
import { HuggingFaceDatasetUploader } from
|
|
13
|
-
import { HuggingFaceModelUploader } from
|
|
14
|
-
import { getHuggingFaceToken } from
|
|
8
|
+
import { getTrainingDataAdapter } from "../adapter";
|
|
9
|
+
import { ModelBenchmarkService } from "../benchmark/ModelBenchmarkService";
|
|
10
|
+
import { getExportToHuggingFace } from "../dependencies";
|
|
11
|
+
import { logger } from "../utils";
|
|
12
|
+
import { HuggingFaceDatasetUploader } from "./HuggingFaceDatasetUploader";
|
|
13
|
+
import { HuggingFaceModelUploader } from "./HuggingFaceModelUploader";
|
|
14
|
+
import { getHuggingFaceToken } from "./shared/HuggingFaceUploadUtil";
|
|
15
15
|
|
|
16
16
|
export interface WeeklyUploadResult {
|
|
17
17
|
success: boolean;
|
|
@@ -49,13 +49,13 @@ export class HuggingFaceIntegrationService {
|
|
|
49
49
|
* Execute complete weekly upload pipeline
|
|
50
50
|
*/
|
|
51
51
|
async executeWeeklyUpload(
|
|
52
|
-
options: DatasetUploadOptions = {}
|
|
52
|
+
options: DatasetUploadOptions = {},
|
|
53
53
|
): Promise<WeeklyUploadResult> {
|
|
54
54
|
const startTime = Date.now();
|
|
55
55
|
logger.info(
|
|
56
|
-
|
|
56
|
+
"Starting weekly upload pipeline",
|
|
57
57
|
options,
|
|
58
|
-
|
|
58
|
+
"HuggingFaceIntegration",
|
|
59
59
|
);
|
|
60
60
|
|
|
61
61
|
const result: WeeklyUploadResult = {
|
|
@@ -77,17 +77,16 @@ export class HuggingFaceIntegrationService {
|
|
|
77
77
|
// Step 1: Upload benchmark dataset
|
|
78
78
|
if (!options.dryRun) {
|
|
79
79
|
logger.info(
|
|
80
|
-
|
|
80
|
+
"Step 1: Uploading benchmark dataset",
|
|
81
81
|
undefined,
|
|
82
|
-
|
|
82
|
+
"HuggingFaceIntegration",
|
|
83
83
|
);
|
|
84
84
|
const benchmarkResult = await this.datasetUploader.uploadDataset({
|
|
85
85
|
datasetName:
|
|
86
86
|
options.datasetName ||
|
|
87
87
|
process.env.HF_DATASET_NAME ||
|
|
88
|
-
|
|
89
|
-
description:
|
|
90
|
-
'Weekly benchmark results for autonomous ElizaOS agents',
|
|
88
|
+
"elizaos/agent-benchmarks",
|
|
89
|
+
description: "Weekly benchmark results for autonomous ElizaOS agents",
|
|
91
90
|
});
|
|
92
91
|
|
|
93
92
|
result.datasets.benchmarks = {
|
|
@@ -98,14 +97,14 @@ export class HuggingFaceIntegrationService {
|
|
|
98
97
|
|
|
99
98
|
if (!benchmarkResult.success) {
|
|
100
99
|
result.errors.push(
|
|
101
|
-
`Benchmark dataset upload: ${benchmarkResult.error}
|
|
100
|
+
`Benchmark dataset upload: ${benchmarkResult.error}`,
|
|
102
101
|
);
|
|
103
102
|
}
|
|
104
103
|
} else {
|
|
105
104
|
logger.info(
|
|
106
|
-
|
|
105
|
+
"DRY RUN: Skipping benchmark dataset upload",
|
|
107
106
|
undefined,
|
|
108
|
-
|
|
107
|
+
"HuggingFaceIntegration",
|
|
109
108
|
);
|
|
110
109
|
result.datasets.benchmarks.success = true;
|
|
111
110
|
}
|
|
@@ -113,17 +112,17 @@ export class HuggingFaceIntegrationService {
|
|
|
113
112
|
// Step 2: Upload trajectory dataset
|
|
114
113
|
if (!options.dryRun) {
|
|
115
114
|
logger.info(
|
|
116
|
-
|
|
115
|
+
"Step 2: Uploading trajectory dataset",
|
|
117
116
|
undefined,
|
|
118
|
-
|
|
117
|
+
"HuggingFaceIntegration",
|
|
119
118
|
);
|
|
120
119
|
const exportToHuggingFace = getExportToHuggingFace();
|
|
121
120
|
const trajectoryResult = await exportToHuggingFace({
|
|
122
121
|
datasetName:
|
|
123
122
|
options.trajectoryDatasetName ||
|
|
124
123
|
process.env.HF_TRAJECTORY_DATASET_NAME ||
|
|
125
|
-
|
|
126
|
-
format:
|
|
124
|
+
"elizaos/agent-trajectories",
|
|
125
|
+
format: "jsonl",
|
|
127
126
|
});
|
|
128
127
|
|
|
129
128
|
result.datasets.trajectories = {
|
|
@@ -134,14 +133,14 @@ export class HuggingFaceIntegrationService {
|
|
|
134
133
|
|
|
135
134
|
if (!trajectoryResult.success) {
|
|
136
135
|
result.errors.push(
|
|
137
|
-
`Trajectory dataset upload: ${trajectoryResult.error}
|
|
136
|
+
`Trajectory dataset upload: ${trajectoryResult.error}`,
|
|
138
137
|
);
|
|
139
138
|
}
|
|
140
139
|
} else {
|
|
141
140
|
logger.info(
|
|
142
|
-
|
|
141
|
+
"DRY RUN: Skipping trajectory dataset upload",
|
|
143
142
|
undefined,
|
|
144
|
-
|
|
143
|
+
"HuggingFaceIntegration",
|
|
145
144
|
);
|
|
146
145
|
result.datasets.trajectories.success = true;
|
|
147
146
|
}
|
|
@@ -154,7 +153,7 @@ export class HuggingFaceIntegrationService {
|
|
|
154
153
|
logger.info(
|
|
155
154
|
`Step 3: Found ${unbenchmarkedModels.length} unbenchmarked models`,
|
|
156
155
|
undefined,
|
|
157
|
-
|
|
156
|
+
"HuggingFaceIntegration",
|
|
158
157
|
);
|
|
159
158
|
|
|
160
159
|
if (unbenchmarkedModels.length > 0) {
|
|
@@ -162,8 +161,8 @@ export class HuggingFaceIntegrationService {
|
|
|
162
161
|
await ModelBenchmarkService.getStandardBenchmarkPaths();
|
|
163
162
|
|
|
164
163
|
if (standardBenchmarks.length === 0) {
|
|
165
|
-
const error =
|
|
166
|
-
logger.error(error, undefined,
|
|
164
|
+
const error = "No standard benchmarks available for model evaluation";
|
|
165
|
+
logger.error(error, undefined, "HuggingFaceIntegration");
|
|
167
166
|
result.errors.push(error);
|
|
168
167
|
} else {
|
|
169
168
|
for (const modelId of unbenchmarkedModels) {
|
|
@@ -172,7 +171,7 @@ export class HuggingFaceIntegrationService {
|
|
|
172
171
|
logger.info(
|
|
173
172
|
`Benchmarking model: ${modelId}`,
|
|
174
173
|
undefined,
|
|
175
|
-
|
|
174
|
+
"HuggingFaceIntegration",
|
|
176
175
|
);
|
|
177
176
|
await ModelBenchmarkService.benchmarkModel({
|
|
178
177
|
modelId,
|
|
@@ -186,14 +185,15 @@ export class HuggingFaceIntegrationService {
|
|
|
186
185
|
await ModelBenchmarkService.compareToBaseline(modelId);
|
|
187
186
|
|
|
188
187
|
// Upload if improved
|
|
189
|
-
if (comparison.recommendation ===
|
|
188
|
+
if (comparison.recommendation === "deploy" && !options.dryRun) {
|
|
190
189
|
logger.info(
|
|
191
190
|
`Model ${modelId} improved, uploading`,
|
|
192
191
|
undefined,
|
|
193
|
-
|
|
192
|
+
"HuggingFaceIntegration",
|
|
194
193
|
);
|
|
195
194
|
|
|
196
|
-
const model =
|
|
195
|
+
const model =
|
|
196
|
+
await getTrainingDataAdapter().getModelById(modelId);
|
|
197
197
|
|
|
198
198
|
if (model) {
|
|
199
199
|
const modelName = options.modelNamePrefix
|
|
@@ -205,7 +205,7 @@ export class HuggingFaceIntegrationService {
|
|
|
205
205
|
const modelDescription =
|
|
206
206
|
options.modelDescriptionPrefix ||
|
|
207
207
|
process.env.HF_MODEL_DESCRIPTION_PREFIX ||
|
|
208
|
-
|
|
208
|
+
"Autonomous ElizaOS agent";
|
|
209
209
|
|
|
210
210
|
const uploadResult = await this.modelUploader.uploadModel({
|
|
211
211
|
modelId,
|
|
@@ -218,10 +218,13 @@ export class HuggingFaceIntegrationService {
|
|
|
218
218
|
result.models.uploaded++;
|
|
219
219
|
|
|
220
220
|
// Update model with HuggingFace repo
|
|
221
|
-
await getTrainingDataAdapter().updateModelHuggingFaceRepo(
|
|
221
|
+
await getTrainingDataAdapter().updateModelHuggingFaceRepo(
|
|
222
|
+
modelId,
|
|
223
|
+
modelName,
|
|
224
|
+
);
|
|
222
225
|
} else {
|
|
223
226
|
result.errors.push(
|
|
224
|
-
`Model upload ${modelId}: ${uploadResult.error}
|
|
227
|
+
`Model upload ${modelId}: ${uploadResult.error}`,
|
|
225
228
|
);
|
|
226
229
|
}
|
|
227
230
|
}
|
|
@@ -229,7 +232,7 @@ export class HuggingFaceIntegrationService {
|
|
|
229
232
|
logger.info(
|
|
230
233
|
`Model ${modelId} not ready for deployment: ${comparison.recommendation}`,
|
|
231
234
|
undefined,
|
|
232
|
-
|
|
235
|
+
"HuggingFaceIntegration",
|
|
233
236
|
);
|
|
234
237
|
}
|
|
235
238
|
} catch (error) {
|
|
@@ -238,7 +241,7 @@ export class HuggingFaceIntegrationService {
|
|
|
238
241
|
logger.error(
|
|
239
242
|
`Failed to process model ${modelId}`,
|
|
240
243
|
{ error },
|
|
241
|
-
|
|
244
|
+
"HuggingFaceIntegration",
|
|
242
245
|
);
|
|
243
246
|
result.errors.push(`Model ${modelId}: ${errorMsg}`);
|
|
244
247
|
}
|
|
@@ -250,7 +253,7 @@ export class HuggingFaceIntegrationService {
|
|
|
250
253
|
result.duration = Date.now() - startTime;
|
|
251
254
|
|
|
252
255
|
logger.info(
|
|
253
|
-
|
|
256
|
+
"Weekly upload pipeline complete",
|
|
254
257
|
{
|
|
255
258
|
success: result.success,
|
|
256
259
|
benchmarkDataset: result.datasets.benchmarks.success,
|
|
@@ -261,19 +264,19 @@ export class HuggingFaceIntegrationService {
|
|
|
261
264
|
errors: result.errors.length,
|
|
262
265
|
duration: result.duration,
|
|
263
266
|
},
|
|
264
|
-
|
|
267
|
+
"HuggingFaceIntegration",
|
|
265
268
|
);
|
|
266
269
|
|
|
267
270
|
return result;
|
|
268
271
|
} catch (error) {
|
|
269
272
|
result.duration = Date.now() - startTime;
|
|
270
273
|
result.errors.push(
|
|
271
|
-
error instanceof Error ? error.message : String(error)
|
|
274
|
+
error instanceof Error ? error.message : String(error),
|
|
272
275
|
);
|
|
273
276
|
logger.error(
|
|
274
|
-
|
|
277
|
+
"Weekly upload pipeline failed",
|
|
275
278
|
{ error },
|
|
276
|
-
|
|
279
|
+
"HuggingFaceIntegration",
|
|
277
280
|
);
|
|
278
281
|
return result;
|
|
279
282
|
}
|
|
@@ -295,13 +298,16 @@ export class HuggingFaceIntegrationService {
|
|
|
295
298
|
const adapter = getTrainingDataAdapter();
|
|
296
299
|
|
|
297
300
|
// Get last upload time from database
|
|
298
|
-
const lastUploadTime =
|
|
301
|
+
const lastUploadTime =
|
|
302
|
+
(await adapter.getLastDeployedModelDate()) || new Date(0);
|
|
299
303
|
|
|
300
304
|
// Check for new benchmarks since last upload
|
|
301
|
-
const newBenchmarksCount =
|
|
305
|
+
const newBenchmarksCount =
|
|
306
|
+
await adapter.countBenchmarksSince(lastUploadTime);
|
|
302
307
|
|
|
303
308
|
// Check for new trajectories since last upload
|
|
304
|
-
const newTrajectoriesCount =
|
|
309
|
+
const newTrajectoriesCount =
|
|
310
|
+
await adapter.countTrajectoriesSince(lastUploadTime);
|
|
305
311
|
|
|
306
312
|
// Check for unbenchmarked models
|
|
307
313
|
const unbenchmarkedModels =
|
|
@@ -333,7 +339,7 @@ export class HuggingFaceIntegrationService {
|
|
|
333
339
|
// Check HuggingFace token
|
|
334
340
|
if (!getHuggingFaceToken()) {
|
|
335
341
|
issues.push(
|
|
336
|
-
|
|
342
|
+
"HUGGING_FACE_TOKEN or HF_TOKEN environment variable not set",
|
|
337
343
|
);
|
|
338
344
|
}
|
|
339
345
|
|
|
@@ -343,10 +349,10 @@ export class HuggingFaceIntegrationService {
|
|
|
343
349
|
try {
|
|
344
350
|
const healthy = await adapter.healthCheck();
|
|
345
351
|
if (!healthy) {
|
|
346
|
-
issues.push(
|
|
352
|
+
issues.push("Cannot connect to database");
|
|
347
353
|
}
|
|
348
354
|
} catch {
|
|
349
|
-
issues.push(
|
|
355
|
+
issues.push("Cannot connect to database");
|
|
350
356
|
}
|
|
351
357
|
|
|
352
358
|
// Check for standard benchmarks
|
|
@@ -354,7 +360,7 @@ export class HuggingFaceIntegrationService {
|
|
|
354
360
|
await ModelBenchmarkService.getStandardBenchmarkPaths();
|
|
355
361
|
if (standardBenchmarks.length === 0) {
|
|
356
362
|
warnings.push(
|
|
357
|
-
|
|
363
|
+
"No standard benchmarks found. Generate benchmark fixtures before upload.",
|
|
358
364
|
);
|
|
359
365
|
}
|
|
360
366
|
|
|
@@ -364,21 +370,21 @@ export class HuggingFaceIntegrationService {
|
|
|
364
370
|
|
|
365
371
|
if (stats.benchmarkCount === 0) {
|
|
366
372
|
warnings.push(
|
|
367
|
-
|
|
373
|
+
"No benchmark results in database. Run some benchmarks first.",
|
|
368
374
|
);
|
|
369
375
|
}
|
|
370
376
|
|
|
371
377
|
if (stats.trajectoryTraining === 0) {
|
|
372
378
|
warnings.push(
|
|
373
|
-
|
|
379
|
+
"No training trajectories in database. Generate with agents or test data.",
|
|
374
380
|
);
|
|
375
381
|
}
|
|
376
382
|
|
|
377
383
|
if (stats.modelTotal === 0) {
|
|
378
|
-
warnings.push(
|
|
384
|
+
warnings.push("No trained models in database.");
|
|
379
385
|
}
|
|
380
386
|
} catch {
|
|
381
|
-
issues.push(
|
|
387
|
+
issues.push("Could not retrieve training statistics");
|
|
382
388
|
}
|
|
383
389
|
|
|
384
390
|
return {
|
|
@@ -4,20 +4,20 @@
|
|
|
4
4
|
* Uploads trained RL models to HuggingFace Hub with benchmark results and model cards.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import {
|
|
8
|
-
import
|
|
9
|
-
import
|
|
7
|
+
import { promises as fs } from "node:fs";
|
|
8
|
+
import * as path from "node:path";
|
|
9
|
+
import { getTrainingDataAdapter } from "../adapter";
|
|
10
10
|
import {
|
|
11
11
|
type JsonValue,
|
|
12
12
|
parseSimulationMetrics,
|
|
13
|
-
} from
|
|
14
|
-
import type { SimulationMetrics } from
|
|
15
|
-
import { logger } from
|
|
13
|
+
} from "../benchmark/parseSimulationMetrics";
|
|
14
|
+
import type { SimulationMetrics } from "../benchmark/SimulationEngine";
|
|
15
|
+
import { logger } from "../utils";
|
|
16
16
|
import {
|
|
17
17
|
getHuggingFaceToken,
|
|
18
18
|
HuggingFaceUploadUtil,
|
|
19
19
|
requireHuggingFaceToken,
|
|
20
|
-
} from
|
|
20
|
+
} from "./shared/HuggingFaceUploadUtil";
|
|
21
21
|
|
|
22
22
|
/**
|
|
23
23
|
* Simplified benchmark result for HuggingFace model cards
|
|
@@ -76,7 +76,7 @@ export class HuggingFaceModelUploader {
|
|
|
76
76
|
*/
|
|
77
77
|
async uploadModel(options: ModelUploadOptions): Promise<ModelUploadResult> {
|
|
78
78
|
try {
|
|
79
|
-
logger.info(
|
|
79
|
+
logger.info("Starting HuggingFace model upload", {
|
|
80
80
|
modelId: options.modelId,
|
|
81
81
|
});
|
|
82
82
|
|
|
@@ -93,11 +93,11 @@ export class HuggingFaceModelUploader {
|
|
|
93
93
|
}
|
|
94
94
|
|
|
95
95
|
// Step 2: Get benchmark results
|
|
96
|
-
logger.info(
|
|
96
|
+
logger.info("Loading benchmark results", { modelId: options.modelId });
|
|
97
97
|
const modelBenchmarks = await this.getBenchmarkResults(options.modelId);
|
|
98
98
|
|
|
99
99
|
if (modelBenchmarks.length === 0) {
|
|
100
|
-
logger.warn(
|
|
100
|
+
logger.warn("No benchmark results found for model", {
|
|
101
101
|
modelId: options.modelId,
|
|
102
102
|
});
|
|
103
103
|
}
|
|
@@ -117,15 +117,15 @@ export class HuggingFaceModelUploader {
|
|
|
117
117
|
// Step 4: Create output directory
|
|
118
118
|
const outputDir =
|
|
119
119
|
options.outputDir ||
|
|
120
|
-
path.join(process.cwd(),
|
|
120
|
+
path.join(process.cwd(), "exports", "models", model.version);
|
|
121
121
|
await fs.mkdir(outputDir, { recursive: true });
|
|
122
122
|
|
|
123
123
|
// Step 5: Generate model card
|
|
124
|
-
logger.info(
|
|
124
|
+
logger.info("Generating model card");
|
|
125
125
|
await this.generateModelCard(cardData, outputDir);
|
|
126
126
|
|
|
127
127
|
// Step 6: Save metadata
|
|
128
|
-
const metadataPath = path.join(outputDir,
|
|
128
|
+
const metadataPath = path.join(outputDir, "model_metadata.json");
|
|
129
129
|
await fs.writeFile(
|
|
130
130
|
metadataPath,
|
|
131
131
|
JSON.stringify(
|
|
@@ -141,42 +141,42 @@ export class HuggingFaceModelUploader {
|
|
|
141
141
|
accuracy: model.accuracy,
|
|
142
142
|
},
|
|
143
143
|
null,
|
|
144
|
-
2
|
|
145
|
-
)
|
|
144
|
+
2,
|
|
145
|
+
),
|
|
146
146
|
);
|
|
147
147
|
|
|
148
148
|
// Step 7: Save benchmark results
|
|
149
|
-
const benchmarksPath = path.join(outputDir,
|
|
149
|
+
const benchmarksPath = path.join(outputDir, "benchmark_results.json");
|
|
150
150
|
await fs.writeFile(
|
|
151
151
|
benchmarksPath,
|
|
152
|
-
JSON.stringify(modelBenchmarks, null, 2)
|
|
152
|
+
JSON.stringify(modelBenchmarks, null, 2),
|
|
153
153
|
);
|
|
154
154
|
|
|
155
155
|
// Step 8: Upload to HuggingFace (if weights available and requested)
|
|
156
156
|
let filesUploaded = 2; // README.md + metadata
|
|
157
157
|
|
|
158
158
|
if (options.includeWeights && model.storagePath) {
|
|
159
|
-
logger.info(
|
|
159
|
+
logger.info("Uploading model to HuggingFace", {
|
|
160
160
|
modelName: options.modelName,
|
|
161
161
|
});
|
|
162
162
|
const uploadCount = await this.uploadToHub(
|
|
163
163
|
options.modelName,
|
|
164
164
|
outputDir,
|
|
165
|
-
options.private ?? false
|
|
165
|
+
options.private ?? false,
|
|
166
166
|
);
|
|
167
167
|
filesUploaded = uploadCount;
|
|
168
168
|
} else {
|
|
169
169
|
logger.info(
|
|
170
|
-
|
|
170
|
+
"Skipping model weight upload (not requested or no weights available)",
|
|
171
171
|
);
|
|
172
172
|
}
|
|
173
173
|
|
|
174
174
|
const modelUrl = `https://huggingface.co/${options.modelName}`;
|
|
175
175
|
|
|
176
|
-
logger.info(
|
|
176
|
+
logger.info("Model uploaded successfully", { modelUrl, filesUploaded });
|
|
177
177
|
|
|
178
178
|
// Update model status in database
|
|
179
|
-
await adapter.updateModelStatus(options.modelId,
|
|
179
|
+
await adapter.updateModelStatus(options.modelId, "deployed", {
|
|
180
180
|
deployedAt: new Date(),
|
|
181
181
|
});
|
|
182
182
|
|
|
@@ -187,12 +187,12 @@ export class HuggingFaceModelUploader {
|
|
|
187
187
|
filesUploaded,
|
|
188
188
|
};
|
|
189
189
|
} catch (error) {
|
|
190
|
-
logger.error(
|
|
190
|
+
logger.error("Failed to upload model", { error });
|
|
191
191
|
return {
|
|
192
192
|
success: false,
|
|
193
193
|
modelId: options.modelId,
|
|
194
194
|
filesUploaded: 0,
|
|
195
|
-
error: error instanceof Error ? error.message :
|
|
195
|
+
error: error instanceof Error ? error.message : "Unknown error",
|
|
196
196
|
};
|
|
197
197
|
}
|
|
198
198
|
}
|
|
@@ -201,11 +201,12 @@ export class HuggingFaceModelUploader {
|
|
|
201
201
|
* Get benchmark results for a model
|
|
202
202
|
*/
|
|
203
203
|
private async getBenchmarkResults(
|
|
204
|
-
modelId: string
|
|
204
|
+
modelId: string,
|
|
205
205
|
): Promise<ModelCardBenchmarkResult[]> {
|
|
206
206
|
// Query benchmark results from database
|
|
207
207
|
try {
|
|
208
|
-
const results =
|
|
208
|
+
const results =
|
|
209
|
+
await getTrainingDataAdapter().getBenchmarkResultsByModel(modelId);
|
|
209
210
|
|
|
210
211
|
return results.map((r) => ({
|
|
211
212
|
benchmarkId: r.benchmarkId,
|
|
@@ -214,7 +215,7 @@ export class HuggingFaceModelUploader {
|
|
|
214
215
|
metrics: parseSimulationMetrics(r.detailedMetrics as JsonValue),
|
|
215
216
|
}));
|
|
216
217
|
} catch (error) {
|
|
217
|
-
logger.warn(
|
|
218
|
+
logger.warn("Could not load benchmark results from database", { error });
|
|
218
219
|
|
|
219
220
|
// Fallback to files if database fails
|
|
220
221
|
return await this.getBenchmarkResultsFromFiles(modelId);
|
|
@@ -225,18 +226,18 @@ export class HuggingFaceModelUploader {
|
|
|
225
226
|
* Fallback: Get benchmark results from files
|
|
226
227
|
*/
|
|
227
228
|
private async getBenchmarkResultsFromFiles(
|
|
228
|
-
modelId: string
|
|
229
|
+
modelId: string,
|
|
229
230
|
): Promise<ModelCardBenchmarkResult[]> {
|
|
230
231
|
const results: ModelCardBenchmarkResult[] = [];
|
|
231
232
|
|
|
232
233
|
try {
|
|
233
|
-
const benchmarksDir = path.join(process.cwd(),
|
|
234
|
+
const benchmarksDir = path.join(process.cwd(), "benchmarks");
|
|
234
235
|
const files = await fs.readdir(benchmarksDir);
|
|
235
236
|
|
|
236
237
|
for (const file of files) {
|
|
237
|
-
if (file.endsWith(
|
|
238
|
+
if (file.endsWith(".json") && file.includes(modelId)) {
|
|
238
239
|
const filePath = path.join(benchmarksDir, file);
|
|
239
|
-
const data = JSON.parse(await fs.readFile(filePath,
|
|
240
|
+
const data = JSON.parse(await fs.readFile(filePath, "utf-8"));
|
|
240
241
|
|
|
241
242
|
if (data.metrics) {
|
|
242
243
|
results.push({
|
|
@@ -248,7 +249,7 @@ export class HuggingFaceModelUploader {
|
|
|
248
249
|
}
|
|
249
250
|
}
|
|
250
251
|
} catch (error) {
|
|
251
|
-
logger.warn(
|
|
252
|
+
logger.warn("Could not load benchmark results from files either", {
|
|
252
253
|
error,
|
|
253
254
|
});
|
|
254
255
|
}
|
|
@@ -260,7 +261,7 @@ export class HuggingFaceModelUploader {
|
|
|
260
261
|
* Calculate average metrics across benchmarks
|
|
261
262
|
*/
|
|
262
263
|
private calculateAverageMetrics(
|
|
263
|
-
benchmarkResults: ModelCardBenchmarkResult[]
|
|
264
|
+
benchmarkResults: ModelCardBenchmarkResult[],
|
|
264
265
|
): {
|
|
265
266
|
avgPnl: number;
|
|
266
267
|
avgAccuracy: number;
|
|
@@ -278,15 +279,15 @@ export class HuggingFaceModelUploader {
|
|
|
278
279
|
|
|
279
280
|
const totalPnl = benchmarkResults.reduce(
|
|
280
281
|
(sum, r) => sum + r.metrics.totalPnl,
|
|
281
|
-
0
|
|
282
|
+
0,
|
|
282
283
|
);
|
|
283
284
|
const totalAccuracy = benchmarkResults.reduce(
|
|
284
285
|
(sum, r) => sum + r.metrics.predictionMetrics.accuracy,
|
|
285
|
-
0
|
|
286
|
+
0,
|
|
286
287
|
);
|
|
287
288
|
const totalOptimality = benchmarkResults.reduce(
|
|
288
289
|
(sum, r) => sum + r.metrics.optimalityScore,
|
|
289
|
-
0
|
|
290
|
+
0,
|
|
290
291
|
);
|
|
291
292
|
|
|
292
293
|
return {
|
|
@@ -302,14 +303,14 @@ export class HuggingFaceModelUploader {
|
|
|
302
303
|
*/
|
|
303
304
|
private async generateModelCard(
|
|
304
305
|
data: ModelCardData,
|
|
305
|
-
outputDir: string
|
|
306
|
+
outputDir: string,
|
|
306
307
|
): Promise<void> {
|
|
307
|
-
const brandName = process.env.TRAINING_BRAND_NAME ||
|
|
308
|
-
const brandOrg = process.env.TRAINING_BRAND_ORG ||
|
|
308
|
+
const brandName = process.env.TRAINING_BRAND_NAME || "ElizaOS";
|
|
309
|
+
const brandOrg = process.env.TRAINING_BRAND_ORG || "ElizaOS Contributors";
|
|
309
310
|
const platformName =
|
|
310
|
-
process.env.TRAINING_PLATFORM_NAME ||
|
|
311
|
-
const brandTag = brandName.toLowerCase().replace(/\s+/g,
|
|
312
|
-
const citationKey = `${brandTag}_agent_${data.version.replace(/\./g,
|
|
311
|
+
process.env.TRAINING_PLATFORM_NAME || "ElizaOS-compatible runtimes";
|
|
312
|
+
const brandTag = brandName.toLowerCase().replace(/\s+/g, "-");
|
|
313
|
+
const citationKey = `${brandTag}_agent_${data.version.replace(/\./g, "_")}`;
|
|
313
314
|
|
|
314
315
|
const card = `---
|
|
315
316
|
license: mit
|
|
@@ -330,9 +331,9 @@ Autonomous agent trained with reinforcement learning for market-style decision m
|
|
|
330
331
|
|
|
331
332
|
- **Version:** ${data.version}
|
|
332
333
|
- **Base Model:** ${data.baseModel}
|
|
333
|
-
- **Training Date:** ${data.trainedAt.toISOString().split(
|
|
334
|
+
- **Training Date:** ${data.trainedAt.toISOString().split("T")[0]}
|
|
334
335
|
- **Model ID:** ${data.modelId}
|
|
335
|
-
${data.trainingRunId ? `- **Training Run:** ${data.trainingRunId}` :
|
|
336
|
+
${data.trainingRunId ? `- **Training Run:** ${data.trainingRunId}` : ""}
|
|
336
337
|
|
|
337
338
|
## Performance Metrics
|
|
338
339
|
|
|
@@ -351,7 +352,7 @@ ${
|
|
|
351
352
|
|
|
352
353
|
${this.generateBenchmarkTable(data.benchmarkResults)}
|
|
353
354
|
`
|
|
354
|
-
:
|
|
355
|
+
: "No benchmark results available yet."
|
|
355
356
|
}
|
|
356
357
|
|
|
357
358
|
## Training Details
|
|
@@ -374,7 +375,7 @@ This model was trained using Group Relative Policy Optimization (GRPO) via the A
|
|
|
374
375
|
|
|
375
376
|
### Compute Infrastructure
|
|
376
377
|
|
|
377
|
-
- **Platform:** ${data.trainingRunId ?
|
|
378
|
+
- **Platform:** ${data.trainingRunId ? "Atropos GRPO Training" : "Local training"}
|
|
378
379
|
- **Training Time:** Continuous learning with hourly updates
|
|
379
380
|
|
|
380
381
|
## Intended Use
|
|
@@ -471,7 +472,7 @@ This model is part of a research project on autonomous agents in prediction mark
|
|
|
471
472
|
For questions or issues, please open an issue on the repository.
|
|
472
473
|
`;
|
|
473
474
|
|
|
474
|
-
const cardPath = path.join(outputDir,
|
|
475
|
+
const cardPath = path.join(outputDir, "README.md");
|
|
475
476
|
await fs.writeFile(cardPath, card);
|
|
476
477
|
}
|
|
477
478
|
|
|
@@ -479,14 +480,14 @@ For questions or issues, please open an issue on the repository.
|
|
|
479
480
|
* Generate benchmark results table
|
|
480
481
|
*/
|
|
481
482
|
private generateBenchmarkTable(results: ModelCardBenchmarkResult[]): string {
|
|
482
|
-
if (results.length === 0) return
|
|
483
|
+
if (results.length === 0) return "";
|
|
483
484
|
|
|
484
485
|
let table =
|
|
485
|
-
|
|
486
|
-
table +=
|
|
486
|
+
"| Benchmark | Date | P&L | Accuracy | Win Rate | Optimality |\n";
|
|
487
|
+
table += "|-----------|------|-----|----------|----------|------------|\n";
|
|
487
488
|
|
|
488
489
|
results.forEach((result) => {
|
|
489
|
-
const date = new Date(result.runAt).toISOString().split(
|
|
490
|
+
const date = new Date(result.runAt).toISOString().split("T")[0];
|
|
490
491
|
table += `| ${result.benchmarkId.substring(0, 20)}... | ${date} | ${result.metrics.totalPnl.toFixed(2)} | ${(result.metrics.predictionMetrics.accuracy * 100).toFixed(1)}% | ${(result.metrics.perpMetrics.winRate * 100).toFixed(1)}% | ${result.metrics.optimalityScore.toFixed(1)} |\n`;
|
|
491
492
|
});
|
|
492
493
|
|
|
@@ -500,31 +501,31 @@ For questions or issues, please open an issue on the repository.
|
|
|
500
501
|
private async uploadToHub(
|
|
501
502
|
modelName: string,
|
|
502
503
|
localDir: string,
|
|
503
|
-
_isPrivate: boolean
|
|
504
|
+
_isPrivate: boolean,
|
|
504
505
|
): Promise<number> {
|
|
505
506
|
if (!this.huggingFaceToken) {
|
|
506
|
-
throw new Error(
|
|
507
|
+
throw new Error("HuggingFace token not configured");
|
|
507
508
|
}
|
|
508
509
|
|
|
509
510
|
try {
|
|
510
511
|
// Use shared upload utility
|
|
511
512
|
return await HuggingFaceUploadUtil.uploadDirectory(
|
|
512
513
|
modelName,
|
|
513
|
-
|
|
514
|
+
"model",
|
|
514
515
|
localDir,
|
|
515
|
-
this.huggingFaceToken
|
|
516
|
+
this.huggingFaceToken,
|
|
516
517
|
);
|
|
517
518
|
} catch (error) {
|
|
518
|
-
logger.error(
|
|
519
|
+
logger.error("Failed to upload to HuggingFace Hub", { error });
|
|
519
520
|
|
|
520
521
|
// Provide helpful manual upload instructions
|
|
521
522
|
const instructions = HuggingFaceUploadUtil.getManualUploadInstructions(
|
|
522
523
|
modelName,
|
|
523
|
-
|
|
524
|
-
localDir
|
|
524
|
+
"model",
|
|
525
|
+
localDir,
|
|
525
526
|
);
|
|
526
527
|
|
|
527
|
-
logger.info(
|
|
528
|
+
logger.info("To upload manually:", { instructions });
|
|
528
529
|
|
|
529
530
|
throw error;
|
|
530
531
|
}
|