@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773726941205.json +38 -0
- package/scripts/rank_trajectories.ts +0 -1
- package/scripts/run_task_benchmark.ts +4 -11
- package/src/adapter.ts +96 -49
- package/src/archetypes/ArchetypeConfigService.ts +188 -185
- package/src/archetypes/derive-archetype.ts +47 -47
- package/src/archetypes/index.ts +2 -2
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
- package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
- package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
- package/src/benchmark/BenchmarkDataViewer.ts +32 -30
- package/src/benchmark/BenchmarkHistoryService.ts +13 -12
- package/src/benchmark/BenchmarkRunner.ts +87 -83
- package/src/benchmark/BenchmarkValidator.ts +48 -46
- package/src/benchmark/FastEvalRunner.ts +17 -16
- package/src/benchmark/MetricsValidator.ts +20 -21
- package/src/benchmark/MetricsVisualizer.ts +92 -85
- package/src/benchmark/ModelBenchmarkService.ts +90 -82
- package/src/benchmark/ModelRegistry.ts +44 -44
- package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
- package/src/benchmark/SimulationA2AInterface.ts +118 -118
- package/src/benchmark/SimulationEngine.ts +51 -51
- package/src/benchmark/TaskRunner.ts +87 -79
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
- package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
- package/src/benchmark/index.ts +27 -27
- package/src/benchmark/parseSimulationMetrics.ts +32 -32
- package/src/benchmark/simulation-types.ts +10 -10
- package/src/dependencies.ts +34 -34
- package/src/generation/TrajectoryGenerator.ts +39 -37
- package/src/generation/index.ts +1 -1
- package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
- package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
- package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
- package/src/huggingface/index.ts +6 -6
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
- package/src/index.ts +27 -27
- package/src/init-training.ts +6 -6
- package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
- package/src/metrics/index.ts +2 -2
- package/src/rubrics/__tests__/index.test.ts +73 -73
- package/src/rubrics/ass-kisser.ts +6 -6
- package/src/rubrics/degen.ts +6 -6
- package/src/rubrics/goody-twoshoes.ts +6 -6
- package/src/rubrics/index.ts +50 -50
- package/src/rubrics/information-trader.ts +6 -6
- package/src/rubrics/infosec.ts +6 -6
- package/src/rubrics/liar.ts +6 -6
- package/src/rubrics/perps-trader.ts +6 -6
- package/src/rubrics/researcher.ts +6 -6
- package/src/rubrics/scammer.ts +6 -6
- package/src/rubrics/social-butterfly.ts +7 -7
- package/src/rubrics/super-predictor.ts +6 -6
- package/src/rubrics/trader.ts +5 -5
- package/src/scoring/ArchetypeScoringService.ts +56 -54
- package/src/scoring/JudgePromptBuilder.ts +96 -96
- package/src/scoring/LLMJudgeCache.ts +26 -23
- package/src/scoring/index.ts +3 -3
- package/src/training/AutomationPipeline.ts +149 -140
- package/src/training/BenchmarkService.ts +49 -45
- package/src/training/ConfigValidator.ts +38 -32
- package/src/training/MarketOutcomesTracker.ts +22 -12
- package/src/training/ModelDeployer.ts +15 -15
- package/src/training/ModelFetcher.ts +7 -7
- package/src/training/ModelSelectionService.ts +32 -32
- package/src/training/ModelUsageVerifier.ts +31 -24
- package/src/training/MultiModelOrchestrator.ts +44 -44
- package/src/training/RLModelConfig.ts +57 -57
- package/src/training/RewardBackpropagationService.ts +18 -17
- package/src/training/RulerScoringService.ts +73 -72
- package/src/training/TrainingMonitor.ts +29 -29
- package/src/training/TrajectoryRecorder.ts +25 -27
- package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
- package/src/training/index.ts +36 -36
- package/src/training/logRLConfig.ts +7 -7
- package/src/training/pipeline.ts +13 -16
- package/src/training/storage/ModelStorageService.ts +32 -32
- package/src/training/storage/TrainingDataArchiver.ts +21 -21
- package/src/training/storage/index.ts +2 -2
- package/src/training/types.ts +6 -6
- package/src/training/window-utils.ts +14 -14
- package/src/utils/index.ts +7 -7
- package/src/utils/logger.ts +5 -5
- package/src/utils/snowflake.ts +1 -1
- package/src/utils/synthetic-detector.ts +7 -7
|
@@ -9,16 +9,16 @@
|
|
|
9
9
|
* @packageDocumentation
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
import { getTrainingDataAdapter } from
|
|
13
|
-
import
|
|
14
|
-
import {
|
|
12
|
+
import { getTrainingDataAdapter } from "../adapter";
|
|
13
|
+
import { ArchetypeConfigService } from "../archetypes/ArchetypeConfigService";
|
|
14
|
+
import type { IAgentRuntimeLike, UserLike } from "../dependencies";
|
|
15
15
|
import {
|
|
16
16
|
areAgentDependenciesConfigured,
|
|
17
17
|
getAgentRuntimeManager,
|
|
18
18
|
getAgentService,
|
|
19
19
|
getAutonomousCoordinator,
|
|
20
|
-
} from
|
|
21
|
-
import { logger } from
|
|
20
|
+
} from "../dependencies";
|
|
21
|
+
import { logger } from "../utils/logger";
|
|
22
22
|
|
|
23
23
|
export interface ParallelGenerationConfig {
|
|
24
24
|
// Agent configuration
|
|
@@ -58,7 +58,7 @@ export interface ParallelGenerationResult {
|
|
|
58
58
|
function ensureDependencies(): void {
|
|
59
59
|
if (!areAgentDependenciesConfigured()) {
|
|
60
60
|
throw new Error(
|
|
61
|
-
|
|
61
|
+
"Training dependencies not configured. Call configureTrainingDependencies() with agentService, agentRuntimeManager, and autonomousCoordinator first.",
|
|
62
62
|
);
|
|
63
63
|
}
|
|
64
64
|
}
|
|
@@ -68,7 +68,8 @@ function ensureDependencies(): void {
|
|
|
68
68
|
*/
|
|
69
69
|
export class TrajectoryGenerator {
|
|
70
70
|
private config: ParallelGenerationConfig;
|
|
71
|
-
private agents: Map<string, { user: UserLike; archetype: string }> =
|
|
71
|
+
private agents: Map<string, { user: UserLike; archetype: string }> =
|
|
72
|
+
new Map();
|
|
72
73
|
|
|
73
74
|
constructor(config: ParallelGenerationConfig) {
|
|
74
75
|
this.config = {
|
|
@@ -86,12 +87,12 @@ export class TrajectoryGenerator {
|
|
|
86
87
|
const agentService = getAgentService();
|
|
87
88
|
|
|
88
89
|
logger.info(
|
|
89
|
-
|
|
90
|
+
"Creating archetype-based agents...",
|
|
90
91
|
{
|
|
91
92
|
archetypes: this.config.archetypes,
|
|
92
93
|
perArchetype: this.config.agentsPerArchetype,
|
|
93
94
|
},
|
|
94
|
-
|
|
95
|
+
"TrajectoryGenerator",
|
|
95
96
|
);
|
|
96
97
|
|
|
97
98
|
for (const archetype of this.config.archetypes) {
|
|
@@ -115,10 +116,10 @@ export class TrajectoryGenerator {
|
|
|
115
116
|
// Disable A2A to allow offline training without localhost server
|
|
116
117
|
await getTrainingDataAdapter().updateAgentConfig(agent.id, {
|
|
117
118
|
autonomousTrading: archetypeConfig.actionWeights.trade > 0.3,
|
|
118
|
-
autonomousPosting: archetypeConfig.postFrequency !==
|
|
119
|
+
autonomousPosting: archetypeConfig.postFrequency !== "low",
|
|
119
120
|
autonomousCommenting:
|
|
120
|
-
archetypeConfig.engagementStyle ===
|
|
121
|
-
archetypeConfig.engagementStyle ===
|
|
121
|
+
archetypeConfig.engagementStyle === "helpful" ||
|
|
122
|
+
archetypeConfig.engagementStyle === "analytical",
|
|
122
123
|
autonomousDMs: archetypeConfig.dmActivity,
|
|
123
124
|
autonomousGroupChats: archetypeConfig.groupChatActivity,
|
|
124
125
|
maxActionsPerTick: 5,
|
|
@@ -131,7 +132,7 @@ export class TrajectoryGenerator {
|
|
|
131
132
|
logger.info(
|
|
132
133
|
`Created ${archetype} agent: ${agent.username}`,
|
|
133
134
|
{},
|
|
134
|
-
|
|
135
|
+
"TrajectoryGenerator",
|
|
135
136
|
);
|
|
136
137
|
}
|
|
137
138
|
}
|
|
@@ -139,7 +140,7 @@ export class TrajectoryGenerator {
|
|
|
139
140
|
logger.info(
|
|
140
141
|
`Created ${this.agents.size} agents total`,
|
|
141
142
|
{},
|
|
142
|
-
|
|
143
|
+
"TrajectoryGenerator",
|
|
143
144
|
);
|
|
144
145
|
}
|
|
145
146
|
|
|
@@ -171,7 +172,7 @@ export class TrajectoryGenerator {
|
|
|
171
172
|
logger.warn(
|
|
172
173
|
`Runtime creation returned null for ${agentId}, skipping`,
|
|
173
174
|
{},
|
|
174
|
-
|
|
175
|
+
"TrajectoryGenerator",
|
|
175
176
|
);
|
|
176
177
|
return;
|
|
177
178
|
}
|
|
@@ -179,44 +180,44 @@ export class TrajectoryGenerator {
|
|
|
179
180
|
|
|
180
181
|
// Apply archetype configuration to runtime character if available
|
|
181
182
|
const archetypeConfig = ArchetypeConfigService.getConfig(
|
|
182
|
-
agentInfo.archetype
|
|
183
|
+
agentInfo.archetype,
|
|
183
184
|
);
|
|
184
185
|
const character = runtime.character as
|
|
185
186
|
| { name?: string; bio?: string | string[]; topics?: string[] }
|
|
186
187
|
| undefined;
|
|
187
188
|
if (character) {
|
|
188
189
|
character.name = archetypeConfig.name;
|
|
189
|
-
character.bio = archetypeConfig.bio.join(
|
|
190
|
+
character.bio = archetypeConfig.bio.join(" ");
|
|
190
191
|
if (!character.topics) {
|
|
191
192
|
character.topics = [];
|
|
192
193
|
}
|
|
193
194
|
|
|
194
195
|
// Add archetype-specific topics
|
|
195
|
-
if (archetypeConfig.preferredMarkets.includes(
|
|
196
|
-
character.topics.push(
|
|
196
|
+
if (archetypeConfig.preferredMarkets.includes("perpetual")) {
|
|
197
|
+
character.topics.push("perpetual_trading", "leverage");
|
|
197
198
|
}
|
|
198
|
-
if (archetypeConfig.preferredMarkets.includes(
|
|
199
|
-
character.topics.push(
|
|
199
|
+
if (archetypeConfig.preferredMarkets.includes("prediction")) {
|
|
200
|
+
character.topics.push("prediction_markets", "forecasting");
|
|
200
201
|
}
|
|
201
202
|
}
|
|
202
203
|
|
|
203
204
|
// Run ticks for this agent
|
|
204
205
|
for (let tick = 0; tick < this.config.ticksPerAgent; tick++) {
|
|
205
206
|
logger.debug(
|
|
206
|
-
`Agent ${agentInfo.user.username} - Tick ${tick + 1}/${this.config.ticksPerAgent}
|
|
207
|
+
`Agent ${agentInfo.user.username} - Tick ${tick + 1}/${this.config.ticksPerAgent}`,
|
|
207
208
|
);
|
|
208
209
|
|
|
209
210
|
// Execute autonomous tick with trajectory recording
|
|
210
211
|
const result = await autonomousCoordinator.executeAutonomousTick(
|
|
211
212
|
agentId,
|
|
212
213
|
runtime,
|
|
213
|
-
true // Enable trajectory recording
|
|
214
|
+
true, // Enable trajectory recording
|
|
214
215
|
);
|
|
215
216
|
|
|
216
217
|
if (result.trajectoryId) {
|
|
217
218
|
trajectoryIds.push(result.trajectoryId);
|
|
218
219
|
logger.debug(
|
|
219
|
-
`Recorded trajectory ${result.trajectoryId} for ${agentInfo.user.username}
|
|
220
|
+
`Recorded trajectory ${result.trajectoryId} for ${agentInfo.user.username}`,
|
|
220
221
|
);
|
|
221
222
|
}
|
|
222
223
|
|
|
@@ -230,19 +231,19 @@ export class TrajectoryGenerator {
|
|
|
230
231
|
trajectories: trajectoryIds.length,
|
|
231
232
|
archetype: agentInfo.archetype,
|
|
232
233
|
},
|
|
233
|
-
|
|
234
|
+
"TrajectoryGenerator",
|
|
234
235
|
);
|
|
235
236
|
});
|
|
236
237
|
|
|
237
238
|
// Wait for all agents in batch to complete
|
|
238
239
|
await Promise.allSettled(promises).then((results) => {
|
|
239
240
|
for (const result of results) {
|
|
240
|
-
if (result.status ===
|
|
241
|
+
if (result.status === "rejected") {
|
|
241
242
|
const errorMsg = `Agent batch error: ${result.reason instanceof Error ? result.reason.message : String(result.reason)}`;
|
|
242
243
|
logger.error(
|
|
243
244
|
errorMsg,
|
|
244
245
|
{ error: result.reason },
|
|
245
|
-
|
|
246
|
+
"TrajectoryGenerator",
|
|
246
247
|
);
|
|
247
248
|
errors.push(errorMsg);
|
|
248
249
|
}
|
|
@@ -288,15 +289,15 @@ export class TrajectoryGenerator {
|
|
|
288
289
|
}
|
|
289
290
|
|
|
290
291
|
logger.info(
|
|
291
|
-
|
|
292
|
+
"Starting parallel trajectory generation",
|
|
292
293
|
{
|
|
293
294
|
totalAgents: this.agents.size,
|
|
294
295
|
parallelBatches: Math.ceil(
|
|
295
|
-
this.agents.size / this.config.parallelAgents
|
|
296
|
+
this.agents.size / this.config.parallelAgents,
|
|
296
297
|
),
|
|
297
298
|
ticksPerAgent: this.config.ticksPerAgent,
|
|
298
299
|
},
|
|
299
|
-
|
|
300
|
+
"TrajectoryGenerator",
|
|
300
301
|
);
|
|
301
302
|
|
|
302
303
|
// Process agents in parallel batches
|
|
@@ -309,7 +310,7 @@ export class TrajectoryGenerator {
|
|
|
309
310
|
{
|
|
310
311
|
agents: batch.length,
|
|
311
312
|
},
|
|
312
|
-
|
|
313
|
+
"TrajectoryGenerator",
|
|
313
314
|
);
|
|
314
315
|
|
|
315
316
|
const batchResult = await this.runParallelBatch(batch);
|
|
@@ -321,7 +322,8 @@ export class TrajectoryGenerator {
|
|
|
321
322
|
// Calculate stats
|
|
322
323
|
for (const trajId of result.trajectoryIds) {
|
|
323
324
|
// Get trajectory to determine archetype
|
|
324
|
-
const trajectory =
|
|
325
|
+
const trajectory =
|
|
326
|
+
await getTrainingDataAdapter().getTrajectoryById(trajId);
|
|
325
327
|
|
|
326
328
|
if (trajectory) {
|
|
327
329
|
const agentInfo = this.agents.get(trajectory.agentId);
|
|
@@ -344,7 +346,7 @@ export class TrajectoryGenerator {
|
|
|
344
346
|
result.duration = Date.now() - startTime;
|
|
345
347
|
|
|
346
348
|
logger.info(
|
|
347
|
-
|
|
349
|
+
"Parallel generation complete",
|
|
348
350
|
{
|
|
349
351
|
agents: result.agentsCreated.length,
|
|
350
352
|
trajectories: result.trajectoryIds.length,
|
|
@@ -352,7 +354,7 @@ export class TrajectoryGenerator {
|
|
|
352
354
|
durationSeconds: result.duration / 1000,
|
|
353
355
|
errors: result.errors.length,
|
|
354
356
|
},
|
|
355
|
-
|
|
357
|
+
"TrajectoryGenerator",
|
|
356
358
|
);
|
|
357
359
|
|
|
358
360
|
return result;
|
|
@@ -365,7 +367,7 @@ export class TrajectoryGenerator {
|
|
|
365
367
|
logger.info(
|
|
366
368
|
`Cleaning up ${this.agents.size} agents...`,
|
|
367
369
|
{},
|
|
368
|
-
|
|
370
|
+
"TrajectoryGenerator",
|
|
369
371
|
);
|
|
370
372
|
|
|
371
373
|
const adapter = getTrainingDataAdapter();
|
|
@@ -373,7 +375,7 @@ export class TrajectoryGenerator {
|
|
|
373
375
|
await adapter.deleteUser(agentId);
|
|
374
376
|
}
|
|
375
377
|
|
|
376
|
-
logger.info(
|
|
378
|
+
logger.info("Cleanup complete", {}, "TrajectoryGenerator");
|
|
377
379
|
}
|
|
378
380
|
}
|
|
379
381
|
|
|
@@ -381,7 +383,7 @@ export class TrajectoryGenerator {
|
|
|
381
383
|
* Factory function for creating parallel generator
|
|
382
384
|
*/
|
|
383
385
|
export async function createParallelGenerator(
|
|
384
|
-
config: ParallelGenerationConfig
|
|
386
|
+
config: ParallelGenerationConfig,
|
|
385
387
|
): Promise<TrajectoryGenerator> {
|
|
386
388
|
return new TrajectoryGenerator(config);
|
|
387
389
|
}
|
package/src/generation/index.ts
CHANGED
|
@@ -5,15 +5,15 @@
|
|
|
5
5
|
* Creates dataset cards with visualizations, metrics, and usage examples.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import { promises as fs } from
|
|
9
|
-
import * as path from
|
|
10
|
-
import type { SimulationMetrics } from
|
|
11
|
-
import { calculateArrayStats, logger } from
|
|
8
|
+
import { promises as fs } from "node:fs";
|
|
9
|
+
import * as path from "node:path";
|
|
10
|
+
import type { SimulationMetrics } from "../benchmark/SimulationEngine";
|
|
11
|
+
import { calculateArrayStats, logger } from "../utils";
|
|
12
12
|
import {
|
|
13
13
|
getHuggingFaceToken,
|
|
14
14
|
HuggingFaceUploadUtil,
|
|
15
15
|
requireHuggingFaceToken,
|
|
16
|
-
} from
|
|
16
|
+
} from "./shared/HuggingFaceUploadUtil";
|
|
17
17
|
|
|
18
18
|
export interface BenchmarkRecord {
|
|
19
19
|
benchmarkId: string;
|
|
@@ -71,7 +71,7 @@ export class HuggingFaceDatasetUploader {
|
|
|
71
71
|
*/
|
|
72
72
|
async uploadDataset(options: UploadOptions): Promise<UploadResult> {
|
|
73
73
|
try {
|
|
74
|
-
logger.info(
|
|
74
|
+
logger.info("Starting HuggingFace dataset upload", {
|
|
75
75
|
datasetName: options.datasetName,
|
|
76
76
|
});
|
|
77
77
|
|
|
@@ -82,57 +82,57 @@ export class HuggingFaceDatasetUploader {
|
|
|
82
82
|
// Set defaults
|
|
83
83
|
const version = options.version || this.generateVersion();
|
|
84
84
|
const benchmarkDir =
|
|
85
|
-
options.benchmarkDir || path.join(process.cwd(),
|
|
85
|
+
options.benchmarkDir || path.join(process.cwd(), "benchmarks");
|
|
86
86
|
const outputDir =
|
|
87
87
|
options.outputDir ||
|
|
88
|
-
path.join(process.cwd(),
|
|
88
|
+
path.join(process.cwd(), "exports", "huggingface", version);
|
|
89
89
|
|
|
90
90
|
// Step 1: Collect benchmark data
|
|
91
|
-
logger.info(
|
|
91
|
+
logger.info("Collecting benchmark data", { benchmarkDir });
|
|
92
92
|
const benchmarks = await this.collectBenchmarkData(benchmarkDir);
|
|
93
93
|
logger.info(`Collected ${benchmarks.length} benchmark records`);
|
|
94
94
|
|
|
95
95
|
if (benchmarks.length === 0) {
|
|
96
|
-
throw new Error(
|
|
96
|
+
throw new Error("No benchmark data found to upload");
|
|
97
97
|
}
|
|
98
98
|
|
|
99
99
|
// Step 2: Prepare dataset files
|
|
100
|
-
logger.info(
|
|
100
|
+
logger.info("Preparing dataset files", { outputDir });
|
|
101
101
|
await fs.mkdir(outputDir, { recursive: true });
|
|
102
102
|
|
|
103
103
|
const metadata = await this.prepareDatasetFiles(benchmarks, outputDir, {
|
|
104
104
|
datasetName: options.datasetName,
|
|
105
105
|
version,
|
|
106
106
|
description:
|
|
107
|
-
options.description ||
|
|
107
|
+
options.description || "Autonomous agent benchmark results",
|
|
108
108
|
});
|
|
109
109
|
|
|
110
110
|
// Step 3: Generate dataset card
|
|
111
|
-
logger.info(
|
|
111
|
+
logger.info("Generating dataset card");
|
|
112
112
|
await this.generateDatasetCard(metadata, benchmarks, outputDir);
|
|
113
113
|
|
|
114
114
|
// Step 4: Create repository if it doesn't exist
|
|
115
|
-
logger.info(
|
|
115
|
+
logger.info("Ensuring repository exists", {
|
|
116
116
|
datasetName: options.datasetName,
|
|
117
117
|
});
|
|
118
118
|
await this.ensureRepository(
|
|
119
119
|
options.datasetName,
|
|
120
|
-
options.private ?? false
|
|
120
|
+
options.private ?? false,
|
|
121
121
|
);
|
|
122
122
|
|
|
123
123
|
// Step 5: Upload to HuggingFace
|
|
124
|
-
logger.info(
|
|
124
|
+
logger.info("Uploading to HuggingFace", {
|
|
125
125
|
datasetName: options.datasetName,
|
|
126
126
|
});
|
|
127
127
|
const filesUploaded = await this.uploadToHub(
|
|
128
128
|
options.datasetName,
|
|
129
129
|
outputDir,
|
|
130
|
-
options.private ?? false
|
|
130
|
+
options.private ?? false,
|
|
131
131
|
);
|
|
132
132
|
|
|
133
133
|
const datasetUrl = `https://huggingface.co/datasets/${options.datasetName}`;
|
|
134
134
|
|
|
135
|
-
logger.info(
|
|
135
|
+
logger.info("Dataset uploaded successfully", {
|
|
136
136
|
datasetUrl,
|
|
137
137
|
filesUploaded,
|
|
138
138
|
});
|
|
@@ -144,12 +144,12 @@ export class HuggingFaceDatasetUploader {
|
|
|
144
144
|
filesUploaded,
|
|
145
145
|
};
|
|
146
146
|
} catch (error) {
|
|
147
|
-
logger.error(
|
|
147
|
+
logger.error("Failed to upload dataset", { error });
|
|
148
148
|
return {
|
|
149
149
|
success: false,
|
|
150
|
-
version: options.version ||
|
|
150
|
+
version: options.version || "unknown",
|
|
151
151
|
filesUploaded: 0,
|
|
152
|
-
error: error instanceof Error ? error.message :
|
|
152
|
+
error: error instanceof Error ? error.message : "Unknown error",
|
|
153
153
|
};
|
|
154
154
|
}
|
|
155
155
|
}
|
|
@@ -158,22 +158,22 @@ export class HuggingFaceDatasetUploader {
|
|
|
158
158
|
* Collect benchmark data from files
|
|
159
159
|
*/
|
|
160
160
|
private async collectBenchmarkData(
|
|
161
|
-
benchmarkDir: string
|
|
161
|
+
benchmarkDir: string,
|
|
162
162
|
): Promise<BenchmarkRecord[]> {
|
|
163
163
|
const records: BenchmarkRecord[] = [];
|
|
164
164
|
|
|
165
165
|
// Collect from model-comparison directory
|
|
166
|
-
const comparisonDir = path.join(benchmarkDir,
|
|
166
|
+
const comparisonDir = path.join(benchmarkDir, "model-comparison");
|
|
167
167
|
if (await this.fileExists(comparisonDir)) {
|
|
168
|
-
const comparisonFile = path.join(comparisonDir,
|
|
168
|
+
const comparisonFile = path.join(comparisonDir, "comparison.json");
|
|
169
169
|
if (await this.fileExists(comparisonFile)) {
|
|
170
|
-
const data = JSON.parse(await fs.readFile(comparisonFile,
|
|
170
|
+
const data = JSON.parse(await fs.readFile(comparisonFile, "utf-8"));
|
|
171
171
|
for (const result of data.results || []) {
|
|
172
172
|
if (result.metrics) {
|
|
173
173
|
records.push({
|
|
174
|
-
benchmarkId: data.benchmark ||
|
|
174
|
+
benchmarkId: data.benchmark || "comparison",
|
|
175
175
|
modelId: result.model.modelId,
|
|
176
|
-
modelVersion:
|
|
176
|
+
modelVersion: "baseline",
|
|
177
177
|
modelName: result.model.displayName,
|
|
178
178
|
runAt: data.runAt,
|
|
179
179
|
metrics: result.metrics,
|
|
@@ -182,7 +182,7 @@ export class HuggingFaceDatasetUploader {
|
|
|
182
182
|
tickInterval: 60,
|
|
183
183
|
markets: 10,
|
|
184
184
|
ticks: Math.floor(
|
|
185
|
-
(result.metrics.timing?.totalDuration || 0) / 60
|
|
185
|
+
(result.metrics.timing?.totalDuration || 0) / 60,
|
|
186
186
|
),
|
|
187
187
|
},
|
|
188
188
|
});
|
|
@@ -192,13 +192,13 @@ export class HuggingFaceDatasetUploader {
|
|
|
192
192
|
}
|
|
193
193
|
|
|
194
194
|
// Collect from baselines directory
|
|
195
|
-
const baselinesDir = path.join(benchmarkDir,
|
|
195
|
+
const baselinesDir = path.join(benchmarkDir, "baselines");
|
|
196
196
|
if (await this.fileExists(baselinesDir)) {
|
|
197
197
|
const files = await fs.readdir(baselinesDir);
|
|
198
198
|
for (const file of files) {
|
|
199
|
-
if (file.endsWith(
|
|
199
|
+
if (file.endsWith(".json") && file.startsWith("baseline-")) {
|
|
200
200
|
const filePath = path.join(baselinesDir, file);
|
|
201
|
-
const data = JSON.parse(await fs.readFile(filePath,
|
|
201
|
+
const data = JSON.parse(await fs.readFile(filePath, "utf-8"));
|
|
202
202
|
|
|
203
203
|
// Skip if no metrics
|
|
204
204
|
if (!data.metrics) continue;
|
|
@@ -207,13 +207,13 @@ export class HuggingFaceDatasetUploader {
|
|
|
207
207
|
benchmarkId:
|
|
208
208
|
data.benchmark?.id ||
|
|
209
209
|
data.benchmark?.path ||
|
|
210
|
-
file.replace(
|
|
211
|
-
modelId: data.model?.modelId ||
|
|
212
|
-
modelVersion: data.model?.version ||
|
|
210
|
+
file.replace(".json", ""),
|
|
211
|
+
modelId: data.model?.modelId || "unknown",
|
|
212
|
+
modelVersion: data.model?.version || "baseline",
|
|
213
213
|
modelName:
|
|
214
214
|
data.model?.displayName ||
|
|
215
215
|
data.model?.name ||
|
|
216
|
-
file.replace(
|
|
216
|
+
file.replace(".json", ""),
|
|
217
217
|
runAt: data.runAt || new Date().toISOString(),
|
|
218
218
|
metrics: data.metrics,
|
|
219
219
|
benchmarkSnapshot: {
|
|
@@ -226,7 +226,7 @@ export class HuggingFaceDatasetUploader {
|
|
|
226
226
|
ticks: Math.floor(
|
|
227
227
|
(data.timing?.totalDuration ||
|
|
228
228
|
data.metrics.timing?.totalDuration ||
|
|
229
|
-
0) / 60
|
|
229
|
+
0) / 60,
|
|
230
230
|
),
|
|
231
231
|
},
|
|
232
232
|
});
|
|
@@ -235,21 +235,21 @@ export class HuggingFaceDatasetUploader {
|
|
|
235
235
|
}
|
|
236
236
|
|
|
237
237
|
// Collect from test-baselines directory
|
|
238
|
-
const testBaselinesDir = path.join(benchmarkDir,
|
|
238
|
+
const testBaselinesDir = path.join(benchmarkDir, "test-baselines");
|
|
239
239
|
if (await this.fileExists(testBaselinesDir)) {
|
|
240
240
|
const subdirs = await fs.readdir(testBaselinesDir);
|
|
241
241
|
for (const subdir of subdirs) {
|
|
242
|
-
const metricsFile = path.join(testBaselinesDir, subdir,
|
|
242
|
+
const metricsFile = path.join(testBaselinesDir, subdir, "metrics.json");
|
|
243
243
|
if (await this.fileExists(metricsFile)) {
|
|
244
|
-
const data = JSON.parse(await fs.readFile(metricsFile,
|
|
244
|
+
const data = JSON.parse(await fs.readFile(metricsFile, "utf-8"));
|
|
245
245
|
|
|
246
246
|
// Skip if no required fields
|
|
247
247
|
if (!data.totalPnl && !data.predictionMetrics) continue;
|
|
248
248
|
|
|
249
249
|
records.push({
|
|
250
|
-
benchmarkId: data.benchmarkId ||
|
|
250
|
+
benchmarkId: data.benchmarkId || "test-benchmark",
|
|
251
251
|
modelId: subdir,
|
|
252
|
-
modelVersion:
|
|
252
|
+
modelVersion: "test-baseline",
|
|
253
253
|
modelName: subdir,
|
|
254
254
|
runAt: data.runAt || new Date().toISOString(),
|
|
255
255
|
metrics: data,
|
|
@@ -273,11 +273,11 @@ export class HuggingFaceDatasetUploader {
|
|
|
273
273
|
private async prepareDatasetFiles(
|
|
274
274
|
benchmarks: BenchmarkRecord[],
|
|
275
275
|
outputDir: string,
|
|
276
|
-
options: { datasetName: string; version: string; description: string }
|
|
276
|
+
options: { datasetName: string; version: string; description: string },
|
|
277
277
|
): Promise<DatasetMetadata> {
|
|
278
278
|
// Create data.jsonl with all benchmark records
|
|
279
|
-
const jsonlPath = path.join(outputDir,
|
|
280
|
-
const jsonlLines = benchmarks.map((b) => JSON.stringify(b)).join(
|
|
279
|
+
const jsonlPath = path.join(outputDir, "data.jsonl");
|
|
280
|
+
const jsonlLines = benchmarks.map((b) => JSON.stringify(b)).join("\n");
|
|
281
281
|
await fs.writeFile(jsonlPath, jsonlLines);
|
|
282
282
|
|
|
283
283
|
// Create metadata.json
|
|
@@ -289,15 +289,15 @@ export class HuggingFaceDatasetUploader {
|
|
|
289
289
|
totalBenchmarks: benchmarks.length,
|
|
290
290
|
models: Array.from(new Set(benchmarks.map((b) => b.modelName))),
|
|
291
291
|
benchmarkTypes: Array.from(new Set(benchmarks.map((b) => b.benchmarkId))),
|
|
292
|
-
license:
|
|
292
|
+
license: "MIT",
|
|
293
293
|
};
|
|
294
294
|
|
|
295
|
-
const metadataPath = path.join(outputDir,
|
|
295
|
+
const metadataPath = path.join(outputDir, "metadata.json");
|
|
296
296
|
await fs.writeFile(metadataPath, JSON.stringify(metadata, null, 2));
|
|
297
297
|
|
|
298
298
|
// Create summary statistics
|
|
299
299
|
const summary = this.calculateSummaryStatistics(benchmarks);
|
|
300
|
-
const summaryPath = path.join(outputDir,
|
|
300
|
+
const summaryPath = path.join(outputDir, "summary.json");
|
|
301
301
|
await fs.writeFile(summaryPath, JSON.stringify(summary, null, 2));
|
|
302
302
|
|
|
303
303
|
return metadata;
|
|
@@ -309,14 +309,14 @@ export class HuggingFaceDatasetUploader {
|
|
|
309
309
|
private async generateDatasetCard(
|
|
310
310
|
metadata: DatasetMetadata,
|
|
311
311
|
benchmarks: BenchmarkRecord[],
|
|
312
|
-
outputDir: string
|
|
312
|
+
outputDir: string,
|
|
313
313
|
): Promise<void> {
|
|
314
314
|
const summary = this.calculateSummaryStatistics(benchmarks);
|
|
315
|
-
const brandName = process.env.TRAINING_BRAND_NAME ||
|
|
316
|
-
const brandOrg = process.env.TRAINING_BRAND_ORG ||
|
|
315
|
+
const brandName = process.env.TRAINING_BRAND_NAME || "ElizaOS";
|
|
316
|
+
const brandOrg = process.env.TRAINING_BRAND_ORG || "ElizaOS Contributors";
|
|
317
317
|
const platformName =
|
|
318
|
-
process.env.TRAINING_PLATFORM_NAME ||
|
|
319
|
-
const brandTag = brandName.toLowerCase().replace(/\s+/g,
|
|
318
|
+
process.env.TRAINING_PLATFORM_NAME || "ElizaOS-compatible runtimes";
|
|
319
|
+
const brandTag = brandName.toLowerCase().replace(/\s+/g, "-");
|
|
320
320
|
|
|
321
321
|
const card = `---
|
|
322
322
|
license: ${metadata.license}
|
|
@@ -435,7 +435,7 @@ print(model_performance.sort_values('metrics.totalPnl', ascending=False))
|
|
|
435
435
|
If you use this dataset in your research, please cite:
|
|
436
436
|
|
|
437
437
|
\`\`\`bibtex
|
|
438
|
-
@dataset{${brandTag}_benchmarks_${metadata.version.replace(/\./g,
|
|
438
|
+
@dataset{${brandTag}_benchmarks_${metadata.version.replace(/\./g, "_")},
|
|
439
439
|
title = {${brandName} Agent Benchmarks},
|
|
440
440
|
author = {${brandOrg}},
|
|
441
441
|
year = {${new Date().getFullYear()}},
|
|
@@ -453,7 +453,7 @@ ${metadata.license}
|
|
|
453
453
|
For questions or issues, please open an issue on the repository.
|
|
454
454
|
`;
|
|
455
455
|
|
|
456
|
-
const cardPath = path.join(outputDir,
|
|
456
|
+
const cardPath = path.join(outputDir, "README.md");
|
|
457
457
|
await fs.writeFile(cardPath, card);
|
|
458
458
|
}
|
|
459
459
|
|
|
@@ -494,8 +494,8 @@ For questions or issues, please open an issue on the repository.
|
|
|
494
494
|
}))
|
|
495
495
|
.sort((a, b) => b.avgPnl - a.avgPnl);
|
|
496
496
|
|
|
497
|
-
let table =
|
|
498
|
-
table +=
|
|
497
|
+
let table = "| Rank | Model | Avg P&L | Accuracy | Optimality | Runs |\n";
|
|
498
|
+
table += "|------|-------|---------|----------|------------|------|\n";
|
|
499
499
|
|
|
500
500
|
leaderboard.forEach((entry, index) => {
|
|
501
501
|
table += `| ${index + 1} | ${entry.model} | ${entry.avgPnl.toFixed(2)} | ${(entry.avgAccuracy * 100).toFixed(1)}% | ${entry.avgOptimality.toFixed(1)} | ${entry.runs} |\n`;
|
|
@@ -553,17 +553,17 @@ For questions or issues, please open an issue on the repository.
|
|
|
553
553
|
*/
|
|
554
554
|
private async ensureRepository(
|
|
555
555
|
datasetName: string,
|
|
556
|
-
isPrivate: boolean
|
|
556
|
+
isPrivate: boolean,
|
|
557
557
|
): Promise<void> {
|
|
558
558
|
if (!this.huggingFaceToken) {
|
|
559
|
-
throw new Error(
|
|
559
|
+
throw new Error("HuggingFace token not configured");
|
|
560
560
|
}
|
|
561
561
|
|
|
562
562
|
await HuggingFaceUploadUtil.ensureRepository(
|
|
563
563
|
datasetName,
|
|
564
|
-
|
|
564
|
+
"dataset",
|
|
565
565
|
this.huggingFaceToken,
|
|
566
|
-
isPrivate
|
|
566
|
+
isPrivate,
|
|
567
567
|
);
|
|
568
568
|
}
|
|
569
569
|
|
|
@@ -574,38 +574,38 @@ For questions or issues, please open an issue on the repository.
|
|
|
574
574
|
private async uploadToHub(
|
|
575
575
|
datasetName: string,
|
|
576
576
|
localDir: string,
|
|
577
|
-
_isPrivate: boolean
|
|
577
|
+
_isPrivate: boolean,
|
|
578
578
|
): Promise<number> {
|
|
579
579
|
if (!this.huggingFaceToken) {
|
|
580
|
-
throw new Error(
|
|
580
|
+
throw new Error("HuggingFace token not configured");
|
|
581
581
|
}
|
|
582
582
|
|
|
583
583
|
try {
|
|
584
584
|
// Use shared upload utility
|
|
585
585
|
const { HuggingFaceUploadUtil } = await import(
|
|
586
|
-
|
|
586
|
+
"./shared/HuggingFaceUploadUtil"
|
|
587
587
|
);
|
|
588
588
|
|
|
589
589
|
return await HuggingFaceUploadUtil.uploadDirectory(
|
|
590
590
|
datasetName,
|
|
591
|
-
|
|
591
|
+
"dataset",
|
|
592
592
|
localDir,
|
|
593
|
-
this.huggingFaceToken
|
|
593
|
+
this.huggingFaceToken,
|
|
594
594
|
);
|
|
595
595
|
} catch (error) {
|
|
596
|
-
logger.error(
|
|
596
|
+
logger.error("Failed to upload to HuggingFace Hub", { error });
|
|
597
597
|
|
|
598
598
|
// Provide helpful manual upload instructions
|
|
599
599
|
const { HuggingFaceUploadUtil } = await import(
|
|
600
|
-
|
|
600
|
+
"./shared/HuggingFaceUploadUtil"
|
|
601
601
|
);
|
|
602
602
|
const instructions = HuggingFaceUploadUtil.getManualUploadInstructions(
|
|
603
603
|
datasetName,
|
|
604
|
-
|
|
605
|
-
localDir
|
|
604
|
+
"dataset",
|
|
605
|
+
localDir,
|
|
606
606
|
);
|
|
607
607
|
|
|
608
|
-
logger.info(
|
|
608
|
+
logger.info("To upload manually:", { instructions });
|
|
609
609
|
|
|
610
610
|
throw error;
|
|
611
611
|
}
|
|
@@ -617,8 +617,8 @@ For questions or issues, please open an issue on the repository.
|
|
|
617
617
|
private generateVersion(): string {
|
|
618
618
|
const now = new Date();
|
|
619
619
|
const year = now.getFullYear();
|
|
620
|
-
const month = String(now.getMonth() + 1).padStart(2,
|
|
621
|
-
const day = String(now.getDate()).padStart(2,
|
|
620
|
+
const month = String(now.getMonth() + 1).padStart(2, "0");
|
|
621
|
+
const day = String(now.getDate()).padStart(2, "0");
|
|
622
622
|
return `${year}.${month}.${day}`;
|
|
623
623
|
}
|
|
624
624
|
|