@elizaos/training 2.0.0-alpha.21 → 2.0.0-alpha.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/.turbo/turbo-lint.log +2 -0
  2. package/.turbo/turbo-typecheck.log +1 -0
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/adapter.js +59 -0
  5. package/dist/archetypes/ArchetypeConfigService.js +510 -0
  6. package/dist/archetypes/derive-archetype.js +196 -0
  7. package/dist/archetypes/index.js +7 -0
  8. package/dist/benchmark/ArchetypeMatchupBenchmark.js +547 -0
  9. package/dist/benchmark/BenchmarkChartGenerator.js +632 -0
  10. package/dist/benchmark/BenchmarkDataGenerator.js +825 -0
  11. package/dist/benchmark/BenchmarkDataViewer.js +197 -0
  12. package/dist/benchmark/BenchmarkHistoryService.js +135 -0
  13. package/dist/benchmark/BenchmarkRunner.js +483 -0
  14. package/dist/benchmark/BenchmarkValidator.js +158 -0
  15. package/dist/benchmark/FastEvalRunner.js +133 -0
  16. package/dist/benchmark/MetricsValidator.js +104 -0
  17. package/dist/benchmark/MetricsVisualizer.js +775 -0
  18. package/dist/benchmark/ModelBenchmarkService.js +433 -0
  19. package/dist/benchmark/ModelRegistry.js +122 -0
  20. package/dist/benchmark/RulerBenchmarkIntegration.js +168 -0
  21. package/dist/benchmark/SimulationA2AInterface.js +683 -0
  22. package/dist/benchmark/SimulationEngine.js +522 -0
  23. package/dist/benchmark/TaskRunner.js +60 -0
  24. package/dist/benchmark/__tests__/BenchmarkRunner.test.js +409 -0
  25. package/dist/benchmark/__tests__/HeadToHead.test.js +105 -0
  26. package/dist/benchmark/index.js +23 -0
  27. package/dist/benchmark/parseSimulationMetrics.js +86 -0
  28. package/dist/benchmark/simulation-types.js +1 -0
  29. package/dist/dependencies.js +197 -0
  30. package/dist/generation/TrajectoryGenerator.js +244 -0
  31. package/dist/generation/index.js +6 -0
  32. package/dist/huggingface/HuggingFaceDatasetUploader.js +463 -0
  33. package/dist/huggingface/HuggingFaceIntegrationService.js +272 -0
  34. package/dist/huggingface/HuggingFaceModelUploader.js +385 -0
  35. package/dist/huggingface/index.js +9 -0
  36. package/dist/huggingface/shared/HuggingFaceUploadUtil.js +144 -0
  37. package/dist/index.js +41 -0
  38. package/dist/init-training.js +43 -0
  39. package/dist/metrics/TrajectoryMetricsExtractor.js +523 -0
  40. package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +628 -0
  41. package/dist/metrics/index.js +7 -0
  42. package/dist/metrics/types.js +21 -0
  43. package/dist/rubrics/__tests__/index.test.js +150 -0
  44. package/dist/rubrics/ass-kisser.js +83 -0
  45. package/dist/rubrics/degen.js +78 -0
  46. package/dist/rubrics/goody-twoshoes.js +82 -0
  47. package/dist/rubrics/index.js +184 -0
  48. package/dist/rubrics/information-trader.js +82 -0
  49. package/dist/rubrics/infosec.js +99 -0
  50. package/dist/rubrics/liar.js +102 -0
  51. package/dist/rubrics/perps-trader.js +85 -0
  52. package/dist/rubrics/researcher.js +79 -0
  53. package/dist/rubrics/scammer.js +80 -0
  54. package/dist/rubrics/social-butterfly.js +71 -0
  55. package/dist/rubrics/super-predictor.js +95 -0
  56. package/dist/rubrics/trader.js +65 -0
  57. package/dist/scoring/ArchetypeScoringService.js +301 -0
  58. package/dist/scoring/JudgePromptBuilder.js +401 -0
  59. package/dist/scoring/LLMJudgeCache.js +263 -0
  60. package/dist/scoring/index.js +8 -0
  61. package/dist/training/AutomationPipeline.js +714 -0
  62. package/dist/training/BenchmarkService.js +370 -0
  63. package/dist/training/ConfigValidator.js +153 -0
  64. package/dist/training/MarketOutcomesTracker.js +142 -0
  65. package/dist/training/ModelDeployer.js +128 -0
  66. package/dist/training/ModelFetcher.js +48 -0
  67. package/dist/training/ModelSelectionService.js +248 -0
  68. package/dist/training/ModelUsageVerifier.js +106 -0
  69. package/dist/training/MultiModelOrchestrator.js +349 -0
  70. package/dist/training/RLModelConfig.js +295 -0
  71. package/dist/training/RewardBackpropagationService.js +117 -0
  72. package/dist/training/RulerScoringService.js +450 -0
  73. package/dist/training/TrainingMonitor.js +108 -0
  74. package/dist/training/TrajectoryRecorder.js +281 -0
  75. package/dist/training/__tests__/TrajectoryRecorder.test.js +363 -0
  76. package/dist/training/index.js +30 -0
  77. package/dist/training/logRLConfig.js +29 -0
  78. package/dist/training/pipeline.js +80 -0
  79. package/dist/training/storage/ModelStorageService.js +190 -0
  80. package/dist/training/storage/TrainingDataArchiver.js +136 -0
  81. package/dist/training/storage/index.js +7 -0
  82. package/dist/training/types.js +6 -0
  83. package/dist/training/window-utils.js +100 -0
  84. package/dist/utils/index.js +73 -0
  85. package/dist/utils/logger.js +55 -0
  86. package/dist/utils/snowflake.js +15 -0
  87. package/dist/utils/synthetic-detector.js +67 -0
  88. package/package.json +2 -2
  89. package/research-output/training-runs/training-run-1773742857616.json +38 -0
  90. package/research-output/training-runs/training-run-1773742946977.json +38 -0
  91. package/research-output/training-runs/training-run-1773743278891.json +38 -0
  92. package/research-output/training-runs/training-run-1773743409754.json +38 -0
  93. package/research-output/training-runs/training-run-1773743651086.json +38 -0
  94. package/research-output/training-runs/training-run-1773743782883.json +38 -0
@@ -0,0 +1,463 @@
1
+ /**
2
+ * HuggingFace Dataset Uploader
3
+ *
4
+ * Prepares and uploads benchmark datasets to HuggingFace Hub for public access.
5
+ * Creates dataset cards with visualizations, metrics, and usage examples.
6
+ */
7
+ import { promises as fs } from "node:fs";
8
+ import * as path from "node:path";
9
+ import { calculateArrayStats, logger } from "../utils";
10
+ import { getHuggingFaceToken, HuggingFaceUploadUtil, requireHuggingFaceToken, } from "./shared/HuggingFaceUploadUtil";
11
/** Tick interval stored in every benchmark snapshot; the dataset card prints it as seconds. */
const SNAPSHOT_TICK_INTERVAL = 60;
/** Market count stored in every benchmark snapshot (fixed value, not read from the input files). */
const SNAPSHOT_MARKET_COUNT = 10;
/** Placeholder rendered in the dataset card when a statistic cannot be computed. */
const MISSING_VALUE = "N/A";

/**
 * Build the `benchmarkSnapshot` object attached to every record.
 *
 * NOTE(review): `ticks` divides the duration by 60 (i.e. treats it as seconds),
 * while the dataset card divides the same value by 60000 to print minutes
 * (i.e. treats it as milliseconds). One of the two is wrong; the unit of
 * `timing.totalDuration` is not visible from this file, so both computations
 * are left exactly as they were — confirm against the benchmark runner.
 *
 * @param {number} duration - Total duration taken from the benchmark file (0 when absent).
 * @returns {{duration: number, tickInterval: number, markets: number, ticks: number}}
 */
function buildBenchmarkSnapshot(duration) {
    return {
        duration,
        tickInterval: SNAPSHOT_TICK_INTERVAL,
        markets: SNAPSHOT_MARKET_COUNT,
        ticks: Math.floor(duration / SNAPSHOT_TICK_INTERVAL),
    };
}

/**
 * Read a UTF-8 file and parse it as JSON.
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<any>} Parsed content.
 * @throws If the file cannot be read or is not valid JSON.
 */
async function readJsonFile(filePath) {
    return JSON.parse(await fs.readFile(filePath, "utf-8"));
}

/**
 * Keep only finite numbers, so records with missing metrics cannot poison an aggregate.
 *
 * @param {unknown[]} values
 * @returns {number[]}
 */
function finiteNumbers(values) {
    return values.filter((value) => typeof value === "number" && Number.isFinite(value));
}

/**
 * Arithmetic mean of the finite numbers in `values`.
 *
 * @param {unknown[]} values
 * @returns {number | null} The mean, or `null` when there is no finite value.
 */
function meanOrNull(values) {
    const usable = finiteNumbers(values);
    if (usable.length === 0) {
        return null;
    }
    return usable.reduce((sum, value) => sum + value, 0) / usable.length;
}

/**
 * Format a number with a fixed number of decimals, or the missing-value placeholder.
 *
 * @param {unknown} value
 * @param {number} digits
 * @returns {string}
 */
function formatFixed(value, digits) {
    return typeof value === "number" && Number.isFinite(value)
        ? value.toFixed(digits)
        : MISSING_VALUE;
}

/**
 * Format a 0-1 ratio as a percentage, or the missing-value placeholder.
 *
 * @param {unknown} value
 * @param {number} digits
 * @returns {string}
 */
function formatPercent(value, digits) {
    return typeof value === "number" && Number.isFinite(value)
        ? `${(value * 100).toFixed(digits)}%`
        : MISSING_VALUE;
}

/**
 * Escape `|` so arbitrary text cannot break a Markdown table row.
 *
 * @param {unknown} text
 * @returns {string}
 */
function escapeTableCell(text) {
    return String(text).replace(/\|/g, "\\|");
}

/**
 * Collects benchmark result files from disk, converts them into a
 * HuggingFace dataset layout (data.jsonl, metadata.json, summary.json,
 * README.md) and uploads that directory through `HuggingFaceUploadUtil`.
 */
export class HuggingFaceDatasetUploader {
    huggingFaceToken;

    /**
     * @param {string} [huggingFaceToken] - Access token; when omitted the value
     *   returned by `getHuggingFaceToken()` is used.
     */
    constructor(huggingFaceToken) {
        this.huggingFaceToken = huggingFaceToken || getHuggingFaceToken();
    }

    /**
     * Prepare and upload benchmark dataset to HuggingFace.
     *
     * Never rejects: every failure is logged and reported through the
     * returned object (`success: false`, `error` message).
     *
     * @param {object} options
     * @param {string} options.datasetName - Target dataset repository name.
     * @param {string} [options.version] - Defaults to `generateVersion()`.
     * @param {string} [options.benchmarkDir] - Defaults to `<cwd>/benchmarks`.
     * @param {string} [options.outputDir] - Defaults to `<cwd>/exports/huggingface/<version>`.
     * @param {string} [options.description] - Dataset description for metadata and card.
     * @param {boolean} [options.private] - Repository visibility, defaults to `false`.
     * @returns {Promise<{success: boolean, version: string, filesUploaded: number, datasetUrl?: string, error?: string}>}
     */
    async uploadDataset(options) {
        try {
            logger.info("Starting HuggingFace dataset upload", {
                datasetName: options.datasetName,
            });
            // Validate token (the original comment states requireHuggingFaceToken throws if not set)
            const token = this.huggingFaceToken || requireHuggingFaceToken();
            this.huggingFaceToken = token;
            // Set defaults
            const version = options.version || this.generateVersion();
            const benchmarkDir = options.benchmarkDir || path.join(process.cwd(), "benchmarks");
            const outputDir = options.outputDir ||
                path.join(process.cwd(), "exports", "huggingface", version);
            const isPrivate = options.private ?? false;
            // Step 1: Collect benchmark data
            logger.info("Collecting benchmark data", { benchmarkDir });
            const benchmarks = await this.collectBenchmarkData(benchmarkDir);
            logger.info(`Collected ${benchmarks.length} benchmark records`);
            if (benchmarks.length === 0) {
                throw new Error("No benchmark data found to upload");
            }
            // Step 2: Prepare dataset files
            logger.info("Preparing dataset files", { outputDir });
            await fs.mkdir(outputDir, { recursive: true });
            const metadata = await this.prepareDatasetFiles(benchmarks, outputDir, {
                datasetName: options.datasetName,
                version,
                description: options.description || "Autonomous agent benchmark results",
            });
            // Step 3: Generate dataset card
            logger.info("Generating dataset card");
            await this.generateDatasetCard(metadata, benchmarks, outputDir);
            // Step 4: Create repository if it doesn't exist
            logger.info("Ensuring repository exists", {
                datasetName: options.datasetName,
            });
            await this.ensureRepository(options.datasetName, isPrivate);
            // Step 5: Upload to HuggingFace
            logger.info("Uploading to HuggingFace", {
                datasetName: options.datasetName,
            });
            const filesUploaded = await this.uploadToHub(options.datasetName, outputDir, isPrivate);
            const datasetUrl = `https://huggingface.co/datasets/${options.datasetName}`;
            logger.info("Dataset uploaded successfully", {
                datasetUrl,
                filesUploaded,
            });
            return {
                success: true,
                datasetUrl,
                version,
                filesUploaded,
            };
        }
        catch (error) {
            logger.error("Failed to upload dataset", { error });
            return {
                success: false,
                version: options.version || "unknown",
                filesUploaded: 0,
                error: error instanceof Error ? error.message : "Unknown error",
            };
        }
    }

    /**
     * Collect benchmark data from files.
     *
     * Three optional sub-directories of `benchmarkDir` are scanned, in this order:
     *  - `model-comparison/comparison.json`: one record per entry of `results` that has `metrics`
     *  - `baselines/baseline-*.json`: one record per file that has `metrics`
     *  - `test-baselines/<name>/metrics.json`: one record per file that has a
     *    `totalPnl` value (including 0) or a `predictionMetrics` object
     *
     * @param {string} benchmarkDir - Root directory of the benchmark outputs.
     * @returns {Promise<object[]>} Benchmark records; empty when nothing is found.
     * @throws If a matching file cannot be read or does not contain valid JSON.
     */
    async collectBenchmarkData(benchmarkDir) {
        const records = [];
        // Collect from model-comparison directory
        const comparisonFile = path.join(benchmarkDir, "model-comparison", "comparison.json");
        if (await this.fileExists(comparisonFile)) {
            const data = await readJsonFile(comparisonFile);
            for (const result of data.results || []) {
                if (!result.metrics) {
                    continue;
                }
                const duration = result.metrics.timing?.totalDuration || 0;
                // Same fallbacks as the baselines branch below: a result without
                // `model` (or a file without `runAt`) must not abort the whole collection.
                const modelId = result.model?.modelId || "unknown";
                records.push({
                    benchmarkId: data.benchmark || "comparison",
                    modelId,
                    modelVersion: "baseline",
                    modelName: result.model?.displayName || modelId,
                    runAt: data.runAt || new Date().toISOString(),
                    metrics: result.metrics,
                    benchmarkSnapshot: buildBenchmarkSnapshot(duration),
                });
            }
        }
        // Collect from baselines directory
        const baselinesDir = path.join(benchmarkDir, "baselines");
        if (await this.fileExists(baselinesDir)) {
            const files = await fs.readdir(baselinesDir);
            for (const file of files) {
                if (!file.endsWith(".json") || !file.startsWith("baseline-")) {
                    continue;
                }
                const data = await readJsonFile(path.join(baselinesDir, file));
                // Skip if no metrics
                if (!data.metrics) {
                    continue;
                }
                // Strip the extension as a suffix; String#replace would remove the
                // first ".json" occurrence anywhere in the name.
                const baseName = path.basename(file, ".json");
                const duration = data.timing?.totalDuration ||
                    data.metrics.timing?.totalDuration ||
                    0;
                records.push({
                    benchmarkId: data.benchmark?.id || data.benchmark?.path || baseName,
                    modelId: data.model?.modelId || "unknown",
                    modelVersion: data.model?.version || "baseline",
                    modelName: data.model?.displayName || data.model?.name || baseName,
                    runAt: data.runAt || new Date().toISOString(),
                    metrics: data.metrics,
                    benchmarkSnapshot: buildBenchmarkSnapshot(duration),
                });
            }
        }
        // Collect from test-baselines directory
        const testBaselinesDir = path.join(benchmarkDir, "test-baselines");
        if (await this.fileExists(testBaselinesDir)) {
            const subdirs = await fs.readdir(testBaselinesDir);
            for (const subdir of subdirs) {
                const metricsFile = path.join(testBaselinesDir, subdir, "metrics.json");
                if (!(await this.fileExists(metricsFile))) {
                    continue;
                }
                const data = await readJsonFile(metricsFile);
                // Skip if no required fields. A P&L of exactly 0 is a real result,
                // so test for absence rather than falsiness.
                const hasTotalPnl = data.totalPnl !== undefined && data.totalPnl !== null;
                if (!hasTotalPnl && !data.predictionMetrics) {
                    continue;
                }
                records.push({
                    benchmarkId: data.benchmarkId || "test-benchmark",
                    modelId: subdir,
                    modelVersion: "test-baseline",
                    modelName: subdir,
                    runAt: data.runAt || new Date().toISOString(),
                    metrics: data,
                    benchmarkSnapshot: buildBenchmarkSnapshot(data.timing?.totalDuration || 0),
                });
            }
        }
        return records;
    }

    /**
     * Prepare dataset files in HuggingFace format.
     *
     * Writes `data.jsonl` (one record per line), `metadata.json` and
     * `summary.json` into `outputDir`.
     *
     * @param {object[]} benchmarks - Records produced by `collectBenchmarkData`.
     * @param {string} outputDir - Existing directory to write into.
     * @param {{datasetName: string, version: string, description: string}} options
     * @returns {Promise<object>} The metadata object that was written to `metadata.json`.
     */
    async prepareDatasetFiles(benchmarks, outputDir, options) {
        // Create data.jsonl with all benchmark records
        const jsonlPath = path.join(outputDir, "data.jsonl");
        const jsonlLines = benchmarks.map((b) => JSON.stringify(b)).join("\n");
        await fs.writeFile(jsonlPath, jsonlLines);
        // Create metadata.json
        const metadata = {
            datasetName: options.datasetName,
            version: options.version,
            description: options.description,
            createdAt: new Date().toISOString(),
            totalBenchmarks: benchmarks.length,
            models: Array.from(new Set(benchmarks.map((b) => b.modelName))),
            benchmarkTypes: Array.from(new Set(benchmarks.map((b) => b.benchmarkId))),
            license: "MIT",
        };
        const metadataPath = path.join(outputDir, "metadata.json");
        await fs.writeFile(metadataPath, JSON.stringify(metadata, null, 2));
        // Create summary statistics
        const summary = this.calculateSummaryStatistics(benchmarks);
        const summaryPath = path.join(outputDir, "summary.json");
        await fs.writeFile(summaryPath, JSON.stringify(summary, null, 2));
        return metadata;
    }

    /**
     * Generate README.md dataset card for HuggingFace.
     *
     * Branding is read from the `TRAINING_BRAND_NAME`, `TRAINING_BRAND_ORG`
     * and `TRAINING_PLATFORM_NAME` environment variables. Statistics that are
     * not finite numbers are rendered as "N/A" instead of throwing.
     *
     * @param {object} metadata - Object returned by `prepareDatasetFiles`.
     * @param {object[]} benchmarks - Records produced by `collectBenchmarkData`.
     * @param {string} outputDir - Directory that receives `README.md`.
     * @returns {Promise<void>}
     */
    async generateDatasetCard(metadata, benchmarks, outputDir) {
        const summary = this.calculateSummaryStatistics(benchmarks);
        const brandName = process.env.TRAINING_BRAND_NAME || "ElizaOS";
        const brandOrg = process.env.TRAINING_BRAND_ORG || "ElizaOS Contributors";
        const platformName = process.env.TRAINING_PLATFORM_NAME || "ElizaOS-compatible runtimes";
        const brandTag = brandName.toLowerCase().replace(/\s+/g, "-");
        // One table row: label followed by mean / median / std / min / max.
        const statRow = (label, stats, render) => `| ${label} | ${render(stats?.mean)} | ${render(stats?.median)} | ${render(stats?.std)} | ${render(stats?.min)} | ${render(stats?.max)} |`;
        const pnlRow = statRow("Total P&L", summary.pnl, (v) => formatFixed(v, 2));
        const accuracyRow = statRow("Prediction Accuracy", summary.accuracy, (v) => formatPercent(v, 1));
        const optimalityRow = statRow("Optimality Score", summary.optimality, (v) => formatFixed(v, 1));
        const card = `---
license: ${metadata.license}
task_categories:
- reinforcement-learning
- agent-evaluation
tags:
- ${brandTag}
- prediction-markets
- trading-agents
- benchmarks
size_categories:
- n<1K
---

# ${metadata.datasetName}

## Dataset Description

${metadata.description}

This dataset contains benchmark results for autonomous trading agents on prediction-market style environments. Each record includes comprehensive performance metrics, market conditions, and agent behavior data.

**Version:** ${metadata.version}
**Created:** ${metadata.createdAt}
**Total Benchmarks:** ${metadata.totalBenchmarks}
**Models Evaluated:** ${metadata.models.length}

## Dataset Statistics

### Overall Performance

| Metric | Mean | Median | Std Dev | Min | Max |
|--------|------|--------|---------|-----|-----|
${pnlRow}
${accuracyRow}
${optimalityRow}

### Model Leaderboard

${this.generateLeaderboardTable(benchmarks)}

## Dataset Structure

### Data Fields

- \`benchmarkId\`: Unique identifier for the benchmark scenario
- \`modelId\`: Model identifier
- \`modelVersion\`: Model version (baseline, trained, etc.)
- \`modelName\`: Human-readable model name
- \`runAt\`: ISO timestamp of benchmark execution
- \`metrics\`: Performance metrics object
  - \`totalPnl\`: Total profit/loss across all positions
  - \`predictionMetrics\`: Prediction market performance
    - \`totalPositions\`: Number of prediction positions taken
    - \`correctPredictions\`: Number of correct predictions
    - \`accuracy\`: Prediction accuracy (0-1)
  - \`perpMetrics\`: Perpetual trading performance
    - \`totalTrades\`: Number of perpetual trades
    - \`winRate\`: Win rate for perpetual trades
  - \`socialMetrics\`: Social engagement metrics
  - \`timing\`: Execution timing statistics
  - \`optimalityScore\`: How close to optimal play (0-100)

### Data Splits

This dataset does not have predefined splits. Use for model evaluation and comparison.

## Usage

### Load Dataset

\`\`\`python
from datasets import load_dataset

dataset = load_dataset("${metadata.datasetName}")
\`\`\`

### Example Analysis

\`\`\`python
import pandas as pd

# Load the records, then flatten the nested metrics object into dotted columns
records = pd.read_json("hf://datasets/${metadata.datasetName}/data.jsonl", lines=True)
df = pd.json_normalize(records.to_dict(orient="records"))

# Compare models
model_performance = df.groupby('modelName').agg({
    'metrics.totalPnl': 'mean',
    'metrics.predictionMetrics.accuracy': 'mean',
    'metrics.optimalityScore': 'mean'
})

print(model_performance.sort_values('metrics.totalPnl', ascending=False))
\`\`\`

## Benchmark Details

### Environment

- **Platform:** ${platformName}
- **Market Types:** Prediction markets + perpetual futures
- **Tick Interval:** ${benchmarks[0]?.benchmarkSnapshot.tickInterval || 60} seconds
- **Duration:** ${Math.floor((benchmarks[0]?.benchmarkSnapshot.duration || 0) / 60000)} minutes

### Evaluation Metrics

1. **Total P&L:** Cumulative profit/loss across all positions
2. **Prediction Accuracy:** Percentage of correct market outcome predictions
3. **Perp Win Rate:** Percentage of profitable perpetual trades
4. **Optimality Score:** Alignment with theoretically optimal actions (0-100)
5. **Response Time:** Agent decision-making speed

## Citation

If you use this dataset in your research, please cite:

\`\`\`bibtex
@dataset{${brandTag}_benchmarks_${metadata.version.replace(/\./g, "_")},
  title = {${brandName} Agent Benchmarks},
  author = {${brandOrg}},
  year = {${new Date().getFullYear()}},
  version = {${metadata.version}},
  url = {https://huggingface.co/datasets/${metadata.datasetName}}
}
\`\`\`

## License

${metadata.license}

## Contact

For questions or issues, please open an issue on the repository.
`;
        const cardPath = path.join(outputDir, "README.md");
        await fs.writeFile(cardPath, card);
    }

    /**
     * Generate leaderboard table for dataset card.
     *
     * Records are grouped by `modelName`; each column is the mean of the
     * finite values available for that model, and "N/A" when there is none.
     * Rows are ordered by average P&L, highest first; models without any
     * P&L value are listed last.
     *
     * @param {object[]} benchmarks - Records produced by `collectBenchmarkData`.
     * @returns {string} Markdown table, every row terminated by a newline.
     */
    generateLeaderboardTable(benchmarks) {
        // Group by model
        const modelStats = new Map();
        for (const benchmark of benchmarks) {
            let stats = modelStats.get(benchmark.modelName);
            if (!stats) {
                stats = { runs: 0, pnl: [], accuracy: [], optimality: [] };
                modelStats.set(benchmark.modelName, stats);
            }
            stats.runs += 1;
            // Optional chaining: test-baseline records may carry only part of the metrics.
            stats.pnl.push(benchmark.metrics?.totalPnl);
            stats.accuracy.push(benchmark.metrics?.predictionMetrics?.accuracy);
            stats.optimality.push(benchmark.metrics?.optimalityScore);
        }
        // Calculate averages and sort by P&L
        const sortKey = (value) => (value === null ? Number.NEGATIVE_INFINITY : value);
        const leaderboard = Array.from(modelStats.entries())
            .map(([model, stats]) => ({
                model,
                avgPnl: meanOrNull(stats.pnl),
                avgAccuracy: meanOrNull(stats.accuracy),
                avgOptimality: meanOrNull(stats.optimality),
                runs: stats.runs,
            }))
            .sort((a, b) => {
                const left = sortKey(a.avgPnl);
                const right = sortKey(b.avgPnl);
                // Explicit equality check avoids the NaN produced by (-Infinity) - (-Infinity).
                return left === right ? 0 : right - left;
            });
        let table = "| Rank | Model | Avg P&L | Accuracy | Optimality | Runs |\n";
        table += "|------|-------|---------|----------|------------|------|\n";
        leaderboard.forEach((entry, index) => {
            table += `| ${index + 1} | ${escapeTableCell(entry.model)} | ${formatFixed(entry.avgPnl, 2)} | ${formatPercent(entry.avgAccuracy, 1)} | ${formatFixed(entry.avgOptimality, 1)} | ${entry.runs} |\n`;
        });
        return table;
    }

    /**
     * Calculate summary statistics.
     *
     * Each metric is reduced to its finite values, sorted ascending and
     * handed to `calculateArrayStats`. Records that lack a metric are
     * ignored for that metric instead of raising a TypeError.
     *
     * NOTE(review): the result of `calculateArrayStats` for an empty array is
     * not visible from this file — confirm it is well-defined.
     *
     * @param {object[]} benchmarks - Records produced by `collectBenchmarkData`.
     * @returns {{pnl: object, accuracy: object, optimality: object}}
     */
    calculateSummaryStatistics(benchmarks) {
        const sortedFinite = (pick) => finiteNumbers(benchmarks.map(pick)).sort((a, b) => a - b);
        const pnls = sortedFinite((b) => b.metrics?.totalPnl);
        const accuracies = sortedFinite((b) => b.metrics?.predictionMetrics?.accuracy);
        const optimalities = sortedFinite((b) => b.metrics?.optimalityScore);
        return {
            pnl: calculateArrayStats(pnls),
            accuracy: calculateArrayStats(accuracies),
            optimality: calculateArrayStats(optimalities),
        };
    }

    /**
     * Ensure repository exists on HuggingFace.
     * Uses shared utility for consistent behavior.
     *
     * @param {string} datasetName - Dataset repository name.
     * @param {boolean} isPrivate - Visibility forwarded to the shared utility.
     * @returns {Promise<void>}
     * @throws {Error} If no token is configured.
     */
    async ensureRepository(datasetName, isPrivate) {
        if (!this.huggingFaceToken) {
            throw new Error("HuggingFace token not configured");
        }
        await HuggingFaceUploadUtil.ensureRepository(datasetName, "dataset", this.huggingFaceToken, isPrivate);
    }

    /**
     * Upload files to HuggingFace Hub.
     * Uses shared utility for consistent upload behavior.
     *
     * On failure the error is logged together with manual upload
     * instructions and then rethrown unchanged.
     *
     * @param {string} datasetName - Dataset repository name.
     * @param {string} localDir - Directory whose content is uploaded.
     * @param {boolean} _isPrivate - Unused; kept for interface compatibility.
     * @returns {Promise<number>} Value returned by `HuggingFaceUploadUtil.uploadDirectory`
     *   (reported by `uploadDataset` as `filesUploaded`).
     * @throws {Error} If no token is configured, or whatever the upload utility throws.
     */
    async uploadToHub(datasetName, localDir, _isPrivate) {
        if (!this.huggingFaceToken) {
            throw new Error("HuggingFace token not configured");
        }
        try {
            // The utility is already imported statically at the top of the file;
            // a dynamic re-import here could itself fail and hide the upload error.
            return await HuggingFaceUploadUtil.uploadDirectory(datasetName, "dataset", localDir, this.huggingFaceToken);
        }
        catch (error) {
            logger.error("Failed to upload to HuggingFace Hub", { error });
            // Provide helpful manual upload instructions
            const instructions = HuggingFaceUploadUtil.getManualUploadInstructions(datasetName, "dataset", localDir);
            logger.info("To upload manually:", { instructions });
            throw error;
        }
    }

    /**
     * Generate version string (YYYY.MM.DD format) from the current local date.
     *
     * @returns {string}
     */
    generateVersion() {
        const now = new Date();
        const year = now.getFullYear();
        const month = String(now.getMonth() + 1).padStart(2, "0");
        const day = String(now.getDate()).padStart(2, "0");
        return `${year}.${month}.${day}`;
    }

    /**
     * Check if a path (file or directory) is accessible.
     *
     * @param {string} filePath
     * @returns {Promise<boolean>} `false` when `fs.access` rejects for any reason.
     */
    async fileExists(filePath) {
        try {
            await fs.access(filePath);
            return true;
        }
        catch {
            return false;
        }
    }
}