@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773726941205.json +38 -0
- package/scripts/rank_trajectories.ts +0 -1
- package/scripts/run_task_benchmark.ts +4 -11
- package/src/adapter.ts +96 -49
- package/src/archetypes/ArchetypeConfigService.ts +188 -185
- package/src/archetypes/derive-archetype.ts +47 -47
- package/src/archetypes/index.ts +2 -2
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
- package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
- package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
- package/src/benchmark/BenchmarkDataViewer.ts +32 -30
- package/src/benchmark/BenchmarkHistoryService.ts +13 -12
- package/src/benchmark/BenchmarkRunner.ts +87 -83
- package/src/benchmark/BenchmarkValidator.ts +48 -46
- package/src/benchmark/FastEvalRunner.ts +17 -16
- package/src/benchmark/MetricsValidator.ts +20 -21
- package/src/benchmark/MetricsVisualizer.ts +92 -85
- package/src/benchmark/ModelBenchmarkService.ts +90 -82
- package/src/benchmark/ModelRegistry.ts +44 -44
- package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
- package/src/benchmark/SimulationA2AInterface.ts +118 -118
- package/src/benchmark/SimulationEngine.ts +51 -51
- package/src/benchmark/TaskRunner.ts +87 -79
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
- package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
- package/src/benchmark/index.ts +27 -27
- package/src/benchmark/parseSimulationMetrics.ts +32 -32
- package/src/benchmark/simulation-types.ts +10 -10
- package/src/dependencies.ts +34 -34
- package/src/generation/TrajectoryGenerator.ts +39 -37
- package/src/generation/index.ts +1 -1
- package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
- package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
- package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
- package/src/huggingface/index.ts +6 -6
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
- package/src/index.ts +27 -27
- package/src/init-training.ts +6 -6
- package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
- package/src/metrics/index.ts +2 -2
- package/src/rubrics/__tests__/index.test.ts +73 -73
- package/src/rubrics/ass-kisser.ts +6 -6
- package/src/rubrics/degen.ts +6 -6
- package/src/rubrics/goody-twoshoes.ts +6 -6
- package/src/rubrics/index.ts +50 -50
- package/src/rubrics/information-trader.ts +6 -6
- package/src/rubrics/infosec.ts +6 -6
- package/src/rubrics/liar.ts +6 -6
- package/src/rubrics/perps-trader.ts +6 -6
- package/src/rubrics/researcher.ts +6 -6
- package/src/rubrics/scammer.ts +6 -6
- package/src/rubrics/social-butterfly.ts +7 -7
- package/src/rubrics/super-predictor.ts +6 -6
- package/src/rubrics/trader.ts +5 -5
- package/src/scoring/ArchetypeScoringService.ts +56 -54
- package/src/scoring/JudgePromptBuilder.ts +96 -96
- package/src/scoring/LLMJudgeCache.ts +26 -23
- package/src/scoring/index.ts +3 -3
- package/src/training/AutomationPipeline.ts +149 -140
- package/src/training/BenchmarkService.ts +49 -45
- package/src/training/ConfigValidator.ts +38 -32
- package/src/training/MarketOutcomesTracker.ts +22 -12
- package/src/training/ModelDeployer.ts +15 -15
- package/src/training/ModelFetcher.ts +7 -7
- package/src/training/ModelSelectionService.ts +32 -32
- package/src/training/ModelUsageVerifier.ts +31 -24
- package/src/training/MultiModelOrchestrator.ts +44 -44
- package/src/training/RLModelConfig.ts +57 -57
- package/src/training/RewardBackpropagationService.ts +18 -17
- package/src/training/RulerScoringService.ts +73 -72
- package/src/training/TrainingMonitor.ts +29 -29
- package/src/training/TrajectoryRecorder.ts +25 -27
- package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
- package/src/training/index.ts +36 -36
- package/src/training/logRLConfig.ts +7 -7
- package/src/training/pipeline.ts +13 -16
- package/src/training/storage/ModelStorageService.ts +32 -32
- package/src/training/storage/TrainingDataArchiver.ts +21 -21
- package/src/training/storage/index.ts +2 -2
- package/src/training/types.ts +6 -6
- package/src/training/window-utils.ts +14 -14
- package/src/utils/index.ts +7 -7
- package/src/utils/logger.ts +5 -5
- package/src/utils/snowflake.ts +1 -1
- package/src/utils/synthetic-detector.ts +7 -7
|
@@ -12,11 +12,11 @@
|
|
|
12
12
|
* Outputs HTML reports, JSON data, and ASCII terminal charts for analysis.
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
|
-
import { promises as fs } from
|
|
16
|
-
import * as path from
|
|
17
|
-
import { logger } from
|
|
18
|
-
import type { BenchmarkComparisonResult } from
|
|
19
|
-
import type { SimulationResult } from
|
|
15
|
+
import { promises as fs } from "node:fs";
|
|
16
|
+
import * as path from "node:path";
|
|
17
|
+
import { logger } from "../utils/logger";
|
|
18
|
+
import type { BenchmarkComparisonResult } from "./BenchmarkRunner";
|
|
19
|
+
import type { SimulationResult } from "./SimulationEngine";
|
|
20
20
|
|
|
21
21
|
export interface VisualizationConfig {
|
|
22
22
|
/** Output directory for visualizations */
|
|
@@ -38,43 +38,43 @@ export class MetricsVisualizer {
|
|
|
38
38
|
*/
|
|
39
39
|
static async visualizeSingleRun(
|
|
40
40
|
result: SimulationResult,
|
|
41
|
-
config: VisualizationConfig
|
|
41
|
+
config: VisualizationConfig,
|
|
42
42
|
): Promise<void> {
|
|
43
|
-
logger.info(
|
|
43
|
+
logger.info("Generating visualizations", { resultId: result.id });
|
|
44
44
|
|
|
45
45
|
await fs.mkdir(config.outputDir, { recursive: true });
|
|
46
46
|
|
|
47
47
|
// 1. Generate metrics summary
|
|
48
|
-
const summaryHtml =
|
|
48
|
+
const summaryHtml = MetricsVisualizer.generateMetricsSummary(result);
|
|
49
49
|
await fs.writeFile(
|
|
50
|
-
path.join(config.outputDir,
|
|
51
|
-
summaryHtml
|
|
50
|
+
path.join(config.outputDir, "summary.html"),
|
|
51
|
+
summaryHtml,
|
|
52
52
|
);
|
|
53
53
|
|
|
54
54
|
// 2. Generate detailed metrics tables
|
|
55
|
-
const detailedHtml =
|
|
55
|
+
const detailedHtml = MetricsVisualizer.generateDetailedMetrics(result);
|
|
56
56
|
await fs.writeFile(
|
|
57
|
-
path.join(config.outputDir,
|
|
58
|
-
detailedHtml
|
|
57
|
+
path.join(config.outputDir, "detailed.html"),
|
|
58
|
+
detailedHtml,
|
|
59
59
|
);
|
|
60
60
|
|
|
61
61
|
// 3. Generate action timeline
|
|
62
|
-
const timelineHtml =
|
|
62
|
+
const timelineHtml = MetricsVisualizer.generateActionTimeline(result);
|
|
63
63
|
await fs.writeFile(
|
|
64
|
-
path.join(config.outputDir,
|
|
65
|
-
timelineHtml
|
|
64
|
+
path.join(config.outputDir, "timeline.html"),
|
|
65
|
+
timelineHtml,
|
|
66
66
|
);
|
|
67
67
|
|
|
68
68
|
// 4. Generate CSV exports if requested
|
|
69
69
|
if (config.generateCsv) {
|
|
70
|
-
await
|
|
70
|
+
await MetricsVisualizer.exportToCsv(result, config.outputDir);
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
// 5. Generate master report that links everything
|
|
74
|
-
const reportHtml =
|
|
75
|
-
await fs.writeFile(path.join(config.outputDir,
|
|
74
|
+
const reportHtml = MetricsVisualizer.generateMasterReport(result);
|
|
75
|
+
await fs.writeFile(path.join(config.outputDir, "index.html"), reportHtml);
|
|
76
76
|
|
|
77
|
-
logger.info(
|
|
77
|
+
logger.info("Visualizations generated", { outputDir: config.outputDir });
|
|
78
78
|
}
|
|
79
79
|
|
|
80
80
|
/**
|
|
@@ -82,32 +82,36 @@ export class MetricsVisualizer {
|
|
|
82
82
|
*/
|
|
83
83
|
static async visualizeComparison(
|
|
84
84
|
comparison: BenchmarkComparisonResult,
|
|
85
|
-
config: VisualizationConfig
|
|
85
|
+
config: VisualizationConfig,
|
|
86
86
|
): Promise<void> {
|
|
87
|
-
logger.info(
|
|
87
|
+
logger.info("Generating comparison visualizations");
|
|
88
88
|
|
|
89
89
|
await fs.mkdir(config.outputDir, { recursive: true });
|
|
90
90
|
|
|
91
91
|
// 1. Generate comparison summary
|
|
92
|
-
const summaryHtml =
|
|
92
|
+
const summaryHtml = MetricsVisualizer.generateComparisonSummary(comparison);
|
|
93
93
|
await fs.writeFile(
|
|
94
|
-
path.join(config.outputDir,
|
|
95
|
-
summaryHtml
|
|
94
|
+
path.join(config.outputDir, "comparison.html"),
|
|
95
|
+
summaryHtml,
|
|
96
96
|
);
|
|
97
97
|
|
|
98
98
|
// 2. Generate performance distribution charts
|
|
99
|
-
const distributionHtml =
|
|
99
|
+
const distributionHtml =
|
|
100
|
+
MetricsVisualizer.generateDistributionCharts(comparison);
|
|
100
101
|
await fs.writeFile(
|
|
101
|
-
path.join(config.outputDir,
|
|
102
|
-
distributionHtml
|
|
102
|
+
path.join(config.outputDir, "distribution.html"),
|
|
103
|
+
distributionHtml,
|
|
103
104
|
);
|
|
104
105
|
|
|
105
106
|
// 3. Export comparison data to CSV
|
|
106
107
|
if (config.generateCsv) {
|
|
107
|
-
await
|
|
108
|
+
await MetricsVisualizer.exportComparisonToCsv(
|
|
109
|
+
comparison,
|
|
110
|
+
config.outputDir,
|
|
111
|
+
);
|
|
108
112
|
}
|
|
109
113
|
|
|
110
|
-
logger.info(
|
|
114
|
+
logger.info("Comparison visualizations generated");
|
|
111
115
|
}
|
|
112
116
|
|
|
113
117
|
/**
|
|
@@ -117,13 +121,16 @@ export class MetricsVisualizer {
|
|
|
117
121
|
static async generateComparisonReport(
|
|
118
122
|
baseline: SimulationResult,
|
|
119
123
|
challenger: SimulationResult,
|
|
120
|
-
outputDir: string
|
|
124
|
+
outputDir: string,
|
|
121
125
|
): Promise<void> {
|
|
122
|
-
logger.info(
|
|
126
|
+
logger.info("Generating head-to-head comparison report...");
|
|
123
127
|
await fs.mkdir(outputDir, { recursive: true });
|
|
124
128
|
|
|
125
129
|
// 1. Generate ASCII Chart and print to terminal
|
|
126
|
-
const asciiReport =
|
|
130
|
+
const asciiReport = MetricsVisualizer.generateAsciiComparison(
|
|
131
|
+
baseline,
|
|
132
|
+
challenger,
|
|
133
|
+
);
|
|
127
134
|
console.log(asciiReport);
|
|
128
135
|
|
|
129
136
|
// 2. Save JSON Report with full data
|
|
@@ -153,16 +160,16 @@ export class MetricsVisualizer {
|
|
|
153
160
|
challenger.metrics.perpMetrics.winRate -
|
|
154
161
|
baseline.metrics.perpMetrics.winRate,
|
|
155
162
|
},
|
|
156
|
-
pnlHistory:
|
|
163
|
+
pnlHistory: MetricsVisualizer.mergePnlHistory(baseline, challenger),
|
|
157
164
|
};
|
|
158
165
|
|
|
159
166
|
await fs.writeFile(
|
|
160
|
-
path.join(outputDir,
|
|
161
|
-
JSON.stringify(jsonReport, null, 2)
|
|
167
|
+
path.join(outputDir, "comparison.json"),
|
|
168
|
+
JSON.stringify(jsonReport, null, 2),
|
|
162
169
|
);
|
|
163
170
|
|
|
164
171
|
// 3. Save Text Report (ASCII chart)
|
|
165
|
-
await fs.writeFile(path.join(outputDir,
|
|
172
|
+
await fs.writeFile(path.join(outputDir, "report.txt"), asciiReport);
|
|
166
173
|
|
|
167
174
|
logger.info(`Comparison report saved to ${outputDir}`);
|
|
168
175
|
}
|
|
@@ -173,10 +180,10 @@ export class MetricsVisualizer {
|
|
|
173
180
|
*/
|
|
174
181
|
static generateAsciiComparison(
|
|
175
182
|
baseline: SimulationResult,
|
|
176
|
-
challenger: SimulationResult
|
|
183
|
+
challenger: SimulationResult,
|
|
177
184
|
): string {
|
|
178
185
|
const pnlDelta = challenger.metrics.totalPnl - baseline.metrics.totalPnl;
|
|
179
|
-
const winner = pnlDelta >= 0 ?
|
|
186
|
+
const winner = pnlDelta >= 0 ? "Challenger (LLM)" : "Baseline";
|
|
180
187
|
|
|
181
188
|
let output = `
|
|
182
189
|
=== 🥊 HEAD-TO-HEAD RESULTS ===
|
|
@@ -189,7 +196,7 @@ Tick | Baseline | Challenger | Delta
|
|
|
189
196
|
`;
|
|
190
197
|
|
|
191
198
|
// Sample points (every 10th tick or so to fit terminal vertically)
|
|
192
|
-
const history =
|
|
199
|
+
const history = MetricsVisualizer.mergePnlHistory(baseline, challenger);
|
|
193
200
|
const step = Math.max(1, Math.floor(history.length / 10));
|
|
194
201
|
|
|
195
202
|
for (let i = 0; i < history.length; i += step) {
|
|
@@ -200,11 +207,11 @@ Tick | Baseline | Challenger | Delta
|
|
|
200
207
|
const chalPnl = point.challenger.toFixed(0);
|
|
201
208
|
const deltaVal = point.challenger - point.baseline;
|
|
202
209
|
const deltaStr = deltaVal.toFixed(0);
|
|
203
|
-
const sign = deltaVal >= 0 ?
|
|
210
|
+
const sign = deltaVal >= 0 ? "+" : "";
|
|
204
211
|
|
|
205
212
|
// Format columns nicely
|
|
206
213
|
output += `${point.tick.toString().padEnd(5)} | $${basePnl.padEnd(
|
|
207
|
-
21
|
|
214
|
+
21,
|
|
208
215
|
)} | $${chalPnl.padEnd(21)} | ${sign}$${deltaStr}\n`;
|
|
209
216
|
}
|
|
210
217
|
|
|
@@ -212,12 +219,12 @@ Tick | Baseline | Challenger | Delta
|
|
|
212
219
|
const finalBase = baseline.metrics.totalPnl.toFixed(2);
|
|
213
220
|
const finalChal = challenger.metrics.totalPnl.toFixed(2);
|
|
214
221
|
const finalDelta = pnlDelta.toFixed(2);
|
|
215
|
-
const finalSign = pnlDelta >= 0 ?
|
|
222
|
+
const finalSign = pnlDelta >= 0 ? "+" : "";
|
|
216
223
|
|
|
217
224
|
output += `
|
|
218
225
|
----------------------------------------------------------------------
|
|
219
226
|
FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
220
|
-
21
|
|
227
|
+
21,
|
|
221
228
|
)} | ${finalSign}$${finalDelta}
|
|
222
229
|
|
|
223
230
|
🏆 WINNER: ${winner}
|
|
@@ -233,12 +240,12 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
233
240
|
*/
|
|
234
241
|
static mergePnlHistory(
|
|
235
242
|
baseline: SimulationResult,
|
|
236
|
-
challenger: SimulationResult
|
|
243
|
+
challenger: SimulationResult,
|
|
237
244
|
): Array<{ tick: number; baseline: number; challenger: number }> {
|
|
238
245
|
const merged = [];
|
|
239
246
|
const maxTicks = Math.max(
|
|
240
247
|
baseline.pnlHistory?.length || 0,
|
|
241
|
-
challenger.pnlHistory?.length || 0
|
|
248
|
+
challenger.pnlHistory?.length || 0,
|
|
242
249
|
);
|
|
243
250
|
|
|
244
251
|
for (let i = 0; i < maxTicks; i++) {
|
|
@@ -359,15 +366,15 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
359
366
|
<h2>Overall Performance</h2>
|
|
360
367
|
<div class="metric-item">
|
|
361
368
|
<span class="metric-label">Total P&L</span>
|
|
362
|
-
<span class="metric-value ${metrics.totalPnl >= 0 ?
|
|
363
|
-
${metrics.totalPnl >= 0 ?
|
|
369
|
+
<span class="metric-value ${metrics.totalPnl >= 0 ? "positive" : "negative"}">
|
|
370
|
+
${metrics.totalPnl >= 0 ? "+" : ""}$${metrics.totalPnl.toFixed(2)}
|
|
364
371
|
</span>
|
|
365
372
|
</div>
|
|
366
373
|
<div class="metric-item">
|
|
367
374
|
<span class="metric-label">Optimality Score</span>
|
|
368
375
|
<span class="metric-value">
|
|
369
376
|
${metrics.optimalityScore.toFixed(1)}%
|
|
370
|
-
${
|
|
377
|
+
${MetricsVisualizer.getScoreBadge(metrics.optimalityScore)}
|
|
371
378
|
</span>
|
|
372
379
|
</div>
|
|
373
380
|
<div class="metric-item">
|
|
@@ -390,7 +397,7 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
390
397
|
</div>
|
|
391
398
|
<div class="metric-item">
|
|
392
399
|
<span class="metric-label">Accuracy</span>
|
|
393
|
-
<span class="metric-value ${metrics.predictionMetrics.accuracy >= 0.6 ?
|
|
400
|
+
<span class="metric-value ${metrics.predictionMetrics.accuracy >= 0.6 ? "positive" : ""}">${(metrics.predictionMetrics.accuracy * 100).toFixed(1)}%</span>
|
|
394
401
|
</div>
|
|
395
402
|
<div class="metric-item">
|
|
396
403
|
<span class="metric-label">Correct</span>
|
|
@@ -402,8 +409,8 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
402
409
|
</div>
|
|
403
410
|
<div class="metric-item">
|
|
404
411
|
<span class="metric-label">Avg P&L per Position</span>
|
|
405
|
-
<span class="metric-value ${metrics.predictionMetrics.avgPnlPerPosition >= 0 ?
|
|
406
|
-
${metrics.predictionMetrics.avgPnlPerPosition >= 0 ?
|
|
412
|
+
<span class="metric-value ${metrics.predictionMetrics.avgPnlPerPosition >= 0 ? "positive" : "negative"}">
|
|
413
|
+
${metrics.predictionMetrics.avgPnlPerPosition >= 0 ? "+" : ""}$${metrics.predictionMetrics.avgPnlPerPosition.toFixed(2)}
|
|
407
414
|
</span>
|
|
408
415
|
</div>
|
|
409
416
|
</div>
|
|
@@ -416,7 +423,7 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
416
423
|
</div>
|
|
417
424
|
<div class="metric-item">
|
|
418
425
|
<span class="metric-label">Win Rate</span>
|
|
419
|
-
<span class="metric-value ${metrics.perpMetrics.winRate >= 0.5 ?
|
|
426
|
+
<span class="metric-value ${metrics.perpMetrics.winRate >= 0.5 ? "positive" : ""}">${(metrics.perpMetrics.winRate * 100).toFixed(1)}%</span>
|
|
420
427
|
</div>
|
|
421
428
|
<div class="metric-item">
|
|
422
429
|
<span class="metric-label">Profitable Trades</span>
|
|
@@ -424,8 +431,8 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
424
431
|
</div>
|
|
425
432
|
<div class="metric-item">
|
|
426
433
|
<span class="metric-label">Avg P&L per Trade</span>
|
|
427
|
-
<span class="metric-value ${metrics.perpMetrics.avgPnlPerTrade >= 0 ?
|
|
428
|
-
${metrics.perpMetrics.avgPnlPerTrade >= 0 ?
|
|
434
|
+
<span class="metric-value ${metrics.perpMetrics.avgPnlPerTrade >= 0 ? "positive" : "negative"}">
|
|
435
|
+
${metrics.perpMetrics.avgPnlPerTrade >= 0 ? "+" : ""}$${metrics.perpMetrics.avgPnlPerTrade.toFixed(2)}
|
|
429
436
|
</span>
|
|
430
437
|
</div>
|
|
431
438
|
<div class="metric-item">
|
|
@@ -448,8 +455,8 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
448
455
|
</div>
|
|
449
456
|
<div class="metric-item">
|
|
450
457
|
<span class="metric-label">Reputation Gained</span>
|
|
451
|
-
<span class="metric-value ${metrics.socialMetrics.reputationGained >= 0 ?
|
|
452
|
-
${metrics.socialMetrics.reputationGained >= 0 ?
|
|
458
|
+
<span class="metric-value ${metrics.socialMetrics.reputationGained >= 0 ? "positive" : "negative"}">
|
|
459
|
+
${metrics.socialMetrics.reputationGained >= 0 ? "+" : ""}${metrics.socialMetrics.reputationGained}
|
|
453
460
|
</span>
|
|
454
461
|
</div>
|
|
455
462
|
</div>
|
|
@@ -528,9 +535,9 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
528
535
|
<td><code>${JSON.stringify(action.data)}</code></td>
|
|
529
536
|
<td>${action.duration}ms</td>
|
|
530
537
|
</tr>
|
|
531
|
-
|
|
538
|
+
`,
|
|
532
539
|
)
|
|
533
|
-
.join(
|
|
540
|
+
.join("")}
|
|
534
541
|
</tbody>
|
|
535
542
|
</table>
|
|
536
543
|
</body>
|
|
@@ -597,9 +604,9 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
597
604
|
<div class="action-type">${action.type}</div>
|
|
598
605
|
<div class="action-details">${JSON.stringify(action.data)}</div>
|
|
599
606
|
</div>
|
|
600
|
-
|
|
607
|
+
`,
|
|
601
608
|
)
|
|
602
|
-
.join(
|
|
609
|
+
.join("")}
|
|
603
610
|
</div>
|
|
604
611
|
</body>
|
|
605
612
|
</html>`;
|
|
@@ -665,7 +672,7 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
665
672
|
* Generate comparison summary
|
|
666
673
|
*/
|
|
667
674
|
private static generateComparisonSummary(
|
|
668
|
-
comparison: BenchmarkComparisonResult
|
|
675
|
+
comparison: BenchmarkComparisonResult,
|
|
669
676
|
): string {
|
|
670
677
|
return `
|
|
671
678
|
<!DOCTYPE html>
|
|
@@ -718,7 +725,7 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
718
725
|
</tr>
|
|
719
726
|
<tr>
|
|
720
727
|
<td>P&L</td>
|
|
721
|
-
<td class="${comparison.comparison.avgPnl >= 0 ?
|
|
728
|
+
<td class="${comparison.comparison.avgPnl >= 0 ? "positive" : "negative"}">$${comparison.comparison.avgPnl.toFixed(2)}</td>
|
|
722
729
|
<td>${comparison.comparison.bestRun}</td>
|
|
723
730
|
<td>${comparison.comparison.worstRun}</td>
|
|
724
731
|
</tr>
|
|
@@ -755,14 +762,14 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
755
762
|
(run, i) => `
|
|
756
763
|
<tr>
|
|
757
764
|
<td>Run ${i + 1}</td>
|
|
758
|
-
<td class="${run.metrics.totalPnl >= 0 ?
|
|
765
|
+
<td class="${run.metrics.totalPnl >= 0 ? "positive" : "negative"}">$${run.metrics.totalPnl.toFixed(2)}</td>
|
|
759
766
|
<td>${(run.metrics.predictionMetrics.accuracy * 100).toFixed(1)}%</td>
|
|
760
767
|
<td>${run.metrics.optimalityScore.toFixed(1)}%</td>
|
|
761
768
|
<td>${(run.metrics.timing.totalDuration / 1000).toFixed(1)}s</td>
|
|
762
769
|
</tr>
|
|
763
|
-
|
|
770
|
+
`,
|
|
764
771
|
)
|
|
765
|
-
.join(
|
|
772
|
+
.join("")}
|
|
766
773
|
</tbody>
|
|
767
774
|
</table>
|
|
768
775
|
</div>
|
|
@@ -774,11 +781,11 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
774
781
|
* Generate distribution charts
|
|
775
782
|
*/
|
|
776
783
|
private static generateDistributionCharts(
|
|
777
|
-
comparison: BenchmarkComparisonResult
|
|
784
|
+
comparison: BenchmarkComparisonResult,
|
|
778
785
|
): string {
|
|
779
786
|
const pnls = comparison.runs.map((r) => r.metrics.totalPnl);
|
|
780
787
|
const accuracies = comparison.runs.map(
|
|
781
|
-
(r) => r.metrics.predictionMetrics.accuracy * 100
|
|
788
|
+
(r) => r.metrics.predictionMetrics.accuracy * 100,
|
|
782
789
|
);
|
|
783
790
|
|
|
784
791
|
return `
|
|
@@ -826,9 +833,9 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
826
833
|
<div class="bar" style="width: ${(Math.abs(pnl) / Math.max(...pnls.map(Math.abs))) * 100}%">
|
|
827
834
|
Run ${i + 1}: $${pnl.toFixed(2)}
|
|
828
835
|
</div>
|
|
829
|
-
|
|
836
|
+
`,
|
|
830
837
|
)
|
|
831
|
-
.join(
|
|
838
|
+
.join("")}
|
|
832
839
|
</div>
|
|
833
840
|
|
|
834
841
|
<div class="chart">
|
|
@@ -839,9 +846,9 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
839
846
|
<div class="bar" style="width: ${acc}%">
|
|
840
847
|
Run ${i + 1}: ${acc.toFixed(1)}%
|
|
841
848
|
</div>
|
|
842
|
-
|
|
849
|
+
`,
|
|
843
850
|
)
|
|
844
|
-
.join(
|
|
851
|
+
.join("")}
|
|
845
852
|
</div>
|
|
846
853
|
</body>
|
|
847
854
|
</html>`;
|
|
@@ -852,30 +859,30 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
852
859
|
*/
|
|
853
860
|
private static async exportToCsv(
|
|
854
861
|
result: SimulationResult,
|
|
855
|
-
outputDir: string
|
|
862
|
+
outputDir: string,
|
|
856
863
|
): Promise<void> {
|
|
857
864
|
// Actions CSV
|
|
858
865
|
const actionsCsv = [
|
|
859
|
-
|
|
866
|
+
"tick,type,data,duration",
|
|
860
867
|
...result.actions.map(
|
|
861
868
|
(a) =>
|
|
862
|
-
`${a.tick},"${a.type}","${JSON.stringify(a.data).replace(/"/g, '""')}",${a.duration}
|
|
869
|
+
`${a.tick},"${a.type}","${JSON.stringify(a.data).replace(/"/g, '""')}",${a.duration}`,
|
|
863
870
|
),
|
|
864
|
-
].join(
|
|
871
|
+
].join("\n");
|
|
865
872
|
|
|
866
|
-
await fs.writeFile(path.join(outputDir,
|
|
873
|
+
await fs.writeFile(path.join(outputDir, "actions.csv"), actionsCsv);
|
|
867
874
|
|
|
868
875
|
// Metrics CSV
|
|
869
876
|
const metricsCsv = [
|
|
870
|
-
|
|
877
|
+
"metric,value",
|
|
871
878
|
`total_pnl,${result.metrics.totalPnl}`,
|
|
872
879
|
`prediction_accuracy,${result.metrics.predictionMetrics.accuracy}`,
|
|
873
880
|
`perp_win_rate,${result.metrics.perpMetrics.winRate}`,
|
|
874
881
|
`optimality_score,${result.metrics.optimalityScore}`,
|
|
875
882
|
`avg_response_time,${result.metrics.timing.avgResponseTime}`,
|
|
876
|
-
].join(
|
|
883
|
+
].join("\n");
|
|
877
884
|
|
|
878
|
-
await fs.writeFile(path.join(outputDir,
|
|
885
|
+
await fs.writeFile(path.join(outputDir, "metrics.csv"), metricsCsv);
|
|
879
886
|
}
|
|
880
887
|
|
|
881
888
|
/**
|
|
@@ -883,17 +890,17 @@ FINAL | $${finalBase.padEnd(21)} | $${finalChal.padEnd(
|
|
|
883
890
|
*/
|
|
884
891
|
private static async exportComparisonToCsv(
|
|
885
892
|
comparison: BenchmarkComparisonResult,
|
|
886
|
-
outputDir: string
|
|
893
|
+
outputDir: string,
|
|
887
894
|
): Promise<void> {
|
|
888
895
|
const csv = [
|
|
889
|
-
|
|
896
|
+
"run,total_pnl,accuracy,optimality,duration",
|
|
890
897
|
...comparison.runs.map(
|
|
891
898
|
(run, i) =>
|
|
892
|
-
`${i + 1},${run.metrics.totalPnl},${run.metrics.predictionMetrics.accuracy},${run.metrics.optimalityScore},${run.metrics.timing.totalDuration}
|
|
899
|
+
`${i + 1},${run.metrics.totalPnl},${run.metrics.predictionMetrics.accuracy},${run.metrics.optimalityScore},${run.metrics.timing.totalDuration}`,
|
|
893
900
|
),
|
|
894
|
-
].join(
|
|
901
|
+
].join("\n");
|
|
895
902
|
|
|
896
|
-
await fs.writeFile(path.join(outputDir,
|
|
903
|
+
await fs.writeFile(path.join(outputDir, "comparison.csv"), csv);
|
|
897
904
|
}
|
|
898
905
|
|
|
899
906
|
/**
|