@elizaos/training 2.0.0-alpha.76 → 2.0.0-alpha.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/.turbo/turbo-lint.log +0 -3
- package/.turbo/turbo-typecheck.log +0 -1
- package/dist/.tsbuildinfo +0 -1
- package/dist/adapter.js +0 -59
- package/dist/archetypes/ArchetypeConfigService.js +0 -510
- package/dist/archetypes/derive-archetype.js +0 -196
- package/dist/archetypes/index.js +0 -7
- package/dist/benchmark/ArchetypeMatchupBenchmark.js +0 -547
- package/dist/benchmark/BenchmarkChartGenerator.js +0 -632
- package/dist/benchmark/BenchmarkDataGenerator.js +0 -825
- package/dist/benchmark/BenchmarkDataViewer.js +0 -197
- package/dist/benchmark/BenchmarkHistoryService.js +0 -135
- package/dist/benchmark/BenchmarkRunner.js +0 -483
- package/dist/benchmark/BenchmarkValidator.js +0 -158
- package/dist/benchmark/FastEvalRunner.js +0 -133
- package/dist/benchmark/MetricsValidator.js +0 -104
- package/dist/benchmark/MetricsVisualizer.js +0 -775
- package/dist/benchmark/ModelBenchmarkService.js +0 -433
- package/dist/benchmark/ModelRegistry.js +0 -122
- package/dist/benchmark/RulerBenchmarkIntegration.js +0 -168
- package/dist/benchmark/SimulationA2AInterface.js +0 -683
- package/dist/benchmark/SimulationEngine.js +0 -522
- package/dist/benchmark/TaskRunner.js +0 -60
- package/dist/benchmark/__tests__/BenchmarkRunner.test.js +0 -409
- package/dist/benchmark/__tests__/HeadToHead.test.js +0 -105
- package/dist/benchmark/index.js +0 -23
- package/dist/benchmark/parseSimulationMetrics.js +0 -86
- package/dist/benchmark/simulation-types.js +0 -1
- package/dist/dependencies.js +0 -197
- package/dist/generation/TrajectoryGenerator.js +0 -244
- package/dist/generation/index.js +0 -6
- package/dist/huggingface/HuggingFaceDatasetUploader.js +0 -463
- package/dist/huggingface/HuggingFaceIntegrationService.js +0 -272
- package/dist/huggingface/HuggingFaceModelUploader.js +0 -385
- package/dist/huggingface/index.js +0 -9
- package/dist/huggingface/shared/HuggingFaceUploadUtil.js +0 -144
- package/dist/index.js +0 -41
- package/dist/init-training.js +0 -43
- package/dist/metrics/TrajectoryMetricsExtractor.js +0 -523
- package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +0 -628
- package/dist/metrics/index.js +0 -7
- package/dist/metrics/types.js +0 -21
- package/dist/rubrics/__tests__/index.test.js +0 -150
- package/dist/rubrics/ass-kisser.js +0 -83
- package/dist/rubrics/degen.js +0 -78
- package/dist/rubrics/goody-twoshoes.js +0 -82
- package/dist/rubrics/index.js +0 -184
- package/dist/rubrics/information-trader.js +0 -82
- package/dist/rubrics/infosec.js +0 -99
- package/dist/rubrics/liar.js +0 -102
- package/dist/rubrics/perps-trader.js +0 -85
- package/dist/rubrics/researcher.js +0 -79
- package/dist/rubrics/scammer.js +0 -80
- package/dist/rubrics/social-butterfly.js +0 -71
- package/dist/rubrics/super-predictor.js +0 -95
- package/dist/rubrics/trader.js +0 -65
- package/dist/scoring/ArchetypeScoringService.js +0 -301
- package/dist/scoring/JudgePromptBuilder.js +0 -401
- package/dist/scoring/LLMJudgeCache.js +0 -263
- package/dist/scoring/index.js +0 -8
- package/dist/training/AutomationPipeline.js +0 -714
- package/dist/training/BenchmarkService.js +0 -370
- package/dist/training/ConfigValidator.js +0 -153
- package/dist/training/MarketOutcomesTracker.js +0 -142
- package/dist/training/ModelDeployer.js +0 -128
- package/dist/training/ModelFetcher.js +0 -48
- package/dist/training/ModelSelectionService.js +0 -248
- package/dist/training/ModelUsageVerifier.js +0 -106
- package/dist/training/MultiModelOrchestrator.js +0 -349
- package/dist/training/RLModelConfig.js +0 -295
- package/dist/training/RewardBackpropagationService.js +0 -117
- package/dist/training/RulerScoringService.js +0 -450
- package/dist/training/TrainingMonitor.js +0 -108
- package/dist/training/TrajectoryRecorder.js +0 -281
- package/dist/training/__tests__/TrajectoryRecorder.test.js +0 -363
- package/dist/training/index.js +0 -30
- package/dist/training/logRLConfig.js +0 -29
- package/dist/training/pipeline.js +0 -80
- package/dist/training/storage/ModelStorageService.js +0 -190
- package/dist/training/storage/TrainingDataArchiver.js +0 -136
- package/dist/training/storage/index.js +0 -7
- package/dist/training/types.js +0 -6
- package/dist/training/window-utils.js +0 -100
- package/dist/utils/index.js +0 -73
- package/dist/utils/logger.js +0 -55
- package/dist/utils/snowflake.js +0 -15
- package/dist/utils/synthetic-detector.js +0 -67
- package/vitest.config.ts +0 -8
|
@@ -1,632 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Benchmark Chart Generator
|
|
3
|
-
*
|
|
4
|
-
* Generates charts and visualizations for benchmark results.
|
|
5
|
-
* Creates interactive HTML reports with embedded charts.
|
|
6
|
-
*/
|
|
7
|
-
import { promises as fs } from "node:fs";
|
|
8
|
-
import * as path from "node:path";
|
|
9
|
-
/**
|
|
10
|
-
* Color palette for charts
|
|
11
|
-
*/
|
|
12
|
-
// Frozen so that no chart builder can accidentally mutate the shared palette.
const CHART_COLORS = Object.freeze({
    primary: "#3b82f6",
    success: "#10b981",
    warning: "#f59e0b",
    danger: "#ef4444",
    purple: "#8b5cf6",
    cyan: "#06b6d4",
    pink: "#ec4899",
    gray: "#6b7280",
});
|
|
22
|
-
/**
|
|
23
|
-
* Generates benchmark charts and reports
|
|
24
|
-
*/
|
|
25
|
-
// biome-ignore lint/complexity/noStaticOnlyClass: Chart generator namespace - methods are logically grouped
|
|
26
|
-
export class BenchmarkChartGenerator {
    /**
     * Generate a comprehensive HTML report with charts and write it to disk.
     *
     * The parent directory of `outputPath` is created (recursively) if needed.
     * Caller-supplied text (`options.title`, `options.benchmarkId`, model names)
     * is HTML-escaped before being placed in the page.
     *
     * @param {Array<object>} results - One entry per model; each entry is read
     *   for `modelName`, `modelId` and the `metrics` fields used by the
     *   section builders below.
     * @param {string} outputPath - File path the HTML report is written to.
     * @param {object} [options]
     * @param {string} [options.title] - Report title (default "Benchmark Report").
     * @param {string} [options.benchmarkId] - Benchmark identifier (default "unknown").
     * @param {Array<object>} [options.includeHistory] - When truthy, passed to
     *   `generateHistorySection` to add the history card.
     * @returns {Promise<string>} Resolves to `outputPath` once the file is written.
     */
    static async generateReport(results, outputPath, options = {}) {
        // Escape once up front: both values are rendered in several places.
        const title = escapeHtml(options.title ?? "Benchmark Report");
        const benchmarkId = escapeHtml(options.benchmarkId ?? "unknown");
        const html = `
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>${title}</title>
  <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.1/dist/chart.umd.min.js"></script>
  <style>
    :root {
      --bg-primary: #0f172a;
      --bg-secondary: #1e293b;
      --bg-tertiary: #334155;
      --text-primary: #f8fafc;
      --text-secondary: #94a3b8;
      --accent-primary: #3b82f6;
      --accent-success: #10b981;
      --accent-warning: #f59e0b;
      --accent-danger: #ef4444;
      --border-color: #475569;
    }

    * {
      box-sizing: border-box;
      margin: 0;
      padding: 0;
    }

    body {
      font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
      background: var(--bg-primary);
      color: var(--text-primary);
      min-height: 100vh;
      padding: 2rem;
    }

    .container {
      max-width: 1400px;
      margin: 0 auto;
    }

    header {
      margin-bottom: 2rem;
      padding-bottom: 1.5rem;
      border-bottom: 1px solid var(--border-color);
    }

    h1 {
      font-size: 2rem;
      font-weight: 700;
      margin-bottom: 0.5rem;
      background: linear-gradient(135deg, var(--accent-primary), var(--accent-success));
      -webkit-background-clip: text;
      -webkit-text-fill-color: transparent;
      background-clip: text;
    }

    .subtitle {
      color: var(--text-secondary);
      font-size: 1rem;
    }

    .grid {
      display: grid;
      gap: 1.5rem;
    }

    .grid-2 {
      grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
    }

    .grid-3 {
      grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
    }

    .card {
      background: var(--bg-secondary);
      border-radius: 16px;
      padding: 1.5rem;
      border: 1px solid var(--border-color);
    }

    .card-title {
      font-size: 1.125rem;
      font-weight: 600;
      margin-bottom: 1rem;
      color: var(--text-primary);
    }

    .chart-container {
      position: relative;
      height: 300px;
    }

    .stats-grid {
      display: grid;
      grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
      gap: 1rem;
    }

    .stat-card {
      background: var(--bg-tertiary);
      border-radius: 12px;
      padding: 1rem;
      text-align: center;
    }

    .stat-value {
      font-size: 1.5rem;
      font-weight: 700;
      margin-bottom: 0.25rem;
    }

    .stat-value.positive {
      color: var(--accent-success);
    }

    .stat-value.negative {
      color: var(--accent-danger);
    }

    .stat-label {
      font-size: 0.75rem;
      color: var(--text-secondary);
      text-transform: uppercase;
      letter-spacing: 0.05em;
    }

    table {
      width: 100%;
      border-collapse: collapse;
      font-size: 0.875rem;
    }

    th, td {
      padding: 0.75rem 1rem;
      text-align: left;
      border-bottom: 1px solid var(--border-color);
    }

    th {
      font-weight: 600;
      color: var(--text-secondary);
      font-size: 0.75rem;
      text-transform: uppercase;
      letter-spacing: 0.05em;
    }

    tr:hover {
      background: var(--bg-tertiary);
    }

    .badge {
      display: inline-block;
      padding: 0.25rem 0.75rem;
      border-radius: 9999px;
      font-size: 0.75rem;
      font-weight: 600;
    }

    .badge-success {
      background: rgba(16, 185, 129, 0.2);
      color: var(--accent-success);
    }

    .badge-warning {
      background: rgba(245, 158, 11, 0.2);
      color: var(--accent-warning);
    }

    .badge-danger {
      background: rgba(239, 68, 68, 0.2);
      color: var(--accent-danger);
    }

    .winner-tag {
      display: inline-flex;
      align-items: center;
      gap: 0.25rem;
      background: linear-gradient(135deg, #fbbf24, #f59e0b);
      color: #1f2937;
      padding: 0.25rem 0.5rem;
      border-radius: 4px;
      font-size: 0.625rem;
      font-weight: 700;
      text-transform: uppercase;
    }

    .timestamp {
      color: var(--text-secondary);
      font-size: 0.875rem;
      margin-top: 2rem;
      text-align: center;
    }
  </style>
</head>
<body>
  <div class="container">
    <header>
      <h1>📊 ${title}</h1>
      <p class="subtitle">Benchmark: ${benchmarkId} | Models: ${results.length}</p>
    </header>

    ${BenchmarkChartGenerator.generateSummaryStats(results)}

    <div class="grid grid-2" style="margin-top: 1.5rem;">
      ${BenchmarkChartGenerator.generatePnLChartCard()}
      ${BenchmarkChartGenerator.generateAccuracyChartCard()}
    </div>

    <div class="grid grid-2" style="margin-top: 1.5rem;">
      ${BenchmarkChartGenerator.generatePerpMetricsChartCard()}
      ${BenchmarkChartGenerator.generateTimingChartCard()}
    </div>

    ${BenchmarkChartGenerator.generateComparisonTable(results)}

    ${options.includeHistory ? BenchmarkChartGenerator.generateHistorySection(options.includeHistory) : ""}

    <p class="timestamp">Generated: ${new Date().toLocaleString()}</p>
  </div>

  <script>
    Chart.defaults.color = '#94a3b8';
    Chart.defaults.borderColor = '#475569';

    ${BenchmarkChartGenerator.generateChartScripts(results)}
  </script>
</body>
</html>`;
        await fs.mkdir(path.dirname(outputPath), { recursive: true });
        await fs.writeFile(outputPath, html, "utf-8");
        return outputPath;
    }

    /**
     * Generate the summary stats section (best/average P&L and accuracy,
     * plus the number of models).
     *
     * @param {Array<object>} results - Entries providing `modelName`,
     *   `metrics.totalPnl` and `metrics.predictionMetrics.accuracy`
     *   (accuracy is multiplied by 100 for display).
     * @returns {string} HTML fragment, or "" when `results` is empty.
     */
    static generateSummaryStats(results) {
        if (results.length === 0) {
            return "";
        }
        // Find best model for each metric (ties keep the earliest entry).
        const bestPnl = results.reduce((best, curr) => curr.metrics.totalPnl > best.metrics.totalPnl ? curr : best);
        const bestAccuracy = results.reduce((best, curr) => curr.metrics.predictionMetrics.accuracy >
            best.metrics.predictionMetrics.accuracy
            ? curr
            : best);
        const avgPnl = results.reduce((sum, r) => sum + r.metrics.totalPnl, 0) / results.length;
        const avgAccuracy = results.reduce((sum, r) => sum + r.metrics.predictionMetrics.accuracy, 0) / results.length;
        return `
    <div class="stats-grid">
      <div class="stat-card">
        <div class="stat-value ${bestPnl.metrics.totalPnl >= 0 ? "positive" : "negative"}">
          ${bestPnl.metrics.totalPnl >= 0 ? "+" : ""}$${bestPnl.metrics.totalPnl.toFixed(0)}
        </div>
        <div class="stat-label">Best P&L (${escapeHtml(bestPnl.modelName)})</div>
      </div>
      <div class="stat-card">
        <div class="stat-value">${(bestAccuracy.metrics.predictionMetrics.accuracy * 100).toFixed(1)}%</div>
        <div class="stat-label">Best Accuracy (${escapeHtml(bestAccuracy.modelName)})</div>
      </div>
      <div class="stat-card">
        <div class="stat-value ${avgPnl >= 0 ? "positive" : "negative"}">
          ${avgPnl >= 0 ? "+" : ""}$${avgPnl.toFixed(0)}
        </div>
        <div class="stat-label">Average P&L</div>
      </div>
      <div class="stat-card">
        <div class="stat-value">${(avgAccuracy * 100).toFixed(1)}%</div>
        <div class="stat-label">Average Accuracy</div>
      </div>
      <div class="stat-card">
        <div class="stat-value">${results.length}</div>
        <div class="stat-label">Models Tested</div>
      </div>
    </div>`;
    }

    /**
     * Generate P&L chart card (canvas id `pnlChart`).
     *
     * @returns {string} HTML fragment.
     */
    static generatePnLChartCard() {
        return `
    <div class="card">
      <h3 class="card-title">💰 Total P&L Comparison</h3>
      <div class="chart-container">
        <canvas id="pnlChart"></canvas>
      </div>
    </div>`;
    }

    /**
     * Generate accuracy chart card (canvas id `accuracyChart`).
     *
     * @returns {string} HTML fragment.
     */
    static generateAccuracyChartCard() {
        return `
    <div class="card">
      <h3 class="card-title">🎯 Prediction Accuracy</h3>
      <div class="chart-container">
        <canvas id="accuracyChart"></canvas>
      </div>
    </div>`;
    }

    /**
     * Generate perp metrics chart card (canvas id `perpChart`).
     *
     * @returns {string} HTML fragment.
     */
    static generatePerpMetricsChartCard() {
        return `
    <div class="card">
      <h3 class="card-title">📈 Perpetual Trading Metrics</h3>
      <div class="chart-container">
        <canvas id="perpChart"></canvas>
      </div>
    </div>`;
    }

    /**
     * Generate timing chart card (canvas id `timingChart`).
     *
     * @returns {string} HTML fragment.
     */
    static generateTimingChartCard() {
        return `
    <div class="card">
      <h3 class="card-title">⏱️ Response Time</h3>
      <div class="chart-container">
        <canvas id="timingChart"></canvas>
      </div>
    </div>`;
    }

    /**
     * Generate the detailed comparison table, one row per result, ordered by
     * total P&L descending. Only the top row carries the winner tag.
     *
     * @param {Array<object>} results - Entries providing `modelName` and the
     *   `metrics` fields rendered in the columns below. Not mutated.
     * @returns {string} HTML fragment containing the table card.
     */
    static generateComparisonTable(results) {
        // Sort a copy by P&L descending so the caller's array keeps its order.
        const sorted = [...results].sort((a, b) => b.metrics.totalPnl - a.metrics.totalPnl);
        const rows = sorted
            .map((r, index) => {
            const pnlClass = r.metrics.totalPnl >= 0 ? "positive" : "negative";
            // Position-based: matching on modelId would tag every row that
            // shares (or lacks) an id with the top row.
            const isWinner = index === 0;
            const accuracyBadge = r.metrics.predictionMetrics.accuracy >= 0.6
                ? "badge-success"
                : r.metrics.predictionMetrics.accuracy >= 0.4
                    ? "badge-warning"
                    : "badge-danger";
            return `
        <tr>
          <td>
            <strong>${escapeHtml(r.modelName)}</strong>
            ${isWinner ? '<span class="winner-tag">🏆 Winner</span>' : ""}
          </td>
          <td class="${pnlClass}">
            ${r.metrics.totalPnl >= 0 ? "+" : ""}$${r.metrics.totalPnl.toFixed(2)}
          </td>
          <td>
            <span class="badge ${accuracyBadge}">
              ${(r.metrics.predictionMetrics.accuracy * 100).toFixed(1)}%
            </span>
          </td>
          <td>${r.metrics.predictionMetrics.correctPredictions}/${r.metrics.predictionMetrics.totalPositions}</td>
          <td>${r.metrics.perpMetrics.totalTrades}</td>
          <td>${(r.metrics.perpMetrics.winRate * 100).toFixed(1)}%</td>
          <td>${r.metrics.optimalityScore.toFixed(1)}%</td>
          <td>${(r.metrics.timing.totalDuration / 1000).toFixed(1)}s</td>
        </tr>`;
        })
            .join("");
        return `
    <div class="card" style="margin-top: 1.5rem;">
      <h3 class="card-title">📋 Detailed Comparison</h3>
      <table>
        <thead>
          <tr>
            <th>Model</th>
            <th>Total P&L</th>
            <th>Accuracy</th>
            <th>Correct/Total</th>
            <th>Perp Trades</th>
            <th>Win Rate</th>
            <th>Optimality</th>
            <th>Duration</th>
          </tr>
        </thead>
        <tbody>
          ${rows}
        </tbody>
      </table>
    </div>`;
    }

    /**
     * Generate the history section card (canvas id `historyChart`).
     *
     * NOTE(review): nothing in `generateChartScripts` draws onto
     * `historyChart`, so this card currently renders an empty canvas.
     *
     * @param {Array<object>} history - History entries; only the length is
     *   inspected here.
     * @returns {string} HTML fragment, or "" when `history` is empty.
     */
    static generateHistorySection(history) {
        if (history.length === 0) {
            return "";
        }
        return `
    <div class="card" style="margin-top: 1.5rem;">
      <h3 class="card-title">📈 Historical Performance</h3>
      <div class="chart-container" style="height: 400px;">
        <canvas id="historyChart"></canvas>
      </div>
    </div>`;
    }

    /**
     * Generate the Chart.js bootstrap code for the four chart canvases
     * (`pnlChart`, `accuracyChart`, `perpChart`, `timingChart`).
     *
     * The returned text is meant to sit inside an inline `<script>` element,
     * so embedded data is serialized such that it cannot close that element.
     *
     * @param {Array<object>} results - Entries providing `modelName` and the
     *   `metrics` fields plotted below.
     * @returns {string} JavaScript source text.
     */
    static generateChartScripts(results) {
        const labels = toInlineScriptJson(results.map((r) => r.modelName));
        const pnlValues = results.map((r) => r.metrics.totalPnl);
        const pnlData = toInlineScriptJson(pnlValues);
        const accuracyData = toInlineScriptJson(results.map((r) => r.metrics.predictionMetrics.accuracy * 100));
        const winRateData = toInlineScriptJson(results.map((r) => r.metrics.perpMetrics.winRate * 100));
        const optimalityData = toInlineScriptJson(results.map((r) => r.metrics.optimalityScore));
        // totalDuration is divided by 1000 and labelled seconds — presumably milliseconds; confirm.
        const durationData = toInlineScriptJson(results.map((r) => r.metrics.timing.totalDuration / 1000));
        // Green bars for non-negative P&L, red for losses.
        const pnlColors = toInlineScriptJson(pnlValues.map((v) => v >= 0 ? CHART_COLORS.success : CHART_COLORS.danger));
        return `
    // P&L Chart
    new Chart(document.getElementById('pnlChart'), {
      type: 'bar',
      data: {
        labels: ${labels},
        datasets: [{
          label: 'Total P&L ($)',
          data: ${pnlData},
          backgroundColor: ${pnlColors},
          borderRadius: 8,
        }]
      },
      options: {
        responsive: true,
        maintainAspectRatio: false,
        plugins: {
          legend: { display: false }
        },
        scales: {
          y: {
            beginAtZero: true,
            grid: { color: '#334155' }
          },
          x: {
            grid: { display: false }
          }
        }
      }
    });

    // Accuracy Chart
    new Chart(document.getElementById('accuracyChart'), {
      type: 'bar',
      data: {
        labels: ${labels},
        datasets: [{
          label: 'Prediction Accuracy (%)',
          data: ${accuracyData},
          backgroundColor: '${CHART_COLORS.primary}',
          borderRadius: 8,
        }]
      },
      options: {
        responsive: true,
        maintainAspectRatio: false,
        plugins: {
          legend: { display: false }
        },
        scales: {
          y: {
            beginAtZero: true,
            max: 100,
            grid: { color: '#334155' }
          },
          x: {
            grid: { display: false }
          }
        }
      }
    });

    // Perp Metrics Chart (grouped bar)
    new Chart(document.getElementById('perpChart'), {
      type: 'bar',
      data: {
        labels: ${labels},
        datasets: [
          {
            label: 'Win Rate (%)',
            data: ${winRateData},
            backgroundColor: '${CHART_COLORS.success}',
            borderRadius: 4,
          },
          {
            label: 'Optimality (%)',
            data: ${optimalityData},
            backgroundColor: '${CHART_COLORS.purple}',
            borderRadius: 4,
          }
        ]
      },
      options: {
        responsive: true,
        maintainAspectRatio: false,
        scales: {
          y: {
            beginAtZero: true,
            max: 100,
            grid: { color: '#334155' }
          },
          x: {
            grid: { display: false }
          }
        }
      }
    });

    // Timing Chart
    new Chart(document.getElementById('timingChart'), {
      type: 'bar',
      data: {
        labels: ${labels},
        datasets: [{
          label: 'Total Duration (s)',
          data: ${durationData},
          backgroundColor: '${CHART_COLORS.cyan}',
          borderRadius: 8,
        }]
      },
      options: {
        responsive: true,
        maintainAspectRatio: false,
        plugins: {
          legend: { display: false }
        },
        scales: {
          y: {
            beginAtZero: true,
            grid: { color: '#334155' }
          },
          x: {
            grid: { display: false }
          }
        }
      }
    });`;
    }

    /**
     * Generate a simple terminal-friendly horizontal bar chart.
     *
     * Bar length is proportional to |value| relative to the largest |value|.
     * Non-negative values use solid green bars, negative values use shaded
     * red bars (ANSI colour codes). Empty `data` yields just the title and
     * the two rules instead of throwing.
     *
     * @param {string} title - Heading printed above the chart.
     * @param {Array<{label: string, value: number}>} data - Bars to draw.
     * @param {object} [options]
     * @param {number} [options.width] - Length of the longest bar (default 40).
     * @param {(v: number) => string} [options.valueFormat] - Formatter for the
     *   value printed after each bar (default: two decimals).
     * @returns {string} Chart text with lines joined by "\n".
     */
    static generateTerminalChart(title, data, options = {}) {
        const width = options.width ?? 40;
        const formatValue = options.valueFormat ?? ((v) => v.toFixed(2));
        // Fold from 0 rather than Math.max(...[]): the latter is -Infinity
        // for empty data, which makes String.prototype.repeat throw.
        let maxValue = 0;
        let maxLabelLen = 0;
        for (const item of data) {
            maxValue = Math.max(maxValue, Math.abs(item.value));
            maxLabelLen = Math.max(maxLabelLen, item.label.length);
        }
        const rule = ` ${"─".repeat(width + maxLabelLen + 20)}`;
        const reset = "\x1b[0m";
        const lines = [];
        lines.push(`\n ${title}`);
        lines.push(rule);
        for (const item of data) {
            const isGain = item.value >= 0;
            const normalizedValue = maxValue > 0 ? Math.abs(item.value) / maxValue : 0;
            const barLen = Math.round(normalizedValue * width);
            const bar = (isGain ? "█" : "░").repeat(barLen);
            const color = isGain ? "\x1b[32m" : "\x1b[31m";
            const paddedLabel = item.label.padEnd(maxLabelLen);
            lines.push(` ${paddedLabel} │${color}${bar}${reset} ${formatValue(item.value)}`);
        }
        lines.push(rule);
        return lines.join("\n");
    }

    /**
     * Generate a comparison summary for terminal output: a P&L chart, an
     * accuracy chart and, when there is at least one result, a winner block.
     *
     * @param {Array<object>} results - Entries providing `modelName`,
     *   `metrics.totalPnl` and `metrics.predictionMetrics.accuracy`. Not mutated.
     * @returns {string} Summary text with lines joined by "\n".
     */
    static generateTerminalSummary(results) {
        const sorted = [...results].sort((a, b) => b.metrics.totalPnl - a.metrics.totalPnl);
        const winner = sorted[0];
        const lines = [];
        lines.push("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
        lines.push("📊 BENCHMARK RESULTS");
        lines.push("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
        // P&L Chart
        lines.push(BenchmarkChartGenerator.generateTerminalChart("💰 Total P&L", sorted.map((r) => ({ label: r.modelName, value: r.metrics.totalPnl })), { valueFormat: (v) => `$${v.toFixed(2)}` }));
        // Accuracy Chart
        lines.push(BenchmarkChartGenerator.generateTerminalChart("🎯 Prediction Accuracy", sorted.map((r) => ({
            label: r.modelName,
            value: r.metrics.predictionMetrics.accuracy * 100,
        })), { valueFormat: (v) => `${v.toFixed(1)}%` }));
        // Winner
        if (winner) {
            const loser = sorted[sorted.length - 1];
            const pnlDelta = winner.metrics.totalPnl - (loser?.metrics.totalPnl ?? 0);
            lines.push("\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
            lines.push(`🏆 WINNER: ${winner.modelName}`);
            lines.push(` P&L: $${winner.metrics.totalPnl.toFixed(2)}`);
            lines.push(` Accuracy: ${(winner.metrics.predictionMetrics.accuracy * 100).toFixed(1)}%`);
            // The lead only means something when there is a second model to compare against.
            if (results.length > 1 && loser) {
                lines.push(` Lead: $${pnlDelta.toFixed(2)} over ${loser.modelName}`);
            }
            lines.push("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n");
        }
        return lines.join("\n");
    }
}

/**
 * Escape a value for safe inclusion in HTML text or a quoted attribute.
 *
 * @param {unknown} value - Converted with String() before escaping.
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
    return String(value)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}

/**
 * Serialize a value as JSON that is safe inside an inline <script> element.
 * Every "<" is emitted as the \u003c escape so the data can never form
 * "</script>" or "<!--"; the parsed value is unchanged.
 *
 * @param {unknown} value - Any JSON-serializable value.
 * @returns {string} JSON text with "<" escaped.
 */
function toInlineScriptJson(value) {
    return JSON.stringify(value).replace(/</g, "\\u003c");
}
|