@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/package.json +2 -2
  2. package/research-output/training-runs/training-run-1773726941205.json +38 -0
  3. package/scripts/rank_trajectories.ts +0 -1
  4. package/scripts/run_task_benchmark.ts +4 -11
  5. package/src/adapter.ts +96 -49
  6. package/src/archetypes/ArchetypeConfigService.ts +188 -185
  7. package/src/archetypes/derive-archetype.ts +47 -47
  8. package/src/archetypes/index.ts +2 -2
  9. package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
  10. package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
  11. package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
  12. package/src/benchmark/BenchmarkDataViewer.ts +32 -30
  13. package/src/benchmark/BenchmarkHistoryService.ts +13 -12
  14. package/src/benchmark/BenchmarkRunner.ts +87 -83
  15. package/src/benchmark/BenchmarkValidator.ts +48 -46
  16. package/src/benchmark/FastEvalRunner.ts +17 -16
  17. package/src/benchmark/MetricsValidator.ts +20 -21
  18. package/src/benchmark/MetricsVisualizer.ts +92 -85
  19. package/src/benchmark/ModelBenchmarkService.ts +90 -82
  20. package/src/benchmark/ModelRegistry.ts +44 -44
  21. package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
  22. package/src/benchmark/SimulationA2AInterface.ts +118 -118
  23. package/src/benchmark/SimulationEngine.ts +51 -51
  24. package/src/benchmark/TaskRunner.ts +87 -79
  25. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
  26. package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
  27. package/src/benchmark/index.ts +27 -27
  28. package/src/benchmark/parseSimulationMetrics.ts +32 -32
  29. package/src/benchmark/simulation-types.ts +10 -10
  30. package/src/dependencies.ts +34 -34
  31. package/src/generation/TrajectoryGenerator.ts +39 -37
  32. package/src/generation/index.ts +1 -1
  33. package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
  34. package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
  35. package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
  36. package/src/huggingface/index.ts +6 -6
  37. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
  38. package/src/index.ts +27 -27
  39. package/src/init-training.ts +6 -6
  40. package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
  41. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
  42. package/src/metrics/index.ts +2 -2
  43. package/src/rubrics/__tests__/index.test.ts +73 -73
  44. package/src/rubrics/ass-kisser.ts +6 -6
  45. package/src/rubrics/degen.ts +6 -6
  46. package/src/rubrics/goody-twoshoes.ts +6 -6
  47. package/src/rubrics/index.ts +50 -50
  48. package/src/rubrics/information-trader.ts +6 -6
  49. package/src/rubrics/infosec.ts +6 -6
  50. package/src/rubrics/liar.ts +6 -6
  51. package/src/rubrics/perps-trader.ts +6 -6
  52. package/src/rubrics/researcher.ts +6 -6
  53. package/src/rubrics/scammer.ts +6 -6
  54. package/src/rubrics/social-butterfly.ts +7 -7
  55. package/src/rubrics/super-predictor.ts +6 -6
  56. package/src/rubrics/trader.ts +5 -5
  57. package/src/scoring/ArchetypeScoringService.ts +56 -54
  58. package/src/scoring/JudgePromptBuilder.ts +96 -96
  59. package/src/scoring/LLMJudgeCache.ts +26 -23
  60. package/src/scoring/index.ts +3 -3
  61. package/src/training/AutomationPipeline.ts +149 -140
  62. package/src/training/BenchmarkService.ts +49 -45
  63. package/src/training/ConfigValidator.ts +38 -32
  64. package/src/training/MarketOutcomesTracker.ts +22 -12
  65. package/src/training/ModelDeployer.ts +15 -15
  66. package/src/training/ModelFetcher.ts +7 -7
  67. package/src/training/ModelSelectionService.ts +32 -32
  68. package/src/training/ModelUsageVerifier.ts +31 -24
  69. package/src/training/MultiModelOrchestrator.ts +44 -44
  70. package/src/training/RLModelConfig.ts +57 -57
  71. package/src/training/RewardBackpropagationService.ts +18 -17
  72. package/src/training/RulerScoringService.ts +73 -72
  73. package/src/training/TrainingMonitor.ts +29 -29
  74. package/src/training/TrajectoryRecorder.ts +25 -27
  75. package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
  76. package/src/training/index.ts +36 -36
  77. package/src/training/logRLConfig.ts +7 -7
  78. package/src/training/pipeline.ts +13 -16
  79. package/src/training/storage/ModelStorageService.ts +32 -32
  80. package/src/training/storage/TrainingDataArchiver.ts +21 -21
  81. package/src/training/storage/index.ts +2 -2
  82. package/src/training/types.ts +6 -6
  83. package/src/training/window-utils.ts +14 -14
  84. package/src/utils/index.ts +7 -7
  85. package/src/utils/logger.ts +5 -5
  86. package/src/utils/snowflake.ts +1 -1
  87. package/src/utils/synthetic-detector.ts +7 -7
@@ -5,9 +5,9 @@
5
5
  * and contains all required fields.
6
6
  */
7
7
 
8
- import type { JsonValue } from '../adapter';
9
- import { logger } from '../utils/logger';
10
- import type { BenchmarkGameSnapshot } from './BenchmarkDataGenerator';
8
+ import type { JsonValue } from "../adapter";
9
+ import { logger } from "../utils/logger";
10
+ import type { BenchmarkGameSnapshot } from "./BenchmarkDataGenerator";
11
11
 
12
12
  export interface BenchmarkValidationResult {
13
13
  valid: boolean;
@@ -23,58 +23,58 @@ export function validate(snapshot: unknown): BenchmarkValidationResult {
23
23
  const warnings: string[] = [];
24
24
 
25
25
  // 1. Check required top-level fields
26
- if (!snapshot || typeof snapshot !== 'object') {
27
- errors.push('Snapshot is null, undefined, or not an object');
26
+ if (!snapshot || typeof snapshot !== "object") {
27
+ errors.push("Snapshot is null, undefined, or not an object");
28
28
  return { valid: false, errors, warnings };
29
29
  }
30
30
 
31
31
  const snap = snapshot as Record<string, JsonValue>;
32
32
 
33
- if (!snap.id) errors.push('Missing required field: id');
34
- if (!snap.version) errors.push('Missing required field: version');
35
- if (typeof snap.duration !== 'number')
36
- errors.push('Missing or invalid field: duration');
37
- if (typeof snap.tickInterval !== 'number')
38
- errors.push('Missing or invalid field: tickInterval');
39
- if (!snap.initialState) errors.push('Missing required field: initialState');
33
+ if (!snap.id) errors.push("Missing required field: id");
34
+ if (!snap.version) errors.push("Missing required field: version");
35
+ if (typeof snap.duration !== "number")
36
+ errors.push("Missing or invalid field: duration");
37
+ if (typeof snap.tickInterval !== "number")
38
+ errors.push("Missing or invalid field: tickInterval");
39
+ if (!snap.initialState) errors.push("Missing required field: initialState");
40
40
  if (!Array.isArray(snap.ticks))
41
- errors.push('Missing or invalid field: ticks (must be array)');
42
- if (!snap.groundTruth) errors.push('Missing required field: groundTruth');
41
+ errors.push("Missing or invalid field: ticks (must be array)");
42
+ if (!snap.groundTruth) errors.push("Missing required field: groundTruth");
43
43
 
44
44
  // 2. Validate initial state
45
- if (snap.initialState && typeof snap.initialState === 'object') {
45
+ if (snap.initialState && typeof snap.initialState === "object") {
46
46
  const state = snap.initialState as Record<string, JsonValue>;
47
47
 
48
- if (typeof state.tick !== 'number')
49
- errors.push('initialState.tick must be a number');
50
- if (state.tick !== 0) warnings.push('initialState.tick should be 0');
48
+ if (typeof state.tick !== "number")
49
+ errors.push("initialState.tick must be a number");
50
+ if (state.tick !== 0) warnings.push("initialState.tick should be 0");
51
51
 
52
52
  if (!Array.isArray(state.predictionMarkets)) {
53
- errors.push('initialState.predictionMarkets must be an array');
53
+ errors.push("initialState.predictionMarkets must be an array");
54
54
  }
55
55
 
56
56
  if (!Array.isArray(state.perpetualMarkets)) {
57
- errors.push('initialState.perpetualMarkets must be an array');
57
+ errors.push("initialState.perpetualMarkets must be an array");
58
58
  }
59
59
 
60
60
  if (!Array.isArray(state.agents)) {
61
- errors.push('initialState.agents must be an array');
61
+ errors.push("initialState.agents must be an array");
62
62
  }
63
63
  }
64
64
 
65
65
  // 3. Validate ticks
66
66
  if (Array.isArray(snap.ticks)) {
67
67
  if (snap.ticks.length === 0) {
68
- warnings.push('Ticks array is empty');
68
+ warnings.push("Ticks array is empty");
69
69
  }
70
70
 
71
71
  snap.ticks.forEach((tick: JsonValue, index: number) => {
72
- if (!tick || typeof tick !== 'object') {
72
+ if (!tick || typeof tick !== "object") {
73
73
  errors.push(`Tick ${index}: invalid tick object`);
74
74
  return;
75
75
  }
76
76
  const tickObj = tick as Record<string, JsonValue>;
77
- if (typeof tickObj.number !== 'number') {
77
+ if (typeof tickObj.number !== "number") {
78
78
  errors.push(`Tick ${index}: missing or invalid 'number' field`);
79
79
  }
80
80
 
@@ -90,51 +90,51 @@ export function validate(snapshot: unknown): BenchmarkValidationResult {
90
90
  // Check tick numbering is sequential
91
91
  for (let i = 0; i < snap.ticks.length; i++) {
92
92
  const tick = snap.ticks[i] as Record<string, JsonValue> | undefined;
93
- if (tick && typeof tick.number === 'number' && tick.number !== i) {
93
+ if (tick && typeof tick.number === "number" && tick.number !== i) {
94
94
  warnings.push(`Tick ${i}: number ${tick.number} doesn't match index`);
95
95
  }
96
96
  }
97
97
  }
98
98
 
99
99
  // 4. Validate ground truth
100
- if (snap.groundTruth && typeof snap.groundTruth === 'object') {
100
+ if (snap.groundTruth && typeof snap.groundTruth === "object") {
101
101
  const gt = snap.groundTruth as Record<string, JsonValue>;
102
102
 
103
- if (!gt.marketOutcomes || typeof gt.marketOutcomes !== 'object') {
104
- errors.push('groundTruth.marketOutcomes must be an object');
103
+ if (!gt.marketOutcomes || typeof gt.marketOutcomes !== "object") {
104
+ errors.push("groundTruth.marketOutcomes must be an object");
105
105
  }
106
106
 
107
- if (!gt.priceHistory || typeof gt.priceHistory !== 'object') {
108
- errors.push('groundTruth.priceHistory must be an object');
107
+ if (!gt.priceHistory || typeof gt.priceHistory !== "object") {
108
+ errors.push("groundTruth.priceHistory must be an object");
109
109
  }
110
110
 
111
111
  if (!Array.isArray(gt.optimalActions)) {
112
- errors.push('groundTruth.optimalActions must be an array');
112
+ errors.push("groundTruth.optimalActions must be an array");
113
113
  }
114
114
 
115
115
  if (!Array.isArray(gt.socialOpportunities)) {
116
- errors.push('groundTruth.socialOpportunities must be an array');
116
+ errors.push("groundTruth.socialOpportunities must be an array");
117
117
  }
118
118
 
119
119
  if (!Array.isArray(gt.hiddenFacts)) {
120
- errors.push('groundTruth.hiddenFacts must be an array');
120
+ errors.push("groundTruth.hiddenFacts must be an array");
121
121
  }
122
122
 
123
123
  if (!Array.isArray(gt.hiddenEvents)) {
124
- errors.push('groundTruth.hiddenEvents must be an array');
124
+ errors.push("groundTruth.hiddenEvents must be an array");
125
125
  }
126
126
 
127
- if (!gt.trueFacts || typeof gt.trueFacts !== 'object') {
128
- errors.push('groundTruth.trueFacts must be an object');
127
+ if (!gt.trueFacts || typeof gt.trueFacts !== "object") {
128
+ errors.push("groundTruth.trueFacts must be an object");
129
129
  }
130
130
  }
131
131
 
132
132
  // 5. Cross-validate: markets in initialState should have outcomes in groundTruth
133
133
  if (
134
134
  snap.initialState &&
135
- typeof snap.initialState === 'object' &&
135
+ typeof snap.initialState === "object" &&
136
136
  snap.groundTruth &&
137
- typeof snap.groundTruth === 'object'
137
+ typeof snap.groundTruth === "object"
138
138
  ) {
139
139
  const initialState = snap.initialState as Record<string, JsonValue>;
140
140
  const groundTruth = snap.groundTruth as Record<string, JsonValue>;
@@ -145,7 +145,7 @@ export function validate(snapshot: unknown): BenchmarkValidationResult {
145
145
  ) as Array<Record<string, JsonValue>>;
146
146
  const outcomes = (
147
147
  groundTruth.marketOutcomes &&
148
- typeof groundTruth.marketOutcomes === 'object'
148
+ typeof groundTruth.marketOutcomes === "object"
149
149
  ? groundTruth.marketOutcomes
150
150
  : {}
151
151
  ) as Record<string, JsonValue>;
@@ -153,17 +153,17 @@ export function validate(snapshot: unknown): BenchmarkValidationResult {
153
153
  markets.forEach((market) => {
154
154
  if (
155
155
  market.id &&
156
- typeof market.id === 'string' &&
156
+ typeof market.id === "string" &&
157
157
  !(market.id in outcomes)
158
158
  ) {
159
159
  warnings.push(
160
- `Market ${market.id} in initialState but no outcome in groundTruth`
160
+ `Market ${market.id} in initialState but no outcome in groundTruth`,
161
161
  );
162
162
  }
163
163
  });
164
164
  }
165
165
 
166
- logger.info('Benchmark validation complete', {
166
+ logger.info("Benchmark validation complete", {
167
167
  valid: errors.length === 0,
168
168
  errors: errors.length,
169
169
  warnings: warnings.length,
@@ -179,8 +179,10 @@ export function validate(snapshot: unknown): BenchmarkValidationResult {
179
179
  /**
180
180
  * Quick sanity check (fast, minimal validation)
181
181
  */
182
- export function sanityCheck(snapshot: unknown): snapshot is BenchmarkGameSnapshot {
183
- if (!snapshot || typeof snapshot !== 'object') return false;
182
+ export function sanityCheck(
183
+ snapshot: unknown,
184
+ ): snapshot is BenchmarkGameSnapshot {
185
+ if (!snapshot || typeof snapshot !== "object") return false;
184
186
  const snap = snapshot as Record<string, JsonValue>;
185
187
  return !!(
186
188
  snap.id &&
@@ -194,11 +196,11 @@ export function sanityCheck(snapshot: unknown): snapshot is BenchmarkGameSnapsho
194
196
  * Validate and throw if invalid
195
197
  */
196
198
  export function validateOrThrow(
197
- snapshot: unknown
199
+ snapshot: unknown,
198
200
  ): asserts snapshot is BenchmarkGameSnapshot {
199
201
  const result = validate(snapshot);
200
202
 
201
203
  if (!result.valid) {
202
- throw new Error(`Invalid benchmark data: ${result.errors.join(', ')}`);
204
+ throw new Error(`Invalid benchmark data: ${result.errors.join(", ")}`);
203
205
  }
204
206
  }
@@ -8,16 +8,16 @@
8
8
  * - Progress tracking
9
9
  */
10
10
 
11
- import { logger } from '../utils/logger';
12
- import { type BenchmarkRunConfig, BenchmarkRunner } from './BenchmarkRunner';
13
- import type { SimulationResult } from './SimulationEngine';
11
+ import { logger } from "../utils/logger";
12
+ import { type BenchmarkRunConfig, BenchmarkRunner } from "./BenchmarkRunner";
13
+ import type { SimulationResult } from "./SimulationEngine";
14
14
 
15
15
  export interface FastEvalConfig {
16
16
  /** Benchmark file path */
17
17
  benchmarkPath: string;
18
18
 
19
19
  /** Agent runtime to test */
20
- agentRuntime: BenchmarkRunConfig['agentRuntime'];
20
+ agentRuntime: BenchmarkRunConfig["agentRuntime"];
21
21
 
22
22
  /** Agent user ID */
23
23
  agentUserId: string;
@@ -63,6 +63,7 @@ export interface FastEvalResult {
63
63
  worstRun: SimulationResult;
64
64
  }
65
65
 
66
+ // biome-ignore lint/complexity/noStaticOnlyClass: Runner namespace - run/runWithProgress are logically grouped
66
67
  export class FastEvalRunner {
67
68
  /**
68
69
  * Run fast evaluation
@@ -98,7 +99,7 @@ export class FastEvalRunner {
98
99
  const iterations = config.iterations || 1;
99
100
  const parallelRuns = config.parallelRuns || 1;
100
101
 
101
- logger.info('Starting fast evaluation', {
102
+ logger.info("Starting fast evaluation", {
102
103
  benchmarkPath: config.benchmarkPath,
103
104
  agentUserId: config.agentUserId,
104
105
  iterations,
@@ -118,7 +119,7 @@ export class FastEvalRunner {
118
119
  const batchSize = batchEnd - batchStart;
119
120
 
120
121
  logger.info(
121
- `Running batch ${batchStart + 1}-${batchEnd} of ${iterations}`
122
+ `Running batch ${batchStart + 1}-${batchEnd} of ${iterations}`,
122
123
  );
123
124
 
124
125
  // Run batch in parallel
@@ -157,18 +158,18 @@ export class FastEvalRunner {
157
158
  const avgAccuracy =
158
159
  results.reduce(
159
160
  (sum, r) => sum + r.metrics.predictionMetrics.accuracy,
160
- 0
161
+ 0,
161
162
  ) / results.length;
162
163
  const avgOptimality =
163
164
  results.reduce((sum, r) => sum + r.metrics.optimalityScore, 0) /
164
165
  results.length;
165
166
 
166
167
  const bestRun = results.reduce((best, current) =>
167
- current.metrics.totalPnl > best.metrics.totalPnl ? current : best
168
+ current.metrics.totalPnl > best.metrics.totalPnl ? current : best,
168
169
  );
169
170
 
170
171
  const worstRun = results.reduce((worst, current) =>
171
- current.metrics.totalPnl < worst.metrics.totalPnl ? current : worst
172
+ current.metrics.totalPnl < worst.metrics.totalPnl ? current : worst,
172
173
  );
173
174
 
174
175
  const summary = {
@@ -179,7 +180,7 @@ export class FastEvalRunner {
179
180
  runsCompleted: results.length,
180
181
  };
181
182
 
182
- logger.info('Fast evaluation completed', summary);
183
+ logger.info("Fast evaluation completed", summary);
183
184
 
184
185
  return {
185
186
  results,
@@ -193,22 +194,22 @@ export class FastEvalRunner {
193
194
  * Run evaluation with progress bar
194
195
  */
195
196
  static async runWithProgress(
196
- config: FastEvalConfig
197
+ config: FastEvalConfig,
197
198
  ): Promise<FastEvalResult> {
198
199
  let lastProgress = 0;
199
200
 
200
- return this.run({
201
+ return FastEvalRunner.run({
201
202
  ...config,
202
203
  onProgress: (progress) => {
203
204
  const percent = Math.round((progress.completed / progress.total) * 100);
204
205
  if (percent !== lastProgress) {
205
206
  const barLength = 40;
206
207
  const filled = Math.round(
207
- (progress.completed / progress.total) * barLength
208
+ (progress.completed / progress.total) * barLength,
208
209
  );
209
- const bar = ''.repeat(filled) + ''.repeat(barLength - filled);
210
+ const bar = "".repeat(filled) + "".repeat(barLength - filled);
210
211
  process.stdout.write(
211
- `\r[${bar}] ${percent}% (${progress.completed}/${progress.total})`
212
+ `\r[${bar}] ${percent}% (${progress.completed}/${progress.total})`,
212
213
  );
213
214
  lastProgress = percent;
214
215
  }
@@ -218,7 +219,7 @@ export class FastEvalRunner {
218
219
  }
219
220
  },
220
221
  }).then((result) => {
221
- process.stdout.write('\n');
222
+ process.stdout.write("\n");
222
223
  return result;
223
224
  });
224
225
  }
@@ -4,10 +4,10 @@
4
4
  * Validates that benchmark metrics are calculated correctly against ground truth.
5
5
  */
6
6
 
7
- import type { ValidationResult } from '../training/ConfigValidator';
8
- import { logger } from '../utils/logger';
9
- import type { GroundTruth } from './BenchmarkDataGenerator';
10
- import type { AgentAction, SimulationMetrics } from './simulation-types';
7
+ import type { ValidationResult } from "../training/ConfigValidator";
8
+ import { logger } from "../utils/logger";
9
+ import type { GroundTruth } from "./BenchmarkDataGenerator";
10
+ import type { AgentAction, SimulationMetrics } from "./simulation-types";
11
11
 
12
12
  export class MetricsValidator {
13
13
  /**
@@ -16,16 +16,16 @@ export class MetricsValidator {
16
16
  static validate(
17
17
  metrics: SimulationMetrics,
18
18
  actions: AgentAction[],
19
- groundTruth: GroundTruth
19
+ groundTruth: GroundTruth,
20
20
  ): ValidationResult {
21
21
  const errors: string[] = [];
22
22
  const warnings: string[] = [];
23
23
 
24
24
  // 1. Validate prediction accuracy calculation
25
- const predictionValidation = this.validatePredictionMetrics(
25
+ const predictionValidation = MetricsValidator.validatePredictionMetrics(
26
26
  metrics.predictionMetrics,
27
27
  actions,
28
- groundTruth
28
+ groundTruth,
29
29
  );
30
30
  errors.push(...predictionValidation.errors);
31
31
  warnings.push(...predictionValidation.warnings);
@@ -38,23 +38,23 @@ export class MetricsValidator {
38
38
  // 3. Validate timing metrics are reasonable
39
39
  if (metrics.timing.avgResponseTime < 0) {
40
40
  errors.push(
41
- `Invalid average response time: ${metrics.timing.avgResponseTime}`
41
+ `Invalid average response time: ${metrics.timing.avgResponseTime}`,
42
42
  );
43
43
  }
44
44
 
45
45
  if (metrics.timing.maxResponseTime < metrics.timing.avgResponseTime) {
46
46
  errors.push(
47
- `Max response time less than average: ${metrics.timing.maxResponseTime} < ${metrics.timing.avgResponseTime}`
47
+ `Max response time less than average: ${metrics.timing.maxResponseTime} < ${metrics.timing.avgResponseTime}`,
48
48
  );
49
49
  }
50
50
 
51
51
  // 4. Validate action counts match
52
52
  const predictionActions = actions.filter(
53
- (a) => a.type === 'buy_prediction'
53
+ (a) => a.type === "buy_prediction",
54
54
  );
55
55
  if (predictionActions.length !== metrics.predictionMetrics.totalPositions) {
56
56
  warnings.push(
57
- `Prediction action count mismatch: ${predictionActions.length} actions vs ${metrics.predictionMetrics.totalPositions} positions`
57
+ `Prediction action count mismatch: ${predictionActions.length} actions vs ${metrics.predictionMetrics.totalPositions} positions`,
58
58
  );
59
59
  }
60
60
 
@@ -64,20 +64,20 @@ export class MetricsValidator {
64
64
  const calculatedAccuracy =
65
65
  totalPositions > 0 ? correctPredictions / totalPositions : 0;
66
66
  const accuracyDiff = Math.abs(
67
- calculatedAccuracy - metrics.predictionMetrics.accuracy
67
+ calculatedAccuracy - metrics.predictionMetrics.accuracy,
68
68
  );
69
69
 
70
70
  if (accuracyDiff > 0.01) {
71
71
  // Allow 1% tolerance for floating point
72
72
  errors.push(
73
- `Accuracy calculation mismatch: reported ${metrics.predictionMetrics.accuracy}, calculated ${calculatedAccuracy}`
73
+ `Accuracy calculation mismatch: reported ${metrics.predictionMetrics.accuracy}, calculated ${calculatedAccuracy}`,
74
74
  );
75
75
  }
76
76
 
77
77
  // 6. Validate correct + incorrect = total
78
78
  if (correctPredictions + incorrectPredictions !== totalPositions) {
79
79
  errors.push(
80
- `Prediction count mismatch: ${correctPredictions} + ${incorrectPredictions} != ${totalPositions}`
80
+ `Prediction count mismatch: ${correctPredictions} + ${incorrectPredictions} != ${totalPositions}`,
81
81
  );
82
82
  }
83
83
 
@@ -86,17 +86,17 @@ export class MetricsValidator {
86
86
  const calculatedWinRate =
87
87
  metrics.perpMetrics.profitableTrades / metrics.perpMetrics.totalTrades;
88
88
  const winRateDiff = Math.abs(
89
- calculatedWinRate - metrics.perpMetrics.winRate
89
+ calculatedWinRate - metrics.perpMetrics.winRate,
90
90
  );
91
91
 
92
92
  if (winRateDiff > 0.01) {
93
93
  errors.push(
94
- `Win rate calculation mismatch: reported ${metrics.perpMetrics.winRate}, calculated ${calculatedWinRate}`
94
+ `Win rate calculation mismatch: reported ${metrics.perpMetrics.winRate}, calculated ${calculatedWinRate}`,
95
95
  );
96
96
  }
97
97
  }
98
98
 
99
- logger.info('Metrics validation complete', {
99
+ logger.info("Metrics validation complete", {
100
100
  valid: errors.length === 0,
101
101
  errors: errors.length,
102
102
  warnings: warnings.length,
@@ -113,16 +113,16 @@ export class MetricsValidator {
113
113
  * Validate prediction metrics against ground truth
114
114
  */
115
115
  private static validatePredictionMetrics(
116
- _predictionMetrics: SimulationMetrics['predictionMetrics'],
116
+ _predictionMetrics: SimulationMetrics["predictionMetrics"],
117
117
  actions: AgentAction[],
118
- groundTruth: GroundTruth
118
+ groundTruth: GroundTruth,
119
119
  ): ValidationResult {
120
120
  const errors: string[] = [];
121
121
  const warnings: string[] = [];
122
122
 
123
123
  // Get all prediction actions
124
124
  const predictionActions = actions.filter(
125
- (a) => a.type === 'buy_prediction'
125
+ (a) => a.type === "buy_prediction",
126
126
  );
127
127
 
128
128
  // Validate each action against ground truth
@@ -133,7 +133,6 @@ export class MetricsValidator {
133
133
  // Check if we have ground truth for this market
134
134
  if (!(marketId in groundTruth.marketOutcomes)) {
135
135
  warnings.push(`No ground truth for market ${marketId}`);
136
- continue;
137
136
  }
138
137
 
139
138
  // Verify the outcome exists in ground truth