@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/package.json +2 -2
  2. package/research-output/training-runs/training-run-1773726941205.json +38 -0
  3. package/scripts/rank_trajectories.ts +0 -1
  4. package/scripts/run_task_benchmark.ts +4 -11
  5. package/src/adapter.ts +96 -49
  6. package/src/archetypes/ArchetypeConfigService.ts +188 -185
  7. package/src/archetypes/derive-archetype.ts +47 -47
  8. package/src/archetypes/index.ts +2 -2
  9. package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
  10. package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
  11. package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
  12. package/src/benchmark/BenchmarkDataViewer.ts +32 -30
  13. package/src/benchmark/BenchmarkHistoryService.ts +13 -12
  14. package/src/benchmark/BenchmarkRunner.ts +87 -83
  15. package/src/benchmark/BenchmarkValidator.ts +48 -46
  16. package/src/benchmark/FastEvalRunner.ts +17 -16
  17. package/src/benchmark/MetricsValidator.ts +20 -21
  18. package/src/benchmark/MetricsVisualizer.ts +92 -85
  19. package/src/benchmark/ModelBenchmarkService.ts +90 -82
  20. package/src/benchmark/ModelRegistry.ts +44 -44
  21. package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
  22. package/src/benchmark/SimulationA2AInterface.ts +118 -118
  23. package/src/benchmark/SimulationEngine.ts +51 -51
  24. package/src/benchmark/TaskRunner.ts +87 -79
  25. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
  26. package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
  27. package/src/benchmark/index.ts +27 -27
  28. package/src/benchmark/parseSimulationMetrics.ts +32 -32
  29. package/src/benchmark/simulation-types.ts +10 -10
  30. package/src/dependencies.ts +34 -34
  31. package/src/generation/TrajectoryGenerator.ts +39 -37
  32. package/src/generation/index.ts +1 -1
  33. package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
  34. package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
  35. package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
  36. package/src/huggingface/index.ts +6 -6
  37. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
  38. package/src/index.ts +27 -27
  39. package/src/init-training.ts +6 -6
  40. package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
  41. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
  42. package/src/metrics/index.ts +2 -2
  43. package/src/rubrics/__tests__/index.test.ts +73 -73
  44. package/src/rubrics/ass-kisser.ts +6 -6
  45. package/src/rubrics/degen.ts +6 -6
  46. package/src/rubrics/goody-twoshoes.ts +6 -6
  47. package/src/rubrics/index.ts +50 -50
  48. package/src/rubrics/information-trader.ts +6 -6
  49. package/src/rubrics/infosec.ts +6 -6
  50. package/src/rubrics/liar.ts +6 -6
  51. package/src/rubrics/perps-trader.ts +6 -6
  52. package/src/rubrics/researcher.ts +6 -6
  53. package/src/rubrics/scammer.ts +6 -6
  54. package/src/rubrics/social-butterfly.ts +7 -7
  55. package/src/rubrics/super-predictor.ts +6 -6
  56. package/src/rubrics/trader.ts +5 -5
  57. package/src/scoring/ArchetypeScoringService.ts +56 -54
  58. package/src/scoring/JudgePromptBuilder.ts +96 -96
  59. package/src/scoring/LLMJudgeCache.ts +26 -23
  60. package/src/scoring/index.ts +3 -3
  61. package/src/training/AutomationPipeline.ts +149 -140
  62. package/src/training/BenchmarkService.ts +49 -45
  63. package/src/training/ConfigValidator.ts +38 -32
  64. package/src/training/MarketOutcomesTracker.ts +22 -12
  65. package/src/training/ModelDeployer.ts +15 -15
  66. package/src/training/ModelFetcher.ts +7 -7
  67. package/src/training/ModelSelectionService.ts +32 -32
  68. package/src/training/ModelUsageVerifier.ts +31 -24
  69. package/src/training/MultiModelOrchestrator.ts +44 -44
  70. package/src/training/RLModelConfig.ts +57 -57
  71. package/src/training/RewardBackpropagationService.ts +18 -17
  72. package/src/training/RulerScoringService.ts +73 -72
  73. package/src/training/TrainingMonitor.ts +29 -29
  74. package/src/training/TrajectoryRecorder.ts +25 -27
  75. package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
  76. package/src/training/index.ts +36 -36
  77. package/src/training/logRLConfig.ts +7 -7
  78. package/src/training/pipeline.ts +13 -16
  79. package/src/training/storage/ModelStorageService.ts +32 -32
  80. package/src/training/storage/TrainingDataArchiver.ts +21 -21
  81. package/src/training/storage/index.ts +2 -2
  82. package/src/training/types.ts +6 -6
  83. package/src/training/window-utils.ts +14 -14
  84. package/src/utils/index.ts +7 -7
  85. package/src/utils/logger.ts +5 -5
  86. package/src/utils/snowflake.ts +1 -1
  87. package/src/utils/synthetic-detector.ts +7 -7
@@ -7,10 +7,10 @@
7
7
  * @packageDocumentation
8
8
  */
9
9
 
10
- import type { BehavioralMetrics } from '../metrics/types';
11
- import { getMetricsSummary } from '../metrics/types';
12
- import { getPriorityMetrics, getRubric } from '../rubrics';
13
- import type { TrajectoryStep } from '../training/types';
10
+ import type { BehavioralMetrics } from "../metrics/types";
11
+ import { getMetricsSummary } from "../metrics/types";
12
+ import { getPriorityMetrics, getRubric } from "../rubrics";
13
+ import type { TrajectoryStep } from "../training/types";
14
14
 
15
15
  /**
16
16
  * Context for trajectory evaluation.
@@ -56,10 +56,10 @@ export class JudgePromptBuilder {
56
56
  */
57
57
  buildSinglePrompt(
58
58
  trajectory: TrajectoryContext,
59
- options: JudgePromptOptions = {}
59
+ options: JudgePromptOptions = {},
60
60
  ): { system: string; user: string } {
61
61
  const opts = { ...DEFAULT_OPTIONS, ...options };
62
- const archetype = trajectory.archetype || 'default';
62
+ const archetype = trajectory.archetype || "default";
63
63
  const rubric = getRubric(archetype);
64
64
  const priorityMetrics = getPriorityMetrics(archetype);
65
65
 
@@ -75,12 +75,12 @@ export class JudgePromptBuilder {
75
75
  buildComparisonPrompt(
76
76
  trajectories: TrajectoryContext[],
77
77
  scenarioId: string,
78
- options: JudgePromptOptions = {}
78
+ options: JudgePromptOptions = {},
79
79
  ): { system: string; user: string } {
80
80
  const opts = { ...DEFAULT_OPTIONS, ...options };
81
81
 
82
82
  // Get archetype from first trajectory (assume all same archetype for comparison)
83
- const archetype = trajectories[0]?.archetype || 'default';
83
+ const archetype = trajectories[0]?.archetype || "default";
84
84
  const rubric = getRubric(archetype);
85
85
  const priorityMetrics = getPriorityMetrics(archetype);
86
86
 
@@ -89,7 +89,7 @@ export class JudgePromptBuilder {
89
89
  trajectories,
90
90
  scenarioId,
91
91
  priorityMetrics,
92
- opts
92
+ opts,
93
93
  );
94
94
 
95
95
  return { system, user };
@@ -115,7 +115,7 @@ IMPORTANT: The metrics provided are CONTEXT to inform your judgment. Use them to
115
115
  */
116
116
  private buildComparisonSystemPrompt(
117
117
  archetype: string,
118
- rubric: string
118
+ rubric: string,
119
119
  ): string {
120
120
  return `You are an expert evaluator of AI agent performance. All trajectories below were given the same scenario and are from "${archetype}" archetype agents.
121
121
 
@@ -139,28 +139,28 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
139
139
  private buildUserPrompt(
140
140
  trajectory: TrajectoryContext,
141
141
  priorityMetrics: string[],
142
- options: JudgePromptOptions
142
+ options: JudgePromptOptions,
143
143
  ): string {
144
144
  const parts: string[] = [];
145
145
 
146
146
  // Agent info
147
147
  parts.push(`## Agent Information`);
148
148
  parts.push(`- Agent ID: ${trajectory.agentId}`);
149
- parts.push(`- Archetype: ${trajectory.archetype || 'unknown'}`);
149
+ parts.push(`- Archetype: ${trajectory.archetype || "unknown"}`);
150
150
  parts.push(
151
- `- Episode Length: ${trajectory.episodeLength || trajectory.steps.length} ticks`
151
+ `- Episode Length: ${trajectory.episodeLength || trajectory.steps.length} ticks`,
152
152
  );
153
- parts.push('');
153
+ parts.push("");
154
154
 
155
155
  // Metrics section
156
156
  parts.push(`## Behavioral Metrics`);
157
157
  parts.push(this.formatMetrics(trajectory.metrics, priorityMetrics));
158
- parts.push('');
158
+ parts.push("");
159
159
 
160
160
  // Action summary
161
161
  parts.push(`## Action Summary`);
162
162
  parts.push(this.summarizeActions(trajectory.steps));
163
- parts.push('');
163
+ parts.push("");
164
164
 
165
165
  // Key decisions (if requested)
166
166
  if (options.includeKeyDecisions) {
@@ -168,7 +168,7 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
168
168
  if (keyDecisions) {
169
169
  parts.push(`## Key Decisions`);
170
170
  parts.push(keyDecisions);
171
- parts.push('');
171
+ parts.push("");
172
172
  }
173
173
  }
174
174
 
@@ -178,18 +178,18 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
178
178
  parts.push(
179
179
  this.formatRecentActions(
180
180
  trajectory.steps,
181
- options.maxActionsToShow || 20
182
- )
181
+ options.maxActionsToShow || 20,
182
+ ),
183
183
  );
184
- parts.push('');
184
+ parts.push("");
185
185
  }
186
186
 
187
187
  // Instructions
188
188
  parts.push(`## Instructions`);
189
189
  parts.push(
190
- `Score this trajectory on a scale of 0.0 to 1.0 based on how well it embodies the ${trajectory.archetype || 'agent'} archetype's values.`
190
+ `Score this trajectory on a scale of 0.0 to 1.0 based on how well it embodies the ${trajectory.archetype || "agent"} archetype's values.`,
191
191
  );
192
- parts.push('');
192
+ parts.push("");
193
193
  parts.push(`Respond with JSON:`);
194
194
  parts.push(`{
195
195
  "score": <float 0-1>,
@@ -198,7 +198,7 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
198
198
  "weaknesses": ["<weakness 1>", "<weakness 2>"]
199
199
  }`);
200
200
 
201
- return parts.join('\n');
201
+ return parts.join("\n");
202
202
  }
203
203
 
204
204
  /**
@@ -208,18 +208,18 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
208
208
  trajectories: TrajectoryContext[],
209
209
  scenarioId: string,
210
210
  priorityMetrics: string[],
211
- _options: JudgePromptOptions
211
+ _options: JudgePromptOptions,
212
212
  ): string {
213
213
  const parts: string[] = [];
214
214
 
215
215
  parts.push(`## Scenario: ${scenarioId}`);
216
216
  parts.push(`## Number of Trajectories: ${trajectories.length}`);
217
- parts.push('');
217
+ parts.push("");
218
218
 
219
219
  // Performance context for all trajectories
220
220
  parts.push(`## Trajectory Performance Context`);
221
221
  parts.push(`(Use this to inform your scoring)`);
222
- parts.push('');
222
+ parts.push("");
223
223
 
224
224
  for (let i = 0; i < trajectories.length; i++) {
225
225
  const traj = trajectories[i];
@@ -227,30 +227,30 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
227
227
 
228
228
  const trajId = `trajectory-${i + 1}`;
229
229
  parts.push(`### ${trajId}`);
230
- parts.push(`- Archetype: ${traj.archetype || 'unknown'}`);
230
+ parts.push(`- Archetype: ${traj.archetype || "unknown"}`);
231
231
  parts.push(
232
- `- Episode Length: ${traj.episodeLength || traj.steps.length} steps`
232
+ `- Episode Length: ${traj.episodeLength || traj.steps.length} steps`,
233
233
  );
234
- parts.push(`- Total Reward: ${traj.totalReward?.toFixed(2) || '0.00'}`);
235
- parts.push('');
234
+ parts.push(`- Total Reward: ${traj.totalReward?.toFixed(2) || "0.00"}`);
235
+ parts.push("");
236
236
 
237
237
  // Key metrics for this trajectory
238
238
  parts.push(`**Key Metrics:**`);
239
239
  parts.push(this.formatMetrics(traj.metrics, priorityMetrics));
240
- parts.push('');
240
+ parts.push("");
241
241
 
242
242
  // Action summary
243
243
  parts.push(`**Actions:**`);
244
244
  parts.push(this.summarizeActions(traj.steps));
245
- parts.push('');
245
+ parts.push("");
246
246
  }
247
247
 
248
248
  // Instructions
249
249
  parts.push(`## Instructions`);
250
250
  parts.push(
251
- `Score each trajectory from 0.0 to 1.0 RELATIVE to each other based on the archetype rubric.`
251
+ `Score each trajectory from 0.0 to 1.0 RELATIVE to each other based on the archetype rubric.`,
252
252
  );
253
- parts.push('');
253
+ parts.push("");
254
254
  parts.push(`Respond with ONLY valid JSON:`);
255
255
  parts.push(`{
256
256
  "scores": [
@@ -267,7 +267,7 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
267
267
  ]
268
268
  }`);
269
269
 
270
- return parts.join('\n');
270
+ return parts.join("\n");
271
271
  }
272
272
 
273
273
  /**
@@ -275,48 +275,48 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
275
275
  */
276
276
  private formatMetrics(
277
277
  metrics: BehavioralMetrics,
278
- priorityMetrics: string[]
278
+ priorityMetrics: string[],
279
279
  ): string {
280
280
  const lines: string[] = [];
281
281
 
282
282
  // Show priority metrics first with emphasis
283
283
  if (priorityMetrics.length > 0) {
284
- lines.push('### ⭐ KEY METRICS FOR THIS ARCHETYPE');
284
+ lines.push("### ⭐ KEY METRICS FOR THIS ARCHETYPE");
285
285
  for (const metricPath of priorityMetrics.slice(0, 6)) {
286
286
  const value = this.getMetricValue(metrics, metricPath);
287
287
  const label = this.formatMetricLabel(metricPath);
288
288
  lines.push(`- **${label}**: ${value}`);
289
289
  }
290
- lines.push('');
290
+ lines.push("");
291
291
  }
292
292
 
293
293
  // Summary metrics
294
294
  const summary = getMetricsSummary(metrics);
295
- lines.push('### Performance Summary');
295
+ lines.push("### Performance Summary");
296
296
  lines.push(`- Total P&L: $${summary.totalPnL.toFixed(2)}`);
297
297
  lines.push(`- Win Rate: ${(summary.winRate * 100).toFixed(1)}%`);
298
298
  lines.push(`- Trades Executed: ${summary.tradesExecuted}`);
299
299
  lines.push(
300
- `- Action Success Rate: ${(summary.actionSuccessRate * 100).toFixed(1)}%`
300
+ `- Action Success Rate: ${(summary.actionSuccessRate * 100).toFixed(1)}%`,
301
301
  );
302
- lines.push('');
302
+ lines.push("");
303
303
 
304
304
  // Social metrics
305
- lines.push('### Social Activity');
305
+ lines.push("### Social Activity");
306
306
  lines.push(
307
- `- Unique Users Interacted: ${metrics.social.uniqueUsersInteracted}`
307
+ `- Unique Users Interacted: ${metrics.social.uniqueUsersInteracted}`,
308
308
  );
309
309
  lines.push(`- Group Chats Joined: ${metrics.social.groupChatsJoined}`);
310
310
  lines.push(`- DMs Initiated: ${metrics.social.dmsInitiated}`);
311
311
  lines.push(`- Posts Created: ${metrics.social.postsCreated}`);
312
312
  lines.push(`- Comments Made: ${metrics.social.commentsMade}`);
313
313
  lines.push(
314
- `- Social to Trade Ratio: ${metrics.behavior.socialToTradeRatio.toFixed(2)}`
314
+ `- Social to Trade Ratio: ${metrics.behavior.socialToTradeRatio.toFixed(2)}`,
315
315
  );
316
- lines.push('');
316
+ lines.push("");
317
317
 
318
318
  // Trading metrics
319
- lines.push('### Trading Performance');
319
+ lines.push("### Trading Performance");
320
320
  lines.push(`- Total P&L: $${metrics.trading.totalPnL.toFixed(2)}`);
321
321
  lines.push(`- Win Rate: ${(metrics.trading.winRate * 100).toFixed(1)}%`);
322
322
  lines.push(`- Sharpe Ratio: ${metrics.trading.sharpeRatio.toFixed(2)}`);
@@ -324,94 +324,94 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
324
324
  lines.push(`- Markets Traded: ${metrics.trading.marketsTraded}`);
325
325
  lines.push(`- Largest Win: $${metrics.trading.largestWin.toFixed(2)}`);
326
326
  lines.push(`- Largest Loss: $${metrics.trading.largestLoss.toFixed(2)}`);
327
- lines.push('');
327
+ lines.push("");
328
328
 
329
329
  // Influence metrics
330
- lines.push('### Influence');
330
+ lines.push("### Influence");
331
331
  lines.push(`- Followers Gained: ${metrics.influence.followersGained}`);
332
332
  lines.push(
333
- `- Reputation Delta: ${metrics.influence.reputationDelta > 0 ? '+' : ''}${metrics.influence.reputationDelta}`
333
+ `- Reputation Delta: ${metrics.influence.reputationDelta > 0 ? "+" : ""}${metrics.influence.reputationDelta}`,
334
334
  );
335
335
  lines.push(`- Positive Reactions: ${metrics.influence.positiveReactions}`);
336
336
  lines.push(`- Information Spread: ${metrics.influence.informationSpread}`);
337
- lines.push('');
337
+ lines.push("");
338
338
 
339
339
  // Behavior metrics
340
- lines.push('### Behavior Patterns');
340
+ lines.push("### Behavior Patterns");
341
341
  lines.push(
342
- `- Actions Per Tick: ${metrics.behavior.actionsPerTick.toFixed(2)}`
342
+ `- Actions Per Tick: ${metrics.behavior.actionsPerTick.toFixed(2)}`,
343
343
  );
344
344
  lines.push(
345
- `- Consistency Score: ${(metrics.behavior.consistencyScore * 100).toFixed(1)}%`
345
+ `- Consistency Score: ${(metrics.behavior.consistencyScore * 100).toFixed(1)}%`,
346
346
  );
347
347
  lines.push(
348
- `- Dominant Action: ${metrics.behavior.dominantActionType || 'none'}`
348
+ `- Dominant Action: ${metrics.behavior.dominantActionType || "none"}`,
349
349
  );
350
- lines.push('');
350
+ lines.push("");
351
351
 
352
352
  // Information metrics
353
- lines.push('### Information Activity');
353
+ lines.push("### Information Activity");
354
354
  lines.push(`- Research Actions: ${metrics.information.researchActions}`);
355
355
  lines.push(`- Predictions Made: ${metrics.information.predictionsMade}`);
356
356
  lines.push(
357
- `- Prediction Accuracy: ${(metrics.information.predictionAccuracy * 100).toFixed(1)}%`
357
+ `- Prediction Accuracy: ${(metrics.information.predictionAccuracy * 100).toFixed(1)}%`,
358
358
  );
359
359
 
360
- return lines.join('\n');
360
+ return lines.join("\n");
361
361
  }
362
362
 
363
363
  /**
364
364
  * Get a metric value from the metrics object using a dot-path
365
365
  */
366
366
  private getMetricValue(metrics: BehavioralMetrics, path: string): string {
367
- const [category, key] = path.split('.');
368
- if (!category || !key) return 'N/A';
367
+ const [category, key] = path.split(".");
368
+ if (!category || !key) return "N/A";
369
369
 
370
370
  // Access nested metric value based on category
371
371
  let value: number | string | string[] | undefined;
372
372
  switch (category) {
373
- case 'trading':
373
+ case "trading":
374
374
  value = metrics.trading[key as keyof typeof metrics.trading];
375
375
  break;
376
- case 'social':
376
+ case "social":
377
377
  value = metrics.social[key as keyof typeof metrics.social];
378
378
  break;
379
- case 'influence':
379
+ case "influence":
380
380
  value = metrics.influence[key as keyof typeof metrics.influence];
381
381
  break;
382
- case 'behavior':
382
+ case "behavior":
383
383
  value = metrics.behavior[key as keyof typeof metrics.behavior];
384
384
  break;
385
- case 'information':
385
+ case "information":
386
386
  value = metrics.information[key as keyof typeof metrics.information];
387
387
  break;
388
388
  default:
389
- return 'N/A';
389
+ return "N/A";
390
390
  }
391
391
 
392
- if (value === undefined || value === null) return 'N/A';
392
+ if (value === undefined || value === null) return "N/A";
393
393
 
394
394
  // Format based on value type
395
- if (typeof value === 'number') {
395
+ if (typeof value === "number") {
396
396
  // Check if it's a rate/percentage
397
397
  if (
398
- key.includes('Rate') ||
399
- key.includes('Accuracy') ||
400
- key.includes('Score')
398
+ key.includes("Rate") ||
399
+ key.includes("Accuracy") ||
400
+ key.includes("Score")
401
401
  ) {
402
402
  return `${(value * 100).toFixed(1)}%`;
403
403
  }
404
404
  // Check if it's a currency
405
405
  if (
406
- key.includes('PnL') ||
407
- key.includes('Win') ||
408
- key.includes('Loss') ||
409
- key.includes('Drawdown')
406
+ key.includes("PnL") ||
407
+ key.includes("Win") ||
408
+ key.includes("Loss") ||
409
+ key.includes("Drawdown")
410
410
  ) {
411
411
  return `$${value.toFixed(2)}`;
412
412
  }
413
413
  // Check if it's a ratio
414
- if (key.includes('Ratio')) {
414
+ if (key.includes("Ratio")) {
415
415
  return value.toFixed(2);
416
416
  }
417
417
  // Integer-like values
@@ -428,12 +428,12 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
428
428
  * Format a metric path into a human-readable label
429
429
  */
430
430
  private formatMetricLabel(path: string): string {
431
- const [, key] = path.split('.');
431
+ const [, key] = path.split(".");
432
432
  if (!key) return path;
433
433
 
434
434
  // Convert camelCase to Title Case with spaces
435
435
  return key
436
- .replace(/([A-Z])/g, ' $1')
436
+ .replace(/([A-Z])/g, " $1")
437
437
  .replace(/^./, (str) => str.toUpperCase())
438
438
  .trim();
439
439
  }
@@ -461,18 +461,18 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
461
461
  }
462
462
 
463
463
  const sortedActions = Array.from(actionCounts.entries()).sort(
464
- (a, b) => b[1] - a[1]
464
+ (a, b) => b[1] - a[1],
465
465
  );
466
466
 
467
467
  const lines: string[] = [];
468
468
  lines.push(
469
- `- Total Actions: ${steps.length} (${successCount} successful, ${errorCount} failed)`
469
+ `- Total Actions: ${steps.length} (${successCount} successful, ${errorCount} failed)`,
470
470
  );
471
471
  lines.push(
472
- `- Action Types: ${sortedActions.map(([type, count]) => `${type}(${count})`).join(', ')}`
472
+ `- Action Types: ${sortedActions.map(([type, count]) => `${type}(${count})`).join(", ")}`,
473
473
  );
474
474
 
475
- return lines.join('\n');
475
+ return lines.join("\n");
476
476
  }
477
477
 
478
478
  /**
@@ -481,12 +481,12 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
481
481
  private extractKeyDecisions(steps: TrajectoryStep[]): string | null {
482
482
  const keyActions: string[] = [];
483
483
  const keyActionTypes = new Set([
484
- 'trade',
485
- 'buy',
486
- 'sell',
487
- 'predict',
488
- 'create_group_chat',
489
- 'post',
484
+ "trade",
485
+ "buy",
486
+ "sell",
487
+ "predict",
488
+ "create_group_chat",
489
+ "post",
490
490
  ]);
491
491
 
492
492
  for (const step of steps) {
@@ -508,10 +508,10 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
508
508
  }
509
509
  if (result.pnl !== undefined) {
510
510
  const pnl = Number(result.pnl);
511
- description += ` → P&L: ${pnl >= 0 ? '+' : ''}$${pnl.toFixed(2)}`;
511
+ description += ` → P&L: ${pnl >= 0 ? "+" : ""}$${pnl.toFixed(2)}`;
512
512
  }
513
513
 
514
- keyActions.push(`- ${description} ${action.success ? '' : ''}`);
514
+ keyActions.push(`- ${description} ${action.success ? "" : ""}`);
515
515
  }
516
516
  }
517
517
 
@@ -520,7 +520,7 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
520
520
  }
521
521
 
522
522
  // Limit to most recent 10 key actions
523
- return keyActions.slice(-10).join('\n');
523
+ return keyActions.slice(-10).join("\n");
524
524
  }
525
525
 
526
526
  /**
@@ -528,7 +528,7 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
528
528
  */
529
529
  private formatRecentActions(
530
530
  steps: TrajectoryStep[],
531
- maxActions: number
531
+ maxActions: number,
532
532
  ): string {
533
533
  const recentSteps = steps.slice(-maxActions);
534
534
  const lines: string[] = [];
@@ -537,16 +537,16 @@ The metrics provided are CONTEXT to inform your judgment. Use them to understand
537
537
  const action = step.action;
538
538
  if (!action) continue;
539
539
 
540
- const success = action.success ? '' : '';
540
+ const success = action.success ? "" : "";
541
541
  const reasoning = action.reasoning
542
542
  ? ` | Reason: ${action.reasoning.substring(0, 50)}...`
543
- : '';
543
+ : "";
544
544
  lines.push(
545
- `- [${step.stepNumber}] ${action.actionType} ${success}${reasoning}`
545
+ `- [${step.stepNumber}] ${action.actionType} ${success}${reasoning}`,
546
546
  );
547
547
  }
548
548
 
549
- return lines.join('\n') || 'No actions recorded';
549
+ return lines.join("\n") || "No actions recorded";
550
550
  }
551
551
  }
552
552
 
@@ -11,10 +11,10 @@
11
11
  * @packageDocumentation
12
12
  */
13
13
 
14
- import { getTrainingDataAdapter } from '../adapter';
15
- import { createHash } from 'crypto';
16
- import { getRubricHash, RUBRICS_VERSION } from '../rubrics';
17
- import { logger } from '../utils/logger';
14
+ import { createHash } from "node:crypto";
15
+ import { getTrainingDataAdapter } from "../adapter";
16
+ import { getRubricHash, RUBRICS_VERSION } from "../rubrics";
17
+ import { logger } from "../utils/logger";
18
18
 
19
19
  /**
20
20
  * Cached score entry
@@ -84,10 +84,10 @@ export class LLMJudgeCache {
84
84
  private generateCacheKey(
85
85
  trajectoryId: string,
86
86
  stepsJson: string,
87
- archetype: string
87
+ archetype: string,
88
88
  ): string {
89
89
  const content = `${trajectoryId}:${stepsJson}:${archetype}:${RUBRICS_VERSION}`;
90
- return createHash('sha256').update(content).digest('hex').substring(0, 32);
90
+ return createHash("sha256").update(content).digest("hex").substring(0, 32);
91
91
  }
92
92
 
93
93
  /**
@@ -121,7 +121,7 @@ export class LLMJudgeCache {
121
121
  get(
122
122
  trajectoryId: string,
123
123
  stepsJson: string,
124
- archetype: string
124
+ archetype: string,
125
125
  ): CachedScore | null {
126
126
  const cacheKey = this.generateCacheKey(trajectoryId, stepsJson, archetype);
127
127
  const cached = this.cache.get(cacheKey);
@@ -144,9 +144,9 @@ export class LLMJudgeCache {
144
144
  this.updateHitRate();
145
145
 
146
146
  logger.debug(
147
- 'Cache hit',
147
+ "Cache hit",
148
148
  { trajectoryId, archetype, cacheKey: cacheKey.substring(0, 8) },
149
- 'LLMJudgeCache'
149
+ "LLMJudgeCache",
150
150
  );
151
151
 
152
152
  return cached;
@@ -162,7 +162,7 @@ export class LLMJudgeCache {
162
162
  score: number,
163
163
  reasoning: string,
164
164
  strengths: string[] = [],
165
- weaknesses: string[] = []
165
+ weaknesses: string[] = [],
166
166
  ): void {
167
167
  // Enforce max entries limit
168
168
  if (this.cache.size >= this.config.maxEntries) {
@@ -172,7 +172,7 @@ export class LLMJudgeCache {
172
172
  const cacheKey = this.generateCacheKey(trajectoryId, stepsJson, archetype);
173
173
  const now = new Date();
174
174
  const expiresAt = new Date(
175
- now.getTime() + this.config.ttlHours * 60 * 60 * 1000
175
+ now.getTime() + this.config.ttlHours * 60 * 60 * 1000,
176
176
  );
177
177
 
178
178
  const entry: CachedScore = {
@@ -192,9 +192,9 @@ export class LLMJudgeCache {
192
192
  this.cache.set(cacheKey, entry);
193
193
 
194
194
  logger.debug(
195
- 'Cache set',
195
+ "Cache set",
196
196
  { trajectoryId, archetype, score, cacheKey: cacheKey.substring(0, 8) },
197
- 'LLMJudgeCache'
197
+ "LLMJudgeCache",
198
198
  );
199
199
  }
200
200
 
@@ -242,9 +242,9 @@ export class LLMJudgeCache {
242
242
  this.stats.invalidations += invalidated;
243
243
 
244
244
  logger.info(
245
- 'Invalidated cache entries',
245
+ "Invalidated cache entries",
246
246
  { archetype, count: invalidated },
247
- 'LLMJudgeCache'
247
+ "LLMJudgeCache",
248
248
  );
249
249
 
250
250
  return invalidated;
@@ -258,7 +258,7 @@ export class LLMJudgeCache {
258
258
  this.cache.clear();
259
259
  this.stats.invalidations += count;
260
260
 
261
- logger.info('Cleared cache', { count }, 'LLMJudgeCache');
261
+ logger.info("Cleared cache", { count }, "LLMJudgeCache");
262
262
  }
263
263
 
264
264
  /**
@@ -290,18 +290,18 @@ export class LLMJudgeCache {
290
290
  this.set(
291
291
  row.trajectoryId,
292
292
  row.stepsJson,
293
- 'default',
293
+ "default",
294
294
  row.aiJudgeReward,
295
- row.aiJudgeReasoning
295
+ row.aiJudgeReasoning,
296
296
  );
297
297
  loaded++;
298
298
  }
299
299
  }
300
300
 
301
301
  logger.info(
302
- 'Warmed cache from database',
302
+ "Warmed cache from database",
303
303
  { loaded, attempted: results.length },
304
- 'LLMJudgeCache'
304
+ "LLMJudgeCache",
305
305
  );
306
306
 
307
307
  return loaded;
@@ -322,7 +322,10 @@ export const scoreValidator = {
322
322
  */
323
323
  isValidScore(score: number): boolean {
324
324
  return (
325
- typeof score === 'number' && !isNaN(score) && score >= 0 && score <= 1
325
+ typeof score === "number" &&
326
+ !Number.isNaN(score) &&
327
+ score >= 0 &&
328
+ score <= 1
326
329
  );
327
330
  },
328
331
 
@@ -331,7 +334,7 @@ export const scoreValidator = {
331
334
  */
332
335
  isValidReasoning(reasoning: string): boolean {
333
336
  return (
334
- typeof reasoning === 'string' &&
337
+ typeof reasoning === "string" &&
335
338
  reasoning.length >= 20 &&
336
339
  reasoning.length <= 5000
337
340
  );
@@ -356,7 +359,7 @@ export const scoreValidator = {
356
359
  * Check if scores are consistent (similar trajectories should have similar scores)
357
360
  */
358
361
  checkScoreConsistency(
359
- scores: Array<{ trajectoryId: string; score: number; metricsHash: string }>
362
+ scores: Array<{ trajectoryId: string; score: number; metricsHash: string }>,
360
363
  ): { consistent: boolean; outliers: string[] } {
361
364
  if (scores.length < 3) {
362
365
  return { consistent: true, outliers: [] };
@@ -4,6 +4,6 @@
4
4
  * LLM-as-judge scoring with archetype-specific rubrics.
5
5
  */
6
6
 
7
- export * from './ArchetypeScoringService';
8
- export * from './JudgePromptBuilder';
9
- export * from './LLMJudgeCache';
7
+ export * from "./ArchetypeScoringService";
8
+ export * from "./JudgePromptBuilder";
9
+ export * from "./LLMJudgeCache";