@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/package.json +2 -2
  2. package/research-output/training-runs/training-run-1773726941205.json +38 -0
  3. package/scripts/rank_trajectories.ts +0 -1
  4. package/scripts/run_task_benchmark.ts +4 -11
  5. package/src/adapter.ts +96 -49
  6. package/src/archetypes/ArchetypeConfigService.ts +188 -185
  7. package/src/archetypes/derive-archetype.ts +47 -47
  8. package/src/archetypes/index.ts +2 -2
  9. package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
  10. package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
  11. package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
  12. package/src/benchmark/BenchmarkDataViewer.ts +32 -30
  13. package/src/benchmark/BenchmarkHistoryService.ts +13 -12
  14. package/src/benchmark/BenchmarkRunner.ts +87 -83
  15. package/src/benchmark/BenchmarkValidator.ts +48 -46
  16. package/src/benchmark/FastEvalRunner.ts +17 -16
  17. package/src/benchmark/MetricsValidator.ts +20 -21
  18. package/src/benchmark/MetricsVisualizer.ts +92 -85
  19. package/src/benchmark/ModelBenchmarkService.ts +90 -82
  20. package/src/benchmark/ModelRegistry.ts +44 -44
  21. package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
  22. package/src/benchmark/SimulationA2AInterface.ts +118 -118
  23. package/src/benchmark/SimulationEngine.ts +51 -51
  24. package/src/benchmark/TaskRunner.ts +87 -79
  25. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
  26. package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
  27. package/src/benchmark/index.ts +27 -27
  28. package/src/benchmark/parseSimulationMetrics.ts +32 -32
  29. package/src/benchmark/simulation-types.ts +10 -10
  30. package/src/dependencies.ts +34 -34
  31. package/src/generation/TrajectoryGenerator.ts +39 -37
  32. package/src/generation/index.ts +1 -1
  33. package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
  34. package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
  35. package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
  36. package/src/huggingface/index.ts +6 -6
  37. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
  38. package/src/index.ts +27 -27
  39. package/src/init-training.ts +6 -6
  40. package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
  41. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
  42. package/src/metrics/index.ts +2 -2
  43. package/src/rubrics/__tests__/index.test.ts +73 -73
  44. package/src/rubrics/ass-kisser.ts +6 -6
  45. package/src/rubrics/degen.ts +6 -6
  46. package/src/rubrics/goody-twoshoes.ts +6 -6
  47. package/src/rubrics/index.ts +50 -50
  48. package/src/rubrics/information-trader.ts +6 -6
  49. package/src/rubrics/infosec.ts +6 -6
  50. package/src/rubrics/liar.ts +6 -6
  51. package/src/rubrics/perps-trader.ts +6 -6
  52. package/src/rubrics/researcher.ts +6 -6
  53. package/src/rubrics/scammer.ts +6 -6
  54. package/src/rubrics/social-butterfly.ts +7 -7
  55. package/src/rubrics/super-predictor.ts +6 -6
  56. package/src/rubrics/trader.ts +5 -5
  57. package/src/scoring/ArchetypeScoringService.ts +56 -54
  58. package/src/scoring/JudgePromptBuilder.ts +96 -96
  59. package/src/scoring/LLMJudgeCache.ts +26 -23
  60. package/src/scoring/index.ts +3 -3
  61. package/src/training/AutomationPipeline.ts +149 -140
  62. package/src/training/BenchmarkService.ts +49 -45
  63. package/src/training/ConfigValidator.ts +38 -32
  64. package/src/training/MarketOutcomesTracker.ts +22 -12
  65. package/src/training/ModelDeployer.ts +15 -15
  66. package/src/training/ModelFetcher.ts +7 -7
  67. package/src/training/ModelSelectionService.ts +32 -32
  68. package/src/training/ModelUsageVerifier.ts +31 -24
  69. package/src/training/MultiModelOrchestrator.ts +44 -44
  70. package/src/training/RLModelConfig.ts +57 -57
  71. package/src/training/RewardBackpropagationService.ts +18 -17
  72. package/src/training/RulerScoringService.ts +73 -72
  73. package/src/training/TrainingMonitor.ts +29 -29
  74. package/src/training/TrajectoryRecorder.ts +25 -27
  75. package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
  76. package/src/training/index.ts +36 -36
  77. package/src/training/logRLConfig.ts +7 -7
  78. package/src/training/pipeline.ts +13 -16
  79. package/src/training/storage/ModelStorageService.ts +32 -32
  80. package/src/training/storage/TrainingDataArchiver.ts +21 -21
  81. package/src/training/storage/index.ts +2 -2
  82. package/src/training/types.ts +6 -6
  83. package/src/training/window-utils.ts +14 -14
  84. package/src/utils/index.ts +7 -7
  85. package/src/utils/logger.ts +5 -5
  86. package/src/utils/snowflake.ts +1 -1
  87. package/src/utils/synthetic-detector.ts +7 -7
@@ -13,10 +13,10 @@ export function getCurrentWindowId(): string {
13
13
  const now = new Date();
14
14
  // Round down to the start of the current hour
15
15
  const windowStart = new Date(
16
- Math.floor(now.getTime() / (60 * 60 * 1000)) * (60 * 60 * 1000)
16
+ Math.floor(now.getTime() / (60 * 60 * 1000)) * (60 * 60 * 1000),
17
17
  );
18
18
  // Format as ISO string, take first 13 chars + :00
19
- return windowStart.toISOString().slice(0, 13) + ':00';
19
+ return `${windowStart.toISOString().slice(0, 13)}:00`;
20
20
  }
21
21
 
22
22
  /**
@@ -27,11 +27,11 @@ export function getCurrentWindowId(): string {
27
27
  export function getPreviousWindowId(offset: number = 1): string {
28
28
  const now = new Date();
29
29
  const windowStart = new Date(
30
- Math.floor(now.getTime() / (60 * 60 * 1000)) * (60 * 60 * 1000)
30
+ Math.floor(now.getTime() / (60 * 60 * 1000)) * (60 * 60 * 1000),
31
31
  );
32
32
  // Go back N hours
33
33
  windowStart.setHours(windowStart.getHours() - offset);
34
- return windowStart.toISOString().slice(0, 13) + ':00';
34
+ return `${windowStart.toISOString().slice(0, 13)}:00`;
35
35
  }
36
36
 
37
37
  /**
@@ -51,11 +51,11 @@ export function parseWindowId(windowId: string): Date {
51
51
  */
52
52
  export function isWindowComplete(
53
53
  windowId: string,
54
- windowDurationHours: number = 1
54
+ windowDurationHours: number = 1,
55
55
  ): boolean {
56
56
  const windowStart = parseWindowId(windowId);
57
57
  const windowEnd = new Date(
58
- windowStart.getTime() + windowDurationHours * 60 * 60 * 1000
58
+ windowStart.getTime() + windowDurationHours * 60 * 60 * 1000,
59
59
  );
60
60
  return Date.now() > windowEnd.getTime();
61
61
  }
@@ -68,7 +68,7 @@ export function isWindowComplete(
68
68
  */
69
69
  export function getWindowRange(
70
70
  windowId: string,
71
- windowDurationHours: number = 1
71
+ windowDurationHours: number = 1,
72
72
  ) {
73
73
  const start = parseWindowId(windowId);
74
74
  const end = new Date(start.getTime() + windowDurationHours * 60 * 60 * 1000);
@@ -85,18 +85,18 @@ export function getWindowRange(
85
85
  export function generateWindowIds(
86
86
  startTime: Date,
87
87
  endTime: Date,
88
- windowDurationHours: number = 1
88
+ windowDurationHours: number = 1,
89
89
  ): string[] {
90
90
  const windows: string[] = [];
91
91
  const windowMs = windowDurationHours * 60 * 60 * 1000;
92
92
 
93
93
  // Round start time down to window boundary
94
94
  const currentWindowStart = new Date(
95
- Math.floor(startTime.getTime() / windowMs) * windowMs
95
+ Math.floor(startTime.getTime() / windowMs) * windowMs,
96
96
  );
97
97
 
98
98
  while (currentWindowStart.getTime() <= endTime.getTime()) {
99
- windows.push(currentWindowStart.toISOString().slice(0, 13) + ':00');
99
+ windows.push(`${currentWindowStart.toISOString().slice(0, 13)}:00`);
100
100
  currentWindowStart.setTime(currentWindowStart.getTime() + windowMs);
101
101
  }
102
102
 
@@ -111,13 +111,13 @@ export function generateWindowIds(
111
111
  */
112
112
  export function getWindowIdForTimestamp(
113
113
  timestamp: Date,
114
- windowDurationHours: number = 1
114
+ windowDurationHours: number = 1,
115
115
  ): string {
116
116
  const windowMs = windowDurationHours * 60 * 60 * 1000;
117
117
  const windowStart = new Date(
118
- Math.floor(timestamp.getTime() / windowMs) * windowMs
118
+ Math.floor(timestamp.getTime() / windowMs) * windowMs,
119
119
  );
120
- return windowStart.toISOString().slice(0, 13) + ':00';
120
+ return `${windowStart.toISOString().slice(0, 13)}:00`;
121
121
  }
122
122
 
123
123
  /**
@@ -130,7 +130,7 @@ export function getWindowIdForTimestamp(
130
130
  export function isTimestampInWindow(
131
131
  timestamp: Date,
132
132
  windowId: string,
133
- windowDurationHours: number = 1
133
+ windowDurationHours: number = 1,
134
134
  ): boolean {
135
135
  const { start, end } = getWindowRange(windowId, windowDurationHours);
136
136
  const time = timestamp.getTime();
@@ -2,9 +2,9 @@
2
2
  * Training Package Utilities
3
3
  */
4
4
 
5
- export { logger } from './logger';
6
- export { generateSnowflakeId } from './snowflake';
7
- export { assertHasLLMCalls, validateLLMCalls } from './synthetic-detector';
5
+ export { logger } from "./logger";
6
+ export { generateSnowflakeId } from "./snowflake";
7
+ export { assertHasLLMCalls, validateLLMCalls } from "./synthetic-detector";
8
8
 
9
9
  /**
10
10
  * Split an array into batches of a specified size
@@ -77,9 +77,9 @@ export function formatPercent(value: number, decimals = 1): string {
77
77
  export function formatCurrency(
78
78
  value: number,
79
79
  decimals = 2,
80
- prefix = '$'
80
+ prefix = "$",
81
81
  ): string {
82
- const sign = value >= 0 ? '' : '-';
82
+ const sign = value >= 0 ? "" : "-";
83
83
  return `${sign}${prefix}${Math.abs(value).toFixed(decimals)}`;
84
84
  }
85
85
 
@@ -94,8 +94,8 @@ export function formatCurrency(
94
94
  export function formatCurrencyWithSign(
95
95
  value: number,
96
96
  decimals = 2,
97
- prefix = '$'
97
+ prefix = "$",
98
98
  ): string {
99
- const sign = value >= 0 ? '+' : '-';
99
+ const sign = value >= 0 ? "+" : "-";
100
100
  return `${sign}${prefix}${Math.abs(value).toFixed(decimals)}`;
101
101
  }
@@ -12,7 +12,7 @@ function formatData(data: LogData): string {
12
12
  if (data instanceof Error) {
13
13
  return data.message;
14
14
  }
15
- if (typeof data === 'object' && data !== null) {
15
+ if (typeof data === "object" && data !== null) {
16
16
  return JSON.stringify(data, null, 2);
17
17
  }
18
18
  return String(data);
@@ -20,7 +20,7 @@ function formatData(data: LogData): string {
20
20
 
21
21
  export const logger = {
22
22
  info: (message: string, data?: LogData, context?: string) => {
23
- const prefix = context ? `[${context}] ` : '';
23
+ const prefix = context ? `[${context}] ` : "";
24
24
  if (data !== undefined) {
25
25
  console.log(`${prefix}[INFO] ${message}`, formatData(data));
26
26
  } else {
@@ -29,7 +29,7 @@ export const logger = {
29
29
  },
30
30
 
31
31
  error: (message: string, data?: LogData, context?: string) => {
32
- const prefix = context ? `[${context}] ` : '';
32
+ const prefix = context ? `[${context}] ` : "";
33
33
  if (data !== undefined) {
34
34
  console.error(`${prefix}[ERROR] ${message}`, formatData(data));
35
35
  } else {
@@ -38,7 +38,7 @@ export const logger = {
38
38
  },
39
39
 
40
40
  warn: (message: string, data?: LogData, context?: string) => {
41
- const prefix = context ? `[${context}] ` : '';
41
+ const prefix = context ? `[${context}] ` : "";
42
42
  if (data !== undefined) {
43
43
  console.warn(`${prefix}[WARN] ${message}`, formatData(data));
44
44
  } else {
@@ -48,7 +48,7 @@ export const logger = {
48
48
 
49
49
  debug: (message: string, data?: LogData, context?: string) => {
50
50
  if (process.env.DEBUG) {
51
- const prefix = context ? `[${context}] ` : '';
51
+ const prefix = context ? `[${context}] ` : "";
52
52
  if (data !== undefined) {
53
53
  console.log(`${prefix}[DEBUG] ${message}`, formatData(data));
54
54
  } else {
@@ -13,5 +13,5 @@ export async function generateSnowflakeId(): Promise<string> {
13
13
  if (counter > 999) counter = 0;
14
14
 
15
15
  // Format: timestamp (13 digits) + counter (3 digits)
16
- return `${timestamp}${currentCounter.toString().padStart(3, '0')}`;
16
+ return `${timestamp}${currentCounter.toString().padStart(3, "0")}`;
17
17
  }
@@ -58,9 +58,9 @@ export function validateLLMCalls(steps: TrajectoryStep[]): {
58
58
  totalLLMCalls++;
59
59
 
60
60
  // Validate LLM call has actual content
61
- const systemPrompt = call.systemPrompt ?? call.system_prompt ?? '';
62
- const userPrompt = call.userPrompt ?? call.user_prompt ?? '';
63
- const response = call.response ?? '';
61
+ const systemPrompt = call.systemPrompt ?? call.system_prompt ?? "";
62
+ const userPrompt = call.userPrompt ?? call.user_prompt ?? "";
63
+ const response = call.response ?? "";
64
64
 
65
65
  if (systemPrompt.length < 10) {
66
66
  issues.push(`Step ${i}, call ${j}: Missing or empty system prompt`);
@@ -79,7 +79,7 @@ export function validateLLMCalls(steps: TrajectoryStep[]): {
79
79
  // At least 3 steps should have LLM calls for valid training data
80
80
  if (stepsWithLLM < 3) {
81
81
  issues.push(
82
- `Only ${stepsWithLLM}/${steps.length} steps have LLM calls (minimum: 3)`
82
+ `Only ${stepsWithLLM}/${steps.length} steps have LLM calls (minimum: 3)`,
83
83
  );
84
84
  }
85
85
 
@@ -98,14 +98,14 @@ export function validateLLMCalls(steps: TrajectoryStep[]): {
98
98
  */
99
99
  export function assertHasLLMCalls(
100
100
  steps: TrajectoryStep[],
101
- trajectoryId: string
101
+ trajectoryId: string,
102
102
  ): void {
103
103
  const validation = validateLLMCalls(steps);
104
104
 
105
105
  if (!validation.valid) {
106
106
  throw new Error(
107
- `Trajectory ${trajectoryId} failed LLM validation: ${validation.issues.join('; ')}. ` +
108
- 'Training data must contain real LLM calls.'
107
+ `Trajectory ${trajectoryId} failed LLM validation: ${validation.issues.join("; ")}. ` +
108
+ "Training data must contain real LLM calls.",
109
109
  );
110
110
  }
111
111
  }