@elizaos/training 2.0.0-alpha.21 → 2.0.0-alpha.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/.turbo/turbo-lint.log +2 -0
  2. package/.turbo/turbo-typecheck.log +1 -0
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/adapter.js +59 -0
  5. package/dist/archetypes/ArchetypeConfigService.js +510 -0
  6. package/dist/archetypes/derive-archetype.js +196 -0
  7. package/dist/archetypes/index.js +7 -0
  8. package/dist/benchmark/ArchetypeMatchupBenchmark.js +547 -0
  9. package/dist/benchmark/BenchmarkChartGenerator.js +632 -0
  10. package/dist/benchmark/BenchmarkDataGenerator.js +825 -0
  11. package/dist/benchmark/BenchmarkDataViewer.js +197 -0
  12. package/dist/benchmark/BenchmarkHistoryService.js +135 -0
  13. package/dist/benchmark/BenchmarkRunner.js +483 -0
  14. package/dist/benchmark/BenchmarkValidator.js +158 -0
  15. package/dist/benchmark/FastEvalRunner.js +133 -0
  16. package/dist/benchmark/MetricsValidator.js +104 -0
  17. package/dist/benchmark/MetricsVisualizer.js +775 -0
  18. package/dist/benchmark/ModelBenchmarkService.js +433 -0
  19. package/dist/benchmark/ModelRegistry.js +122 -0
  20. package/dist/benchmark/RulerBenchmarkIntegration.js +168 -0
  21. package/dist/benchmark/SimulationA2AInterface.js +683 -0
  22. package/dist/benchmark/SimulationEngine.js +522 -0
  23. package/dist/benchmark/TaskRunner.js +60 -0
  24. package/dist/benchmark/__tests__/BenchmarkRunner.test.js +409 -0
  25. package/dist/benchmark/__tests__/HeadToHead.test.js +105 -0
  26. package/dist/benchmark/index.js +23 -0
  27. package/dist/benchmark/parseSimulationMetrics.js +86 -0
  28. package/dist/benchmark/simulation-types.js +1 -0
  29. package/dist/dependencies.js +197 -0
  30. package/dist/generation/TrajectoryGenerator.js +244 -0
  31. package/dist/generation/index.js +6 -0
  32. package/dist/huggingface/HuggingFaceDatasetUploader.js +463 -0
  33. package/dist/huggingface/HuggingFaceIntegrationService.js +272 -0
  34. package/dist/huggingface/HuggingFaceModelUploader.js +385 -0
  35. package/dist/huggingface/index.js +9 -0
  36. package/dist/huggingface/shared/HuggingFaceUploadUtil.js +144 -0
  37. package/dist/index.js +41 -0
  38. package/dist/init-training.js +43 -0
  39. package/dist/metrics/TrajectoryMetricsExtractor.js +523 -0
  40. package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +628 -0
  41. package/dist/metrics/index.js +7 -0
  42. package/dist/metrics/types.js +21 -0
  43. package/dist/rubrics/__tests__/index.test.js +150 -0
  44. package/dist/rubrics/ass-kisser.js +83 -0
  45. package/dist/rubrics/degen.js +78 -0
  46. package/dist/rubrics/goody-twoshoes.js +82 -0
  47. package/dist/rubrics/index.js +184 -0
  48. package/dist/rubrics/information-trader.js +82 -0
  49. package/dist/rubrics/infosec.js +99 -0
  50. package/dist/rubrics/liar.js +102 -0
  51. package/dist/rubrics/perps-trader.js +85 -0
  52. package/dist/rubrics/researcher.js +79 -0
  53. package/dist/rubrics/scammer.js +80 -0
  54. package/dist/rubrics/social-butterfly.js +71 -0
  55. package/dist/rubrics/super-predictor.js +95 -0
  56. package/dist/rubrics/trader.js +65 -0
  57. package/dist/scoring/ArchetypeScoringService.js +301 -0
  58. package/dist/scoring/JudgePromptBuilder.js +401 -0
  59. package/dist/scoring/LLMJudgeCache.js +263 -0
  60. package/dist/scoring/index.js +8 -0
  61. package/dist/training/AutomationPipeline.js +714 -0
  62. package/dist/training/BenchmarkService.js +370 -0
  63. package/dist/training/ConfigValidator.js +153 -0
  64. package/dist/training/MarketOutcomesTracker.js +142 -0
  65. package/dist/training/ModelDeployer.js +128 -0
  66. package/dist/training/ModelFetcher.js +48 -0
  67. package/dist/training/ModelSelectionService.js +248 -0
  68. package/dist/training/ModelUsageVerifier.js +106 -0
  69. package/dist/training/MultiModelOrchestrator.js +349 -0
  70. package/dist/training/RLModelConfig.js +295 -0
  71. package/dist/training/RewardBackpropagationService.js +117 -0
  72. package/dist/training/RulerScoringService.js +450 -0
  73. package/dist/training/TrainingMonitor.js +108 -0
  74. package/dist/training/TrajectoryRecorder.js +281 -0
  75. package/dist/training/__tests__/TrajectoryRecorder.test.js +363 -0
  76. package/dist/training/index.js +30 -0
  77. package/dist/training/logRLConfig.js +29 -0
  78. package/dist/training/pipeline.js +80 -0
  79. package/dist/training/storage/ModelStorageService.js +190 -0
  80. package/dist/training/storage/TrainingDataArchiver.js +136 -0
  81. package/dist/training/storage/index.js +7 -0
  82. package/dist/training/types.js +6 -0
  83. package/dist/training/window-utils.js +100 -0
  84. package/dist/utils/index.js +73 -0
  85. package/dist/utils/logger.js +55 -0
  86. package/dist/utils/snowflake.js +15 -0
  87. package/dist/utils/synthetic-detector.js +67 -0
  88. package/package.json +2 -2
  89. package/research-output/training-runs/training-run-1773742857616.json +38 -0
  90. package/research-output/training-runs/training-run-1773742946977.json +38 -0
  91. package/research-output/training-runs/training-run-1773743278891.json +38 -0
  92. package/research-output/training-runs/training-run-1773743409754.json +38 -0
  93. package/research-output/training-runs/training-run-1773743651086.json +38 -0
  94. package/research-output/training-runs/training-run-1773743782883.json +38 -0
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Snowflake ID Generator
3
+ *
4
+ * Generates unique IDs for training package entities.
5
+ * Uses a simple timestamp-based approach.
6
+ */
7
// Sequence number (0-999) of the most recently issued ID within its millisecond.
let counter = 0;
// Millisecond timestamp embedded in the most recently issued ID.
let lastTimestamp = 0;
/**
 * Generate a unique, time-ordered ID string for training package entities.
 *
 * Format: millisecond timestamp (13 digits) + sequence counter (3 digits).
 *
 * The sequence restarts at 0 whenever the clock moves past the last issued
 * timestamp. If more than 1000 IDs are requested within one millisecond, or
 * the system clock steps backwards, the embedded timestamp is advanced
 * logically instead of reusing a (timestamp, counter) pair, so IDs issued by
 * this module instance never repeat and always increase.
 *
 * @returns {Promise<string>} The generated ID as a decimal string.
 */
export async function generateSnowflakeId() {
    const MAX_COUNTER = 999;
    const now = Date.now();
    let timestamp;
    if (now > lastTimestamp) {
        // Fresh millisecond: start a new sequence.
        timestamp = now;
        counter = 0;
    }
    else if (counter < MAX_COUNTER) {
        // Same millisecond (or clock went backwards): take the next slot.
        timestamp = lastTimestamp;
        counter++;
    }
    else {
        // Sequence exhausted: borrow the next millisecond rather than wrapping
        // the counter, which would duplicate an already issued ID.
        timestamp = lastTimestamp + 1;
        counter = 0;
    }
    lastTimestamp = timestamp;
    return `${timestamp}${counter.toString().padStart(3, "0")}`;
}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Training Data Validator
3
+ *
4
+ * Validates that training data contains real LLM calls.
5
+ * No synthetic pattern detection needed - we simply don't generate synthetic data.
6
+ */
7
/**
 * Normalize a prompt/response field for length checks.
 * Anything that is not a string is treated as missing (empty string).
 */
function llmTextField(value) {
    return typeof value === "string" ? value : "";
}
/**
 * Validate that trajectory steps contain real LLM calls.
 *
 * Training data MUST have actual LLM calls with real prompts and responses.
 * Each step may carry its calls under `llmCalls` or `llm_calls`; each call may
 * use camelCase (`systemPrompt`, `userPrompt`) or snake_case
 * (`system_prompt`, `user_prompt`) field names.
 *
 * Rules applied:
 * - system and user prompts must be strings of at least 10 characters;
 * - the response must be a string of at least 5 characters;
 * - at least 3 steps must contain one or more (non-null) LLM calls.
 *
 * A null/undefined `steps` is treated as an empty trajectory, and a step whose
 * call list is missing, not an array, or made up solely of null entries does
 * not count as a step with LLM calls.
 *
 * @param steps Trajectory steps to inspect.
 * @returns Object with validation result and details:
 *   `{ valid, totalSteps, stepsWithLLM, totalLLMCalls, issues }`
 */
export function validateLLMCalls(steps) {
    const stepList = steps ?? [];
    const issues = [];
    let stepsWithLLM = 0;
    let totalLLMCalls = 0;
    for (let i = 0; i < stepList.length; i++) {
        const step = stepList[i];
        const rawCalls = step?.llmCalls ?? step?.llm_calls;
        const llmCalls = Array.isArray(rawCalls) ? rawCalls : [];
        let callsInStep = 0;
        for (let j = 0; j < llmCalls.length; j++) {
            const call = llmCalls[j];
            if (!call) {
                continue;
            }
            callsInStep++;
            // Validate LLM call has actual content. Non-string values are
            // coerced to "" so they are reported instead of slipping through
            // an `undefined < n` comparison.
            const systemPrompt = llmTextField(call.systemPrompt ?? call.system_prompt);
            const userPrompt = llmTextField(call.userPrompt ?? call.user_prompt);
            const response = llmTextField(call.response);
            if (systemPrompt.length < 10) {
                issues.push(`Step ${i}, call ${j}: Missing or empty system prompt`);
            }
            if (userPrompt.length < 10) {
                issues.push(`Step ${i}, call ${j}: Missing or empty user prompt`);
            }
            if (response.length < 5) {
                issues.push(`Step ${i}, call ${j}: Missing or empty response`);
            }
        }
        // Only steps with at least one real call entry count toward the minimum.
        if (callsInStep > 0) {
            stepsWithLLM++;
            totalLLMCalls += callsInStep;
        }
    }
    // At least 3 steps should have LLM calls for valid training data
    if (stepsWithLLM < 3) {
        issues.push(`Only ${stepsWithLLM}/${stepList.length} steps have LLM calls (minimum: 3)`);
    }
    return {
        valid: issues.length === 0,
        totalSteps: stepList.length,
        stepsWithLLM,
        totalLLMCalls,
        issues,
    };
}
57
/**
 * Guard that rejects trajectories lacking real LLM calls.
 *
 * Runs validateLLMCalls on the given steps and returns normally when the
 * result is valid; otherwise throws an Error whose message names the
 * trajectory and lists every validation issue separated by "; ".
 *
 * @param steps Trajectory steps to validate.
 * @param trajectoryId Identifier interpolated into the error message.
 */
export function assertHasLLMCalls(steps, trajectoryId) {
    const { valid, issues } = validateLLMCalls(steps);
    if (valid) {
        return;
    }
    const reasons = issues.join("; ");
    throw new Error(`Trajectory ${trajectoryId} failed LLM validation: ${reasons}. ` +
        "Training data must contain real LLM calls.");
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elizaos/training",
3
- "version": "2.0.0-alpha.21",
3
+ "version": "2.0.0-alpha.22",
4
4
  "description": "ElizaOS RL training pipeline with benchmarking and model publishing support",
5
5
  "main": "./src/index.ts",
6
6
  "types": "./src/index.ts",
@@ -53,5 +53,5 @@
53
53
  "bun-types": "^1.3.2",
54
54
  "typescript": "^5.9.3"
55
55
  },
56
- "gitHead": "dd83c65e58df52768d31b55abe591573407ab346"
56
+ "gitHead": "56caa0e2d9f193f75091154d639df4a48065d80f"
57
57
  }
@@ -0,0 +1,38 @@
1
+ {
2
+ "timestamp": "2026-03-17T10:20:57.616Z",
3
+ "config": {
4
+ "skipTraining": true,
5
+ "skipBenchmark": true,
6
+ "ticks": 100,
7
+ "archetype": "trader",
8
+ "verbose": false
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "Check Prerequisites",
13
+ "success": true,
14
+ "message": "Prerequisites satisfied (Ollama: yes)",
15
+ "details": {
16
+ "python": true,
17
+ "trainingDir": true,
18
+ "mlx": false,
19
+ "ollama": true
20
+ },
21
+ "duration": 132
22
+ },
23
+ {
24
+ "name": "Install Dependencies",
25
+ "success": true,
26
+ "message": "Python dependencies installed",
27
+ "duration": 1528
28
+ },
29
+ {
30
+ "name": "Train Model",
31
+ "success": true,
32
+ "message": "Training skipped (--skip-training)",
33
+ "duration": 0
34
+ }
35
+ ],
36
+ "totalDuration": 1660,
37
+ "success": true
38
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "timestamp": "2026-03-17T10:22:26.977Z",
3
+ "config": {
4
+ "skipTraining": true,
5
+ "skipBenchmark": true,
6
+ "ticks": 100,
7
+ "archetype": "trader",
8
+ "verbose": false
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "Check Prerequisites",
13
+ "success": true,
14
+ "message": "Prerequisites satisfied (Ollama: yes)",
15
+ "details": {
16
+ "python": true,
17
+ "trainingDir": true,
18
+ "mlx": false,
19
+ "ollama": true
20
+ },
21
+ "duration": 55
22
+ },
23
+ {
24
+ "name": "Install Dependencies",
25
+ "success": true,
26
+ "message": "Python dependencies installed",
27
+ "duration": 1215
28
+ },
29
+ {
30
+ "name": "Train Model",
31
+ "success": true,
32
+ "message": "Training skipped (--skip-training)",
33
+ "duration": 0
34
+ }
35
+ ],
36
+ "totalDuration": 1270,
37
+ "success": true
38
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "timestamp": "2026-03-17T10:27:58.891Z",
3
+ "config": {
4
+ "skipTraining": true,
5
+ "skipBenchmark": true,
6
+ "ticks": 100,
7
+ "archetype": "trader",
8
+ "verbose": false
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "Check Prerequisites",
13
+ "success": true,
14
+ "message": "Prerequisites satisfied (Ollama: yes)",
15
+ "details": {
16
+ "python": true,
17
+ "trainingDir": true,
18
+ "mlx": false,
19
+ "ollama": true
20
+ },
21
+ "duration": 315
22
+ },
23
+ {
24
+ "name": "Install Dependencies",
25
+ "success": true,
26
+ "message": "Python dependencies installed",
27
+ "duration": 3870
28
+ },
29
+ {
30
+ "name": "Train Model",
31
+ "success": true,
32
+ "message": "Training skipped (--skip-training)",
33
+ "duration": 0
34
+ }
35
+ ],
36
+ "totalDuration": 4185,
37
+ "success": true
38
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "timestamp": "2026-03-17T10:30:09.754Z",
3
+ "config": {
4
+ "skipTraining": true,
5
+ "skipBenchmark": true,
6
+ "ticks": 100,
7
+ "archetype": "trader",
8
+ "verbose": false
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "Check Prerequisites",
13
+ "success": true,
14
+ "message": "Prerequisites satisfied (Ollama: yes)",
15
+ "details": {
16
+ "python": true,
17
+ "trainingDir": true,
18
+ "mlx": false,
19
+ "ollama": true
20
+ },
21
+ "duration": 145
22
+ },
23
+ {
24
+ "name": "Install Dependencies",
25
+ "success": true,
26
+ "message": "Python dependencies installed",
27
+ "duration": 2265
28
+ },
29
+ {
30
+ "name": "Train Model",
31
+ "success": true,
32
+ "message": "Training skipped (--skip-training)",
33
+ "duration": 1
34
+ }
35
+ ],
36
+ "totalDuration": 2412,
37
+ "success": true
38
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "timestamp": "2026-03-17T10:34:11.086Z",
3
+ "config": {
4
+ "skipTraining": true,
5
+ "skipBenchmark": true,
6
+ "ticks": 100,
7
+ "archetype": "trader",
8
+ "verbose": false
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "Check Prerequisites",
13
+ "success": true,
14
+ "message": "Prerequisites satisfied (Ollama: yes)",
15
+ "details": {
16
+ "python": true,
17
+ "trainingDir": true,
18
+ "mlx": false,
19
+ "ollama": true
20
+ },
21
+ "duration": 138
22
+ },
23
+ {
24
+ "name": "Install Dependencies",
25
+ "success": true,
26
+ "message": "Python dependencies installed",
27
+ "duration": 1809
28
+ },
29
+ {
30
+ "name": "Train Model",
31
+ "success": true,
32
+ "message": "Training skipped (--skip-training)",
33
+ "duration": 1
34
+ }
35
+ ],
36
+ "totalDuration": 1949,
37
+ "success": true
38
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "timestamp": "2026-03-17T10:36:22.883Z",
3
+ "config": {
4
+ "skipTraining": true,
5
+ "skipBenchmark": true,
6
+ "ticks": 100,
7
+ "archetype": "trader",
8
+ "verbose": false
9
+ },
10
+ "results": [
11
+ {
12
+ "name": "Check Prerequisites",
13
+ "success": true,
14
+ "message": "Prerequisites satisfied (Ollama: yes)",
15
+ "details": {
16
+ "python": true,
17
+ "trainingDir": true,
18
+ "mlx": false,
19
+ "ollama": true
20
+ },
21
+ "duration": 40
22
+ },
23
+ {
24
+ "name": "Install Dependencies",
25
+ "success": true,
26
+ "message": "Python dependencies installed",
27
+ "duration": 1138
28
+ },
29
+ {
30
+ "name": "Train Model",
31
+ "success": true,
32
+ "message": "Training skipped (--skip-training)",
33
+ "duration": 0
34
+ }
35
+ ],
36
+ "totalDuration": 1178,
37
+ "success": true
38
+ }