npm - @elizaos/training - Versions diffs - 2.0.0-alpha.41 → 2.0.0-alpha.44 - Mend

@elizaos/training 2.0.0-alpha.41 → 2.0.0-alpha.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95)

package/package.json +2 -2
package/research-output/training-runs/training-run-1773726941205.json +38 -0
package/research-output/training-runs/training-run-1773742857616.json +38 -0
package/research-output/training-runs/training-run-1773742946977.json +38 -0
package/research-output/training-runs/training-run-1773743278891.json +38 -0
package/research-output/training-runs/training-run-1773743409754.json +38 -0
package/research-output/training-runs/training-run-1773743651086.json +38 -0
package/research-output/training-runs/training-run-1773743782883.json +38 -0
package/research-output/training-runs/training-run-1773755075895.json +38 -0
package/research-output/training-runs/training-run-1773755142682.json +38 -0
package/scripts/rank_trajectories.ts +20 -6
package/scripts/run_task_benchmark.ts +7 -13
package/src/adapter.ts +96 -49
package/src/archetypes/ArchetypeConfigService.ts +276 -264
package/src/archetypes/derive-archetype.ts +47 -47
package/src/archetypes/index.ts +2 -2
package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
package/src/benchmark/BenchmarkDataGenerator.ts +162 -152
package/src/benchmark/BenchmarkDataViewer.ts +98 -97
package/src/benchmark/BenchmarkHistoryService.ts +13 -12
package/src/benchmark/BenchmarkRunner.ts +94 -85
package/src/benchmark/BenchmarkValidator.ts +48 -46
package/src/benchmark/FastEvalRunner.ts +17 -16
package/src/benchmark/MetricsValidator.ts +141 -141
package/src/benchmark/MetricsVisualizer.ts +92 -85
package/src/benchmark/ModelBenchmarkService.ts +90 -82
package/src/benchmark/ModelRegistry.ts +44 -44
package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
package/src/benchmark/SimulationA2AInterface.ts +118 -118
package/src/benchmark/SimulationEngine.ts +55 -54
package/src/benchmark/TaskRunner.ts +87 -79
package/src/benchmark/__tests__/BenchmarkRunner.test.ts +82 -82
package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
package/src/benchmark/index.ts +27 -27
package/src/benchmark/parseSimulationMetrics.ts +32 -32
package/src/benchmark/simulation-types.ts +10 -10
package/src/dependencies.ts +34 -34
package/src/generation/TrajectoryGenerator.ts +39 -37
package/src/generation/index.ts +1 -1
package/src/huggingface/HuggingFaceDatasetUploader.ts +74 -73
package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
package/src/huggingface/index.ts +6 -6
package/src/huggingface/shared/HuggingFaceUploadUtil.ts +66 -59
package/src/index.ts +30 -27
package/src/init-training.ts +6 -6
package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
package/src/metrics/index.ts +2 -2
package/src/rubrics/__tests__/index.test.ts +73 -73
package/src/rubrics/ass-kisser.ts +6 -6
package/src/rubrics/degen.ts +6 -6
package/src/rubrics/goody-twoshoes.ts +6 -6
package/src/rubrics/index.ts +50 -50
package/src/rubrics/information-trader.ts +6 -6
package/src/rubrics/infosec.ts +6 -6
package/src/rubrics/liar.ts +6 -6
package/src/rubrics/perps-trader.ts +6 -6
package/src/rubrics/researcher.ts +6 -6
package/src/rubrics/scammer.ts +6 -6
package/src/rubrics/social-butterfly.ts +7 -7
package/src/rubrics/super-predictor.ts +6 -6
package/src/rubrics/trader.ts +5 -5
package/src/scoring/ArchetypeScoringService.ts +56 -54
package/src/scoring/JudgePromptBuilder.ts +96 -96
package/src/scoring/LLMJudgeCache.ts +26 -23
package/src/scoring/index.ts +3 -3
package/src/training/AutomationPipeline.ts +166 -154
package/src/training/BenchmarkService.ts +53 -47
package/src/training/ConfigValidator.ts +202 -190
package/src/training/MarketOutcomesTracker.ts +22 -12
package/src/training/ModelDeployer.ts +15 -15
package/src/training/ModelFetcher.ts +7 -7
package/src/training/ModelSelectionService.ts +32 -32
package/src/training/ModelUsageVerifier.ts +31 -24
package/src/training/MultiModelOrchestrator.ts +44 -44
package/src/training/RLModelConfig.ts +57 -57
package/src/training/RewardBackpropagationService.ts +18 -17
package/src/training/RulerScoringService.ts +86 -79
package/src/training/TrainingMonitor.ts +29 -29
package/src/training/TrajectoryRecorder.ts +40 -30
package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
package/src/training/index.ts +36 -36
package/src/training/logRLConfig.ts +7 -7
package/src/training/pipeline.ts +13 -16
package/src/training/storage/ModelStorageService.ts +32 -32
package/src/training/storage/TrainingDataArchiver.ts +21 -21
package/src/training/storage/index.ts +2 -2
package/src/training/types.ts +6 -6
package/src/training/window-utils.ts +14 -14
package/src/utils/index.ts +7 -7
package/src/utils/logger.ts +5 -5
package/src/utils/snowflake.ts +1 -1
package/src/utils/synthetic-detector.ts +7 -7

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@elizaos/training",
-  "version": "2.0.0-alpha.41",
+  "version": "2.0.0-alpha.44",
   "description": "ElizaOS RL training pipeline with benchmarking and model publishing support",
   "main": "./src/index.ts",
   "types": "./src/index.ts",
@@ -53,5 +53,5 @@
     "bun-types": "^1.3.2",
     "typescript": "^5.9.3"
   },
-  "gitHead": "b3e37e421bcd49b6bc7a34373edc7b3b3a282b8b"
+  "gitHead": "2b27a4e70ebdf054b117b87ed9e8f9f709fe006b"
 }

package/research-output/training-runs/training-run-1773726941205.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "timestamp": "2026-03-17T05:55:41.205Z",
+  "config": {
+    "skipTraining": true,
+    "skipBenchmark": true,
+    "ticks": 100,
+    "archetype": "trader",
+    "verbose": false
+  },
+  "results": [
+    {
+      "name": "Check Prerequisites",
+      "success": true,
+      "message": "Prerequisites satisfied (Ollama: yes)",
+      "details": {
+        "python": true,
+        "trainingDir": true,
+        "mlx": false,
+        "ollama": true
+      },
+      "duration": 139
+    },
+    {
+      "name": "Install Dependencies",
+      "success": true,
+      "message": "Python dependencies installed",
+      "duration": 2130
+    },
+    {
+      "name": "Train Model",
+      "success": true,
+      "message": "Training skipped (--skip-training)",
+      "duration": 0
+    }
+  ],
+  "totalDuration": 2270,
+  "success": true
+}

package/research-output/training-runs/training-run-1773742857616.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "timestamp": "2026-03-17T10:20:57.616Z",
+  "config": {
+    "skipTraining": true,
+    "skipBenchmark": true,
+    "ticks": 100,
+    "archetype": "trader",
+    "verbose": false
+  },
+  "results": [
+    {
+      "name": "Check Prerequisites",
+      "success": true,
+      "message": "Prerequisites satisfied (Ollama: yes)",
+      "details": {
+        "python": true,
+        "trainingDir": true,
+        "mlx": false,
+        "ollama": true
+      },
+      "duration": 132
+    },
+    {
+      "name": "Install Dependencies",
+      "success": true,
+      "message": "Python dependencies installed",
+      "duration": 1528
+    },
+    {
+      "name": "Train Model",
+      "success": true,
+      "message": "Training skipped (--skip-training)",
+      "duration": 0
+    }
+  ],
+  "totalDuration": 1660,
+  "success": true
+}

package/research-output/training-runs/training-run-1773742946977.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "timestamp": "2026-03-17T10:22:26.977Z",
+  "config": {
+    "skipTraining": true,
+    "skipBenchmark": true,
+    "ticks": 100,
+    "archetype": "trader",
+    "verbose": false
+  },
+  "results": [
+    {
+      "name": "Check Prerequisites",
+      "success": true,
+      "message": "Prerequisites satisfied (Ollama: yes)",
+      "details": {
+        "python": true,
+        "trainingDir": true,
+        "mlx": false,
+        "ollama": true
+      },
+      "duration": 55
+    },
+    {
+      "name": "Install Dependencies",
+      "success": true,
+      "message": "Python dependencies installed",
+      "duration": 1215
+    },
+    {
+      "name": "Train Model",
+      "success": true,
+      "message": "Training skipped (--skip-training)",
+      "duration": 0
+    }
+  ],
+  "totalDuration": 1270,
+  "success": true
+}

package/research-output/training-runs/training-run-1773743278891.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "timestamp": "2026-03-17T10:27:58.891Z",
+  "config": {
+    "skipTraining": true,
+    "skipBenchmark": true,
+    "ticks": 100,
+    "archetype": "trader",
+    "verbose": false
+  },
+  "results": [
+    {
+      "name": "Check Prerequisites",
+      "success": true,
+      "message": "Prerequisites satisfied (Ollama: yes)",
+      "details": {
+        "python": true,
+        "trainingDir": true,
+        "mlx": false,
+        "ollama": true
+      },
+      "duration": 315
+    },
+    {
+      "name": "Install Dependencies",
+      "success": true,
+      "message": "Python dependencies installed",
+      "duration": 3870
+    },
+    {
+      "name": "Train Model",
+      "success": true,
+      "message": "Training skipped (--skip-training)",
+      "duration": 0
+    }
+  ],
+  "totalDuration": 4185,
+  "success": true
+}

package/research-output/training-runs/training-run-1773743409754.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "timestamp": "2026-03-17T10:30:09.754Z",
+  "config": {
+    "skipTraining": true,
+    "skipBenchmark": true,
+    "ticks": 100,
+    "archetype": "trader",
+    "verbose": false
+  },
+  "results": [
+    {
+      "name": "Check Prerequisites",
+      "success": true,
+      "message": "Prerequisites satisfied (Ollama: yes)",
+      "details": {
+        "python": true,
+        "trainingDir": true,
+        "mlx": false,
+        "ollama": true
+      },
+      "duration": 145
+    },
+    {
+      "name": "Install Dependencies",
+      "success": true,
+      "message": "Python dependencies installed",
+      "duration": 2265
+    },
+    {
+      "name": "Train Model",
+      "success": true,
+      "message": "Training skipped (--skip-training)",
+      "duration": 1
+    }
+  ],
+  "totalDuration": 2412,
+  "success": true
+}

package/research-output/training-runs/training-run-1773743651086.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "timestamp": "2026-03-17T10:34:11.086Z",
+  "config": {
+    "skipTraining": true,
+    "skipBenchmark": true,
+    "ticks": 100,
+    "archetype": "trader",
+    "verbose": false
+  },
+  "results": [
+    {
+      "name": "Check Prerequisites",
+      "success": true,
+      "message": "Prerequisites satisfied (Ollama: yes)",
+      "details": {
+        "python": true,
+        "trainingDir": true,
+        "mlx": false,
+        "ollama": true
+      },
+      "duration": 138
+    },
+    {
+      "name": "Install Dependencies",
+      "success": true,
+      "message": "Python dependencies installed",
+      "duration": 1809
+    },
+    {
+      "name": "Train Model",
+      "success": true,
+      "message": "Training skipped (--skip-training)",
+      "duration": 1
+    }
+  ],
+  "totalDuration": 1949,
+  "success": true
+}

package/research-output/training-runs/training-run-1773743782883.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "timestamp": "2026-03-17T10:36:22.883Z",
+  "config": {
+    "skipTraining": true,
+    "skipBenchmark": true,
+    "ticks": 100,
+    "archetype": "trader",
+    "verbose": false
+  },
+  "results": [
+    {
+      "name": "Check Prerequisites",
+      "success": true,
+      "message": "Prerequisites satisfied (Ollama: yes)",
+      "details": {
+        "python": true,
+        "trainingDir": true,
+        "mlx": false,
+        "ollama": true
+      },
+      "duration": 40
+    },
+    {
+      "name": "Install Dependencies",
+      "success": true,
+      "message": "Python dependencies installed",
+      "duration": 1138
+    },
+    {
+      "name": "Train Model",
+      "success": true,
+      "message": "Training skipped (--skip-training)",
+      "duration": 0
+    }
+  ],
+  "totalDuration": 1178,
+  "success": true
+}

package/research-output/training-runs/training-run-1773755075895.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "timestamp": "2026-03-17T13:44:35.895Z",
+  "config": {
+    "skipTraining": true,
+    "skipBenchmark": true,
+    "ticks": 100,
+    "archetype": "trader",
+    "verbose": false
+  },
+  "results": [
+    {
+      "name": "Check Prerequisites",
+      "success": true,
+      "message": "Prerequisites satisfied (Ollama: yes)",
+      "details": {
+        "python": true,
+        "trainingDir": true,
+        "mlx": false,
+        "ollama": true
+      },
+      "duration": 138
+    },
+    {
+      "name": "Install Dependencies",
+      "success": true,
+      "message": "Python dependencies installed",
+      "duration": 2068
+    },
+    {
+      "name": "Train Model",
+      "success": true,
+      "message": "Training skipped (--skip-training)",
+      "duration": 0
+    }
+  ],
+  "totalDuration": 2208,
+  "success": true
+}

package/research-output/training-runs/training-run-1773755142682.json ADDED Viewed

@@ -0,0 +1,38 @@
+{
+  "timestamp": "2026-03-17T13:45:42.682Z",
+  "config": {
+    "skipTraining": true,
+    "skipBenchmark": true,
+    "ticks": 100,
+    "archetype": "trader",
+    "verbose": false
+  },
+  "results": [
+    {
+      "name": "Check Prerequisites",
+      "success": true,
+      "message": "Prerequisites satisfied (Ollama: yes)",
+      "details": {
+        "python": true,
+        "trainingDir": true,
+        "mlx": false,
+        "ollama": true
+      },
+      "duration": 132
+    },
+    {
+      "name": "Install Dependencies",
+      "success": true,
+      "message": "Python dependencies installed",
+      "duration": 2688
+    },
+    {
+      "name": "Train Model",
+      "success": true,
+      "message": "Training skipped (--skip-training)",
+      "duration": 0
+    }
+  ],
+  "totalDuration": 2820,
+  "success": true
+}

package/scripts/rank_trajectories.ts CHANGED Viewed

@@ -67,7 +67,6 @@ async function main() {
     // Initialize Judge Runtime
     const character = {
         name: 'JudgeAgent',
-        modelProvider: "openai" as any,
         bio: ['I am an impartial AI judge.'],
         settings: {
             secrets: {
@@ -138,7 +137,18 @@ async function main() {
     console.log(`Found ${lines.length} trajectories to rank.`);
-    const scoredTrajectories = [];
+    interface ScoredTrajectory {
+        trajectoryId?: string;
+        steps?: Array<{
+            action?: { parameters?: { text?: string } };
+        }>;
+        metadata?: { task?: string };
+        score?: number;
+        reasoning?: string;
+        isScored?: boolean;
+    }
+    const scoredTrajectories: ScoredTrajectory[] = [];
     // Clear output file first if overwriting
     if (fs.existsSync(outputFile)) {
@@ -147,12 +157,12 @@ async function main() {
     for (const line of lines) {
         try {
-            const trajectory = JSON.parse(line);
+            const trajectory = JSON.parse(line) as ScoredTrajectory;
             const { steps, metadata } = trajectory;
             const task = metadata?.task || 'Unknown Task';
             // Extract the last step's action/response
-            const lastStep = steps[steps.length - 1];
+            const lastStep = steps && steps.length > 0 ? steps[steps.length - 1] : undefined;
             const response = lastStep?.action?.parameters?.text || "No response found";
             console.log(`Ranking trajectory ${trajectory.trajectoryId}...`);
@@ -177,11 +187,15 @@ Return ONLY valid JSON.
             const resultText = typeof result === 'string' ? result : result.text;
             // Parse JSON
-            let scoreData;
+            interface ScoreData {
+                score: number;
+                reasoning: string;
+            }
+            let scoreData: ScoreData;
             try {
                 // simple cleanup for markdown code blocks
                 const jsonStr = resultText.replace(/```json/g, '').replace(/```/g, '').trim();
-                scoreData = JSON.parse(jsonStr);
+                scoreData = JSON.parse(jsonStr) as ScoreData;
             } catch (e) {
                 console.warn(`Failed to parse judge output for ${trajectory.trajectoryId}: ${resultText}`);
                 scoreData = { score: 0, reasoning: "Parse Error" };

package/scripts/run_task_benchmark.ts CHANGED Viewed

@@ -50,8 +50,7 @@ class BenchmarkRuntimeManager implements IAgentRuntimeManager {
         // Create a new runtime
         const character = {
             name: 'BenchmarkAgent',
-            modelProvider: "openai" as any,
-            bio: 'A helpful assistant for benchmarking.',
+            bio: ['A helpful assistant for benchmarking.'],
             settings: {
                 secrets: {
                     OPENAI_API_KEY: process.env.OPENAI_API_KEY || ''
@@ -60,13 +59,7 @@ class BenchmarkRuntimeManager implements IAgentRuntimeManager {
         };
         const runtime = new AgentRuntime({
-            token: process.env.OPENAI_API_KEY || '',
-            modelProvider: "openai" as any,
             character,
-            plugins: [],
-            providers: [],
-            actions: [],
-            evaluators: [],
         });
         // We must initialize with allowNoDatabase to avoid DB error
@@ -116,7 +109,7 @@ class BenchmarkTaskInteractor implements ITaskInteractor {
             const userMemory: Memory = {
                 id: messageId as `${string}-${string}-${string}-${string}-${string}`,
-                userId: userId as `${string}-${string}-${string}-${string}-${string}`,
+                entityId: userId as `${string}-${string}-${string}-${string}-${string}`,
                 agentId: runtime.agentId,
                 roomId: roomId as `${string}-${string}-${string}-${string}-${string}`,
                 content: {
@@ -143,6 +136,7 @@ Assistant:`;
             // Signature: generateText(input: string, options?: GenerateTextOptions)
             const result = await runtime.generateText(context, {
                 modelType: ModelType.TEXT_SMALL,
+                stopSequences: [],
             });
             // Handle both string and object return types for safety
             const response = typeof result === 'string' ? result : result.text;
@@ -227,11 +221,11 @@ async function main() {
         agentService: new BenchmarkAgentService(),
         agentRuntimeManager: new BenchmarkRuntimeManager(),
         autonomousCoordinator: {
-            executeAutonomousTick: async () => ({ success: true })
-        } as any,
+            executeAutonomousTick: async () => ({ success: true }),
+        },
         llmCaller: {
-            callGroqDirect: async () => "mock response"
-        } as any,
+            callGroqDirect: async () => "mock response",
+        },
     });
     // Import task interactor config