@elizaos/training 2.0.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +75 -0
- package/LICENSE +21 -0
- package/Makefile +374 -0
- package/README.md +346 -0
- package/config/rubrics.json +137 -0
- package/docker-compose.test.yml +57 -0
- package/package.json +57 -0
- package/python/config/babylon_atropos.yaml +90 -0
- package/python/config/profiles/12gb.json +11 -0
- package/python/config/profiles/16gb.json +10 -0
- package/python/config/profiles/24gb.json +10 -0
- package/python/config/profiles/48gb.json +10 -0
- package/python/config/profiles/cpu.json +11 -0
- package/python/config/profiles/l40-2gpu-safe.json +20 -0
- package/python/config/profiles/l40-2gpu.json +22 -0
- package/python/config/profiles/l40-4gpu.json +21 -0
- package/python/config/profiles/l40.json +17 -0
- package/python/config/tinker_training.yaml +143 -0
- package/python/curriculum_state.json +165 -0
- package/python/env.template +86 -0
- package/python/env.training.template +46 -0
- package/python/pyproject.toml +41 -0
- package/python/requirements-ci.txt +31 -0
- package/python/requirements.txt +87 -0
- package/python/scripts/__init__.py +4 -0
- package/python/scripts/benchmark_should_respond.py +190 -0
- package/python/scripts/debug_inference.py +62 -0
- package/python/scripts/import_json_trajectories.py +412 -0
- package/python/scripts/local-finetune/README.md +63 -0
- package/python/scripts/local-finetune/ingest_and_score.py +139 -0
- package/python/scripts/local-finetune/merge_model.py +32 -0
- package/python/scripts/local-finetune/test_adapter.py +91 -0
- package/python/scripts/local-finetune/train_from_csv.py +132 -0
- package/python/scripts/merge_trajectories.py +318 -0
- package/python/scripts/optimize_prompt_grpo.py +269 -0
- package/python/scripts/run_ab_test.py +143 -0
- package/python/scripts/run_full_pipeline.py +544 -0
- package/python/scripts/run_tinker_training.py +192 -0
- package/python/scripts/run_training.py +914 -0
- package/python/scripts/test_generation.py +29 -0
- package/python/scripts/test_judge.py +155 -0
- package/python/scripts/test_pipeline.py +356 -0
- package/python/scripts/test_trained_model.py +380 -0
- package/python/scripts/train_grpo.py +360 -0
- package/python/scripts/train_jsonl.py +223 -0
- package/python/scripts/train_local.py +528 -0
- package/python/setup.py +20 -0
- package/python/src/__init__.py +190 -0
- package/python/src/data_bridge/__init__.py +24 -0
- package/python/src/data_bridge/converter.py +435 -0
- package/python/src/data_bridge/reader.py +393 -0
- package/python/src/models.py +283 -0
- package/python/src/training/__init__.py +605 -0
- package/python/src/training/ab_testing.py +404 -0
- package/python/src/training/action_executor.py +621 -0
- package/python/src/training/archetype_trainer.py +347 -0
- package/python/src/training/atropos_trainer.py +980 -0
- package/python/src/training/babylon_env.py +1254 -0
- package/python/src/training/error_recovery.py +647 -0
- package/python/src/training/evaluation.py +856 -0
- package/python/src/training/fast_simulator.py +880 -0
- package/python/src/training/format_validator.py +584 -0
- package/python/src/training/hybrid_env.py +522 -0
- package/python/src/training/kl_controller.py +628 -0
- package/python/src/training/multi_prompt_dataset.py +883 -0
- package/python/src/training/multi_turn.py +656 -0
- package/python/src/training/online_env.py +1084 -0
- package/python/src/training/quality_scorer.py +391 -0
- package/python/src/training/quality_utils.py +633 -0
- package/python/src/training/rewards.py +1344 -0
- package/python/src/training/rlaif_env.py +17 -0
- package/python/src/training/rollout_generator.py +502 -0
- package/python/src/training/rubric_loader.py +198 -0
- package/python/src/training/scenario_pool.py +1072 -0
- package/python/src/training/schemas.py +481 -0
- package/python/src/training/service_manager.py +552 -0
- package/python/src/training/simulation_bridge.py +535 -0
- package/python/src/training/tick_reward_attribution.py +399 -0
- package/python/src/training/tinker_client.py +575 -0
- package/python/src/training/tinker_trainer.py +646 -0
- package/python/src/training/tokenization_utils.py +402 -0
- package/python/tests/e2e/__init__.py +13 -0
- package/python/tests/e2e/conftest.py +258 -0
- package/python/tests/e2e/test_full_pipeline.py +643 -0
- package/python/tests/e2e/test_online_training_e2e.py +365 -0
- package/python/tests/integration/__init__.py +12 -0
- package/python/tests/integration/conftest.py +383 -0
- package/python/tests/integration/test_db_integration.py +649 -0
- package/python/tests/integration/test_json_mode_integration.py +554 -0
- package/python/tests/test_action_executor.py +594 -0
- package/python/tests/test_archetype_scoring.py +1027 -0
- package/python/tests/test_atropos_integration.py +360 -0
- package/python/tests/test_evaluation.py +727 -0
- package/python/tests/test_format_validator.py +486 -0
- package/python/tests/test_kl_controller.py +432 -0
- package/python/tests/test_lr_scheduler.py +579 -0
- package/python/tests/test_multi_turn.py +590 -0
- package/python/tests/test_online_env.py +519 -0
- package/python/tests/test_quality_scorer.py +474 -0
- package/python/tests/test_scenario_pool.py +735 -0
- package/python/tests/test_service_manager.py +585 -0
- package/python/tests/test_simulation_rollout.py +581 -0
- package/python/tests/test_tokenization_utils.py +501 -0
- package/python/tests/test_training_orchestrator.py +497 -0
- package/python/tests/test_training_output_structure.py +661 -0
- package/research-output/training-runs/training-run-1770772042899.json +26 -0
- package/research-output/training-runs/training-run-1770930079670.json +32 -0
- package/research-output/training-runs/training-run-1770930143700.json +44 -0
- package/research-output/training-runs/training-run-1770930183638.json +38 -0
- package/research-output/training-runs/training-run-1770930442049.json +38 -0
- package/research-output/training-runs/training-run-1770930793243.json +38 -0
- package/research-output/training-runs/training-run-1771276293257.json +38 -0
- package/research-output/training-runs/training-run-1771276389280.json +38 -0
- package/research-output/training-runs/training-run-1771276502776.json +38 -0
- package/research-output/training-runs/training-run-1771277340748.json +38 -0
- package/research-output/training-runs/training-run-1773013658993.json +38 -0
- package/research-output/training-runs/training-run-1773013861014.json +38 -0
- package/research-output/training-runs/training-run-1773014215983.json +38 -0
- package/scripts/assess-training-data.ts +422 -0
- package/scripts/e2e-training-test.ts +550 -0
- package/scripts/export-rubrics.ts +64 -0
- package/scripts/generate-research-report.ts +1523 -0
- package/scripts/generate_dataset.sh +173 -0
- package/scripts/generate_should_respond.ts +267 -0
- package/scripts/generate_should_respond_dataset.ts +162 -0
- package/scripts/json-mode-benchmark.ts +399 -0
- package/scripts/rank_trajectories.ts +207 -0
- package/scripts/real-archetype-benchmark.ts +210 -0
- package/scripts/run-baseline-comparison.ts +116 -0
- package/scripts/run-full-pipeline.ts +272 -0
- package/scripts/run_rlaif_loop.ts +78 -0
- package/scripts/run_task_benchmark.ts +247 -0
- package/scripts/runpod_setup.sh +137 -0
- package/scripts/runpod_validate.sh +147 -0
- package/scripts/test-model-in-game.ts +955 -0
- package/scripts/test-scoring.ts +73 -0
- package/scripts/test-trained-model.ts +209 -0
- package/scripts/train-and-test.ts +824 -0
- package/scripts/verify-final.ts +118 -0
- package/src/adapter.ts +516 -0
- package/src/archetypes/ArchetypeConfigService.ts +626 -0
- package/src/archetypes/derive-archetype.ts +249 -0
- package/src/archetypes/index.ts +22 -0
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
- package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
- package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
- package/src/benchmark/BenchmarkDataViewer.ts +324 -0
- package/src/benchmark/BenchmarkHistoryService.ts +221 -0
- package/src/benchmark/BenchmarkRunner.ts +685 -0
- package/src/benchmark/BenchmarkValidator.ts +204 -0
- package/src/benchmark/FastEvalRunner.ts +225 -0
- package/src/benchmark/MetricsValidator.ts +165 -0
- package/src/benchmark/MetricsVisualizer.ts +909 -0
- package/src/benchmark/ModelBenchmarkService.ts +611 -0
- package/src/benchmark/ModelRegistry.ts +158 -0
- package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
- package/src/benchmark/SimulationA2AInterface.ts +1169 -0
- package/src/benchmark/SimulationEngine.ts +832 -0
- package/src/benchmark/TaskRunner.ts +94 -0
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
- package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
- package/src/benchmark/index.ts +91 -0
- package/src/benchmark/parseSimulationMetrics.ts +124 -0
- package/src/benchmark/simulation-types.ts +78 -0
- package/src/dependencies.ts +475 -0
- package/src/generation/TrajectoryGenerator.ts +387 -0
- package/src/generation/index.ts +12 -0
- package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
- package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
- package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
- package/src/huggingface/index.ts +27 -0
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
- package/src/index.ts +102 -0
- package/src/init-training.ts +53 -0
- package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
- package/src/metrics/index.ts +8 -0
- package/src/metrics/types.ts +200 -0
- package/src/rubrics/__tests__/index.test.ts +184 -0
- package/src/rubrics/ass-kisser.ts +85 -0
- package/src/rubrics/degen.ts +80 -0
- package/src/rubrics/goody-twoshoes.ts +84 -0
- package/src/rubrics/index.ts +236 -0
- package/src/rubrics/information-trader.ts +84 -0
- package/src/rubrics/infosec.ts +101 -0
- package/src/rubrics/liar.ts +104 -0
- package/src/rubrics/perps-trader.ts +87 -0
- package/src/rubrics/researcher.ts +81 -0
- package/src/rubrics/scammer.ts +82 -0
- package/src/rubrics/social-butterfly.ts +73 -0
- package/src/rubrics/super-predictor.ts +97 -0
- package/src/rubrics/trader.ts +67 -0
- package/src/scoring/ArchetypeScoringService.ts +486 -0
- package/src/scoring/JudgePromptBuilder.ts +556 -0
- package/src/scoring/LLMJudgeCache.ts +401 -0
- package/src/scoring/index.ts +9 -0
- package/src/training/AutomationPipeline.ts +916 -0
- package/src/training/BenchmarkService.ts +518 -0
- package/src/training/ConfigValidator.ts +220 -0
- package/src/training/MarketOutcomesTracker.ts +187 -0
- package/src/training/ModelDeployer.ts +186 -0
- package/src/training/ModelFetcher.ts +76 -0
- package/src/training/ModelSelectionService.ts +341 -0
- package/src/training/ModelUsageVerifier.ts +160 -0
- package/src/training/MultiModelOrchestrator.ts +580 -0
- package/src/training/RLModelConfig.ts +407 -0
- package/src/training/RewardBackpropagationService.ts +149 -0
- package/src/training/RulerScoringService.ts +666 -0
- package/src/training/TrainingMonitor.ts +166 -0
- package/src/training/TrajectoryRecorder.ts +399 -0
- package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
- package/src/training/index.ts +100 -0
- package/src/training/logRLConfig.ts +34 -0
- package/src/training/pipeline.ts +129 -0
- package/src/training/storage/ModelStorageService.ts +279 -0
- package/src/training/storage/TrainingDataArchiver.ts +197 -0
- package/src/training/storage/index.ts +17 -0
- package/src/training/types.ts +207 -0
- package/src/training/window-utils.ts +138 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +59 -0
- package/src/utils/snowflake.ts +17 -0
- package/src/utils/synthetic-detector.ts +111 -0
- package/tsconfig.json +20 -0
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Integration tests for Atropos RLAIF implementation
|
|
3
|
+
|
|
4
|
+
Tests:
|
|
5
|
+
1. Module imports work correctly
|
|
6
|
+
2. Data conversion functions work
|
|
7
|
+
3. Reward functions produce valid outputs
|
|
8
|
+
4. Environment can be instantiated (mock mode)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import pytest
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from typing import Dict
|
|
14
|
+
|
|
15
|
+
# Check for optional dependencies
|
|
16
|
+
try:
|
|
17
|
+
import torch # noqa: F401
|
|
18
|
+
HAS_TORCH = True
|
|
19
|
+
except ImportError:
|
|
20
|
+
HAS_TORCH = False
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
import wandb # noqa: F401
|
|
24
|
+
HAS_WANDB = True
|
|
25
|
+
except ImportError:
|
|
26
|
+
HAS_WANDB = False
|
|
27
|
+
|
|
28
|
+
requires_torch = pytest.mark.skipif(not HAS_TORCH, reason="torch not installed")
|
|
29
|
+
requires_wandb = pytest.mark.skipif(not HAS_WANDB, reason="wandb not installed")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Smoke tests: every public module of the package must be importable.
class TestImports:
    """Verify all modules can be imported"""

    def test_import_models(self):
        from src.models import AtroposScoredGroup, TrainingTrajectory

        assert TrainingTrajectory is not None
        assert AtroposScoredGroup is not None

    def test_import_converter(self):
        from src.data_bridge import ScoredGroupResult, TrajectoryToAtroposConverter

        assert TrajectoryToAtroposConverter is not None
        assert ScoredGroupResult is not None

    def test_import_rewards(self):
        from src.training.rewards import RewardNormalizer, pnl_reward

        assert pnl_reward is not None
        assert RewardNormalizer is not None

    @requires_torch
    def test_import_trainer(self):
        from src.training import AtroposTrainer

        assert AtroposTrainer is not None

    @requires_wandb
    def test_import_environment(self):
        from src.training import RLAIFEnv

        assert RLAIFEnv is not None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class TestRewardFunctions:
    """Test reward calculation functions using archetype-aware API"""

    @staticmethod
    def _inputs(pnl, start, end, **extra):
        """Build a TrajectoryRewardInputs with the common balance fields."""
        from src.training.rewards import TrajectoryRewardInputs

        return TrajectoryRewardInputs(
            final_pnl=pnl,
            starting_balance=start,
            end_balance=end,
            **extra,
        )

    def test_pnl_reward_positive(self):
        from src.training.rewards import pnl_reward

        # A gain over the starting balance must map to a positive reward.
        assert pnl_reward(self._inputs(500.0, 10000.0, 10500.0)) > 0.0

    def test_pnl_reward_negative(self):
        from src.training.rewards import pnl_reward

        # A loss must map to a negative reward.
        assert pnl_reward(self._inputs(-500.0, 10000.0, 9500.0)) < 0.0

    def test_pnl_reward_zero(self):
        from src.training.rewards import pnl_reward

        # Flat P&L should land at approximately zero.
        assert -0.1 <= pnl_reward(self._inputs(0.0, 10000.0, 10000.0)) <= 0.1

    def test_archetype_composite_reward(self):
        from src.training.rewards import (
            BehaviorMetrics,
            archetype_composite_reward,
        )

        inputs = self._inputs(
            500.0,
            10000.0,
            10500.0,
            format_score=0.8,
            reasoning_score=0.75,
        )
        behavior = BehaviorMetrics(
            trades_executed=5,
            total_pnl=500.0,
            episode_length=10,
        )

        # Composite reward for the "trader" archetype is normalized to [0, 1].
        assert 0.0 <= archetype_composite_reward(inputs, "trader", behavior) <= 1.0

    def test_composite_reward_with_inputs(self):
        from src.training.rewards import composite_reward

        assert 0.0 <= composite_reward(self._inputs(500.0, 10000.0, 10500.0)) <= 1.0

    def test_relative_scores(self):
        from src.training.rewards import relative_scores

        # relative_scores expects a list of raw reward floats
        scores = relative_scores([0.8, 0.5, 0.2])

        # Normalized into [0, 1] with the original ordering preserved.
        assert all(0.0 <= s <= 1.0 for s in scores)
        assert scores[0] > scores[1] > scores[2]

    def test_reward_normalizer(self):
        from src.training.rewards import RewardNormalizer

        normalizer = RewardNormalizer()

        # Feed a handful of observations, then normalize a mid-range value.
        for sample in (0.5, 0.6, 0.7, 0.8, 0.55, 0.65, 0.75, 0.85):
            normalizer.update(sample)

        assert isinstance(normalizer.normalize(0.65), float)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class TestConverter:
    """Test Trajectory to Atropos conversion"""

    def create_sample_trajectory(self) -> "TrainingTrajectory":
        """Create a sample 5-step trajectory for testing.

        Returns a ``TrainingTrajectory`` model instance. (The previous
        ``-> Dict`` annotation was inaccurate: the function builds and
        returns the model, never a plain dict.)
        """
        from src.models import (
            TrainingTrajectory,
            TrajectoryStep,
            EnvironmentState,
            Action,
            LLMCall,
        )

        steps = []
        for i in range(5):
            step = TrajectoryStep(
                step_number=i,
                timestamp=1000000 + i * 1000,
                environment_state=EnvironmentState(
                    agent_balance=10000.0 + i * 100,
                    agent_pnl=i * 100.0,
                    open_positions=i,
                ),
                provider_accesses=[],
                llm_calls=[
                    LLMCall(
                        model="gpt-4",
                        system_prompt="You are a trading agent",
                        user_prompt=f"Market update {i}",
                        response=f"Action {i}",
                        temperature=0.7,
                        max_tokens=100,
                        purpose="action",
                    )
                ],
                action=Action(
                    action_type="trade",
                    parameters={"amount": 100},
                    success=True,
                ),
                reward=0.1,
            )
            steps.append(step)

        return TrainingTrajectory(
            id="test-1",
            trajectory_id="traj-1",
            agent_id="agent-1",
            window_id="2024-01-01T00:00",
            start_time=datetime.now(),
            end_time=datetime.now(),
            duration_ms=5000,
            steps=steps,
            total_reward=0.5,
            final_pnl=400.0,
            episode_length=5,
            final_status="completed",
        )

    def test_convert_trajectory(self):
        """A single trajectory converts to messages plus metadata."""
        from src.data_bridge import TrajectoryToAtroposConverter

        converter = TrajectoryToAtroposConverter()
        traj = self.create_sample_trajectory()

        result = converter.convert_trajectory(traj)

        assert result is not None
        assert len(result.messages) >= 3  # system + at least one exchange
        assert result.metadata["trajectory_id"] == "traj-1"
        assert result.metadata["final_pnl"] == 400.0

    def test_convert_window_group(self):
        """A window group converts with aligned scores and messages."""
        from src.data_bridge import TrajectoryToAtroposConverter

        converter = TrajectoryToAtroposConverter()
        trajs = [self.create_sample_trajectory() for _ in range(4)]

        # Give each trajectory a distinct ID within the window group.
        for i, t in enumerate(trajs):
            t.trajectory_id = f"traj-{i}"

        result = converter.convert_window_group(trajs, None)

        assert result.group_size == 4
        assert len(result.scores) == 4
        assert len(result.messages) == 4

    def test_dropout(self):
        """A 0.5 dropout rate should skip roughly half the trajectories."""
        from src.data_bridge import TrajectoryToAtroposConverter

        converter = TrajectoryToAtroposConverter(dropout_rate=0.5)

        dropped_count = 0
        for _ in range(100):
            traj = self.create_sample_trajectory()
            result = converter.convert_trajectory(traj)
            if result is None:
                dropped_count += 1

        # Should drop roughly 50%; the wide 30-70 band is about four
        # standard deviations out, so statistical flakes are negligible.
        assert 30 < dropped_count < 70
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
@requires_torch
class TestTrainerConfig:
    """Test trainer configuration (requires torch)"""

    def test_default_config(self):
        from src.training import AtroposTrainingConfig

        config = AtroposTrainingConfig()

        # Defaults baked into the config object.
        expected_defaults = {
            "model_name": "Qwen/Qwen2.5-3B-Instruct",
            "learning_rate": 1e-5,
            "training_steps": 100,
        }
        for attr, value in expected_defaults.items():
            assert getattr(config, attr) == value

    def test_custom_config(self):
        from src.training import AtroposTrainingConfig

        # Overrides passed as keywords must round-trip unchanged.
        overrides = {
            "model_name": "Qwen/Qwen2.5-7B-Instruct",
            "training_steps": 50,
            "learning_rate": 5e-6,
        }
        config = AtroposTrainingConfig(**overrides)

        for attr, value in overrides.items():
            assert getattr(config, attr) == value
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
@requires_wandb
class TestEnvironmentConfig:
    """Test environment configuration (requires wandb)"""

    def test_default_config(self):
        from src.training import RLAIFEnvConfig

        config = RLAIFEnvConfig()

        # Defaults baked into the config object.
        assert config.group_size == 4
        assert config.lookback_hours == 72
        assert config.min_agents_per_window == 2

    def test_custom_config(self):
        from src.training import RLAIFEnvConfig

        # Overrides passed as keywords must round-trip unchanged.
        overrides = {
            "group_size": 8,
            "lookback_hours": 48,
            "judge_model": "gpt-4",
        }
        config = RLAIFEnvConfig(**overrides)

        for attr, value in overrides.items():
            assert getattr(config, attr) == value
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
class TestCalculateDropoutRate:
    """Test dropout rate calculation"""

    def test_no_dropout_needed(self):
        from src.data_bridge import calculate_dropout_rate

        # Below target: nothing should be dropped.
        assert calculate_dropout_rate(500, target_trajectories=1000) == 0.0

    def test_dropout_needed(self):
        from src.data_bridge import calculate_dropout_rate

        # Twice the target: some dropout, but within the default bound.
        rate = calculate_dropout_rate(2000, target_trajectories=1000)
        assert 0.0 < rate <= 0.3

    def test_max_dropout_cap(self):
        from src.data_bridge import calculate_dropout_rate

        # Far above target: the rate is clamped at max_dropout.
        assert calculate_dropout_rate(10000, target_trajectories=1000, max_dropout=0.2) == 0.2
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
# Run tests with: pytest tests/test_atropos_integration.py -v
if __name__ == "__main__":
    # Allow running this file directly as a script.
    cli_args = [__file__, "-v"]
    pytest.main(cli_args)
|
|
360
|
+
|