@elizaos/training 2.0.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +75 -0
- package/LICENSE +21 -0
- package/Makefile +374 -0
- package/README.md +346 -0
- package/config/rubrics.json +137 -0
- package/docker-compose.test.yml +57 -0
- package/package.json +57 -0
- package/python/config/babylon_atropos.yaml +90 -0
- package/python/config/profiles/12gb.json +11 -0
- package/python/config/profiles/16gb.json +10 -0
- package/python/config/profiles/24gb.json +10 -0
- package/python/config/profiles/48gb.json +10 -0
- package/python/config/profiles/cpu.json +11 -0
- package/python/config/profiles/l40-2gpu-safe.json +20 -0
- package/python/config/profiles/l40-2gpu.json +22 -0
- package/python/config/profiles/l40-4gpu.json +21 -0
- package/python/config/profiles/l40.json +17 -0
- package/python/config/tinker_training.yaml +143 -0
- package/python/curriculum_state.json +165 -0
- package/python/env.template +86 -0
- package/python/env.training.template +46 -0
- package/python/pyproject.toml +41 -0
- package/python/requirements-ci.txt +31 -0
- package/python/requirements.txt +87 -0
- package/python/scripts/__init__.py +4 -0
- package/python/scripts/benchmark_should_respond.py +190 -0
- package/python/scripts/debug_inference.py +62 -0
- package/python/scripts/import_json_trajectories.py +412 -0
- package/python/scripts/local-finetune/README.md +63 -0
- package/python/scripts/local-finetune/ingest_and_score.py +139 -0
- package/python/scripts/local-finetune/merge_model.py +32 -0
- package/python/scripts/local-finetune/test_adapter.py +91 -0
- package/python/scripts/local-finetune/train_from_csv.py +132 -0
- package/python/scripts/merge_trajectories.py +318 -0
- package/python/scripts/optimize_prompt_grpo.py +269 -0
- package/python/scripts/run_ab_test.py +143 -0
- package/python/scripts/run_full_pipeline.py +544 -0
- package/python/scripts/run_tinker_training.py +192 -0
- package/python/scripts/run_training.py +914 -0
- package/python/scripts/test_generation.py +29 -0
- package/python/scripts/test_judge.py +155 -0
- package/python/scripts/test_pipeline.py +356 -0
- package/python/scripts/test_trained_model.py +380 -0
- package/python/scripts/train_grpo.py +360 -0
- package/python/scripts/train_jsonl.py +223 -0
- package/python/scripts/train_local.py +528 -0
- package/python/setup.py +20 -0
- package/python/src/__init__.py +190 -0
- package/python/src/data_bridge/__init__.py +24 -0
- package/python/src/data_bridge/converter.py +435 -0
- package/python/src/data_bridge/reader.py +393 -0
- package/python/src/models.py +283 -0
- package/python/src/training/__init__.py +605 -0
- package/python/src/training/ab_testing.py +404 -0
- package/python/src/training/action_executor.py +621 -0
- package/python/src/training/archetype_trainer.py +347 -0
- package/python/src/training/atropos_trainer.py +980 -0
- package/python/src/training/babylon_env.py +1254 -0
- package/python/src/training/error_recovery.py +647 -0
- package/python/src/training/evaluation.py +856 -0
- package/python/src/training/fast_simulator.py +880 -0
- package/python/src/training/format_validator.py +584 -0
- package/python/src/training/hybrid_env.py +522 -0
- package/python/src/training/kl_controller.py +628 -0
- package/python/src/training/multi_prompt_dataset.py +883 -0
- package/python/src/training/multi_turn.py +656 -0
- package/python/src/training/online_env.py +1084 -0
- package/python/src/training/quality_scorer.py +391 -0
- package/python/src/training/quality_utils.py +633 -0
- package/python/src/training/rewards.py +1344 -0
- package/python/src/training/rlaif_env.py +17 -0
- package/python/src/training/rollout_generator.py +502 -0
- package/python/src/training/rubric_loader.py +198 -0
- package/python/src/training/scenario_pool.py +1072 -0
- package/python/src/training/schemas.py +481 -0
- package/python/src/training/service_manager.py +552 -0
- package/python/src/training/simulation_bridge.py +535 -0
- package/python/src/training/tick_reward_attribution.py +399 -0
- package/python/src/training/tinker_client.py +575 -0
- package/python/src/training/tinker_trainer.py +646 -0
- package/python/src/training/tokenization_utils.py +402 -0
- package/python/tests/e2e/__init__.py +13 -0
- package/python/tests/e2e/conftest.py +258 -0
- package/python/tests/e2e/test_full_pipeline.py +643 -0
- package/python/tests/e2e/test_online_training_e2e.py +365 -0
- package/python/tests/integration/__init__.py +12 -0
- package/python/tests/integration/conftest.py +383 -0
- package/python/tests/integration/test_db_integration.py +649 -0
- package/python/tests/integration/test_json_mode_integration.py +554 -0
- package/python/tests/test_action_executor.py +594 -0
- package/python/tests/test_archetype_scoring.py +1027 -0
- package/python/tests/test_atropos_integration.py +360 -0
- package/python/tests/test_evaluation.py +727 -0
- package/python/tests/test_format_validator.py +486 -0
- package/python/tests/test_kl_controller.py +432 -0
- package/python/tests/test_lr_scheduler.py +579 -0
- package/python/tests/test_multi_turn.py +590 -0
- package/python/tests/test_online_env.py +519 -0
- package/python/tests/test_quality_scorer.py +474 -0
- package/python/tests/test_scenario_pool.py +735 -0
- package/python/tests/test_service_manager.py +585 -0
- package/python/tests/test_simulation_rollout.py +581 -0
- package/python/tests/test_tokenization_utils.py +501 -0
- package/python/tests/test_training_orchestrator.py +497 -0
- package/python/tests/test_training_output_structure.py +661 -0
- package/research-output/training-runs/training-run-1770772042899.json +26 -0
- package/research-output/training-runs/training-run-1770930079670.json +32 -0
- package/research-output/training-runs/training-run-1770930143700.json +44 -0
- package/research-output/training-runs/training-run-1770930183638.json +38 -0
- package/research-output/training-runs/training-run-1770930442049.json +38 -0
- package/research-output/training-runs/training-run-1770930793243.json +38 -0
- package/research-output/training-runs/training-run-1771276293257.json +38 -0
- package/research-output/training-runs/training-run-1771276389280.json +38 -0
- package/research-output/training-runs/training-run-1771276502776.json +38 -0
- package/research-output/training-runs/training-run-1771277340748.json +38 -0
- package/research-output/training-runs/training-run-1773013658993.json +38 -0
- package/research-output/training-runs/training-run-1773013861014.json +38 -0
- package/research-output/training-runs/training-run-1773014215983.json +38 -0
- package/scripts/assess-training-data.ts +422 -0
- package/scripts/e2e-training-test.ts +550 -0
- package/scripts/export-rubrics.ts +64 -0
- package/scripts/generate-research-report.ts +1523 -0
- package/scripts/generate_dataset.sh +173 -0
- package/scripts/generate_should_respond.ts +267 -0
- package/scripts/generate_should_respond_dataset.ts +162 -0
- package/scripts/json-mode-benchmark.ts +399 -0
- package/scripts/rank_trajectories.ts +207 -0
- package/scripts/real-archetype-benchmark.ts +210 -0
- package/scripts/run-baseline-comparison.ts +116 -0
- package/scripts/run-full-pipeline.ts +272 -0
- package/scripts/run_rlaif_loop.ts +78 -0
- package/scripts/run_task_benchmark.ts +247 -0
- package/scripts/runpod_setup.sh +137 -0
- package/scripts/runpod_validate.sh +147 -0
- package/scripts/test-model-in-game.ts +955 -0
- package/scripts/test-scoring.ts +73 -0
- package/scripts/test-trained-model.ts +209 -0
- package/scripts/train-and-test.ts +824 -0
- package/scripts/verify-final.ts +118 -0
- package/src/adapter.ts +516 -0
- package/src/archetypes/ArchetypeConfigService.ts +626 -0
- package/src/archetypes/derive-archetype.ts +249 -0
- package/src/archetypes/index.ts +22 -0
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
- package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
- package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
- package/src/benchmark/BenchmarkDataViewer.ts +324 -0
- package/src/benchmark/BenchmarkHistoryService.ts +221 -0
- package/src/benchmark/BenchmarkRunner.ts +685 -0
- package/src/benchmark/BenchmarkValidator.ts +204 -0
- package/src/benchmark/FastEvalRunner.ts +225 -0
- package/src/benchmark/MetricsValidator.ts +165 -0
- package/src/benchmark/MetricsVisualizer.ts +909 -0
- package/src/benchmark/ModelBenchmarkService.ts +611 -0
- package/src/benchmark/ModelRegistry.ts +158 -0
- package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
- package/src/benchmark/SimulationA2AInterface.ts +1169 -0
- package/src/benchmark/SimulationEngine.ts +832 -0
- package/src/benchmark/TaskRunner.ts +94 -0
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
- package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
- package/src/benchmark/index.ts +91 -0
- package/src/benchmark/parseSimulationMetrics.ts +124 -0
- package/src/benchmark/simulation-types.ts +78 -0
- package/src/dependencies.ts +475 -0
- package/src/generation/TrajectoryGenerator.ts +387 -0
- package/src/generation/index.ts +12 -0
- package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
- package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
- package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
- package/src/huggingface/index.ts +27 -0
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
- package/src/index.ts +102 -0
- package/src/init-training.ts +53 -0
- package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
- package/src/metrics/index.ts +8 -0
- package/src/metrics/types.ts +200 -0
- package/src/rubrics/__tests__/index.test.ts +184 -0
- package/src/rubrics/ass-kisser.ts +85 -0
- package/src/rubrics/degen.ts +80 -0
- package/src/rubrics/goody-twoshoes.ts +84 -0
- package/src/rubrics/index.ts +236 -0
- package/src/rubrics/information-trader.ts +84 -0
- package/src/rubrics/infosec.ts +101 -0
- package/src/rubrics/liar.ts +104 -0
- package/src/rubrics/perps-trader.ts +87 -0
- package/src/rubrics/researcher.ts +81 -0
- package/src/rubrics/scammer.ts +82 -0
- package/src/rubrics/social-butterfly.ts +73 -0
- package/src/rubrics/super-predictor.ts +97 -0
- package/src/rubrics/trader.ts +67 -0
- package/src/scoring/ArchetypeScoringService.ts +486 -0
- package/src/scoring/JudgePromptBuilder.ts +556 -0
- package/src/scoring/LLMJudgeCache.ts +401 -0
- package/src/scoring/index.ts +9 -0
- package/src/training/AutomationPipeline.ts +916 -0
- package/src/training/BenchmarkService.ts +518 -0
- package/src/training/ConfigValidator.ts +220 -0
- package/src/training/MarketOutcomesTracker.ts +187 -0
- package/src/training/ModelDeployer.ts +186 -0
- package/src/training/ModelFetcher.ts +76 -0
- package/src/training/ModelSelectionService.ts +341 -0
- package/src/training/ModelUsageVerifier.ts +160 -0
- package/src/training/MultiModelOrchestrator.ts +580 -0
- package/src/training/RLModelConfig.ts +407 -0
- package/src/training/RewardBackpropagationService.ts +149 -0
- package/src/training/RulerScoringService.ts +666 -0
- package/src/training/TrainingMonitor.ts +166 -0
- package/src/training/TrajectoryRecorder.ts +399 -0
- package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
- package/src/training/index.ts +100 -0
- package/src/training/logRLConfig.ts +34 -0
- package/src/training/pipeline.ts +129 -0
- package/src/training/storage/ModelStorageService.ts +279 -0
- package/src/training/storage/TrainingDataArchiver.ts +197 -0
- package/src/training/storage/index.ts +17 -0
- package/src/training/types.ts +207 -0
- package/src/training/window-utils.ts +138 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +59 -0
- package/src/utils/snowflake.ts +17 -0
- package/src/utils/synthetic-detector.ts +111 -0
- package/tsconfig.json +20 -0
|
@@ -0,0 +1,579 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for Learning Rate Scheduling and AtroposTrainingConfig.
|
|
3
|
+
|
|
4
|
+
Tests cover:
|
|
5
|
+
- LR scheduler types (constant, linear, cosine)
|
|
6
|
+
- Warmup behavior
|
|
7
|
+
- Boundary conditions (step 0, step == total_steps)
|
|
8
|
+
- Minimum LR ratio enforcement
|
|
9
|
+
- Config validation
|
|
10
|
+
- Checkpoint resume logic
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import math
|
|
14
|
+
import sys
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from unittest.mock import MagicMock
|
|
17
|
+
|
|
18
|
+
import pytest
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
import torch
|
|
22
|
+
from torch.optim import AdamW
|
|
23
|
+
except ImportError:
|
|
24
|
+
pytest.skip("torch not installed", allow_module_level=True)
|
|
25
|
+
|
|
26
|
+
# Add src to path
|
|
27
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
28
|
+
|
|
29
|
+
from src.training.atropos_trainer import (
|
|
30
|
+
AtroposTrainingConfig,
|
|
31
|
+
AtroposTrainer,
|
|
32
|
+
LRSchedulerType,
|
|
33
|
+
get_lr_scheduler,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TestLRSchedulerType:
    """Tests for the LRSchedulerType enum."""

    def test_all_types_defined(self):
        """Each scheduler type carries its lowercase string value."""
        expected_values = {
            LRSchedulerType.CONSTANT: "constant",
            LRSchedulerType.LINEAR: "linear",
            LRSchedulerType.COSINE: "cosine",
        }
        for member, value in expected_values.items():
            assert member.value == value

    def test_type_count(self):
        """The enum defines exactly three scheduler types."""
        assert len(LRSchedulerType) == 3

    def test_from_string(self):
        """Constructing from the string value yields the matching member."""
        assert LRSchedulerType("constant") is LRSchedulerType.CONSTANT
        assert LRSchedulerType("linear") is LRSchedulerType.LINEAR
        assert LRSchedulerType("cosine") is LRSchedulerType.COSINE

    def test_invalid_type_raises(self):
        """An unrecognized string raises ValueError."""
        with pytest.raises(ValueError):
            LRSchedulerType("invalid")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class TestConstantScheduler:
    """Tests for the constant LR scheduler."""

    @pytest.fixture
    def optimizer(self):
        """A fresh AdamW optimizer over a tiny model (base LR 1e-4)."""
        net = torch.nn.Linear(10, 10)
        return AdamW(net.parameters(), lr=1e-4)

    @staticmethod
    def _observe(scheduler, steps):
        """Record the LR seen before each of the next `steps` scheduler steps."""
        seen = []
        for _ in range(steps):
            seen.append(scheduler.get_last_lr()[0])
            scheduler.step()
        return seen

    def test_constant_no_decay(self, optimizer):
        """Without warmup, the LR stays pinned at the base value for the whole run."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.CONSTANT,
            num_training_steps=100,
            warmup_steps=0,
            min_lr_ratio=0.1,
        )

        observed = self._observe(scheduler, 100)

        # Every recorded LR equals the base LR (floating point tolerance).
        assert all(abs(lr - 1e-4) < 1e-10 for lr in observed)

    def test_constant_with_warmup(self, optimizer):
        """Warmup ramps the LR upward; afterwards it holds at the base value."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.CONSTANT,
            num_training_steps=100,
            warmup_steps=10,
            min_lr_ratio=0.1,
        )

        during_warmup = self._observe(scheduler, 10)
        after_warmup = self._observe(scheduler, 90)

        # LR grows across the warmup window.
        assert during_warmup[0] < during_warmup[-1]

        # Once warmed up, the LR is flat at the base value.
        assert all(abs(lr - 1e-4) < 1e-10 for lr in after_warmup)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class TestLinearScheduler:
    """Tests for the linear LR scheduler."""

    @pytest.fixture
    def optimizer(self):
        """AdamW over a tiny model with base LR 1e-4."""
        return AdamW(torch.nn.Linear(10, 10).parameters(), lr=1e-4)

    def test_linear_decay(self, optimizer):
        """Linear schedule falls monotonically from base LR toward min LR."""
        min_lr_ratio = 0.1
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.LINEAR,
            num_training_steps=100,
            warmup_steps=0,
            min_lr_ratio=min_lr_ratio,
        )

        history = []
        for _ in range(100):
            history.append(scheduler.get_last_lr()[0])
            scheduler.step()

        # First observation is the untouched base LR.
        assert abs(history[0] - 1e-4) < 1e-10

        # Final observation lands near the floor (10% relative tolerance).
        expected_min = 1e-4 * min_lr_ratio
        assert abs(history[-1] - expected_min) / expected_min < 0.1

        # The sequence never increases (tiny slack for floating point).
        for prev, curr in zip(history, history[1:]):
            assert curr <= prev + 1e-12

    def test_linear_with_warmup(self, optimizer):
        """During warmup the LR climbs linearly up to the base value."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.LINEAR,
            num_training_steps=100,
            warmup_steps=20,
            min_lr_ratio=0.1,
        )

        # Warmup phase: each step should sit exactly on the linear ramp.
        for step in range(20):
            lr = scheduler.get_last_lr()[0]
            expected = 1e-4 * (step / 20)
            assert abs(lr - expected) < 1e-10, f"Step {step}: expected {expected}, got {lr}"
            scheduler.step()

        # Warmup complete: LR sits at the full base value.
        assert abs(scheduler.get_last_lr()[0] - 1e-4) < 1e-10

    def test_linear_min_lr_respected(self, optimizer):
        """The LR never drops below base LR * min_lr_ratio, even past the end."""
        min_lr_ratio = 0.2
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.LINEAR,
            num_training_steps=100,
            warmup_steps=0,
            min_lr_ratio=min_lr_ratio,
        )

        floor = 1e-4 * min_lr_ratio

        # Keep stepping well past the configured horizon.
        for _ in range(150):
            assert scheduler.get_last_lr()[0] >= floor - 1e-12
            scheduler.step()
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class TestCosineScheduler:
    """Tests for the cosine annealing LR scheduler."""

    @pytest.fixture
    def optimizer(self):
        """AdamW over a tiny model with base LR 1e-4."""
        return AdamW(torch.nn.Linear(10, 10).parameters(), lr=1e-4)

    @staticmethod
    def _record(scheduler, steps):
        """Record the LR seen before each of the next `steps` scheduler steps."""
        trace = []
        for _ in range(steps):
            trace.append(scheduler.get_last_lr()[0])
            scheduler.step()
        return trace

    def test_cosine_decay(self, optimizer):
        """Cosine schedule starts at base LR, ends near min LR, hits the midpoint halfway."""
        min_lr_ratio = 0.1
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.COSINE,
            num_training_steps=100,
            warmup_steps=0,
            min_lr_ratio=min_lr_ratio,
        )

        trace = self._record(scheduler, 100)

        # Starts at the untouched base LR.
        assert abs(trace[0] - 1e-4) < 1e-10

        # Ends near the floor (10% relative tolerance).
        expected_min = 1e-4 * min_lr_ratio
        assert abs(trace[-1] - expected_min) / expected_min < 0.1

        # A cosine curve passes through the min/max midpoint at the halfway step.
        midpoint = 0.5 * (1e-4 * min_lr_ratio + 1e-4)
        assert abs(trace[50] - midpoint) < 1e-6

    def test_cosine_with_warmup(self, optimizer):
        """Warmup ramps linearly; the cosine phase then starts from the full base LR."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.COSINE,
            num_training_steps=100,
            warmup_steps=10,
            min_lr_ratio=0.1,
        )

        # Each warmup step sits exactly on the linear ramp.
        for step in range(10):
            lr = scheduler.get_last_lr()[0]
            assert abs(lr - 1e-4 * (step / 10)) < 1e-10
            scheduler.step()

        # Immediately after warmup the schedule is at the full base LR.
        assert abs(scheduler.get_last_lr()[0] - 1e-4) < 1e-10

    def test_cosine_min_lr_respected(self, optimizer):
        """The LR never dips below base LR * min_lr_ratio, even past the end."""
        min_lr_ratio = 0.3
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.COSINE,
            num_training_steps=100,
            warmup_steps=0,
            min_lr_ratio=min_lr_ratio,
        )

        floor = 1e-4 * min_lr_ratio

        # Keep stepping well past the configured horizon.
        for _ in range(150):
            assert scheduler.get_last_lr()[0] >= floor - 1e-10
            scheduler.step()

    def test_cosine_smooth_transition(self, optimizer):
        """Consecutive LR values never jump by more than 5% of the base LR."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.COSINE,
            num_training_steps=100,
            warmup_steps=0,
            min_lr_ratio=0.1,
        )

        trace = self._record(scheduler, 100)

        # No discontinuities: each step-to-step delta stays small.
        for prev, curr in zip(trace, trace[1:]):
            assert abs(curr - prev) < 1e-4 * 0.05
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
class TestWarmupBehavior:
    """Tests specifically for warmup behavior.

    Fix vs. original: `test_warmup_greater_than_total_steps` read the LR each
    iteration into an unused variable and asserted nothing, and
    `test_warmup_equal_to_total_steps` had no assertions at all — both only
    verified "does not raise". They now also assert every observed LR is a
    valid (finite, non-negative) value, catching NaN/inf from a division by
    zero when warmup_steps >= num_training_steps.
    """

    @pytest.fixture
    def optimizer(self):
        """AdamW over a tiny model with base LR 1e-4."""
        model = torch.nn.Linear(10, 10)
        return AdamW(model.parameters(), lr=1e-4)

    def test_zero_warmup_steps(self, optimizer):
        """Test scheduler works with zero warmup steps."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.COSINE,
            num_training_steps=100,
            warmup_steps=0,
            min_lr_ratio=0.1,
        )

        # Should start at full LR immediately
        assert abs(scheduler.get_last_lr()[0] - 1e-4) < 1e-10

    def test_warmup_at_step_zero(self, optimizer):
        """Test warmup starts at zero LR."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.COSINE,
            num_training_steps=100,
            warmup_steps=10,
            min_lr_ratio=0.1,
        )

        # At step 0, LR should be 0
        assert scheduler.get_last_lr()[0] == 0.0

    def test_warmup_reaches_full_lr(self, optimizer):
        """Test warmup reaches full LR at end of warmup."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.COSINE,
            num_training_steps=100,
            warmup_steps=10,
            min_lr_ratio=0.1,
        )

        # Step through warmup
        for _ in range(10):
            scheduler.step()

        # Should be at full LR
        assert abs(scheduler.get_last_lr()[0] - 1e-4) < 1e-10

    def test_warmup_equal_to_total_steps(self, optimizer):
        """Test edge case where warmup == total steps."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.LINEAR,
            num_training_steps=10,
            warmup_steps=10,
            min_lr_ratio=0.1,
        )

        # Should complete warmup without crashing; additionally every LR
        # along the way must be a usable value (was previously unchecked).
        for _ in range(15):
            lr = scheduler.get_last_lr()[0]
            assert not math.isnan(lr)
            assert not math.isinf(lr)
            assert lr >= 0.0
            scheduler.step()

    def test_warmup_greater_than_total_steps(self, optimizer):
        """Test edge case where warmup > total steps."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.LINEAR,
            num_training_steps=10,
            warmup_steps=20,
            min_lr_ratio=0.1,
        )

        # Should not crash — and the LR, which was read but never checked in
        # the original test, must be finite and non-negative at every step.
        for step in range(25):
            lr = scheduler.get_last_lr()[0]
            assert not math.isnan(lr), f"NaN LR at step {step}"
            assert not math.isinf(lr), f"infinite LR at step {step}"
            assert lr >= 0.0, f"negative LR at step {step}"
            scheduler.step()
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
class TestAtroposTrainingConfig:
    """Tests for AtroposTrainingConfig."""

    def test_default_values(self):
        """A bare constructor produces every documented default."""
        config = AtroposTrainingConfig()

        # Value defaults, checked by equality.
        expected_defaults = {
            "model_name": "Qwen/Qwen2.5-3B-Instruct",
            "learning_rate": 1e-5,
            "min_learning_rate": 1e-7,
            "training_steps": 100,
            "batch_size": 4,
            "gradient_accumulation_steps": 8,
            "seq_len": 4096,
            "max_grad_norm": 1.0,
            "lr_scheduler": LRSchedulerType.COSINE,
            "warmup_steps": 10,
            "vllm_port": 9001,
            "vllm_restart_interval": 5,
            "vllm_gpu_utilization": 0.45,
            "save_path": "./trained_models",
            "save_every_steps": 5,
            "keep_checkpoints": 3,
            "api_url": "http://localhost:8000",
            "log_file": "./logs/training_metrics.jsonl",
            "wandb_project": "eliza-training",
        }
        for field, value in expected_defaults.items():
            assert getattr(config, field) == value, field

        # Flags and optionals, checked by identity.
        assert config.resume_from is None
        assert config.log_to_file is True
        assert config.use_wandb is True
        assert config.wandb_entity is None
        assert config.wandb_run_name is None

    def test_custom_values(self):
        """Constructor keyword arguments override the defaults."""
        config = AtroposTrainingConfig(
            model_name="custom/model",
            learning_rate=5e-5,
            training_steps=50,
            lr_scheduler=LRSchedulerType.LINEAR,
            use_wandb=False,
        )

        assert config.model_name == "custom/model"
        assert config.learning_rate == 5e-5
        assert config.training_steps == 50
        assert config.lr_scheduler == LRSchedulerType.LINEAR
        assert config.use_wandb is False

    def test_min_lr_ratio_calculation(self):
        """min LR over base LR yields the expected ratio."""
        config = AtroposTrainingConfig(
            learning_rate=1e-4,
            min_learning_rate=1e-6,
        )

        ratio = config.min_learning_rate / config.learning_rate
        assert abs(ratio - 0.01) < 1e-10

    def test_device_auto_detection(self):
        """Without an override, device tracks CUDA availability."""
        config = AtroposTrainingConfig()

        expected_device = "cuda" if torch.cuda.is_available() else "cpu"
        assert config.device == expected_device

    def test_device_override(self):
        """An explicit device argument wins over auto-detection."""
        config = AtroposTrainingConfig(device="cpu")
        assert config.device == "cpu"
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
class TestAtroposTrainer:
    """Tests for AtroposTrainer class."""

    @staticmethod
    def _new_trainer():
        """Build a trainer from an all-defaults config."""
        return AtroposTrainer(AtroposTrainingConfig())

    def test_initialization(self):
        """A fresh trainer holds its config, no lazy components, and step 0."""
        config = AtroposTrainingConfig()
        trainer = AtroposTrainer(config)

        assert trainer.config == config
        # Lazily-created components all start unset.
        for attr in ("model", "tokenizer", "optimizer", "scheduler", "vllm_process"):
            assert getattr(trainer, attr) is None
        assert trainer.current_step == 0
        assert trainer._wandb_initialized is False
        assert trainer._checkpoint_history == []
        assert len(trainer.run_id) > 0

    def test_extract_step_from_path_valid(self):
        """Step numbers embedded as `step_<n>` are parsed out of checkpoint paths."""
        trainer = self._new_trainer()

        cases = {
            "./models/step_50": 50,
            "/path/to/step_100": 100,
            "step_0": 0,
            "step_999": 999,
        }
        for path, expected_step in cases.items():
            assert trainer._extract_step_from_path(path) == expected_step

    def test_extract_step_from_path_invalid(self):
        """Paths without a step suffix fall back to 0."""
        trainer = self._new_trainer()

        for path in ("./models/final_model", "./models/checkpoint", "./step_abc", ""):
            assert trainer._extract_step_from_path(path) == 0

    def test_extract_step_from_path_edge_cases(self):
        """Odd step spellings: bare prefix, negative-looking value, leading zeros."""
        trainer = self._new_trainer()

        # "step_" with no digits at all
        assert trainer._extract_step_from_path("step_") == 0
        # Negative-looking number should not match
        assert trainer._extract_step_from_path("step_-5") == 0
        # Leading zeros parse as a plain integer
        assert trainer._extract_step_from_path("step_007") == 7

    def test_run_id_format(self):
        """run_id follows the YYYYMMDD-HHMMSS layout."""
        run_id = self._new_trainer().run_id

        assert len(run_id) == 15
        assert run_id[8] == '-'
        assert run_id[:8].isdigit()
        assert run_id[9:].isdigit()
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
class TestBoundaryConditions:
    """Tests for various boundary conditions."""

    @pytest.fixture
    def optimizer(self):
        """AdamW over a tiny model with base LR 1e-4."""
        return AdamW(torch.nn.Linear(10, 10).parameters(), lr=1e-4)

    def test_single_training_step(self, optimizer):
        """A one-step schedule is constructed and stepped without error."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.COSINE,
            num_training_steps=1,
            warmup_steps=0,
            min_lr_ratio=0.1,
        )

        initial_lr = scheduler.get_last_lr()[0]
        scheduler.step()

        assert initial_lr >= 0

    def test_min_lr_ratio_zero(self, optimizer):
        """With min_lr_ratio=0 the LR decays all the way to zero."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.LINEAR,
            num_training_steps=100,
            warmup_steps=0,
            min_lr_ratio=0.0,
        )

        for _ in range(100):
            scheduler.step()

        # Fully decayed: effectively zero.
        assert scheduler.get_last_lr()[0] < 1e-12

    def test_min_lr_ratio_one(self, optimizer):
        """With min_lr_ratio=1 there is no decay at all."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.LINEAR,
            num_training_steps=100,
            warmup_steps=0,
            min_lr_ratio=1.0,
        )

        trace = []
        for _ in range(100):
            trace.append(scheduler.get_last_lr()[0])
            scheduler.step()

        # Every observation equals the base LR.
        assert all(abs(lr - 1e-4) < 1e-10 for lr in trace)

    def test_very_large_step_count(self, optimizer):
        """Million-step schedules stay numerically sane (no NaN/inf)."""
        scheduler = get_lr_scheduler(
            optimizer=optimizer,
            scheduler_type=LRSchedulerType.COSINE,
            num_training_steps=1000000,
            warmup_steps=1000,
            min_lr_ratio=0.01,
        )

        for _ in range(1000):
            lr = scheduler.get_last_lr()[0]
            assert not math.isnan(lr)
            assert not math.isinf(lr)
            scheduler.step()