npm - @elizaos/training - Versions diffs - 2.0.0-alpha.10 - Mend

@elizaos/training 2.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (224) hide show

package/Dockerfile +75 -0
package/LICENSE +21 -0
package/Makefile +374 -0
package/README.md +346 -0
package/config/rubrics.json +137 -0
package/docker-compose.test.yml +57 -0
package/package.json +57 -0
package/python/config/babylon_atropos.yaml +90 -0
package/python/config/profiles/12gb.json +11 -0
package/python/config/profiles/16gb.json +10 -0
package/python/config/profiles/24gb.json +10 -0
package/python/config/profiles/48gb.json +10 -0
package/python/config/profiles/cpu.json +11 -0
package/python/config/profiles/l40-2gpu-safe.json +20 -0
package/python/config/profiles/l40-2gpu.json +22 -0
package/python/config/profiles/l40-4gpu.json +21 -0
package/python/config/profiles/l40.json +17 -0
package/python/config/tinker_training.yaml +143 -0
package/python/curriculum_state.json +165 -0
package/python/env.template +86 -0
package/python/env.training.template +46 -0
package/python/pyproject.toml +41 -0
package/python/requirements-ci.txt +31 -0
package/python/requirements.txt +87 -0
package/python/scripts/__init__.py +4 -0
package/python/scripts/benchmark_should_respond.py +190 -0
package/python/scripts/debug_inference.py +62 -0
package/python/scripts/import_json_trajectories.py +412 -0
package/python/scripts/local-finetune/README.md +63 -0
package/python/scripts/local-finetune/ingest_and_score.py +139 -0
package/python/scripts/local-finetune/merge_model.py +32 -0
package/python/scripts/local-finetune/test_adapter.py +91 -0
package/python/scripts/local-finetune/train_from_csv.py +132 -0
package/python/scripts/merge_trajectories.py +318 -0
package/python/scripts/optimize_prompt_grpo.py +269 -0
package/python/scripts/run_ab_test.py +143 -0
package/python/scripts/run_full_pipeline.py +544 -0
package/python/scripts/run_tinker_training.py +192 -0
package/python/scripts/run_training.py +914 -0
package/python/scripts/test_generation.py +29 -0
package/python/scripts/test_judge.py +155 -0
package/python/scripts/test_pipeline.py +356 -0
package/python/scripts/test_trained_model.py +380 -0
package/python/scripts/train_grpo.py +360 -0
package/python/scripts/train_jsonl.py +223 -0
package/python/scripts/train_local.py +528 -0
package/python/setup.py +20 -0
package/python/src/__init__.py +190 -0
package/python/src/data_bridge/__init__.py +24 -0
package/python/src/data_bridge/converter.py +435 -0
package/python/src/data_bridge/reader.py +393 -0
package/python/src/models.py +283 -0
package/python/src/training/__init__.py +605 -0
package/python/src/training/ab_testing.py +404 -0
package/python/src/training/action_executor.py +621 -0
package/python/src/training/archetype_trainer.py +347 -0
package/python/src/training/atropos_trainer.py +980 -0
package/python/src/training/babylon_env.py +1254 -0
package/python/src/training/error_recovery.py +647 -0
package/python/src/training/evaluation.py +856 -0
package/python/src/training/fast_simulator.py +880 -0
package/python/src/training/format_validator.py +584 -0
package/python/src/training/hybrid_env.py +522 -0
package/python/src/training/kl_controller.py +628 -0
package/python/src/training/multi_prompt_dataset.py +883 -0
package/python/src/training/multi_turn.py +656 -0
package/python/src/training/online_env.py +1084 -0
package/python/src/training/quality_scorer.py +391 -0
package/python/src/training/quality_utils.py +633 -0
package/python/src/training/rewards.py +1344 -0
package/python/src/training/rlaif_env.py +17 -0
package/python/src/training/rollout_generator.py +502 -0
package/python/src/training/rubric_loader.py +198 -0
package/python/src/training/scenario_pool.py +1072 -0
package/python/src/training/schemas.py +481 -0
package/python/src/training/service_manager.py +552 -0
package/python/src/training/simulation_bridge.py +535 -0
package/python/src/training/tick_reward_attribution.py +399 -0
package/python/src/training/tinker_client.py +575 -0
package/python/src/training/tinker_trainer.py +646 -0
package/python/src/training/tokenization_utils.py +402 -0
package/python/tests/e2e/__init__.py +13 -0
package/python/tests/e2e/conftest.py +258 -0
package/python/tests/e2e/test_full_pipeline.py +643 -0
package/python/tests/e2e/test_online_training_e2e.py +365 -0
package/python/tests/integration/__init__.py +12 -0
package/python/tests/integration/conftest.py +383 -0
package/python/tests/integration/test_db_integration.py +649 -0
package/python/tests/integration/test_json_mode_integration.py +554 -0
package/python/tests/test_action_executor.py +594 -0
package/python/tests/test_archetype_scoring.py +1027 -0
package/python/tests/test_atropos_integration.py +360 -0
package/python/tests/test_evaluation.py +727 -0
package/python/tests/test_format_validator.py +486 -0
package/python/tests/test_kl_controller.py +432 -0
package/python/tests/test_lr_scheduler.py +579 -0
package/python/tests/test_multi_turn.py +590 -0
package/python/tests/test_online_env.py +519 -0
package/python/tests/test_quality_scorer.py +474 -0
package/python/tests/test_scenario_pool.py +735 -0
package/python/tests/test_service_manager.py +585 -0
package/python/tests/test_simulation_rollout.py +581 -0
package/python/tests/test_tokenization_utils.py +501 -0
package/python/tests/test_training_orchestrator.py +497 -0
package/python/tests/test_training_output_structure.py +661 -0
package/research-output/training-runs/training-run-1770772042899.json +26 -0
package/research-output/training-runs/training-run-1770930079670.json +32 -0
package/research-output/training-runs/training-run-1770930143700.json +44 -0
package/research-output/training-runs/training-run-1770930183638.json +38 -0
package/research-output/training-runs/training-run-1770930442049.json +38 -0
package/research-output/training-runs/training-run-1770930793243.json +38 -0
package/research-output/training-runs/training-run-1771276293257.json +38 -0
package/research-output/training-runs/training-run-1771276389280.json +38 -0
package/research-output/training-runs/training-run-1771276502776.json +38 -0
package/research-output/training-runs/training-run-1771277340748.json +38 -0
package/research-output/training-runs/training-run-1773013658993.json +38 -0
package/research-output/training-runs/training-run-1773013861014.json +38 -0
package/research-output/training-runs/training-run-1773014215983.json +38 -0
package/scripts/assess-training-data.ts +422 -0
package/scripts/e2e-training-test.ts +550 -0
package/scripts/export-rubrics.ts +64 -0
package/scripts/generate-research-report.ts +1523 -0
package/scripts/generate_dataset.sh +173 -0
package/scripts/generate_should_respond.ts +267 -0
package/scripts/generate_should_respond_dataset.ts +162 -0
package/scripts/json-mode-benchmark.ts +399 -0
package/scripts/rank_trajectories.ts +207 -0
package/scripts/real-archetype-benchmark.ts +210 -0
package/scripts/run-baseline-comparison.ts +116 -0
package/scripts/run-full-pipeline.ts +272 -0
package/scripts/run_rlaif_loop.ts +78 -0
package/scripts/run_task_benchmark.ts +247 -0
package/scripts/runpod_setup.sh +137 -0
package/scripts/runpod_validate.sh +147 -0
package/scripts/test-model-in-game.ts +955 -0
package/scripts/test-scoring.ts +73 -0
package/scripts/test-trained-model.ts +209 -0
package/scripts/train-and-test.ts +824 -0
package/scripts/verify-final.ts +118 -0
package/src/adapter.ts +516 -0
package/src/archetypes/ArchetypeConfigService.ts +626 -0
package/src/archetypes/derive-archetype.ts +249 -0
package/src/archetypes/index.ts +22 -0
package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
package/src/benchmark/BenchmarkDataViewer.ts +324 -0
package/src/benchmark/BenchmarkHistoryService.ts +221 -0
package/src/benchmark/BenchmarkRunner.ts +685 -0
package/src/benchmark/BenchmarkValidator.ts +204 -0
package/src/benchmark/FastEvalRunner.ts +225 -0
package/src/benchmark/MetricsValidator.ts +165 -0
package/src/benchmark/MetricsVisualizer.ts +909 -0
package/src/benchmark/ModelBenchmarkService.ts +611 -0
package/src/benchmark/ModelRegistry.ts +158 -0
package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
package/src/benchmark/SimulationA2AInterface.ts +1169 -0
package/src/benchmark/SimulationEngine.ts +832 -0
package/src/benchmark/TaskRunner.ts +94 -0
package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
package/src/benchmark/index.ts +91 -0
package/src/benchmark/parseSimulationMetrics.ts +124 -0
package/src/benchmark/simulation-types.ts +78 -0
package/src/dependencies.ts +475 -0
package/src/generation/TrajectoryGenerator.ts +387 -0
package/src/generation/index.ts +12 -0
package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
package/src/huggingface/index.ts +27 -0
package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
package/src/index.ts +102 -0
package/src/init-training.ts +53 -0
package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
package/src/metrics/index.ts +8 -0
package/src/metrics/types.ts +200 -0
package/src/rubrics/__tests__/index.test.ts +184 -0
package/src/rubrics/ass-kisser.ts +85 -0
package/src/rubrics/degen.ts +80 -0
package/src/rubrics/goody-twoshoes.ts +84 -0
package/src/rubrics/index.ts +236 -0
package/src/rubrics/information-trader.ts +84 -0
package/src/rubrics/infosec.ts +101 -0
package/src/rubrics/liar.ts +104 -0
package/src/rubrics/perps-trader.ts +87 -0
package/src/rubrics/researcher.ts +81 -0
package/src/rubrics/scammer.ts +82 -0
package/src/rubrics/social-butterfly.ts +73 -0
package/src/rubrics/super-predictor.ts +97 -0
package/src/rubrics/trader.ts +67 -0
package/src/scoring/ArchetypeScoringService.ts +486 -0
package/src/scoring/JudgePromptBuilder.ts +556 -0
package/src/scoring/LLMJudgeCache.ts +401 -0
package/src/scoring/index.ts +9 -0
package/src/training/AutomationPipeline.ts +916 -0
package/src/training/BenchmarkService.ts +518 -0
package/src/training/ConfigValidator.ts +220 -0
package/src/training/MarketOutcomesTracker.ts +187 -0
package/src/training/ModelDeployer.ts +186 -0
package/src/training/ModelFetcher.ts +76 -0
package/src/training/ModelSelectionService.ts +341 -0
package/src/training/ModelUsageVerifier.ts +160 -0
package/src/training/MultiModelOrchestrator.ts +580 -0
package/src/training/RLModelConfig.ts +407 -0
package/src/training/RewardBackpropagationService.ts +149 -0
package/src/training/RulerScoringService.ts +666 -0
package/src/training/TrainingMonitor.ts +166 -0
package/src/training/TrajectoryRecorder.ts +399 -0
package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
package/src/training/index.ts +100 -0
package/src/training/logRLConfig.ts +34 -0
package/src/training/pipeline.ts +129 -0
package/src/training/storage/ModelStorageService.ts +279 -0
package/src/training/storage/TrainingDataArchiver.ts +197 -0
package/src/training/storage/index.ts +17 -0
package/src/training/types.ts +207 -0
package/src/training/window-utils.ts +138 -0
package/src/utils/index.ts +101 -0
package/src/utils/logger.ts +59 -0
package/src/utils/snowflake.ts +17 -0
package/src/utils/synthetic-detector.ts +111 -0
package/tsconfig.json +20 -0

package/Dockerfile ADDED Viewed

@@ -0,0 +1,75 @@
+# Babylon RL Training Docker Image
+#
+# Supports local dev (12GB GPU) through production (4x L40 192GB)
+#
+# Build:
+#   docker build -t babylon-training .
+#
+# Run (single GPU):
+#   docker run --gpus all -v $(pwd)/trained_models:/app/trained_models babylon-training \
+#     --profile l40 --steps 5000
+#
+# Run (4x GPU tensor parallel):
+#   docker run --gpus all -v $(pwd)/trained_models:/app/trained_models babylon-training \
+#     --profile l40-4gpu --steps 10000
+# NOTE(review): confirm this tag is actually published; nvidia/cuda tags usually
+# carry a three-part CUDA version (e.g. 12.1.0-runtime-ubuntu22.04).
+FROM nvidia/cuda:12.1-runtime-ubuntu22.04
+# Prevent interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive
+# Install system dependencies (apt lists are removed in the same layer to keep it small)
+RUN apt-get update && apt-get install -y \
+    python3.11 \
+    python3.11-venv \
+    python3-pip \
+    git \
+    curl \
+    wget \
+    && rm -rf /var/lib/apt/lists/*
+# Create app directory
+WORKDIR /app
+# Install Python dependencies (requirements copied first so this layer is cached
+# independently of the application code copied further down)
+COPY python/requirements.txt ./requirements.txt
+RUN python3.11 -m pip install --no-cache-dir --upgrade pip && \
+    python3.11 -m pip install --no-cache-dir -r requirements.txt
+# Install vLLM (separate layer for caching).
+# The requirement specifier must be quoted: RUN uses a shell, and an unquoted
+# `>=0.4.0` is parsed as a redirect of pip's output into a file named "=0.4.0",
+# which silently drops the version constraint.
+RUN python3.11 -m pip install --no-cache-dir "vllm>=0.4.0"
+# Install atroposlib
+RUN python3.11 -m pip install --no-cache-dir atroposlib
+# Install flash-attention (optional, for performance); a failed install is
+# tolerated on purpose so the image still builds without it
+RUN python3.11 -m pip install --no-cache-dir flash-attn --no-build-isolation || echo "Flash attention not available"
+# Copy application code
+COPY python/ ./python/
+COPY Makefile ./Makefile
+# Set Python path
+ENV PYTHONPATH=/app/python
+# Create directories for outputs
+RUN mkdir -p /app/trained_models /app/logs /app/data
+# Default environment variables (empty placeholders; override with `docker run -e ...`)
+ENV DATABASE_URL=""
+ENV WANDB_API_KEY=""
+ENV WANDB_PROJECT="babylon-training"
+ENV CUDA_VISIBLE_DEVICES="0"
+# Health check
+# NOTE(review): assumes the training process serves HTTP on port 8000 inside the
+# container — not visible from this file; confirm, otherwise the container will
+# be reported unhealthy.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:8000/ || exit 1
+# Entrypoint
+ENTRYPOINT ["python3.11", "python/scripts/run_training.py"]
+# Default command (can be overridden)
+CMD ["--profile", "l40", "--steps", "5000"]

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Shaw Walters and elizaOS Contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/Makefile ADDED Viewed

@@ -0,0 +1,374 @@
+# Babylon Training Pipeline - Developer Makefile
+#
+# Quick reference:
+#   make help       - Show all commands
+#   make tier1      - Run Python unit tests (no infra)
+#   make tier2      - Run JSON mode tests
+#   make tier3      - Run DB integration tests
+#   make tier4      - Run full GPU training
+#
+# Infrastructure:
+#   make db-up      - Start test PostgreSQL/Redis
+#   make db-down    - Stop and remove containers
+#   make db-migrate - Apply database schema
+# Every target in this file is a command rather than a file it produces, so all
+# of them are declared phony. Without this, a file or directory that happens to
+# share a target's name (e.g. "train" or "clean") would make that target look up
+# to date and its recipe would be skipped.
+.PHONY: all help venv lint test clean \
+        tier1 tier2 tier3 tier4 tier4-generate tier4-import \
+        db-up db-down db-migrate db-reset \
+        train train-12gb train-16gb train-24gb train-l40 train-l40-2gpu train-l40-4gpu \
+        train-online train-hybrid bridge-server bridge-check generate-data \
+        docker-build train-cloud train-cloud-l40 train-cloud-l40-2gpu train-cloud-l40-4gpu \
+        train-cloud-online ab-test ab-test-quick
+# Default target: a bare `make` runs the quick test suite (the `test` target).
+all: test
+# Colors for output: ANSI escape sequences embedded in the recipes' echo strings
+CYAN := \033[36m
+GREEN := \033[32m
+YELLOW := \033[33m
+RESET := \033[0m
+# Paths
+# PYTHON_DIR is relative to this Makefile's directory. The VENV-derived tool
+# paths below are relative too; recipes use them after `cd $(PYTHON_DIR)`, so
+# they resolve inside the Python project directory.
+PYTHON_DIR := python
+VENV := venv
+VENV_BIN := $(VENV)/bin
+PYTHON := $(VENV_BIN)/python
+PIP := $(VENV_BIN)/pip
+PYTEST := $(VENV_BIN)/pytest
+# Database
+# DB_URL is handed to recipes as the DATABASE_URL environment variable;
+# DB_COMPOSE is the compose file used by the db-* targets.
+DB_URL := postgresql://babylon_test:test_password@localhost:5434/babylon_test
+DB_COMPOSE := docker-compose.test.yml
+# Default profile (can be overridden: make train PROFILE=24gb)
+PROFILE ?= 12gb
+#---------------------------------------------------------------------------
+# Help
+#---------------------------------------------------------------------------
+# Print a grouped overview of the available targets. Purely informational: the
+# text is maintained by hand and must be kept in sync with the rules below.
+help:
+	@echo ""
+	@echo "$(CYAN)Babylon Training Pipeline$(RESET)"
+	@echo "=========================="
+	@echo ""
+	@echo "$(GREEN)Testing Tiers:$(RESET)"
+	@echo "  make tier1          Python unit tests (no infrastructure)"
+	@echo "  make tier2          JSON mode integration tests"
+	@echo "  make tier3          Database integration tests (requires Docker)"
+	@echo "  make tier4          Full GPU training test"
+	@echo ""
+	@echo "$(GREEN)Infrastructure:$(RESET)"
+	@echo "  make db-up          Start test PostgreSQL and Redis"
+	@echo "  make db-down        Stop and remove test containers"
+	@echo "  make db-migrate     Apply database schema"
+	@echo "  make db-reset       Stop, clean, start, and migrate"
+	@echo ""
+	@echo "$(GREEN)Training (with GPU profiles):$(RESET)"
+	@echo "  make train-12gb     Train with 12GB GPU profile (RTX 3060)"
+	@echo "  make train-16gb     Train with 16GB GPU profile (RTX 4080)"
+	@echo "  make train-24gb     Train with 24GB GPU profile (RTX 4090)"
+	@echo "  make train-l40      Train with L40 (48GB) profile"
+	@echo "  make train-l40-2gpu Train with 2x L40 (96GB) profile"
+	@echo "  make train-l40-4gpu Train with 4x L40 (192GB) for Qwen3 30B"
+	@echo "  make train PROFILE=<name>  Train with custom profile"
+	@echo ""
+	@echo "$(GREEN)Online Training (Phase 3):$(RESET)"
+	@echo "  make bridge-server  Start TypeScript simulation bridge"
+	@echo "  make bridge-check   Check if bridge server is running"
+	@echo "  make train-online   Run online training (requires bridge-server)"
+	@echo "  make train-hybrid   Run hybrid training (mix offline + online)"
+	@echo "  make generate-data  Generate trajectories for offline training"
+	@echo ""
+	@echo "$(GREEN)Cloud & Production (Phase 4):$(RESET)"
+	@echo "  make docker-build   Build Docker image for cloud deployment"
+	@echo "  make train-cloud    Train with W&B logging enabled"
+	@echo "  make train-cloud-l40      Cloud training with 1x L40"
+	@echo "  make train-cloud-l40-2gpu Cloud training with 2x L40"
+	@echo "  make train-cloud-l40-4gpu Cloud training with 4x L40 (Qwen3 30B)"
+	@echo "  make train-cloud-online   Cloud online training with W&B"
+	@echo ""
+	@echo "$(GREEN)A/B Testing & Evaluation:$(RESET)"
+	@echo "  make ab-test        Run A/B test (MODEL_A vs MODEL_B)"
+	@echo "  make ab-test-quick  Quick A/B test with trained model"
+	@echo ""
+	@echo "$(GREEN)Development:$(RESET)"
+	@echo "  make venv           Create/update Python virtual environment"
+	@echo "  make lint           Run linting"
+	@echo "  make test           Run all tests (tier1 + tier2)"
+	@echo "  make clean          Remove generated files"
+	@echo ""
+	@echo "$(YELLOW)Profiles available:$(RESET)"
+	@echo "  12gb, 16gb, 24gb, 48gb, cpu, l40, l40-2gpu, l40-2gpu-safe, l40-4gpu"
+	@echo ""
+#---------------------------------------------------------------------------
+# Virtual Environment
+#---------------------------------------------------------------------------
+# Create (or refresh) the Python virtual environment inside $(PYTHON_DIR) and
+# install the project's requirements plus the package itself in editable mode.
+# The environment is created at $(VENV) rather than a hard-coded name, so it is
+# always the same directory that $(PIP), $(PYTHON) and $(PYTEST) point into,
+# even when VENV is overridden on the command line.
+# Each recipe line runs in its own shell, hence the repeated `cd`.
+venv:
+	@echo "$(CYAN)Setting up Python virtual environment...$(RESET)"
+	cd $(PYTHON_DIR) && python3 -m venv $(VENV)
+	cd $(PYTHON_DIR) && $(PIP) install --upgrade pip
+	cd $(PYTHON_DIR) && $(PIP) install -r requirements.txt
+	cd $(PYTHON_DIR) && $(PIP) install -e .
+	@echo "$(GREEN)✓ Virtual environment ready$(RESET)"
+	@echo "  Activate with: source $(PYTHON_DIR)/$(VENV_BIN)/activate"
+#---------------------------------------------------------------------------
+# Testing Tiers
+#---------------------------------------------------------------------------
+# Tier 1: run the pytest suite under tests/ while skipping the integration and
+# e2e directories, so no external infrastructure is needed. -x stops at the
+# first failure.
+tier1:
+	@echo "$(CYAN)Running Tier 1: Python Unit Tests$(RESET)"
+	cd $(PYTHON_DIR) && \
+		PYTHONPATH=. $(PYTEST) tests/ -v -x \
+		--ignore=tests/integration/ --ignore=tests/e2e/
+	@echo "$(GREEN)✓ Tier 1 passed$(RESET)"
+# Tier 2: run only the JSON-mode integration test module; stops at the first
+# failure (-x).
+tier2:
+	@echo "$(CYAN)Running Tier 2: JSON Mode Tests$(RESET)"
+	cd $(PYTHON_DIR) && PYTHONPATH=. \
+		$(PYTEST) tests/integration/test_json_mode_integration.py -v -x
+	@echo "$(GREEN)✓ Tier 2 passed$(RESET)"
+# Tier 3: database integration tests. Brings the test database up and applies
+# the schema first (prerequisites), then runs the DB test module with
+# DATABASE_URL pointing at the test instance.
+tier3: db-up db-migrate
+	@echo "$(CYAN)Running Tier 3: Database Integration Tests$(RESET)"
+	cd $(PYTHON_DIR) && \
+		PYTHONPATH=. DATABASE_URL=$(DB_URL) \
+		$(PYTEST) tests/integration/test_db_integration.py -v -x
+	@echo "$(GREEN)✓ Tier 3 passed$(RESET)"
+# Tier 4: smoke-test the full training path with a single step (--steps 1)
+# against the test database, with W&B disabled. Prerequisites start the
+# database, apply the schema and import any generated trajectories.
+# The venv's bin directory is prepended to PATH as an absolute path because the
+# recipe has already changed into $(PYTHON_DIR); $(CURDIR) is Make's built-in
+# absolute working directory, so no extra `pwd` process is spawned.
+tier4: db-up db-migrate tier4-import
+	@echo "$(CYAN)Running Tier 4: Full GPU Training$(RESET)"
+	@echo "Using profile: $(PROFILE)"
+	cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) WANDB_MODE=offline \
+		PATH="$(CURDIR)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
+		$(PYTHON) scripts/run_training.py \
+		--profile $(PROFILE) \
+		--steps 1 \
+		--no-wandb \
+		--skip-validation
+	@echo "$(GREEN)✓ Tier 4 passed$(RESET)"
+# Training data output directory: absolute path of `training-data-output` two
+# levels above this Makefile's working directory (the repo root in the
+# monorepo layout). $(abspath) is resolved by Make itself, so parsing the
+# Makefile does not spawn a shell just to compute this path.
+TRAINING_DATA_DIR := $(abspath ../..)/training-data-output
+# Produce training data by running the engine's generator script with bun from
+# two directories up. The flags (causal mode, 2 hours, 5 NPCs, seed 42) are
+# fixed here.
+tier4-generate:
+	@echo "$(CYAN)Generating training data...$(RESET)"
+	cd ../.. && \
+		bun run packages/engine/examples/generate-training-data.ts --causal --hours 2 --npcs 5 --seed 42
+	@echo "$(GREEN)✓ Training data generated$(RESET)"
+# Import generated trajectories into the test database, if any exist.
+# When $(TRAINING_DATA_DIR)/trajectories is missing, the target only prints a
+# hint and still succeeds, so dependants such as tier4 are not blocked.
+# The --source path is quoted like the directory test above it: it is an
+# absolute path and would otherwise be word-split if the checkout location
+# contains a space.
+tier4-import:
+	@echo "$(CYAN)Importing trajectories to database...$(RESET)"
+	@if [ -d "$(TRAINING_DATA_DIR)/trajectories" ]; then \
+		cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) \
+		$(PYTHON) scripts/import_json_trajectories.py \
+		--source "$(TRAINING_DATA_DIR)"; \
+	else \
+		echo "$(YELLOW)Note: No trajectories found. Run 'make tier4-generate' first.$(RESET)"; \
+	fi
+#---------------------------------------------------------------------------
+# Infrastructure
+#---------------------------------------------------------------------------
+# Start the services defined in $(DB_COMPOSE) in the background, pause for a
+# fixed three seconds, then list their status. There is no readiness probe: the
+# "ready" message is printed as soon as the status listing succeeds.
+db-up:
+	@echo "$(CYAN)Starting test database...$(RESET)"
+	docker compose -f $(DB_COMPOSE) up -d
+	@sleep 3 && docker compose -f $(DB_COMPOSE) ps
+	@echo "$(GREEN)✓ Database ready$(RESET)"
+# Stop and remove the test containers defined in $(DB_COMPOSE). The -v flag
+# also removes the compose project's volumes, so test data is discarded.
+db-down:
+	@echo "$(CYAN)Stopping test database...$(RESET)"
+	docker compose -f $(DB_COMPOSE) down -v
+	@echo "$(GREEN)✓ Database stopped$(RESET)"
+# Push the schema to the test database by running drizzle-kit (via bunx) from
+# the sibling ../db directory, with DATABASE_URL set to $(DB_URL).
+# NOTE(review): --force presumably skips drizzle-kit's confirmation prompts — confirm.
+db-migrate:
+	@echo "$(CYAN)Applying database schema...$(RESET)"
+	cd ../db && DATABASE_URL=$(DB_URL) bunx drizzle-kit push --force
+	@echo "$(GREEN)✓ Schema applied$(RESET)"
+# Tear down, restart and re-migrate the test database.
+# The three steps run as sequential sub-makes instead of being listed as
+# prerequisites: Make gives no ordering guarantee between sibling prerequisites
+# under `make -j`, and down/up/migrate must happen strictly in that order.
+db-reset:
+	$(MAKE) db-down
+	$(MAKE) db-up
+	$(MAKE) db-migrate
+	@echo "$(GREEN)✓ Database reset complete$(RESET)"
+#---------------------------------------------------------------------------
+# Training Shortcuts
+#---------------------------------------------------------------------------
+# Per-profile shortcuts: `make train-<profile>` re-invokes `make train` with
+# PROFILE set to the part of the target name after "train-" (the pattern stem).
+# Written as a static pattern rule so it applies only to the targets listed.
+train-12gb train-16gb train-24gb train-l40 train-l40-2gpu train-l40-4gpu: train-%:
+	$(MAKE) train PROFILE=$*
+#---------------------------------------------------------------------------
+# Phase 4: Cloud & Production
+#---------------------------------------------------------------------------
+# Build Docker image for cloud deployment
+# Uses the directory make runs in as the build context and tags the result
+# babylon-training:latest.
+docker-build:
+	@echo "$(CYAN)Building Docker image for cloud deployment...$(RESET)"
+	docker build -t babylon-training:latest .
+	@echo "$(GREEN)✓ Docker image built: babylon-training:latest$(RESET)"
+# Production training with W&B logging (requires WANDB_API_KEY)
+# Starts the database and applies the schema first. A missing WANDB_API_KEY
+# only produces a warning; the run is still started. --wandb-entity and
+# --wandb-run-name are passed only when the corresponding variables are
+# non-empty.
+# The venv's bin directory is put on PATH via $(CURDIR), Make's built-in
+# absolute working directory, which avoids spawning a shell just to run `pwd`.
+train-cloud: db-up db-migrate
+	@echo "$(CYAN)Starting production cloud training with W&B logging...$(RESET)"
+	@if [ -z "$$WANDB_API_KEY" ]; then \
+		echo "$(YELLOW)Warning: WANDB_API_KEY not set. W&B logging will be disabled.$(RESET)"; \
+	fi
+	cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) \
+		PATH="$(CURDIR)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
+		$(PYTHON) scripts/run_training.py \
+		--profile $(PROFILE) \
+		--wandb-project $(WANDB_PROJECT) \
+		$(if $(WANDB_ENTITY),--wandb-entity $(WANDB_ENTITY),) \
+		$(if $(WANDB_RUN_NAME),--wandb-run-name $(WANDB_RUN_NAME),)
+# Cloud training shortcuts for the L40 profiles: `make train-cloud-<profile>`
+# re-invokes `make train-cloud` with PROFILE set to the part of the target name
+# after "train-cloud-". A static pattern rule limits this to the listed targets.
+train-cloud-l40 train-cloud-l40-2gpu train-cloud-l40-4gpu: train-cloud-%:
+	$(MAKE) train-cloud PROFILE=$*
+# Online cloud training (with bridge)
+# Requires the database, the schema and a reachable simulation bridge
+# (bridge-check fails the build otherwise). The bridge URL is fixed to
+# http://localhost:3001 in both the environment and the CLI flag.
+# The venv's bin directory is put on PATH via $(CURDIR), Make's built-in
+# absolute working directory, which avoids spawning a shell just to run `pwd`.
+train-cloud-online: db-up db-migrate bridge-check
+	@echo "$(CYAN)Starting production online training with W&B logging...$(RESET)"
+	cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) \
+		USE_SIMULATION_BRIDGE=1 \
+		SIMULATION_BRIDGE_URL=http://localhost:3001 \
+		PATH="$(CURDIR)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
+		$(PYTHON) scripts/run_training.py \
+		--profile $(PROFILE) \
+		--mode online \
+		--bridge-url http://localhost:3001 \
+		--wandb-project $(WANDB_PROJECT) \
+		$(if $(WANDB_ENTITY),--wandb-entity $(WANDB_ENTITY),)
+# Default W&B settings
+# `?=` assigns only when the variable is not already set, so values from the
+# command line or the environment win. The train-cloud recipes above can refer
+# to these because recipe text is expanded when it runs, after the whole file
+# has been read. Empty WANDB_ENTITY / WANDB_RUN_NAME mean the matching CLI flag
+# is omitted.
+WANDB_PROJECT ?= babylon-training
+WANDB_ENTITY ?=
+WANDB_RUN_NAME ?=
+# Local training with the selected PROFILE (default 12gb; override with
+# `make train PROFILE=<name>`). Starts the database and applies the schema
+# first; W&B is disabled and the run passes --skip-validation.
+# The venv's bin directory is put on PATH via $(CURDIR), Make's built-in
+# absolute working directory, which avoids spawning a shell just to run `pwd`.
+train: db-up db-migrate
+	@echo "$(CYAN)Starting training with profile: $(PROFILE)$(RESET)"
+	cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) WANDB_MODE=offline \
+		PATH="$(CURDIR)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
+		$(PYTHON) scripts/run_training.py \
+		--profile $(PROFILE) \
+		--no-wandb \
+		--skip-validation
+#---------------------------------------------------------------------------
+# Online Training
+#---------------------------------------------------------------------------
+# Run the TypeScript simulation bridge server with bun from the sibling
+# ../engine directory. The command runs in the foreground.
+# NOTE(review): presumably this is the service bridge-check probes on port 3001 — confirm.
+bridge-server:
+	@echo "$(CYAN)Starting TypeScript simulation bridge server...$(RESET)"
+	cd ../engine && bun run src/services/simulation-bridge-server.ts
+# Online training against a simulation bridge at http://localhost:3001, with
+# W&B disabled. The bridge is not started or checked here; the recipe only
+# reminds the user to run `make bridge-server` first.
+# The venv's bin directory is put on PATH via $(CURDIR), Make's built-in
+# absolute working directory, which avoids spawning a shell just to run `pwd`.
+train-online: db-up db-migrate
+	@echo "$(CYAN)Starting online training (requires bridge-server running)$(RESET)"
+	@echo "Make sure you've started the bridge server with: make bridge-server"
+	cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) WANDB_MODE=offline \
+		USE_SIMULATION_BRIDGE=1 \
+		SIMULATION_BRIDGE_URL=http://localhost:3001 \
+		PATH="$(CURDIR)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
+		$(PYTHON) scripts/run_training.py \
+		--profile $(PROFILE) \
+		--mode online \
+		--bridge-url http://localhost:3001 \
+		--no-wandb
+# Hybrid training: like train-online, but with --mode hybrid and the online
+# share given by ONLINE_RATIO, which is passed both as the HYBRID_ONLINE_RATIO
+# environment variable and as --hybrid-online-ratio. W&B is disabled. The bridge
+# is not started or checked here.
+# The venv's bin directory is put on PATH via $(CURDIR), Make's built-in
+# absolute working directory, which avoids spawning a shell just to run `pwd`.
+train-hybrid: db-up db-migrate
+	@echo "$(CYAN)Starting hybrid training (requires bridge-server running)$(RESET)"
+	@echo "Make sure you've started the bridge server with: make bridge-server"
+	@echo "Using online ratio: $(ONLINE_RATIO)"
+	cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) WANDB_MODE=offline \
+		USE_SIMULATION_BRIDGE=1 \
+		SIMULATION_BRIDGE_URL=http://localhost:3001 \
+		HYBRID_ONLINE_RATIO=$(ONLINE_RATIO) \
+		PATH="$(CURDIR)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
+		$(PYTHON) scripts/run_training.py \
+		--profile $(PROFILE) \
+		--mode hybrid \
+		--bridge-url http://localhost:3001 \
+		--hybrid-online-ratio $(ONLINE_RATIO) \
+		--no-wandb
+# Default online ratio for hybrid mode
+# Used by train-hybrid; override with `make train-hybrid ONLINE_RATIO=<value>`.
+# Defining it after the rule is fine because recipes are expanded when they run.
+ONLINE_RATIO ?= 0.2
+# Check if bridge server is running
+# Probes http://localhost:3001/health and fails the target (exit 1) when the
+# bridge is not healthy. curl's -f flag makes HTTP error responses (4xx/5xx)
+# count as failures; with -s alone curl exits 0 for any response it receives,
+# so a bridge answering with an error would be reported as running.
+bridge-check:
+	@curl -sf http://localhost:3001/health > /dev/null 2>&1 && \
+		echo "$(GREEN)✓ Simulation bridge is running$(RESET)" || \
+		(echo "$(YELLOW)✗ Simulation bridge not running. Start with: make bridge-server$(RESET)" && exit 1)
+# Generate offline training trajectories by running scripts/generate_dataset.sh.
+# HOURS, PARALLEL, NPCS and OUTPUT are not given defaults in the visible part
+# of this Makefile; they are passed positionally and unquoted, so an unset
+# variable expands to nothing and any later argument shifts one position left.
+# NOTE(review): presumably the script applies its own defaults for missing arguments — confirm.
+generate-data:
+	@echo "$(CYAN)Generating training trajectories...$(RESET)"
+	./scripts/generate_dataset.sh $(HOURS) $(PARALLEL) $(NPCS) $(OUTPUT)
+	@echo "$(GREEN)✓ Data generation complete$(RESET)"
+#---------------------------------------------------------------------------
+# A/B Testing & Evaluation
+#---------------------------------------------------------------------------
+# Run A/B test comparing trained model against baseline
+# MODEL_A, MODEL_B, AB_RUNS and AB_OUTPUT are passed straight to
+# scripts/run_ab_test.py; --archetypes is added only when AB_ARCHETYPES is
+# non-empty.
+# The venv's bin directory is put on PATH via $(CURDIR), Make's built-in
+# absolute working directory, which avoids spawning a shell just to run `pwd`.
+ab-test:
+	@echo "$(CYAN)Running A/B test: $(MODEL_A) vs $(MODEL_B)$(RESET)"
+	cd $(PYTHON_DIR) && \
+		PATH="$(CURDIR)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
+		$(PYTHON) scripts/run_ab_test.py \
+		--model-a $(MODEL_A) \
+		--model-b $(MODEL_B) \
+		--num-runs $(AB_RUNS) \
+		--output-dir $(AB_OUTPUT) \
+		$(if $(AB_ARCHETYPES),--archetypes $(AB_ARCHETYPES),)
+# Quick A/B test with trained model vs base
+# Re-invokes `ab-test` with a single run (AB_RUNS=1) and MODEL_B pinned to
+# ./trained_models/final_model; MODEL_A keeps whatever value is in effect.
+ab-test-quick:
+	$(MAKE) ab-test MODEL_B=./trained_models/final_model AB_RUNS=1
+# Default A/B test settings
+# All are `?=` so command-line or environment values take precedence. The two
+# relative paths are used by ab-test after it has changed into $(PYTHON_DIR).
+# An empty AB_ARCHETYPES means the --archetypes flag is omitted.
+MODEL_A ?= Qwen/Qwen2.5-0.5B-Instruct
+MODEL_B ?= ./trained_models/final_model
+AB_RUNS ?= 3
+AB_OUTPUT ?= ./ab_test_results
+AB_ARCHETYPES ?=
+#---------------------------------------------------------------------------
+# Development
+#---------------------------------------------------------------------------
+# Run the `lint` script with bun from two directories up, rather than linting
+# from this directory.
+lint:
+	@echo "$(CYAN)Running linting...$(RESET)"
+	cd ../.. && bun run lint
+# Quick test suite: tier1 and tier2 only; the database (tier3) and GPU (tier4)
+# tiers are not included. This is what the default `all` target runs.
+test: tier1 tier2
+	@echo "$(GREEN)✓ All quick tests passed$(RESET)"
+# Remove generated artefacts under $(PYTHON_DIR): logs, trained models, the
+# pytest cache and every __pycache__ directory. The find pass is best-effort:
+# its errors are discarded and its exit status ignored on purpose.
+clean:
+	@echo "$(CYAN)Cleaning generated files...$(RESET)"
+	rm -rf \
+		$(PYTHON_DIR)/logs \
+		$(PYTHON_DIR)/trained_models \
+		$(PYTHON_DIR)/.pytest_cache \
+		$(PYTHON_DIR)/__pycache__
+	find $(PYTHON_DIR) -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
+	@echo "$(GREEN)✓ Clean complete$(RESET)"