@elizaos/training 2.0.0-alpha.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. package/Dockerfile +75 -0
  2. package/Makefile +374 -0
  3. package/README.md +346 -0
  4. package/config/rubrics.json +137 -0
  5. package/data/.gitkeep +0 -0
  6. package/data/degen/.gitkeep +2 -0
  7. package/data/trader/.gitkeep +2 -0
  8. package/docker-compose.test.yml +57 -0
  9. package/package.json +58 -0
  10. package/python/config/babylon_atropos.yaml +90 -0
  11. package/python/config/profiles/12gb.json +11 -0
  12. package/python/config/profiles/16gb.json +10 -0
  13. package/python/config/profiles/24gb.json +10 -0
  14. package/python/config/profiles/48gb.json +10 -0
  15. package/python/config/profiles/cpu.json +11 -0
  16. package/python/config/profiles/l40-2gpu-safe.json +20 -0
  17. package/python/config/profiles/l40-2gpu.json +22 -0
  18. package/python/config/profiles/l40-4gpu.json +21 -0
  19. package/python/config/profiles/l40.json +17 -0
  20. package/python/config/tinker_training.yaml +143 -0
  21. package/python/curriculum_state.json +165 -0
  22. package/python/env.template +86 -0
  23. package/python/env.training.template +46 -0
  24. package/python/pyproject.toml +41 -0
  25. package/python/requirements-ci.txt +31 -0
  26. package/python/requirements.txt +87 -0
  27. package/python/scripts/__init__.py +4 -0
  28. package/python/scripts/import_json_trajectories.py +412 -0
  29. package/python/scripts/local-finetune/README.md +63 -0
  30. package/python/scripts/local-finetune/ingest_and_score.py +139 -0
  31. package/python/scripts/local-finetune/merge_model.py +32 -0
  32. package/python/scripts/local-finetune/test_adapter.py +91 -0
  33. package/python/scripts/local-finetune/train_from_csv.py +132 -0
  34. package/python/scripts/merge_trajectories.py +318 -0
  35. package/python/scripts/run_ab_test.py +143 -0
  36. package/python/scripts/run_full_pipeline.py +544 -0
  37. package/python/scripts/run_tinker_training.py +192 -0
  38. package/python/scripts/run_training.py +914 -0
  39. package/python/scripts/test_judge.py +155 -0
  40. package/python/scripts/test_pipeline.py +356 -0
  41. package/python/scripts/test_trained_model.py +380 -0
  42. package/python/scripts/train_local.py +528 -0
  43. package/python/setup.py +20 -0
  44. package/python/src/__init__.py +190 -0
  45. package/python/src/data_bridge/__init__.py +24 -0
  46. package/python/src/data_bridge/converter.py +435 -0
  47. package/python/src/data_bridge/reader.py +393 -0
  48. package/python/src/models.py +283 -0
  49. package/python/src/training/__init__.py +605 -0
  50. package/python/src/training/ab_testing.py +404 -0
  51. package/python/src/training/action_executor.py +621 -0
  52. package/python/src/training/archetype_trainer.py +347 -0
  53. package/python/src/training/atropos_trainer.py +980 -0
  54. package/python/src/training/babylon_env.py +1254 -0
  55. package/python/src/training/error_recovery.py +647 -0
  56. package/python/src/training/evaluation.py +856 -0
  57. package/python/src/training/fast_simulator.py +880 -0
  58. package/python/src/training/format_validator.py +584 -0
  59. package/python/src/training/hybrid_env.py +522 -0
  60. package/python/src/training/kl_controller.py +628 -0
  61. package/python/src/training/multi_prompt_dataset.py +883 -0
  62. package/python/src/training/multi_turn.py +656 -0
  63. package/python/src/training/online_env.py +1084 -0
  64. package/python/src/training/quality_scorer.py +391 -0
  65. package/python/src/training/quality_utils.py +633 -0
  66. package/python/src/training/rewards.py +1344 -0
  67. package/python/src/training/rlaif_env.py +17 -0
  68. package/python/src/training/rollout_generator.py +502 -0
  69. package/python/src/training/rubric_loader.py +198 -0
  70. package/python/src/training/scenario_pool.py +1072 -0
  71. package/python/src/training/schemas.py +481 -0
  72. package/python/src/training/service_manager.py +552 -0
  73. package/python/src/training/simulation_bridge.py +535 -0
  74. package/python/src/training/tick_reward_attribution.py +399 -0
  75. package/python/src/training/tinker_client.py +575 -0
  76. package/python/src/training/tinker_trainer.py +646 -0
  77. package/python/src/training/tokenization_utils.py +402 -0
  78. package/python/tests/e2e/__init__.py +13 -0
  79. package/python/tests/e2e/conftest.py +258 -0
  80. package/python/tests/e2e/test_full_pipeline.py +643 -0
  81. package/python/tests/e2e/test_online_training_e2e.py +365 -0
  82. package/python/tests/integration/__init__.py +12 -0
  83. package/python/tests/integration/conftest.py +383 -0
  84. package/python/tests/integration/test_db_integration.py +649 -0
  85. package/python/tests/integration/test_json_mode_integration.py +554 -0
  86. package/python/tests/test_action_executor.py +594 -0
  87. package/python/tests/test_archetype_scoring.py +1027 -0
  88. package/python/tests/test_atropos_integration.py +360 -0
  89. package/python/tests/test_evaluation.py +727 -0
  90. package/python/tests/test_format_validator.py +486 -0
  91. package/python/tests/test_kl_controller.py +432 -0
  92. package/python/tests/test_lr_scheduler.py +579 -0
  93. package/python/tests/test_multi_turn.py +590 -0
  94. package/python/tests/test_online_env.py +519 -0
  95. package/python/tests/test_quality_scorer.py +474 -0
  96. package/python/tests/test_scenario_pool.py +735 -0
  97. package/python/tests/test_service_manager.py +585 -0
  98. package/python/tests/test_simulation_rollout.py +581 -0
  99. package/python/tests/test_tokenization_utils.py +501 -0
  100. package/python/tests/test_training_orchestrator.py +497 -0
  101. package/python/tests/test_training_output_structure.py +661 -0
  102. package/research-output/training-runs/training-run-1770772042899.json +26 -0
  103. package/research-output/training-runs/training-run-1770930079670.json +32 -0
  104. package/research-output/training-runs/training-run-1770930143700.json +44 -0
  105. package/research-output/training-runs/training-run-1770930183638.json +38 -0
  106. package/research-output/training-runs/training-run-1770930442049.json +38 -0
  107. package/research-output/training-runs/training-run-1770930793243.json +38 -0
  108. package/scripts/assess-training-data.ts +422 -0
  109. package/scripts/e2e-training-test.ts +550 -0
  110. package/scripts/export-rubrics.ts +64 -0
  111. package/scripts/generate-research-report.ts +1523 -0
  112. package/scripts/generate_dataset.sh +173 -0
  113. package/scripts/json-mode-benchmark.ts +399 -0
  114. package/scripts/real-archetype-benchmark.ts +210 -0
  115. package/scripts/run-baseline-comparison.ts +116 -0
  116. package/scripts/run-full-pipeline.ts +272 -0
  117. package/scripts/runpod_setup.sh +137 -0
  118. package/scripts/runpod_validate.sh +147 -0
  119. package/scripts/test-model-in-game.ts +955 -0
  120. package/scripts/test-scoring.ts +73 -0
  121. package/scripts/test-trained-model.ts +209 -0
  122. package/scripts/train-and-test.ts +824 -0
  123. package/scripts/verify-final.ts +118 -0
  124. package/src/adapter.ts +516 -0
  125. package/src/archetypes/ArchetypeConfigService.ts +626 -0
  126. package/src/archetypes/derive-archetype.ts +249 -0
  127. package/src/archetypes/index.ts +22 -0
  128. package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
  129. package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
  130. package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
  131. package/src/benchmark/BenchmarkDataViewer.ts +324 -0
  132. package/src/benchmark/BenchmarkHistoryService.ts +221 -0
  133. package/src/benchmark/BenchmarkRunner.ts +685 -0
  134. package/src/benchmark/BenchmarkValidator.ts +206 -0
  135. package/src/benchmark/FastEvalRunner.ts +225 -0
  136. package/src/benchmark/MetricsValidator.ts +165 -0
  137. package/src/benchmark/MetricsVisualizer.ts +909 -0
  138. package/src/benchmark/ModelBenchmarkService.ts +611 -0
  139. package/src/benchmark/ModelRegistry.ts +158 -0
  140. package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
  141. package/src/benchmark/SimulationA2AInterface.ts +1169 -0
  142. package/src/benchmark/SimulationEngine.ts +832 -0
  143. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
  144. package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
  145. package/src/benchmark/index.ts +89 -0
  146. package/src/benchmark/parseSimulationMetrics.ts +124 -0
  147. package/src/benchmark/simulation-types.ts +78 -0
  148. package/src/dependencies.ts +439 -0
  149. package/src/generation/TrajectoryGenerator.ts +387 -0
  150. package/src/generation/index.ts +12 -0
  151. package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
  152. package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
  153. package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
  154. package/src/huggingface/index.ts +27 -0
  155. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
  156. package/src/index.ts +102 -0
  157. package/src/init-training.ts +53 -0
  158. package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
  159. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
  160. package/src/metrics/index.ts +8 -0
  161. package/src/metrics/types.ts +200 -0
  162. package/src/rubrics/__tests__/index.test.ts +184 -0
  163. package/src/rubrics/ass-kisser.ts +85 -0
  164. package/src/rubrics/degen.ts +80 -0
  165. package/src/rubrics/goody-twoshoes.ts +84 -0
  166. package/src/rubrics/index.ts +236 -0
  167. package/src/rubrics/information-trader.ts +84 -0
  168. package/src/rubrics/infosec.ts +101 -0
  169. package/src/rubrics/liar.ts +104 -0
  170. package/src/rubrics/perps-trader.ts +87 -0
  171. package/src/rubrics/researcher.ts +81 -0
  172. package/src/rubrics/scammer.ts +82 -0
  173. package/src/rubrics/social-butterfly.ts +73 -0
  174. package/src/rubrics/super-predictor.ts +97 -0
  175. package/src/rubrics/trader.ts +67 -0
  176. package/src/scoring/ArchetypeScoringService.ts +486 -0
  177. package/src/scoring/JudgePromptBuilder.ts +556 -0
  178. package/src/scoring/LLMJudgeCache.ts +401 -0
  179. package/src/scoring/index.ts +9 -0
  180. package/src/training/AutomationPipeline.ts +916 -0
  181. package/src/training/BenchmarkService.ts +518 -0
  182. package/src/training/ConfigValidator.ts +220 -0
  183. package/src/training/MarketOutcomesTracker.ts +187 -0
  184. package/src/training/ModelDeployer.ts +186 -0
  185. package/src/training/ModelFetcher.ts +76 -0
  186. package/src/training/ModelSelectionService.ts +341 -0
  187. package/src/training/ModelUsageVerifier.ts +160 -0
  188. package/src/training/MultiModelOrchestrator.ts +580 -0
  189. package/src/training/RLModelConfig.ts +407 -0
  190. package/src/training/RewardBackpropagationService.ts +149 -0
  191. package/src/training/RulerScoringService.ts +666 -0
  192. package/src/training/TrainingMonitor.ts +166 -0
  193. package/src/training/TrajectoryRecorder.ts +399 -0
  194. package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
  195. package/src/training/index.ts +100 -0
  196. package/src/training/logRLConfig.ts +34 -0
  197. package/src/training/pipeline.ts +129 -0
  198. package/src/training/storage/ModelStorageService.ts +279 -0
  199. package/src/training/storage/TrainingDataArchiver.ts +197 -0
  200. package/src/training/storage/index.ts +17 -0
  201. package/src/training/types.ts +207 -0
  202. package/src/training/window-utils.ts +138 -0
  203. package/src/utils/index.ts +101 -0
  204. package/src/utils/logger.ts +59 -0
  205. package/src/utils/snowflake.ts +17 -0
  206. package/src/utils/synthetic-detector.ts +111 -0
  207. package/tsconfig.json +20 -0
package/Dockerfile ADDED
@@ -0,0 +1,75 @@
1
+ # Babylon RL Training Docker Image
2
+ #
3
+ # Supports local dev (12GB GPU) through production (4x L40 192GB)
4
+ #
5
+ # Build:
6
+ # docker build -t babylon-training .
7
+ #
8
+ # Run (single GPU):
9
+ # docker run --gpus all -v $(pwd)/trained_models:/app/trained_models babylon-training \
10
+ # --profile l40 --steps 5000
11
+ #
12
+ # Run (4x GPU tensor parallel):
13
+ # docker run --gpus all -v $(pwd)/trained_models:/app/trained_models babylon-training \
14
+ # --profile l40-4gpu --steps 10000
15
+ # NOTE(review): confirm this tag is published; nvidia/cuda tags usually carry a patch version (e.g. 12.1.1-runtime-ubuntu22.04).
16
+ FROM nvidia/cuda:12.1-runtime-ubuntu22.04
17
+
18
+ # Prevent interactive prompts
19
+ ENV DEBIAN_FRONTEND=noninteractive
20
+
21
+ # Install system dependencies
22
+ RUN apt-get update && apt-get install -y \
23
+ python3.11 \
24
+ python3.11-venv \
25
+ python3-pip \
26
+ git \
27
+ curl \
28
+ wget \
29
+ && rm -rf /var/lib/apt/lists/*
30
+
31
+ # Create app directory
32
+ WORKDIR /app
33
+
34
+ # Install Python dependencies
35
+ COPY python/requirements.txt ./requirements.txt
36
+ RUN python3.11 -m pip install --no-cache-dir --upgrade pip && \
37
+ python3.11 -m pip install --no-cache-dir -r requirements.txt
38
+
39
+ # Install vLLM (separate layer for caching); the spec is quoted so the shell does not treat ">=" as an output redirection
40
+ RUN python3.11 -m pip install --no-cache-dir "vllm>=0.4.0"
41
+
42
+ # Install atroposlib
43
+ RUN python3.11 -m pip install --no-cache-dir atroposlib
44
+
45
+ # Install flash-attention (optional, for performance)
46
+ RUN python3.11 -m pip install --no-cache-dir flash-attn --no-build-isolation || echo "Flash attention not available"
47
+
48
+ # Copy application code
49
+ COPY python/ ./python/
50
+ COPY Makefile ./Makefile
51
+
52
+ # Set Python path
53
+ ENV PYTHONPATH=/app/python
54
+
55
+ # Create directories for outputs
56
+ RUN mkdir -p /app/trained_models /app/logs /app/data
57
+
58
+ # Default environment variables
59
+ ENV DATABASE_URL=""
60
+ ENV WANDB_API_KEY=""
61
+ ENV WANDB_PROJECT="babylon-training"
62
+ ENV CUDA_VISIBLE_DEVICES="0"
63
+
64
+ # Health check
65
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
66
+ CMD curl -f http://localhost:8000/ || exit 1
67
+
68
+ # Entrypoint
69
+ ENTRYPOINT ["python3.11", "python/scripts/run_training.py"]
70
+
71
+ # Default command (can be overridden)
72
+ CMD ["--profile", "l40", "--steps", "5000"]
73
+
74
+
75
+
package/Makefile ADDED
@@ -0,0 +1,374 @@
1
+ # Babylon Training Pipeline - Developer Makefile
2
+ #
3
+ # Quick reference:
4
+ # make help - Show all commands
5
+ # make tier1 - Run Python unit tests (no infra)
6
+ # make tier2 - Run JSON mode tests
7
+ # make tier3 - Run DB integration tests
8
+ # make tier4 - Run full GPU training
9
+ #
10
+ # Infrastructure:
11
+ # make db-up - Start test PostgreSQL/Redis
12
+ # make db-down - Stop and remove containers
13
+ # make db-migrate - Apply database schema
14
+ # Every command-style target in this file is declared phony so a same-named file or directory can never mask it.
15
+ .PHONY: all help tier1 tier2 tier3 tier4 tier4-generate tier4-import db-up db-down db-migrate db-reset \
16
+ train train-12gb train-16gb train-24gb train-l40 train-l40-2gpu train-l40-4gpu docker-build train-cloud \
17
+ train-cloud-l40 train-cloud-l40-2gpu train-cloud-l40-4gpu train-cloud-online train-online train-hybrid bridge-server bridge-check generate-data ab-test ab-test-quick venv lint test clean
18
+
19
+ # Default target
20
+ all: test
21
+
22
+ # Colors for output: real ESC bytes produced once by printf, so they render even when the shell's echo does not expand backslash escapes
23
+ CYAN := $(shell printf '\033[36m')
24
+ GREEN := $(shell printf '\033[32m')
25
+ YELLOW := $(shell printf '\033[33m')
26
+ RESET := $(shell printf '\033[0m')
27
+
28
+ # Paths
29
+ PYTHON_DIR := python
30
+ VENV := venv
31
+ VENV_BIN := $(VENV)/bin
32
+ PYTHON := $(VENV_BIN)/python
33
+ PIP := $(VENV_BIN)/pip
34
+ PYTEST := $(VENV_BIN)/pytest
35
+
36
+ # Database
37
+ DB_URL := postgresql://babylon_test:test_password@localhost:5434/babylon_test
38
+ DB_COMPOSE := docker-compose.test.yml
39
+
40
+ # Default profile (can be overridden: make train PROFILE=24gb)
41
+ PROFILE ?= 12gb
42
+
43
+ #---------------------------------------------------------------------------
44
+ # Help
45
+ #---------------------------------------------------------------------------
46
+
47
+ help:
48
+ @echo ""
49
+ @echo "$(CYAN)Babylon Training Pipeline$(RESET)"
50
+ @echo "=========================="
51
+ @echo ""
52
+ @echo "$(GREEN)Testing Tiers:$(RESET)"
53
+ @echo " make tier1 Python unit tests (no infrastructure)"
54
+ @echo " make tier2 JSON mode integration tests"
55
+ @echo " make tier3 Database integration tests (requires Docker)"
56
+ @echo " make tier4 Full GPU training test"
57
+ @echo ""
58
+ @echo "$(GREEN)Infrastructure:$(RESET)"
59
+ @echo " make db-up Start test PostgreSQL and Redis"
60
+ @echo " make db-down Stop and remove test containers"
61
+ @echo " make db-migrate Apply database schema"
62
+ @echo " make db-reset Stop, clean, start, and migrate"
63
+ @echo ""
64
+ @echo "$(GREEN)Training (with GPU profiles):$(RESET)"
65
+ @echo " make train-12gb Train with 12GB GPU profile (RTX 3060)"
66
+ @echo " make train-16gb Train with 16GB GPU profile (RTX 4080)"
67
+ @echo " make train-24gb Train with 24GB GPU profile (RTX 4090)"
68
+ @echo " make train-l40 Train with L40 (48GB) profile"
69
+ @echo " make train-l40-2gpu Train with 2x L40 (96GB) profile"
70
+ @echo " make train-l40-4gpu Train with 4x L40 (192GB) for Qwen3 30B"
71
+ @echo " make train PROFILE=<name> Train with custom profile"
72
+ @echo ""
73
+ @echo "$(GREEN)Online Training (Phase 3):$(RESET)"
74
+ @echo " make bridge-server Start TypeScript simulation bridge"
75
+ @echo " make bridge-check Check if bridge server is running"
76
+ @echo " make train-online Run online training (requires bridge-server)"
77
+ @echo " make train-hybrid Run hybrid training (mix offline + online)"
78
+ @echo " make generate-data Generate trajectories for offline training"
79
+ @echo ""
80
+ @echo "$(GREEN)Cloud & Production (Phase 4):$(RESET)"
81
+ @echo " make docker-build Build Docker image for cloud deployment"
82
+ @echo " make train-cloud Train with W&B logging enabled"
83
+ @echo " make train-cloud-l40 Cloud training with 1x L40"
84
+ @echo " make train-cloud-l40-2gpu Cloud training with 2x L40"
85
+ @echo " make train-cloud-l40-4gpu Cloud training with 4x L40 (Qwen3 30B)"
86
+ @echo " make train-cloud-online Cloud online training with W&B"
87
+ @echo ""
88
+ @echo "$(GREEN)A/B Testing & Evaluation:$(RESET)"
89
+ @echo " make ab-test Run A/B test (MODEL_A vs MODEL_B)"
90
+ @echo " make ab-test-quick Quick A/B test with trained model"
91
+ @echo ""
92
+ @echo "$(GREEN)Development:$(RESET)"
93
+ @echo " make venv Create/update Python virtual environment"
94
+ @echo " make lint Run linting"
95
+ @echo " make test Run all tests (tier1 + tier2)"
96
+ @echo " make clean Remove generated files"
97
+ @echo ""
98
+ @echo "$(YELLOW)Profiles available:$(RESET)"
99
+ @echo " 12gb, 16gb, 24gb, 48gb, cpu, l40, l40-2gpu, l40-4gpu"
100
+ @echo ""
101
+
102
+ #---------------------------------------------------------------------------
103
+ # Virtual Environment
104
+ #---------------------------------------------------------------------------
105
+ # Create $(PYTHON_DIR)/$(VENV), then install requirements.txt and the package itself (editable); the directory name comes from VENV so it always matches PIP.
106
+ venv:
107
+ @echo "$(CYAN)Setting up Python virtual environment...$(RESET)"
108
+ cd $(PYTHON_DIR) && python3 -m venv $(VENV)
109
+ cd $(PYTHON_DIR) && $(PIP) install --upgrade pip
110
+ cd $(PYTHON_DIR) && $(PIP) install -r requirements.txt
111
+ cd $(PYTHON_DIR) && $(PIP) install -e .
112
+ @echo "$(GREEN)✓ Virtual environment ready$(RESET)"
113
+ @echo " Activate with: source $(PYTHON_DIR)/$(VENV_BIN)/activate"
114
+
115
+ #---------------------------------------------------------------------------
116
+ # Testing Tiers
117
+ #---------------------------------------------------------------------------
118
+ # Tier 1: run everything under tests/ except the integration and e2e suites, stopping at the first failure (-x).
119
+ tier1:
120
+ @echo "$(CYAN)Running Tier 1: Python Unit Tests$(RESET)"
121
+ cd $(PYTHON_DIR) && PYTHONPATH=. $(PYTEST) tests/ -v \
122
+ --ignore=tests/integration/ \
123
+ --ignore=tests/e2e/ \
124
+ -x
125
+ @echo "$(GREEN)✓ Tier 1 passed$(RESET)"
126
+ # Tier 2: run only tests/integration/test_json_mode_integration.py, stopping at the first failure (-x).
127
+ tier2:
128
+ @echo "$(CYAN)Running Tier 2: JSON Mode Tests$(RESET)"
129
+ cd $(PYTHON_DIR) && PYTHONPATH=. $(PYTEST) tests/integration/test_json_mode_integration.py -v -x
130
+ @echo "$(GREEN)✓ Tier 2 passed$(RESET)"
131
+ # Tier 3: after db-up and db-migrate, run tests/integration/test_db_integration.py with DATABASE_URL set to $(DB_URL).
132
+ tier3: db-up db-migrate
133
+ @echo "$(CYAN)Running Tier 3: Database Integration Tests$(RESET)"
134
+ cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) PYTHONPATH=. \
135
+ $(PYTEST) tests/integration/test_db_integration.py -v -x
136
+ @echo "$(GREEN)✓ Tier 3 passed$(RESET)"
137
+
138
+ tier4: db-up db-migrate tier4-import
139
+ @echo "$(CYAN)Running Tier 4: Full GPU Training$(RESET)"
140
+ @echo "Using profile: $(PROFILE)"
141
+ cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) WANDB_MODE=offline \
142
+ PATH="$(shell pwd)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
143
+ $(PYTHON) scripts/run_training.py \
144
+ --profile $(PROFILE) \
145
+ --steps 1 \
146
+ --no-wandb \
147
+ --skip-validation
148
+ @echo "$(GREEN)✓ Tier 4 passed$(RESET)"
149
+
150
+ # Training data output directory (absolute path from repo root)
151
+ TRAINING_DATA_DIR := $(shell cd ../.. && pwd)/training-data-output
152
+
153
+ tier4-generate:
154
+ @echo "$(CYAN)Generating training data...$(RESET)"
155
+ cd ../.. && bun run packages/engine/examples/generate-training-data.ts \
156
+ --causal --hours 2 --npcs 5 --seed 42
157
+ @echo "$(GREEN)✓ Training data generated$(RESET)"
158
+ # Import generated trajectories when $(TRAINING_DATA_DIR)/trajectories exists, otherwise only print a hint; the path is quoted in both places so spaces cannot split it.
159
+ tier4-import:
160
+ @echo "$(CYAN)Importing trajectories to database...$(RESET)"
161
+ @if [ -d "$(TRAINING_DATA_DIR)/trajectories" ]; then \
162
+ cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) \
163
+ $(PYTHON) scripts/import_json_trajectories.py \
164
+ --source "$(TRAINING_DATA_DIR)"; \
165
+ else \
166
+ echo "$(YELLOW)Note: No trajectories found. Run 'make tier4-generate' first.$(RESET)"; \
167
+ fi
168
+
169
+ #---------------------------------------------------------------------------
170
+ # Infrastructure
171
+ #---------------------------------------------------------------------------
172
+ # Start the test containers detached and list them; the fixed 3-second sleep is only a pause, not a readiness check.
173
+ db-up:
174
+ @echo "$(CYAN)Starting test database...$(RESET)"
175
+ docker compose -f $(DB_COMPOSE) up -d
176
+ @sleep 3
177
+ @docker compose -f $(DB_COMPOSE) ps
178
+ @echo "$(GREEN)✓ Database ready$(RESET)"
179
+
180
+ db-down:
181
+ @echo "$(CYAN)Stopping test database...$(RESET)"
182
+ docker compose -f $(DB_COMPOSE) down -v
183
+ @echo "$(GREEN)✓ Database stopped$(RESET)"
184
+
185
+ db-migrate:
186
+ @echo "$(CYAN)Applying database schema...$(RESET)"
187
+ cd ../db && DATABASE_URL=$(DB_URL) bunx drizzle-kit push --force
188
+ @echo "$(GREEN)✓ Schema applied$(RESET)"
189
+
190
+ db-reset: db-down db-up db-migrate
191
+ @echo "$(GREEN)✓ Database reset complete$(RESET)"
192
+
193
+ #---------------------------------------------------------------------------
194
+ # Training Shortcuts
195
+ #---------------------------------------------------------------------------
196
+
197
+ train-12gb:
198
+ $(MAKE) train PROFILE=12gb
199
+
200
+ train-16gb:
201
+ $(MAKE) train PROFILE=16gb
202
+
203
+ train-24gb:
204
+ $(MAKE) train PROFILE=24gb
205
+
206
+ train-l40:
207
+ $(MAKE) train PROFILE=l40
208
+
209
+ train-l40-2gpu:
210
+ $(MAKE) train PROFILE=l40-2gpu
211
+
212
+ train-l40-4gpu:
213
+ $(MAKE) train PROFILE=l40-4gpu
214
+
215
+ #---------------------------------------------------------------------------
216
+ # Phase 4: Cloud & Production
217
+ #---------------------------------------------------------------------------
218
+
219
+ # Build Docker image for cloud deployment
220
+ docker-build:
221
+ @echo "$(CYAN)Building Docker image for cloud deployment...$(RESET)"
222
+ docker build -t babylon-training:latest .
223
+ @echo "$(GREEN)✓ Docker image built: babylon-training:latest$(RESET)"
224
+
225
+ # Production training with W&B logging (requires WANDB_API_KEY)
226
+ train-cloud: db-up db-migrate
227
+ @echo "$(CYAN)Starting production cloud training with W&B logging...$(RESET)"
228
+ @if [ -z "$$WANDB_API_KEY" ]; then \
229
+ echo "$(YELLOW)Warning: WANDB_API_KEY not set. W&B logging will be disabled.$(RESET)"; \
230
+ fi
231
+ cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) \
232
+ PATH="$(shell pwd)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
233
+ $(PYTHON) scripts/run_training.py \
234
+ --profile $(PROFILE) \
235
+ --wandb-project $(WANDB_PROJECT) \
236
+ $(if $(WANDB_ENTITY),--wandb-entity $(WANDB_ENTITY),) \
237
+ $(if $(WANDB_RUN_NAME),--wandb-run-name $(WANDB_RUN_NAME),)
238
+
239
+ # Cloud training with specific L40 profiles
240
+ train-cloud-l40:
241
+ $(MAKE) train-cloud PROFILE=l40
242
+
243
+ train-cloud-l40-2gpu:
244
+ $(MAKE) train-cloud PROFILE=l40-2gpu
245
+
246
+ train-cloud-l40-4gpu:
247
+ $(MAKE) train-cloud PROFILE=l40-4gpu
248
+
249
+ # Online cloud training (with bridge)
250
+ train-cloud-online: db-up db-migrate bridge-check
251
+ @echo "$(CYAN)Starting production online training with W&B logging...$(RESET)"
252
+ cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) \
253
+ USE_SIMULATION_BRIDGE=1 \
254
+ SIMULATION_BRIDGE_URL=http://localhost:3001 \
255
+ PATH="$(shell pwd)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
256
+ $(PYTHON) scripts/run_training.py \
257
+ --profile $(PROFILE) \
258
+ --mode online \
259
+ --bridge-url http://localhost:3001 \
260
+ --wandb-project $(WANDB_PROJECT) \
261
+ $(if $(WANDB_ENTITY),--wandb-entity $(WANDB_ENTITY),)
262
+
263
+ # Default W&B settings
264
+ WANDB_PROJECT ?= babylon-training
265
+ WANDB_ENTITY ?=
266
+ WANDB_RUN_NAME ?=
267
+
268
+ train: db-up db-migrate
269
+ @echo "$(CYAN)Starting training with profile: $(PROFILE)$(RESET)"
270
+ cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) WANDB_MODE=offline \
271
+ PATH="$(shell pwd)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
272
+ $(PYTHON) scripts/run_training.py \
273
+ --profile $(PROFILE) \
274
+ --no-wandb \
275
+ --skip-validation
276
+
277
+ #---------------------------------------------------------------------------
278
+ # Online Training
279
+ #---------------------------------------------------------------------------
280
+
281
+ bridge-server:
282
+ @echo "$(CYAN)Starting TypeScript simulation bridge server...$(RESET)"
283
+ cd ../engine && bun run src/services/simulation-bridge-server.ts
284
+
285
+ train-online: db-up db-migrate
286
+ @echo "$(CYAN)Starting online training (requires bridge-server running)$(RESET)"
287
+ @echo "Make sure you've started the bridge server with: make bridge-server"
288
+ cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) WANDB_MODE=offline \
289
+ USE_SIMULATION_BRIDGE=1 \
290
+ SIMULATION_BRIDGE_URL=http://localhost:3001 \
291
+ PATH="$(shell pwd)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
292
+ $(PYTHON) scripts/run_training.py \
293
+ --profile $(PROFILE) \
294
+ --mode online \
295
+ --bridge-url http://localhost:3001 \
296
+ --no-wandb
297
+
298
+ train-hybrid: db-up db-migrate
299
+ @echo "$(CYAN)Starting hybrid training (requires bridge-server running)$(RESET)"
300
+ @echo "Make sure you've started the bridge server with: make bridge-server"
301
+ @echo "Using online ratio: $(ONLINE_RATIO)"
302
+ cd $(PYTHON_DIR) && DATABASE_URL=$(DB_URL) WANDB_MODE=offline \
303
+ USE_SIMULATION_BRIDGE=1 \
304
+ SIMULATION_BRIDGE_URL=http://localhost:3001 \
305
+ HYBRID_ONLINE_RATIO=$(ONLINE_RATIO) \
306
+ PATH="$(shell pwd)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
307
+ $(PYTHON) scripts/run_training.py \
308
+ --profile $(PROFILE) \
309
+ --mode hybrid \
310
+ --bridge-url http://localhost:3001 \
311
+ --hybrid-online-ratio $(ONLINE_RATIO) \
312
+ --no-wandb
313
+
314
+ # Default online ratio for hybrid mode
315
+ ONLINE_RATIO ?= 0.2
316
+
317
+ # Probe the bridge's /health endpoint; report the result and fail the target when it is unreachable
318
+ bridge-check:
319
+ @if curl -s http://localhost:3001/health > /dev/null 2>&1; then \
320
+ echo "$(GREEN)✓ Simulation bridge is running$(RESET)"; else \
321
+ echo "$(YELLOW)✗ Simulation bridge not running. Start with: make bridge-server$(RESET)"; exit 1; fi
322
+ # NOTE(review): HOURS, PARALLEL, NPCS and OUTPUT have no defaults visible in this Makefile and are passed unquoted, so an unset one shifts the later positional arguments left.
323
+ generate-data:
324
+ @echo "$(CYAN)Generating training trajectories...$(RESET)"
325
+ ./scripts/generate_dataset.sh $(HOURS) $(PARALLEL) $(NPCS) $(OUTPUT)
326
+ @echo "$(GREEN)✓ Data generation complete$(RESET)"
327
+
328
+ #---------------------------------------------------------------------------
329
+ # A/B Testing & Evaluation
330
+ #---------------------------------------------------------------------------
331
+
332
+ # Run A/B test comparing trained model against baseline
333
+ ab-test:
334
+ @echo "$(CYAN)Running A/B test: $(MODEL_A) vs $(MODEL_B)$(RESET)"
335
+ cd $(PYTHON_DIR) && \
336
+ PATH="$(shell pwd)/$(PYTHON_DIR)/$(VENV_BIN):$$PATH" \
337
+ $(PYTHON) scripts/run_ab_test.py \
338
+ --model-a $(MODEL_A) \
339
+ --model-b $(MODEL_B) \
340
+ --num-runs $(AB_RUNS) \
341
+ --output-dir $(AB_OUTPUT) \
342
+ $(if $(AB_ARCHETYPES),--archetypes $(AB_ARCHETYPES),)
343
+
344
+ # Quick A/B test with trained model vs base
345
+ ab-test-quick:
346
+ $(MAKE) ab-test MODEL_B=./trained_models/final_model AB_RUNS=1
347
+
348
+ # Default A/B test settings
349
+ MODEL_A ?= Qwen/Qwen2.5-0.5B-Instruct
350
+ MODEL_B ?= ./trained_models/final_model
351
+ AB_RUNS ?= 3
352
+ AB_OUTPUT ?= ./ab_test_results
353
+ AB_ARCHETYPES ?=
354
+
355
+ #---------------------------------------------------------------------------
356
+ # Development
357
+ #---------------------------------------------------------------------------
358
+
359
+ lint:
360
+ @echo "$(CYAN)Running linting...$(RESET)"
361
+ cd ../.. && bun run lint
362
+
363
+ test: tier1 tier2
364
+ @echo "$(GREEN)✓ All quick tests passed$(RESET)"
365
+ # Remove logs, trained models and Python caches under $(PYTHON_DIR); the final find is best-effort (its errors are discarded and ignored).
366
+ clean:
367
+ @echo "$(CYAN)Cleaning generated files...$(RESET)"
368
+ rm -rf $(PYTHON_DIR)/logs
369
+ rm -rf $(PYTHON_DIR)/trained_models
370
+ rm -rf $(PYTHON_DIR)/.pytest_cache
371
+ rm -rf $(PYTHON_DIR)/__pycache__
372
+ find $(PYTHON_DIR) -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
373
+ @echo "$(GREEN)✓ Clean complete$(RESET)"
374
+