@elizaos/training 2.0.0-alpha.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. package/Dockerfile +75 -0
  2. package/Makefile +374 -0
  3. package/README.md +346 -0
  4. package/config/rubrics.json +137 -0
  5. package/data/.gitkeep +0 -0
  6. package/data/degen/.gitkeep +2 -0
  7. package/data/trader/.gitkeep +2 -0
  8. package/docker-compose.test.yml +57 -0
  9. package/package.json +58 -0
  10. package/python/config/babylon_atropos.yaml +90 -0
  11. package/python/config/profiles/12gb.json +11 -0
  12. package/python/config/profiles/16gb.json +10 -0
  13. package/python/config/profiles/24gb.json +10 -0
  14. package/python/config/profiles/48gb.json +10 -0
  15. package/python/config/profiles/cpu.json +11 -0
  16. package/python/config/profiles/l40-2gpu-safe.json +20 -0
  17. package/python/config/profiles/l40-2gpu.json +22 -0
  18. package/python/config/profiles/l40-4gpu.json +21 -0
  19. package/python/config/profiles/l40.json +17 -0
  20. package/python/config/tinker_training.yaml +143 -0
  21. package/python/curriculum_state.json +165 -0
  22. package/python/env.template +86 -0
  23. package/python/env.training.template +46 -0
  24. package/python/pyproject.toml +41 -0
  25. package/python/requirements-ci.txt +31 -0
  26. package/python/requirements.txt +87 -0
  27. package/python/scripts/__init__.py +4 -0
  28. package/python/scripts/import_json_trajectories.py +412 -0
  29. package/python/scripts/local-finetune/README.md +63 -0
  30. package/python/scripts/local-finetune/ingest_and_score.py +139 -0
  31. package/python/scripts/local-finetune/merge_model.py +32 -0
  32. package/python/scripts/local-finetune/test_adapter.py +91 -0
  33. package/python/scripts/local-finetune/train_from_csv.py +132 -0
  34. package/python/scripts/merge_trajectories.py +318 -0
  35. package/python/scripts/run_ab_test.py +143 -0
  36. package/python/scripts/run_full_pipeline.py +544 -0
  37. package/python/scripts/run_tinker_training.py +192 -0
  38. package/python/scripts/run_training.py +914 -0
  39. package/python/scripts/test_judge.py +155 -0
  40. package/python/scripts/test_pipeline.py +356 -0
  41. package/python/scripts/test_trained_model.py +380 -0
  42. package/python/scripts/train_local.py +528 -0
  43. package/python/setup.py +20 -0
  44. package/python/src/__init__.py +190 -0
  45. package/python/src/data_bridge/__init__.py +24 -0
  46. package/python/src/data_bridge/converter.py +435 -0
  47. package/python/src/data_bridge/reader.py +393 -0
  48. package/python/src/models.py +283 -0
  49. package/python/src/training/__init__.py +605 -0
  50. package/python/src/training/ab_testing.py +404 -0
  51. package/python/src/training/action_executor.py +621 -0
  52. package/python/src/training/archetype_trainer.py +347 -0
  53. package/python/src/training/atropos_trainer.py +980 -0
  54. package/python/src/training/babylon_env.py +1254 -0
  55. package/python/src/training/error_recovery.py +647 -0
  56. package/python/src/training/evaluation.py +856 -0
  57. package/python/src/training/fast_simulator.py +880 -0
  58. package/python/src/training/format_validator.py +584 -0
  59. package/python/src/training/hybrid_env.py +522 -0
  60. package/python/src/training/kl_controller.py +628 -0
  61. package/python/src/training/multi_prompt_dataset.py +883 -0
  62. package/python/src/training/multi_turn.py +656 -0
  63. package/python/src/training/online_env.py +1084 -0
  64. package/python/src/training/quality_scorer.py +391 -0
  65. package/python/src/training/quality_utils.py +633 -0
  66. package/python/src/training/rewards.py +1344 -0
  67. package/python/src/training/rlaif_env.py +17 -0
  68. package/python/src/training/rollout_generator.py +502 -0
  69. package/python/src/training/rubric_loader.py +198 -0
  70. package/python/src/training/scenario_pool.py +1072 -0
  71. package/python/src/training/schemas.py +481 -0
  72. package/python/src/training/service_manager.py +552 -0
  73. package/python/src/training/simulation_bridge.py +535 -0
  74. package/python/src/training/tick_reward_attribution.py +399 -0
  75. package/python/src/training/tinker_client.py +575 -0
  76. package/python/src/training/tinker_trainer.py +646 -0
  77. package/python/src/training/tokenization_utils.py +402 -0
  78. package/python/tests/e2e/__init__.py +13 -0
  79. package/python/tests/e2e/conftest.py +258 -0
  80. package/python/tests/e2e/test_full_pipeline.py +643 -0
  81. package/python/tests/e2e/test_online_training_e2e.py +365 -0
  82. package/python/tests/integration/__init__.py +12 -0
  83. package/python/tests/integration/conftest.py +383 -0
  84. package/python/tests/integration/test_db_integration.py +649 -0
  85. package/python/tests/integration/test_json_mode_integration.py +554 -0
  86. package/python/tests/test_action_executor.py +594 -0
  87. package/python/tests/test_archetype_scoring.py +1027 -0
  88. package/python/tests/test_atropos_integration.py +360 -0
  89. package/python/tests/test_evaluation.py +727 -0
  90. package/python/tests/test_format_validator.py +486 -0
  91. package/python/tests/test_kl_controller.py +432 -0
  92. package/python/tests/test_lr_scheduler.py +579 -0
  93. package/python/tests/test_multi_turn.py +590 -0
  94. package/python/tests/test_online_env.py +519 -0
  95. package/python/tests/test_quality_scorer.py +474 -0
  96. package/python/tests/test_scenario_pool.py +735 -0
  97. package/python/tests/test_service_manager.py +585 -0
  98. package/python/tests/test_simulation_rollout.py +581 -0
  99. package/python/tests/test_tokenization_utils.py +501 -0
  100. package/python/tests/test_training_orchestrator.py +497 -0
  101. package/python/tests/test_training_output_structure.py +661 -0
  102. package/research-output/training-runs/training-run-1770772042899.json +26 -0
  103. package/research-output/training-runs/training-run-1770930079670.json +32 -0
  104. package/research-output/training-runs/training-run-1770930143700.json +44 -0
  105. package/research-output/training-runs/training-run-1770930183638.json +38 -0
  106. package/research-output/training-runs/training-run-1770930442049.json +38 -0
  107. package/research-output/training-runs/training-run-1770930793243.json +38 -0
  108. package/scripts/assess-training-data.ts +422 -0
  109. package/scripts/e2e-training-test.ts +550 -0
  110. package/scripts/export-rubrics.ts +64 -0
  111. package/scripts/generate-research-report.ts +1523 -0
  112. package/scripts/generate_dataset.sh +173 -0
  113. package/scripts/json-mode-benchmark.ts +399 -0
  114. package/scripts/real-archetype-benchmark.ts +210 -0
  115. package/scripts/run-baseline-comparison.ts +116 -0
  116. package/scripts/run-full-pipeline.ts +272 -0
  117. package/scripts/runpod_setup.sh +137 -0
  118. package/scripts/runpod_validate.sh +147 -0
  119. package/scripts/test-model-in-game.ts +955 -0
  120. package/scripts/test-scoring.ts +73 -0
  121. package/scripts/test-trained-model.ts +209 -0
  122. package/scripts/train-and-test.ts +824 -0
  123. package/scripts/verify-final.ts +118 -0
  124. package/src/adapter.ts +516 -0
  125. package/src/archetypes/ArchetypeConfigService.ts +626 -0
  126. package/src/archetypes/derive-archetype.ts +249 -0
  127. package/src/archetypes/index.ts +22 -0
  128. package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
  129. package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
  130. package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
  131. package/src/benchmark/BenchmarkDataViewer.ts +324 -0
  132. package/src/benchmark/BenchmarkHistoryService.ts +221 -0
  133. package/src/benchmark/BenchmarkRunner.ts +685 -0
  134. package/src/benchmark/BenchmarkValidator.ts +206 -0
  135. package/src/benchmark/FastEvalRunner.ts +225 -0
  136. package/src/benchmark/MetricsValidator.ts +165 -0
  137. package/src/benchmark/MetricsVisualizer.ts +909 -0
  138. package/src/benchmark/ModelBenchmarkService.ts +611 -0
  139. package/src/benchmark/ModelRegistry.ts +158 -0
  140. package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
  141. package/src/benchmark/SimulationA2AInterface.ts +1169 -0
  142. package/src/benchmark/SimulationEngine.ts +832 -0
  143. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
  144. package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
  145. package/src/benchmark/index.ts +89 -0
  146. package/src/benchmark/parseSimulationMetrics.ts +124 -0
  147. package/src/benchmark/simulation-types.ts +78 -0
  148. package/src/dependencies.ts +439 -0
  149. package/src/generation/TrajectoryGenerator.ts +387 -0
  150. package/src/generation/index.ts +12 -0
  151. package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
  152. package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
  153. package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
  154. package/src/huggingface/index.ts +27 -0
  155. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
  156. package/src/index.ts +102 -0
  157. package/src/init-training.ts +53 -0
  158. package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
  159. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
  160. package/src/metrics/index.ts +8 -0
  161. package/src/metrics/types.ts +200 -0
  162. package/src/rubrics/__tests__/index.test.ts +184 -0
  163. package/src/rubrics/ass-kisser.ts +85 -0
  164. package/src/rubrics/degen.ts +80 -0
  165. package/src/rubrics/goody-twoshoes.ts +84 -0
  166. package/src/rubrics/index.ts +236 -0
  167. package/src/rubrics/information-trader.ts +84 -0
  168. package/src/rubrics/infosec.ts +101 -0
  169. package/src/rubrics/liar.ts +104 -0
  170. package/src/rubrics/perps-trader.ts +87 -0
  171. package/src/rubrics/researcher.ts +81 -0
  172. package/src/rubrics/scammer.ts +82 -0
  173. package/src/rubrics/social-butterfly.ts +73 -0
  174. package/src/rubrics/super-predictor.ts +97 -0
  175. package/src/rubrics/trader.ts +67 -0
  176. package/src/scoring/ArchetypeScoringService.ts +486 -0
  177. package/src/scoring/JudgePromptBuilder.ts +556 -0
  178. package/src/scoring/LLMJudgeCache.ts +401 -0
  179. package/src/scoring/index.ts +9 -0
  180. package/src/training/AutomationPipeline.ts +916 -0
  181. package/src/training/BenchmarkService.ts +518 -0
  182. package/src/training/ConfigValidator.ts +220 -0
  183. package/src/training/MarketOutcomesTracker.ts +187 -0
  184. package/src/training/ModelDeployer.ts +186 -0
  185. package/src/training/ModelFetcher.ts +76 -0
  186. package/src/training/ModelSelectionService.ts +341 -0
  187. package/src/training/ModelUsageVerifier.ts +160 -0
  188. package/src/training/MultiModelOrchestrator.ts +580 -0
  189. package/src/training/RLModelConfig.ts +407 -0
  190. package/src/training/RewardBackpropagationService.ts +149 -0
  191. package/src/training/RulerScoringService.ts +666 -0
  192. package/src/training/TrainingMonitor.ts +166 -0
  193. package/src/training/TrajectoryRecorder.ts +399 -0
  194. package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
  195. package/src/training/index.ts +100 -0
  196. package/src/training/logRLConfig.ts +34 -0
  197. package/src/training/pipeline.ts +129 -0
  198. package/src/training/storage/ModelStorageService.ts +279 -0
  199. package/src/training/storage/TrainingDataArchiver.ts +197 -0
  200. package/src/training/storage/index.ts +17 -0
  201. package/src/training/types.ts +207 -0
  202. package/src/training/window-utils.ts +138 -0
  203. package/src/utils/index.ts +101 -0
  204. package/src/utils/logger.ts +59 -0
  205. package/src/utils/snowflake.ts +17 -0
  206. package/src/utils/synthetic-detector.ts +111 -0
  207. package/tsconfig.json +20 -0
@@ -0,0 +1,605 @@
1
+ """
2
+ RL training orchestration for ElizaOS-compatible runtimes
3
+
4
+ This package provides training infrastructure:
5
+
6
+ 1. **Atropos-based Trainer** (RECOMMENDED)
7
+ - `atropos_trainer.py` - GRPO trainer consuming from Atropos API
8
+ - `rlaif_env.py` - RLAIF environment with pluggable scoring modes
9
+
10
+ 2. **Fast Rollout Generation**
11
+ - `rollout_generator.py` - High-speed rollout generation with full agent tick capture
12
+ - `fast_simulator.py` - Unified simulator for benchmark + data generation
13
+ - `multi_prompt_dataset.py` - Dataset preparation for each LLM call type
14
+
15
+ 3. **Supporting Modules**
16
+ - `rewards.py` - Reward functions and normalization
17
+ - `quality_utils.py` - Trajectory quality scoring
18
+ - `tick_reward_attribution.py` - Granular reward attribution for multi-call ticks
19
+
20
+ See README.md for usage instructions.
21
+ """
22
+
23
+ # Import non-torch modules directly
24
+ from .rewards import (
25
+ pnl_reward,
26
+ risk_adjusted_reward,
27
+ efficiency_reward,
28
+ action_quality_reward,
29
+ composite_reward,
30
+ relative_scores,
31
+ ranking_to_scores,
32
+ pairwise_preferences_to_scores,
33
+ RewardNormalizer,
34
+ # Archetype-aware scoring
35
+ BehaviorMetrics,
36
+ archetype_composite_reward,
37
+ calculate_archetype_behavior_bonus,
38
+ get_archetype_weights,
39
+ ARCHETYPE_REWARD_WEIGHTS,
40
+ )
41
+
42
+ # Quality utilities (no torch dependency)
43
+ from .quality_utils import (
44
+ calculate_tick_quality_score,
45
+ calculate_trajectory_quality_score,
46
+ build_trajectory_from_ticks,
47
+ state_to_observation,
48
+ state_to_env_state,
49
+ validate_trajectory_quality,
50
+ ValidationResult,
51
+ )
52
+
53
+ # Multi-prompt dataset (no torch dependency)
54
+ from .multi_prompt_dataset import (
55
+ MultiPromptDatasetBuilder,
56
+ PromptDataset,
57
+ PromptSample,
58
+ prepare_multi_prompt_training_data,
59
+ PromptTypeAnalyzer,
60
+ validate_training_sample,
61
+ validate_trajectory_for_training,
62
+ )
63
+
64
+ # Tick reward attribution (no torch dependency)
65
+ from .tick_reward_attribution import (
66
+ TickRewardAttributor,
67
+ TickData,
68
+ TickOutcome,
69
+ LLMCallRecord,
70
+ CallPurpose,
71
+ build_training_samples_from_tick,
72
+ group_samples_for_grpo,
73
+ )
74
+
75
+ # Archetype training configuration (no torch dependency)
76
+ from .archetype_trainer import (
77
+ ArchetypeTrainer,
78
+ ArchetypeTrainingConfig,
79
+ ArchetypeTrainingResult,
80
+ )
81
+
82
+ # Rubric loading from config/rubrics.json (single source of truth)
83
+ from .rubric_loader import (
84
+ get_rubric,
85
+ get_priority_metrics,
86
+ get_available_archetypes,
87
+ reload_rubrics,
88
+ get_rubric_hash,
89
+ get_all_rubrics_hash,
90
+ get_rubrics_version,
91
+ normalize_archetype,
92
+ has_custom_rubric,
93
+ DEFAULT_RUBRIC,
94
+ RUBRICS_VERSION,
95
+ )
96
+
97
+ # Schema validation for data integrity
98
+ from .schemas import (
99
+ TrajectorySchema,
100
+ StepSchema,
101
+ ActionSchema,
102
+ LLMCallSchema,
103
+ EnvironmentStateSchema,
104
+ validate_trajectory,
105
+ validate_step,
106
+ validate_llm_call,
107
+ validate_trajectory_file,
108
+ compare_trajectory_formats,
109
+ ValidationResult as SchemaValidationResult,
110
+ )
111
+
112
+ # Phase 1 & 2: Online GRPO Training Infrastructure
113
+ from .scenario_pool import (
114
+ Scenario,
115
+ ScenarioPool,
116
+ ScenarioPoolConfig,
117
+ CurriculumManager,
118
+ MarketState,
119
+ PerpetualState,
120
+ NewsItem,
121
+ SocialPost,
122
+ PortfolioState as ScenarioPortfolioState,
123
+ )
124
+
125
+ from .tokenization_utils import (
126
+ TokenizationResult,
127
+ tokenize_for_trainer,
128
+ tokenize_conversation_for_trainer,
129
+ validate_masks,
130
+ create_masks_from_response_start,
131
+ fix_historical_masks,
132
+ )
133
+
134
+ from .action_executor import (
135
+ ActionResult,
136
+ ActionExecutor,
137
+ PortfolioState as ExecutorPortfolioState,
138
+ validate_action,
139
+ execute_action_for_training,
140
+ calculate_action_quality_bonus,
141
+ set_simulation_seed,
142
+ reset_simulation_rng,
143
+ )
144
+
145
+ from .format_validator import (
146
+ ThinkTagResult,
147
+ ActionValidationResult,
148
+ ReasoningQualityResult,
149
+ LengthAnalysisResult,
150
+ FormatValidationResult,
151
+ validate_response_format,
152
+ validate_think_tags,
153
+ validate_action_json,
154
+ get_format_and_reasoning_scores,
155
+ validate_for_training,
156
+ )
157
+
158
+ from .quality_scorer import (
159
+ QualityScore,
160
+ calculate_thinking_length_penalty,
161
+ calculate_response_length_penalty,
162
+ calculate_combined_length_penalty,
163
+ score_response,
164
+ score_response_for_reward,
165
+ get_quality_bonus_for_archetype,
166
+ score_response_batch,
167
+ get_relative_quality_scores,
168
+ )
169
+
170
+ # Phase 3: Evaluation & Monitoring
171
+ from .evaluation import (
172
+ EvaluationSuite,
173
+ EvalResult,
174
+ ArchetypeMetrics,
175
+ TestScenarioManager,
176
+ TestScenario,
177
+ BaselineManager,
178
+ BaselineResult,
179
+ RolloutDumper,
180
+ RolloutRecord,
181
+ get_wandb_config,
182
+ STEP_METRICS,
183
+ EVAL_METRICS,
184
+ )
185
+
186
+ # Phase 4: A/B Testing & Production Evaluation
187
+ from .ab_testing import (
188
+ ABTestRunner,
189
+ ABTestResult,
190
+ ModelResult,
191
+ EVAL_SCENARIOS,
192
+ run_ab_test,
193
+ )
194
+
195
+ # Phase 4: Advanced Features (NOT YET INTEGRATED - ready for future use)
196
+ # These modules are tested but not called by rlaif/online env runtime paths.
197
+ from .kl_controller import (
198
+ KLConfig,
199
+ KLStats,
200
+ KLControllerBase,
201
+ create_kl_controller,
202
+ compute_kl_divergence,
203
+ estimate_kl_from_samples,
204
+ )
205
+
206
+ from .multi_turn import (
207
+ TurnData,
208
+ EpisodeBuffer,
209
+ GAEConfig,
210
+ MultiTurnEpisodeManager,
211
+ EpisodeCollector,
212
+ shape_trading_rewards,
213
+ compute_episode_return,
214
+ normalize_episode_rewards,
215
+ )
216
+
217
+ # Phase 5: Simulation Bridge for online training
218
+ from .simulation_bridge import (
219
+ SimulationBridge,
220
+ PerpMarket,
221
+ PredictionMarket,
222
+ Position,
223
+ NewsItem as BridgeNewsItem,
224
+ Relationship,
225
+ SocialContext,
226
+ MarketState as BridgeMarketState,
227
+ Scenario as BridgeScenario,
228
+ ActionOutcome,
229
+ TickResult,
230
+ create_bridge,
231
+ )
232
+
233
+ # Error recovery and graceful degradation
234
+ from .error_recovery import (
235
+ ErrorCategory,
236
+ TrainingError,
237
+ classify_error,
238
+ is_recoverable,
239
+ with_retry,
240
+ with_retry_async,
241
+ RecoveryResult,
242
+ recover_json_parse,
243
+ recover_trajectory_archetype,
244
+ filter_valid_trajectories,
245
+ DatabaseConnectionManager,
246
+ GracefulShutdown,
247
+ TrainingProgress,
248
+ safe_divide,
249
+ clamp,
250
+ require_env,
251
+ get_env_or_default,
252
+ )
253
+
254
+ # Lazy exports for modules with heavy or optional dependencies (torch, tinker, requests).
255
+ # They are resolved on first attribute access by the module-level __getattr__ below.
256
# Maps every lazily exported name to the sibling submodule that defines it.
# These submodules are deliberately NOT imported at package import time; they
# are loaded on first attribute access by ``__getattr__`` below.
_LAZY_EXPORTS = {
    # Atropos trainer (requires torch)
    "AtroposTrainer": "atropos_trainer",
    "BabylonAtroposTrainer": "atropos_trainer",
    "AtroposTrainingConfig": "atropos_trainer",
    # RLAIF environment
    "RLAIFEnv": "rlaif_env",
    "RLAIFEnvConfig": "rlaif_env",
    "BabylonRLAIFEnv": "rlaif_env",
    "BabylonEnvConfig": "rlaif_env",
    # Online environment
    "BabylonOnlineEnv": "online_env",
    "BabylonOnlineEnvConfig": "online_env",
    # Hybrid environment
    "BabylonHybridEnv": "hybrid_env",
    "BabylonHybridEnvConfig": "hybrid_env",
    # Fast rollout generation
    "FastRolloutGenerator": "rollout_generator",
    "RolloutConfig": "rollout_generator",
    "RolloutResult": "rollout_generator",
    "AgentTickData": "rollout_generator",
    "RolloutQualityValidator": "rollout_generator",
    "AgentRunner": "rollout_generator",
    # Fast simulator
    "FastSimulator": "fast_simulator",
    "SimulatorConfig": "fast_simulator",
    "SimulatorMetrics": "fast_simulator",
    "GameState": "fast_simulator",
    # Tinker integration (requires the tinker package)
    "TinkerClient": "tinker_client",
    "BabylonTinkerClient": "tinker_client",
    "TinkerConfig": "tinker_client",
    "TinkerDatum": "tinker_client",
    "TrainStepResult": "tinker_client",
    "SampleResult": "tinker_client",
    "TINKER_AVAILABLE": "tinker_client",
    "TinkerTrainer": "tinker_trainer",
    "BabylonTinkerTrainer": "tinker_trainer",
    "TinkerTrainingConfig": "tinker_trainer",
    "TrainingMetrics": "tinker_trainer",
    # Service manager (requires requests)
    "ServiceManager": "service_manager",
    "ServiceConfig": "service_manager",
    "ServiceStatus": "service_manager",
    "check_prerequisites": "service_manager",
}


def __getattr__(name: str) -> object:
    """Resolve a lazily exported attribute on first access (PEP 562 hook).

    Python calls this only when normal module attribute lookup fails. If
    ``name`` is listed in ``_LAZY_EXPORTS``, the owning submodule is imported,
    the attribute is read from it, and the result is stored in this module's
    globals so that later lookups never reach this hook again.

    Args:
        name: Attribute name being looked up on the package.

    Returns:
        The object called ``name`` from the submodule that defines it.

    Raises:
        AttributeError: If ``name`` is not a lazy export, or the owning
            submodule does not define it.
        ImportError: Propagated unchanged if the submodule (or one of its
            own dependencies) cannot be imported.
    """
    try:
        submodule = _LAZY_EXPORTS[name]
    except KeyError:
        # ``from None`` hides the internal KeyError; callers only care that
        # the attribute does not exist.
        raise AttributeError(
            f"module {__name__!r} has no attribute {name!r}"
        ) from None

    # Imported locally so this block does not depend on a file-level import.
    from importlib import import_module

    value = getattr(import_module(f".{submodule}", __name__), name)
    # Cache on the module: subsequent accesses are plain global lookups.
    globals()[name] = value
    return value
387
+
388
+
389
# Public API of the package: the names bound by ``from <package> import *``.
# Entries marked "lazy" are not imported at the top of this file; they are
# resolved through the module-level ``__getattr__``.
# NOTE(review): because they are listed here, a star-import looks up every
# lazy name and therefore triggers those deferred imports — confirm intended.
__all__ = [
    # Atropos trainer (lazy - requires torch)
    "AtroposTrainer",
    "BabylonAtroposTrainer",
    "AtroposTrainingConfig",
    # RLAIF / online / hybrid environments (lazy, served by __getattr__)
    "RLAIFEnv",
    "RLAIFEnvConfig",
    "BabylonRLAIFEnv",
    "BabylonEnvConfig",
    "BabylonOnlineEnv",
    "BabylonOnlineEnvConfig",
    "BabylonHybridEnv",
    "BabylonHybridEnvConfig",
    # Phase 1 & 2: Online GRPO Training Infrastructure
    # -- from scenario_pool
    "Scenario",
    "ScenarioPool",
    "ScenarioPoolConfig",
    "CurriculumManager",
    "MarketState",
    "PerpetualState",
    "NewsItem",
    "SocialPost",
    "ScenarioPortfolioState",  # scenario_pool.PortfolioState, re-exported under an alias
    # -- from tokenization_utils
    "TokenizationResult",
    "tokenize_for_trainer",
    "tokenize_conversation_for_trainer",
    "validate_masks",
    "create_masks_from_response_start",
    "fix_historical_masks",
    # -- from action_executor
    "ActionResult",
    "ActionExecutor",
    "ExecutorPortfolioState",  # action_executor.PortfolioState, re-exported under an alias
    "validate_action",
    "execute_action_for_training",
    "calculate_action_quality_bonus",
    "set_simulation_seed",
    "reset_simulation_rng",
    # -- from format_validator
    "ThinkTagResult",
    "ActionValidationResult",
    "ReasoningQualityResult",
    "LengthAnalysisResult",
    "FormatValidationResult",
    "validate_response_format",
    "validate_think_tags",
    "validate_action_json",
    "get_format_and_reasoning_scores",
    "validate_for_training",
    # -- from quality_scorer
    "QualityScore",
    "calculate_thinking_length_penalty",
    "calculate_response_length_penalty",
    "calculate_combined_length_penalty",
    "score_response",
    "score_response_for_reward",
    "get_quality_bonus_for_archetype",
    "score_response_batch",
    "get_relative_quality_scores",
    # Phase 3: Evaluation & Monitoring (from evaluation)
    "EvaluationSuite",
    "EvalResult",
    "ArchetypeMetrics",
    "TestScenarioManager",
    "TestScenario",
    "BaselineManager",
    "BaselineResult",
    "RolloutDumper",
    "RolloutRecord",
    "get_wandb_config",
    "STEP_METRICS",
    "EVAL_METRICS",
    # Phase 4: A/B Testing (from ab_testing)
    "ABTestRunner",
    "ABTestResult",
    "ModelResult",
    "EVAL_SCENARIOS",
    "run_ab_test",
    # Phase 4: Advanced Features
    # -- from kl_controller
    "KLConfig",
    "KLStats",
    "KLControllerBase",
    "create_kl_controller",
    "compute_kl_divergence",
    "estimate_kl_from_samples",
    # -- from multi_turn
    "TurnData",
    "EpisodeBuffer",
    "GAEConfig",
    "MultiTurnEpisodeManager",
    "EpisodeCollector",
    "shape_trading_rewards",
    "compute_episode_return",
    "normalize_episode_rewards",
    # Tinker client and trainer (lazy - requires tinker)
    "TinkerClient",
    "BabylonTinkerClient",
    "TinkerConfig",
    "TinkerDatum",
    "TrainStepResult",
    "SampleResult",
    "TINKER_AVAILABLE",
    "TinkerTrainer",
    "BabylonTinkerTrainer",
    "TinkerTrainingConfig",
    "TrainingMetrics",
    # Reward functions (from rewards)
    "pnl_reward",
    "risk_adjusted_reward",
    "efficiency_reward",
    "action_quality_reward",
    "composite_reward",
    "relative_scores",
    "ranking_to_scores",
    "pairwise_preferences_to_scores",
    "RewardNormalizer",
    # Archetype-aware scoring (from rewards)
    "BehaviorMetrics",
    "archetype_composite_reward",
    "calculate_archetype_behavior_bonus",
    "get_archetype_weights",
    "ARCHETYPE_REWARD_WEIGHTS",
    # Fast rollout generation (lazy - may require torch)
    "FastRolloutGenerator",
    "RolloutConfig",
    "RolloutResult",
    "AgentTickData",
    "RolloutQualityValidator",
    "AgentRunner",
    # Fast simulator (lazy, served by __getattr__)
    "FastSimulator",
    "SimulatorConfig",
    "SimulatorMetrics",
    "GameState",
    # Multi-prompt dataset (imported eagerly from multi_prompt_dataset)
    "MultiPromptDatasetBuilder",
    "PromptDataset",
    "PromptSample",
    "prepare_multi_prompt_training_data",
    "PromptTypeAnalyzer",
    "validate_training_sample",
    "validate_trajectory_for_training",
    # Tick reward attribution (from tick_reward_attribution)
    "TickRewardAttributor",
    "TickData",
    "TickOutcome",
    "LLMCallRecord",
    "CallPurpose",
    "build_training_samples_from_tick",
    "group_samples_for_grpo",
    # Quality utilities (from quality_utils)
    "calculate_tick_quality_score",
    "calculate_trajectory_quality_score",
    "build_trajectory_from_ticks",
    "state_to_observation",
    "state_to_env_state",
    "validate_trajectory_quality",
    "ValidationResult",  # the quality_utils class; the schemas one is SchemaValidationResult
    # Archetype training (from archetype_trainer)
    "ArchetypeTrainer",
    "ArchetypeTrainingConfig",
    "ArchetypeTrainingResult",
    # Rubric loading (from rubric_loader)
    "get_rubric",
    "get_priority_metrics",
    "get_available_archetypes",
    "reload_rubrics",
    "get_rubric_hash",
    "get_all_rubrics_hash",
    "get_rubrics_version",
    "normalize_archetype",
    "has_custom_rubric",
    "DEFAULT_RUBRIC",
    "RUBRICS_VERSION",
    # Service manager (lazy - requires requests)
    "ServiceManager",
    "ServiceConfig",
    "ServiceStatus",
    "check_prerequisites",
    # Schema validation (from schemas)
    "TrajectorySchema",
    "StepSchema",
    "ActionSchema",
    "LLMCallSchema",
    "EnvironmentStateSchema",
    "validate_trajectory",
    "validate_step",
    "validate_llm_call",
    "validate_trajectory_file",
    "compare_trajectory_formats",
    "SchemaValidationResult",  # schemas.ValidationResult, re-exported under an alias
    # Phase 5: Simulation Bridge (from simulation_bridge)
    "SimulationBridge",
    "PerpMarket",
    "PredictionMarket",
    "Position",
    "BridgeNewsItem",  # simulation_bridge.NewsItem, aliased to avoid clashing with scenario_pool
    "Relationship",
    "SocialContext",
    "BridgeMarketState",  # simulation_bridge.MarketState, aliased
    "BridgeScenario",  # simulation_bridge.Scenario, aliased
    "ActionOutcome",
    "TickResult",
    "create_bridge",
    # Error recovery (from error_recovery)
    "ErrorCategory",
    "TrainingError",
    "classify_error",
    "is_recoverable",
    "with_retry",
    "with_retry_async",
    "RecoveryResult",
    "recover_json_parse",
    "recover_trajectory_archetype",
    "filter_valid_trajectories",
    "DatabaseConnectionManager",
    "GracefulShutdown",
    "TrainingProgress",
    "safe_divide",
    "clamp",
    "require_env",
    "get_env_or_default",
]