@elizaos/training 2.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. package/Dockerfile +75 -0
  2. package/LICENSE +21 -0
  3. package/Makefile +374 -0
  4. package/README.md +346 -0
  5. package/config/rubrics.json +137 -0
  6. package/docker-compose.test.yml +57 -0
  7. package/package.json +57 -0
  8. package/python/config/babylon_atropos.yaml +90 -0
  9. package/python/config/profiles/12gb.json +11 -0
  10. package/python/config/profiles/16gb.json +10 -0
  11. package/python/config/profiles/24gb.json +10 -0
  12. package/python/config/profiles/48gb.json +10 -0
  13. package/python/config/profiles/cpu.json +11 -0
  14. package/python/config/profiles/l40-2gpu-safe.json +20 -0
  15. package/python/config/profiles/l40-2gpu.json +22 -0
  16. package/python/config/profiles/l40-4gpu.json +21 -0
  17. package/python/config/profiles/l40.json +17 -0
  18. package/python/config/tinker_training.yaml +143 -0
  19. package/python/curriculum_state.json +165 -0
  20. package/python/env.template +86 -0
  21. package/python/env.training.template +46 -0
  22. package/python/pyproject.toml +41 -0
  23. package/python/requirements-ci.txt +31 -0
  24. package/python/requirements.txt +87 -0
  25. package/python/scripts/__init__.py +4 -0
  26. package/python/scripts/benchmark_should_respond.py +190 -0
  27. package/python/scripts/debug_inference.py +62 -0
  28. package/python/scripts/import_json_trajectories.py +412 -0
  29. package/python/scripts/local-finetune/README.md +63 -0
  30. package/python/scripts/local-finetune/ingest_and_score.py +139 -0
  31. package/python/scripts/local-finetune/merge_model.py +32 -0
  32. package/python/scripts/local-finetune/test_adapter.py +91 -0
  33. package/python/scripts/local-finetune/train_from_csv.py +132 -0
  34. package/python/scripts/merge_trajectories.py +318 -0
  35. package/python/scripts/optimize_prompt_grpo.py +269 -0
  36. package/python/scripts/run_ab_test.py +143 -0
  37. package/python/scripts/run_full_pipeline.py +544 -0
  38. package/python/scripts/run_tinker_training.py +192 -0
  39. package/python/scripts/run_training.py +914 -0
  40. package/python/scripts/test_generation.py +29 -0
  41. package/python/scripts/test_judge.py +155 -0
  42. package/python/scripts/test_pipeline.py +356 -0
  43. package/python/scripts/test_trained_model.py +380 -0
  44. package/python/scripts/train_grpo.py +360 -0
  45. package/python/scripts/train_jsonl.py +223 -0
  46. package/python/scripts/train_local.py +528 -0
  47. package/python/setup.py +20 -0
  48. package/python/src/__init__.py +190 -0
  49. package/python/src/data_bridge/__init__.py +24 -0
  50. package/python/src/data_bridge/converter.py +435 -0
  51. package/python/src/data_bridge/reader.py +393 -0
  52. package/python/src/models.py +283 -0
  53. package/python/src/training/__init__.py +605 -0
  54. package/python/src/training/ab_testing.py +404 -0
  55. package/python/src/training/action_executor.py +621 -0
  56. package/python/src/training/archetype_trainer.py +347 -0
  57. package/python/src/training/atropos_trainer.py +980 -0
  58. package/python/src/training/babylon_env.py +1254 -0
  59. package/python/src/training/error_recovery.py +647 -0
  60. package/python/src/training/evaluation.py +856 -0
  61. package/python/src/training/fast_simulator.py +880 -0
  62. package/python/src/training/format_validator.py +584 -0
  63. package/python/src/training/hybrid_env.py +522 -0
  64. package/python/src/training/kl_controller.py +628 -0
  65. package/python/src/training/multi_prompt_dataset.py +883 -0
  66. package/python/src/training/multi_turn.py +656 -0
  67. package/python/src/training/online_env.py +1084 -0
  68. package/python/src/training/quality_scorer.py +391 -0
  69. package/python/src/training/quality_utils.py +633 -0
  70. package/python/src/training/rewards.py +1344 -0
  71. package/python/src/training/rlaif_env.py +17 -0
  72. package/python/src/training/rollout_generator.py +502 -0
  73. package/python/src/training/rubric_loader.py +198 -0
  74. package/python/src/training/scenario_pool.py +1072 -0
  75. package/python/src/training/schemas.py +481 -0
  76. package/python/src/training/service_manager.py +552 -0
  77. package/python/src/training/simulation_bridge.py +535 -0
  78. package/python/src/training/tick_reward_attribution.py +399 -0
  79. package/python/src/training/tinker_client.py +575 -0
  80. package/python/src/training/tinker_trainer.py +646 -0
  81. package/python/src/training/tokenization_utils.py +402 -0
  82. package/python/tests/e2e/__init__.py +13 -0
  83. package/python/tests/e2e/conftest.py +258 -0
  84. package/python/tests/e2e/test_full_pipeline.py +643 -0
  85. package/python/tests/e2e/test_online_training_e2e.py +365 -0
  86. package/python/tests/integration/__init__.py +12 -0
  87. package/python/tests/integration/conftest.py +383 -0
  88. package/python/tests/integration/test_db_integration.py +649 -0
  89. package/python/tests/integration/test_json_mode_integration.py +554 -0
  90. package/python/tests/test_action_executor.py +594 -0
  91. package/python/tests/test_archetype_scoring.py +1027 -0
  92. package/python/tests/test_atropos_integration.py +360 -0
  93. package/python/tests/test_evaluation.py +727 -0
  94. package/python/tests/test_format_validator.py +486 -0
  95. package/python/tests/test_kl_controller.py +432 -0
  96. package/python/tests/test_lr_scheduler.py +579 -0
  97. package/python/tests/test_multi_turn.py +590 -0
  98. package/python/tests/test_online_env.py +519 -0
  99. package/python/tests/test_quality_scorer.py +474 -0
  100. package/python/tests/test_scenario_pool.py +735 -0
  101. package/python/tests/test_service_manager.py +585 -0
  102. package/python/tests/test_simulation_rollout.py +581 -0
  103. package/python/tests/test_tokenization_utils.py +501 -0
  104. package/python/tests/test_training_orchestrator.py +497 -0
  105. package/python/tests/test_training_output_structure.py +661 -0
  106. package/research-output/training-runs/training-run-1770772042899.json +26 -0
  107. package/research-output/training-runs/training-run-1770930079670.json +32 -0
  108. package/research-output/training-runs/training-run-1770930143700.json +44 -0
  109. package/research-output/training-runs/training-run-1770930183638.json +38 -0
  110. package/research-output/training-runs/training-run-1770930442049.json +38 -0
  111. package/research-output/training-runs/training-run-1770930793243.json +38 -0
  112. package/research-output/training-runs/training-run-1771276293257.json +38 -0
  113. package/research-output/training-runs/training-run-1771276389280.json +38 -0
  114. package/research-output/training-runs/training-run-1771276502776.json +38 -0
  115. package/research-output/training-runs/training-run-1771277340748.json +38 -0
  116. package/research-output/training-runs/training-run-1773013658993.json +38 -0
  117. package/research-output/training-runs/training-run-1773013861014.json +38 -0
  118. package/research-output/training-runs/training-run-1773014215983.json +38 -0
  119. package/scripts/assess-training-data.ts +422 -0
  120. package/scripts/e2e-training-test.ts +550 -0
  121. package/scripts/export-rubrics.ts +64 -0
  122. package/scripts/generate-research-report.ts +1523 -0
  123. package/scripts/generate_dataset.sh +173 -0
  124. package/scripts/generate_should_respond.ts +267 -0
  125. package/scripts/generate_should_respond_dataset.ts +162 -0
  126. package/scripts/json-mode-benchmark.ts +399 -0
  127. package/scripts/rank_trajectories.ts +207 -0
  128. package/scripts/real-archetype-benchmark.ts +210 -0
  129. package/scripts/run-baseline-comparison.ts +116 -0
  130. package/scripts/run-full-pipeline.ts +272 -0
  131. package/scripts/run_rlaif_loop.ts +78 -0
  132. package/scripts/run_task_benchmark.ts +247 -0
  133. package/scripts/runpod_setup.sh +137 -0
  134. package/scripts/runpod_validate.sh +147 -0
  135. package/scripts/test-model-in-game.ts +955 -0
  136. package/scripts/test-scoring.ts +73 -0
  137. package/scripts/test-trained-model.ts +209 -0
  138. package/scripts/train-and-test.ts +824 -0
  139. package/scripts/verify-final.ts +118 -0
  140. package/src/adapter.ts +516 -0
  141. package/src/archetypes/ArchetypeConfigService.ts +626 -0
  142. package/src/archetypes/derive-archetype.ts +249 -0
  143. package/src/archetypes/index.ts +22 -0
  144. package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
  145. package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
  146. package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
  147. package/src/benchmark/BenchmarkDataViewer.ts +324 -0
  148. package/src/benchmark/BenchmarkHistoryService.ts +221 -0
  149. package/src/benchmark/BenchmarkRunner.ts +685 -0
  150. package/src/benchmark/BenchmarkValidator.ts +204 -0
  151. package/src/benchmark/FastEvalRunner.ts +225 -0
  152. package/src/benchmark/MetricsValidator.ts +165 -0
  153. package/src/benchmark/MetricsVisualizer.ts +909 -0
  154. package/src/benchmark/ModelBenchmarkService.ts +611 -0
  155. package/src/benchmark/ModelRegistry.ts +158 -0
  156. package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
  157. package/src/benchmark/SimulationA2AInterface.ts +1169 -0
  158. package/src/benchmark/SimulationEngine.ts +832 -0
  159. package/src/benchmark/TaskRunner.ts +94 -0
  160. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
  161. package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
  162. package/src/benchmark/index.ts +91 -0
  163. package/src/benchmark/parseSimulationMetrics.ts +124 -0
  164. package/src/benchmark/simulation-types.ts +78 -0
  165. package/src/dependencies.ts +475 -0
  166. package/src/generation/TrajectoryGenerator.ts +387 -0
  167. package/src/generation/index.ts +12 -0
  168. package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
  169. package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
  170. package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
  171. package/src/huggingface/index.ts +27 -0
  172. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
  173. package/src/index.ts +102 -0
  174. package/src/init-training.ts +53 -0
  175. package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
  176. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
  177. package/src/metrics/index.ts +8 -0
  178. package/src/metrics/types.ts +200 -0
  179. package/src/rubrics/__tests__/index.test.ts +184 -0
  180. package/src/rubrics/ass-kisser.ts +85 -0
  181. package/src/rubrics/degen.ts +80 -0
  182. package/src/rubrics/goody-twoshoes.ts +84 -0
  183. package/src/rubrics/index.ts +236 -0
  184. package/src/rubrics/information-trader.ts +84 -0
  185. package/src/rubrics/infosec.ts +101 -0
  186. package/src/rubrics/liar.ts +104 -0
  187. package/src/rubrics/perps-trader.ts +87 -0
  188. package/src/rubrics/researcher.ts +81 -0
  189. package/src/rubrics/scammer.ts +82 -0
  190. package/src/rubrics/social-butterfly.ts +73 -0
  191. package/src/rubrics/super-predictor.ts +97 -0
  192. package/src/rubrics/trader.ts +67 -0
  193. package/src/scoring/ArchetypeScoringService.ts +486 -0
  194. package/src/scoring/JudgePromptBuilder.ts +556 -0
  195. package/src/scoring/LLMJudgeCache.ts +401 -0
  196. package/src/scoring/index.ts +9 -0
  197. package/src/training/AutomationPipeline.ts +916 -0
  198. package/src/training/BenchmarkService.ts +518 -0
  199. package/src/training/ConfigValidator.ts +220 -0
  200. package/src/training/MarketOutcomesTracker.ts +187 -0
  201. package/src/training/ModelDeployer.ts +186 -0
  202. package/src/training/ModelFetcher.ts +76 -0
  203. package/src/training/ModelSelectionService.ts +341 -0
  204. package/src/training/ModelUsageVerifier.ts +160 -0
  205. package/src/training/MultiModelOrchestrator.ts +580 -0
  206. package/src/training/RLModelConfig.ts +407 -0
  207. package/src/training/RewardBackpropagationService.ts +149 -0
  208. package/src/training/RulerScoringService.ts +666 -0
  209. package/src/training/TrainingMonitor.ts +166 -0
  210. package/src/training/TrajectoryRecorder.ts +399 -0
  211. package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
  212. package/src/training/index.ts +100 -0
  213. package/src/training/logRLConfig.ts +34 -0
  214. package/src/training/pipeline.ts +129 -0
  215. package/src/training/storage/ModelStorageService.ts +279 -0
  216. package/src/training/storage/TrainingDataArchiver.ts +197 -0
  217. package/src/training/storage/index.ts +17 -0
  218. package/src/training/types.ts +207 -0
  219. package/src/training/window-utils.ts +138 -0
  220. package/src/utils/index.ts +101 -0
  221. package/src/utils/logger.ts +59 -0
  222. package/src/utils/snowflake.ts +17 -0
  223. package/src/utils/synthetic-detector.ts +111 -0
  224. package/tsconfig.json +20 -0
@@ -0,0 +1,247 @@
1
+ #!/usr/bin/env bun
2
+
3
+ import {
4
+ AgentRuntime,
5
+ stringToUuid,
6
+ ModelType,
7
+ type IAgentRuntime,
8
+ type Memory,
9
+ type State,
10
+ } from '../../typescript/src/index';
11
+
12
+ import { v4 as uuidv4 } from 'uuid';
13
+ import * as fs from 'fs';
14
+ import * as path from 'path';
15
+ import { parseArgs } from 'util';
16
+
17
+ // Import from local src
18
+ import {
19
+ configureTrainingDependencies,
20
+ TaskRunner,
21
+ type CreateAgentParams,
22
+ type IAgentRuntimeLike,
23
+ type IAgentRuntimeManager,
24
+ type IAgentService,
25
+ type ITaskInteractor,
26
+ type TrajectoryStepForTraining,
27
+ type UserLike,
28
+ } from '../src';
29
+
30
+ // Implement Dependencies
31
+
32
+ class BenchmarkAgentService implements IAgentService {
33
+ async createAgent(params: CreateAgentParams): Promise<UserLike> {
34
+ // Return dummy user
35
+ return {
36
+ id: stringToUuid(params.name),
37
+ username: params.name,
38
+ };
39
+ }
40
+ }
41
+
42
+ class BenchmarkRuntimeManager implements IAgentRuntimeManager {
43
+ private runtimes = new Map<string, IAgentRuntime>();
44
+
45
+ async getRuntime(agentId: string): Promise<IAgentRuntimeLike> {
46
+ if (this.runtimes.has(agentId)) {
47
+ return this.runtimes.get(agentId) as unknown as IAgentRuntimeLike;
48
+ }
49
+
50
+ // Create a new runtime
51
+ const character = {
52
+ name: 'BenchmarkAgent',
53
+ modelProvider: "openai" as any,
54
+ bio: 'A helpful assistant for benchmarking.',
55
+ settings: {
56
+ secrets: {
57
+ OPENAI_API_KEY: process.env.OPENAI_API_KEY || ''
58
+ }
59
+ }
60
+ };
61
+
62
+ const runtime = new AgentRuntime({
63
+ token: process.env.OPENAI_API_KEY || '',
64
+ modelProvider: "openai" as any,
65
+ character,
66
+ plugins: [],
67
+ providers: [],
68
+ actions: [],
69
+ evaluators: [],
70
+ });
71
+
72
+ // We must initialize with allowNoDatabase to avoid DB error
73
+ await runtime.initialize({ allowNoDatabase: true });
74
+
75
+ // Register a mock model handler for TEXT_SMALL to allow generateText to work
76
+ runtime.registerModel(
77
+ ModelType.TEXT_SMALL,
78
+ async (rt, params) => {
79
+ return "This is a mock response from the benchmark script.";
80
+ },
81
+ "mock-provider",
82
+ 100
83
+ );
84
+
85
+ this.runtimes.set(agentId, runtime);
86
+
87
+ return runtime as unknown as IAgentRuntimeLike;
88
+ }
89
+
90
+ async resetRuntime(agentId: string): Promise<void> {
91
+ this.runtimes.delete(agentId);
92
+ }
93
+ }
94
+
95
+ class BenchmarkTaskInteractor implements ITaskInteractor {
96
+ async executeTask(
97
+ agentRuntime: IAgentRuntimeLike,
98
+ taskPrompt: string,
99
+ options?: { maxTurns?: number; temperature?: number }
100
+ ): Promise<{
101
+ success: boolean;
102
+ response: string;
103
+ trajectoryId?: string;
104
+ steps?: TrajectoryStepForTraining[];
105
+ error?: string;
106
+ }> {
107
+ const runtime = agentRuntime as unknown as AgentRuntime;
108
+ const trajectoryId = uuidv4();
109
+ const startTime = Date.now();
110
+
111
+ try {
112
+ // 1. Create User Memory (in memory only, since we use no-db)
113
+ const messageId = uuidv4();
114
+ const userId = stringToUuid('user');
115
+ const roomId = stringToUuid('benchmark-room');
116
+
117
+ const userMemory: Memory = {
118
+ id: messageId as `${string}-${string}-${string}-${string}-${string}`,
119
+ userId: userId as `${string}-${string}-${string}-${string}-${string}`,
120
+ agentId: runtime.agentId,
121
+ roomId: roomId as `${string}-${string}-${string}-${string}-${string}`,
122
+ content: {
123
+ text: taskPrompt,
124
+ },
125
+ createdAt: Date.now(),
126
+ };
127
+
128
+ // Use standard createMemory method
129
+ // createMemory(memory: Memory, tableName: string, unique?: boolean)
130
+ await runtime.createMemory(userMemory, 'messages', true);
131
+
132
+ // 2. Generate Response
133
+ const state: State = await runtime.composeState(userMemory);
134
+
135
+ const context = `You are ${runtime.character.name}.
136
+ ${state.bio}
137
+ ${state.lore}
138
+
139
+ User: ${taskPrompt}
140
+ Assistant:`;
141
+
142
+ // Use generateText from runtime
143
+ // Signature: generateText(input: string, options?: GenerateTextOptions)
144
+ const result = await runtime.generateText(context, {
145
+ modelType: ModelType.TEXT_SMALL,
146
+ });
147
+ // Handle both string and object return types for safety
148
+ const response = typeof result === 'string' ? result : result.text;
149
+
150
+ // Real implementation of logging:
151
+ const steps: TrajectoryStepForTraining[] = [{
152
+ stepId: uuidv4(),
153
+ stepNumber: 1,
154
+ timestamp: Date.now(),
155
+ environmentState: { timestamp: Date.now(), agentPoints: 0 },
156
+ observation: { userMessage: taskPrompt },
157
+ providerAccesses: [],
158
+ llmCalls: [],
159
+ action: {
160
+ attemptId: uuidv4(),
161
+ timestamp: Date.now(),
162
+ actionType: 'text_response',
163
+ actionName: 'response',
164
+ parameters: { text: response },
165
+ success: true
166
+ },
167
+ reward: 0,
168
+ done: true,
169
+ metadata: {}
170
+ }];
171
+
172
+ // Log to File
173
+ const trajectoryRecord = {
174
+ id: uuidv4(),
175
+ trajectoryId: trajectoryId,
176
+ agentId: runtime.agentId,
177
+ startTime: new Date(startTime).toISOString(),
178
+ endTime: new Date().toISOString(),
179
+ durationMs: Date.now() - startTime,
180
+ steps,
181
+ metadata: { task: taskPrompt },
182
+ isTrainingData: true,
183
+ };
184
+
185
+ const logFile = path.resolve(process.cwd(), 'trajectories.jsonl');
186
+ fs.appendFileSync(logFile, JSON.stringify(trajectoryRecord) + '\n');
187
+ console.log(`Saved trajectory to ${logFile}`);
188
+
189
+ return {
190
+ success: true,
191
+ response: String(response),
192
+ trajectoryId,
193
+ steps
194
+ };
195
+
196
+ } catch (e) {
197
+ console.error('Error executing task', e);
198
+ return {
199
+ success: false,
200
+ response: '',
201
+ error: e instanceof Error ? e.message : String(e)
202
+ };
203
+ }
204
+ }
205
+ }
206
+
207
+ async function main() {
208
+ const { values } = parseArgs({
209
+ args: process.argv.slice(2),
210
+ options: {
211
+ task: { type: 'string', default: 'Hello, who are you?' },
212
+ iterations: { type: 'string', default: '1' },
213
+ model: { type: 'string', default: 'gpt-4o-mini' },
214
+ },
215
+ });
216
+
217
+ const config = {
218
+ agentName: 'BenchmarkBot',
219
+ taskPrompt: values.task as string,
220
+ // bun packages/training/scripts/run_task_benchmark.ts --model "llama3.2" (requires Ollama running)
221
+ iterations: parseInt(values.iterations as string, 10),
222
+ model: values.model as string,
223
+ };
224
+
225
+ // Configure Dependencies
226
+ configureTrainingDependencies({
227
+ agentService: new BenchmarkAgentService(),
228
+ agentRuntimeManager: new BenchmarkRuntimeManager(),
229
+ autonomousCoordinator: {
230
+ executeAutonomousTick: async () => ({ success: true })
231
+ } as any,
232
+ llmCaller: {
233
+ callGroqDirect: async () => "mock response"
234
+ } as any,
235
+ });
236
+
237
+ // Import task interactor config
238
+ const { configureTaskInteractor } = await import('../src/dependencies');
239
+ configureTaskInteractor(new BenchmarkTaskInteractor());
240
+
241
+ const runner = new TaskRunner(config);
242
+ const results = await runner.run();
243
+
244
+ console.log(JSON.stringify(results, null, 2));
245
+ }
246
+
247
+ main().catch(console.error);
@@ -0,0 +1,137 @@
1
#!/bin/bash
#
# RunPod Setup Script for Babylon Training
#
# Usage:
# 1. SSH into your RunPod instance
# 2. Clone the repo
# 3. Run: bash packages/training/scripts/runpod_setup.sh
#
# Prerequisites:
# - 2x L40 GPUs (96GB total VRAM)
# - WANDB_API_KEY environment variable (optional)
# - DATABASE_URL for trajectory data (or use synthetic)
#

set -e

CYAN='\033[0;36m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
RESET='\033[0m'

echo -e "${CYAN}======================================${RESET}"
echo -e "${CYAN} Babylon Training - RunPod Setup ${RESET}"
echo -e "${CYAN}======================================${RESET}"
echo ""

# Check GPU availability
echo -e "${CYAN}[1/7] Checking GPU availability...${RESET}"
if command -v nvidia-smi &> /dev/null; then
    GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
    echo -e "${GREEN}✓ Found $GPU_COUNT GPU(s):${RESET}"
    nvidia-smi --query-gpu=name,memory.total --format=csv
else
    echo -e "${RED}✗ nvidia-smi not found. GPU drivers not installed?${RESET}"
    exit 1
fi

# Navigate to training directory (script lives in scripts/, so go one up)
cd "$(dirname "$0")/.."
TRAINING_DIR=$(pwd)
echo -e "${GREEN}✓ Working directory: $TRAINING_DIR${RESET}"

# Install system dependencies
echo ""
echo -e "${CYAN}[2/7] Installing system dependencies...${RESET}"
apt-get update -qq
apt-get install -y -qq python3.11 python3.11-venv python3-pip curl git > /dev/null 2>&1
echo -e "${GREEN}✓ System dependencies installed${RESET}"

# Create virtual environment (reused if it already exists)
echo ""
echo -e "${CYAN}[3/7] Setting up Python virtual environment...${RESET}"
cd python
if [ ! -d "venv" ]; then
    python3.11 -m venv venv
fi
source venv/bin/activate
pip install --upgrade pip -q
echo -e "${GREEN}✓ Virtual environment activated${RESET}"

# Install Python dependencies
echo ""
echo -e "${CYAN}[4/7] Installing Python dependencies (this may take 5-10 minutes)...${RESET}"
pip install -r requirements.txt -q
# Fix: the version spec must be quoted. Unquoted, `vllm>=0.4.0` is parsed by
# the shell as `vllm` plus a redirection (creating a junk file named
# `=0.4.0`), and the version constraint is silently dropped.
pip install "vllm>=0.4.0" atroposlib wandb -q
echo -e "${GREEN}✓ Python dependencies installed${RESET}"

# Try to install flash-attention (optional, may fail on some systems)
echo ""
echo -e "${CYAN}[5/7] Installing flash-attention (optional)...${RESET}"
pip install flash-attn --no-build-isolation -q 2>/dev/null && \
    echo -e "${GREEN}✓ Flash attention installed${RESET}" || \
    echo -e "${YELLOW}⚠ Flash attention not available (optional, continuing)${RESET}"

# Verify installation: import torch/vllm and report GPU inventory
echo ""
echo -e "${CYAN}[6/7] Verifying installation...${RESET}"
python -c "
import torch
import vllm
print(f'PyTorch: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')
print(f'GPU count: {torch.cuda.device_count()}')
for i in range(torch.cuda.device_count()):
    props = torch.cuda.get_device_properties(i)
    print(f'  GPU {i}: {props.name} ({props.total_memory / 1e9:.1f} GB)')
print(f'vLLM: {vllm.__version__}')
"
echo -e "${GREEN}✓ Installation verified${RESET}"

# Setup environment
echo ""
echo -e "${CYAN}[7/7] Setting up environment...${RESET}"

# Check for W&B key
if [ -n "$WANDB_API_KEY" ]; then
    echo -e "${GREEN}✓ W&B API key found${RESET}"
else
    echo -e "${YELLOW}⚠ WANDB_API_KEY not set. Set it with: export WANDB_API_KEY=your_key${RESET}"
fi

# Check for database
if [ -n "$DATABASE_URL" ]; then
    echo -e "${GREEN}✓ DATABASE_URL found${RESET}"
else
    echo -e "${YELLOW}⚠ DATABASE_URL not set. Will use synthetic data for online training.${RESET}"
fi

echo ""
echo -e "${GREEN}======================================${RESET}"
echo -e "${GREEN} Setup Complete! ${RESET}"
echo -e "${GREEN}======================================${RESET}"
echo ""
echo -e "Next steps:"
echo ""
echo -e " ${CYAN}# Activate environment${RESET}"
echo -e " source python/venv/bin/activate"
echo ""
echo -e " ${CYAN}# Quick validation (single GPU, small model)${RESET}"
echo -e " make train PROFILE=48gb STEPS=20"
echo ""
echo -e " ${CYAN}# 2x L40 validation (14B model)${RESET}"
echo -e " make train PROFILE=l40-2gpu STEPS=50"
echo ""
echo -e " ${CYAN}# Full cloud training with W&B${RESET}"
echo -e " export WANDB_API_KEY=your_key"
echo -e " make train-cloud PROFILE=l40-2gpu STEPS=100"
echo ""
echo -e " ${CYAN}# Online training (requires bridge server)${RESET}"
echo -e " # Terminal 1: make bridge-server"
echo -e " # Terminal 2: make train-online PROFILE=l40-2gpu"
echo ""
@@ -0,0 +1,147 @@
1
#!/bin/bash
#
# RunPod Validation Script
#
# Runs a quick validation of the training pipeline on cloud GPUs.
# Expects setup to be complete (run runpod_setup.sh first).
#

set -e

CYAN='\033[0;36m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
RESET='\033[0m'

cd "$(dirname "$0")/.."
source python/venv/bin/activate

echo -e "${CYAN}======================================${RESET}"
echo -e "${CYAN} Babylon Training - Cloud Validation ${RESET}"
echo -e "${CYAN}======================================${RESET}"
echo ""

# Check GPU count
GPU_COUNT=$(python -c "import torch; print(torch.cuda.device_count())")
echo -e "${GREEN}GPU Count: $GPU_COUNT${RESET}"

# Determine profile based on GPU count
if [ "$GPU_COUNT" -ge 4 ]; then
    PROFILE="l40-4gpu"
    MODEL="Qwen/Qwen3-30B-A3B"
elif [ "$GPU_COUNT" -ge 2 ]; then
    PROFILE="l40-2gpu"
    MODEL="Qwen/Qwen2.5-32B-Instruct"
else
    PROFILE="l40"
    MODEL="Qwen/Qwen2.5-14B-Instruct"
fi

echo -e "${GREEN}Selected profile: $PROFILE${RESET}"
echo -e "${GREEN}Model: $MODEL${RESET}"
echo ""

# Test 1: Quick vLLM model loading
echo -e "${CYAN}[Test 1/4] Testing vLLM model loading...${RESET}"
python -c "
import torch
from vllm import LLM, SamplingParams

print('Loading model for inference test...')
llm = LLM(
    model='$MODEL',
    tensor_parallel_size=$GPU_COUNT,
    gpu_memory_utilization=0.5,
    max_model_len=2048,
)
print('✓ Model loaded successfully')

# Quick inference test.
# Fix: LLM.generate has no max_tokens kwarg; the limit goes in SamplingParams.
outputs = llm.generate(['Hello, I am a trading agent.'], SamplingParams(max_tokens=20))
print(f'✓ Inference test passed: {outputs[0].outputs[0].text[:50]}...')
" && echo -e "${GREEN}✓ vLLM test passed${RESET}" || {
    echo -e "${RED}✗ vLLM test failed${RESET}"
    echo -e "${YELLOW}Trying with smaller model...${RESET}"

    # Fallback to smaller model
    python -c "
from vllm import LLM, SamplingParams
llm = LLM(model='Qwen/Qwen2.5-7B-Instruct', tensor_parallel_size=min($GPU_COUNT, 2), gpu_memory_utilization=0.4, max_model_len=2048)
print('✓ Fallback model loaded')
outputs = llm.generate(['Hello'], SamplingParams(max_tokens=10))
print(f'✓ Inference: {outputs[0].outputs[0].text}')
"
    PROFILE="48gb" # Fall back to smaller profile
}

echo ""

# Test 2: Service manager
echo -e "${CYAN}[Test 2/4] Testing service manager...${RESET}"
cd python
PYTHONPATH=. python -c "
from src.training.service_manager import ServiceConfig, check_prerequisites

config = ServiceConfig(
    model_name='Qwen/Qwen2.5-7B-Instruct',
    tensor_parallel_size=$GPU_COUNT,
    vllm_gpu_memory_utilization=0.4,
)
print(f'✓ ServiceConfig created: tensor_parallel={config.tensor_parallel_size}')

errors = check_prerequisites()
if errors:
    for e in errors:
        print(f'  Warning: {e}')
else:
    print('✓ All prerequisites met')
"
cd ..
echo -e "${GREEN}✓ Service manager test passed${RESET}"
echo ""

# Test 3: Quick training run (10 steps)
echo -e "${CYAN}[Test 3/4] Running quick training validation (10 steps)...${RESET}"
echo -e "${YELLOW}This will take 5-15 minutes depending on model size...${RESET}"
echo ""

# Use a simpler profile for the quick test
make train PROFILE=48gb STEPS=10 2>&1 | tee /tmp/training_validation.log
# Fix: without pipefail, $? after the pipeline is tee's status (always 0),
# so the failure branch below was unreachable. Capture make's own exit code
# from PIPESTATUS immediately after the pipeline.
TRAIN_EXIT=${PIPESTATUS[0]}

if [ "$TRAIN_EXIT" -eq 0 ]; then
    echo -e "${GREEN}✓ Training validation passed${RESET}"
else
    echo -e "${RED}✗ Training validation failed${RESET}"
    echo -e "Check /tmp/training_validation.log for details"
    exit 1
fi
echo ""

# Test 4: Check trained model output
echo -e "${CYAN}[Test 4/4] Checking trained model output...${RESET}"
if [ -d "python/trained_models/final_model" ]; then
    echo -e "${GREEN}✓ Trained model saved to python/trained_models/final_model${RESET}"
    ls -la python/trained_models/final_model/ | head -10
else
    echo -e "${YELLOW}⚠ No final model found (might be too few steps)${RESET}"
fi
echo ""

echo -e "${GREEN}======================================${RESET}"
echo -e "${GREEN} Validation Complete! ${RESET}"
echo -e "${GREEN}======================================${RESET}"
echo ""
echo -e "Cloud training is working. Next steps:"
echo ""
echo -e " ${CYAN}# Full training run with W&B logging${RESET}"
echo -e " export WANDB_API_KEY=your_key"
echo -e " make train-cloud PROFILE=$PROFILE STEPS=1000"
echo ""
echo -e " ${CYAN}# Or with online training${RESET}"
echo -e " make bridge-server &"
echo -e " make train-online PROFILE=$PROFILE STEPS=500"
echo ""