@elizaos/training 2.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. package/Dockerfile +75 -0
  2. package/LICENSE +21 -0
  3. package/Makefile +374 -0
  4. package/README.md +346 -0
  5. package/config/rubrics.json +137 -0
  6. package/docker-compose.test.yml +57 -0
  7. package/package.json +57 -0
  8. package/python/config/babylon_atropos.yaml +90 -0
  9. package/python/config/profiles/12gb.json +11 -0
  10. package/python/config/profiles/16gb.json +10 -0
  11. package/python/config/profiles/24gb.json +10 -0
  12. package/python/config/profiles/48gb.json +10 -0
  13. package/python/config/profiles/cpu.json +11 -0
  14. package/python/config/profiles/l40-2gpu-safe.json +20 -0
  15. package/python/config/profiles/l40-2gpu.json +22 -0
  16. package/python/config/profiles/l40-4gpu.json +21 -0
  17. package/python/config/profiles/l40.json +17 -0
  18. package/python/config/tinker_training.yaml +143 -0
  19. package/python/curriculum_state.json +165 -0
  20. package/python/env.template +86 -0
  21. package/python/env.training.template +46 -0
  22. package/python/pyproject.toml +41 -0
  23. package/python/requirements-ci.txt +31 -0
  24. package/python/requirements.txt +87 -0
  25. package/python/scripts/__init__.py +4 -0
  26. package/python/scripts/benchmark_should_respond.py +190 -0
  27. package/python/scripts/debug_inference.py +62 -0
  28. package/python/scripts/import_json_trajectories.py +412 -0
  29. package/python/scripts/local-finetune/README.md +63 -0
  30. package/python/scripts/local-finetune/ingest_and_score.py +139 -0
  31. package/python/scripts/local-finetune/merge_model.py +32 -0
  32. package/python/scripts/local-finetune/test_adapter.py +91 -0
  33. package/python/scripts/local-finetune/train_from_csv.py +132 -0
  34. package/python/scripts/merge_trajectories.py +318 -0
  35. package/python/scripts/optimize_prompt_grpo.py +269 -0
  36. package/python/scripts/run_ab_test.py +143 -0
  37. package/python/scripts/run_full_pipeline.py +544 -0
  38. package/python/scripts/run_tinker_training.py +192 -0
  39. package/python/scripts/run_training.py +914 -0
  40. package/python/scripts/test_generation.py +29 -0
  41. package/python/scripts/test_judge.py +155 -0
  42. package/python/scripts/test_pipeline.py +356 -0
  43. package/python/scripts/test_trained_model.py +380 -0
  44. package/python/scripts/train_grpo.py +360 -0
  45. package/python/scripts/train_jsonl.py +223 -0
  46. package/python/scripts/train_local.py +528 -0
  47. package/python/setup.py +20 -0
  48. package/python/src/__init__.py +190 -0
  49. package/python/src/data_bridge/__init__.py +24 -0
  50. package/python/src/data_bridge/converter.py +435 -0
  51. package/python/src/data_bridge/reader.py +393 -0
  52. package/python/src/models.py +283 -0
  53. package/python/src/training/__init__.py +605 -0
  54. package/python/src/training/ab_testing.py +404 -0
  55. package/python/src/training/action_executor.py +621 -0
  56. package/python/src/training/archetype_trainer.py +347 -0
  57. package/python/src/training/atropos_trainer.py +980 -0
  58. package/python/src/training/babylon_env.py +1254 -0
  59. package/python/src/training/error_recovery.py +647 -0
  60. package/python/src/training/evaluation.py +856 -0
  61. package/python/src/training/fast_simulator.py +880 -0
  62. package/python/src/training/format_validator.py +584 -0
  63. package/python/src/training/hybrid_env.py +522 -0
  64. package/python/src/training/kl_controller.py +628 -0
  65. package/python/src/training/multi_prompt_dataset.py +883 -0
  66. package/python/src/training/multi_turn.py +656 -0
  67. package/python/src/training/online_env.py +1084 -0
  68. package/python/src/training/quality_scorer.py +391 -0
  69. package/python/src/training/quality_utils.py +633 -0
  70. package/python/src/training/rewards.py +1344 -0
  71. package/python/src/training/rlaif_env.py +17 -0
  72. package/python/src/training/rollout_generator.py +502 -0
  73. package/python/src/training/rubric_loader.py +198 -0
  74. package/python/src/training/scenario_pool.py +1072 -0
  75. package/python/src/training/schemas.py +481 -0
  76. package/python/src/training/service_manager.py +552 -0
  77. package/python/src/training/simulation_bridge.py +535 -0
  78. package/python/src/training/tick_reward_attribution.py +399 -0
  79. package/python/src/training/tinker_client.py +575 -0
  80. package/python/src/training/tinker_trainer.py +646 -0
  81. package/python/src/training/tokenization_utils.py +402 -0
  82. package/python/tests/e2e/__init__.py +13 -0
  83. package/python/tests/e2e/conftest.py +258 -0
  84. package/python/tests/e2e/test_full_pipeline.py +643 -0
  85. package/python/tests/e2e/test_online_training_e2e.py +365 -0
  86. package/python/tests/integration/__init__.py +12 -0
  87. package/python/tests/integration/conftest.py +383 -0
  88. package/python/tests/integration/test_db_integration.py +649 -0
  89. package/python/tests/integration/test_json_mode_integration.py +554 -0
  90. package/python/tests/test_action_executor.py +594 -0
  91. package/python/tests/test_archetype_scoring.py +1027 -0
  92. package/python/tests/test_atropos_integration.py +360 -0
  93. package/python/tests/test_evaluation.py +727 -0
  94. package/python/tests/test_format_validator.py +486 -0
  95. package/python/tests/test_kl_controller.py +432 -0
  96. package/python/tests/test_lr_scheduler.py +579 -0
  97. package/python/tests/test_multi_turn.py +590 -0
  98. package/python/tests/test_online_env.py +519 -0
  99. package/python/tests/test_quality_scorer.py +474 -0
  100. package/python/tests/test_scenario_pool.py +735 -0
  101. package/python/tests/test_service_manager.py +585 -0
  102. package/python/tests/test_simulation_rollout.py +581 -0
  103. package/python/tests/test_tokenization_utils.py +501 -0
  104. package/python/tests/test_training_orchestrator.py +497 -0
  105. package/python/tests/test_training_output_structure.py +661 -0
  106. package/research-output/training-runs/training-run-1770772042899.json +26 -0
  107. package/research-output/training-runs/training-run-1770930079670.json +32 -0
  108. package/research-output/training-runs/training-run-1770930143700.json +44 -0
  109. package/research-output/training-runs/training-run-1770930183638.json +38 -0
  110. package/research-output/training-runs/training-run-1770930442049.json +38 -0
  111. package/research-output/training-runs/training-run-1770930793243.json +38 -0
  112. package/research-output/training-runs/training-run-1771276293257.json +38 -0
  113. package/research-output/training-runs/training-run-1771276389280.json +38 -0
  114. package/research-output/training-runs/training-run-1771276502776.json +38 -0
  115. package/research-output/training-runs/training-run-1771277340748.json +38 -0
  116. package/research-output/training-runs/training-run-1773013658993.json +38 -0
  117. package/research-output/training-runs/training-run-1773013861014.json +38 -0
  118. package/research-output/training-runs/training-run-1773014215983.json +38 -0
  119. package/scripts/assess-training-data.ts +422 -0
  120. package/scripts/e2e-training-test.ts +550 -0
  121. package/scripts/export-rubrics.ts +64 -0
  122. package/scripts/generate-research-report.ts +1523 -0
  123. package/scripts/generate_dataset.sh +173 -0
  124. package/scripts/generate_should_respond.ts +267 -0
  125. package/scripts/generate_should_respond_dataset.ts +162 -0
  126. package/scripts/json-mode-benchmark.ts +399 -0
  127. package/scripts/rank_trajectories.ts +207 -0
  128. package/scripts/real-archetype-benchmark.ts +210 -0
  129. package/scripts/run-baseline-comparison.ts +116 -0
  130. package/scripts/run-full-pipeline.ts +272 -0
  131. package/scripts/run_rlaif_loop.ts +78 -0
  132. package/scripts/run_task_benchmark.ts +247 -0
  133. package/scripts/runpod_setup.sh +137 -0
  134. package/scripts/runpod_validate.sh +147 -0
  135. package/scripts/test-model-in-game.ts +955 -0
  136. package/scripts/test-scoring.ts +73 -0
  137. package/scripts/test-trained-model.ts +209 -0
  138. package/scripts/train-and-test.ts +824 -0
  139. package/scripts/verify-final.ts +118 -0
  140. package/src/adapter.ts +516 -0
  141. package/src/archetypes/ArchetypeConfigService.ts +626 -0
  142. package/src/archetypes/derive-archetype.ts +249 -0
  143. package/src/archetypes/index.ts +22 -0
  144. package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
  145. package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
  146. package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
  147. package/src/benchmark/BenchmarkDataViewer.ts +324 -0
  148. package/src/benchmark/BenchmarkHistoryService.ts +221 -0
  149. package/src/benchmark/BenchmarkRunner.ts +685 -0
  150. package/src/benchmark/BenchmarkValidator.ts +204 -0
  151. package/src/benchmark/FastEvalRunner.ts +225 -0
  152. package/src/benchmark/MetricsValidator.ts +165 -0
  153. package/src/benchmark/MetricsVisualizer.ts +909 -0
  154. package/src/benchmark/ModelBenchmarkService.ts +611 -0
  155. package/src/benchmark/ModelRegistry.ts +158 -0
  156. package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
  157. package/src/benchmark/SimulationA2AInterface.ts +1169 -0
  158. package/src/benchmark/SimulationEngine.ts +832 -0
  159. package/src/benchmark/TaskRunner.ts +94 -0
  160. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
  161. package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
  162. package/src/benchmark/index.ts +91 -0
  163. package/src/benchmark/parseSimulationMetrics.ts +124 -0
  164. package/src/benchmark/simulation-types.ts +78 -0
  165. package/src/dependencies.ts +475 -0
  166. package/src/generation/TrajectoryGenerator.ts +387 -0
  167. package/src/generation/index.ts +12 -0
  168. package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
  169. package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
  170. package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
  171. package/src/huggingface/index.ts +27 -0
  172. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
  173. package/src/index.ts +102 -0
  174. package/src/init-training.ts +53 -0
  175. package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
  176. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
  177. package/src/metrics/index.ts +8 -0
  178. package/src/metrics/types.ts +200 -0
  179. package/src/rubrics/__tests__/index.test.ts +184 -0
  180. package/src/rubrics/ass-kisser.ts +85 -0
  181. package/src/rubrics/degen.ts +80 -0
  182. package/src/rubrics/goody-twoshoes.ts +84 -0
  183. package/src/rubrics/index.ts +236 -0
  184. package/src/rubrics/information-trader.ts +84 -0
  185. package/src/rubrics/infosec.ts +101 -0
  186. package/src/rubrics/liar.ts +104 -0
  187. package/src/rubrics/perps-trader.ts +87 -0
  188. package/src/rubrics/researcher.ts +81 -0
  189. package/src/rubrics/scammer.ts +82 -0
  190. package/src/rubrics/social-butterfly.ts +73 -0
  191. package/src/rubrics/super-predictor.ts +97 -0
  192. package/src/rubrics/trader.ts +67 -0
  193. package/src/scoring/ArchetypeScoringService.ts +486 -0
  194. package/src/scoring/JudgePromptBuilder.ts +556 -0
  195. package/src/scoring/LLMJudgeCache.ts +401 -0
  196. package/src/scoring/index.ts +9 -0
  197. package/src/training/AutomationPipeline.ts +916 -0
  198. package/src/training/BenchmarkService.ts +518 -0
  199. package/src/training/ConfigValidator.ts +220 -0
  200. package/src/training/MarketOutcomesTracker.ts +187 -0
  201. package/src/training/ModelDeployer.ts +186 -0
  202. package/src/training/ModelFetcher.ts +76 -0
  203. package/src/training/ModelSelectionService.ts +341 -0
  204. package/src/training/ModelUsageVerifier.ts +160 -0
  205. package/src/training/MultiModelOrchestrator.ts +580 -0
  206. package/src/training/RLModelConfig.ts +407 -0
  207. package/src/training/RewardBackpropagationService.ts +149 -0
  208. package/src/training/RulerScoringService.ts +666 -0
  209. package/src/training/TrainingMonitor.ts +166 -0
  210. package/src/training/TrajectoryRecorder.ts +399 -0
  211. package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
  212. package/src/training/index.ts +100 -0
  213. package/src/training/logRLConfig.ts +34 -0
  214. package/src/training/pipeline.ts +129 -0
  215. package/src/training/storage/ModelStorageService.ts +279 -0
  216. package/src/training/storage/TrainingDataArchiver.ts +197 -0
  217. package/src/training/storage/index.ts +17 -0
  218. package/src/training/types.ts +207 -0
  219. package/src/training/window-utils.ts +138 -0
  220. package/src/utils/index.ts +101 -0
  221. package/src/utils/logger.ts +59 -0
  222. package/src/utils/snowflake.ts +17 -0
  223. package/src/utils/synthetic-detector.ts +111 -0
  224. package/tsconfig.json +20 -0
@@ -0,0 +1,247 @@
1
+ #!/usr/bin/env bun
2
+
3
+ import {
4
+ AgentRuntime,
5
+ stringToUuid,
6
+ ModelType,
7
+ type IAgentRuntime,
8
+ type Memory,
9
+ type State,
10
+ } from '../../typescript/src/index';
11
+
12
+ import { v4 as uuidv4 } from 'uuid';
13
+ import * as fs from 'fs';
14
+ import * as path from 'path';
15
+ import { parseArgs } from 'util';
16
+
17
+ // Import from local src
18
+ import {
19
+ configureTrainingDependencies,
20
+ TaskRunner,
21
+ type CreateAgentParams,
22
+ type IAgentRuntimeLike,
23
+ type IAgentRuntimeManager,
24
+ type IAgentService,
25
+ type ITaskInteractor,
26
+ type TrajectoryStepForTraining,
27
+ type UserLike,
28
+ } from '../src';
29
+
30
+ // Implement Dependencies
31
+
32
+ class BenchmarkAgentService implements IAgentService {
33
+ async createAgent(params: CreateAgentParams): Promise<UserLike> {
34
+ // Return dummy user
35
+ return {
36
+ id: stringToUuid(params.name),
37
+ username: params.name,
38
+ };
39
+ }
40
+ }
41
+
42
+ class BenchmarkRuntimeManager implements IAgentRuntimeManager {
43
+ private runtimes = new Map<string, IAgentRuntime>();
44
+
45
+ async getRuntime(agentId: string): Promise<IAgentRuntimeLike> {
46
+ if (this.runtimes.has(agentId)) {
47
+ return this.runtimes.get(agentId) as unknown as IAgentRuntimeLike;
48
+ }
49
+
50
+ // Create a new runtime
51
+ const character = {
52
+ name: 'BenchmarkAgent',
53
+ modelProvider: "openai" as any,
54
+ bio: 'A helpful assistant for benchmarking.',
55
+ settings: {
56
+ secrets: {
57
+ OPENAI_API_KEY: process.env.OPENAI_API_KEY || ''
58
+ }
59
+ }
60
+ };
61
+
62
+ const runtime = new AgentRuntime({
63
+ token: process.env.OPENAI_API_KEY || '',
64
+ modelProvider: "openai" as any,
65
+ character,
66
+ plugins: [],
67
+ providers: [],
68
+ actions: [],
69
+ evaluators: [],
70
+ });
71
+
72
+ // We must initialize with allowNoDatabase to avoid DB error
73
+ await runtime.initialize({ allowNoDatabase: true });
74
+
75
+ // Register a mock model handler for TEXT_SMALL to allow generateText to work
76
+ runtime.registerModel(
77
+ ModelType.TEXT_SMALL,
78
+ async (rt, params) => {
79
+ return "This is a mock response from the benchmark script.";
80
+ },
81
+ "mock-provider",
82
+ 100
83
+ );
84
+
85
+ this.runtimes.set(agentId, runtime);
86
+
87
+ return runtime as unknown as IAgentRuntimeLike;
88
+ }
89
+
90
+ async resetRuntime(agentId: string): Promise<void> {
91
+ this.runtimes.delete(agentId);
92
+ }
93
+ }
94
+
95
+ class BenchmarkTaskInteractor implements ITaskInteractor {
96
+ async executeTask(
97
+ agentRuntime: IAgentRuntimeLike,
98
+ taskPrompt: string,
99
+ options?: { maxTurns?: number; temperature?: number }
100
+ ): Promise<{
101
+ success: boolean;
102
+ response: string;
103
+ trajectoryId?: string;
104
+ steps?: TrajectoryStepForTraining[];
105
+ error?: string;
106
+ }> {
107
+ const runtime = agentRuntime as unknown as AgentRuntime;
108
+ const trajectoryId = uuidv4();
109
+ const startTime = Date.now();
110
+
111
+ try {
112
+ // 1. Create User Memory (in memory only, since we use no-db)
113
+ const messageId = uuidv4();
114
+ const userId = stringToUuid('user');
115
+ const roomId = stringToUuid('benchmark-room');
116
+
117
+ const userMemory: Memory = {
118
+ id: messageId as `${string}-${string}-${string}-${string}-${string}`,
119
+ userId: userId as `${string}-${string}-${string}-${string}-${string}`,
120
+ agentId: runtime.agentId,
121
+ roomId: roomId as `${string}-${string}-${string}-${string}-${string}`,
122
+ content: {
123
+ text: taskPrompt,
124
+ },
125
+ createdAt: Date.now(),
126
+ };
127
+
128
+ // Use standard createMemory method
129
+ // createMemory(memory: Memory, tableName: string, unique?: boolean)
130
+ await runtime.createMemory(userMemory, 'messages', true);
131
+
132
+ // 2. Generate Response
133
+ const state: State = await runtime.composeState(userMemory);
134
+
135
+ const context = `You are ${runtime.character.name}.
136
+ ${state.bio}
137
+ ${state.lore}
138
+
139
+ User: ${taskPrompt}
140
+ Assistant:`;
141
+
142
+ // Use generateText from runtime
143
+ // Signature: generateText(input: string, options?: GenerateTextOptions)
144
+ const result = await runtime.generateText(context, {
145
+ modelType: ModelType.TEXT_SMALL,
146
+ });
147
+ // Handle both string and object return types for safety
148
+ const response = typeof result === 'string' ? result : result.text;
149
+
150
+ // Real implementation of logging:
151
+ const steps: TrajectoryStepForTraining[] = [{
152
+ stepId: uuidv4(),
153
+ stepNumber: 1,
154
+ timestamp: Date.now(),
155
+ environmentState: { timestamp: Date.now(), agentPoints: 0 },
156
+ observation: { userMessage: taskPrompt },
157
+ providerAccesses: [],
158
+ llmCalls: [],
159
+ action: {
160
+ attemptId: uuidv4(),
161
+ timestamp: Date.now(),
162
+ actionType: 'text_response',
163
+ actionName: 'response',
164
+ parameters: { text: response },
165
+ success: true
166
+ },
167
+ reward: 0,
168
+ done: true,
169
+ metadata: {}
170
+ }];
171
+
172
+ // Log to File
173
+ const trajectoryRecord = {
174
+ id: uuidv4(),
175
+ trajectoryId: trajectoryId,
176
+ agentId: runtime.agentId,
177
+ startTime: new Date(startTime).toISOString(),
178
+ endTime: new Date().toISOString(),
179
+ durationMs: Date.now() - startTime,
180
+ steps,
181
+ metadata: { task: taskPrompt },
182
+ isTrainingData: true,
183
+ };
184
+
185
+ const logFile = path.resolve(process.cwd(), 'trajectories.jsonl');
186
+ fs.appendFileSync(logFile, JSON.stringify(trajectoryRecord) + '\n');
187
+ console.log(`Saved trajectory to ${logFile}`);
188
+
189
+ return {
190
+ success: true,
191
+ response: String(response),
192
+ trajectoryId,
193
+ steps
194
+ };
195
+
196
+ } catch (e) {
197
+ console.error('Error executing task', e);
198
+ return {
199
+ success: false,
200
+ response: '',
201
+ error: e instanceof Error ? e.message : String(e)
202
+ };
203
+ }
204
+ }
205
+ }
206
+
207
+ async function main() {
208
+ const { values } = parseArgs({
209
+ args: process.argv.slice(2),
210
+ options: {
211
+ task: { type: 'string', default: 'Hello, who are you?' },
212
+ iterations: { type: 'string', default: '1' },
213
+ model: { type: 'string', default: 'gpt-4o-mini' },
214
+ },
215
+ });
216
+
217
+ const config = {
218
+ agentName: 'BenchmarkBot',
219
+ taskPrompt: values.task as string,
220
+ // bun packages/training/scripts/run_task_benchmark.ts --model "llama3.2" (requires Ollama running)
221
+ iterations: parseInt(values.iterations as string, 10),
222
+ model: values.model as string,
223
+ };
224
+
225
+ // Configure Dependencies
226
+ configureTrainingDependencies({
227
+ agentService: new BenchmarkAgentService(),
228
+ agentRuntimeManager: new BenchmarkRuntimeManager(),
229
+ autonomousCoordinator: {
230
+ executeAutonomousTick: async () => ({ success: true })
231
+ } as any,
232
+ llmCaller: {
233
+ callGroqDirect: async () => "mock response"
234
+ } as any,
235
+ });
236
+
237
+ // Import task interactor config
238
+ const { configureTaskInteractor } = await import('../src/dependencies');
239
+ configureTaskInteractor(new BenchmarkTaskInteractor());
240
+
241
+ const runner = new TaskRunner(config);
242
+ const results = await runner.run();
243
+
244
+ console.log(JSON.stringify(results, null, 2));
245
+ }
246
+
247
+ main().catch(console.error);
@@ -0,0 +1,137 @@
1
#!/bin/bash
#
# RunPod Setup Script for Babylon Training
#
# Usage:
# 1. SSH into your RunPod instance
# 2. Clone the repo
# 3. Run: bash packages/training/scripts/runpod_setup.sh
#
# Prerequisites:
# - 2x L40 GPUs (96GB total VRAM)
# - WANDB_API_KEY environment variable (optional)
# - DATABASE_URL for trajectory data (or use synthetic)
#

set -e

CYAN='\033[0;36m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
RESET='\033[0m'

echo -e "${CYAN}======================================${RESET}"
echo -e "${CYAN} Babylon Training - RunPod Setup ${RESET}"
echo -e "${CYAN}======================================${RESET}"
echo ""

# Check GPU availability
echo -e "${CYAN}[1/7] Checking GPU availability...${RESET}"
if command -v nvidia-smi &> /dev/null; then
    GPU_COUNT=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
    echo -e "${GREEN}✓ Found $GPU_COUNT GPU(s):${RESET}"
    nvidia-smi --query-gpu=name,memory.total --format=csv
else
    echo -e "${RED}✗ nvidia-smi not found. GPU drivers not installed?${RESET}"
    exit 1
fi

# Navigate to training directory (script lives in scripts/, so go one up)
cd "$(dirname "$0")/.."
TRAINING_DIR=$(pwd)
echo -e "${GREEN}✓ Working directory: $TRAINING_DIR${RESET}"

# Install system dependencies
echo ""
echo -e "${CYAN}[2/7] Installing system dependencies...${RESET}"
apt-get update -qq
apt-get install -y -qq python3.11 python3.11-venv python3-pip curl git > /dev/null 2>&1
echo -e "${GREEN}✓ System dependencies installed${RESET}"

# Create virtual environment (reused if it already exists)
echo ""
echo -e "${CYAN}[3/7] Setting up Python virtual environment...${RESET}"
cd python
if [ ! -d "venv" ]; then
    python3.11 -m venv venv
fi
source venv/bin/activate
pip install --upgrade pip -q
echo -e "${GREEN}✓ Virtual environment activated${RESET}"

# Install Python dependencies
echo ""
echo -e "${CYAN}[4/7] Installing Python dependencies (this may take 5-10 minutes)...${RESET}"
pip install -r requirements.txt -q
# Fix: the version spec must be quoted. Unquoted, `vllm>=0.4.0` is parsed by
# the shell as `vllm` plus a redirection (creating a junk file named
# `=0.4.0`), and the version constraint is silently dropped.
pip install "vllm>=0.4.0" atroposlib wandb -q
echo -e "${GREEN}✓ Python dependencies installed${RESET}"

# Try to install flash-attention (optional, may fail on some systems)
echo ""
echo -e "${CYAN}[5/7] Installing flash-attention (optional)...${RESET}"
pip install flash-attn --no-build-isolation -q 2>/dev/null && \
    echo -e "${GREEN}✓ Flash attention installed${RESET}" || \
    echo -e "${YELLOW}⚠ Flash attention not available (optional, continuing)${RESET}"

# Verify installation: import torch/vllm and report GPU inventory
echo ""
echo -e "${CYAN}[6/7] Verifying installation...${RESET}"
python -c "
import torch
import vllm
print(f'PyTorch: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')
print(f'GPU count: {torch.cuda.device_count()}')
for i in range(torch.cuda.device_count()):
    props = torch.cuda.get_device_properties(i)
    print(f'  GPU {i}: {props.name} ({props.total_memory / 1e9:.1f} GB)')
print(f'vLLM: {vllm.__version__}')
"
echo -e "${GREEN}✓ Installation verified${RESET}"

# Setup environment
echo ""
echo -e "${CYAN}[7/7] Setting up environment...${RESET}"

# Check for W&B key
if [ -n "$WANDB_API_KEY" ]; then
    echo -e "${GREEN}✓ W&B API key found${RESET}"
else
    echo -e "${YELLOW}⚠ WANDB_API_KEY not set. Set it with: export WANDB_API_KEY=your_key${RESET}"
fi

# Check for database
if [ -n "$DATABASE_URL" ]; then
    echo -e "${GREEN}✓ DATABASE_URL found${RESET}"
else
    echo -e "${YELLOW}⚠ DATABASE_URL not set. Will use synthetic data for online training.${RESET}"
fi

echo ""
echo -e "${GREEN}======================================${RESET}"
echo -e "${GREEN} Setup Complete! ${RESET}"
echo -e "${GREEN}======================================${RESET}"
echo ""
echo -e "Next steps:"
echo ""
echo -e " ${CYAN}# Activate environment${RESET}"
echo -e " source python/venv/bin/activate"
echo ""
echo -e " ${CYAN}# Quick validation (single GPU, small model)${RESET}"
echo -e " make train PROFILE=48gb STEPS=20"
echo ""
echo -e " ${CYAN}# 2x L40 validation (14B model)${RESET}"
echo -e " make train PROFILE=l40-2gpu STEPS=50"
echo ""
echo -e " ${CYAN}# Full cloud training with W&B${RESET}"
echo -e " export WANDB_API_KEY=your_key"
echo -e " make train-cloud PROFILE=l40-2gpu STEPS=100"
echo ""
echo -e " ${CYAN}# Online training (requires bridge server)${RESET}"
echo -e " # Terminal 1: make bridge-server"
echo -e " # Terminal 2: make train-online PROFILE=l40-2gpu"
echo ""
@@ -0,0 +1,147 @@
1
#!/bin/bash
#
# RunPod Validation Script
#
# Runs a quick validation of the training pipeline on cloud GPUs.
# Expects setup to be complete (run runpod_setup.sh first).
#

set -e

CYAN='\033[0;36m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
RESET='\033[0m'

cd "$(dirname "$0")/.."
source python/venv/bin/activate

echo -e "${CYAN}======================================${RESET}"
echo -e "${CYAN} Babylon Training - Cloud Validation ${RESET}"
echo -e "${CYAN}======================================${RESET}"
echo ""

# Check GPU count
GPU_COUNT=$(python -c "import torch; print(torch.cuda.device_count())")
echo -e "${GREEN}GPU Count: $GPU_COUNT${RESET}"

# Determine profile based on GPU count
if [ "$GPU_COUNT" -ge 4 ]; then
    PROFILE="l40-4gpu"
    MODEL="Qwen/Qwen3-30B-A3B"
elif [ "$GPU_COUNT" -ge 2 ]; then
    PROFILE="l40-2gpu"
    MODEL="Qwen/Qwen2.5-32B-Instruct"
else
    PROFILE="l40"
    MODEL="Qwen/Qwen2.5-14B-Instruct"
fi

echo -e "${GREEN}Selected profile: $PROFILE${RESET}"
echo -e "${GREEN}Model: $MODEL${RESET}"
echo ""

# Test 1: Quick vLLM model loading
echo -e "${CYAN}[Test 1/4] Testing vLLM model loading...${RESET}"
python -c "
import torch
from vllm import LLM, SamplingParams

print('Loading model for inference test...')
llm = LLM(
    model='$MODEL',
    tensor_parallel_size=$GPU_COUNT,
    gpu_memory_utilization=0.5,
    max_model_len=2048,
)
print('✓ Model loaded successfully')

# Quick inference test.
# Fix: LLM.generate has no max_tokens kwarg; the limit goes in SamplingParams.
outputs = llm.generate(['Hello, I am a trading agent.'], SamplingParams(max_tokens=20))
print(f'✓ Inference test passed: {outputs[0].outputs[0].text[:50]}...')
" && echo -e "${GREEN}✓ vLLM test passed${RESET}" || {
    echo -e "${RED}✗ vLLM test failed${RESET}"
    echo -e "${YELLOW}Trying with smaller model...${RESET}"

    # Fallback to smaller model
    python -c "
from vllm import LLM, SamplingParams
llm = LLM(model='Qwen/Qwen2.5-7B-Instruct', tensor_parallel_size=min($GPU_COUNT, 2), gpu_memory_utilization=0.4, max_model_len=2048)
print('✓ Fallback model loaded')
outputs = llm.generate(['Hello'], SamplingParams(max_tokens=10))
print(f'✓ Inference: {outputs[0].outputs[0].text}')
"
    PROFILE="48gb" # Fall back to smaller profile
}

echo ""

# Test 2: Service manager
echo -e "${CYAN}[Test 2/4] Testing service manager...${RESET}"
cd python
PYTHONPATH=. python -c "
from src.training.service_manager import ServiceConfig, check_prerequisites

config = ServiceConfig(
    model_name='Qwen/Qwen2.5-7B-Instruct',
    tensor_parallel_size=$GPU_COUNT,
    vllm_gpu_memory_utilization=0.4,
)
print(f'✓ ServiceConfig created: tensor_parallel={config.tensor_parallel_size}')

errors = check_prerequisites()
if errors:
    for e in errors:
        print(f'  Warning: {e}')
else:
    print('✓ All prerequisites met')
"
cd ..
echo -e "${GREEN}✓ Service manager test passed${RESET}"
echo ""

# Test 3: Quick training run (10 steps)
echo -e "${CYAN}[Test 3/4] Running quick training validation (10 steps)...${RESET}"
echo -e "${YELLOW}This will take 5-15 minutes depending on model size...${RESET}"
echo ""

# Use a simpler profile for the quick test
make train PROFILE=48gb STEPS=10 2>&1 | tee /tmp/training_validation.log
# Fix: without pipefail, $? after the pipeline is tee's status (always 0),
# so the failure branch below was unreachable. Capture make's own exit code
# from PIPESTATUS immediately after the pipeline.
TRAIN_EXIT=${PIPESTATUS[0]}

if [ "$TRAIN_EXIT" -eq 0 ]; then
    echo -e "${GREEN}✓ Training validation passed${RESET}"
else
    echo -e "${RED}✗ Training validation failed${RESET}"
    echo -e "Check /tmp/training_validation.log for details"
    exit 1
fi
echo ""

# Test 4: Check trained model output
echo -e "${CYAN}[Test 4/4] Checking trained model output...${RESET}"
if [ -d "python/trained_models/final_model" ]; then
    echo -e "${GREEN}✓ Trained model saved to python/trained_models/final_model${RESET}"
    ls -la python/trained_models/final_model/ | head -10
else
    echo -e "${YELLOW}⚠ No final model found (might be too few steps)${RESET}"
fi
echo ""

echo -e "${GREEN}======================================${RESET}"
echo -e "${GREEN} Validation Complete! ${RESET}"
echo -e "${GREEN}======================================${RESET}"
echo ""
echo -e "Cloud training is working. Next steps:"
echo ""
echo -e " ${CYAN}# Full training run with W&B logging${RESET}"
echo -e " export WANDB_API_KEY=your_key"
echo -e " make train-cloud PROFILE=$PROFILE STEPS=1000"
echo ""
echo -e " ${CYAN}# Or with online training${RESET}"
echo -e " make bridge-server &"
echo -e " make train-online PROFILE=$PROFILE STEPS=500"
echo ""