@mariozechner/pi 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
+ #!/usr/bin/env bash
+ # Model runner script - runs sequentially, killed by pi stop
+ set -euo pipefail
+
+ # These values are replaced before upload by pi CLI
+ MODEL_ID="{{MODEL_ID}}"
+ NAME="{{NAME}}"
+ PORT="{{PORT}}"
+ VLLM_ARGS="{{VLLM_ARGS}}"
+
+ # Trap to ensure cleanup on exit and kill any child processes
+ cleanup() {
+ local exit_code=$?
+ echo "Model runner exiting with code $exit_code"
+ # Kill any child processes
+ pkill -P $$ 2>/dev/null || true
+ exit $exit_code
+ }
+ trap cleanup EXIT TERM INT
+
+ # Force colored output even when not a TTY
+ export FORCE_COLOR=1
+ export PYTHONUNBUFFERED=1
+ export TERM=xterm-256color
+ export RICH_FORCE_TERMINAL=1
+ export CLICOLOR_FORCE=1
+
+ # Source virtual environment
+ source /root/venv/bin/activate
+
+ echo "========================================="
+ echo "Model Run: $NAME"
+ echo "Model ID: $MODEL_ID"
+ echo "Port: $PORT"
+ if [ -n "$VLLM_ARGS" ]; then
+ echo "vLLM Args: $VLLM_ARGS"
+ fi
+ echo "========================================="
+ echo ""
+
+ # Download model (with color progress bars)
+ echo "Downloading model (will skip if cached)..."
+ HF_HUB_ENABLE_HF_TRANSFER=1 hf download "$MODEL_ID"
+
+ if [ $? -ne 0 ]; then
+ echo "❌ ERROR: Failed to download model" >&2
+ exit 1
+ fi
+
+ echo ""
+ echo "✅ Model download complete"
+ echo ""
+
+ # Build vLLM command
+ VLLM_CMD="vllm serve '$MODEL_ID' --port $PORT --api-key '$PI_API_KEY'"
+ if [ -n "$VLLM_ARGS" ]; then
+ VLLM_CMD="$VLLM_CMD $VLLM_ARGS"
+ fi
+
+ echo "Starting vLLM server..."
+ echo "Command: $VLLM_CMD"
+ echo "========================================="
+ echo ""
+
+ # Run vLLM in background so we can monitor it
+ echo "Starting vLLM process..."
+ bash -c "$VLLM_CMD" &
+ VLLM_PID=$!
+
+ # Monitor the vLLM process
+ echo "Monitoring vLLM process (PID: $VLLM_PID)..."
+ wait $VLLM_PID
+ VLLM_EXIT_CODE=$?
+
+ if [ $VLLM_EXIT_CODE -ne 0 ]; then
+ echo "❌ ERROR: vLLM exited with code $VLLM_EXIT_CODE" >&2
+ # Make sure to exit the script command too
+ kill -TERM $$ 2>/dev/null || true
+ exit $VLLM_EXIT_CODE
+ fi
+
+ echo "✅ vLLM exited normally"
+ exit 0
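
The runner script above is a template: as its opening comment notes, the pi CLI substitutes the {{MODEL_ID}}, {{NAME}}, {{PORT}}, and {{VLLM_ARGS}} placeholders before uploading it to the pod. The CLI side of that step is not part of this diff, so the TypeScript below is only a minimal sketch of what such a substitution could look like; the type and function names are hypothetical, not taken from the pi source.

// Hypothetical sketch: fill the {{...}} placeholders in the runner template.
import { readFileSync } from "node:fs";

interface RunConfig {
  modelId: string;
  name: string;
  port: number;
  vllmArgs: string;
}

function renderRunnerScript(templatePath: string, cfg: RunConfig): string {
  const template = readFileSync(templatePath, "utf8");
  const values: Record<string, string> = {
    MODEL_ID: cfg.modelId,
    NAME: cfg.name,
    PORT: String(cfg.port),
    VLLM_ARGS: cfg.vllmArgs,
  };
  // Replace each known {{KEY}} token; unknown tokens are left untouched.
  return template.replace(/\{\{(\w+)\}\}/g, (match, key) => values[key] ?? match);
}
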
@@ -0,0 +1,334 @@
+ #!/usr/bin/env bash
+ # GPU pod bootstrap for vLLM deployment
+ set -euo pipefail
+
+ # Parse arguments passed from pi CLI
+ MOUNT_COMMAND=""
+ MODELS_PATH=""
+ HF_TOKEN=""
+ PI_API_KEY=""
+ VLLM_VERSION="release" # Default to release
+
+ while [[ $# -gt 0 ]]; do
+ case $1 in
+ --mount)
+ MOUNT_COMMAND="$2"
+ shift 2
+ ;;
+ --models-path)
+ MODELS_PATH="$2"
+ shift 2
+ ;;
+ --hf-token)
+ HF_TOKEN="$2"
+ shift 2
+ ;;
+ --vllm-api-key)
+ PI_API_KEY="$2"
+ shift 2
+ ;;
+ --vllm)
+ VLLM_VERSION="$2"
+ shift 2
+ ;;
+ *)
+ echo "ERROR: Unknown option: $1" >&2
+ exit 1
+ ;;
+ esac
+ done
+
+ # Validate required parameters
+ if [ -z "$HF_TOKEN" ]; then
+ echo "ERROR: HF_TOKEN is required" >&2
+ exit 1
+ fi
+
+ if [ -z "$PI_API_KEY" ]; then
+ echo "ERROR: PI_API_KEY is required" >&2
+ exit 1
+ fi
+
+ if [ -z "$MODELS_PATH" ]; then
+ echo "ERROR: MODELS_PATH is required" >&2
+ exit 1
+ fi
+
+ echo "=== Starting pod setup ==="
+
+ # Install system dependencies
+ apt update -y
+ apt install -y python3-pip python3-venv git build-essential cmake ninja-build curl wget lsb-release htop pkg-config
+
+ # --- Install matching CUDA toolkit -------------------------------------------
+ echo "Checking CUDA driver version..."
+ DRIVER_CUDA_VERSION=$(nvidia-smi | grep "CUDA Version" | awk '{print $9}')
+ echo "Driver supports CUDA: $DRIVER_CUDA_VERSION"
+
+ # Check if nvcc exists and its version
+ if command -v nvcc &> /dev/null; then
+ NVCC_VERSION=$(nvcc --version | grep "release" | awk '{print $6}' | cut -d, -f1)
+ echo "Current nvcc version: $NVCC_VERSION"
+ else
+ NVCC_VERSION="none"
+ echo "nvcc not found"
+ fi
+
+ # Install CUDA toolkit matching driver version if needed
+ if [[ "$NVCC_VERSION" != "$DRIVER_CUDA_VERSION" ]]; then
+ echo "Installing CUDA Toolkit $DRIVER_CUDA_VERSION to match driver..."
+
+ # Detect Ubuntu version
+ UBUNTU_VERSION=$(lsb_release -rs)
+ UBUNTU_CODENAME=$(lsb_release -cs)
+
+ echo "Detected Ubuntu $UBUNTU_VERSION ($UBUNTU_CODENAME)"
+
+ # Map Ubuntu version to NVIDIA repo path
+ if [[ "$UBUNTU_VERSION" == "24.04" ]]; then
+ REPO_PATH="ubuntu2404"
+ elif [[ "$UBUNTU_VERSION" == "22.04" ]]; then
+ REPO_PATH="ubuntu2204"
+ elif [[ "$UBUNTU_VERSION" == "20.04" ]]; then
+ REPO_PATH="ubuntu2004"
+ else
+ echo "Warning: Unsupported Ubuntu version $UBUNTU_VERSION, trying ubuntu2204"
+ REPO_PATH="ubuntu2204"
+ fi
+
+ # Add NVIDIA package repositories
+ wget https://developer.download.nvidia.com/compute/cuda/repos/${REPO_PATH}/x86_64/cuda-keyring_1.1-1_all.deb
+ dpkg -i cuda-keyring_1.1-1_all.deb
+ rm cuda-keyring_1.1-1_all.deb
+ apt-get update
+
+ # Install specific CUDA toolkit version
+ # Convert version format (12.9 -> 12-9)
+ CUDA_VERSION_APT=$(echo $DRIVER_CUDA_VERSION | sed 's/\./-/')
+ echo "Installing cuda-toolkit-${CUDA_VERSION_APT}..."
+ apt-get install -y cuda-toolkit-${CUDA_VERSION_APT}
+
+ # Add CUDA to PATH
+ export PATH=/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:$PATH
+ export LD_LIBRARY_PATH=/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:${LD_LIBRARY_PATH:-}
+
+ # Verify installation
+ nvcc --version
+ else
+ echo "CUDA toolkit $NVCC_VERSION matches driver version"
+ export PATH=/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:$PATH
+ export LD_LIBRARY_PATH=/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:${LD_LIBRARY_PATH:-}
+ fi
+
+ # --- Install uv (fast Python package manager) --------------------------------
+ curl -LsSf https://astral.sh/uv/install.sh | sh
+ export PATH="$HOME/.local/bin:$PATH"
+
+ # --- Install Python 3.12 if not available ------------------------------------
+ if ! command -v python3.12 &> /dev/null; then
+ echo "Python 3.12 not found. Installing via uv..."
+ uv python install 3.12
+ fi
+
+ # --- Clean up existing environments and caches -------------------------------
+ echo "Cleaning up existing environments and caches..."
+
+ # Remove existing venv for a clean installation
+ VENV="$HOME/venv"
+ if [ -d "$VENV" ]; then
+ echo "Removing existing virtual environment..."
+ rm -rf "$VENV"
+ fi
+
+ # Remove uv cache to ensure fresh installs
+ if [ -d "$HOME/.cache/uv" ]; then
+ echo "Clearing uv cache..."
+ rm -rf "$HOME/.cache/uv"
+ fi
+
+ # Remove vLLM cache to avoid conflicts
+ if [ -d "$HOME/.cache/vllm" ]; then
+ echo "Clearing vLLM cache..."
+ rm -rf "$HOME/.cache/vllm"
+ fi
+
+ # --- Create and activate venv ------------------------------------------------
+ echo "Creating fresh virtual environment..."
+ uv venv --python 3.12 --seed "$VENV"
+ source "$VENV/bin/activate"
+
+ # --- Install PyTorch and vLLM ------------------------------------------------
+ echo "Installing vLLM and dependencies (version: $VLLM_VERSION)..."
+ case "$VLLM_VERSION" in
+ release)
+ echo "Installing vLLM release with PyTorch..."
+ # Install vLLM with automatic PyTorch backend selection
+ # vLLM will automatically install the correct PyTorch version
+ uv pip install vllm>=0.10.0 --torch-backend=auto || {
+ echo "ERROR: Failed to install vLLM"
+ exit 1
+ }
+ ;;
+ nightly)
+ echo "Installing vLLM nightly with PyTorch..."
+ echo "This will install the latest nightly build of vLLM..."
+
+ # Install vLLM nightly with PyTorch
+ uv pip install -U vllm \
+ --torch-backend=auto \
+ --extra-index-url https://wheels.vllm.ai/nightly || {
+ echo "ERROR: Failed to install vLLM nightly"
+ exit 1
+ }
+
+ echo "vLLM nightly successfully installed!"
+ ;;
+ gpt-oss)
+ echo "Installing GPT-OSS special build with PyTorch nightly..."
+ echo "WARNING: This build is ONLY for GPT-OSS models!"
+ echo "Installing PyTorch nightly and cutting-edge dependencies..."
+
+ # Convert CUDA version format for PyTorch (12.4 -> cu124)
+ PYTORCH_CUDA="cu$(echo $DRIVER_CUDA_VERSION | sed 's/\.//')"
+ echo "Using PyTorch nightly with ${PYTORCH_CUDA} (driver supports ${DRIVER_CUDA_VERSION})"
+
+ # The GPT-OSS build will pull PyTorch nightly and other dependencies
+ # via the extra index URLs. We don't pre-install torch here to avoid conflicts.
+ uv pip install --pre vllm==0.10.1+gptoss \
+ --extra-index-url https://wheels.vllm.ai/gpt-oss/ \
+ --extra-index-url https://download.pytorch.org/whl/nightly/${PYTORCH_CUDA} \
+ --index-strategy unsafe-best-match || {
+ echo "ERROR: Failed to install GPT-OSS vLLM build"
+ echo "This automatically installs PyTorch nightly with ${PYTORCH_CUDA}, Triton nightly, and other dependencies"
+ exit 1
+ }
+
+ # Install gpt-oss library for tool support
+ uv pip install gpt-oss || {
+ echo "WARNING: Failed to install gpt-oss library (needed for tool use)"
+ }
+ ;;
+ *)
+ echo "ERROR: Unknown vLLM version: $VLLM_VERSION"
+ exit 1
+ ;;
+ esac
+
+ # --- Install additional packages ---------------------------------------------
+ echo "Installing additional packages..."
+ uv pip install huggingface-hub psutil tensorrt hf_transfer
+
+ # --- FlashInfer installation (optional, improves performance) ----------------
+ echo "Attempting FlashInfer installation (optional)..."
+ if uv pip install flashinfer-python; then
+ echo "FlashInfer installed successfully"
+ else
+ echo "FlashInfer not available, using Flash Attention instead"
+ fi
+
+ # --- Mount storage if provided -----------------------------------------------
+ if [ -n "$MOUNT_COMMAND" ]; then
+ echo "Setting up mount..."
+
+ # Create mount point directory if it doesn't exist
+ mkdir -p "$MODELS_PATH"
+
+ # Execute the mount command
+ eval "$MOUNT_COMMAND" || {
+ echo "WARNING: Mount command failed, continuing without mount"
+ }
+
+ # Verify mount succeeded (optional, may not always be a mount point)
+ if mountpoint -q "$MODELS_PATH" 2>/dev/null; then
+ echo "Storage successfully mounted at $MODELS_PATH"
+ else
+ echo "Note: $MODELS_PATH is not a mount point (might be local storage)"
+ fi
+ fi
+
+ # --- Model storage setup ------------------------------------------------------
+ echo ""
+ echo "=== Setting up model storage ==="
+ echo "Storage path: $MODELS_PATH"
+
+ # Check if the path exists and is writable
+ if [ ! -d "$MODELS_PATH" ]; then
+ echo "Creating model storage directory: $MODELS_PATH"
+ mkdir -p "$MODELS_PATH"
+ fi
+
+ if [ ! -w "$MODELS_PATH" ]; then
+ echo "ERROR: Model storage path is not writable: $MODELS_PATH"
+ echo "Please check permissions"
+ exit 1
+ fi
+
+ # Create the huggingface cache directory structure in the models path
+ mkdir -p "${MODELS_PATH}/huggingface/hub"
+
+ # Remove any existing cache directory or symlink
+ if [ -e ~/.cache/huggingface ] || [ -L ~/.cache/huggingface ]; then
+ echo "Removing existing ~/.cache/huggingface..."
+ rm -rf ~/.cache/huggingface 2>/dev/null || true
+ fi
+
+ # Create parent directory if needed
+ mkdir -p ~/.cache
+
+ # Create symlink from ~/.cache/huggingface to the models path
+ ln -s "${MODELS_PATH}/huggingface" ~/.cache/huggingface
+ echo "Created symlink: ~/.cache/huggingface -> ${MODELS_PATH}/huggingface"
+
+ # Verify the symlink works
+ if [ -d ~/.cache/huggingface/hub ]; then
+ echo "✓ Model storage configured successfully"
+
+ # Check available space
+ AVAILABLE_SPACE=$(df -h "$MODELS_PATH" | awk 'NR==2 {print $4}')
+ echo "Available space: $AVAILABLE_SPACE"
+ else
+ echo "ERROR: Could not verify model storage setup"
+ echo "The symlink was created but the target directory is not accessible"
+ exit 1
+ fi
+
+ # --- Configure environment ----------------------------------------------------
+ mkdir -p ~/.config/vllm
+ touch ~/.config/vllm/do_not_track
+
+ # Write environment to .bashrc for persistence
+ cat >> ~/.bashrc << EOF
+
+ # Pi vLLM environment
+ [ -d "\$HOME/venv" ] && source "\$HOME/venv/bin/activate"
+ export PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/bin:\$HOME/.local/bin:\$PATH"
+ export LD_LIBRARY_PATH="/usr/local/cuda-${DRIVER_CUDA_VERSION}/lib64:\${LD_LIBRARY_PATH:-}"
+ export HF_TOKEN="${HF_TOKEN}"
+ export PI_API_KEY="${PI_API_KEY}"
+ export HUGGING_FACE_HUB_TOKEN="${HF_TOKEN}"
+ export HF_HUB_ENABLE_HF_TRANSFER=1
+ export VLLM_NO_USAGE_STATS=1
+ export VLLM_DO_NOT_TRACK=1
+ export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
+ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+ EOF
+
+ # Create log directory for vLLM
+ mkdir -p ~/.vllm_logs
+
+ # --- Output GPU info for pi CLI to parse -------------------------------------
+ echo ""
+ echo "===GPU_INFO_START==="
+ nvidia-smi --query-gpu=index,name,memory.total --format=csv,noheader | while IFS=, read -r id name memory; do
+ # Trim whitespace
+ id=$(echo "$id" | xargs)
+ name=$(echo "$name" | xargs)
+ memory=$(echo "$memory" | xargs)
+ echo "{\"id\": $id, \"name\": \"$name\", \"memory\": \"$memory\"}"
+ done
+ echo "===GPU_INFO_END==="
+
+ echo ""
+ echo "=== Setup complete ==="
+ echo "Pod is ready for vLLM deployments"
+ echo "Models will be cached at: $MODELS_PATH"
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@mariozechner/pi",
- "version": "0.5.1",
+ "version": "0.5.3",
  "description": "CLI tool for managing vLLM deployments on GPU pods",
  "type": "module",
  "bin": {
@@ -8,12 +8,13 @@
  },
  "scripts": {
  "clean": "rm -rf dist",
- "build": "tsc -p tsconfig.build.json && chmod +x dist/cli.js && cp src/models.json dist/",
+ "build": "tsc -p tsconfig.build.json && chmod +x dist/cli.js && cp src/models.json dist/ && cp -r scripts dist/",
  "check": "biome check --write .",
  "prepublishOnly": "npm run clean && npm run build"
  },
  "files": [
- "dist"
+ "dist",
+ "scripts"
  ],
  "keywords": [
  "llm",
@@ -33,7 +34,7 @@
  "node": ">=20.0.0"
  },
  "dependencies": {
- "@mariozechner/pi-agent": "^0.5.1",
+ "@mariozechner/pi-agent": "^0.5.2",
  "chalk": "^5.5.0"
  },
  "devDependencies": {}