@mariozechner/pi 0.2.4 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/pod_setup.sh DELETED
@@ -1,74 +0,0 @@
- #!/usr/bin/env bash
- # GPU pod bootstrap: Ubuntu 22.04 + CUDA 12.6/12.8, vLLM latest, FlashInfer w/ TRT kernels (sm70-120)
-
- set -euo pipefail
-
- apt update -y
- apt install -y python3-pip python3-venv git build-essential cmake ninja-build curl
-
- # --- Install uv (fast Python package manager) --------------------------------
- curl -LsSf https://astral.sh/uv/install.sh | sh
- export PATH="$HOME/.local/bin:$PATH"
-
- # --- Create and activate venv ------------------------------------------------
- VENV="$HOME/vllm_env"
- uv venv --python 3.12 --seed "$VENV"
- source "$VENV/bin/activate"
-
- # --- Install vLLM with automatic PyTorch selection ---------------------------
- echo "Installing vLLM with automatic CUDA/PyTorch detection..."
- # uv automatically selects the right PyTorch based on CUDA version
- uv pip install vllm --torch-backend=auto
-
- # --- Install additional packages ---------------------------------------------
- echo "Installing additional packages..."
- uv pip install huggingface-hub psutil tensorrt hf_transfer
-
- # --- FlashInfer installation (optional, improves performance) ----------------
- echo "Attempting FlashInfer installation (optional)..."
- # vLLM will use Flash Attention as fallback if FlashInfer is not available
-
- # Try the official FlashInfer package name
- if uv pip install flashinfer-python; then
- echo "FlashInfer installed successfully"
- ATTENTION_BACKEND="FLASHINFER"
- else
- echo "FlashInfer not available, using Flash Attention instead"
- ATTENTION_BACKEND="FLASH_ATTN"
- fi
-
- # --- HF token check ----------------------------------------------------------
- : "${HF_TOKEN:?HF_TOKEN env var required}"
-
- mkdir -p ~/.config/vllm
- touch ~/.config/vllm/do_not_track
-
- cat > ~/.pirc <<EOF
- # auto-sourced env
- [ -d "$HOME/vllm_env" ] && source "$HOME/vllm_env/bin/activate"
- export PATH="$HOME/.local/bin:$PATH"
- export VLLM_ATTENTION_BACKEND=${ATTENTION_BACKEND}
- export VLLM_USE_FLASHINFER_SAMPLER=1
- export VLLM_USE_DEEP_GEMM=1
- export VLLM_NO_USAGE_STATS=1
- export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
- export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
- export HF_TOKEN=${HF_TOKEN}
- export HUGGING_FACE_HUB_TOKEN=${HF_TOKEN}
- export HF_HUB_ENABLE_HF_TRANSFER=1
- EOF
-
- # --- RunPod specific setup ---------------------------------------------------
- if df -h | grep -q "runpod.net.*workspace"; then
- echo "Detected RunPod instance - setting up workspace symlink..."
- if [ ! -L ~/.cache/huggingface ]; then
- mkdir -p /workspace/cache/huggingface
- rm -rf ~/.cache/huggingface 2>/dev/null || true
- ln -s /workspace/cache/huggingface ~/.cache/huggingface
- echo "Created symlink: ~/.cache/huggingface -> /workspace/cache/huggingface"
- else
- echo "Symlink already exists"
- fi
- fi
-
- echo "=== DONE ==="