@mariozechner/pi 0.2.4 → 0.5.0
This diff shows the content of publicly available package versions as released to their respective public registries; it is provided for informational purposes only.
- package/README.md +392 -294
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +348 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/models.d.ts +39 -0
- package/dist/commands/models.d.ts.map +1 -0
- package/dist/commands/models.js +612 -0
- package/dist/commands/models.js.map +1 -0
- package/dist/commands/pods.d.ts +21 -0
- package/dist/commands/pods.d.ts.map +1 -0
- package/dist/commands/pods.js +175 -0
- package/dist/commands/pods.js.map +1 -0
- package/dist/commands/prompt.d.ts +7 -0
- package/dist/commands/prompt.d.ts.map +1 -0
- package/dist/commands/prompt.js +55 -0
- package/dist/commands/prompt.js.map +1 -0
- package/dist/config.d.ts +11 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +74 -0
- package/dist/config.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -0
- package/dist/model-configs.d.ts +22 -0
- package/dist/model-configs.d.ts.map +1 -0
- package/dist/model-configs.js +75 -0
- package/dist/model-configs.js.map +1 -0
- package/dist/models.json +305 -0
- package/dist/ssh.d.ts +24 -0
- package/dist/ssh.d.ts.map +1 -0
- package/dist/ssh.js +115 -0
- package/dist/ssh.js.map +1 -0
- package/dist/types.d.ts +23 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +38 -40
- package/LICENSE +0 -21
- package/pi.js +0 -1379
- package/pod_setup.sh +0 -74
- package/vllm_manager.py +0 -662
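Judging by the file list, the package was rewritten from a single pi.js script plus the pod_setup.sh and vllm_manager.py helpers into compiled TypeScript output under dist/, with the pod bootstrap and vLLM model management apparently absorbed into commands/pods.js and commands/models.js. A minimal sketch of picking up the new release, assuming the package still exposes a CLI bin named pi (the diff does not confirm the bin name):

npm install -g @mariozechner/pi@0.5.0   # install the 0.5.0 release globally
pi --help                               # assumed bin name; lists available commands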
package/pod_setup.sh
DELETED
@@ -1,74 +0,0 @@
-#!/usr/bin/env bash
-# GPU pod bootstrap: Ubuntu 22.04 + CUDA 12.6/12.8, vLLM latest, FlashInfer w/ TRT kernels (sm70-120)
-
-set -euo pipefail
-
-apt update -y
-apt install -y python3-pip python3-venv git build-essential cmake ninja-build curl
-
-# --- Install uv (fast Python package manager) --------------------------------
-curl -LsSf https://astral.sh/uv/install.sh | sh
-export PATH="$HOME/.local/bin:$PATH"
-
-# --- Create and activate venv ------------------------------------------------
-VENV="$HOME/vllm_env"
-uv venv --python 3.12 --seed "$VENV"
-source "$VENV/bin/activate"
-
-# --- Install vLLM with automatic PyTorch selection ---------------------------
-echo "Installing vLLM with automatic CUDA/PyTorch detection..."
-# uv automatically selects the right PyTorch based on CUDA version
-uv pip install vllm --torch-backend=auto
-
-# --- Install additional packages ---------------------------------------------
-echo "Installing additional packages..."
-uv pip install huggingface-hub psutil tensorrt hf_transfer
-
-# --- FlashInfer installation (optional, improves performance) ----------------
-echo "Attempting FlashInfer installation (optional)..."
-# vLLM will use Flash Attention as fallback if FlashInfer is not available
-
-# Try the official FlashInfer package name
-if uv pip install flashinfer-python; then
-    echo "FlashInfer installed successfully"
-    ATTENTION_BACKEND="FLASHINFER"
-else
-    echo "FlashInfer not available, using Flash Attention instead"
-    ATTENTION_BACKEND="FLASH_ATTN"
-fi
-
-# --- HF token check ----------------------------------------------------------
-: "${HF_TOKEN:?HF_TOKEN env var required}"
-
-mkdir -p ~/.config/vllm
-touch ~/.config/vllm/do_not_track
-
-cat > ~/.pirc <<EOF
-# auto-sourced env
-[ -d "$HOME/vllm_env" ] && source "$HOME/vllm_env/bin/activate"
-export PATH="$HOME/.local/bin:$PATH"
-export VLLM_ATTENTION_BACKEND=${ATTENTION_BACKEND}
-export VLLM_USE_FLASHINFER_SAMPLER=1
-export VLLM_USE_DEEP_GEMM=1
-export VLLM_NO_USAGE_STATS=1
-export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
-export HF_TOKEN=${HF_TOKEN}
-export HUGGING_FACE_HUB_TOKEN=${HF_TOKEN}
-export HF_HUB_ENABLE_HF_TRANSFER=1
-EOF
-
-# --- RunPod specific setup ---------------------------------------------------
-if df -h | grep -q "runpod.net.*workspace"; then
-    echo "Detected RunPod instance - setting up workspace symlink..."
-    if [ ! -L ~/.cache/huggingface ]; then
-        mkdir -p /workspace/cache/huggingface
-        rm -rf ~/.cache/huggingface 2>/dev/null || true
-        ln -s /workspace/cache/huggingface ~/.cache/huggingface
-        echo "Created symlink: ~/.cache/huggingface -> /workspace/cache/huggingface"
-    else
-        echo "Symlink already exists"
-    fi
-fi
-
-echo "=== DONE ==="
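Since the heredoc delimiter (EOF) above is unquoted, $HOME, $PATH, ${ATTENTION_BACKEND}, and ${HF_TOKEN} all expand while the bootstrap runs, so the ~/.pirc it writes is fully resolved. A sketch of the generated file, assuming HOME=/root, a successful FlashInfer install, and a placeholder token (all assumptions, not values from the diff):

# auto-sourced env
[ -d "/root/vllm_env" ] && source "/root/vllm_env/bin/activate"
export PATH="/root/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"   # setup-time PATH baked in
export VLLM_ATTENTION_BACKEND=FLASHINFER
export VLLM_USE_FLASHINFER_SAMPLER=1
export VLLM_USE_DEEP_GEMM=1
export VLLM_NO_USAGE_STATS=1
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export HF_TOKEN=hf_xxxxxxxx                 # placeholder; the real value comes from the HF_TOKEN env var
export HUGGING_FACE_HUB_TOKEN=hf_xxxxxxxx   # placeholder
export HF_HUB_ENABLE_HF_TRANSFER=1

Note that $PATH expands at write time too, so the file freezes the setup-time PATH rather than prepending to whatever the later interactive shell has; quoting the delimiter (<<'EOF') and escaping only the variables meant to expand would defer that.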