@mariozechner/pi 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/package.json +1 -1
- package/pi.js +9 -4
package/README.md
CHANGED
@@ -5,12 +5,12 @@ Quickly deploy LLMs on GPU pods from [Prime Intellect](https://www.primeintellec
 ## Installation
 
 ```bash
-npm install -g @
+npm install -g @mariozechner/pi
 ```
 
 Or run directly with npx:
 ```bash
-npx @
+npx @mariozechner/pi
 ```
 
 ## What This Is
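
For reference, the corrected install lines in this hunk would be exercised roughly as follows. This is only a sketch assembled from the README lines above and the `pi` help text shown in pi.js further down; the model argument is reused from that help text and is purely illustrative.

```bash
# Global install, then invoke the CLI by its bin name
npm install -g @mariozechner/pi
pi start Qwen/Qwen2.5-7B-Instruct --name qwen

# Or run one-off through npx without installing
npx @mariozechner/pi start Qwen/Qwen2.5-7B-Instruct --name qwen
```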
@@ -314,4 +314,4 @@ Remember: Tool calling is still an evolving feature in the LLM ecosystem. What w
 - **Connection Refused**: Check pod is running and port is correct
 - **HF Token Issues**: Ensure HF_TOKEN is set before running setup
 - **Access Denied**: Some models (like Llama, Mistral) require completing an access request on HuggingFace first. Visit the model page and click "Request access"
-- **Tool Calling Errors**: See the Tool Calling section above - consider disabling it or using a different model
+- **Tool Calling Errors**: See the Tool Calling section above - consider disabling it or using a different model
package/package.json
CHANGED
package/pi.js
CHANGED
@@ -204,7 +204,7 @@ class PrimeIntellectCLI {
 console.error('');
 console.error('Options:');
 console.error(' --name <name> Model alias (default: auto-generated)');
-console.error(' --context <size> Context window: 4k, 8k, 16k, 32k or 4096, 8192, etc (default: model default)');
+console.error(' --context <size> Context window: 4k, 8k, 16k, 32k, 64k, 128k or 4096, 8192, etc (default: model default)');
 console.error(' --memory <percent> GPU memory: 30%, 50%, 90% or 0.3, 0.5, 0.9 (default: 90%)');
 console.error(' --all-gpus Use all GPUs with tensor parallelism (ignores --memory)');
 console.error(' --vllm-args Pass remaining args directly to vLLM (ignores other options)');
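
The only functional difference in this hunk is the `--context` help text, which now lists 64k and 128k alongside the raw token counts. A minimal usage sketch, assuming the flag accepts the shorthand it documents (model name reused from the Quick Examples in the next hunk; the memory value is illustrative):

```bash
# Start a pod-hosted model with a 64k context window and 50% of GPU memory
pi start Qwen/Qwen2.5-7B-Instruct --name qwen --context 64k --memory 50%
```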
@@ -719,15 +719,20 @@ class PrimeIntellectCLI {
 console.log(' pi prompt <name> <msg> Chat with a model\n');
 console.log('Start Options:');
 console.log(' --name <name> Model alias (default: auto-generated)');
-console.log(' --context <size> Context window: 4k, 16k, 32k (default:
+console.log(' --context <size> Context window: 4k, 8k, 16k, 32k, 64k, 128k (default: model default)');
 console.log(' --memory <percent> GPU memory: 30%, 50%, 90% (default: 90%)');
-console.log(' --all-gpus Use all GPUs with tensor parallelism
+console.log(' --all-gpus Use all GPUs with tensor parallelism');
+console.log(' --vllm-args Pass remaining args directly to vLLM\n');
 console.log('Utility:');
 console.log(' pi shell SSH into active pod');
 
-console.log('\nQuick
+console.log('\nQuick Examples:');
 console.log(' pi start Qwen/Qwen2.5-7B-Instruct --name qwen');
 console.log(' pi prompt qwen "What is 2+2?"');
+console.log('\n # Qwen3-Coder on 8xH200 with custom vLLM args:');
+console.log(' pi start Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 --name qwen-coder --vllm-args \\');
+console.log(' --data-parallel-size 8 --enable-expert-parallel \\');
+console.log(' --tool-call-parser qwen3_coder --enable-auto-tool-choice --max-model-len 200000');
 
 if (this.config.active && this.config.pods[this.config.active]) {
 console.log(`\nActive pod: ${this.config.active} (${this.config.pods[this.config.active].ssh})`);
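
Typed into a shell, the new multi-line Quick Example embedded in the console.log strings above amounts to the command below. The flags after `--vllm-args` are forwarded verbatim to vLLM per the help text; whether vLLM accepts these exact options depends on the vLLM version installed on the pod.

```bash
# Qwen3-Coder on 8xH200: everything after --vllm-args goes straight to vLLM
pi start Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 --name qwen-coder --vllm-args \
  --data-parallel-size 8 --enable-expert-parallel \
  --tool-call-parser qwen3_coder --enable-auto-tool-choice --max-model-len 200000
```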