@mariozechner/pi 0.1.3 → 0.1.5

Files changed (3)
  1. package/README.md +3 -3
  2. package/package.json +1 -1
  3. package/pi.js +9 -4
package/README.md CHANGED
@@ -5,12 +5,12 @@ Quickly deploy LLMs on GPU pods from [Prime Intellect](https://www.primeintellec
 ## Installation
 
 ```bash
-npm install -g @badlogic/pi
+npm install -g @mariozechner/pi
 ```
 
 Or run directly with npx:
 ```bash
-npx @badlogic/pi
+npx @mariozechner/pi
 ```
 
 ## What This Is
@@ -314,4 +314,4 @@ Remember: Tool calling is still an evolving feature in the LLM ecosystem. What w
 - **Connection Refused**: Check pod is running and port is correct
 - **HF Token Issues**: Ensure HF_TOKEN is set before running setup
 - **Access Denied**: Some models (like Llama, Mistral) require completing an access request on HuggingFace first. Visit the model page and click "Request access"
-- **Tool Calling Errors**: See the Tool Calling section above - consider disabling it or using a different model
+- **Tool Calling Errors**: See the Tool Calling section above - consider disabling it or using a different model
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mariozechner/pi",
-  "version": "0.1.3",
+  "version": "0.1.5",
   "description": "CLI tool for managing vLLM deployments on GPU pods from Prime Intellect, Vast.ai, etc.",
   "main": "pi.js",
   "bin": {
package/pi.js CHANGED
@@ -204,7 +204,7 @@ class PrimeIntellectCLI {
     console.error('');
     console.error('Options:');
     console.error('  --name <name>       Model alias (default: auto-generated)');
-    console.error('  --context <size>    Context window: 4k, 8k, 16k, 32k or 4096, 8192, etc (default: model default)');
+    console.error('  --context <size>    Context window: 4k, 8k, 16k, 32k, 64k, 128k or 4096, 8192, etc (default: model default)');
     console.error('  --memory <percent>  GPU memory: 30%, 50%, 90% or 0.3, 0.5, 0.9 (default: 90%)');
     console.error('  --all-gpus          Use all GPUs with tensor parallelism (ignores --memory)');
     console.error('  --vllm-args         Pass remaining args directly to vLLM (ignores other options)');
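
Read as a shell command, the widened `--context` range means invocations like the one below should now be accepted. This is a sketch only: the model name is taken from the package's own quick example, and combining it with a 64k context and explicit 90% memory is an assumption, not a documented invocation.

```bash
# Hypothetical invocation exercising the newly listed 64k context size;
# --memory 90% just makes the documented default explicit.
pi start Qwen/Qwen2.5-7B-Instruct --name qwen --context 64k --memory 90%
```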
@@ -719,15 +719,20 @@ class PrimeIntellectCLI {
     console.log('  pi prompt <name> <msg>  Chat with a model\n');
     console.log('Start Options:');
     console.log('  --name <name>       Model alias (default: auto-generated)');
-    console.log('  --context <size>    Context window: 4k, 16k, 32k (default: 8k)');
+    console.log('  --context <size>    Context window: 4k, 8k, 16k, 32k, 64k, 128k (default: model default)');
     console.log('  --memory <percent>  GPU memory: 30%, 50%, 90% (default: 90%)');
-    console.log('  --all-gpus          Use all GPUs with tensor parallelism\n');
+    console.log('  --all-gpus          Use all GPUs with tensor parallelism');
+    console.log('  --vllm-args         Pass remaining args directly to vLLM\n');
     console.log('Utility:');
     console.log('  pi shell            SSH into active pod');
 
-    console.log('\nQuick Example:');
+    console.log('\nQuick Examples:');
     console.log('  pi start Qwen/Qwen2.5-7B-Instruct --name qwen');
     console.log('  pi prompt qwen "What is 2+2?"');
+    console.log('\n  # Qwen3-Coder on 8xH200 with custom vLLM args:');
+    console.log('  pi start Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 --name qwen-coder --vllm-args \\');
+    console.log('    --data-parallel-size 8 --enable-expert-parallel \\');
+    console.log('    --tool-call-parser qwen3_coder --enable-auto-tool-choice --max-model-len 200000');
 
     if (this.config.active && this.config.pods[this.config.active]) {
       console.log(`\nActive pod: ${this.config.active} (${this.config.pods[this.config.active].ssh})`);
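
Unescaped, the new quick example printed by the help text corresponds to the shell invocation below; per the updated help, everything after `--vllm-args` is passed through to vLLM unchanged.

```bash
pi start Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 --name qwen-coder --vllm-args \
  --data-parallel-size 8 --enable-expert-parallel \
  --tool-call-parser qwen3_coder --enable-auto-tool-choice --max-model-len 200000
```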