@mariozechner/pi 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/package.json +1 -1
- package/pi.js +9 -4
package/README.md
CHANGED
@@ -5,12 +5,12 @@ Quickly deploy LLMs on GPU pods from [Prime Intellect](https://www.primeintellec
 ## Installation
 
 ```bash
-npm install -g @
+npm install -g @mariozechner/pi
 ```
 
 Or run directly with npx:
 ```bash
-npx @
+npx @mariozechner/pi
 ```
 
 ## What This Is
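
For reference, the corrected install lines in this hunk would be exercised roughly as follows. This is only a sketch assembled from the README lines above and the `pi` help text shown in pi.js further down; the model argument is reused from that help text and is purely illustrative.

```bash
# Global install, then invoke the CLI by its bin name
npm install -g @mariozechner/pi
pi start Qwen/Qwen2.5-7B-Instruct --name qwen

# Or run one-off through npx without installing
npx @mariozechner/pi start Qwen/Qwen2.5-7B-Instruct --name qwen
```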
@@ -314,4 +314,4 @@ Remember: Tool calling is still an evolving feature in the LLM ecosystem. What w
 - **Connection Refused**: Check pod is running and port is correct
 - **HF Token Issues**: Ensure HF_TOKEN is set before running setup
 - **Access Denied**: Some models (like Llama, Mistral) require completing an access request on HuggingFace first. Visit the model page and click "Request access"
-- **Tool Calling Errors**: See the Tool Calling section above - consider disabling it or using a different model
+- **Tool Calling Errors**: See the Tool Calling section above - consider disabling it or using a different model
package/package.json
CHANGED
package/pi.js
CHANGED
@@ -204,7 +204,7 @@ class PrimeIntellectCLI {
 console.error('');
 console.error('Options:');
 console.error(' --name <name> Model alias (default: auto-generated)');
-console.error(' --context <size> Context window: 4k, 8k, 16k, 32k or 4096, 8192, etc (default: model default)');
+console.error(' --context <size> Context window: 4k, 8k, 16k, 32k, 64k, 128k or 4096, 8192, etc (default: model default)');
 console.error(' --memory <percent> GPU memory: 30%, 50%, 90% or 0.3, 0.5, 0.9 (default: 90%)');
 console.error(' --all-gpus Use all GPUs with tensor parallelism (ignores --memory)');
 console.error(' --vllm-args Pass remaining args directly to vLLM (ignores other options)');
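
The only functional difference in this hunk is the `--context` help text, which now lists 64k and 128k alongside the raw token counts. A minimal usage sketch, assuming the flag accepts the shorthand it documents (model name reused from the Quick Examples in the next hunk; the memory value is illustrative):

```bash
# Start a pod-hosted model with a 64k context window and 50% of GPU memory
pi start Qwen/Qwen2.5-7B-Instruct --name qwen --context 64k --memory 50%
```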
@@ -719,15 +719,20 @@ class PrimeIntellectCLI {
 console.log(' pi prompt <name> <msg> Chat with a model\n');
 console.log('Start Options:');
 console.log(' --name <name> Model alias (default: auto-generated)');
-console.log(' --context <size> Context window: 4k, 16k, 32k (default:
+console.log(' --context <size> Context window: 4k, 8k, 16k, 32k, 64k, 128k (default: model default)');
 console.log(' --memory <percent> GPU memory: 30%, 50%, 90% (default: 90%)');
-console.log(' --all-gpus Use all GPUs with tensor parallelism
+console.log(' --all-gpus Use all GPUs with tensor parallelism');
+console.log(' --vllm-args Pass remaining args directly to vLLM\n');
 console.log('Utility:');
 console.log(' pi shell SSH into active pod');
 
-console.log('\nQuick
+console.log('\nQuick Examples:');
 console.log(' pi start Qwen/Qwen2.5-7B-Instruct --name qwen');
 console.log(' pi prompt qwen "What is 2+2?"');
+console.log('\n # Qwen3-Coder on 8xH200 with custom vLLM args:');
+console.log(' pi start Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 --name qwen-coder --vllm-args \\');
+console.log(' --data-parallel-size 8 --enable-expert-parallel \\');
+console.log(' --tool-call-parser qwen3_coder --enable-auto-tool-choice --max-model-len 200000');
 
 if (this.config.active && this.config.pods[this.config.active]) {
 console.log(`\nActive pod: ${this.config.active} (${this.config.pods[this.config.active].ssh})`);
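
Typed into a shell, the new multi-line Quick Example embedded in the console.log strings above amounts to the command below. The flags after `--vllm-args` are forwarded verbatim to vLLM per the help text; whether vLLM accepts these exact options depends on the vLLM version installed on the pod.

```bash
# Qwen3-Coder on 8xH200: everything after --vllm-args goes straight to vLLM
pi start Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 --name qwen-coder --vllm-args \
  --data-parallel-size 8 --enable-expert-parallel \
  --tool-call-parser qwen3_coder --enable-auto-tool-choice --max-model-len 200000
```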