@mariozechner/pi 0.1.4 → 0.1.5

Files changed (2):
  1. package/package.json +1 -1
  2. package/pi.js +9 -4
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mariozechner/pi",
-  "version": "0.1.4",
+  "version": "0.1.5",
   "description": "CLI tool for managing vLLM deployments on GPU pods from Prime Intellect, Vast.ai, etc.",
   "main": "pi.js",
   "bin": {
package/pi.js CHANGED
@@ -204,7 +204,7 @@ class PrimeIntellectCLI {
     console.error('');
     console.error('Options:');
     console.error('  --name <name>       Model alias (default: auto-generated)');
-    console.error('  --context <size>    Context window: 4k, 8k, 16k, 32k or 4096, 8192, etc (default: model default)');
+    console.error('  --context <size>    Context window: 4k, 8k, 16k, 32k, 64k, 128k or 4096, 8192, etc (default: model default)');
     console.error('  --memory <percent>  GPU memory: 30%, 50%, 90% or 0.3, 0.5, 0.9 (default: 90%)');
     console.error('  --all-gpus          Use all GPUs with tensor parallelism (ignores --memory)');
     console.error('  --vllm-args         Pass remaining args directly to vLLM (ignores other options)');
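
This hunk only widens the advertised range of --context values to include 64k and 128k; the help text already states that both suffixed sizes (4k, 8k) and raw token counts (4096, 8192) are accepted. The parsing logic itself is not part of this diff. As a rough sketch only, a normalizer for such values might look like the following (the function name and exact behavior are assumptions, not pi.js's actual code):

// Hypothetical helper (not from pi.js): normalize a --context value such as
// "64k" or "65536" into a token count, illustrating the input formats the
// help text above describes.
function parseContextSize(value) {
  const match = /^(\d+)(k)?$/i.exec(value.trim());
  if (!match) throw new Error(`Invalid --context value: ${value}`);
  const tokens = parseInt(match[1], 10);
  return match[2] ? tokens * 1024 : tokens; // "64k" -> 65536, "65536" -> 65536
}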
@@ -719,15 +719,20 @@ class PrimeIntellectCLI {
     console.log('  pi prompt <name> <msg>  Chat with a model\n');
     console.log('Start Options:');
     console.log('  --name <name>       Model alias (default: auto-generated)');
-    console.log('  --context <size>    Context window: 4k, 16k, 32k (default: 8k)');
+    console.log('  --context <size>    Context window: 4k, 8k, 16k, 32k, 64k, 128k (default: model default)');
     console.log('  --memory <percent>  GPU memory: 30%, 50%, 90% (default: 90%)');
-    console.log('  --all-gpus          Use all GPUs with tensor parallelism\n');
+    console.log('  --all-gpus          Use all GPUs with tensor parallelism');
+    console.log('  --vllm-args         Pass remaining args directly to vLLM\n');
     console.log('Utility:');
     console.log('  pi shell            SSH into active pod');
 
-    console.log('\nQuick Example:');
+    console.log('\nQuick Examples:');
     console.log('  pi start Qwen/Qwen2.5-7B-Instruct --name qwen');
     console.log('  pi prompt qwen "What is 2+2?"');
+    console.log('\n  # Qwen3-Coder on 8xH200 with custom vLLM args:');
+    console.log('  pi start Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 --name qwen-coder --vllm-args \\');
+    console.log('    --data-parallel-size 8 --enable-expert-parallel \\');
+    console.log('    --tool-call-parser qwen3_coder --enable-auto-tool-choice --max-model-len 200000');
 
     if (this.config.active && this.config.pods[this.config.active]) {
       console.log(`\nActive pod: ${this.config.active} (${this.config.pods[this.config.active].ssh})`);
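
The second hunk brings the pi start help in line with the pi help screen at line 204: it documents --vllm-args, aligns the --context text with the new size list, and adds a multi-line Quick Example that forwards data-parallel and tool-calling flags straight to vLLM. How pi.js actually assembles the server command from those trailing arguments is not visible in this diff; a minimal sketch of such a passthrough, assuming a hypothetical buildVllmCommand helper and vLLM's vllm serve entrypoint, could be:

// Hypothetical sketch (not from pi.js): forward everything after --vllm-args
// verbatim to the vLLM server command, bypassing pi's own option handling.
function buildVllmCommand(model, argv) {
  const i = argv.indexOf('--vllm-args');
  const passthrough = i >= 0 ? argv.slice(i + 1) : [];
  return ['vllm', 'serve', model, ...passthrough].join(' ');
}

// Under these assumptions, the Qwen3-Coder Quick Example above would reduce to:
// vllm serve Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 --data-parallel-size 8 \
//   --enable-expert-parallel --tool-call-parser qwen3_coder \
//   --enable-auto-tool-choice --max-model-len 200000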