@mariozechner/pi 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pi.js +9 -4
package/package.json
CHANGED
package/pi.js
CHANGED
|
@@ -204,7 +204,7 @@ class PrimeIntellectCLI {
|
|
|
204
204
|
console.error('');
|
|
205
205
|
console.error('Options:');
|
|
206
206
|
console.error(' --name <name> Model alias (default: auto-generated)');
|
|
207
|
-
console.error(' --context <size> Context window: 4k, 8k, 16k, 32k or 4096, 8192, etc (default: model default)');
|
|
207
|
+
console.error(' --context <size> Context window: 4k, 8k, 16k, 32k, 64k, 128k or 4096, 8192, etc (default: model default)');
|
|
208
208
|
console.error(' --memory <percent> GPU memory: 30%, 50%, 90% or 0.3, 0.5, 0.9 (default: 90%)');
|
|
209
209
|
console.error(' --all-gpus Use all GPUs with tensor parallelism (ignores --memory)');
|
|
210
210
|
console.error(' --vllm-args Pass remaining args directly to vLLM (ignores other options)');
|
|
@@ -719,15 +719,20 @@ class PrimeIntellectCLI {
|
|
|
719
719
|
console.log(' pi prompt <name> <msg> Chat with a model\n');
|
|
720
720
|
console.log('Start Options:');
|
|
721
721
|
console.log(' --name <name> Model alias (default: auto-generated)');
|
|
722
|
-
console.log(' --context <size> Context window: 4k, 16k, 32k (default: model default)');
|
|
722
|
+
console.log(' --context <size> Context window: 4k, 8k, 16k, 32k, 64k, 128k (default: model default)');
|
|
723
723
|
console.log(' --memory <percent> GPU memory: 30%, 50%, 90% (default: 90%)');
|
|
724
|
-
console.log(' --all-gpus Use all GPUs with tensor parallelism\n');
|
|
724
|
+
console.log(' --all-gpus Use all GPUs with tensor parallelism');
|
|
725
|
+
console.log(' --vllm-args Pass remaining args directly to vLLM\n');
|
|
725
726
|
console.log('Utility:');
|
|
726
727
|
console.log(' pi shell SSH into active pod');
|
|
727
728
|
|
|
728
|
-
console.log('\nQuick
|
|
729
|
+
console.log('\nQuick Examples:');
|
|
729
730
|
console.log(' pi start Qwen/Qwen2.5-7B-Instruct --name qwen');
|
|
730
731
|
console.log(' pi prompt qwen "What is 2+2?"');
|
|
732
|
+
console.log('\n # Qwen3-Coder on 8xH200 with custom vLLM args:');
|
|
733
|
+
console.log(' pi start Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 --name qwen-coder --vllm-args \\');
|
|
734
|
+
console.log(' --data-parallel-size 8 --enable-expert-parallel \\');
|
|
735
|
+
console.log(' --tool-call-parser qwen3_coder --enable-auto-tool-choice --max-model-len 200000');
|
|
731
736
|
|
|
732
737
|
if (this.config.active && this.config.pods[this.config.active]) {
|
|
733
738
|
console.log(`\nActive pod: ${this.config.active} (${this.config.pods[this.config.active].ssh})`);
|