@mariozechner/pi 0.1.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +410 -216
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +348 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/models.d.ts +39 -0
- package/dist/commands/models.d.ts.map +1 -0
- package/dist/commands/models.js +612 -0
- package/dist/commands/models.js.map +1 -0
- package/dist/commands/pods.d.ts +21 -0
- package/dist/commands/pods.d.ts.map +1 -0
- package/dist/commands/pods.js +175 -0
- package/dist/commands/pods.js.map +1 -0
- package/dist/commands/prompt.d.ts +7 -0
- package/dist/commands/prompt.d.ts.map +1 -0
- package/dist/commands/prompt.js +55 -0
- package/dist/commands/prompt.js.map +1 -0
- package/dist/config.d.ts +11 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +74 -0
- package/dist/config.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -0
- package/dist/model-configs.d.ts +22 -0
- package/dist/model-configs.d.ts.map +1 -0
- package/dist/model-configs.js +75 -0
- package/dist/model-configs.js.map +1 -0
- package/dist/models.json +305 -0
- package/dist/ssh.d.ts +24 -0
- package/dist/ssh.d.ts.map +1 -0
- package/dist/ssh.js +115 -0
- package/dist/ssh.js.map +1 -0
- package/dist/types.d.ts +23 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +38 -40
- package/LICENSE +0 -21
- package/pi.js +0 -878
- package/pod_setup.sh +0 -133
- package/vllm_manager.py +0 -499
package/dist/models.json
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
{
|
|
2
|
+
"models": {
|
|
3
|
+
"Qwen/Qwen2.5-Coder-32B-Instruct": {
|
|
4
|
+
"name": "Qwen2.5-Coder-32B",
|
|
5
|
+
"configs": [
|
|
6
|
+
{
|
|
7
|
+
"gpuCount": 1,
|
|
8
|
+
"gpuTypes": ["H100", "H200"],
|
|
9
|
+
"args": ["--tool-call-parser", "hermes", "--enable-auto-tool-choice"]
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"gpuCount": 2,
|
|
13
|
+
"gpuTypes": ["H100", "H200"],
|
|
14
|
+
"args": ["--tensor-parallel-size", "2", "--tool-call-parser", "hermes", "--enable-auto-tool-choice"]
|
|
15
|
+
}
|
|
16
|
+
]
|
|
17
|
+
},
|
|
18
|
+
"Qwen/Qwen3-Coder-30B-A3B-Instruct": {
|
|
19
|
+
"name": "Qwen3-Coder-30B",
|
|
20
|
+
"configs": [
|
|
21
|
+
{
|
|
22
|
+
"gpuCount": 1,
|
|
23
|
+
"gpuTypes": ["H100", "H200"],
|
|
24
|
+
"args": ["--enable-auto-tool-choice", "--tool-call-parser", "qwen3_coder"],
|
|
25
|
+
"notes": "Fits comfortably on single GPU. ~60GB model weight."
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"gpuCount": 2,
|
|
29
|
+
"gpuTypes": ["H100", "H200"],
|
|
30
|
+
"args": [
|
|
31
|
+
"--tensor-parallel-size",
|
|
32
|
+
"2",
|
|
33
|
+
"--enable-auto-tool-choice",
|
|
34
|
+
"--tool-call-parser",
|
|
35
|
+
"qwen3_coder"
|
|
36
|
+
],
|
|
37
|
+
"notes": "For higher throughput/longer context."
|
|
38
|
+
}
|
|
39
|
+
]
|
|
40
|
+
},
|
|
41
|
+
"Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8": {
|
|
42
|
+
"name": "Qwen3-Coder-30B-FP8",
|
|
43
|
+
"configs": [
|
|
44
|
+
{
|
|
45
|
+
"gpuCount": 1,
|
|
46
|
+
"gpuTypes": ["H100", "H200"],
|
|
47
|
+
"args": ["--enable-auto-tool-choice", "--tool-call-parser", "qwen3_coder"],
|
|
48
|
+
"env": {
|
|
49
|
+
"VLLM_USE_DEEP_GEMM": "1"
|
|
50
|
+
},
|
|
51
|
+
"notes": "FP8 quantized, ~30GB model weight. Excellent for single GPU deployment."
|
|
52
|
+
}
|
|
53
|
+
]
|
|
54
|
+
},
|
|
55
|
+
"Qwen/Qwen3-Coder-480B-A35B-Instruct": {
|
|
56
|
+
"name": "Qwen3-Coder-480B",
|
|
57
|
+
"configs": [
|
|
58
|
+
{
|
|
59
|
+
"gpuCount": 8,
|
|
60
|
+
"gpuTypes": ["H200", "H20"],
|
|
61
|
+
"args": [
|
|
62
|
+
"--tensor-parallel-size",
|
|
63
|
+
"8",
|
|
64
|
+
"--max-model-len",
|
|
65
|
+
"32000",
|
|
66
|
+
"--enable-auto-tool-choice",
|
|
67
|
+
"--tool-call-parser",
|
|
68
|
+
"qwen3_coder"
|
|
69
|
+
],
|
|
70
|
+
"notes": "Cannot serve full 262K context on single node. Reduce max-model-len or increase gpu-memory-utilization."
|
|
71
|
+
}
|
|
72
|
+
]
|
|
73
|
+
},
|
|
74
|
+
"Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": {
|
|
75
|
+
"name": "Qwen3-Coder-480B-FP8",
|
|
76
|
+
"configs": [
|
|
77
|
+
{
|
|
78
|
+
"gpuCount": 8,
|
|
79
|
+
"gpuTypes": ["H200", "H20"],
|
|
80
|
+
"args": [
|
|
81
|
+
"--max-model-len",
|
|
82
|
+
"131072",
|
|
83
|
+
"--enable-expert-parallel",
|
|
84
|
+
"--data-parallel-size",
|
|
85
|
+
"8",
|
|
86
|
+
"--enable-auto-tool-choice",
|
|
87
|
+
"--tool-call-parser",
|
|
88
|
+
"qwen3_coder"
|
|
89
|
+
],
|
|
90
|
+
"env": {
|
|
91
|
+
"VLLM_USE_DEEP_GEMM": "1"
|
|
92
|
+
},
|
|
93
|
+
"notes": "Use data-parallel mode (not tensor-parallel) to avoid weight quantization errors."
|
|
94
|
+
}
|
|
95
|
+
]
|
|
96
|
+
},
|
|
97
|
+
"openai/gpt-oss-20b": {
|
|
98
|
+
"name": "GPT-OSS-20B",
|
|
99
|
+
"configs": [
|
|
100
|
+
{
|
|
101
|
+
"gpuCount": 1,
|
|
102
|
+
"gpuTypes": ["H100", "H200"],
|
|
103
|
+
"args": ["--async-scheduling"]
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
"gpuCount": 1,
|
|
107
|
+
"gpuTypes": ["B200"],
|
|
108
|
+
"args": ["--async-scheduling"],
|
|
109
|
+
"env": {
|
|
110
|
+
"VLLM_USE_TRTLLM_ATTENTION": "1",
|
|
111
|
+
"VLLM_USE_TRTLLM_DECODE_ATTENTION": "1",
|
|
112
|
+
"VLLM_USE_TRTLLM_CONTEXT_ATTENTION": "1",
|
|
113
|
+
"VLLM_USE_FLASHINFER_MXFP4_MOE": "1"
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
],
|
|
117
|
+
"notes": "Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint."
|
|
118
|
+
},
|
|
119
|
+
"openai/gpt-oss-120b": {
|
|
120
|
+
"name": "GPT-OSS-120B",
|
|
121
|
+
"configs": [
|
|
122
|
+
{
|
|
123
|
+
"gpuCount": 1,
|
|
124
|
+
"gpuTypes": ["H100", "H200"],
|
|
125
|
+
"args": ["--async-scheduling", "--gpu-memory-utilization", "0.95", "--max-num-batched-tokens", "1024"],
|
|
126
|
+
"notes": "Single GPU deployment. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint."
|
|
127
|
+
},
|
|
128
|
+
{
|
|
129
|
+
"gpuCount": 2,
|
|
130
|
+
"gpuTypes": ["H100", "H200"],
|
|
131
|
+
"args": ["--tensor-parallel-size", "2", "--async-scheduling", "--gpu-memory-utilization", "0.94"],
|
|
132
|
+
"notes": "Recommended for H100/H200. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint."
|
|
133
|
+
},
|
|
134
|
+
{
|
|
135
|
+
"gpuCount": 4,
|
|
136
|
+
"gpuTypes": ["H100", "H200"],
|
|
137
|
+
"args": ["--tensor-parallel-size", "4", "--async-scheduling"],
|
|
138
|
+
"notes": "Higher throughput. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint."
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"gpuCount": 8,
|
|
142
|
+
"gpuTypes": ["H100", "H200"],
|
|
143
|
+
"args": ["--tensor-parallel-size", "8", "--async-scheduling"],
|
|
144
|
+
"notes": "Maximum throughput for evaluation workloads. Requires vLLM 0.10.1+gptoss. Tools/function calls only via /v1/responses endpoint."
|
|
145
|
+
}
|
|
146
|
+
]
|
|
147
|
+
},
|
|
148
|
+
"zai-org/GLM-4.5": {
|
|
149
|
+
"name": "GLM-4.5",
|
|
150
|
+
"configs": [
|
|
151
|
+
{
|
|
152
|
+
"gpuCount": 16,
|
|
153
|
+
"gpuTypes": ["H100"],
|
|
154
|
+
"args": [
|
|
155
|
+
"--tensor-parallel-size",
|
|
156
|
+
"16",
|
|
157
|
+
"--tool-call-parser",
|
|
158
|
+
"glm4_moe",
|
|
159
|
+
"--reasoning-parser",
|
|
160
|
+
"glm4_moe",
|
|
161
|
+
"--enable-auto-tool-choice"
|
|
162
|
+
]
|
|
163
|
+
},
|
|
164
|
+
{
|
|
165
|
+
"gpuCount": 8,
|
|
166
|
+
"gpuTypes": ["H200"],
|
|
167
|
+
"args": [
|
|
168
|
+
"--tensor-parallel-size",
|
|
169
|
+
"8",
|
|
170
|
+
"--tool-call-parser",
|
|
171
|
+
"glm4_moe",
|
|
172
|
+
"--reasoning-parser",
|
|
173
|
+
"glm4_moe",
|
|
174
|
+
"--enable-auto-tool-choice"
|
|
175
|
+
]
|
|
176
|
+
}
|
|
177
|
+
],
|
|
178
|
+
"notes": "Models default to thinking mode. For full 128K context, double the GPU count."
|
|
179
|
+
},
|
|
180
|
+
"zai-org/GLM-4.5-FP8": {
|
|
181
|
+
"name": "GLM-4.5-FP8",
|
|
182
|
+
"configs": [
|
|
183
|
+
{
|
|
184
|
+
"gpuCount": 8,
|
|
185
|
+
"gpuTypes": ["H100"],
|
|
186
|
+
"args": [
|
|
187
|
+
"--tensor-parallel-size",
|
|
188
|
+
"8",
|
|
189
|
+
"--tool-call-parser",
|
|
190
|
+
"glm4_moe",
|
|
191
|
+
"--reasoning-parser",
|
|
192
|
+
"glm4_moe",
|
|
193
|
+
"--enable-auto-tool-choice"
|
|
194
|
+
]
|
|
195
|
+
},
|
|
196
|
+
{
|
|
197
|
+
"gpuCount": 4,
|
|
198
|
+
"gpuTypes": ["H200"],
|
|
199
|
+
"args": [
|
|
200
|
+
"--tensor-parallel-size",
|
|
201
|
+
"4",
|
|
202
|
+
"--tool-call-parser",
|
|
203
|
+
"glm4_moe",
|
|
204
|
+
"--reasoning-parser",
|
|
205
|
+
"glm4_moe",
|
|
206
|
+
"--enable-auto-tool-choice"
|
|
207
|
+
]
|
|
208
|
+
}
|
|
209
|
+
]
|
|
210
|
+
},
|
|
211
|
+
"zai-org/GLM-4.5-Air-FP8": {
|
|
212
|
+
"name": "GLM-4.5-Air-FP8",
|
|
213
|
+
"configs": [
|
|
214
|
+
{
|
|
215
|
+
"gpuCount": 2,
|
|
216
|
+
"gpuTypes": ["H100"],
|
|
217
|
+
"args": [
|
|
218
|
+
"--tensor-parallel-size",
|
|
219
|
+
"2",
|
|
220
|
+
"--tool-call-parser",
|
|
221
|
+
"glm4_moe",
|
|
222
|
+
"--reasoning-parser",
|
|
223
|
+
"glm4_moe",
|
|
224
|
+
"--enable-auto-tool-choice",
|
|
225
|
+
"--quantization",
|
|
226
|
+
"fp8"
|
|
227
|
+
],
|
|
228
|
+
"env": {
|
|
229
|
+
"VLLM_ATTENTION_BACKEND": "XFORMERS"
|
|
230
|
+
},
|
|
231
|
+
"notes": "FP8 model requires vLLM with proper FP8 support or MTP module"
|
|
232
|
+
},
|
|
233
|
+
{
|
|
234
|
+
"gpuCount": 1,
|
|
235
|
+
"gpuTypes": ["H200"],
|
|
236
|
+
"args": [
|
|
237
|
+
"--tool-call-parser",
|
|
238
|
+
"glm4_moe",
|
|
239
|
+
"--reasoning-parser",
|
|
240
|
+
"glm4_moe",
|
|
241
|
+
"--enable-auto-tool-choice",
|
|
242
|
+
"--quantization",
|
|
243
|
+
"fp8"
|
|
244
|
+
],
|
|
245
|
+
"env": {
|
|
246
|
+
"VLLM_ATTENTION_BACKEND": "XFORMERS"
|
|
247
|
+
},
|
|
248
|
+
"notes": "FP8 model requires vLLM with proper FP8 support or MTP module"
|
|
249
|
+
}
|
|
250
|
+
]
|
|
251
|
+
},
|
|
252
|
+
"zai-org/GLM-4.5-Air": {
|
|
253
|
+
"name": "GLM-4.5-Air",
|
|
254
|
+
"configs": [
|
|
255
|
+
{
|
|
256
|
+
"gpuCount": 2,
|
|
257
|
+
"gpuTypes": ["H100", "H200"],
|
|
258
|
+
"args": [
|
|
259
|
+
"--tensor-parallel-size",
|
|
260
|
+
"2",
|
|
261
|
+
"--tool-call-parser",
|
|
262
|
+
"glm4_moe",
|
|
263
|
+
"--reasoning-parser",
|
|
264
|
+
"glm4_moe",
|
|
265
|
+
"--enable-auto-tool-choice"
|
|
266
|
+
],
|
|
267
|
+
"notes": "Non-quantized BF16 version, more compatible"
|
|
268
|
+
},
|
|
269
|
+
{
|
|
270
|
+
"gpuCount": 1,
|
|
271
|
+
"gpuTypes": ["H200"],
|
|
272
|
+
"args": [
|
|
273
|
+
"--tool-call-parser",
|
|
274
|
+
"glm4_moe",
|
|
275
|
+
"--reasoning-parser",
|
|
276
|
+
"glm4_moe",
|
|
277
|
+
"--enable-auto-tool-choice",
|
|
278
|
+
"--gpu-memory-utilization",
|
|
279
|
+
"0.95"
|
|
280
|
+
],
|
|
281
|
+
"notes": "Single H200 can fit the BF16 model with high memory utilization"
|
|
282
|
+
}
|
|
283
|
+
]
|
|
284
|
+
},
|
|
285
|
+
"moonshotai/Kimi-K2-Instruct": {
|
|
286
|
+
"name": "Kimi-K2",
|
|
287
|
+
"configs": [
|
|
288
|
+
{
|
|
289
|
+
"gpuCount": 16,
|
|
290
|
+
"gpuTypes": ["H200", "H20"],
|
|
291
|
+
"args": [
|
|
292
|
+
"--tensor-parallel-size",
|
|
293
|
+
"16",
|
|
294
|
+
"--trust-remote-code",
|
|
295
|
+
"--enable-auto-tool-choice",
|
|
296
|
+
"--tool-call-parser",
|
|
297
|
+
"kimi_k2"
|
|
298
|
+
],
|
|
299
|
+
"notes": "Pure TP mode. For >16 GPUs, combine with pipeline-parallelism."
|
|
300
|
+
}
|
|
301
|
+
],
|
|
302
|
+
"notes": "Requires vLLM v0.10.0rc1+. Minimum 16 GPUs for FP8 with 128k context."
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
}
|
package/dist/ssh.d.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Outcome of a finished SSH invocation: the text captured from each output
 * stream plus the numeric exit status.
 */
export interface SSHResult {
    /** Text collected from the process's standard output. */
    stdout: string;
    /** Text collected from the process's standard error. */
    stderr: string;
    /** Numeric exit status reported for the process. */
    exitCode: number;
}
|
|
6
|
+
/**
 * Runs `command` through the SSH invocation given in `sshCmd` and resolves
 * with the captured output and exit code.
 */
export declare const sshExec: (
    sshCmd: string,
    command: string,
    options?: {
        /** Opt in to SSH keepalive settings for long-running commands. */
        keepAlive?: boolean;
    },
) => Promise<SSHResult>;
|
|
12
|
+
/**
 * Runs `command` through the SSH invocation given in `sshCmd`, streaming its
 * output to the console, and resolves with the exit code.
 */
export declare const sshExecStream: (
    sshCmd: string,
    command: string,
    options?: {
        /** Suppress the streamed output. */
        silent?: boolean;
        /** Request a TTY for the remote command. */
        forceTTY?: boolean;
        /** Opt in to SSH keepalive settings for long-running commands. */
        keepAlive?: boolean;
    },
) => Promise<number>;
|
|
20
|
+
/**
 * Copies `localPath` to `remotePath` on the host named in `sshCmd` using SCP.
 * Resolves with a boolean success flag.
 */
export declare const scpFile: (
    sshCmd: string,
    localPath: string,
    remotePath: string,
) => Promise<boolean>;
|
|
24
|
+
//# sourceMappingURL=ssh.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ssh.d.ts","sourceRoot":"","sources":["../src/ssh.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,SAAS;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED;;GAEG;AACH,eAAO,MAAM,OAAO,GACnB,QAAQ,MAAM,EACd,SAAS,MAAM,EACf,UAAU;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,KAC/B,OAAO,CAAC,SAAS,CA+CnB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,aAAa,GACzB,QAAQ,MAAM,EACd,SAAS,MAAM,EACf,UAAU;IAAE,MAAM,CAAC,EAAE,OAAO,CAAC;IAAC,QAAQ,CAAC,EAAE,OAAO,CAAC;IAAC,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,KACrE,OAAO,CAAC,MAAM,CAoChB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,OAAO,GAAU,QAAQ,MAAM,EAAE,WAAW,MAAM,EAAE,YAAY,MAAM,KAAG,OAAO,CAAC,OAAO,CAsCpG,CAAC"}
|
package/dist/ssh.js
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { spawn } from "child_process";
|
|
2
|
+
/**
 * Execute a command over SSH and collect its output.
 *
 * The promise always resolves (it never rejects on process failure); callers
 * inspect `exitCode` to detect errors.
 *
 * @param {string} sshCmd - SSH invocation, split on spaces, e.g.
 *   "ssh root@1.2.3.4" or "ssh -p 22 root@1.2.3.4". Arguments that themselves
 *   contain spaces are not supported.
 * @param {string} command - Command to run; passed to ssh as one trailing argument.
 * @param {{ keepAlive?: boolean }} [options] - `keepAlive` adds
 *   ServerAliveInterval/ServerAliveCountMax options for long-running commands.
 * @returns {Promise<{ stdout: string, stderr: string, exitCode: number }>}
 *   Captured output and exit status. `exitCode` is 1 when the process could
 *   not be started or was terminated by a signal.
 */
export const sshExec = async (sshCmd, command, options) => {
    return new Promise((resolve) => {
        // Parse SSH command (e.g., "ssh root@1.2.3.4" or "ssh -p 22 root@1.2.3.4")
        const [sshBinary, ...baseArgs] = sshCmd.split(" ").filter((p) => p);
        if (!sshBinary) {
            // spawn() would throw on an undefined binary; report it like any other failure.
            resolve({ stdout: "", stderr: "Empty SSH command", exitCode: 1 });
            return;
        }

        // Keepalive options go before the user-supplied arguments.
        // ServerAliveInterval=30 sends a keepalive every 30 seconds;
        // ServerAliveCountMax=120 tolerates 120 misses (60 minutes total).
        const sshArgs = options?.keepAlive
            ? ["-o", "ServerAliveInterval=30", "-o", "ServerAliveCountMax=120", ...baseArgs]
            : [...baseArgs];
        sshArgs.push(command);

        const proc = spawn(sshBinary, sshArgs, {
            stdio: ["ignore", "pipe", "pipe"],
        });

        // Decode at the stream level so multi-byte UTF-8 characters that
        // straddle two chunks are not corrupted.
        proc.stdout.setEncoding("utf8");
        proc.stderr.setEncoding("utf8");

        let stdout = "";
        let stderr = "";
        proc.stdout.on("data", (chunk) => {
            stdout += chunk;
        });
        proc.stderr.on("data", (chunk) => {
            stderr += chunk;
        });

        proc.on("close", (code) => {
            // `code` is null when the process was killed by a signal; that is
            // a failure and must not be reported as exit code 0.
            resolve({ stdout, stderr, exitCode: code ?? 1 });
        });

        // Spawn failure (e.g. binary not found). If "close" also fires, the
        // second resolve() is a no-op.
        proc.on("error", (err) => {
            resolve({ stdout, stderr: err.message, exitCode: 1 });
        });
    });
};
|
|
45
|
+
/**
 * Execute a command over SSH, streaming its output straight to the console.
 *
 * The promise always resolves; callers inspect the returned exit code.
 *
 * @param {string} sshCmd - SSH invocation, split on spaces (e.g. "ssh -p 22 root@1.2.3.4").
 * @param {string} command - Command to run; passed to ssh as one trailing argument.
 * @param {{ silent?: boolean, forceTTY?: boolean, keepAlive?: boolean }} [options]
 *   `silent` discards all output instead of inheriting the parent's stdio;
 *   `forceTTY` adds `-t` unless it is already present;
 *   `keepAlive` adds ServerAliveInterval/ServerAliveCountMax options.
 * @returns {Promise<number>} Exit code of the process; 1 when it could not be
 *   started or was terminated by a signal.
 */
export const sshExecStream = async (sshCmd, command, options) => {
    return new Promise((resolve) => {
        const sshParts = sshCmd.split(" ").filter((p) => p);
        const [sshBinary, ...baseArgs] = sshParts;
        if (!sshBinary) {
            // spawn() would throw on an undefined binary; report it as a failure.
            resolve(1);
            return;
        }

        // Build SSH args
        let sshArgs = baseArgs;
        // Add -t flag if requested and not already present
        if (options?.forceTTY && !sshParts.includes("-t")) {
            sshArgs = ["-t", ...sshArgs];
        }
        // ServerAliveInterval=30 sends a keepalive every 30 seconds;
        // ServerAliveCountMax=120 tolerates 120 misses (60 minutes total).
        if (options?.keepAlive) {
            sshArgs = ["-o", "ServerAliveInterval=30", "-o", "ServerAliveCountMax=120", ...sshArgs];
        }
        sshArgs = [...sshArgs, command];

        const spawnOptions = options?.silent
            ? { stdio: ["ignore", "ignore", "ignore"] }
            : { stdio: "inherit" };
        const proc = spawn(sshBinary, sshArgs, spawnOptions);

        proc.on("close", (code) => {
            // `code` is null when the process was killed by a signal; that is
            // a failure and must not be reported as exit code 0.
            resolve(code ?? 1);
        });
        // Spawn failure (e.g. binary not found).
        proc.on("error", () => {
            resolve(1);
        });
    });
};
|
|
77
|
+
/**
 * Copy a local file to a remote host via SCP, deriving the target from an SSH
 * command line.
 *
 * The SSH command is split on spaces and scanned for the destination host.
 * Options that take a value (such as `-i keyfile` or `-o Option=value`) have
 * that value skipped so it is not mistaken for the host. Options that scp
 * accepts with the same meaning (`-c`, `-F`, `-i`, `-J`, `-o`) are forwarded,
 * `-p port` becomes scp's `-P port`, and `-l user` is folded into the target
 * as `user@host`. Other flags are ignored.
 *
 * @param {string} sshCmd - SSH invocation, e.g. "ssh -p 2222 -i ~/.ssh/key root@1.2.3.4".
 * @param {string} localPath - Path of the local file to copy.
 * @param {string} remotePath - Destination path on the remote host.
 * @returns {Promise<boolean>} true when scp exits with code 0; false when the
 *   host cannot be parsed, scp cannot be started, or scp fails.
 */
export const scpFile = async (sshCmd, localPath, remotePath) => {
    // ssh options that consume a value (see ssh(1)).
    const valueFlags = new Set([
        "-B", "-b", "-c", "-D", "-E", "-e", "-F", "-I", "-i", "-J", "-L",
        "-l", "-m", "-O", "-o", "-p", "-Q", "-R", "-S", "-W", "-w",
    ]);
    // Subset of the above that scp(1) accepts with identical meaning.
    const forwardedFlags = new Set(["-c", "-F", "-i", "-J", "-o"]);

    const sshParts = sshCmd.split(" ").filter((p) => p);
    const forwarded = [];
    let host = "";
    let port = "";
    let user = "";

    // Index 0 is the ssh binary itself.
    for (let i = 1; i < sshParts.length; i++) {
        const part = sshParts[i];
        if (!part.startsWith("-")) {
            // First non-option token is the destination.
            host = part;
            break;
        }
        const flag = part.slice(0, 2);
        if (!valueFlags.has(flag)) {
            continue; // boolean flag such as -t or -A
        }
        // The value is either attached ("-p2222") or the next token ("-p 2222").
        let value;
        if (part.length > 2) {
            value = part.slice(2);
        } else {
            i++;
            value = sshParts[i];
        }
        if (value === undefined) {
            break; // dangling option at the end of the command
        }
        if (flag === "-p") {
            port = value;
        } else if (flag === "-l") {
            user = value;
        } else if (forwardedFlags.has(flag)) {
            forwarded.push(flag, value);
        }
    }

    if (!host) {
        console.error("Could not parse host from SSH command");
        return false;
    }

    // scp's -l means "bandwidth limit", so a login name must go into the target.
    const target = user && !host.includes("@") ? `${user}@${host}` : host;

    // Only pass -P when a port was given explicitly, so a Port configured in
    // ssh_config is honoured the same way it is for plain ssh.
    const scpArgs = [
        ...forwarded,
        ...(port ? ["-P", port] : []),
        localPath,
        `${target}:${remotePath}`,
    ];

    return new Promise((resolve) => {
        const proc = spawn("scp", scpArgs, { stdio: "inherit" });
        proc.on("close", (code) => {
            resolve(code === 0);
        });
        proc.on("error", () => {
            resolve(false);
        });
    });
};
|
|
115
|
+
//# sourceMappingURL=ssh.js.map
|
package/dist/ssh.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ssh.js","sourceRoot":"","sources":["../src/ssh.ts"],"names":[],"mappings":"AAAA,OAAO,EAAqB,KAAK,EAAE,MAAM,eAAe,CAAC;AAQzD;;GAEG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,KAAK,EAC3B,MAAc,EACd,OAAe,EACf,OAAiC,EACZ,EAAE;IACvB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC9B,2EAA2E;QAC3E,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;QACpD,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC9B,IAAI,OAAO,GAAG,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAErC,sDAAsD;QACtD,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;YACxB,0DAA0D;YAC1D,uEAAuE;YACvE,OAAO,GAAG,CAAC,IAAI,EAAE,wBAAwB,EAAE,IAAI,EAAE,yBAAyB,EAAE,GAAG,OAAO,CAAC,CAAC;QACzF,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEtB,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,EAAE,OAAO,EAAE;YACtC,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC;SACjC,CAAC,CAAC;QAEH,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC3B,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE;YAC/B,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC3B,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACzB,OAAO,CAAC;gBACP,MAAM;gBACN,MAAM;gBACN,QAAQ,EAAE,IAAI,IAAI,CAAC;aACnB,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACxB,OAAO,CAAC;gBACP,MAAM;gBACN,MAAM,EAAE,GAAG,CAAC,OAAO;gBACnB,QAAQ,EAAE,CAAC;aACX,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,KAAK,EACjC,MAAc,EACd,OAAe,EACf,OAAuE,EACrD,EAAE;IACpB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC9B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;QACpD,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAE9B,iBAAiB;QACjB,IAAI,OAAO,GAAG,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAErC,mDAAmD;QACnD,IAAI,OAAO,EAAE,QAAQ,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACnD,OAAO,GAAG,CAAC,I
AAI,EAAE,GAAG,OAAO,CAAC,CAAC;QAC9B,CAAC;QAED,sDAAsD;QACtD,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;YACxB,0DAA0D;YAC1D,uEAAuE;YACvE,OAAO,GAAG,CAAC,IAAI,EAAE,wBAAwB,EAAE,IAAI,EAAE,yBAAyB,EAAE,GAAG,OAAO,CAAC,CAAC;QACzF,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEtB,MAAM,YAAY,GAAiB,OAAO,EAAE,MAAM;YACjD,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE;YAC3C,CAAC,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;QAExB,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;QAErD,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACzB,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;QACpB,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACrB,OAAO,CAAC,CAAC,CAAC,CAAC;QACZ,CAAC,CAAC,CAAC;IACJ,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,KAAK,EAAE,MAAc,EAAE,SAAiB,EAAE,UAAkB,EAAoB,EAAE;IACxG,gCAAgC;IAChC,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IACpD,IAAI,IAAI,GAAG,EAAE,CAAC;IACd,IAAI,IAAI,GAAG,IAAI,CAAC;IAChB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa;IAExB,OAAO,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QAC5B,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;YACrD,IAAI,GAAG,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACvB,CAAC,IAAI,CAAC,CAAC;QACR,CAAC;aAAM,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACzC,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YACnB,MAAM;QACP,CAAC;aAAM,CAAC;YACP,CAAC,EAAE,CAAC;QACL,CAAC;IACF,CAAC;IAED,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;QACvD,OAAO,KAAK,CAAC;IACd,CAAC;IAED,oBAAoB;IACpB,MAAM,OAAO,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,IAAI,IAAI,UAAU,EAAE,CAAC,CAAC;IAEjE,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC9B,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;QAEzD,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;YACzB,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC;QACrB,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACrB,OAAO,CAAC,KAAK,CAAC,CAAC;QAChB,CAAC,CAAC,CAAC;IACJ,CAAC,CA
AC,CAAC;AACJ,CAAC,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/** One GPU entry: a numeric id, a name, and a memory description string. */
export interface GPU {
    id: number;
    name: string;
    // Stored as text rather than a number. NOTE(review): exact format not visible here; confirm.
    memory: string;
}
|
|
6
|
+
/**
 * Record for one model entry: the model string, a port, a list of GPU
 * numbers, and a pid.
 */
export interface Model {
    model: string;
    port: number;
    // NOTE(review): presumably values matching `GPU.id`; confirm against callers.
    gpu: Array<number>;
    pid: number;
}
|
|
12
|
+
/**
 * Configuration for one pod: its SSH command string, its GPUs, and its models
 * keyed by a string name.
 */
export interface Pod {
    ssh: string;
    gpus: Array<GPU>;
    models: Record<string, Model>;
    modelsPath?: string;
    // Optional; restricted to one of these three literal values.
    vllmVersion?: "release" | "nightly" | "gpt-oss";
}
|
|
19
|
+
/** Top-level configuration: all pods keyed by a string name, plus an optional `active` entry. */
export interface Config {
    pods: Record<string, Pod>;
    // NOTE(review): presumably a key of `pods`; confirm against callers.
    active?: string;
}
|
|
23
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,GAAG;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,KAAK;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,EAAE,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,GAAG;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,GAAG,EAAE,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,SAAS,GAAG,SAAS,GAAG,SAAS,CAAC;CAChD;AAED,MAAM,WAAW,MAAM;IACtB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,+BAA+B"}
|
package/package.json
CHANGED
|
@@ -1,42 +1,40 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
"LICENSE"
|
|
41
|
-
]
|
|
2
|
+
"name": "@mariozechner/pi",
|
|
3
|
+
"version": "0.5.0",
|
|
4
|
+
"description": "CLI tool for managing vLLM deployments on GPU pods",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"pi": "dist/cli.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"clean": "rm -rf dist",
|
|
11
|
+
"build": "tsc -p tsconfig.build.json && chmod +x dist/cli.js && cp src/models.json dist/",
|
|
12
|
+
"check": "biome check --write .",
|
|
13
|
+
"prepublishOnly": "npm run clean && npm run build"
|
|
14
|
+
},
|
|
15
|
+
"files": [
|
|
16
|
+
"dist"
|
|
17
|
+
],
|
|
18
|
+
"keywords": [
|
|
19
|
+
"llm",
|
|
20
|
+
"vllm",
|
|
21
|
+
"gpu",
|
|
22
|
+
"ai",
|
|
23
|
+
"cli"
|
|
24
|
+
],
|
|
25
|
+
"author": "Mario Zechner",
|
|
26
|
+
"license": "MIT",
|
|
27
|
+
"repository": {
|
|
28
|
+
"type": "git",
|
|
29
|
+
"url": "git+https://github.com/badlogic/pi-mono.git",
|
|
30
|
+
"directory": "packages/pods"
|
|
31
|
+
},
|
|
32
|
+
"engines": {
|
|
33
|
+
"node": ">=20.0.0"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"@mariozechner/pi-agent": "^0.5.0",
|
|
37
|
+
"chalk": "^5.5.0"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {}
|
|
42
40
|
}
|
package/LICENSE
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 Mario Zechner
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|