@mariozechner/pi 0.5.15 → 0.5.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/models.json +5 -5
- package/package.json +2 -2
package/dist/models.json
CHANGED
|
@@ -114,7 +114,7 @@
|
|
|
114
114
|
}
|
|
115
115
|
}
|
|
116
116
|
],
|
|
117
|
-
"notes": ""
|
|
117
|
+
"notes": "Tools/function calls only via /v1/responses endpoint."
|
|
118
118
|
},
|
|
119
119
|
"openai/gpt-oss-120b": {
|
|
120
120
|
"name": "GPT-OSS-120B",
|
|
@@ -123,25 +123,25 @@
|
|
|
123
123
|
"gpuCount": 1,
|
|
124
124
|
"gpuTypes": ["H100", "H200"],
|
|
125
125
|
"args": ["--async-scheduling", "--gpu-memory-utilization", "0.95", "--max-num-batched-tokens", "1024"],
|
|
126
|
-
"notes": "Single GPU deployment."
|
|
126
|
+
"notes": "Single GPU deployment. Tools/function calls only via /v1/responses endpoint."
|
|
127
127
|
},
|
|
128
128
|
{
|
|
129
129
|
"gpuCount": 2,
|
|
130
130
|
"gpuTypes": ["H100", "H200"],
|
|
131
131
|
"args": ["--tensor-parallel-size", "2", "--async-scheduling", "--gpu-memory-utilization", "0.94"],
|
|
132
|
-
"notes": "Recommended for H100/H200."
|
|
132
|
+
"notes": "Recommended for H100/H200. Tools/function calls only via /v1/responses endpoint."
|
|
133
133
|
},
|
|
134
134
|
{
|
|
135
135
|
"gpuCount": 4,
|
|
136
136
|
"gpuTypes": ["H100", "H200"],
|
|
137
137
|
"args": ["--tensor-parallel-size", "4", "--async-scheduling"],
|
|
138
|
-
"notes": "Higher throughput."
|
|
138
|
+
"notes": "Higher throughput. Tools/function calls only via /v1/responses endpoint."
|
|
139
139
|
},
|
|
140
140
|
{
|
|
141
141
|
"gpuCount": 8,
|
|
142
142
|
"gpuTypes": ["H100", "H200"],
|
|
143
143
|
"args": ["--tensor-parallel-size", "8", "--async-scheduling"],
|
|
144
|
-
"notes": "Maximum throughput for evaluation workloads."
|
|
144
|
+
"notes": "Maximum throughput for evaluation workloads. Tools/function calls only via /v1/responses endpoint."
|
|
145
145
|
}
|
|
146
146
|
]
|
|
147
147
|
},
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mariozechner/pi",
|
|
3
|
-
"version": "0.5.15",
|
|
3
|
+
"version": "0.5.17",
|
|
4
4
|
"description": "CLI tool for managing vLLM deployments on GPU pods",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
"node": ">=20.0.0"
|
|
35
35
|
},
|
|
36
36
|
"dependencies": {
|
|
37
|
-
"@mariozechner/pi-agent": "^0.5.15",
|
|
37
|
+
"@mariozechner/pi-agent": "^0.5.17",
|
|
38
38
|
"chalk": "^5.5.0"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {}
|