@vtstech/pi-model-test 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -0
- package/model-test.js +1 -1
- package/package.json +3 -3
package/README.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# @vtstech/pi-model-test
|
|
2
|
+
|
|
3
|
+
Model benchmark extension for the [Pi Coding Agent](https://github.com/badlogic/pi-mono).
|
|
4
|
+
|
|
5
|
+
Test any model for reasoning, tool usage, and instruction following — works with Ollama and cloud providers.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pi install "npm:@vtstech/pi-model-test"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Commands
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
/model-test               Test current Pi model (auto-detects provider)
|
|
17
|
+
/model-test qwen3:0.6b    Test a specific Ollama model
|
|
18
|
+
/model-test --all         Test every Ollama model
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Test Suites
|
|
22
|
+
|
|
23
|
+
### Ollama (6 tests)
|
|
24
|
+
|
|
25
|
+
| Test | Scoring |
|
|
26
|
+
|------|---------|
|
|
27
|
+
| Reasoning (snail puzzle) | STRONG / MODERATE / WEAK / FAIL |
|
|
28
|
+
| Thinking token support | SUPPORTED / NOT SUPPORTED |
|
|
29
|
+
| Tool usage (native + text) | STRONG / MODERATE / WEAK / FAIL |
|
|
30
|
+
| ReAct parsing | STRONG / MODERATE / WEAK / FAIL |
|
|
31
|
+
| Instruction following (JSON) | STRONG / MODERATE / WEAK / FAIL |
|
|
32
|
+
| Tool support detection | NATIVE / REACT / NONE |
|
|
33
|
+
|
|
34
|
+
### Cloud Providers (4 tests)
|
|
35
|
+
|
|
36
|
+
| Test | Scoring |
|
|
37
|
+
|------|---------|
|
|
38
|
+
| Connectivity | OK / FAIL |
|
|
39
|
+
| Reasoning | STRONG / MODERATE / WEAK / FAIL |
|
|
40
|
+
| Instruction following | STRONG / MODERATE / WEAK / FAIL |
|
|
41
|
+
| Tool usage (function calling) | STRONG / MODERATE / WEAK / FAIL |
|
|
42
|
+
|
|
43
|
+
## Features
|
|
44
|
+
|
|
45
|
+
- Auto-detects Ollama vs cloud provider (OpenRouter, Anthropic, Google, OpenAI, Groq, DeepSeek, Mistral, xAI, Together, Fireworks, Cohere)
|
|
46
|
+
- Automatic remote Ollama URL resolution
|
|
47
|
+
- Timeout resilience with auto-retry on empty responses
|
|
48
|
+
- Rate limit delay between tests (configurable)
|
|
49
|
+
- Thinking model fallback (retries with `think: true`)
|
|
50
|
+
- Tool support cache (`~/.pi/agent/cache/tool_support.json`)
|
|
51
|
+
- JSON repair for truncated output
|
|
52
|
+
- Tab-completion for model names
|
|
53
|
+
|
|
54
|
+
## Links
|
|
55
|
+
|
|
56
|
+
- [Full Documentation](https://github.com/VTSTech/pi-coding-agent#model-benchmark-model-testts)
|
|
57
|
+
- [Changelog](https://github.com/VTSTech/pi-coding-agent/blob/main/CHANGELOG.md)
|
|
58
|
+
|
|
59
|
+
## License
|
|
60
|
+
|
|
61
|
+
MIT — [VTSTech](https://www.vts-tech.org)
|
package/model-test.js
CHANGED
|
@@ -1167,7 +1167,7 @@ The JSON object must have exactly these 4 keys:
|
|
|
1167
1167
|
}
|
|
1168
1168
|
}
|
|
1169
1169
|
const branding = [
|
|
1170
|
-
` \u26A1 Pi Model Benchmark v1.0.4`,
|
|
1170
|
+
` \u26A1 Pi Model Benchmark v1.0.5`,
|
|
1171
1171
|
` Written by VTSTech`,
|
|
1172
1172
|
` GitHub: https://github.com/VTSTech`,
|
|
1173
1173
|
` Website: www.vts-tech.org`
|
package/package.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vtstech/pi-model-test",
|
|
3
|
-
"version": "1.0.4",
|
|
3
|
+
"version": "1.0.5",
|
|
4
4
|
"description": "Model benchmark/testing extension for Pi Coding Agent",
|
|
5
5
|
"main": "model-test.js",
|
|
6
|
-
"keywords": ["pi-
|
|
6
|
+
"keywords": ["pi-extensions"],
|
|
7
7
|
"license": "MIT",
|
|
8
8
|
"access": "public",
|
|
9
9
|
"type": "module",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"url": "https://github.com/VTSTech/pi-coding-agent"
|
|
15
15
|
},
|
|
16
16
|
"dependencies": {
|
|
17
|
-
"@vtstech/pi-shared": "1.0.4"
|
|
17
|
+
"@vtstech/pi-shared": "1.0.5"
|
|
18
18
|
},
|
|
19
19
|
"peerDependencies": {
|
|
20
20
|
"@mariozechner/pi-coding-agent": ">=0.66"
|