@cliwatch/cli-bench 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/CHANGELOG.md +33 -0
  2. package/package.json +11 -2
package/CHANGELOG.md ADDED
@@ -0,0 +1,33 @@
1
+ # Changelog
2
+
3
+ ## 0.5.0 (2026-02-11)
4
+
5
+ ### Features
6
+
7
+ - **`system_prompt` config field** — append custom instructions to the default agent system prompt via cli-bench.yaml
8
+
9
+ ## 0.4.0 (2026-02-08)
10
+
11
+ ### Features
12
+
13
+ - **Repeat support** — run tasks N times with `repeat` (per-task or global) for statistical confidence
14
+ - **Threshold checks** — fail CI when pass rates drop below configured thresholds
15
+ - **Conversation traces** — full tool call/result traces uploaded for debugging
16
+ - **Task suite hashing** — detect when task definitions change between runs
17
+
18
+ ## 0.3.0 (2026-02-05)
19
+
20
+ - Config file mode (`cli-bench.yaml`)
21
+ - File references (`file://`) with glob support
22
+ - CI metadata collection (git sha, branch, PR number)
23
+ - Upload to CLIWatch dashboard
24
+
25
+ ## 0.2.0 (2026-02-02)
26
+
27
+ - Help modes: injected, discoverable, none
28
+ - Assertion-based evaluation (10 assertion types)
29
+ - Concurrent task execution
30
+
31
+ ## 0.1.0 (2026-01-28)
32
+
33
+ Initial release.
package/package.json CHANGED
@@ -1,13 +1,22 @@
1
1
  {
2
2
  "name": "@cliwatch/cli-bench",
3
- "version": "0.5.1",
3
+ "version": "0.5.2",
4
+ "description": "LLM CLI agent testing framework — benchmark how well AI models use your CLI tool",
5
+ "keywords": ["cli", "benchmark", "llm", "testing", "ai-agent", "cliwatch", "evaluation"],
6
+ "license": "MIT",
7
+ "homepage": "https://docs.cliwatch.com",
4
8
  "type": "module",
9
+ "engines": {
10
+ "node": ">=18"
11
+ },
5
12
  "bin": {
6
13
  "cli-bench": "./dist/index.js"
7
14
  },
8
15
  "files": [
9
16
  "dist",
10
- "task_suites"
17
+ "task_suites",
18
+ "LICENSE",
19
+ "CHANGELOG.md"
11
20
  ],
12
21
  "exports": {
13
22
  ".": {