bat-cli 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. {bat_cli-0.1.1 → bat_cli-0.1.2}/PKG-INFO +100 -43
  2. {bat_cli-0.1.1 → bat_cli-0.1.2}/README.md +94 -42
  3. bat_cli-0.1.2/pyproject.toml +95 -0
  4. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/add/client.py +2 -0
  5. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/bat_cli.egg-info/PKG-INFO +100 -43
  6. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/cli.py +39 -17
  7. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/agent.py +123 -15
  8. bat_cli-0.1.2/src/create/templates/agent/__main__.py +12 -0
  9. bat_cli-0.1.2/src/create/templates/agent/llm_client.py.template +30 -0
  10. bat_cli-0.1.2/src/create/templates/agent/src/__init__.py +4 -0
  11. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/src/graph.py +5 -5
  12. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/commands.py +54 -40
  13. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/adapter.py +51 -22
  14. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/bench_runner.py +28 -10
  15. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/eval_config.py +42 -18
  16. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/evaluator.py +13 -4
  17. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/metrics/llm_evaluators.py +60 -17
  18. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/metrics/metrics.py +19 -5
  19. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/metrics/qualitative_helpers.py +6 -2
  20. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/orchestrator.py +25 -9
  21. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/plotter.py +263 -57
  22. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/image_defaults.py +4 -2
  23. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/push/push.py +13 -3
  24. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/set/env.py +7 -6
  25. {bat_cli-0.1.1 → bat_cli-0.1.2}/tests/test_create_new_agent.py +71 -34
  26. {bat_cli-0.1.1 → bat_cli-0.1.2}/tests/test_eval_commands.py +52 -21
  27. bat_cli-0.1.1/pyproject.toml +0 -49
  28. bat_cli-0.1.1/src/create/templates/agent/__main__.py +0 -2
  29. bat_cli-0.1.1/src/create/templates/agent/llm_client.py.template +0 -36
  30. bat_cli-0.1.1/src/create/templates/agent/tests/__init__.py +0 -0
  31. {bat_cli-0.1.1 → bat_cli-0.1.2}/setup.cfg +0 -0
  32. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/add/__init__.py +0 -0
  33. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/bat_cli.egg-info/SOURCES.txt +0 -0
  34. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/bat_cli.egg-info/dependency_links.txt +0 -0
  35. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/bat_cli.egg-info/entry_points.txt +0 -0
  36. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/bat_cli.egg-info/requires.txt +0 -0
  37. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/bat_cli.egg-info/top_level.txt +0 -0
  38. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/build/__init__.py +0 -0
  39. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/build/build.py +0 -0
  40. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/__init__.py +0 -0
  41. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/.dockerignore +0 -0
  42. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/.env.template +0 -0
  43. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/.python-version +0 -0
  44. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/Dockerfile +0 -0
  45. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/Makefile +0 -0
  46. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/README.md +0 -0
  47. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/agent.json.template +0 -0
  48. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/agent.spec +0 -0
  49. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/config.yaml +0 -0
  50. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/create/templates/agent/pyproject.toml.template +0 -0
  51. {bat_cli-0.1.1/src/create/templates/agent/src → bat_cli-0.1.2/src/create/templates/agent/src/llm_clients}/__init__.py +0 -0
  52. {bat_cli-0.1.1/src/create/templates/agent/src/llm_clients → bat_cli-0.1.2/src/create/templates/agent/tests}/__init__.py +0 -0
  53. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/__init__.py +0 -0
  54. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/__init__.py +0 -0
  55. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/contracts.py +1 -1
  56. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/eval/engine/metrics/__init__.py +0 -0
  57. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/push/__init__.py +0 -0
  58. {bat_cli-0.1.1 → bat_cli-0.1.2}/src/set/__init__.py +0 -0
@@ -1,7 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bat-cli
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: CLI tool to interact with BAT agents
5
+ Author-email: Enrico SBUTTONI <enrico.sbuttoni@bubbleran.com>
6
+ Project-URL: Homepage, https://bubbleran.com/
7
+ Project-URL: Repository, https://github.com/bubbleran/bat
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
5
10
  Requires-Python: >=3.12
6
11
  Description-Content-Type: text/markdown
7
12
  Requires-Dist: typer>=0.12.3
@@ -18,34 +23,30 @@ A CLI tool for creating, building, and evaluating BAT agent projects.
18
23
 
19
24
  ## Prerequisites
20
25
 
21
- - Python and [uv](https://docs.astral.sh/uv/) installed
26
+ - Python 3.12+ and [uv](https://docs.astral.sh/uv/) installed
22
27
  - Docker installed (required for `bat build` and `bat push`)
23
- - For evaluation commands: an existing BAT agent root containing `agent.json`, `config.yaml`, and `src/graph.py`
28
+ - For evaluation commands: an existing BAT agent root containing `agent.json`, `config.yaml`, and `pyproject.toml`
24
29
 
25
30
  ---
26
31
 
27
32
  ## Installation
28
33
 
29
- ### Option A — build and install a standalone binary (Linux/macOS)
34
+ ### Option A — install system-wide with `uv tool` (recommended)
30
35
 
31
- Run the helper script from the repo root:
36
+ Installs `bat` into an isolated environment and puts the executable on your `PATH`,
37
+ so it is available from any directory.
32
38
 
33
- ```bash
34
- bash cli/build_and_install.sh
35
- ```
36
-
37
- This will:
39
+ ````bash
40
+ # from PyPI
41
+ uv tool install bat-cli
38
42
 
39
- 1. Sync `dev` and `packaging` dependency groups via `uv`.
40
- 2. Build a one-file executable with PyInstaller.
41
- 3. Move it to `~/.local/bin/bat` (uses `sudo` only when necessary).
42
43
 
43
- Make sure `~/.local/bin` is in your `PATH`:
44
+ Make sure the uv tools bin directory is on your `PATH` (uv prints the path on first
45
+ install; this is usually `~/.local/bin`):
44
46
 
45
47
  ```bash
46
- echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc # or ~/.zshrc
47
- source ~/.bashrc
48
- ```
48
+ uv tool update-shell # adds the uv tools dir to your shell profile
49
+ ````
49
50
 
50
51
  Then verify:
51
52
 
@@ -53,20 +54,39 @@ Then verify:
53
54
  bat --help
54
55
  ```
55
56
 
56
- ### Option B build manually
57
+ To upgrade or remove later:
57
58
 
58
59
  ```bash
59
- uv sync --group dev --group packaging
60
- uv run pyinstaller --clean --noconfirm bat_cli.spec
61
- # binary is at dist/bat (Linux/macOS) or dist/bat.exe (Windows)
60
+ uv tool upgrade bat-cli
61
+ uv tool uninstall bat-cli
62
62
  ```
63
63
 
64
- On **Windows**, copy `dist/bat.exe` to a folder on your `PATH` (e.g. `C:\tools\bat`) and open a new terminal.
64
+ ### Option B — install into a virtual environment with `uv pip`
65
+
66
+ Use this when you want `bat` scoped to a specific project/venv rather than installed
67
+ globally.
65
68
 
66
- > PyInstaller builds are OS-specific — build on each target OS.
69
+ ```bash
70
+ uv venv # create .venv (skip if you already have one)
71
+ source .venv/bin/activate # .venv\Scripts\activate on Windows
72
+
73
+ # from PyPI
74
+ uv pip install bat-cli
75
+
76
+ # or from a local checkout (run from the cli/ directory)
77
+ uv pip install . # add -e for an editable/development install
78
+ ```
79
+
80
+ `bat` is available whenever that virtual environment is active:
81
+
82
+ ```bash
83
+ bat --help
84
+ ```
67
85
 
68
86
  ### Option C — run without installing (development)
69
87
 
88
+ From the `cli/` directory:
89
+
70
90
  ```bash
71
91
  uv sync --group dev
72
92
  uv run bat --help
@@ -82,7 +102,7 @@ All examples below show `bat ...`; replace with `uv run bat ...` when using this
82
102
  bat
83
103
  ├── init
84
104
  │ └── agent
85
- │ ├── [name=default]
105
+ │ ├── <name>
86
106
  │ ├── --clients, -c
87
107
  │ ├── --output-dir, -o
88
108
  │ ├── --force, -f
@@ -103,7 +123,11 @@ bat
103
123
  ├── eval
104
124
  │ ├── init
105
125
  │ │ └── --force, -f
106
- └── run
126
+ ├── run
127
+ │ ├── show
128
+ │ └── plot
129
+ │ ├── --folder, -f
130
+ │ └── --filter, -F
107
131
  ├── build
108
132
  │ ├── --context, -C
109
133
  │ ├── --docker-registry
@@ -123,6 +147,7 @@ Built-in help is available at every level:
123
147
  ```bash
124
148
  bat --help
125
149
  bat init agent --help
150
+ bat eval --help
126
151
  bat build --help
127
152
  ```
128
153
 
@@ -133,10 +158,6 @@ bat build --help
133
158
  ### 1. Create a new agent
134
159
 
135
160
  ```bash
136
- # default name
137
- bat init agent
138
-
139
- # custom name
140
161
  bat init agent my_agent
141
162
 
142
163
  # specific output directory
@@ -144,6 +165,9 @@ bat init agent my_agent --output-dir .
144
165
 
145
166
  # pre-generate LLM clients
146
167
  bat init agent my_agent --clients reformulator,planner,executor
168
+
169
+ # set the port/model/provider written to .env
170
+ bat init agent my_agent --port 9900 --model gpt-4o-mini --model-provider openai
147
171
  ```
148
172
 
149
173
  ### 2. Add clients to an existing agent
@@ -159,10 +183,10 @@ bat add client planner,executor --force
159
183
 
160
184
  ### 3. Update agent environment variables
161
185
 
162
- Run from the agent root (updates or creates `.env`):
186
+ Run from the agent root (updates an existing `.env`):
163
187
 
164
188
  ```bash
165
- bat set env --port 8080 --model gpt-4.1-mini --model-provider openai
189
+ bat set env --port 8080 --model gpt-4o-mini --model-provider openai
166
190
 
167
191
  # also set Docker defaults for build/push
168
192
  bat set env --docker-registry hub.bubbleran.com --repo orama/labs/my-agent
@@ -190,12 +214,16 @@ If `BAT_DOCKER_REGISTRY` and `BAT_DOCKER_REPO` are already set in `.env` or the
190
214
 
191
215
  ### 5. Run evaluation
192
216
 
193
- From an existing agent root:
217
+ Run all `eval` commands from an existing agent root (must contain `agent.json`,
218
+ `config.yaml`, and `pyproject.toml`):
194
219
 
195
220
  ```bash
196
221
  # scaffold evaluation files
197
222
  bat eval init
198
223
 
224
+ # inspect the resolved configuration
225
+ bat eval show
226
+
199
227
  # run evaluation
200
228
  bat eval run
201
229
  ```
@@ -210,22 +238,51 @@ Minimal `eval/eval.yaml`:
210
238
 
211
239
  ```yaml
212
240
  evaluation:
213
- dataset: eval/input/tasks.json
214
- output_dir: eval/output
241
+ dataset: eval/input/tasks.json # default path if omitted
242
+ output_dir: eval/output # default path if omitted
243
+ agent_url: http://127.0.0.1:9900 # must include the scheme; this is the default
244
+ agent_startup_timeout_s: 45
245
+ agent_shutdown_timeout_s: 10
215
246
  k: 1
216
- qualitative: true
217
- save_attempts: false
218
-
219
- judge:
220
- provider: ollama
221
- model: your-judge-model
222
- base_url: http://localhost:11434
247
+ qualitative: false # set true to enable LLM judge scoring
223
248
 
224
249
  models:
225
250
  - provider: openai
226
251
  model: your-model-name
252
+ - provider: ollama
253
+ model: your-local-model
254
+ base_url: http://localhost:11434
255
+
256
+ # required only when qualitative: true
257
+ judge:
258
+ provider: ollama
259
+ model: local-judge-model
260
+ base_url: http://localhost:11434
261
+ # api_key_env: BAT_JUDGE_API_KEY # env var name holding the judge's API key
227
262
  ```
228
263
 
229
- `eval run` requires the agent virtual environment at `.venv/bin/python` (`.venv/Scripts/python.exe` on Windows).
264
+ Notes:
265
+
266
+ - `bat eval run` starts the agent via `uv run .` from the agent root and waits until
267
+ `agent_url` accepts a TCP connection, so the agent project must have its
268
+ dependencies installed (its own `.venv`).
269
+ - `models` entries may also be written as `"<provider>:<model>"` strings.
270
+ - For models that require an API key, set it in the agent's `.env` under
271
+ `<PROVIDER>_API_KEY` (e.g. `OPENAI_API_KEY`).
272
+
273
+ ### 6. Plot evaluation metrics
274
+
275
+ `bat eval plot` reads the `metrics.json` files produced by `eval run` and renders
276
+ charts. Point `--folder` at an evaluation output directory; each sub-folder
277
+ containing a `metrics.json` is treated as one run.
278
+
279
+ ```bash
280
+ # plot every run found under the output folder
281
+ bat eval plot --folder eval/output
282
+
283
+ # restrict the per-task charts to task ids containing a substring
284
+ bat eval plot --folder eval/output --filter smoke
285
+ ```
230
286
 
231
- For model that requires API_KEYS set it into the agent `.env` under <PROVIDER>\_API_KEY.
287
+ Charts are saved back into the given folder. `--filter` only narrows the per-task
288
+ charts; summary charts always cover all runs.
@@ -4,34 +4,30 @@ A CLI tool for creating, building, and evaluating BAT agent projects.
4
4
 
5
5
  ## Prerequisites
6
6
 
7
- - Python and [uv](https://docs.astral.sh/uv/) installed
7
+ - Python 3.12+ and [uv](https://docs.astral.sh/uv/) installed
8
8
  - Docker installed (required for `bat build` and `bat push`)
9
- - For evaluation commands: an existing BAT agent root containing `agent.json`, `config.yaml`, and `src/graph.py`
9
+ - For evaluation commands: an existing BAT agent root containing `agent.json`, `config.yaml`, and `pyproject.toml`
10
10
 
11
11
  ---
12
12
 
13
13
  ## Installation
14
14
 
15
- ### Option A — build and install a standalone binary (Linux/macOS)
15
+ ### Option A — install system-wide with `uv tool` (recommended)
16
16
 
17
- Run the helper script from the repo root:
17
+ Installs `bat` into an isolated environment and puts the executable on your `PATH`,
18
+ so it is available from any directory.
18
19
 
19
- ```bash
20
- bash cli/build_and_install.sh
21
- ```
22
-
23
- This will:
20
+ ````bash
21
+ # from PyPI
22
+ uv tool install bat-cli
24
23
 
25
- 1. Sync `dev` and `packaging` dependency groups via `uv`.
26
- 2. Build a one-file executable with PyInstaller.
27
- 3. Move it to `~/.local/bin/bat` (uses `sudo` only when necessary).
28
24
 
29
- Make sure `~/.local/bin` is in your `PATH`:
25
+ Make sure the uv tools bin directory is on your `PATH` (uv prints the path on first
26
+ install; this is usually `~/.local/bin`):
30
27
 
31
28
  ```bash
32
- echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc # or ~/.zshrc
33
- source ~/.bashrc
34
- ```
29
+ uv tool update-shell # adds the uv tools dir to your shell profile
30
+ ````
35
31
 
36
32
  Then verify:
37
33
 
@@ -39,20 +35,39 @@ Then verify:
39
35
  bat --help
40
36
  ```
41
37
 
42
- ### Option B build manually
38
+ To upgrade or remove later:
43
39
 
44
40
  ```bash
45
- uv sync --group dev --group packaging
46
- uv run pyinstaller --clean --noconfirm bat_cli.spec
47
- # binary is at dist/bat (Linux/macOS) or dist/bat.exe (Windows)
41
+ uv tool upgrade bat-cli
42
+ uv tool uninstall bat-cli
48
43
  ```
49
44
 
50
- On **Windows**, copy `dist/bat.exe` to a folder on your `PATH` (e.g. `C:\tools\bat`) and open a new terminal.
45
+ ### Option B — install into a virtual environment with `uv pip`
46
+
47
+ Use this when you want `bat` scoped to a specific project/venv rather than installed
48
+ globally.
51
49
 
52
- > PyInstaller builds are OS-specific — build on each target OS.
50
+ ```bash
51
+ uv venv # create .venv (skip if you already have one)
52
+ source .venv/bin/activate # .venv\Scripts\activate on Windows
53
+
54
+ # from PyPI
55
+ uv pip install bat-cli
56
+
57
+ # or from a local checkout (run from the cli/ directory)
58
+ uv pip install . # add -e for an editable/development install
59
+ ```
60
+
61
+ `bat` is available whenever that virtual environment is active:
62
+
63
+ ```bash
64
+ bat --help
65
+ ```
53
66
 
54
67
  ### Option C — run without installing (development)
55
68
 
69
+ From the `cli/` directory:
70
+
56
71
  ```bash
57
72
  uv sync --group dev
58
73
  uv run bat --help
@@ -68,7 +83,7 @@ All examples below show `bat ...`; replace with `uv run bat ...` when using this
68
83
  bat
69
84
  ├── init
70
85
  │ └── agent
71
- │ ├── [name=default]
86
+ │ ├── <name>
72
87
  │ ├── --clients, -c
73
88
  │ ├── --output-dir, -o
74
89
  │ ├── --force, -f
@@ -89,7 +104,11 @@ bat
89
104
  ├── eval
90
105
  │ ├── init
91
106
  │ │ └── --force, -f
92
- └── run
107
+ ├── run
108
+ │ ├── show
109
+ │ └── plot
110
+ │ ├── --folder, -f
111
+ │ └── --filter, -F
93
112
  ├── build
94
113
  │ ├── --context, -C
95
114
  │ ├── --docker-registry
@@ -109,6 +128,7 @@ Built-in help is available at every level:
109
128
  ```bash
110
129
  bat --help
111
130
  bat init agent --help
131
+ bat eval --help
112
132
  bat build --help
113
133
  ```
114
134
 
@@ -119,10 +139,6 @@ bat build --help
119
139
  ### 1. Create a new agent
120
140
 
121
141
  ```bash
122
- # default name
123
- bat init agent
124
-
125
- # custom name
126
142
  bat init agent my_agent
127
143
 
128
144
  # specific output directory
@@ -130,6 +146,9 @@ bat init agent my_agent --output-dir .
130
146
 
131
147
  # pre-generate LLM clients
132
148
  bat init agent my_agent --clients reformulator,planner,executor
149
+
150
+ # set the port/model/provider written to .env
151
+ bat init agent my_agent --port 9900 --model gpt-4o-mini --model-provider openai
133
152
  ```
134
153
 
135
154
  ### 2. Add clients to an existing agent
@@ -145,10 +164,10 @@ bat add client planner,executor --force
145
164
 
146
165
  ### 3. Update agent environment variables
147
166
 
148
- Run from the agent root (updates or creates `.env`):
167
+ Run from the agent root (updates an existing `.env`):
149
168
 
150
169
  ```bash
151
- bat set env --port 8080 --model gpt-4.1-mini --model-provider openai
170
+ bat set env --port 8080 --model gpt-4o-mini --model-provider openai
152
171
 
153
172
  # also set Docker defaults for build/push
154
173
  bat set env --docker-registry hub.bubbleran.com --repo orama/labs/my-agent
@@ -176,12 +195,16 @@ If `BAT_DOCKER_REGISTRY` and `BAT_DOCKER_REPO` are already set in `.env` or the
176
195
 
177
196
  ### 5. Run evaluation
178
197
 
179
- From an existing agent root:
198
+ Run all `eval` commands from an existing agent root (must contain `agent.json`,
199
+ `config.yaml`, and `pyproject.toml`):
180
200
 
181
201
  ```bash
182
202
  # scaffold evaluation files
183
203
  bat eval init
184
204
 
205
+ # inspect the resolved configuration
206
+ bat eval show
207
+
185
208
  # run evaluation
186
209
  bat eval run
187
210
  ```
@@ -196,22 +219,51 @@ Minimal `eval/eval.yaml`:
196
219
 
197
220
  ```yaml
198
221
  evaluation:
199
- dataset: eval/input/tasks.json
200
- output_dir: eval/output
222
+ dataset: eval/input/tasks.json # default path if omitted
223
+ output_dir: eval/output # default path if omitted
224
+ agent_url: http://127.0.0.1:9900 # must include the scheme; this is the default
225
+ agent_startup_timeout_s: 45
226
+ agent_shutdown_timeout_s: 10
201
227
  k: 1
202
- qualitative: true
203
- save_attempts: false
204
-
205
- judge:
206
- provider: ollama
207
- model: your-judge-model
208
- base_url: http://localhost:11434
228
+ qualitative: false # set true to enable LLM judge scoring
209
229
 
210
230
  models:
211
231
  - provider: openai
212
232
  model: your-model-name
233
+ - provider: ollama
234
+ model: your-local-model
235
+ base_url: http://localhost:11434
236
+
237
+ # required only when qualitative: true
238
+ judge:
239
+ provider: ollama
240
+ model: local-judge-model
241
+ base_url: http://localhost:11434
242
+ # api_key_env: BAT_JUDGE_API_KEY # env var name holding the judge's API key
213
243
  ```
214
244
 
215
- `eval run` requires the agent virtual environment at `.venv/bin/python` (`.venv/Scripts/python.exe` on Windows).
245
+ Notes:
246
+
247
+ - `bat eval run` starts the agent via `uv run .` from the agent root and waits until
248
+ `agent_url` accepts a TCP connection, so the agent project must have its
249
+ dependencies installed (its own `.venv`).
250
+ - `models` entries may also be written as `"<provider>:<model>"` strings.
251
+ - For models that require an API key, set it in the agent's `.env` under
252
+ `<PROVIDER>_API_KEY` (e.g. `OPENAI_API_KEY`).
253
+
254
+ ### 6. Plot evaluation metrics
255
+
256
+ `bat eval plot` reads the `metrics.json` files produced by `eval run` and renders
257
+ charts. Point `--folder` at an evaluation output directory; each sub-folder
258
+ containing a `metrics.json` is treated as one run.
259
+
260
+ ```bash
261
+ # plot every run found under the output folder
262
+ bat eval plot --folder eval/output
263
+
264
+ # restrict the per-task charts to task ids containing a substring
265
+ bat eval plot --folder eval/output --filter smoke
266
+ ```
216
267
 
217
- For model that requires API_KEYS set it into the agent `.env` under <PROVIDER>\_API_KEY.
268
+ Charts are saved back into the given folder. `--filter` only narrows the per-task
269
+ charts; summary charts always cover all runs.
@@ -0,0 +1,95 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "bat-cli"
7
+ version = "0.1.2"
8
+ authors = [
9
+ { name="Enrico SBUTTONI", email="enrico.sbuttoni@bubbleran.com" },
10
+ ]
11
+ description = "CLI tool to interact with BAT agents"
12
+ readme = "README.md"
13
+ requires-python = ">=3.12"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "Operating System :: OS Independent",
17
+ ]
18
+
19
+ dependencies = [
20
+ "typer>=0.12.3",
21
+ "bat-adk>=2026.06rc1",
22
+ "bat-adk[openai]",
23
+ "a2a-sdk>=1.0.0",
24
+ "python-dotenv>=1.0.1",
25
+ "typing-extensions>=4.12.0",
26
+ "matplotlib>=3.8",
27
+ ]
28
+
29
+ [project.scripts]
30
+ bat = "cli:main"
31
+
32
+ [project.urls]
33
+ "Homepage" = "https://bubbleran.com/"
34
+ "Repository" = "https://github.com/bubbleran/bat"
35
+
36
+ [dependency-groups]
37
+ dev = [
38
+ "pytest>=8.0",
39
+ ]
40
+ packaging = [
41
+ "pyinstaller>=6.0",
42
+ ]
43
+
44
+ [tool.setuptools]
45
+ package-dir = {"" = "src"}
46
+ py-modules = ["cli", "image_defaults"]
47
+
48
+ [tool.setuptools.packages.find]
49
+ where = ["src"]
50
+
51
+ [tool.setuptools.package-data]
52
+ create = [
53
+ "templates/agent/*",
54
+ "templates/agent/.*",
55
+ "templates/agent/src/*",
56
+ "templates/agent/src/llm_clients/*",
57
+ "templates/agent/tests/*",
58
+ ]
59
+
60
+ [tool.pytest.ini_options]
61
+ pythonpath = ["src"]
62
+
63
+ [tool.ruff]
64
+ target-version = "py312"
65
+ line-length = 80
66
+
67
+ # Files or directories to ignore entirely
68
+ exclude = [
69
+ ".git",
70
+ ".venv",
71
+ "__pycache__",
72
+ "build",
73
+ "dist",
74
+ "src/create/templates", # agent scaffold templates, not valid Python
75
+ ]
76
+
77
+ [tool.ruff.lint]
78
+ select = [
79
+ "E", # pycodestyle errors (standard spacing, formatting)
80
+ "W", # pycodestyle warnings
81
+ "F", # Pyflakes (catches unused imports, undefined variables)
82
+ "I", # isort (automatically sorts your imports alphabetically)
83
+ "B", # flake8-bugbear (catches tricky hidden Python bugs)
84
+ "SIM", # flake8-simplify (helps you write simpler, shorter code)
85
+ ]
86
+ ignore = [
87
+ "B008", # function call in default argument (this is the Typer/FastAPI pattern)
88
+ ]
89
+
90
+ # Custom settings for specific files
91
+ [tool.ruff.lint.per-file-ignores]
92
+ "tests/*" = ["S101"] # Allow 'assert' statements in tests without warning
93
+
94
+ [tool.ruff.format]
95
+ indent-style = "space"
@@ -1,6 +1,8 @@
1
1
  from pathlib import Path
2
+
2
3
  from create.agent import _write_llm_clients
3
4
 
5
+
4
6
  def add_clients_to_existing_agent(
5
7
  agent_dir: Path,
6
8
  *,