bat-cli 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bat_cli-0.1.0 → bat_cli-0.1.2}/PKG-INFO +100 -43
- {bat_cli-0.1.0 → bat_cli-0.1.2}/README.md +94 -42
- bat_cli-0.1.2/pyproject.toml +95 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/add/client.py +2 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/bat_cli.egg-info/PKG-INFO +100 -43
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/cli.py +40 -18
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/agent.py +123 -15
- bat_cli-0.1.2/src/create/templates/agent/__main__.py +12 -0
- bat_cli-0.1.2/src/create/templates/agent/llm_client.py.template +30 -0
- bat_cli-0.1.2/src/create/templates/agent/src/__init__.py +4 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/src/graph.py +5 -5
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/commands.py +54 -40
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/adapter.py +51 -22
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/bench_runner.py +28 -10
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/eval_config.py +42 -18
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/evaluator.py +13 -4
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/metrics/llm_evaluators.py +60 -17
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/metrics/metrics.py +19 -5
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/metrics/qualitative_helpers.py +6 -2
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/orchestrator.py +25 -9
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/plotter.py +263 -57
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/image_defaults.py +4 -2
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/push/push.py +13 -3
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/set/env.py +7 -6
- {bat_cli-0.1.0 → bat_cli-0.1.2}/tests/test_create_new_agent.py +71 -34
- {bat_cli-0.1.0 → bat_cli-0.1.2}/tests/test_eval_commands.py +52 -21
- bat_cli-0.1.0/pyproject.toml +0 -49
- bat_cli-0.1.0/src/create/templates/agent/__main__.py +0 -2
- bat_cli-0.1.0/src/create/templates/agent/llm_client.py.template +0 -36
- bat_cli-0.1.0/src/create/templates/agent/tests/__init__.py +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/setup.cfg +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/add/__init__.py +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/bat_cli.egg-info/SOURCES.txt +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/bat_cli.egg-info/dependency_links.txt +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/bat_cli.egg-info/entry_points.txt +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/bat_cli.egg-info/requires.txt +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/bat_cli.egg-info/top_level.txt +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/build/__init__.py +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/build/build.py +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/__init__.py +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/.dockerignore +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/.env.template +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/.python-version +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/Dockerfile +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/Makefile +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/README.md +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/agent.json.template +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/agent.spec +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/config.yaml +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/create/templates/agent/pyproject.toml.template +0 -0
- {bat_cli-0.1.0/src/create/templates/agent/src → bat_cli-0.1.2/src/create/templates/agent/src/llm_clients}/__init__.py +0 -0
- {bat_cli-0.1.0/src/create/templates/agent/src/llm_clients → bat_cli-0.1.2/src/create/templates/agent/tests}/__init__.py +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/__init__.py +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/__init__.py +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/contracts.py +1 -1
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/eval/engine/metrics/__init__.py +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/push/__init__.py +0 -0
- {bat_cli-0.1.0 → bat_cli-0.1.2}/src/set/__init__.py +0 -0
|
@@ -1,7 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bat-cli
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: CLI tool to interact with BAT agents
|
|
5
|
+
Author-email: Enrico SBUTTONI <enrico.sbuttoni@bubbleran.com>
|
|
6
|
+
Project-URL: Homepage, https://bubbleran.com/
|
|
7
|
+
Project-URL: Repository, https://github.com/bubbleran/bat
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
5
10
|
Requires-Python: >=3.12
|
|
6
11
|
Description-Content-Type: text/markdown
|
|
7
12
|
Requires-Dist: typer>=0.12.3
|
|
@@ -18,34 +23,30 @@ A CLI tool for creating, building, and evaluating BAT agent projects.
|
|
|
18
23
|
|
|
19
24
|
## Prerequisites
|
|
20
25
|
|
|
21
|
-
- Python and [uv](https://docs.astral.sh/uv/) installed
|
|
26
|
+
- Python 3.12+ and [uv](https://docs.astral.sh/uv/) installed
|
|
22
27
|
- Docker installed (required for `bat build` and `bat push`)
|
|
23
|
-
- For evaluation commands: an existing BAT agent root containing `agent.json`, `config.yaml`, and `
|
|
28
|
+
- For evaluation commands: an existing BAT agent root containing `agent.json`, `config.yaml`, and `pyproject.toml`
|
|
24
29
|
|
|
25
30
|
---
|
|
26
31
|
|
|
27
32
|
## Installation
|
|
28
33
|
|
|
29
|
-
### Option A —
|
|
34
|
+
### Option A — install system-wide with `uv tool` (recommended)
|
|
30
35
|
|
|
31
|
-
|
|
36
|
+
Installs `bat` into an isolated environment and puts the executable on your `PATH`,
|
|
37
|
+
so it is available from any directory.
|
|
32
38
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
This will:
|
|
39
|
+
````bash
|
|
40
|
+
# from PyPI
|
|
41
|
+
uv tool install bat-cli
|
|
38
42
|
|
|
39
|
-
1. Sync `dev` and `packaging` dependency groups via `uv`.
|
|
40
|
-
2. Build a one-file executable with PyInstaller.
|
|
41
|
-
3. Move it to `~/.local/bin/bat` (uses `sudo` only when necessary).
|
|
42
43
|
|
|
43
|
-
Make sure
|
|
44
|
+
Make sure the uv tools bin directory is on your `PATH` (uv prints the path on first
|
|
45
|
+
install; this is usually `~/.local/bin`):
|
|
44
46
|
|
|
45
47
|
```bash
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
```
|
|
48
|
+
uv tool update-shell # adds the uv tools dir to your shell profile
|
|
49
|
+
````
|
|
49
50
|
|
|
50
51
|
Then verify:
|
|
51
52
|
|
|
@@ -53,20 +54,39 @@ Then verify:
|
|
|
53
54
|
bat --help
|
|
54
55
|
```
|
|
55
56
|
|
|
56
|
-
|
|
57
|
+
To upgrade or remove later:
|
|
57
58
|
|
|
58
59
|
```bash
|
|
59
|
-
uv
|
|
60
|
-
uv
|
|
61
|
-
# binary is at dist/bat (Linux/macOS) or dist/bat.exe (Windows)
|
|
60
|
+
uv tool upgrade bat-cli
|
|
61
|
+
uv tool uninstall bat-cli
|
|
62
62
|
```
|
|
63
63
|
|
|
64
|
-
|
|
64
|
+
### Option B — install into a virtual environment with `uv pip`
|
|
65
|
+
|
|
66
|
+
Use this when you want `bat` scoped to a specific project/venv rather than installed
|
|
67
|
+
globally.
|
|
65
68
|
|
|
66
|
-
|
|
69
|
+
```bash
|
|
70
|
+
uv venv # create .venv (skip if you already have one)
|
|
71
|
+
source .venv/bin/activate # .venv\Scripts\activate on Windows
|
|
72
|
+
|
|
73
|
+
# from PyPI
|
|
74
|
+
uv pip install bat-cli
|
|
75
|
+
|
|
76
|
+
# or from a local checkout (run from the cli/ directory)
|
|
77
|
+
uv pip install . # add -e for an editable/development install
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
`bat` is available whenever that virtual environment is active:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
bat --help
|
|
84
|
+
```
|
|
67
85
|
|
|
68
86
|
### Option C — run without installing (development)
|
|
69
87
|
|
|
88
|
+
From the `cli/` directory:
|
|
89
|
+
|
|
70
90
|
```bash
|
|
71
91
|
uv sync --group dev
|
|
72
92
|
uv run bat --help
|
|
@@ -82,7 +102,7 @@ All examples below show `bat ...`; replace with `uv run bat ...` when using this
|
|
|
82
102
|
bat
|
|
83
103
|
├── init
|
|
84
104
|
│ └── agent
|
|
85
|
-
│ ├──
|
|
105
|
+
│ ├── <name>
|
|
86
106
|
│ ├── --clients, -c
|
|
87
107
|
│ ├── --output-dir, -o
|
|
88
108
|
│ ├── --force, -f
|
|
@@ -103,7 +123,11 @@ bat
|
|
|
103
123
|
├── eval
|
|
104
124
|
│ ├── init
|
|
105
125
|
│ │ └── --force, -f
|
|
106
|
-
│
|
|
126
|
+
│ ├── run
|
|
127
|
+
│ ├── show
|
|
128
|
+
│ └── plot
|
|
129
|
+
│ ├── --folder, -f
|
|
130
|
+
│ └── --filter, -F
|
|
107
131
|
├── build
|
|
108
132
|
│ ├── --context, -C
|
|
109
133
|
│ ├── --docker-registry
|
|
@@ -123,6 +147,7 @@ Built-in help is available at every level:
|
|
|
123
147
|
```bash
|
|
124
148
|
bat --help
|
|
125
149
|
bat init agent --help
|
|
150
|
+
bat eval --help
|
|
126
151
|
bat build --help
|
|
127
152
|
```
|
|
128
153
|
|
|
@@ -133,10 +158,6 @@ bat build --help
|
|
|
133
158
|
### 1. Create a new agent
|
|
134
159
|
|
|
135
160
|
```bash
|
|
136
|
-
# default name
|
|
137
|
-
bat init agent
|
|
138
|
-
|
|
139
|
-
# custom name
|
|
140
161
|
bat init agent my_agent
|
|
141
162
|
|
|
142
163
|
# specific output directory
|
|
@@ -144,6 +165,9 @@ bat init agent my_agent --output-dir .
|
|
|
144
165
|
|
|
145
166
|
# pre-generate LLM clients
|
|
146
167
|
bat init agent my_agent --clients reformulator,planner,executor
|
|
168
|
+
|
|
169
|
+
# set the port/model/provider written to .env
|
|
170
|
+
bat init agent my_agent --port 9900 --model gpt-4o-mini --model-provider openai
|
|
147
171
|
```
|
|
148
172
|
|
|
149
173
|
### 2. Add clients to an existing agent
|
|
@@ -159,10 +183,10 @@ bat add client planner,executor --force
|
|
|
159
183
|
|
|
160
184
|
### 3. Update agent environment variables
|
|
161
185
|
|
|
162
|
-
Run from the agent root (updates
|
|
186
|
+
Run from the agent root (updates an existing `.env`):
|
|
163
187
|
|
|
164
188
|
```bash
|
|
165
|
-
bat set env --port 8080 --model gpt-
|
|
189
|
+
bat set env --port 8080 --model gpt-4o-mini --model-provider openai
|
|
166
190
|
|
|
167
191
|
# also set Docker defaults for build/push
|
|
168
192
|
bat set env --docker-registry hub.bubbleran.com --repo orama/labs/my-agent
|
|
@@ -190,12 +214,16 @@ If `BAT_DOCKER_REGISTRY` and `BAT_DOCKER_REPO` are already set in `.env` or the
|
|
|
190
214
|
|
|
191
215
|
### 5. Run evaluation
|
|
192
216
|
|
|
193
|
-
|
|
217
|
+
Run all `eval` commands from an existing agent root (must contain `agent.json`,
|
|
218
|
+
`config.yaml`, and `pyproject.toml`):
|
|
194
219
|
|
|
195
220
|
```bash
|
|
196
221
|
# scaffold evaluation files
|
|
197
222
|
bat eval init
|
|
198
223
|
|
|
224
|
+
# inspect the resolved configuration
|
|
225
|
+
bat eval show
|
|
226
|
+
|
|
199
227
|
# run evaluation
|
|
200
228
|
bat eval run
|
|
201
229
|
```
|
|
@@ -210,22 +238,51 @@ Minimal `eval/eval.yaml`:
|
|
|
210
238
|
|
|
211
239
|
```yaml
|
|
212
240
|
evaluation:
|
|
213
|
-
dataset: eval/input/tasks.json
|
|
214
|
-
output_dir: eval/output
|
|
241
|
+
dataset: eval/input/tasks.json # default path if omitted
|
|
242
|
+
output_dir: eval/output # default path if omitted
|
|
243
|
+
agent_url: http://127.0.0.1:9900 # must include the scheme; this is the default
|
|
244
|
+
agent_startup_timeout_s: 45
|
|
245
|
+
agent_shutdown_timeout_s: 10
|
|
215
246
|
k: 1
|
|
216
|
-
qualitative: true
|
|
217
|
-
save_attempts: false
|
|
218
|
-
|
|
219
|
-
judge:
|
|
220
|
-
provider: ollama
|
|
221
|
-
model: your-judge-model
|
|
222
|
-
base_url: http://localhost:11434
|
|
247
|
+
qualitative: false # set true to enable LLM judge scoring
|
|
223
248
|
|
|
224
249
|
models:
|
|
225
250
|
- provider: openai
|
|
226
251
|
model: your-model-name
|
|
252
|
+
- provider: ollama
|
|
253
|
+
model: your-local-model
|
|
254
|
+
base_url: http://localhost:11434
|
|
255
|
+
|
|
256
|
+
# required only when qualitative: true
|
|
257
|
+
judge:
|
|
258
|
+
provider: ollama
|
|
259
|
+
model: local-judge-model
|
|
260
|
+
base_url: http://localhost:11434
|
|
261
|
+
# api_key_env: BAT_JUDGE_API_KEY # env var name holding the judge's API key
|
|
227
262
|
```
|
|
228
263
|
|
|
229
|
-
|
|
264
|
+
Notes:
|
|
265
|
+
|
|
266
|
+
- `bat eval run` starts the agent via `uv run .` from the agent root and waits until
|
|
267
|
+
`agent_url` accepts a TCP connection, so the agent project must have its
|
|
268
|
+
dependencies installed (its own `.venv`).
|
|
269
|
+
- `models` entries may also be written as `"<provider>:<model>"` strings.
|
|
270
|
+
- For models that require an API key, set it in the agent's `.env` under
|
|
271
|
+
`<PROVIDER>_API_KEY` (e.g. `OPENAI_API_KEY`).
|
|
272
|
+
|
|
273
|
+
### 6. Plot evaluation metrics
|
|
274
|
+
|
|
275
|
+
`bat eval plot` reads the `metrics.json` files produced by `eval run` and renders
|
|
276
|
+
charts. Point `--folder` at an evaluation output directory; each sub-folder
|
|
277
|
+
containing a `metrics.json` is treated as one run.
|
|
278
|
+
|
|
279
|
+
```bash
|
|
280
|
+
# plot every run found under the output folder
|
|
281
|
+
bat eval plot --folder eval/output
|
|
282
|
+
|
|
283
|
+
# restrict the per-task charts to task ids containing a substring
|
|
284
|
+
bat eval plot --folder eval/output --filter smoke
|
|
285
|
+
```
|
|
230
286
|
|
|
231
|
-
|
|
287
|
+
Charts are saved back into the given folder. `--filter` only narrows the per-task
|
|
288
|
+
charts; summary charts always cover all runs.
|
|
@@ -4,34 +4,30 @@ A CLI tool for creating, building, and evaluating BAT agent projects.
|
|
|
4
4
|
|
|
5
5
|
## Prerequisites
|
|
6
6
|
|
|
7
|
-
- Python and [uv](https://docs.astral.sh/uv/) installed
|
|
7
|
+
- Python 3.12+ and [uv](https://docs.astral.sh/uv/) installed
|
|
8
8
|
- Docker installed (required for `bat build` and `bat push`)
|
|
9
|
-
- For evaluation commands: an existing BAT agent root containing `agent.json`, `config.yaml`, and `
|
|
9
|
+
- For evaluation commands: an existing BAT agent root containing `agent.json`, `config.yaml`, and `pyproject.toml`
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
13
|
## Installation
|
|
14
14
|
|
|
15
|
-
### Option A —
|
|
15
|
+
### Option A — install system-wide with `uv tool` (recommended)
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
Installs `bat` into an isolated environment and puts the executable on your `PATH`,
|
|
18
|
+
so it is available from any directory.
|
|
18
19
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
This will:
|
|
20
|
+
````bash
|
|
21
|
+
# from PyPI
|
|
22
|
+
uv tool install bat-cli
|
|
24
23
|
|
|
25
|
-
1. Sync `dev` and `packaging` dependency groups via `uv`.
|
|
26
|
-
2. Build a one-file executable with PyInstaller.
|
|
27
|
-
3. Move it to `~/.local/bin/bat` (uses `sudo` only when necessary).
|
|
28
24
|
|
|
29
|
-
Make sure
|
|
25
|
+
Make sure the uv tools bin directory is on your `PATH` (uv prints the path on first
|
|
26
|
+
install; this is usually `~/.local/bin`):
|
|
30
27
|
|
|
31
28
|
```bash
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
```
|
|
29
|
+
uv tool update-shell # adds the uv tools dir to your shell profile
|
|
30
|
+
````
|
|
35
31
|
|
|
36
32
|
Then verify:
|
|
37
33
|
|
|
@@ -39,20 +35,39 @@ Then verify:
|
|
|
39
35
|
bat --help
|
|
40
36
|
```
|
|
41
37
|
|
|
42
|
-
|
|
38
|
+
To upgrade or remove later:
|
|
43
39
|
|
|
44
40
|
```bash
|
|
45
|
-
uv
|
|
46
|
-
uv
|
|
47
|
-
# binary is at dist/bat (Linux/macOS) or dist/bat.exe (Windows)
|
|
41
|
+
uv tool upgrade bat-cli
|
|
42
|
+
uv tool uninstall bat-cli
|
|
48
43
|
```
|
|
49
44
|
|
|
50
|
-
|
|
45
|
+
### Option B — install into a virtual environment with `uv pip`
|
|
46
|
+
|
|
47
|
+
Use this when you want `bat` scoped to a specific project/venv rather than installed
|
|
48
|
+
globally.
|
|
51
49
|
|
|
52
|
-
|
|
50
|
+
```bash
|
|
51
|
+
uv venv # create .venv (skip if you already have one)
|
|
52
|
+
source .venv/bin/activate # .venv\Scripts\activate on Windows
|
|
53
|
+
|
|
54
|
+
# from PyPI
|
|
55
|
+
uv pip install bat-cli
|
|
56
|
+
|
|
57
|
+
# or from a local checkout (run from the cli/ directory)
|
|
58
|
+
uv pip install . # add -e for an editable/development install
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
`bat` is available whenever that virtual environment is active:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
bat --help
|
|
65
|
+
```
|
|
53
66
|
|
|
54
67
|
### Option C — run without installing (development)
|
|
55
68
|
|
|
69
|
+
From the `cli/` directory:
|
|
70
|
+
|
|
56
71
|
```bash
|
|
57
72
|
uv sync --group dev
|
|
58
73
|
uv run bat --help
|
|
@@ -68,7 +83,7 @@ All examples below show `bat ...`; replace with `uv run bat ...` when using this
|
|
|
68
83
|
bat
|
|
69
84
|
├── init
|
|
70
85
|
│ └── agent
|
|
71
|
-
│ ├──
|
|
86
|
+
│ ├── <name>
|
|
72
87
|
│ ├── --clients, -c
|
|
73
88
|
│ ├── --output-dir, -o
|
|
74
89
|
│ ├── --force, -f
|
|
@@ -89,7 +104,11 @@ bat
|
|
|
89
104
|
├── eval
|
|
90
105
|
│ ├── init
|
|
91
106
|
│ │ └── --force, -f
|
|
92
|
-
│
|
|
107
|
+
│ ├── run
|
|
108
|
+
│ ├── show
|
|
109
|
+
│ └── plot
|
|
110
|
+
│ ├── --folder, -f
|
|
111
|
+
│ └── --filter, -F
|
|
93
112
|
├── build
|
|
94
113
|
│ ├── --context, -C
|
|
95
114
|
│ ├── --docker-registry
|
|
@@ -109,6 +128,7 @@ Built-in help is available at every level:
|
|
|
109
128
|
```bash
|
|
110
129
|
bat --help
|
|
111
130
|
bat init agent --help
|
|
131
|
+
bat eval --help
|
|
112
132
|
bat build --help
|
|
113
133
|
```
|
|
114
134
|
|
|
@@ -119,10 +139,6 @@ bat build --help
|
|
|
119
139
|
### 1. Create a new agent
|
|
120
140
|
|
|
121
141
|
```bash
|
|
122
|
-
# default name
|
|
123
|
-
bat init agent
|
|
124
|
-
|
|
125
|
-
# custom name
|
|
126
142
|
bat init agent my_agent
|
|
127
143
|
|
|
128
144
|
# specific output directory
|
|
@@ -130,6 +146,9 @@ bat init agent my_agent --output-dir .
|
|
|
130
146
|
|
|
131
147
|
# pre-generate LLM clients
|
|
132
148
|
bat init agent my_agent --clients reformulator,planner,executor
|
|
149
|
+
|
|
150
|
+
# set the port/model/provider written to .env
|
|
151
|
+
bat init agent my_agent --port 9900 --model gpt-4o-mini --model-provider openai
|
|
133
152
|
```
|
|
134
153
|
|
|
135
154
|
### 2. Add clients to an existing agent
|
|
@@ -145,10 +164,10 @@ bat add client planner,executor --force
|
|
|
145
164
|
|
|
146
165
|
### 3. Update agent environment variables
|
|
147
166
|
|
|
148
|
-
Run from the agent root (updates
|
|
167
|
+
Run from the agent root (updates an existing `.env`):
|
|
149
168
|
|
|
150
169
|
```bash
|
|
151
|
-
bat set env --port 8080 --model gpt-
|
|
170
|
+
bat set env --port 8080 --model gpt-4o-mini --model-provider openai
|
|
152
171
|
|
|
153
172
|
# also set Docker defaults for build/push
|
|
154
173
|
bat set env --docker-registry hub.bubbleran.com --repo orama/labs/my-agent
|
|
@@ -176,12 +195,16 @@ If `BAT_DOCKER_REGISTRY` and `BAT_DOCKER_REPO` are already set in `.env` or the
|
|
|
176
195
|
|
|
177
196
|
### 5. Run evaluation
|
|
178
197
|
|
|
179
|
-
|
|
198
|
+
Run all `eval` commands from an existing agent root (must contain `agent.json`,
|
|
199
|
+
`config.yaml`, and `pyproject.toml`):
|
|
180
200
|
|
|
181
201
|
```bash
|
|
182
202
|
# scaffold evaluation files
|
|
183
203
|
bat eval init
|
|
184
204
|
|
|
205
|
+
# inspect the resolved configuration
|
|
206
|
+
bat eval show
|
|
207
|
+
|
|
185
208
|
# run evaluation
|
|
186
209
|
bat eval run
|
|
187
210
|
```
|
|
@@ -196,22 +219,51 @@ Minimal `eval/eval.yaml`:
|
|
|
196
219
|
|
|
197
220
|
```yaml
|
|
198
221
|
evaluation:
|
|
199
|
-
dataset: eval/input/tasks.json
|
|
200
|
-
output_dir: eval/output
|
|
222
|
+
dataset: eval/input/tasks.json # default path if omitted
|
|
223
|
+
output_dir: eval/output # default path if omitted
|
|
224
|
+
agent_url: http://127.0.0.1:9900 # must include the scheme; this is the default
|
|
225
|
+
agent_startup_timeout_s: 45
|
|
226
|
+
agent_shutdown_timeout_s: 10
|
|
201
227
|
k: 1
|
|
202
|
-
qualitative: true
|
|
203
|
-
save_attempts: false
|
|
204
|
-
|
|
205
|
-
judge:
|
|
206
|
-
provider: ollama
|
|
207
|
-
model: your-judge-model
|
|
208
|
-
base_url: http://localhost:11434
|
|
228
|
+
qualitative: false # set true to enable LLM judge scoring
|
|
209
229
|
|
|
210
230
|
models:
|
|
211
231
|
- provider: openai
|
|
212
232
|
model: your-model-name
|
|
233
|
+
- provider: ollama
|
|
234
|
+
model: your-local-model
|
|
235
|
+
base_url: http://localhost:11434
|
|
236
|
+
|
|
237
|
+
# required only when qualitative: true
|
|
238
|
+
judge:
|
|
239
|
+
provider: ollama
|
|
240
|
+
model: local-judge-model
|
|
241
|
+
base_url: http://localhost:11434
|
|
242
|
+
# api_key_env: BAT_JUDGE_API_KEY # env var name holding the judge's API key
|
|
213
243
|
```
|
|
214
244
|
|
|
215
|
-
|
|
245
|
+
Notes:
|
|
246
|
+
|
|
247
|
+
- `bat eval run` starts the agent via `uv run .` from the agent root and waits until
|
|
248
|
+
`agent_url` accepts a TCP connection, so the agent project must have its
|
|
249
|
+
dependencies installed (its own `.venv`).
|
|
250
|
+
- `models` entries may also be written as `"<provider>:<model>"` strings.
|
|
251
|
+
- For models that require an API key, set it in the agent's `.env` under
|
|
252
|
+
`<PROVIDER>_API_KEY` (e.g. `OPENAI_API_KEY`).
|
|
253
|
+
|
|
254
|
+
### 6. Plot evaluation metrics
|
|
255
|
+
|
|
256
|
+
`bat eval plot` reads the `metrics.json` files produced by `eval run` and renders
|
|
257
|
+
charts. Point `--folder` at an evaluation output directory; each sub-folder
|
|
258
|
+
containing a `metrics.json` is treated as one run.
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
# plot every run found under the output folder
|
|
262
|
+
bat eval plot --folder eval/output
|
|
263
|
+
|
|
264
|
+
# restrict the per-task charts to task ids containing a substring
|
|
265
|
+
bat eval plot --folder eval/output --filter smoke
|
|
266
|
+
```
|
|
216
267
|
|
|
217
|
-
|
|
268
|
+
Charts are saved back into the given folder. `--filter` only narrows the per-task
|
|
269
|
+
charts; summary charts always cover all runs.
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "bat-cli"
|
|
7
|
+
version = "0.1.2"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="Enrico SBUTTONI", email="enrico.sbuttoni@bubbleran.com" },
|
|
10
|
+
]
|
|
11
|
+
description = "CLI tool to interact with BAT agents"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.12"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
dependencies = [
|
|
20
|
+
"typer>=0.12.3",
|
|
21
|
+
"bat-adk>=2026.06rc1",
|
|
22
|
+
"bat-adk[openai]",
|
|
23
|
+
"a2a-sdk>=1.0.0",
|
|
24
|
+
"python-dotenv>=1.0.1",
|
|
25
|
+
"typing-extensions>=4.12.0",
|
|
26
|
+
"matplotlib>=3.8",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.scripts]
|
|
30
|
+
bat = "cli:main"
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
"Homepage" = "https://bubbleran.com/"
|
|
34
|
+
"Repository" = "https://github.com/bubbleran/bat"
|
|
35
|
+
|
|
36
|
+
[dependency-groups]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=8.0",
|
|
39
|
+
]
|
|
40
|
+
packaging = [
|
|
41
|
+
"pyinstaller>=6.0",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
[tool.setuptools]
|
|
45
|
+
package-dir = {"" = "src"}
|
|
46
|
+
py-modules = ["cli", "image_defaults"]
|
|
47
|
+
|
|
48
|
+
[tool.setuptools.packages.find]
|
|
49
|
+
where = ["src"]
|
|
50
|
+
|
|
51
|
+
[tool.setuptools.package-data]
|
|
52
|
+
create = [
|
|
53
|
+
"templates/agent/*",
|
|
54
|
+
"templates/agent/.*",
|
|
55
|
+
"templates/agent/src/*",
|
|
56
|
+
"templates/agent/src/llm_clients/*",
|
|
57
|
+
"templates/agent/tests/*",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
[tool.pytest.ini_options]
|
|
61
|
+
pythonpath = ["src"]
|
|
62
|
+
|
|
63
|
+
[tool.ruff]
|
|
64
|
+
target-version = "py312"
|
|
65
|
+
line-length = 80
|
|
66
|
+
|
|
67
|
+
# Files or directories to ignore entirely
|
|
68
|
+
exclude = [
|
|
69
|
+
".git",
|
|
70
|
+
".venv",
|
|
71
|
+
"__pycache__",
|
|
72
|
+
"build",
|
|
73
|
+
"dist",
|
|
74
|
+
"src/create/templates", # agent scaffold templates, not valid Python
|
|
75
|
+
]
|
|
76
|
+
|
|
77
|
+
[tool.ruff.lint]
|
|
78
|
+
select = [
|
|
79
|
+
"E", # pycodestyle errors (standard spacing, formatting)
|
|
80
|
+
"W", # pycodestyle warnings
|
|
81
|
+
"F", # Pyflakes (catches unused imports, undefined variables)
|
|
82
|
+
"I", # isort (automatically sorts your imports alphabetically)
|
|
83
|
+
"B", # flake8-bugbear (catches tricky hidden Python bugs)
|
|
84
|
+
"SIM", # flake8-simplify (helps you write simpler, shorter code)
|
|
85
|
+
]
|
|
86
|
+
ignore = [
|
|
87
|
+
"B008", # function call in default argument (this is the Typer/FastAPI pattern)
|
|
88
|
+
]
|
|
89
|
+
|
|
90
|
+
# Custom settings for specific files
|
|
91
|
+
[tool.ruff.lint.per-file-ignores]
|
|
92
|
+
"tests/*" = ["S101"] # Allow 'assert' statements in tests without warning
|
|
93
|
+
|
|
94
|
+
[tool.ruff.format]
|
|
95
|
+
indent-style = "space"
|