agent-regression-lab 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/docs/tools.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Custom Tools
2
2
 
3
- Custom tools are registered in `agentlab.config.yaml` and loaded from repo-local JS or TS modules.
3
+ Custom tools are registered in `agentlab.config.yaml` and can be loaded from repo-local JS/TS modules or installed npm packages.
4
4
 
5
5
  This is the main extension point when built-in tools are not enough.
6
6
 
@@ -9,12 +9,14 @@ This is the main extension point when built-in tools are not enough.
9
9
  Each tool entry must define:
10
10
 
11
11
  - `name`
12
- - `modulePath`
12
+ - exactly one source:
13
+ - `modulePath`, or
14
+ - `package`
13
15
  - `exportName`
14
16
  - `description`
15
17
  - `inputSchema`
16
18
 
17
- Example:
19
+ Repo-local example:
18
20
 
19
21
  ```yaml
20
22
  tools:
@@ -33,6 +35,25 @@ tools:
33
35
  - customer_id
34
36
  ```
35
37
 
38
+ Installed package example:
39
+
40
+ ```yaml
41
+ tools:
42
+ - name: support.find_duplicate_charge
43
+ package: "@agentlab/example-support-tools"
44
+ exportName: findDuplicateCharge
45
+ description: Find the duplicated charge order id for a given customer.
46
+ inputSchema:
47
+ type: object
48
+ additionalProperties: false
49
+ properties:
50
+ customer_id:
51
+ type: string
52
+ description: Customer id to inspect for duplicated charges.
53
+ required:
54
+ - customer_id
55
+ ```
56
+
36
57
  ## Tool Module Shape
37
58
 
38
59
  The exported function should be async and should return JSON-serializable output.
@@ -48,11 +69,15 @@ export async function myTool(input: unknown): Promise<{ ok: boolean }> {
48
69
  The existing working examples are:
49
70
 
50
71
  - `user_tools/findDuplicateCharge.ts`
72
+ - `examples/support-tools`
73
+ - `examples/coding-tools`
51
74
 
52
75
  ## Important Constraints
53
76
 
77
+ - each tool must define exactly one of `modulePath` or `package`
54
78
  - `modulePath` must stay within the repo
55
79
  - the module must exist at load time
80
+ - installed packages must be resolvable from the current project
56
81
  - the named export must exist
57
82
  - tool input should be validated defensively inside the tool
58
83
  - tool output should be deterministic and JSON-serializable
@@ -100,3 +125,9 @@ Typical config failures:
100
125
  - invalid `inputSchema` shape
101
126
 
102
127
  See [troubleshooting.md](troubleshooting.md) for failure examples and fixes.
128
+
129
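+ For installed-package workflows, a good local starting point is to install the example package. The bundled examples are marked `private`, so if the registry name below cannot be resolved, install from the local folder instead (for example `npm install ./examples/support-tools`):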
130
+
131
+ ```bash
132
+ npm install @agentlab/example-support-tools
133
+ ```
@@ -138,14 +138,69 @@ Typical reasons:
138
138
  - `agentlab.config.yaml` is missing
139
139
  - the configured `name` does not match the CLI `--agent` value
140
140
  - `modulePath` points outside the repo
141
+ - both `modulePath` and `package` were provided for the same tool
142
+ - the configured npm package is not installed
141
143
  - the configured export or command does not exist
142
144
 
143
145
  Working references in this repo:
144
146
 
145
147
  - tool config: `agentlab.config.yaml`
146
148
  - custom tool: `user_tools/findDuplicateCharge.ts`
149
+ - package-style tools: `examples/support-tools`, `examples/coding-tools`
147
150
  - external agents: `custom_agents/node_agent.mjs`, `custom_agents/python_agent.py`
148
151
 
152
+ ### `Tool '<name>' must define exactly one of 'modulePath' or 'package'`
153
+
154
+ Your tool registration is ambiguous (both `modulePath` and `package` are set) or incomplete (neither is set).
155
+
156
+ Valid:
157
+
158
+ ```yaml
159
+ tools:
160
+ - name: support.find_duplicate_charge
161
+ modulePath: ./user_tools/findDuplicateCharge.ts
162
+ exportName: findDuplicateCharge
163
+ ```
164
+
165
+ Also valid:
166
+
167
+ ```yaml
168
+ tools:
169
+ - name: support.find_duplicate_charge
170
+ package: "@agentlab/example-support-tools"
171
+ exportName: findDuplicateCharge
172
+ ```
173
+
174
+ Invalid:
175
+
176
+ - setting both `modulePath` and `package`
177
+ - setting neither of them
178
+
179
+ ### `Tool '<name>' failed to load package '<pkg>'`
180
+
181
+ The package-backed tool could not be resolved from the current project.
182
+
183
+ Check:
184
+
185
+ - the package is installed in the current project
186
+ - the package name is correct
187
+ - the package exports the named function you configured
188
+
189
+ Typical fix (substitute the package name from your own config):
190
+
191
+ ```bash
192
+ npm install @agentlab/example-support-tools
193
+ ```
194
+
195
+ ### `Tool '<name>' export '<export>' is not a function`
196
+
197
+ The module loaded successfully, but the named export does not exist or is not callable.
198
+
199
+ Check:
200
+
201
+ - `exportName` matches the actual exported function name
202
+ - the package or local module exposes the function as a named ESM export (for example `export async function findDuplicateCharge`)
203
+
149
204
  ---
150
205
 
151
206
  ## HTTP agent errors
@@ -0,0 +1,21 @@
1
+ # Example Coding Tools
2
+
3
+ Minimal package-style coding-tool example for Agent Regression Lab.
4
+
5
+ Register it in `agentlab.config.yaml` like this:
6
+
7
+ ```yaml
8
+ tools:
9
+ - name: coding.read_repo_hint
10
+ package: "@agentlab/example-coding-tools"
11
+ exportName: readRepoHint
12
+ description: Return a small repo hint for the target path.
13
+ inputSchema:
14
+ type: object
15
+ additionalProperties: false
16
+ properties:
17
+ path:
18
+ type: string
19
+ required:
20
+ - path
21
+ ```
@@ -0,0 +1,11 @@
1
+ export async function readRepoHint(input) { // Example tool: returns the requested path together with a fixed hint string.
2
+ const path = String(input?.path ?? ""); // Coerce input.path to a string; a missing or nullish path becomes "".
3
+ if (!path) { // An empty string is treated as a missing path.
4
+ throw new Error("path is required");
5
+ }
6
+
7
+ return { // Plain object result: the coerced path plus a constant hint.
8
+ path,
9
+ hint: "Check the target file before editing.",
10
+ };
11
+ }
@@ -0,0 +1,8 @@
1
+ {
2
+ "name": "@agentlab/example-coding-tools",
3
+ "private": true,
4
+ "type": "module",
5
+ "exports": {
6
+ ".": "./index.js"
7
+ }
8
+ }
@@ -0,0 +1,21 @@
1
+ # Example Support Tools
2
+
3
+ Minimal package-style tool example for Agent Regression Lab.
4
+
5
+ Register it in `agentlab.config.yaml` like this:
6
+
7
+ ```yaml
8
+ tools:
9
+ - name: support.find_duplicate_charge
10
+ package: "@agentlab/example-support-tools"
11
+ exportName: findDuplicateCharge
12
+ description: Find the duplicated charge order id for a given customer.
13
+ inputSchema:
14
+ type: object
15
+ additionalProperties: false
16
+ properties:
17
+ customer_id:
18
+ type: string
19
+ required:
20
+ - customer_id
21
+ ```
@@ -0,0 +1,8 @@
1
+ export async function findDuplicateCharge(input) { // Example tool: builds a placeholder order id from the customer id.
2
+ const customerId = String(input?.customer_id ?? ""); // Coerce input.customer_id to a string; missing or nullish becomes "".
3
+ if (!customerId) { // An empty string is treated as a missing customer id.
4
+ throw new Error("customer_id is required");
5
+ }
6
+
7
+ return { order_id: `dup_${customerId}` }; // The id is the customer id prefixed with "dup_"; nothing is looked up here.
8
+ }
@@ -0,0 +1,8 @@
1
+ {
2
+ "name": "@agentlab/example-support-tools",
3
+ "private": true,
4
+ "type": "module",
5
+ "exports": {
6
+ ".": "./index.js"
7
+ }
8
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-regression-lab",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "private": false,
5
5
  "description": "Local-first scenario-based evaluation harness for AI agents.",
6
6
  "license": "MIT",
@@ -21,16 +21,18 @@
21
21
  ],
22
22
  "type": "module",
23
23
  "bin": {
24
- "agentlab": "./dist/index.js"
24
+ "agentlab": "bin/agentlab.js"
25
25
  },
26
26
  "files": [
27
+ "bin",
27
28
  "dist",
28
29
  "dist/ui-assets",
29
30
  "README.md",
30
- "docs"
31
+ "docs",
32
+ "examples"
31
33
  ],
32
34
  "engines": {
33
- "node": ">=22"
35
+ "node": ">=18"
34
36
  },
35
37
  "scripts": {
36
38
  "build": "tsc -p tsconfig.json && npm run build:ui",