agent-regression-lab 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +186 -123
- package/dist/agent/factory.js +20 -6
- package/dist/agent/httpAdapter.js +79 -0
- package/dist/agent/mockAdapter.js +210 -13
- package/dist/config.js +223 -4
- package/dist/conversationEvaluators.js +167 -0
- package/dist/conversationRunner.js +199 -0
- package/dist/evaluators.js +56 -1
- package/dist/index.js +428 -111
- package/dist/lib/id.js +6 -0
- package/dist/runOutput.js +46 -0
- package/dist/runner.js +31 -9
- package/dist/scenarios.js +211 -11
- package/dist/scoring.js +2 -2
- package/dist/storage.js +305 -31
- package/dist/tools.js +284 -0
- package/dist/trace.js +4 -2
- package/dist/ui/App.js +67 -5
- package/dist/ui/server.js +18 -0
- package/dist/ui-assets/client.js +165 -3
- package/docs/agents.md +287 -0
- package/docs/golden-suites.md +74 -0
- package/docs/integrations-and-live-services.md +58 -0
- package/docs/memory-and-stateful-agents.md +51 -0
- package/docs/release-checklist.md +94 -0
- package/docs/runtime-profiles.md +67 -0
- package/docs/scenarios.md +419 -0
- package/docs/tools.md +102 -0
- package/docs/troubleshooting.md +296 -0
- package/docs/variant-sets.md +63 -0
- package/package.json +4 -3
package/README.md
CHANGED
|
@@ -1,62 +1,139 @@
|
|
|
1
1
|
# Agent Regression Lab
|
|
2
2
|
|
|
3
|
-
Agent Regression Lab is
|
|
3
|
+
Agent Regression Lab is the local-first regression spine for agent engineering teams.
|
|
4
4
|
|
|
5
|
-
It
|
|
5
|
+
It gives teams a repeatable way to define expected agent behavior in YAML, replay it against deterministic tool surfaces or live HTTP agents, store traces and scores locally, and compare candidate behavior against known baselines over time.
|
|
6
6
|
|
|
7
|
-
This is
|
|
7
|
+
This is a local-first alpha for early technical teams. It is strongest when used across one workflow spine:
|
|
8
|
+
|
|
9
|
+
- debug a single scenario while building
|
|
10
|
+
- validate a branch with a suite before merge
|
|
11
|
+
- run curated golden suites before release
|
|
12
|
+
- keep incident-derived scenarios as engineering memory
|
|
13
|
+
|
|
14
|
+
## Who It Is For
|
|
15
|
+
|
|
16
|
+
- teams shipping prompt, model, tool, workflow, and memory changes
|
|
17
|
+
- engineers who need repeatable before/after evidence instead of vibes
|
|
18
|
+
- teams validating live HTTP agents as well as deterministic local scenarios
|
|
19
|
+
- researchers and technical operators who want local control before adopting heavier hosted infrastructure
|
|
20
|
+
|
|
21
|
+
## Why Teams Use It
|
|
22
|
+
|
|
23
|
+
- catch regressions before merge or release
|
|
24
|
+
- debug subtle behavioral changes with full traces
|
|
25
|
+
- compare model, prompt, tool, and workflow changes against a known baseline
|
|
26
|
+
- build a portfolio of golden workflows, historical regressions, and ugly edge cases
|
|
27
|
+
- preserve engineering memory so old failures do not quietly return
|
|
8
28
|
|
|
9
29
|
## What It Supports Today
|
|
10
30
|
|
|
11
31
|
- YAML scenarios under `scenarios/`
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
32
|
+
- deterministic built-in tools plus repo-local custom tools from `agentlab.config.yaml`
|
|
33
|
+
- named agents from `agentlab.config.yaml`
|
|
34
|
+
- built-in `mock`, `openai`, `external_process`, and `http` agent modes
|
|
35
|
+
- `type: conversation` multi-turn dialog scenarios for HTTP agents
|
|
15
36
|
- SQLite-backed local run history under `artifacts/agentlab.db`
|
|
16
37
|
- CLI commands to list, run, show, compare, and launch the UI
|
|
17
|
-
-
|
|
38
|
+
- local web UI for run inspection, run comparison, and suite batch comparison
|
|
18
39
|
|
|
19
|
-
##
|
|
40
|
+
## Workflow Spine
|
|
20
41
|
|
|
21
|
-
|
|
42
|
+
Use this as the default product story:
|
|
22
43
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
44
|
+
1. debug locally with one scenario
|
|
45
|
+
2. validate a branch with a suite
|
|
46
|
+
3. run curated golden suites before release
|
|
47
|
+
4. keep incident-derived scenarios as permanent regression assets
|
|
48
|
+
|
|
49
|
+
## First 10 Minutes
|
|
50
|
+
|
|
51
|
+
The fastest path is to run the CLI from a local checkout.
|
|
26
52
|
|
|
27
|
-
|
|
53
|
+
1. Install dependencies and build:
|
|
28
54
|
|
|
29
55
|
```bash
|
|
56
|
+
npm install
|
|
30
57
|
npm run check
|
|
31
58
|
npm test
|
|
32
59
|
npm run build
|
|
33
60
|
```
|
|
34
61
|
|
|
35
|
-
|
|
62
|
+
2. Verify the CLI:
|
|
36
63
|
|
|
37
64
|
```bash
|
|
38
|
-
|
|
65
|
+
agentlab --help
|
|
39
66
|
```
|
|
40
67
|
|
|
41
|
-
|
|
68
|
+
If you have not linked the package locally yet, use:
|
|
42
69
|
|
|
43
70
|
```bash
|
|
44
|
-
npm
|
|
71
|
+
npm link
|
|
72
|
+
agentlab --help
|
|
45
73
|
```
|
|
46
74
|
|
|
47
|
-
|
|
75
|
+
3. List scenarios:
|
|
48
76
|
|
|
49
77
|
```bash
|
|
50
|
-
|
|
78
|
+
agentlab list scenarios
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
4. Run a deterministic sample scenario:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
agentlab run support.refund-correct-order --agent mock-default
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
5. Inspect the run:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
agentlab show <run-id>
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
6. Run the same scenario again, then compare the two runs:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
agentlab compare <baseline-run-id> <candidate-run-id>
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
7. Launch the local UI:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
agentlab ui
|
|
51
103
|
```
|
|
52
104
|
|
|
53
105
|
The UI starts on `http://127.0.0.1:4173`.
|
|
54
106
|
|
|
55
|
-
|
|
107
|
+
8. Run a suite and compare two suite batches:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
agentlab run --suite support --agent mock-default
|
|
111
|
+
agentlab run --suite support --agent mock-default
|
|
112
|
+
agentlab compare --suite <baseline-batch-id> <candidate-batch-id>
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
`run --suite` prints a `Suite batch:` id at the end. That is the id used by `compare --suite`.
|
|
116
|
+
|
|
117
|
+
## Install
|
|
118
|
+
|
|
119
|
+
### Installed CLI
|
|
120
|
+
|
|
121
|
+
After the package is published:
|
|
56
122
|
|
|
57
|
-
|
|
123
|
+
```bash
|
|
124
|
+
npm install -g agent-regression-lab
|
|
125
|
+
agentlab --help
|
|
126
|
+
```
|
|
58
127
|
|
|
59
|
-
|
|
128
|
+
You can also use:
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
npx agent-regression-lab --help
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Local Development Install
|
|
135
|
+
|
|
136
|
+
From this repo:
|
|
60
137
|
|
|
61
138
|
```bash
|
|
62
139
|
npm install
|
|
@@ -65,141 +142,127 @@ npm link
|
|
|
65
142
|
agentlab --help
|
|
66
143
|
```
|
|
67
144
|
|
|
68
|
-
|
|
145
|
+
### Repo-Local Dev Mode
|
|
146
|
+
|
|
147
|
+
If you do not want to link the package yet:
|
|
69
148
|
|
|
70
149
|
```bash
|
|
71
|
-
npm
|
|
72
|
-
|
|
150
|
+
npm run start -- --help
|
|
151
|
+
npm run start -- run support.refund-correct-order --agent mock-default
|
|
73
152
|
```
|
|
74
153
|
|
|
75
|
-
The CLI operates on the current working directory. Run it from the root of a project that contains `scenarios/`, `fixtures/`, and optional `agentlab.config.yaml`.
|
|
76
|
-
|
|
77
154
|
## CLI
|
|
78
155
|
|
|
156
|
+
Supported command surface:
|
|
157
|
+
|
|
79
158
|
```text
|
|
80
159
|
agentlab list scenarios
|
|
81
160
|
agentlab run <scenario-id> [--agent <name>]
|
|
82
161
|
agentlab run --suite <suite-id> [--agent <name>]
|
|
162
|
+
agentlab run --suite-def <name> [--agent <name>]
|
|
163
|
+
agentlab run <scenario-id> [--variant-set <name>]
|
|
83
164
|
agentlab show <run-id>
|
|
84
165
|
agentlab compare <baseline-run-id> <candidate-run-id>
|
|
166
|
+
agentlab compare --suite <baseline-batch-id> <candidate-batch-id>
|
|
85
167
|
agentlab ui
|
|
168
|
+
agentlab version
|
|
169
|
+
agentlab help
|
|
86
170
|
```
|
|
87
171
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
##
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
Find the duplicated charge and refund only that order.
|
|
113
|
-
tools:
|
|
114
|
-
allowed:
|
|
115
|
-
- crm.search_customer
|
|
116
|
-
- orders.list
|
|
117
|
-
- orders.refund
|
|
118
|
-
runtime:
|
|
119
|
-
max_steps: 8
|
|
120
|
-
timeout_seconds: 60
|
|
121
|
-
evaluators:
|
|
122
|
-
- id: refund-created
|
|
123
|
-
type: tool_call_assertion
|
|
124
|
-
mode: hard_gate
|
|
125
|
-
config:
|
|
126
|
-
tool: orders.refund
|
|
127
|
-
match:
|
|
128
|
-
order_id: ord_1024
|
|
172
|
+
The CLI operates on the current working directory. Run it from the root of a project that contains `scenarios/`, `fixtures/`, and optional `agentlab.config.yaml`.
|
|
173
|
+
|
|
174
|
+
## Canonical Workflow
|
|
175
|
+
|
|
176
|
+
Use this as the default mental model:
|
|
177
|
+
|
|
178
|
+
1. list scenarios
|
|
179
|
+
2. run one scenario or one suite
|
|
180
|
+
3. note the run id or suite batch id
|
|
181
|
+
4. inspect the run in CLI or UI
|
|
182
|
+
5. compare two runs or two suite batches
|
|
183
|
+
6. extend the setup with a named agent or repo-local tool when needed
|
|
184
|
+
|
|
185
|
+
## Canonical Live HTTP Fixture
|
|
186
|
+
|
|
187
|
+
`arl-test/` is the canonical live HTTP regression fixture in this repo.
|
|
188
|
+
|
|
189
|
+
Use it to verify the production-like HTTP path end to end:
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
cd arl-test
|
|
193
|
+
npm start
|
|
194
|
+
node ../dist/index.js list scenarios
|
|
195
|
+
node ../dist/index.js run order-tracking-in-transit --agent support-agent
|
|
129
196
|
```
|
|
130
197
|
|
|
131
|
-
|
|
198
|
+
The `arl-test` scenarios are intended to behave like a real internal-team regression fixture, not just a toy demo.
|
|
199
|
+
|
|
200
|
+
## Config And Extension Points
|
|
132
201
|
|
|
133
|
-
`agentlab.config.yaml` is the extension point for
|
|
202
|
+
`agentlab.config.yaml` is the public extension point for:
|
|
203
|
+
|
|
204
|
+
- named agents
|
|
205
|
+
- repo-local custom tools
|
|
134
206
|
|
|
135
207
|
Supported agent providers:
|
|
136
208
|
|
|
137
209
|
- `mock`
|
|
138
210
|
- `openai`
|
|
139
211
|
- `external_process`
|
|
212
|
+
- `http` — point at a running HTTP service for multi-turn conversation testing
|
|
140
213
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
-
|
|
144
|
-
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
tools:
|
|
158
|
-
- name: support.find_duplicate_charge
|
|
159
|
-
modulePath: user_tools/findDuplicateCharge.ts
|
|
160
|
-
exportName: findDuplicateCharge
|
|
161
|
-
description: Find the duplicated charge order id for a given customer.
|
|
162
|
-
inputSchema:
|
|
163
|
-
type: object
|
|
164
|
-
additionalProperties: false
|
|
165
|
-
properties:
|
|
166
|
-
customer_id:
|
|
167
|
-
type: string
|
|
168
|
-
required:
|
|
169
|
-
- customer_id
|
|
170
|
-
```
|
|
214
|
+
Working sample assets already live in this repo:
|
|
215
|
+
|
|
216
|
+
- external agents: `custom_agents/node_agent.mjs`, `custom_agents/python_agent.py`
|
|
217
|
+
- custom tool: `user_tools/findDuplicateCharge.ts`
|
|
218
|
+
- sample config: `agentlab.config.yaml`
|
|
219
|
+
|
|
220
|
+
See:
|
|
221
|
+
|
|
222
|
+
- [docs/scenarios.md](docs/scenarios.md)
|
|
223
|
+
- [docs/tools.md](docs/tools.md)
|
|
224
|
+
- [docs/agents.md](docs/agents.md)
|
|
225
|
+
- [docs/troubleshooting.md](docs/troubleshooting.md)
|
|
226
|
+
- [docs/release-checklist.md](docs/release-checklist.md)
|
|
227
|
+
|
|
228
|
+
## Local Data And Artifacts
|
|
171
229
|
|
|
172
|
-
|
|
230
|
+
By default the product writes local state under `artifacts/`.
|
|
173
231
|
|
|
174
|
-
|
|
232
|
+
Important paths:
|
|
175
233
|
|
|
176
|
-
|
|
234
|
+
- SQLite DB: `artifacts/agentlab.db`
|
|
235
|
+
- per-run trace output: `artifacts/<run-id>/trace.json`
|
|
236
|
+
- local UI assets at runtime: served from packaged `dist/ui-assets` or built into `artifacts/ui/` in repo mode
|
|
177
237
|
|
|
178
|
-
|
|
179
|
-
- `tool_result`
|
|
180
|
-
- `runner_error`
|
|
238
|
+
If you delete `artifacts/`, you remove stored run history and generated local outputs.
|
|
181
239
|
|
|
182
|
-
|
|
240
|
+
## Determinism
|
|
183
241
|
|
|
184
|
-
|
|
185
|
-
- `final`
|
|
186
|
-
- `error`
|
|
242
|
+
The benchmark is designed to be deterministic enough for repeated local evaluation:
|
|
187
243
|
|
|
188
|
-
|
|
244
|
+
- built-in tools read from local fixtures
|
|
245
|
+
- scenarios declare fixed tool allowlists and evaluator rules
|
|
246
|
+
- scoring is rule-based
|
|
247
|
+
- suite comparison is based on stored local runs and suite batch ids
|
|
189
248
|
|
|
190
|
-
|
|
249
|
+
Agent behavior can still vary depending on the provider path. The built-in `mock` path is the most deterministic path for smoke tests and baseline examples.
|
|
191
250
|
|
|
192
|
-
|
|
193
|
-
2. agent sends back a `tool_call` or `final`
|
|
194
|
-
3. runner executes the tool and sends `tool_result`
|
|
195
|
-
4. agent sends the next `tool_call` or `final`
|
|
251
|
+
## Limitations
|
|
196
252
|
|
|
197
|
-
|
|
253
|
+
- this is a local-first alpha, not a hosted platform
|
|
254
|
+
- custom tool loading is limited to repo-local module paths
|
|
255
|
+
- external agents integrate through the local stdin/stdout protocol only
|
|
256
|
+
- the UI is intentionally minimal and optimized for debugging
|
|
257
|
+
- SQLite-backed local storage still makes sequential live verification the safest path when reusing the same local artifacts DB
|
|
258
|
+
- the benchmark is broader than before, but still small compared to a mature benchmark product
|
|
198
259
|
|
|
199
|
-
##
|
|
260
|
+
## Next Docs
|
|
200
261
|
|
|
201
|
-
-
|
|
202
|
-
-
|
|
203
|
-
-
|
|
204
|
-
-
|
|
205
|
-
-
|
|
262
|
+
- scenario authoring: [docs/scenarios.md](docs/scenarios.md)
|
|
263
|
+
- golden suites: [docs/golden-suites.md](docs/golden-suites.md)
|
|
264
|
+
- integrations and live services: [docs/integrations-and-live-services.md](docs/integrations-and-live-services.md)
|
|
265
|
+
- memory and stateful agents: [docs/memory-and-stateful-agents.md](docs/memory-and-stateful-agents.md)
|
|
266
|
+
- custom tools: [docs/tools.md](docs/tools.md)
|
|
267
|
+
- named agents and external-process protocol: [docs/agents.md](docs/agents.md)
|
|
268
|
+
- common failure modes: [docs/troubleshooting.md](docs/troubleshooting.md)
|
package/dist/agent/factory.js
CHANGED
|
@@ -2,6 +2,20 @@ import { ExternalProcessAgentAdapter } from "./externalProcessAdapter.js";
|
|
|
2
2
|
import { MockAgentAdapter } from "./mockAdapter.js";
|
|
3
3
|
import { OpenAIResponsesAgentAdapter } from "./openaiResponsesAdapter.js";
|
|
4
4
|
import { createAgentVersionId } from "../lib/id.js";
|
|
5
|
+
/**
 * Returns a copy of an agent version record with the run-identity fields
 * copied over from the agent config.
 *
 * Every identity field is always written, so a field that is absent on
 * `config` ends up as an explicit `undefined` on the result and overrides
 * any same-named field already present on `version`.
 *
 * @param {object} version - Base agent version record (id, label, provider, ...).
 * @param {object} config - Agent config that carries the identity fields.
 * @returns {object} A new object; neither argument is mutated.
 */
function attachIdentityMetadata(version, config) {
    const identityFields = [
        "variantSetName",
        "variantLabel",
        "promptVersion",
        "modelVersion",
        "toolSchemaVersion",
        "configLabel",
        "configHash",
        "runtimeProfileName",
        "suiteDefinitionName",
    ];
    const enriched = { ...version };
    for (const fieldName of identityFields) {
        enriched[fieldName] = config[fieldName];
    }
    return enriched;
}
|
|
5
19
|
class MockAgentAdapterFactory {
|
|
6
20
|
createAdapter() {
|
|
7
21
|
return new MockAgentAdapter();
|
|
@@ -9,13 +23,13 @@ class MockAgentAdapterFactory {
|
|
|
9
23
|
createVersion(config) {
|
|
10
24
|
const label = config.label ?? config.agentName ?? "mock-support-agent-v1";
|
|
11
25
|
const payload = { adapter: "mock", domain: "support", agentName: config.agentName };
|
|
12
|
-
return {
|
|
26
|
+
return attachIdentityMetadata({
|
|
13
27
|
id: createAgentVersionId(label, payload),
|
|
14
28
|
label,
|
|
15
29
|
modelId: "mock-model",
|
|
16
30
|
provider: "mock",
|
|
17
31
|
config: payload,
|
|
18
|
-
};
|
|
32
|
+
}, config);
|
|
19
33
|
}
|
|
20
34
|
}
|
|
21
35
|
class OpenAIAdapterFactory {
|
|
@@ -28,13 +42,13 @@ class OpenAIAdapterFactory {
|
|
|
28
42
|
const model = config.model ?? "gpt-4o-mini";
|
|
29
43
|
const label = config.label ?? config.agentName ?? `openai-${model}`;
|
|
30
44
|
const payload = { provider: "openai", model, agentName: config.agentName };
|
|
31
|
-
return {
|
|
45
|
+
return attachIdentityMetadata({
|
|
32
46
|
id: createAgentVersionId(label, payload),
|
|
33
47
|
label,
|
|
34
48
|
modelId: model,
|
|
35
49
|
provider: "openai",
|
|
36
50
|
config: payload,
|
|
37
|
-
};
|
|
51
|
+
}, config);
|
|
38
52
|
}
|
|
39
53
|
}
|
|
40
54
|
class ExternalProcessAdapterFactory {
|
|
@@ -53,14 +67,14 @@ class ExternalProcessAdapterFactory {
|
|
|
53
67
|
args: config.args ?? [],
|
|
54
68
|
agentName: config.agentName,
|
|
55
69
|
};
|
|
56
|
-
return {
|
|
70
|
+
return attachIdentityMetadata({
|
|
57
71
|
id: createAgentVersionId(label, payload),
|
|
58
72
|
label,
|
|
59
73
|
provider: "external_process",
|
|
60
74
|
command: config.command,
|
|
61
75
|
args: config.args ?? [],
|
|
62
76
|
config: payload,
|
|
63
|
-
};
|
|
77
|
+
}, config);
|
|
64
78
|
}
|
|
65
79
|
}
|
|
66
80
|
export function createAgentFactory(config) {
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { performance } from "node:perf_hooks";
|
|
2
|
+
/**
 * Expands `{{ ... }}` placeholders in a template string.
 *
 * Supported placeholders (surrounding whitespace inside the braces is ignored):
 *   - `{{message}}`          -> the current user message
 *   - `{{conversation_id}}`  -> the conversation id
 *   - `{{env.NAME}}`         -> `process.env.NAME`, or "" when unset
 * Any other placeholder expands to the empty string.
 *
 * Non-string templates (for example a number or boolean taken from a YAML
 * request template) are returned unchanged instead of throwing, so they can
 * be forwarded to the agent as-is.
 *
 * @param {*} template - Template string, or a non-string value to pass through.
 * @param {string} message - Value substituted for `{{message}}`.
 * @param {string} conversationId - Value substituted for `{{conversation_id}}`.
 * @returns {*} The interpolated string, or `template` itself when it is not a string.
 */
export function interpolateTemplate(template, message, conversationId) {
    // Only strings can carry placeholders; calling .replace on anything else would throw.
    if (typeof template !== "string") {
        return template;
    }
    return template.replace(/\{\{([^}]+)\}\}/g, (_, key) => {
        const k = key.trim();
        if (k === "message") {
            return message;
        }
        if (k === "conversation_id") {
            return conversationId;
        }
        if (k.startsWith("env.")) {
            return process.env[k.slice(4)] ?? "";
        }
        // Unknown placeholders are dropped rather than left in the output.
        return "";
    });
}
|
|
14
|
+
/**
 * Builds the JSON payload sent to an HTTP agent.
 *
 * Without a template the default shape `{ message, conversation_id }` is used.
 * With a template, each field value is run through `interpolateTemplate` and
 * the result is stored under the same field name.
 *
 * @param {object|undefined|null} template - Optional map of field name to value template.
 * @param {string} message - Current user message.
 * @param {string} conversationId - Conversation identifier.
 * @returns {object} The request body object.
 */
export function buildRequestBody(template, message, conversationId) {
    if (template) {
        const payload = {};
        Object.keys(template).forEach((fieldName) => {
            payload[fieldName] = interpolateTemplate(template[fieldName], message, conversationId);
        });
        return payload;
    }
    return { message, conversation_id: conversationId };
}
|
|
24
|
+
/**
 * Reads the agent's reply text out of a parsed response body.
 *
 * @param {*} body - Parsed JSON response.
 * @param {string} [responseField] - Field holding the reply; defaults to "message".
 * @returns {string|null} The reply when the field exists and is a string, otherwise null.
 */
export function extractReply(body, responseField) {
    const replyKey = responseField ?? "message";
    // `typeof null` is "object", so null has to be excluded explicitly.
    if (body === null || typeof body !== "object" || !(replyKey in body)) {
        return null;
    }
    const candidate = body[replyKey];
    if (typeof candidate !== "string") {
        return null;
    }
    return candidate;
}
|
|
32
|
+
/**
 * Sends one conversation turn to an HTTP agent and returns its reply.
 *
 * The request is a JSON POST built from `request_template` (or the default
 * `{ message, conversation_id }` body). Header values go through the same
 * placeholder interpolation as body fields.
 *
 * The timeout covers the whole exchange, including reading the response
 * body, so a server that sends headers and then stalls cannot hang the run.
 *
 * @param {object} input
 * @param {string} input.url - Agent endpoint.
 * @param {string} input.message - User message for this turn.
 * @param {string} input.conversationId - Conversation identifier.
 * @param {object} [input.request_template] - Optional body template.
 * @param {string} [input.response_field] - Reply field name; defaults to "message".
 * @param {object} [input.headers] - Extra headers; values may contain placeholders.
 * @param {number} [input.timeout_ms=30000] - Abort deadline in milliseconds.
 * @returns {Promise<{reply: string, latencyMs: number}>} Reply text and the rounded
 *   time in milliseconds from sending the request until `fetch` resolved.
 * @throws {Error} With a `code` property of:
 *   - "timeout_exceeded" when the deadline elapses,
 *   - "http_connection_failed" when the request could not be completed,
 *   - "http_error" (plus `httpStatus`) for a non-2xx status,
 *   - "invalid_response_format" when the body is not JSON or lacks the reply field.
 */
export async function callHttpAgent(input) {
    const { url, message, conversationId, request_template, response_field, headers = {}, timeout_ms = 30000 } = input;
    const body = buildRequestBody(request_template, message, conversationId);
    const interpolatedHeaders = {};
    for (const [key, value] of Object.entries(headers)) {
        interpolatedHeaders[key] = interpolateTemplate(value, message, conversationId);
    }
    const controller = new AbortController();
    const timeoutHandle = setTimeout(() => controller.abort(), timeout_ms);
    const timeoutError = (cause) => Object.assign(new Error(`Request to ${url} timed out after ${timeout_ms}ms`, { cause }), { code: "timeout_exceeded" });
    const start = performance.now();
    let latencyMs;
    let parsed;
    try {
        let response;
        try {
            response = await fetch(url, {
                method: "POST",
                headers: { "Content-Type": "application/json", ...interpolatedHeaders },
                body: JSON.stringify(body),
                signal: controller.signal,
            });
        }
        catch (error) {
            if (error instanceof Error && error.name === "AbortError") {
                throw timeoutError(error);
            }
            throw Object.assign(new Error(`Connection to ${url} failed: ${error instanceof Error ? error.message : String(error)}`, { cause: error }), { code: "http_connection_failed" });
        }
        // Latency is taken when fetch resolves, before the body is read.
        latencyMs = Math.round(performance.now() - start);
        if (!response.ok) {
            throw Object.assign(new Error(`HTTP ${response.status} from ${url}`), {
                code: "http_error",
                httpStatus: response.status,
            });
        }
        try {
            parsed = await response.json();
        }
        catch (error) {
            // The timer is still armed here, so an abort during the body read is a
            // timeout, not a malformed response.
            if (controller.signal.aborted) {
                throw timeoutError(error);
            }
            throw Object.assign(new Error(`Response from ${url} is not valid JSON`, { cause: error }), { code: "invalid_response_format" });
        }
    }
    finally {
        // Always release the timer, on success and on every error path.
        clearTimeout(timeoutHandle);
    }
    const reply = extractReply(parsed, response_field);
    if (reply === null) {
        const field = response_field ?? "message";
        throw Object.assign(new Error(`Response from ${url} missing expected field '${field}'`), { code: "invalid_response_format" });
    }
    return { reply, latencyMs };
}
|