@agentxjs/devtools 1.9.6-dev → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,284 @@
1
+ # @agentxjs/devtools
2
+
3
+ Development and testing tools for AgentX. Provides BDD utilities for Cucumber-based integration tests, plus VCR-style fixture recording/replay for deterministic LLM testing.
4
+
5
+ ## Overview
6
+
7
+ `@agentxjs/devtools` has two parts:
8
+
9
+ 1. **BDD Utilities** (`@agentxjs/devtools/bdd`) -- Cucumber config, AI-powered UI testing, documentation testing, dev server management.
10
+ 2. **VCR Infrastructure** -- `MockDriver`, `RecordingDriver`, `createVcrCreateDriver` for recording and replaying LLM interactions in unit and integration tests.
11
+
12
+ ## Quick Start: BDD Testing
13
+
14
+ ### 1. Set up Cucumber config
15
+
16
+ ```typescript
17
+ // bdd/cucumber.js
18
+ import { createCucumberConfig } from "@agentxjs/devtools/bdd";
19
+
20
+ export default createCucumberConfig({
21
+ paths: ["bdd/journeys/**/*.feature"],
22
+ import: ["bdd/steps/**/*.ts"],
23
+ });
24
+ ```
25
+
26
+ ### 2. Use agentUiTester for UI tests
27
+
28
+ ```typescript
29
+ import { agentUiTester } from "@agentxjs/devtools/bdd";
30
+
31
+ const result = agentUiTester(`
32
+ Navigate to http://localhost:3000
33
+ Verify redirect to /setup
34
+ Fill email "admin@example.com", password "admin123"
35
+ Click Setup
36
+ Verify logged in as admin
37
+ `);
38
+
39
+ assert.ok(result.passed, result.output);
40
+ ```
41
+
42
+ ### 3. Start a dev server in tests
43
+
44
+ ```typescript
45
+ import { startDevServer, stopDevServer } from "@agentxjs/devtools/bdd";
46
+
47
+ // In BeforeAll hook
48
+ await startDevServer({ cwd: "/path/to/app", port: 3000 });
49
+
50
+ // In AfterAll hook
51
+ stopDevServer();
52
+ ```
53
+
54
+ ### 4. Use MockDriver for unit tests
55
+
56
+ MockDriver replays recorded fixtures — no network calls, fully deterministic. Use it when you need to test code that interacts with a Driver without hitting a real LLM API.
57
+
58
+ ```typescript
59
+ import { MockDriver } from "@agentxjs/devtools";
60
+ import { SIMPLE_REPLY } from "@agentxjs/devtools/fixtures";
61
+
62
+ // Create a mock driver from a built-in fixture
63
+ const driver = new MockDriver({ fixture: SIMPLE_REPLY });
64
+ await driver.initialize();
65
+
66
+ for await (const event of driver.receive({ content: "Hello" })) {
67
+ if (event.type === "text_delta") {
68
+ process.stdout.write(event.data.text);
69
+ }
70
+ }
71
+
72
+ await driver.dispose();
73
+ ```
74
+
75
+ Built-in fixtures: `SIMPLE_REPLY`, `LONG_REPLY`, `TOOL_CALL`, `ERROR`, `EMPTY`.
76
+
77
+ To use your own recorded fixture:
78
+
79
+ ```typescript
80
+ import { MockDriver, getFixture } from "@agentxjs/devtools";
81
+
82
+ const fixture = getFixture("my-recorded-scenario"); // from fixtures directory
83
+ const driver = new MockDriver({ fixture });
84
+ ```
85
+
86
+ ## Quick Start: VCR Recording
87
+
88
+ Record real LLM interactions once, then replay them in subsequent test runs:
89
+
90
+ ```typescript
91
+ import { createDevtools } from "@agentxjs/devtools";
92
+
93
+ const devtools = createDevtools({
94
+ fixturesDir: "./fixtures",
95
+ apiKey: process.env.ANTHROPIC_API_KEY, // only needed for recording
96
+ });
97
+
98
+ // Fixture exists --> playback (MockDriver)
99
+ // Fixture missing --> call real API, record, save, return MockDriver
100
+ const driver = await devtools.driver("greeting-test", {
101
+ message: "Hello!",
102
+ });
103
+
104
+ await driver.initialize();
105
+ for await (const event of driver.receive({ content: "Hello!" })) {
106
+ if (event.type === "text_delta") {
107
+ process.stdout.write(event.data.text);
108
+ }
109
+ }
110
+ await driver.dispose();
111
+ ```
112
+
113
+ ### VCR with Server (Integration Tests)
114
+
115
+ Use `createVcrCreateDriver` to wrap a real driver with VCR logic — ideal for BDD tests that run through the full server stack:
116
+
117
+ ```typescript
118
+ import { createVcrCreateDriver } from "@agentxjs/devtools";
119
+
120
+ const vcrCreateDriver = createVcrCreateDriver({
121
+ fixturesDir: "./fixtures",
122
+ getFixtureName: () => currentFixture,
123
+ apiKey: process.env.ANTHROPIC_API_KEY,
124
+ createRealDriver: createMonoDriver,
125
+ onPlayback: (name) => console.log(`Playback: ${name}`),
126
+ onRecording: (name) => console.log(`Recording: ${name}`),
127
+ });
128
+
129
+ const server = await createServer({ platform, createDriver: vcrCreateDriver });
130
+ ```
131
+
132
+ ## API Reference
133
+
134
+ ### BDD API (`@agentxjs/devtools/bdd`)
135
+
136
+ #### `createCucumberConfig(options: CucumberConfigOptions)`
137
+
138
+ ```typescript
139
+ interface CucumberConfigOptions {
140
+ paths: string[]; // feature file paths
141
+ import: string[]; // step definition paths
142
+ tags?: string; // default: "not @pending and not @skip"
143
+ timeout?: number; // default: 30000 ms
144
+ format?: string[]; // default: ["progress"]
145
+ }
146
+ ```
147
+
148
+ #### `agentUiTester(prompt, options?): UiTestResult`
149
+
150
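+ Runs a UI test scenario using the Claude Code CLI and agent-browser. The result is returned directly as a `UiTestResult` (not a Promise). BDD scripts that call it must run under Node.js (not Bun) to avoid a claude CLI auth bug.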
151
+
152
+ ```typescript
153
+ interface UiTesterOptions {
154
+ model?: string; // default: "haiku"
155
+ baseUrl?: string;
156
+ timeout?: number; // default: 300000 (5 min)
157
+ headed?: boolean; // default: false
158
+ }
159
+
160
+ interface UiTestResult {
161
+ passed: boolean;
162
+ output: string;
163
+ }
164
+ ```
165
+
166
+ #### `agentDocTester(options, testerOptions?): DocTestResult`
167
+
168
+ Evaluates documents against requirements using AgentX. Assesses completeness, logic, and readability.
169
+
170
+ ```typescript
171
+ import { agentDocTester } from "@agentxjs/devtools/bdd";
172
+
173
+ const result = await agentDocTester({
174
+ files: ["packages/core/README.md"],
175
+ requirements: `
176
+ The README should explain Container, Image, Session, Driver, Platform.
177
+ There should be a Quick Start example.
178
+ `,
179
+ });
180
+
181
+ assert.ok(result.passed, result.output);
182
+ ```
183
+
184
+ ```typescript
185
+ interface DocTesterOptions {
186
+ provider?: string; // default: "anthropic"
187
+ model?: string; // default: "claude-haiku-4-5-20251001"
188
+ apiKey?: string; // reads from DEEPRACTICE_API_KEY or ANTHROPIC_API_KEY
189
+ baseUrl?: string; // reads from DEEPRACTICE_BASE_URL
190
+ timeout?: number; // default: 120000 (2 min)
191
+ }
192
+ ```
193
+
194
+ #### `startDevServer(options): Promise<void>`
195
+
196
+ ```typescript
197
+ interface DevServerOptions {
198
+ cwd: string;
199
+ port: number;
200
+ command?: string; // default: "bun"
201
+ args?: string[]; // default: ["run", "dev"]
202
+ timeout?: number; // default: 30000
203
+ debug?: boolean; // default: !!process.env.DEBUG
204
+ }
205
+ ```
206
+
207
+ #### Path Utilities
208
+
209
+ ```typescript
210
+ import { getFixturesPath, getTempPath, ensureDir, getMonorepoPath } from "@agentxjs/devtools/bdd";
211
+ ```
212
+
213
+ ### VCR API (main entry)
214
+
215
+ #### `createDevtools(config: DevtoolsConfig): Devtools`
216
+
217
+ ```typescript
218
+ interface DevtoolsConfig {
219
+ fixturesDir: string;
220
+ apiKey?: string;
221
+ baseUrl?: string;
222
+ model?: string;
223
+ systemPrompt?: string;
224
+ cwd?: string;
225
+ createDriver?: CreateDriver;
226
+ }
227
+ ```
228
+
229
+ | Method | Description |
230
+ | ---------------------------------------- | --------------------------------------------------------- |
231
+ | `driver(name, options): Promise<Driver>` | Get driver — playback if fixture exists, record otherwise |
232
+ | `load(name): Promise<Fixture>` | Load a fixture by name |
233
+ | `exists(name): boolean` | Check if fixture exists |
234
+ | `delete(name): Promise<void>` | Delete a fixture |
235
+
236
+ #### `MockDriver`
237
+
238
+ Replays events from a fixture. No network calls.
239
+
240
+ ```typescript
241
+ import { MockDriver, createMockDriver } from "@agentxjs/devtools";
242
+
243
+ const driver = new MockDriver({ fixture: myFixture });
244
+ ```
245
+
246
+ #### `RecordingDriver`
247
+
248
+ Wraps a real driver and records all events.
249
+
250
+ ```typescript
251
+ import { createRecordingDriver } from "@agentxjs/devtools";
252
+
253
+ const recorder = createRecordingDriver({ driver: realDriver, name: "my-scenario" });
254
+ const fixture = recorder.getFixture(); // after recording
255
+ ```
256
+
257
+ #### Built-in Fixtures
258
+
259
+ ```typescript
260
+ import { SIMPLE_REPLY, TOOL_CALL, getFixture, listFixtures } from "@agentxjs/devtools/fixtures";
261
+
262
+ listFixtures(); // ["simple-reply", "long-reply", "tool-call", "error", "empty"]
263
+ ```
264
+
265
+ ## Configuration
266
+
267
+ ### Package Exports
268
+
269
+ | Import path | Contents |
270
+ | ----------------------------- | --------------------------------------------------------------------------------------- |
271
+ | `@agentxjs/devtools` | VCR: `Devtools`, `MockDriver`, `RecordingDriver`, fixtures |
272
+ | `@agentxjs/devtools/mock` | `MockDriver`, `createMockDriver` |
273
+ | `@agentxjs/devtools/recorder` | `RecordingDriver`, `createRecordingDriver` |
274
+ | `@agentxjs/devtools/fixtures` | Built-in fixtures, `getFixture`, `listFixtures` |
275
+ | `@agentxjs/devtools/bdd` | BDD: `createCucumberConfig`, `agentUiTester`, `agentDocTester`, `startDevServer`, paths |
276
+
277
+ ### Peer Dependencies (optional)
278
+
279
+ | Package | When needed |
280
+ | ------------------------- | ---------------------------------------------------- |
281
+ | `agentxjs` | `agentDocTester` (uses AgentX SDK for AI evaluation) |
282
+ | `@agentxjs/claude-driver` | Recording with claude-driver |
283
+ | `@playwright/test` | Browser-based BDD tests |
284
+ | `@cucumber/cucumber` | BDD test runner |
@@ -0,0 +1 @@
1
+ #!/usr/bin/env node
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/bdd/cli.ts
4
+ import { spawn } from "child_process";
5
+ import { resolve, dirname, relative } from "path";
6
+ import { existsSync, readFileSync, writeFileSync, unlinkSync } from "fs";
7
+ import { fileURLToPath } from "url";
8
+ var __dirname = dirname(fileURLToPath(import.meta.url));
9
+ function loadEnvFile(filePath) {
10
+ if (!existsSync(filePath)) return;
11
+ const content = readFileSync(filePath, "utf-8");
12
+ for (const line of content.split("\n")) {
13
+ const trimmed = line.trim();
14
+ if (!trimmed || trimmed.startsWith("#")) continue;
15
+ const eqIndex = trimmed.indexOf("=");
16
+ if (eqIndex === -1) continue;
17
+ const key = trimmed.slice(0, eqIndex).trim();
18
+ let value = trimmed.slice(eqIndex + 1).trim();
19
+ if (value.startsWith('"') && value.endsWith('"') || value.startsWith("'") && value.endsWith("'")) {
20
+ value = value.slice(1, -1);
21
+ }
22
+ if (process.env[key] === void 0) {
23
+ process.env[key] = value;
24
+ }
25
+ }
26
+ }
27
+ function findMonorepoRoot(startDir) {
28
+ let dir = startDir;
29
+ while (true) {
30
+ const pkgPath = resolve(dir, "package.json");
31
+ if (existsSync(pkgPath)) {
32
+ try {
33
+ const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
34
+ if (pkg.workspaces) return dir;
35
+ } catch {
36
+ }
37
+ }
38
+ const parent = dirname(dir);
39
+ if (parent === dir) return null;
40
+ dir = parent;
41
+ }
42
+ }
43
+ var cwd = process.cwd();
44
+ loadEnvFile(resolve(cwd, ".env"));
45
+ loadEnvFile(resolve(cwd, ".env.local"));
46
+ var monorepoRoot = findMonorepoRoot(cwd);
47
+ if (monorepoRoot && monorepoRoot !== cwd) {
48
+ loadEnvFile(resolve(monorepoRoot, ".env"));
49
+ loadEnvFile(resolve(monorepoRoot, ".env.local"));
50
+ }
51
+ var args = process.argv.slice(2);
52
+ var configPath = "bdd/cucumber.js";
53
+ var configIndex = args.indexOf("--config");
54
+ if (configIndex !== -1 && args[configIndex + 1]) {
55
+ configPath = args[configIndex + 1];
56
+ args.splice(configIndex, 2);
57
+ }
58
+ var fullConfigPath = resolve(cwd, configPath);
59
+ if (!existsSync(fullConfigPath)) {
60
+ console.error(`Config not found: ${fullConfigPath}`);
61
+ console.error("Create bdd/cucumber.js or specify --config path");
62
+ process.exit(1);
63
+ }
64
+ var featurePaths = [];
65
+ var flags = [];
66
+ for (const arg of args) {
67
+ if (arg.startsWith("-")) {
68
+ flags.push(arg);
69
+ } else if (arg.endsWith(".feature") || arg.includes(".feature:")) {
70
+ featurePaths.push(arg);
71
+ } else {
72
+ flags.push(arg);
73
+ }
74
+ }
75
+ var cucumberPaths = [
76
+ resolve(cwd, "node_modules/.bin/cucumber-js"),
77
+ resolve(__dirname, "../../../.bin/cucumber-js"),
78
+ "cucumber-js"
79
+ ];
80
+ var cucumberBin = cucumberPaths.find((p) => p === "cucumber-js" || existsSync(p)) || "cucumber-js";
81
+ var rootNodeModules = resolve(cwd, "node_modules");
82
+ var effectiveConfig = configPath;
83
+ var tempConfigPath = null;
84
+ if (featurePaths.length > 0) {
85
+ const configRelPath = relative(
86
+ dirname(resolve(cwd, "bdd/.tmp-cucumber.js")),
87
+ fullConfigPath
88
+ ).replace(/\\/g, "/");
89
+ const pathsJson = JSON.stringify(featurePaths);
90
+ const tempContent = [
91
+ `import config from "./${configRelPath}";`,
92
+ `export default { ...config.default ?? config, paths: ${pathsJson} };`,
93
+ ""
94
+ ].join("\n");
95
+ tempConfigPath = resolve(cwd, "bdd/.tmp-cucumber.js");
96
+ writeFileSync(tempConfigPath, tempContent);
97
+ effectiveConfig = "bdd/.tmp-cucumber.js";
98
+ }
99
+ var cucumberArgs = ["--config", effectiveConfig, ...flags];
100
+ var child = spawn(cucumberBin, cucumberArgs, {
101
+ stdio: "inherit",
102
+ env: {
103
+ ...process.env,
104
+ NODE_OPTIONS: "--import tsx",
105
+ NODE_PATH: rootNodeModules
106
+ }
107
+ });
108
+ child.on("close", (code) => {
109
+ if (tempConfigPath && existsSync(tempConfigPath)) {
110
+ try {
111
+ unlinkSync(tempConfigPath);
112
+ } catch {
113
+ }
114
+ }
115
+ process.exit(code ?? 0);
116
+ });
117
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/bdd/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n/**\n * BDD CLI wrapper for cucumber-js\n *\n * Usage:\n * bdd # Run all tests\n * bdd path/to/file.feature # Run specific feature file\n * bdd path/to/file.feature:10 # Run specific scenario by line\n * bdd --tags @contributor # Run specific tags\n * bdd --tags \"@dev and not @slow\" # Tag expression\n * bdd --name \"token usage\" # Filter by scenario name (regex)\n * bdd --dry-run # Validate without executing\n * bdd --config path # Custom config (default: bdd/cucumber.js)\n */\n\nimport { spawn } from \"node:child_process\";\nimport { resolve, dirname, relative } from \"node:path\";\nimport { existsSync, readFileSync, writeFileSync, unlinkSync } from \"node:fs\";\nimport { fileURLToPath } from \"node:url\";\n\nconst __dirname = dirname(fileURLToPath(import.meta.url));\n\n// Load .env files (like dotenv but zero dependencies)\nfunction loadEnvFile(filePath: string) {\n if (!existsSync(filePath)) return;\n const content = readFileSync(filePath, \"utf-8\");\n for (const line of content.split(\"\\n\")) {\n const trimmed = line.trim();\n if (!trimmed || trimmed.startsWith(\"#\")) continue;\n const eqIndex = trimmed.indexOf(\"=\");\n if (eqIndex === -1) continue;\n const key = trimmed.slice(0, eqIndex).trim();\n let value = trimmed.slice(eqIndex + 1).trim();\n if (\n (value.startsWith('\"') && value.endsWith('\"')) ||\n (value.startsWith(\"'\") && value.endsWith(\"'\"))\n ) {\n value = value.slice(1, -1);\n }\n if (process.env[key] === undefined) {\n process.env[key] = value;\n }\n }\n}\n\n// Find monorepo root by walking up to find the root package.json with workspaces\nfunction findMonorepoRoot(startDir: string): string | null {\n let dir = startDir;\n while (true) {\n const pkgPath = resolve(dir, \"package.json\");\n if (existsSync(pkgPath)) {\n try {\n const pkg = JSON.parse(readFileSync(pkgPath, \"utf-8\"));\n if (pkg.workspaces) return dir;\n } catch {\n // ignore parse 
errors\n }\n }\n const parent = dirname(dir);\n if (parent === dir) return null;\n dir = parent;\n }\n}\n\nconst cwd = process.cwd();\n\n// Load .env files from cwd first, then monorepo root\nloadEnvFile(resolve(cwd, \".env\"));\nloadEnvFile(resolve(cwd, \".env.local\"));\n\nconst monorepoRoot = findMonorepoRoot(cwd);\nif (monorepoRoot && monorepoRoot !== cwd) {\n loadEnvFile(resolve(monorepoRoot, \".env\"));\n loadEnvFile(resolve(monorepoRoot, \".env.local\"));\n}\n\nconst args = process.argv.slice(2);\n\n// Extract --config\nlet configPath = \"bdd/cucumber.js\";\nconst configIndex = args.indexOf(\"--config\");\nif (configIndex !== -1 && args[configIndex + 1]) {\n configPath = args[configIndex + 1];\n args.splice(configIndex, 2);\n}\n\n// Check if config exists\nconst fullConfigPath = resolve(cwd, configPath);\nif (!existsSync(fullConfigPath)) {\n console.error(`Config not found: ${fullConfigPath}`);\n console.error(\"Create bdd/cucumber.js or specify --config path\");\n process.exit(1);\n}\n\n// Separate positional args (feature files/lines) from flags\nconst featurePaths: string[] = [];\nconst flags: string[] = [];\n\nfor (const arg of args) {\n if (arg.startsWith(\"-\")) {\n flags.push(arg);\n } else if (arg.endsWith(\".feature\") || arg.includes(\".feature:\")) {\n featurePaths.push(arg);\n } else {\n // Could be a flag value (e.g. 
after --tags), keep as-is\n flags.push(arg);\n }\n}\n\n// Find cucumber-js binary\nconst cucumberPaths = [\n resolve(cwd, \"node_modules/.bin/cucumber-js\"),\n resolve(__dirname, \"../../../.bin/cucumber-js\"),\n \"cucumber-js\",\n];\nconst cucumberBin =\n cucumberPaths.find((p) => p === \"cucumber-js\" || existsSync(p)) || \"cucumber-js\";\n\nconst rootNodeModules = resolve(cwd, \"node_modules\");\n\n// When feature paths are specified, generate a temp config that overrides\n// the original config's `paths` — cucumber-js config.paths takes precedence\n// over positional args, so we must override it in the config itself.\nlet effectiveConfig = configPath;\nlet tempConfigPath: string | null = null;\n\nif (featurePaths.length > 0) {\n const configRelPath = relative(\n dirname(resolve(cwd, \"bdd/.tmp-cucumber.js\")),\n fullConfigPath\n ).replace(/\\\\/g, \"/\");\n const pathsJson = JSON.stringify(featurePaths);\n const tempContent = [\n `import config from \"./${configRelPath}\";`,\n `export default { ...config.default ?? config, paths: ${pathsJson} };`,\n \"\",\n ].join(\"\\n\");\n\n tempConfigPath = resolve(cwd, \"bdd/.tmp-cucumber.js\");\n writeFileSync(tempConfigPath, tempContent);\n effectiveConfig = \"bdd/.tmp-cucumber.js\";\n}\n\n// Build cucumber args\nconst cucumberArgs = [\"--config\", effectiveConfig, ...flags];\n\nconst child = spawn(cucumberBin, cucumberArgs, {\n stdio: \"inherit\",\n env: {\n ...process.env,\n NODE_OPTIONS: \"--import tsx\",\n NODE_PATH: rootNodeModules,\n },\n});\n\nchild.on(\"close\", (code) => {\n // Clean up temp config\n if (tempConfigPath && existsSync(tempConfigPath)) {\n try {\n unlinkSync(tempConfigPath);\n } catch {\n // ignore cleanup errors\n }\n }\n process.exit(code ?? 
0);\n});\n"],"mappings":";;;AAeA,SAAS,aAAa;AACtB,SAAS,SAAS,SAAS,gBAAgB;AAC3C,SAAS,YAAY,cAAc,eAAe,kBAAkB;AACpE,SAAS,qBAAqB;AAE9B,IAAM,YAAY,QAAQ,cAAc,YAAY,GAAG,CAAC;AAGxD,SAAS,YAAY,UAAkB;AACrC,MAAI,CAAC,WAAW,QAAQ,EAAG;AAC3B,QAAM,UAAU,aAAa,UAAU,OAAO;AAC9C,aAAW,QAAQ,QAAQ,MAAM,IAAI,GAAG;AACtC,UAAM,UAAU,KAAK,KAAK;AAC1B,QAAI,CAAC,WAAW,QAAQ,WAAW,GAAG,EAAG;AACzC,UAAM,UAAU,QAAQ,QAAQ,GAAG;AACnC,QAAI,YAAY,GAAI;AACpB,UAAM,MAAM,QAAQ,MAAM,GAAG,OAAO,EAAE,KAAK;AAC3C,QAAI,QAAQ,QAAQ,MAAM,UAAU,CAAC,EAAE,KAAK;AAC5C,QACG,MAAM,WAAW,GAAG,KAAK,MAAM,SAAS,GAAG,KAC3C,MAAM,WAAW,GAAG,KAAK,MAAM,SAAS,GAAG,GAC5C;AACA,cAAQ,MAAM,MAAM,GAAG,EAAE;AAAA,IAC3B;AACA,QAAI,QAAQ,IAAI,GAAG,MAAM,QAAW;AAClC,cAAQ,IAAI,GAAG,IAAI;AAAA,IACrB;AAAA,EACF;AACF;AAGA,SAAS,iBAAiB,UAAiC;AACzD,MAAI,MAAM;AACV,SAAO,MAAM;AACX,UAAM,UAAU,QAAQ,KAAK,cAAc;AAC3C,QAAI,WAAW,OAAO,GAAG;AACvB,UAAI;AACF,cAAM,MAAM,KAAK,MAAM,aAAa,SAAS,OAAO,CAAC;AACrD,YAAI,IAAI,WAAY,QAAO;AAAA,MAC7B,QAAQ;AAAA,MAER;AAAA,IACF;AACA,UAAM,SAAS,QAAQ,GAAG;AAC1B,QAAI,WAAW,IAAK,QAAO;AAC3B,UAAM;AAAA,EACR;AACF;AAEA,IAAM,MAAM,QAAQ,IAAI;AAGxB,YAAY,QAAQ,KAAK,MAAM,CAAC;AAChC,YAAY,QAAQ,KAAK,YAAY,CAAC;AAEtC,IAAM,eAAe,iBAAiB,GAAG;AACzC,IAAI,gBAAgB,iBAAiB,KAAK;AACxC,cAAY,QAAQ,cAAc,MAAM,CAAC;AACzC,cAAY,QAAQ,cAAc,YAAY,CAAC;AACjD;AAEA,IAAM,OAAO,QAAQ,KAAK,MAAM,CAAC;AAGjC,IAAI,aAAa;AACjB,IAAM,cAAc,KAAK,QAAQ,UAAU;AAC3C,IAAI,gBAAgB,MAAM,KAAK,cAAc,CAAC,GAAG;AAC/C,eAAa,KAAK,cAAc,CAAC;AACjC,OAAK,OAAO,aAAa,CAAC;AAC5B;AAGA,IAAM,iBAAiB,QAAQ,KAAK,UAAU;AAC9C,IAAI,CAAC,WAAW,cAAc,GAAG;AAC/B,UAAQ,MAAM,qBAAqB,cAAc,EAAE;AACnD,UAAQ,MAAM,iDAAiD;AAC/D,UAAQ,KAAK,CAAC;AAChB;AAGA,IAAM,eAAyB,CAAC;AAChC,IAAM,QAAkB,CAAC;AAEzB,WAAW,OAAO,MAAM;AACtB,MAAI,IAAI,WAAW,GAAG,GAAG;AACvB,UAAM,KAAK,GAAG;AAAA,EAChB,WAAW,IAAI,SAAS,UAAU,KAAK,IAAI,SAAS,WAAW,GAAG;AAChE,iBAAa,KAAK,GAAG;AAAA,EACvB,OAAO;AAEL,UAAM,KAAK,GAAG;AAAA,EAChB;AACF;AAGA,IAAM,gBAAgB;AAAA,EACpB,QAAQ,KAAK,+BAA+B;AAAA,EAC5C,QAAQ,WAAW,2BAA2B;AAAA,EAC9C;AACF;AACA,IAAM,cACJ,cAAc,KAAK,CAAC,MAAM,MAAM,iBAAiB,WAAW,CAAC,CAAC,KAAK;AAErE,IAAM,kBAAkB,QAAQ,KAAK,cAAc;AAK
nD,IAAI,kBAAkB;AACtB,IAAI,iBAAgC;AAEpC,IAAI,aAAa,SAAS,GAAG;AAC3B,QAAM,gBAAgB;AAAA,IACpB,QAAQ,QAAQ,KAAK,sBAAsB,CAAC;AAAA,IAC5C;AAAA,EACF,EAAE,QAAQ,OAAO,GAAG;AACpB,QAAM,YAAY,KAAK,UAAU,YAAY;AAC7C,QAAM,cAAc;AAAA,IAClB,yBAAyB,aAAa;AAAA,IACtC,wDAAwD,SAAS;AAAA,IACjE;AAAA,EACF,EAAE,KAAK,IAAI;AAEX,mBAAiB,QAAQ,KAAK,sBAAsB;AACpD,gBAAc,gBAAgB,WAAW;AACzC,oBAAkB;AACpB;AAGA,IAAM,eAAe,CAAC,YAAY,iBAAiB,GAAG,KAAK;AAE3D,IAAM,QAAQ,MAAM,aAAa,cAAc;AAAA,EAC7C,OAAO;AAAA,EACP,KAAK;AAAA,IACH,GAAG,QAAQ;AAAA,IACX,cAAc;AAAA,IACd,WAAW;AAAA,EACb;AACF,CAAC;AAED,MAAM,GAAG,SAAS,CAAC,SAAS;AAE1B,MAAI,kBAAkB,WAAW,cAAc,GAAG;AAChD,QAAI;AACF,iBAAW,cAAc;AAAA,IAC3B,QAAQ;AAAA,IAER;AAAA,EACF;AACA,UAAQ,KAAK,QAAQ,CAAC;AACxB,CAAC;","names":[]}
@@ -0,0 +1,202 @@
1
+ import { Browser, Page } from '@playwright/test';
2
+ import { ChildProcess } from 'node:child_process';
3
+
4
+ /**
5
+ * Shared Cucumber configuration for BDD tests
6
+ *
7
+ * Usage in project's cucumber.js:
8
+ *
9
+ * ```js
10
+ * import { createCucumberConfig } from "@agentxjs/devtools/bdd";
11
+ *
12
+ * export default createCucumberConfig({
13
+ * paths: ["bdd/journeys/** /*.feature"],
14
+ * import: ["bdd/steps/** /*.ts"],
15
+ * });
16
+ * ```
17
+ */
18
+ interface CucumberConfigOptions {
19
+ /** Feature file paths */
20
+ paths: string[];
21
+ /** Step definition paths */
22
+ import: string[];
23
+ /** Tags to filter (default: exclude @pending and @skip) */
24
+ tags?: string;
25
+ /** Default timeout in ms (default: 30000) */
26
+ timeout?: number;
27
+ /** Format output (default: progress) */
28
+ format?: string[];
29
+ }
30
+ declare function createCucumberConfig(options: CucumberConfigOptions): {
31
+ format: string[];
32
+ formatOptions: {
33
+ snippetInterface: string;
34
+ };
35
+ import: string[];
36
+ paths: string[];
37
+ tags: string;
38
+ worldParameters: {
39
+ defaultTimeout: number;
40
+ };
41
+ };
42
+
43
+ /**
44
+ * Playwright utilities for BDD testing
45
+ *
46
+ * Uses system Chrome to avoid downloading Chromium.
47
+ * Install: PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 bun add -d @playwright/test
48
+ *
49
+ * Browser lifecycle:
50
+ * - Single browser instance for all tests
51
+ * - Single page (tab) reused across scenarios
52
+ * - resetPage() clears state between scenarios
53
+ */
54
+
55
+ interface BrowserOptions {
56
+ headless?: boolean;
57
+ slowMo?: number;
58
+ }
59
+ /**
60
+ * Launch browser using system Chrome (singleton)
61
+ */
62
+ declare function launchBrowser(options?: BrowserOptions): Promise<Browser>;
63
+ /**
64
+ * Get or create a page (singleton, reused across scenarios)
65
+ */
66
+ declare function getPage(): Promise<Page>;
67
+ /**
68
+ * Reset page state between scenarios (without closing)
69
+ * Use this instead of closePage() for faster tests
70
+ */
71
+ declare function resetPage(): Promise<void>;
72
+ /**
73
+ * Close current page
74
+ * @deprecated Use resetPage() for faster tests. Only use closePage() if you need full isolation.
75
+ */
76
+ declare function closePage(): Promise<void>;
77
+ /**
78
+ * Close browser and cleanup
79
+ */
80
+ declare function closeBrowser(): Promise<void>;
81
+ /**
82
+ * Wait for a URL to be accessible
83
+ */
84
+ declare function waitForUrl(url: string, timeout?: number): Promise<boolean>;
85
+
86
+ /**
87
+ * Dev server utilities for BDD testing
88
+ *
89
+ * Start and stop dev servers during test runs.
90
+ */
91
+
92
+ interface DevServerOptions {
93
+ /** Working directory */
94
+ cwd: string;
95
+ /** Command to run (default: "bun") */
96
+ command?: string;
97
+ /** Command arguments (default: ["run", "dev"]) */
98
+ args?: string[];
99
+ /** Port to wait for */
100
+ port: number;
101
+ /** Startup timeout in ms (default: 30000) */
102
+ timeout?: number;
103
+ /** Show server output (default: false, or true if DEBUG env is set) */
104
+ debug?: boolean;
105
+ }
106
+ /**
107
+ * Start a dev server and wait for it to be ready
108
+ */
109
+ declare function startDevServer(options: DevServerOptions): Promise<void>;
110
+ /**
111
+ * Stop the dev server
112
+ */
113
+ declare function stopDevServer(): void;
114
+ /**
115
+ * Get the dev server process (for advanced use)
116
+ */
117
+ declare function getDevServer(): ChildProcess | null;
118
+
119
+ /**
120
+ * Monorepo root directory
121
+ */
122
+ declare function getMonorepoPath(): string;
123
+ /**
124
+ * Current package root directory
125
+ */
126
+ declare function getPackagePath(): string;
127
+ /**
128
+ * BDD directory for current package
129
+ */
130
+ declare function getBddPath(): string;
131
+ /**
132
+ * Fixtures directory for current package's BDD tests
133
+ */
134
+ declare function getFixturesPath(subdir?: string): string;
135
+ /**
136
+ * Get or create a temporary directory for tests
137
+ */
138
+ declare function getTempPath(prefix?: string): string;
139
+ /**
140
+ * Ensure a directory exists, creating it if necessary
141
+ */
142
+ declare function ensureDir(path: string): string;
143
+ /**
144
+ * Reset cached paths (useful for testing)
145
+ */
146
+ declare function resetPaths(): void;
147
+ declare const paths: {
148
+ monorepo: typeof getMonorepoPath;
149
+ package: typeof getPackagePath;
150
+ bdd: typeof getBddPath;
151
+ fixtures: typeof getFixturesPath;
152
+ temp: typeof getTempPath;
153
+ ensure: typeof ensureDir;
154
+ reset: typeof resetPaths;
155
+ };
156
+
157
+ interface UiTestResult {
158
+ passed: boolean;
159
+ output: string;
160
+ }
161
+ interface UiTesterOptions {
162
+ model?: string;
163
+ baseUrl?: string;
164
+ timeout?: number;
165
+ /** Show browser window (default: false) */
166
+ headed?: boolean;
167
+ }
168
+ /**
169
+ * Run a UI test scenario using Claude Code CLI + agent-browser.
170
+ *
171
+ * BDD scripts must run under Node.js (not Bun) to avoid a claude CLI auth bug.
172
+ */
173
+ declare function agentUiTester(prompt: string, options?: UiTesterOptions): UiTestResult;
174
+
175
+ interface DocTestResult {
176
+ passed: boolean;
177
+ output: string;
178
+ }
179
+ interface DocTesterOptions {
180
+ /** LLM provider (default: "anthropic") */
181
+ provider?: string;
182
+ /** Model name */
183
+ model?: string;
184
+ /** API key (reads from env if not provided) */
185
+ apiKey?: string;
186
+ /** Base URL (reads from env if not provided) */
187
+ baseUrl?: string;
188
+ /** Timeout in ms */
189
+ timeout?: number;
190
+ }
191
+ /**
192
+ * Evaluate a document against requirements using AgentX.
193
+ *
194
+ * Uses agentxjs local mode — no subprocess, no CLI, no auth issues.
195
+ * Requires `agentxjs` as a peer dependency.
196
+ */
197
+ declare function agentDocTester(options: {
198
+ files: string[];
199
+ requirements: string;
200
+ }, testerOptions?: DocTesterOptions): Promise<DocTestResult>;
201
+
202
+ export { type BrowserOptions, type CucumberConfigOptions, type DevServerOptions, type DocTestResult, type DocTesterOptions, type UiTestResult, type UiTesterOptions, agentDocTester, agentUiTester, closeBrowser, closePage, createCucumberConfig, ensureDir, getBddPath, getDevServer, getFixturesPath, getMonorepoPath, getPackagePath, getPage, getTempPath, launchBrowser, paths, resetPage, resetPaths, startDevServer, stopDevServer, waitForUrl };