@agentxjs/devtools 1.9.6-dev → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +284 -0
  2. package/dist/bdd/cli.d.ts +1 -0
  3. package/dist/bdd/cli.js +117 -0
  4. package/dist/bdd/cli.js.map +1 -0
  5. package/dist/bdd/index.d.ts +202 -0
  6. package/dist/bdd/index.js +383 -0
  7. package/dist/bdd/index.js.map +1 -0
  8. package/dist/chunk-DGUM43GV.js +11 -0
  9. package/dist/chunk-DGUM43GV.js.map +1 -0
  10. package/dist/{chunk-SQDCFUA3.js → chunk-SVPPRUN5.js} +9 -1
  11. package/dist/chunk-SVPPRUN5.js.map +1 -0
  12. package/dist/chunk-Y6RXZINS.js +64 -0
  13. package/dist/chunk-Y6RXZINS.js.map +1 -0
  14. package/dist/{chunk-YRTTCKHM.js → chunk-YFONF7SD.js} +1 -1
  15. package/dist/chunk-YFONF7SD.js.map +1 -0
  16. package/dist/fixtures/index.js +1 -0
  17. package/dist/index.d.ts +38 -5
  18. package/dist/index.js +11 -15
  19. package/dist/index.js.map +1 -1
  20. package/dist/mock/index.d.ts +1 -1
  21. package/dist/mock/index.js +2 -1
  22. package/dist/recorder/index.d.ts +1 -1
  23. package/dist/recorder/index.js +2 -1
  24. package/package.json +27 -5
  25. package/src/Devtools.ts +17 -20
  26. package/src/bdd/agent-doc-tester.ts +130 -0
  27. package/src/bdd/agent-ui-tester.ts +88 -0
  28. package/src/bdd/cli.ts +166 -0
  29. package/src/bdd/cucumber.config.ts +40 -0
  30. package/src/bdd/dev-server.ts +82 -0
  31. package/src/bdd/index.ts +36 -0
  32. package/src/bdd/paths.ts +140 -0
  33. package/src/bdd/playwright.ts +110 -0
  34. package/src/env.ts +97 -0
  35. package/src/index.ts +19 -19
  36. package/src/mock/MockDriver.ts +23 -14
  37. package/src/mock/index.ts +1 -1
  38. package/src/recorder/RecordingDriver.ts +2 -6
  39. package/src/recorder/index.ts +1 -1
  40. package/dist/chunk-SQDCFUA3.js.map +0 -1
  41. package/dist/chunk-YRTTCKHM.js.map +0 -1
package/README.md ADDED
@@ -0,0 +1,284 @@
1
+ # @agentxjs/devtools
2
+
3
+ Development and testing tools for AgentX. Provides BDD utilities for Cucumber-based integration tests, plus VCR-style fixture recording/replay for deterministic LLM testing.
4
+
5
+ ## Overview
6
+
7
+ `@agentxjs/devtools` has two parts:
8
+
9
+ 1. **BDD Utilities** (`@agentxjs/devtools/bdd`) -- Cucumber config, AI-powered UI testing, documentation testing, dev server management.
10
+ 2. **VCR Infrastructure** -- `MockDriver`, `RecordingDriver`, `createVcrCreateDriver` for recording and replaying LLM interactions in unit tests.
11
+
12
+ ## Quick Start: BDD Testing
13
+
14
+ ### 1. Set up Cucumber config
15
+
16
+ ```typescript
17
+ // bdd/cucumber.js
18
+ import { createCucumberConfig } from "@agentxjs/devtools/bdd";
19
+
20
+ export default createCucumberConfig({
21
+ paths: ["bdd/journeys/**/*.feature"],
22
+ import: ["bdd/steps/**/*.ts"],
23
+ });
24
+ ```
25
+
26
+ ### 2. Use agentUiTester for UI tests
27
+
28
+ ```typescript
29
+ import { agentUiTester } from "@agentxjs/devtools/bdd";
30
+
31
+ const result = agentUiTester(`
32
+ Navigate to http://localhost:3000
33
+ Verify redirect to /setup
34
+ Fill email "admin@example.com", password "admin123"
35
+ Click Setup
36
+ Verify logged in as admin
37
+ `);
38
+
39
+ assert.ok(result.passed, result.output);
40
+ ```
41
+
42
+ ### 3. Start a dev server in tests
43
+
44
+ ```typescript
45
+ import { startDevServer, stopDevServer } from "@agentxjs/devtools/bdd";
46
+
47
+ // In BeforeAll hook
48
+ await startDevServer({ cwd: "/path/to/app", port: 3000 });
49
+
50
+ // In AfterAll hook
51
+ stopDevServer();
52
+ ```
53
+
54
+ ### 4. Use MockDriver for unit tests
55
+
56
+ MockDriver replays recorded fixtures — no network calls, fully deterministic. Use it when you need to test code that interacts with a Driver without hitting a real LLM API.
57
+
58
+ ```typescript
59
+ import { MockDriver } from "@agentxjs/devtools";
60
+ import { SIMPLE_REPLY } from "@agentxjs/devtools/fixtures";
61
+
62
+ // Create a mock driver from a built-in fixture
63
+ const driver = new MockDriver({ fixture: SIMPLE_REPLY });
64
+ await driver.initialize();
65
+
66
+ for await (const event of driver.receive({ content: "Hello" })) {
67
+ if (event.type === "text_delta") {
68
+ process.stdout.write(event.data.text);
69
+ }
70
+ }
71
+
72
+ await driver.dispose();
73
+ ```
74
+
75
+ Built-in fixtures: `SIMPLE_REPLY`, `LONG_REPLY`, `TOOL_CALL`, `ERROR`, `EMPTY`.
76
+
77
+ To use your own recorded fixture:
78
+
79
+ ```typescript
80
+ import { MockDriver, getFixture } from "@agentxjs/devtools";
81
+
82
+ const fixture = getFixture("my-recorded-scenario"); // from fixtures directory
83
+ const driver = new MockDriver({ fixture });
84
+ ```
85
+
86
+ ## Quick Start: VCR Recording
87
+
88
+ Record real LLM interactions once, then replay them in subsequent test runs:
89
+
90
+ ```typescript
91
+ import { createDevtools } from "@agentxjs/devtools";
92
+
93
+ const devtools = createDevtools({
94
+ fixturesDir: "./fixtures",
95
+ apiKey: process.env.ANTHROPIC_API_KEY, // only needed for recording
96
+ });
97
+
98
+ // Fixture exists --> playback (MockDriver)
99
+ // Fixture missing --> call real API, record, save, return MockDriver
100
+ const driver = await devtools.driver("greeting-test", {
101
+ message: "Hello!",
102
+ });
103
+
104
+ await driver.initialize();
105
+ for await (const event of driver.receive({ content: "Hello!" })) {
106
+ if (event.type === "text_delta") {
107
+ process.stdout.write(event.data.text);
108
+ }
109
+ }
110
+ await driver.dispose();
111
+ ```
112
+
113
+ ### VCR with Server (Integration Tests)
114
+
115
+ Use `createVcrCreateDriver` to wrap a real driver with VCR logic — ideal for BDD tests that run through the full server stack:
116
+
117
+ ```typescript
118
+ import { createVcrCreateDriver } from "@agentxjs/devtools";
119
+
120
+ const vcrCreateDriver = createVcrCreateDriver({
121
+ fixturesDir: "./fixtures",
122
+ getFixtureName: () => currentFixture,
123
+ apiKey: process.env.ANTHROPIC_API_KEY,
124
+ createRealDriver: createMonoDriver,
125
+ onPlayback: (name) => console.log(`Playback: ${name}`),
126
+ onRecording: (name) => console.log(`Recording: ${name}`),
127
+ });
128
+
129
+ const server = await createServer({ platform, createDriver: vcrCreateDriver });
130
+ ```
131
+
132
+ ## API Reference
133
+
134
+ ### BDD API (`@agentxjs/devtools/bdd`)
135
+
136
+ #### `createCucumberConfig(options: CucumberConfigOptions)`
137
+
138
+ ```typescript
139
+ interface CucumberConfigOptions {
140
+ paths: string[]; // feature file paths
141
+ import: string[]; // step definition paths
142
+ tags?: string; // default: "not @pending and not @skip"
143
+ timeout?: number; // default: 30000 ms
144
+ format?: string[]; // default: ["progress"]
145
+ }
146
+ ```
147
+
148
+ #### `agentUiTester(prompt, options?): UiTestResult`
149
+
150
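+ Runs a UI test scenario using the Claude Code CLI + agent-browser and returns the result synchronously. BDD scripts must run under Node.js (not Bun) to avoid a Claude CLI auth bug.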
151
+
152
+ ```typescript
153
+ interface UiTesterOptions {
154
+ model?: string; // default: "haiku"
155
+ baseUrl?: string;
156
+ timeout?: number; // default: 300000 (5 min)
157
+ headed?: boolean; // default: false
158
+ }
159
+
160
+ interface UiTestResult {
161
+ passed: boolean;
162
+ output: string;
163
+ }
164
+ ```
165
+
166
+ #### `agentDocTester(options, testerOptions?): Promise<DocTestResult>`
167
+
168
+ Evaluates documents against requirements using AgentX. Assesses completeness, logic, and readability.
169
+
170
+ ```typescript
171
+ import { agentDocTester } from "@agentxjs/devtools/bdd";
172
+
173
+ const result = await agentDocTester({
174
+ files: ["packages/core/README.md"],
175
+ requirements: `
176
+ The README should explain Container, Image, Session, Driver, Platform.
177
+ There should be a Quick Start example.
178
+ `,
179
+ });
180
+
181
+ assert.ok(result.passed, result.output);
182
+ ```
183
+
184
+ ```typescript
185
+ interface DocTesterOptions {
186
+ provider?: string; // default: "anthropic"
187
+ model?: string; // default: "claude-haiku-4-5-20251001"
188
+ apiKey?: string; // reads from DEEPRACTICE_API_KEY or ANTHROPIC_API_KEY
189
+ baseUrl?: string; // reads from DEEPRACTICE_BASE_URL
190
+ timeout?: number; // default: 120000 (2 min)
191
+ }
192
+ ```
193
+
194
+ #### `startDevServer(options): Promise<void>`
195
+
196
+ ```typescript
197
+ interface DevServerOptions {
198
+ cwd: string;
199
+ port: number;
200
+ command?: string; // default: "bun"
201
+ args?: string[]; // default: ["run", "dev"]
202
+ timeout?: number; // default: 30000
203
+ debug?: boolean; // default: !!process.env.DEBUG
204
+ }
205
+ ```
206
+
207
+ #### Path Utilities
208
+
209
+ ```typescript
210
+ import { getFixturesPath, getTempPath, ensureDir, getMonorepoPath } from "@agentxjs/devtools/bdd";
211
+ ```
212
+
213
+ ### VCR API (main entry)
214
+
215
+ #### `createDevtools(config: DevtoolsConfig): Devtools`
216
+
217
+ ```typescript
218
+ interface DevtoolsConfig {
219
+ fixturesDir: string;
220
+ apiKey?: string;
221
+ baseUrl?: string;
222
+ model?: string;
223
+ systemPrompt?: string;
224
+ cwd?: string;
225
+ createDriver?: CreateDriver;
226
+ }
227
+ ```
228
+
229
+ | Method | Description |
230
+ | ---------------------------------------- | --------------------------------------------------------- |
231
+ | `driver(name, options): Promise<Driver>` | Get driver — playback if fixture exists, record otherwise |
232
+ | `load(name): Promise<Fixture>` | Load a fixture by name |
233
+ | `exists(name): boolean` | Check if fixture exists |
234
+ | `delete(name): Promise<void>` | Delete a fixture |
235
+
236
+ #### `MockDriver`
237
+
238
+ Replays events from a fixture. No network calls.
239
+
240
+ ```typescript
241
+ import { MockDriver, createMockDriver } from "@agentxjs/devtools";
242
+
243
+ const driver = new MockDriver({ fixture: myFixture });
244
+ ```
245
+
246
+ #### `RecordingDriver`
247
+
248
+ Wraps a real driver and records all events.
249
+
250
+ ```typescript
251
+ import { createRecordingDriver } from "@agentxjs/devtools";
252
+
253
+ const recorder = createRecordingDriver({ driver: realDriver, name: "my-scenario" });
254
+ const fixture = recorder.getFixture(); // after recording
255
+ ```
256
+
257
+ #### Built-in Fixtures
258
+
259
+ ```typescript
260
+ import { SIMPLE_REPLY, TOOL_CALL, getFixture, listFixtures } from "@agentxjs/devtools/fixtures";
261
+
262
+ listFixtures(); // ["simple-reply", "long-reply", "tool-call", "error", "empty"]
263
+ ```
264
+
265
+ ## Configuration
266
+
267
+ ### Package Exports
268
+
269
+ | Import path | Contents |
270
+ | ----------------------------- | --------------------------------------------------------------------------------------- |
271
+ | `@agentxjs/devtools` | VCR: `Devtools`, `MockDriver`, `RecordingDriver`, fixtures |
272
+ | `@agentxjs/devtools/mock` | `MockDriver`, `createMockDriver` |
273
+ | `@agentxjs/devtools/recorder` | `RecordingDriver`, `createRecordingDriver` |
274
+ | `@agentxjs/devtools/fixtures` | Built-in fixtures, `getFixture`, `listFixtures` |
275
+ | `@agentxjs/devtools/bdd` | BDD: `createCucumberConfig`, `agentUiTester`, `agentDocTester`, `startDevServer`, paths |
276
+
277
+ ### Peer Dependencies (optional)
278
+
279
+ | Package | When needed |
280
+ | ------------------------- | ---------------------------------------------------- |
281
+ | `agentxjs` | `agentDocTester` (uses AgentX SDK for AI evaluation) |
282
+ | `@agentxjs/claude-driver` | Recording with claude-driver |
283
+ | `@playwright/test` | Browser-based BDD tests |
284
+ | `@cucumber/cucumber` | BDD test runner |
@@ -0,0 +1 @@
1
+ #!/usr/bin/env node
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/bdd/cli.ts
4
+ import { spawn } from "child_process";
5
+ import { existsSync, readFileSync, unlinkSync, writeFileSync } from "fs";
6
+ import { dirname, relative, resolve } from "path";
7
+ import { fileURLToPath } from "url";
8
+ var __dirname = dirname(fileURLToPath(import.meta.url));
9
+ function loadEnvFile(filePath) {
10
+ if (!existsSync(filePath)) return;
11
+ const content = readFileSync(filePath, "utf-8");
12
+ for (const line of content.split("\n")) {
13
+ const trimmed = line.trim();
14
+ if (!trimmed || trimmed.startsWith("#")) continue;
15
+ const eqIndex = trimmed.indexOf("=");
16
+ if (eqIndex === -1) continue;
17
+ const key = trimmed.slice(0, eqIndex).trim();
18
+ let value = trimmed.slice(eqIndex + 1).trim();
19
+ if (value.startsWith('"') && value.endsWith('"') || value.startsWith("'") && value.endsWith("'")) {
20
+ value = value.slice(1, -1);
21
+ }
22
+ if (process.env[key] === void 0) {
23
+ process.env[key] = value;
24
+ }
25
+ }
26
+ }
27
+ function findMonorepoRoot(startDir) {
28
+ let dir = startDir;
29
+ while (true) {
30
+ const pkgPath = resolve(dir, "package.json");
31
+ if (existsSync(pkgPath)) {
32
+ try {
33
+ const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
34
+ if (pkg.workspaces) return dir;
35
+ } catch {
36
+ }
37
+ }
38
+ const parent = dirname(dir);
39
+ if (parent === dir) return null;
40
+ dir = parent;
41
+ }
42
+ }
43
+ var cwd = process.cwd();
44
+ loadEnvFile(resolve(cwd, ".env"));
45
+ loadEnvFile(resolve(cwd, ".env.local"));
46
+ var monorepoRoot = findMonorepoRoot(cwd);
47
+ if (monorepoRoot && monorepoRoot !== cwd) {
48
+ loadEnvFile(resolve(monorepoRoot, ".env"));
49
+ loadEnvFile(resolve(monorepoRoot, ".env.local"));
50
+ }
51
+ var args = process.argv.slice(2);
52
+ var configPath = "bdd/cucumber.js";
53
+ var configIndex = args.indexOf("--config");
54
+ if (configIndex !== -1 && args[configIndex + 1]) {
55
+ configPath = args[configIndex + 1];
56
+ args.splice(configIndex, 2);
57
+ }
58
+ var fullConfigPath = resolve(cwd, configPath);
59
+ if (!existsSync(fullConfigPath)) {
60
+ console.error(`Config not found: ${fullConfigPath}`);
61
+ console.error("Create bdd/cucumber.js or specify --config path");
62
+ process.exit(1);
63
+ }
64
+ var featurePaths = [];
65
+ var flags = [];
66
+ for (const arg of args) {
67
+ if (arg.startsWith("-")) {
68
+ flags.push(arg);
69
+ } else if (arg.endsWith(".feature") || arg.includes(".feature:")) {
70
+ featurePaths.push(arg);
71
+ } else {
72
+ flags.push(arg);
73
+ }
74
+ }
75
+ var cucumberPaths = [
76
+ resolve(cwd, "node_modules/.bin/cucumber-js"),
77
+ resolve(__dirname, "../../../.bin/cucumber-js"),
78
+ "cucumber-js"
79
+ ];
80
+ var cucumberBin = cucumberPaths.find((p) => p === "cucumber-js" || existsSync(p)) || "cucumber-js";
81
+ var rootNodeModules = resolve(cwd, "node_modules");
82
+ var effectiveConfig = configPath;
83
+ var tempConfigPath = null;
84
+ if (featurePaths.length > 0) {
85
+ const configRelPath = relative(
86
+ dirname(resolve(cwd, "bdd/.tmp-cucumber.js")),
87
+ fullConfigPath
88
+ ).replace(/\\/g, "/");
89
+ const pathsJson = JSON.stringify(featurePaths);
90
+ const tempContent = [
91
+ `import config from "./${configRelPath}";`,
92
+ `export default { ...config.default ?? config, paths: ${pathsJson} };`,
93
+ ""
94
+ ].join("\n");
95
+ tempConfigPath = resolve(cwd, "bdd/.tmp-cucumber.js");
96
+ writeFileSync(tempConfigPath, tempContent);
97
+ effectiveConfig = "bdd/.tmp-cucumber.js";
98
+ }
99
+ var cucumberArgs = ["--config", effectiveConfig, ...flags];
100
+ var child = spawn(cucumberBin, cucumberArgs, {
101
+ stdio: "inherit",
102
+ env: {
103
+ ...process.env,
104
+ NODE_OPTIONS: "--import tsx",
105
+ NODE_PATH: rootNodeModules
106
+ }
107
+ });
108
+ child.on("close", (code) => {
109
+ if (tempConfigPath && existsSync(tempConfigPath)) {
110
+ try {
111
+ unlinkSync(tempConfigPath);
112
+ } catch {
113
+ }
114
+ }
115
+ process.exit(code ?? 0);
116
+ });
117
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/bdd/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n/**\n * BDD CLI wrapper for cucumber-js\n *\n * Usage:\n * bdd # Run all tests\n * bdd path/to/file.feature # Run specific feature file\n * bdd path/to/file.feature:10 # Run specific scenario by line\n * bdd --tags @contributor # Run specific tags\n * bdd --tags \"@dev and not @slow\" # Tag expression\n * bdd --name \"token usage\" # Filter by scenario name (regex)\n * bdd --dry-run # Validate without executing\n * bdd --config path # Custom config (default: bdd/cucumber.js)\n */\n\nimport { spawn } from \"node:child_process\";\nimport { existsSync, readFileSync, unlinkSync, writeFileSync } from \"node:fs\";\nimport { dirname, relative, resolve } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\n\nconst __dirname = dirname(fileURLToPath(import.meta.url));\n\n// Load .env files (like dotenv but zero dependencies)\nfunction loadEnvFile(filePath: string) {\n if (!existsSync(filePath)) return;\n const content = readFileSync(filePath, \"utf-8\");\n for (const line of content.split(\"\\n\")) {\n const trimmed = line.trim();\n if (!trimmed || trimmed.startsWith(\"#\")) continue;\n const eqIndex = trimmed.indexOf(\"=\");\n if (eqIndex === -1) continue;\n const key = trimmed.slice(0, eqIndex).trim();\n let value = trimmed.slice(eqIndex + 1).trim();\n if (\n (value.startsWith('\"') && value.endsWith('\"')) ||\n (value.startsWith(\"'\") && value.endsWith(\"'\"))\n ) {\n value = value.slice(1, -1);\n }\n if (process.env[key] === undefined) {\n process.env[key] = value;\n }\n }\n}\n\n// Find monorepo root by walking up to find the root package.json with workspaces\nfunction findMonorepoRoot(startDir: string): string | null {\n let dir = startDir;\n while (true) {\n const pkgPath = resolve(dir, \"package.json\");\n if (existsSync(pkgPath)) {\n try {\n const pkg = JSON.parse(readFileSync(pkgPath, \"utf-8\"));\n if (pkg.workspaces) return dir;\n } catch {\n // ignore parse 
errors\n }\n }\n const parent = dirname(dir);\n if (parent === dir) return null;\n dir = parent;\n }\n}\n\nconst cwd = process.cwd();\n\n// Load .env files from cwd first, then monorepo root\nloadEnvFile(resolve(cwd, \".env\"));\nloadEnvFile(resolve(cwd, \".env.local\"));\n\nconst monorepoRoot = findMonorepoRoot(cwd);\nif (monorepoRoot && monorepoRoot !== cwd) {\n loadEnvFile(resolve(monorepoRoot, \".env\"));\n loadEnvFile(resolve(monorepoRoot, \".env.local\"));\n}\n\nconst args = process.argv.slice(2);\n\n// Extract --config\nlet configPath = \"bdd/cucumber.js\";\nconst configIndex = args.indexOf(\"--config\");\nif (configIndex !== -1 && args[configIndex + 1]) {\n configPath = args[configIndex + 1];\n args.splice(configIndex, 2);\n}\n\n// Check if config exists\nconst fullConfigPath = resolve(cwd, configPath);\nif (!existsSync(fullConfigPath)) {\n console.error(`Config not found: ${fullConfigPath}`);\n console.error(\"Create bdd/cucumber.js or specify --config path\");\n process.exit(1);\n}\n\n// Separate positional args (feature files/lines) from flags\nconst featurePaths: string[] = [];\nconst flags: string[] = [];\n\nfor (const arg of args) {\n if (arg.startsWith(\"-\")) {\n flags.push(arg);\n } else if (arg.endsWith(\".feature\") || arg.includes(\".feature:\")) {\n featurePaths.push(arg);\n } else {\n // Could be a flag value (e.g. 
after --tags), keep as-is\n flags.push(arg);\n }\n}\n\n// Find cucumber-js binary\nconst cucumberPaths = [\n resolve(cwd, \"node_modules/.bin/cucumber-js\"),\n resolve(__dirname, \"../../../.bin/cucumber-js\"),\n \"cucumber-js\",\n];\nconst cucumberBin =\n cucumberPaths.find((p) => p === \"cucumber-js\" || existsSync(p)) || \"cucumber-js\";\n\nconst rootNodeModules = resolve(cwd, \"node_modules\");\n\n// When feature paths are specified, generate a temp config that overrides\n// the original config's `paths` — cucumber-js config.paths takes precedence\n// over positional args, so we must override it in the config itself.\nlet effectiveConfig = configPath;\nlet tempConfigPath: string | null = null;\n\nif (featurePaths.length > 0) {\n const configRelPath = relative(\n dirname(resolve(cwd, \"bdd/.tmp-cucumber.js\")),\n fullConfigPath\n ).replace(/\\\\/g, \"/\");\n const pathsJson = JSON.stringify(featurePaths);\n const tempContent = [\n `import config from \"./${configRelPath}\";`,\n `export default { ...config.default ?? config, paths: ${pathsJson} };`,\n \"\",\n ].join(\"\\n\");\n\n tempConfigPath = resolve(cwd, \"bdd/.tmp-cucumber.js\");\n writeFileSync(tempConfigPath, tempContent);\n effectiveConfig = \"bdd/.tmp-cucumber.js\";\n}\n\n// Build cucumber args\nconst cucumberArgs = [\"--config\", effectiveConfig, ...flags];\n\nconst child = spawn(cucumberBin, cucumberArgs, {\n stdio: \"inherit\",\n env: {\n ...process.env,\n NODE_OPTIONS: \"--import tsx\",\n NODE_PATH: rootNodeModules,\n },\n});\n\nchild.on(\"close\", (code) => {\n // Clean up temp config\n if (tempConfigPath && existsSync(tempConfigPath)) {\n try {\n unlinkSync(tempConfigPath);\n } catch {\n // ignore cleanup errors\n }\n }\n process.exit(code ?? 
0);\n});\n"],"mappings":";;;AAeA,SAAS,aAAa;AACtB,SAAS,YAAY,cAAc,YAAY,qBAAqB;AACpE,SAAS,SAAS,UAAU,eAAe;AAC3C,SAAS,qBAAqB;AAE9B,IAAM,YAAY,QAAQ,cAAc,YAAY,GAAG,CAAC;AAGxD,SAAS,YAAY,UAAkB;AACrC,MAAI,CAAC,WAAW,QAAQ,EAAG;AAC3B,QAAM,UAAU,aAAa,UAAU,OAAO;AAC9C,aAAW,QAAQ,QAAQ,MAAM,IAAI,GAAG;AACtC,UAAM,UAAU,KAAK,KAAK;AAC1B,QAAI,CAAC,WAAW,QAAQ,WAAW,GAAG,EAAG;AACzC,UAAM,UAAU,QAAQ,QAAQ,GAAG;AACnC,QAAI,YAAY,GAAI;AACpB,UAAM,MAAM,QAAQ,MAAM,GAAG,OAAO,EAAE,KAAK;AAC3C,QAAI,QAAQ,QAAQ,MAAM,UAAU,CAAC,EAAE,KAAK;AAC5C,QACG,MAAM,WAAW,GAAG,KAAK,MAAM,SAAS,GAAG,KAC3C,MAAM,WAAW,GAAG,KAAK,MAAM,SAAS,GAAG,GAC5C;AACA,cAAQ,MAAM,MAAM,GAAG,EAAE;AAAA,IAC3B;AACA,QAAI,QAAQ,IAAI,GAAG,MAAM,QAAW;AAClC,cAAQ,IAAI,GAAG,IAAI;AAAA,IACrB;AAAA,EACF;AACF;AAGA,SAAS,iBAAiB,UAAiC;AACzD,MAAI,MAAM;AACV,SAAO,MAAM;AACX,UAAM,UAAU,QAAQ,KAAK,cAAc;AAC3C,QAAI,WAAW,OAAO,GAAG;AACvB,UAAI;AACF,cAAM,MAAM,KAAK,MAAM,aAAa,SAAS,OAAO,CAAC;AACrD,YAAI,IAAI,WAAY,QAAO;AAAA,MAC7B,QAAQ;AAAA,MAER;AAAA,IACF;AACA,UAAM,SAAS,QAAQ,GAAG;AAC1B,QAAI,WAAW,IAAK,QAAO;AAC3B,UAAM;AAAA,EACR;AACF;AAEA,IAAM,MAAM,QAAQ,IAAI;AAGxB,YAAY,QAAQ,KAAK,MAAM,CAAC;AAChC,YAAY,QAAQ,KAAK,YAAY,CAAC;AAEtC,IAAM,eAAe,iBAAiB,GAAG;AACzC,IAAI,gBAAgB,iBAAiB,KAAK;AACxC,cAAY,QAAQ,cAAc,MAAM,CAAC;AACzC,cAAY,QAAQ,cAAc,YAAY,CAAC;AACjD;AAEA,IAAM,OAAO,QAAQ,KAAK,MAAM,CAAC;AAGjC,IAAI,aAAa;AACjB,IAAM,cAAc,KAAK,QAAQ,UAAU;AAC3C,IAAI,gBAAgB,MAAM,KAAK,cAAc,CAAC,GAAG;AAC/C,eAAa,KAAK,cAAc,CAAC;AACjC,OAAK,OAAO,aAAa,CAAC;AAC5B;AAGA,IAAM,iBAAiB,QAAQ,KAAK,UAAU;AAC9C,IAAI,CAAC,WAAW,cAAc,GAAG;AAC/B,UAAQ,MAAM,qBAAqB,cAAc,EAAE;AACnD,UAAQ,MAAM,iDAAiD;AAC/D,UAAQ,KAAK,CAAC;AAChB;AAGA,IAAM,eAAyB,CAAC;AAChC,IAAM,QAAkB,CAAC;AAEzB,WAAW,OAAO,MAAM;AACtB,MAAI,IAAI,WAAW,GAAG,GAAG;AACvB,UAAM,KAAK,GAAG;AAAA,EAChB,WAAW,IAAI,SAAS,UAAU,KAAK,IAAI,SAAS,WAAW,GAAG;AAChE,iBAAa,KAAK,GAAG;AAAA,EACvB,OAAO;AAEL,UAAM,KAAK,GAAG;AAAA,EAChB;AACF;AAGA,IAAM,gBAAgB;AAAA,EACpB,QAAQ,KAAK,+BAA+B;AAAA,EAC5C,QAAQ,WAAW,2BAA2B;AAAA,EAC9C;AACF;AACA,IAAM,cACJ,cAAc,KAAK,CAAC,MAAM,MAAM,iBAAiB,WAAW,CAAC,CAAC,KAAK;AAErE,IAAM,kBAAkB,QAAQ,KAAK,cAAc;AAKnD
,IAAI,kBAAkB;AACtB,IAAI,iBAAgC;AAEpC,IAAI,aAAa,SAAS,GAAG;AAC3B,QAAM,gBAAgB;AAAA,IACpB,QAAQ,QAAQ,KAAK,sBAAsB,CAAC;AAAA,IAC5C;AAAA,EACF,EAAE,QAAQ,OAAO,GAAG;AACpB,QAAM,YAAY,KAAK,UAAU,YAAY;AAC7C,QAAM,cAAc;AAAA,IAClB,yBAAyB,aAAa;AAAA,IACtC,wDAAwD,SAAS;AAAA,IACjE;AAAA,EACF,EAAE,KAAK,IAAI;AAEX,mBAAiB,QAAQ,KAAK,sBAAsB;AACpD,gBAAc,gBAAgB,WAAW;AACzC,oBAAkB;AACpB;AAGA,IAAM,eAAe,CAAC,YAAY,iBAAiB,GAAG,KAAK;AAE3D,IAAM,QAAQ,MAAM,aAAa,cAAc;AAAA,EAC7C,OAAO;AAAA,EACP,KAAK;AAAA,IACH,GAAG,QAAQ;AAAA,IACX,cAAc;AAAA,IACd,WAAW;AAAA,EACb;AACF,CAAC;AAED,MAAM,GAAG,SAAS,CAAC,SAAS;AAE1B,MAAI,kBAAkB,WAAW,cAAc,GAAG;AAChD,QAAI;AACF,iBAAW,cAAc;AAAA,IAC3B,QAAQ;AAAA,IAER;AAAA,EACF;AACA,UAAQ,KAAK,QAAQ,CAAC;AACxB,CAAC;","names":[]}
@@ -0,0 +1,202 @@
1
+ import { ChildProcess } from 'node:child_process';
2
+ import { Page, Browser } from '@playwright/test';
3
+
4
+ interface DocTestResult {
5
+ passed: boolean;
6
+ output: string;
7
+ }
8
+ interface DocTesterOptions {
9
+ /** LLM provider (default: "anthropic") */
10
+ provider?: string;
11
+ /** Model name */
12
+ model?: string;
13
+ /** API key (reads from env if not provided) */
14
+ apiKey?: string;
15
+ /** Base URL (reads from env if not provided) */
16
+ baseUrl?: string;
17
+ /** Timeout in ms */
18
+ timeout?: number;
19
+ }
20
+ /**
21
+ * Evaluate a document against requirements using AgentX.
22
+ *
23
+ * Uses agentxjs local mode — no subprocess, no CLI, no auth issues.
24
+ * Requires `agentxjs` as a peer dependency.
25
+ */
26
+ declare function agentDocTester(options: {
27
+ files: string[];
28
+ requirements: string;
29
+ }, testerOptions?: DocTesterOptions): Promise<DocTestResult>;
30
+
31
+ interface UiTestResult {
32
+ passed: boolean;
33
+ output: string;
34
+ }
35
+ interface UiTesterOptions {
36
+ model?: string;
37
+ baseUrl?: string;
38
+ timeout?: number;
39
+ /** Show browser window (default: false) */
40
+ headed?: boolean;
41
+ }
42
+ /**
43
+ * Run a UI test scenario using Claude Code CLI + agent-browser.
44
+ *
45
+ * BDD scripts must run under Node.js (not Bun) to avoid claude CLI auth bug.
46
+ */
47
+ declare function agentUiTester(prompt: string, options?: UiTesterOptions): UiTestResult;
48
+
49
+ /**
50
+ * Shared Cucumber configuration for BDD tests
51
+ *
52
+ * Usage in project's cucumber.js:
53
+ *
54
+ * ```js
55
+ * import { createCucumberConfig } from "@agentxjs/devtools/bdd";
56
+ *
57
+ * export default createCucumberConfig({
58
+ * paths: ["bdd/journeys/** /*.feature"],
59
+ * import: ["bdd/steps/** /*.ts"],
60
+ * });
61
+ * ```
62
+ */
63
+ interface CucumberConfigOptions {
64
+ /** Feature file paths */
65
+ paths: string[];
66
+ /** Step definition paths */
67
+ import: string[];
68
+ /** Tags to filter (default: exclude @pending and @skip) */
69
+ tags?: string;
70
+ /** Default timeout in ms (default: 30000) */
71
+ timeout?: number;
72
+ /** Format output (default: progress) */
73
+ format?: string[];
74
+ }
75
+ declare function createCucumberConfig(options: CucumberConfigOptions): {
76
+ format: string[];
77
+ formatOptions: {
78
+ snippetInterface: string;
79
+ };
80
+ import: string[];
81
+ paths: string[];
82
+ tags: string;
83
+ worldParameters: {
84
+ defaultTimeout: number;
85
+ };
86
+ };
87
+
88
+ /**
89
+ * Dev server utilities for BDD testing
90
+ *
91
+ * Start and stop dev servers during test runs.
92
+ */
93
+
94
+ interface DevServerOptions {
95
+ /** Working directory */
96
+ cwd: string;
97
+ /** Command to run (default: "bun") */
98
+ command?: string;
99
+ /** Command arguments (default: ["run", "dev"]) */
100
+ args?: string[];
101
+ /** Port to wait for */
102
+ port: number;
103
+ /** Startup timeout in ms (default: 30000) */
104
+ timeout?: number;
105
+ /** Show server output (default: false, or true if DEBUG env is set) */
106
+ debug?: boolean;
107
+ }
108
+ /**
109
+ * Start a dev server and wait for it to be ready
110
+ */
111
+ declare function startDevServer(options: DevServerOptions): Promise<void>;
112
+ /**
113
+ * Stop the dev server
114
+ */
115
+ declare function stopDevServer(): void;
116
+ /**
117
+ * Get the dev server process (for advanced use)
118
+ */
119
+ declare function getDevServer(): ChildProcess | null;
120
+
121
+ /**
122
+ * Monorepo root directory
123
+ */
124
+ declare function getMonorepoPath(): string;
125
+ /**
126
+ * Current package root directory
127
+ */
128
+ declare function getPackagePath(): string;
129
+ /**
130
+ * BDD directory for current package
131
+ */
132
+ declare function getBddPath(): string;
133
+ /**
134
+ * Fixtures directory for current package's BDD tests
135
+ */
136
+ declare function getFixturesPath(subdir?: string): string;
137
+ /**
138
+ * Get or create a temporary directory for tests
139
+ */
140
+ declare function getTempPath(prefix?: string): string;
141
+ /**
142
+ * Ensure a directory exists, creating it if necessary
143
+ */
144
+ declare function ensureDir(path: string): string;
145
+ /**
146
+ * Reset cached paths (useful for testing)
147
+ */
148
+ declare function resetPaths(): void;
149
+ declare const paths: {
150
+ monorepo: typeof getMonorepoPath;
151
+ package: typeof getPackagePath;
152
+ bdd: typeof getBddPath;
153
+ fixtures: typeof getFixturesPath;
154
+ temp: typeof getTempPath;
155
+ ensure: typeof ensureDir;
156
+ reset: typeof resetPaths;
157
+ };
158
+
159
+ /**
160
+ * Playwright utilities for BDD testing
161
+ *
162
+ * Uses system Chrome to avoid downloading Chromium.
163
+ * Install: PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 bun add -d @playwright/test
164
+ *
165
+ * Browser lifecycle:
166
+ * - Single browser instance for all tests
167
+ * - Single page (tab) reused across scenarios
168
+ * - resetPage() clears state between scenarios
169
+ */
170
+
171
+ interface BrowserOptions {
172
+ headless?: boolean;
173
+ slowMo?: number;
174
+ }
175
+ /**
176
+ * Launch browser using system Chrome (singleton)
177
+ */
178
+ declare function launchBrowser(options?: BrowserOptions): Promise<Browser>;
179
+ /**
180
+ * Get or create a page (singleton, reused across scenarios)
181
+ */
182
+ declare function getPage(): Promise<Page>;
183
+ /**
184
+ * Reset page state between scenarios (without closing)
185
+ * Use this instead of closePage() for faster tests
186
+ */
187
+ declare function resetPage(): Promise<void>;
188
+ /**
189
+ * Close current page
190
+ * @deprecated Use resetPage() for faster tests. Only use closePage() if you need full isolation.
191
+ */
192
+ declare function closePage(): Promise<void>;
193
+ /**
194
+ * Close browser and cleanup
195
+ */
196
+ declare function closeBrowser(): Promise<void>;
197
+ /**
198
+ * Wait for a URL to be accessible
199
+ */
200
+ declare function waitForUrl(url: string, timeout?: number): Promise<boolean>;
201
+
202
+ export { type BrowserOptions, type CucumberConfigOptions, type DevServerOptions, type DocTestResult, type DocTesterOptions, type UiTestResult, type UiTesterOptions, agentDocTester, agentUiTester, closeBrowser, closePage, createCucumberConfig, ensureDir, getBddPath, getDevServer, getFixturesPath, getMonorepoPath, getPackagePath, getPage, getTempPath, launchBrowser, paths, resetPage, resetPaths, startDevServer, stopDevServer, waitForUrl };