@agentxjs/devtools 1.9.5-dev → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +284 -0
  2. package/dist/bdd/cli.d.ts +1 -0
  3. package/dist/bdd/cli.js +117 -0
  4. package/dist/bdd/cli.js.map +1 -0
  5. package/dist/bdd/index.d.ts +202 -0
  6. package/dist/bdd/index.js +381 -0
  7. package/dist/bdd/index.js.map +1 -0
  8. package/dist/chunk-6OHXS7LW.js +297 -0
  9. package/dist/chunk-6OHXS7LW.js.map +1 -0
  10. package/dist/chunk-DGUM43GV.js +11 -0
  11. package/dist/chunk-DGUM43GV.js.map +1 -0
  12. package/dist/chunk-DR45HEV4.js +152 -0
  13. package/dist/chunk-DR45HEV4.js.map +1 -0
  14. package/dist/chunk-J6L73HM5.js +301 -0
  15. package/dist/chunk-J6L73HM5.js.map +1 -0
  16. package/dist/chunk-S7J75AXG.js +64 -0
  17. package/dist/chunk-S7J75AXG.js.map +1 -0
  18. package/dist/fixtures/index.d.ts +49 -0
  19. package/dist/fixtures/index.js +22 -0
  20. package/dist/fixtures/index.js.map +1 -0
  21. package/dist/index.d.ts +240 -0
  22. package/dist/index.js +269 -0
  23. package/dist/index.js.map +1 -0
  24. package/dist/mock/index.d.ts +115 -0
  25. package/dist/mock/index.js +11 -0
  26. package/dist/mock/index.js.map +1 -0
  27. package/dist/recorder/index.d.ts +120 -0
  28. package/dist/recorder/index.js +10 -0
  29. package/dist/recorder/index.js.map +1 -0
  30. package/dist/types-C6Lf3vz2.d.ts +78 -0
  31. package/package.json +63 -8
  32. package/src/Devtools.ts +11 -14
  33. package/src/bdd/agent-doc-tester.ts +130 -0
  34. package/src/bdd/agent-ui-tester.ts +88 -0
  35. package/src/bdd/cli.ts +166 -0
  36. package/src/bdd/cucumber.config.ts +40 -0
  37. package/src/bdd/dev-server.ts +82 -0
  38. package/src/bdd/index.ts +41 -0
  39. package/src/bdd/paths.ts +140 -0
  40. package/src/bdd/playwright.ts +110 -0
  41. package/src/env.ts +97 -0
  42. package/src/index.ts +6 -1
  43. package/src/mock/MockDriver.ts +21 -12
  44. package/src/recorder/RecordingDriver.ts +1 -5
  45. package/scripts/record-fixture.ts +0 -148
  46. package/tsconfig.json +0 -10
@@ -0,0 +1,120 @@
1
+ import { Driver, DriverState, DriverStreamEvent } from '@agentxjs/core/driver';
2
+ import { UserMessage } from '@agentxjs/core/agent';
3
+ import { F as Fixture } from '../types-C6Lf3vz2.js';
4
+
5
+ /**
6
+ * RecordingDriver - Wraps a real driver to record events
7
+ *
8
+ * Used to capture real LLM API responses and save them as fixtures.
9
+ * These fixtures can then be played back by MockDriver for testing.
10
+ *
11
+ * Usage:
12
+ * ```typescript
13
+ * import { createClaudeDriver } from "@agentxjs/claude-driver";
14
+ * import { RecordingDriver } from "@agentxjs/devtools/recorder";
15
+ *
16
+ * // Create real driver
17
+ * const realDriver = createClaudeDriver(config);
18
+ *
19
+ * // Wrap with recorder
20
+ * const recorder = new RecordingDriver({
21
+ * driver: realDriver,
22
+ * name: "my-scenario",
23
+ * description: "User asks about weather",
24
+ * });
25
+ *
26
+ * await recorder.initialize();
27
+ *
28
+ * // Use like a normal driver - events are recorded
29
+ * for await (const event of recorder.receive({ content: "Hello" })) {
30
+ * console.log(event);
31
+ * }
32
+ *
33
+ * // Save the fixture
34
+ * await recorder.saveFixture("./fixtures/my-scenario.json");
35
+ * ```
36
+ */
37
+
38
+ /**
39
+ * Options for RecordingDriver
40
+ */
41
+ interface RecordingDriverOptions {
42
+ /**
43
+ * The real driver to wrap
44
+ */
45
+ driver: Driver;
46
+ /**
47
+ * Fixture name for the recording
48
+ */
49
+ name: string;
50
+ /**
51
+ * Description for the recording
52
+ */
53
+ description?: string;
54
+ }
55
+ /**
56
+ * Recorded event with timing
57
+ */
58
+ interface RecordedEvent {
59
+ event: DriverStreamEvent;
60
+ timestamp: number;
61
+ }
62
+ /**
63
+ * RecordingDriver - Records events from a real driver
64
+ *
65
+ * Implements the new Driver interface by wrapping a real driver
66
+ * and intercepting events from receive().
67
+ */
68
+ declare class RecordingDriver implements Driver {
69
+ readonly name = "RecordingDriver";
70
+ private readonly realDriver;
71
+ private readonly fixtureName;
72
+ private readonly fixtureDescription?;
73
+ private recordedEvents;
74
+ private recordingStartTime;
75
+ constructor(options: RecordingDriverOptions);
76
+ get sessionId(): string | null;
77
+ get state(): DriverState;
78
+ initialize(): Promise<void>;
79
+ dispose(): Promise<void>;
80
+ /**
81
+ * Receive a user message and return stream of events
82
+ *
83
+ * Wraps the real driver's receive() and records all events.
84
+ */
85
+ receive(message: UserMessage): AsyncIterable<DriverStreamEvent>;
86
+ /**
87
+ * Interrupt current operation (delegate to real driver)
88
+ */
89
+ interrupt(): void;
90
+ /**
91
+ * Record an event
92
+ */
93
+ private recordEvent;
94
+ /**
95
+ * Get the recorded fixture
96
+ */
97
+ getFixture(): Fixture;
98
+ /**
99
+ * Save the recorded fixture to a JSON file
100
+ */
101
+ saveFixture(filePath: string): Promise<void>;
102
+ /**
103
+ * Get the number of recorded events
104
+ */
105
+ get eventCount(): number;
106
+ /**
107
+ * Clear recorded events (start fresh recording)
108
+ */
109
+ clearRecording(): void;
110
+ /**
111
+ * Get raw recorded events (for debugging)
112
+ */
113
+ getRawEvents(): RecordedEvent[];
114
+ }
115
+ /**
116
+ * Create a RecordingDriver that wraps a real driver
117
+ */
118
+ declare function createRecordingDriver(options: RecordingDriverOptions): RecordingDriver;
119
+
120
+ export { RecordingDriver, type RecordingDriverOptions, createRecordingDriver };
@@ -0,0 +1,10 @@
1
+ import {
2
+ RecordingDriver,
3
+ createRecordingDriver
4
+ } from "../chunk-DR45HEV4.js";
5
+ import "../chunk-DGUM43GV.js";
6
+ export {
7
+ RecordingDriver,
8
+ createRecordingDriver
9
+ };
10
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * DevTools Types
3
+ *
4
+ * Defines the fixture format for recording and playback.
5
+ */
6
+ /**
7
+ * A single event in a fixture
8
+ */
9
+ interface FixtureEvent {
10
+ /**
11
+ * Event type (e.g., "message_start", "text_delta", "message_stop")
12
+ */
13
+ type: string;
14
+ /**
15
+ * Delay in milliseconds since last event (0 for first event)
16
+ */
17
+ delay: number;
18
+ /**
19
+ * Event data (type-specific)
20
+ */
21
+ data: unknown;
22
+ /**
23
+ * Optional: index for content blocks
24
+ */
25
+ index?: number;
26
+ /**
27
+ * Optional: event context (agentId, sessionId, etc.)
28
+ */
29
+ context?: unknown;
30
+ }
31
+ /**
32
+ * A complete fixture (recorded conversation scenario)
33
+ */
34
+ interface Fixture {
35
+ /**
36
+ * Fixture name (e.g., "simple-reply", "tool-call")
37
+ */
38
+ name: string;
39
+ /**
40
+ * Human-readable description
41
+ */
42
+ description?: string;
43
+ /**
44
+ * When this fixture was recorded (Unix timestamp)
45
+ */
46
+ recordedAt?: number;
47
+ /**
48
+ * The user message that triggers this fixture (optional, for documentation)
49
+ */
50
+ trigger?: string;
51
+ /**
52
+ * Sequence of events to emit
53
+ */
54
+ events: FixtureEvent[];
55
+ }
56
+ /**
57
+ * Options for MockDriver
58
+ */
59
+ interface MockDriverOptions {
60
+ /**
61
+ * Fixture to use for playback
62
+ */
63
+ fixture?: Fixture | string;
64
+ /**
65
+ * Custom fixtures map (name -> fixture)
66
+ */
67
+ fixtures?: Map<string, Fixture>;
68
+ /**
69
+ * Default delay between events if not specified (ms)
70
+ */
71
+ defaultDelay?: number;
72
+ /**
73
+ * Speed multiplier (1.0 = real time, 0 = instant, 2.0 = half speed)
74
+ */
75
+ speedMultiplier?: number;
76
+ }
77
+
78
+ export type { Fixture as F, MockDriverOptions as M, FixtureEvent as a };
package/package.json CHANGED
@@ -1,23 +1,78 @@
1
1
  {
2
2
  "name": "@agentxjs/devtools",
3
- "version": "1.9.5-dev",
3
+ "version": "2.0.0",
4
4
  "description": "Development tools for AgentX - MockDriver, RecordingDriver, Fixtures",
5
5
  "type": "module",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "bin": {
9
+ "bdd": "./dist/bdd/cli.js"
10
+ },
6
11
  "exports": {
7
- ".": "./src/index.ts",
8
- "./mock": "./src/mock/index.ts",
9
- "./recorder": "./src/recorder/index.ts",
10
- "./fixtures": "./fixtures/index.ts"
12
+ ".": {
13
+ "types": "./dist/index.d.ts",
14
+ "import": "./dist/index.js",
15
+ "default": "./dist/index.js"
16
+ },
17
+ "./mock": {
18
+ "types": "./dist/mock/index.d.ts",
19
+ "import": "./dist/mock/index.js",
20
+ "default": "./dist/mock/index.js"
21
+ },
22
+ "./recorder": {
23
+ "types": "./dist/recorder/index.d.ts",
24
+ "import": "./dist/recorder/index.js",
25
+ "default": "./dist/recorder/index.js"
26
+ },
27
+ "./fixtures": {
28
+ "types": "./dist/fixtures/index.d.ts",
29
+ "import": "./dist/fixtures/index.js",
30
+ "default": "./dist/fixtures/index.js"
31
+ },
32
+ "./bdd": {
33
+ "types": "./dist/bdd/index.d.ts",
34
+ "import": "./dist/bdd/index.js",
35
+ "default": "./dist/bdd/index.js"
36
+ }
11
37
  },
38
+ "files": [
39
+ "dist",
40
+ "src",
41
+ "fixtures"
42
+ ],
12
43
  "scripts": {
44
+ "build": "tsup",
13
45
  "typecheck": "tsc --noEmit",
14
- "test": "bun test"
46
+ "test": "echo 'No tests yet'",
47
+ "bdd": "bdd"
15
48
  },
16
49
  "dependencies": {
17
- "@agentxjs/core": "1.9.5-dev",
18
- "commonxjs": "^0.1.0"
50
+ "@agentxjs/core": "^2.0.0",
51
+ "commonxjs": "^0.1.1"
19
52
  },
20
53
  "devDependencies": {
54
+ "@agentxjs/claude-driver": "^2.0.0",
55
+ "@agentxjs/mono-driver": "^2.0.0",
21
56
  "typescript": "^5.3.3"
57
+ },
58
+ "peerDependencies": {
59
+ "agentxjs": "^2.0.0",
60
+ "@agentxjs/claude-driver": "^2.0.0",
61
+ "@playwright/test": "^1.50.0",
62
+ "@cucumber/cucumber": "^11.0.0"
63
+ },
64
+ "peerDependenciesMeta": {
65
+ "agentxjs": {
66
+ "optional": true
67
+ },
68
+ "@agentxjs/claude-driver": {
69
+ "optional": true
70
+ },
71
+ "@playwright/test": {
72
+ "optional": true
73
+ },
74
+ "@cucumber/cucumber": {
75
+ "optional": true
76
+ }
22
77
  }
23
78
  }
package/src/Devtools.ts CHANGED
@@ -7,9 +7,11 @@
7
7
  * ```typescript
8
8
  * import { createDevtools } from "@agentxjs/devtools";
9
9
  *
10
+ * import { env } from "@agentxjs/devtools";
11
+ *
10
12
  * const devtools = createDevtools({
11
13
  * fixturesDir: "./fixtures",
12
- * apiKey: process.env.DEEPRACTICE_API_KEY,
14
+ * apiKey: env.apiKey,
13
15
  * });
14
16
  *
15
17
  * // Has fixture → playback (MockDriver)
@@ -166,7 +168,8 @@ export class Devtools {
166
168
  baseUrl: this.config.baseUrl,
167
169
  agentId,
168
170
  model: this.config.model,
169
- systemPrompt: options.systemPrompt || this.config.systemPrompt || "You are a helpful assistant.",
171
+ systemPrompt:
172
+ options.systemPrompt || this.config.systemPrompt || "You are a helpful assistant.",
170
173
  cwd: options.cwd || this.config.cwd || process.cwd(),
171
174
  };
172
175
 
@@ -381,23 +384,17 @@ export interface VcrCreateDriverConfig {
381
384
  * // Before each test:
382
385
  * currentFixture = "test-scenario-name";
383
386
  *
384
- * // Use with server/provider:
385
- * const provider = await createNodeProvider({
387
+ * // Use with server:
388
+ * const platform = await createNodePlatform({...});
389
+ * const server = await createServer({
390
+ * platform,
386
391
  * createDriver: vcrCreateDriver,
387
392
  * });
388
393
  * ```
389
394
  */
390
395
  export function createVcrCreateDriver(config: VcrCreateDriverConfig): CreateDriver {
391
- const {
392
- fixturesDir,
393
- getFixtureName,
394
- apiKey,
395
- baseUrl,
396
- model,
397
- onPlayback,
398
- onRecording,
399
- onSaved,
400
- } = config;
396
+ const { fixturesDir, getFixtureName, apiKey, baseUrl, model, onPlayback, onRecording, onSaved } =
397
+ config;
401
398
 
402
399
  // Real driver factory (must be provided or pre-loaded)
403
400
  const realCreateDriver: CreateDriver | null = config.createRealDriver || null;
@@ -0,0 +1,130 @@
1
+ import { readFileSync, existsSync } from "node:fs";
2
+ import { env } from "../env";
3
+
4
+ const SYSTEM_PROMPT = `You are a documentation reviewer evaluating documents from the reader's experience.
5
+
6
+ EVALUATION DIMENSIONS:
7
+ 1. Completeness — All required information is present. Nothing critical is missing.
8
+ 2. Logic — Structure flows naturally. Concepts build on each other without jumps.
9
+ 3. Readability — A newcomer can follow without confusion. No unexplained jargon.
10
+
11
+ RULES:
12
+ - Read the provided document carefully
13
+ - Evaluate each requirement listed in the prompt against ALL three dimensions
14
+ - Be strict but fair — the document should genuinely help the reader achieve the stated goal
15
+ - Output your result as a single line: PASS or FAIL followed by a brief reason
16
+ - If FAIL, list which specific requirements are not met and which dimension they violate`;
17
+
18
+ export interface DocTestResult {
19
+ passed: boolean;
20
+ output: string;
21
+ }
22
+
23
+ export interface DocTesterOptions {
24
+ /** LLM provider (default: AGENTX_PROVIDER env var, else "anthropic") */
25
+ provider?: string;
26
+ /** Model name (reads from env if not provided) */
27
+ model?: string;
28
+ /** API key (reads from env if not provided) */
29
+ apiKey?: string;
30
+ /** Base URL (reads from env if not provided) */
31
+ baseUrl?: string;
32
+ /** Timeout in ms (default: 120000) */
33
+ timeout?: number;
34
+ }
35
+
36
+ /**
37
+ * Evaluate a document against requirements using AgentX.
38
+ *
39
+ * Uses agentxjs local mode — no subprocess, no CLI, no auth issues.
40
+ * Requires `agentxjs` as a peer dependency.
41
+ */
42
+ export async function agentDocTester(
43
+ options: {
44
+ files: string[];
45
+ requirements: string;
46
+ },
47
+ testerOptions: DocTesterOptions = {}
48
+ ): Promise<DocTestResult> {
49
+ const {
50
+ provider = process.env.AGENTX_PROVIDER || "anthropic",
51
+ model = env.model,
52
+ apiKey = env.apiKey || "",
53
+ baseUrl = env.baseUrl,
54
+ timeout = 120_000,
55
+ } = testerOptions;
56
+
57
+ const docContents = options.files
58
+ .map((filePath) => {
59
+ if (!existsSync(filePath)) {
60
+ return `--- ${filePath} ---\n[FILE NOT FOUND]`;
61
+ }
62
+ return `--- ${filePath} ---\n${readFileSync(filePath, "utf-8")}`;
63
+ })
64
+ .join("\n\n");
65
+
66
+ const userPrompt = [
67
+ "Evaluate the following document(s) against the requirements below.",
68
+ "",
69
+ "DOCUMENTS:",
70
+ docContents,
71
+ "",
72
+ "REQUIREMENTS:",
73
+ options.requirements,
74
+ "",
75
+ "Evaluate each requirement. Output PASS if all are met, FAIL if any are not.",
76
+ ].join("\n");
77
+
78
+ // Dynamic import to avoid circular dependency (devtools ↔ agentxjs)
79
+ // Use a variable as the module specifier so the TypeScript DTS build does not try to resolve the module
80
+ const moduleName = "agentxjs";
81
+ const agentxjs: any = await import(/* @vite-ignore */ moduleName);
82
+ const createAgentX: (...args: any[]) => Promise<any> = agentxjs.createAgentX;
83
+
84
+ let agentx: any = null;
85
+
86
+ try {
87
+ agentx = await createAgentX({
88
+ apiKey,
89
+ provider,
90
+ model,
91
+ baseUrl,
92
+ logLevel: "silent",
93
+ });
94
+
95
+ await agentx.containers.create("doc-tester");
96
+
97
+ const { record: image } = await agentx.images.create({
98
+ containerId: "doc-tester",
99
+ systemPrompt: SYSTEM_PROMPT,
100
+ });
101
+
102
+ const { agentId } = await agentx.agents.create({ imageId: image.imageId });
103
+
104
+ // Collect response text
105
+ let output = "";
106
+ agentx.on("text_delta", (e: any) => {
107
+ output += e.data.text;
108
+ });
109
+
110
+ // Send prompt and wait for completion
111
+ await Promise.race([
112
+ agentx.sessions.send(agentId, userPrompt),
113
+ new Promise((_, reject) => setTimeout(() => reject(new Error("Timeout")), timeout)),
114
+ ]);
115
+
116
+ output = output.trim();
117
+ const passed = /\*{0,2}PASS\*{0,2}\b/m.test(output);
118
+ return { passed, output };
119
+ } catch (error: any) {
120
+ return { passed: false, output: error.message || "Unknown error" };
121
+ } finally {
122
+ if (agentx) {
123
+ try {
124
+ await agentx.shutdown();
125
+ } catch {
126
+ // ignore shutdown errors
127
+ }
128
+ }
129
+ }
130
+ }
@@ -0,0 +1,88 @@
1
+ import { execFileSync } from "node:child_process";
2
+ import { readFileSync } from "node:fs";
3
+ import { resolve, dirname } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+
6
+ const __dirname = dirname(fileURLToPath(import.meta.url));
7
+
8
+ const SKILL_PATH = resolve(__dirname, "../../../../.claude/skills/agent-browser/SKILL.md");
9
+
10
+ function loadSystemPrompt(headed = false): string {
11
+ let skillContent = "";
12
+ try {
13
+ skillContent = readFileSync(SKILL_PATH, "utf-8");
14
+ } catch {
15
+ // Skill file not found, continue without it
16
+ }
17
+
18
+ return `You are a UI tester. You test web application scenarios using the agent-browser CLI.
19
+
20
+ RULES:
21
+ - ONLY use agent-browser commands via Bash tool
22
+ - Use ${headed ? "--headed " : ""}--executable-path "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" for all commands
23
+ - After each navigation or click, run: agent-browser snapshot -i
24
+ - Refs (@e1, @e2) are invalidated after page changes — always re-snapshot
25
+ - At the end, close the browser with: agent-browser close
26
+ - Output your result as a single line: PASS or FAIL followed by a brief reason
27
+
28
+ ${skillContent ? `AGENT-BROWSER REFERENCE:\n${skillContent}` : ""}`;
29
+ }
30
+
31
+ export interface UiTestResult {
32
+ passed: boolean;
33
+ output: string;
34
+ }
35
+
36
+ export interface UiTesterOptions {
37
+ model?: string;
38
+ baseUrl?: string;
39
+ timeout?: number;
40
+ /** Show browser window (default: false) */
41
+ headed?: boolean;
42
+ }
43
+
44
+ /**
45
+ * Run a UI test scenario using Claude Code CLI + agent-browser.
46
+ *
47
+ * BDD scripts must run under Node.js (not Bun) to avoid a claude CLI auth bug.
48
+ */
49
+ export function agentUiTester(prompt: string, options: UiTesterOptions = {}): UiTestResult {
50
+ const { model = "haiku", baseUrl, timeout = 300_000, headed = false } = options;
51
+
52
+ const fullPrompt = baseUrl ? `Base URL: ${baseUrl}\n\n${prompt}` : prompt;
53
+
54
+ const systemPrompt = loadSystemPrompt(headed);
55
+
56
+ // Filter out CLAUDE* env vars to avoid auth conflicts when spawned from Claude Code
57
+ const cleanEnv = Object.fromEntries(
58
+ Object.entries(process.env).filter(([k]) => !k.startsWith("CLAUDE"))
59
+ );
60
+
61
+ try {
62
+ const output = execFileSync(
63
+ "claude",
64
+ [
65
+ "-p",
66
+ fullPrompt,
67
+ "--model",
68
+ model,
69
+ "--append-system-prompt",
70
+ systemPrompt,
71
+ "--allowedTools",
72
+ "Bash(agent-browser:*)",
73
+ ],
74
+ {
75
+ encoding: "utf-8",
76
+ timeout,
77
+ env: cleanEnv,
78
+ maxBuffer: 10 * 1024 * 1024,
79
+ }
80
+ ).trim();
81
+
82
+ const passed = /\*{0,2}PASS\*{0,2}\b/m.test(output);
83
+ return { passed, output };
84
+ } catch (error: any) {
85
+ const output = error.stdout || error.stderr || error.message || "Unknown error";
86
+ return { passed: false, output: output.trim() };
87
+ }
88
+ }