@agentxjs/devtools 1.9.5-dev → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +284 -0
- package/dist/bdd/cli.d.ts +1 -0
- package/dist/bdd/cli.js +117 -0
- package/dist/bdd/cli.js.map +1 -0
- package/dist/bdd/index.d.ts +202 -0
- package/dist/bdd/index.js +381 -0
- package/dist/bdd/index.js.map +1 -0
- package/dist/chunk-6OHXS7LW.js +297 -0
- package/dist/chunk-6OHXS7LW.js.map +1 -0
- package/dist/chunk-DGUM43GV.js +11 -0
- package/dist/chunk-DGUM43GV.js.map +1 -0
- package/dist/chunk-DR45HEV4.js +152 -0
- package/dist/chunk-DR45HEV4.js.map +1 -0
- package/dist/chunk-J6L73HM5.js +301 -0
- package/dist/chunk-J6L73HM5.js.map +1 -0
- package/dist/chunk-S7J75AXG.js +64 -0
- package/dist/chunk-S7J75AXG.js.map +1 -0
- package/dist/fixtures/index.d.ts +49 -0
- package/dist/fixtures/index.js +22 -0
- package/dist/fixtures/index.js.map +1 -0
- package/dist/index.d.ts +240 -0
- package/dist/index.js +269 -0
- package/dist/index.js.map +1 -0
- package/dist/mock/index.d.ts +115 -0
- package/dist/mock/index.js +11 -0
- package/dist/mock/index.js.map +1 -0
- package/dist/recorder/index.d.ts +120 -0
- package/dist/recorder/index.js +10 -0
- package/dist/recorder/index.js.map +1 -0
- package/dist/types-C6Lf3vz2.d.ts +78 -0
- package/package.json +63 -8
- package/src/Devtools.ts +11 -14
- package/src/bdd/agent-doc-tester.ts +130 -0
- package/src/bdd/agent-ui-tester.ts +88 -0
- package/src/bdd/cli.ts +166 -0
- package/src/bdd/cucumber.config.ts +40 -0
- package/src/bdd/dev-server.ts +82 -0
- package/src/bdd/index.ts +41 -0
- package/src/bdd/paths.ts +140 -0
- package/src/bdd/playwright.ts +110 -0
- package/src/env.ts +97 -0
- package/src/index.ts +6 -1
- package/src/mock/MockDriver.ts +21 -12
- package/src/recorder/RecordingDriver.ts +1 -5
- package/scripts/record-fixture.ts +0 -148
- package/tsconfig.json +0 -10
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { Driver, DriverState, DriverStreamEvent } from '@agentxjs/core/driver';
|
|
2
|
+
import { UserMessage } from '@agentxjs/core/agent';
|
|
3
|
+
import { F as Fixture } from '../types-C6Lf3vz2.js';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* RecordingDriver - Wraps a real driver to record events
|
|
7
|
+
*
|
|
8
|
+
* Used to capture real LLM API responses and save them as fixtures.
|
|
9
|
+
* These fixtures can then be played back by MockDriver for testing.
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* ```typescript
|
|
13
|
+
* import { createClaudeDriver } from "@agentxjs/claude-driver";
|
|
14
|
+
* import { RecordingDriver } from "@agentxjs/devtools/recorder";
|
|
15
|
+
*
|
|
16
|
+
* // Create real driver
|
|
17
|
+
* const realDriver = createClaudeDriver(config);
|
|
18
|
+
*
|
|
19
|
+
* // Wrap with recorder
|
|
20
|
+
* const recorder = new RecordingDriver({
|
|
21
|
+
* driver: realDriver,
|
|
22
|
+
* name: "my-scenario",
|
|
23
|
+
* description: "User asks about weather",
|
|
24
|
+
* });
|
|
25
|
+
*
|
|
26
|
+
* await recorder.initialize();
|
|
27
|
+
*
|
|
28
|
+
* // Use like a normal driver - events are recorded
|
|
29
|
+
* for await (const event of recorder.receive({ content: "Hello" })) {
|
|
30
|
+
* console.log(event);
|
|
31
|
+
* }
|
|
32
|
+
*
|
|
33
|
+
* // Save the fixture
|
|
34
|
+
* await recorder.saveFixture("./fixtures/my-scenario.json");
|
|
35
|
+
* ```
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
/**
 * Construction options accepted by RecordingDriver.
 */
interface RecordingDriverOptions {
  /** The real driver being wrapped; its events are the ones recorded. */
  driver: Driver;
  /** Name assigned to the fixture produced by the recording. */
  name: string;
  /** Optional description stored alongside the recording. */
  description?: string;
}
|
|
55
|
+
/**
 * A driver stream event paired with timing information.
 */
interface RecordedEvent {
  /** The event as produced by the driver. */
  event: DriverStreamEvent;
  /**
   * Timing value captured for the event.
   * NOTE(review): unit and reference point are not visible in this
   * declaration — confirm against the implementation.
   */
  timestamp: number;
}
|
|
62
|
+
/**
 * RecordingDriver — a Driver implementation that wraps a real driver and
 * records the events flowing out of receive().
 */
declare class RecordingDriver implements Driver {
  readonly name = "RecordingDriver";
  private readonly realDriver;
  private readonly fixtureName;
  private readonly fixtureDescription?;
  private recordedEvents;
  private recordingStartTime;
  constructor(options: RecordingDriverOptions);
  get sessionId(): string | null;
  get state(): DriverState;
  initialize(): Promise<void>;
  dispose(): Promise<void>;
  /**
   * Passes a user message to the wrapped driver's receive() and returns
   * the resulting event stream, recording every event.
   */
  receive(message: UserMessage): AsyncIterable<DriverStreamEvent>;
  /** Interrupts the current operation by delegating to the real driver. */
  interrupt(): void;
  /** Records a single event. */
  private recordEvent;
  /** Returns the fixture built from what has been recorded. */
  getFixture(): Fixture;
  /** Saves the recorded fixture to `filePath` as a JSON file. */
  saveFixture(filePath: string): Promise<void>;
  /** Number of events recorded so far. */
  get eventCount(): number;
  /** Clears recorded events so a fresh recording can begin. */
  clearRecording(): void;
  /** Returns the raw recorded events (intended for debugging). */
  getRawEvents(): RecordedEvent[];
}
|
|
115
|
+
/**
 * Factory for a RecordingDriver wrapping the real driver given in `options`.
 */
declare function createRecordingDriver(options: RecordingDriverOptions): RecordingDriver;
|
|
119
|
+
|
|
120
|
+
export { RecordingDriver, type RecordingDriverOptions, createRecordingDriver };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DevTools Types
|
|
3
|
+
*
|
|
4
|
+
* Defines the fixture format for recording and playback.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * One event inside a fixture.
 */
interface FixtureEvent {
  /** Event type, e.g. "message_start", "text_delta", "message_stop". */
  type: string;
  /** Milliseconds elapsed since the previous event (0 for the first one). */
  delay: number;
  /** Type-specific event payload. */
  data: unknown;
  /** Optional index, used for content blocks. */
  index?: number;
  /** Optional event context (agentId, sessionId, etc.). */
  context?: unknown;
}
|
|
31
|
+
/**
 * A complete fixture: one recorded conversation scenario.
 */
interface Fixture {
  /** Fixture name, e.g. "simple-reply" or "tool-call". */
  name: string;
  /** Human-readable description. */
  description?: string;
  /** When the fixture was recorded (Unix timestamp). */
  recordedAt?: number;
  /** User message that triggers this fixture (optional, for documentation). */
  trigger?: string;
  /** Events to emit, in order. */
  events: FixtureEvent[];
}
|
|
56
|
+
/**
 * Options accepted by MockDriver.
 */
interface MockDriverOptions {
  /** Fixture used for playback, given as a Fixture object or a string. */
  fixture?: Fixture | string;
  /** Custom fixtures, keyed by name. */
  fixtures?: Map<string, Fixture>;
  /** Delay in ms between events when none is specified. */
  defaultDelay?: number;
  /** Speed multiplier (1.0 = real time, 0 = instant, 2.0 = half speed). */
  speedMultiplier?: number;
}
|
|
77
|
+
|
|
78
|
+
export type { Fixture as F, MockDriverOptions as M, FixtureEvent as a };
|
package/package.json
CHANGED
|
@@ -1,23 +1,78 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentxjs/devtools",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"description": "Development tools for AgentX - MockDriver, RecordingDriver, Fixtures",
|
|
5
5
|
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"bin": {
|
|
9
|
+
"bdd": "./dist/bdd/cli.js"
|
|
10
|
+
},
|
|
6
11
|
"exports": {
|
|
7
|
-
".":
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
12
|
+
".": {
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"import": "./dist/index.js",
|
|
15
|
+
"default": "./dist/index.js"
|
|
16
|
+
},
|
|
17
|
+
"./mock": {
|
|
18
|
+
"types": "./dist/mock/index.d.ts",
|
|
19
|
+
"import": "./dist/mock/index.js",
|
|
20
|
+
"default": "./dist/mock/index.js"
|
|
21
|
+
},
|
|
22
|
+
"./recorder": {
|
|
23
|
+
"types": "./dist/recorder/index.d.ts",
|
|
24
|
+
"import": "./dist/recorder/index.js",
|
|
25
|
+
"default": "./dist/recorder/index.js"
|
|
26
|
+
},
|
|
27
|
+
"./fixtures": {
|
|
28
|
+
"types": "./dist/fixtures/index.d.ts",
|
|
29
|
+
"import": "./dist/fixtures/index.js",
|
|
30
|
+
"default": "./dist/fixtures/index.js"
|
|
31
|
+
},
|
|
32
|
+
"./bdd": {
|
|
33
|
+
"types": "./dist/bdd/index.d.ts",
|
|
34
|
+
"import": "./dist/bdd/index.js",
|
|
35
|
+
"default": "./dist/bdd/index.js"
|
|
36
|
+
}
|
|
11
37
|
},
|
|
38
|
+
"files": [
|
|
39
|
+
"dist",
|
|
40
|
+
"src",
|
|
41
|
+
"fixtures"
|
|
42
|
+
],
|
|
12
43
|
"scripts": {
|
|
44
|
+
"build": "tsup",
|
|
13
45
|
"typecheck": "tsc --noEmit",
|
|
14
|
-
"test": "
|
|
46
|
+
"test": "echo 'No tests yet'",
|
|
47
|
+
"bdd": "bdd"
|
|
15
48
|
},
|
|
16
49
|
"dependencies": {
|
|
17
|
-
"@agentxjs/core": "
|
|
18
|
-
"commonxjs": "^0.1.
|
|
50
|
+
"@agentxjs/core": "^2.0.0",
|
|
51
|
+
"commonxjs": "^0.1.1"
|
|
19
52
|
},
|
|
20
53
|
"devDependencies": {
|
|
54
|
+
"@agentxjs/claude-driver": "^2.0.0",
|
|
55
|
+
"@agentxjs/mono-driver": "^2.0.0",
|
|
21
56
|
"typescript": "^5.3.3"
|
|
57
|
+
},
|
|
58
|
+
"peerDependencies": {
|
|
59
|
+
"agentxjs": "^2.0.0",
|
|
60
|
+
"@agentxjs/claude-driver": "^2.0.0",
|
|
61
|
+
"@playwright/test": "^1.50.0",
|
|
62
|
+
"@cucumber/cucumber": "^11.0.0"
|
|
63
|
+
},
|
|
64
|
+
"peerDependenciesMeta": {
|
|
65
|
+
"agentxjs": {
|
|
66
|
+
"optional": true
|
|
67
|
+
},
|
|
68
|
+
"@agentxjs/claude-driver": {
|
|
69
|
+
"optional": true
|
|
70
|
+
},
|
|
71
|
+
"@playwright/test": {
|
|
72
|
+
"optional": true
|
|
73
|
+
},
|
|
74
|
+
"@cucumber/cucumber": {
|
|
75
|
+
"optional": true
|
|
76
|
+
}
|
|
22
77
|
}
|
|
23
78
|
}
|
package/src/Devtools.ts
CHANGED
|
@@ -7,9 +7,11 @@
|
|
|
7
7
|
* ```typescript
|
|
8
8
|
* import { createDevtools } from "@agentxjs/devtools";
|
|
9
9
|
*
|
|
10
|
+
* import { env } from "@agentxjs/devtools";
|
|
11
|
+
*
|
|
10
12
|
* const devtools = createDevtools({
|
|
11
13
|
* fixturesDir: "./fixtures",
|
|
12
|
-
* apiKey:
|
|
14
|
+
* apiKey: env.apiKey,
|
|
13
15
|
* });
|
|
14
16
|
*
|
|
15
17
|
* // Has fixture → playback (MockDriver)
|
|
@@ -166,7 +168,8 @@ export class Devtools {
|
|
|
166
168
|
baseUrl: this.config.baseUrl,
|
|
167
169
|
agentId,
|
|
168
170
|
model: this.config.model,
|
|
169
|
-
systemPrompt:
|
|
171
|
+
systemPrompt:
|
|
172
|
+
options.systemPrompt || this.config.systemPrompt || "You are a helpful assistant.",
|
|
170
173
|
cwd: options.cwd || this.config.cwd || process.cwd(),
|
|
171
174
|
};
|
|
172
175
|
|
|
@@ -381,23 +384,17 @@ export interface VcrCreateDriverConfig {
|
|
|
381
384
|
* // Before each test:
|
|
382
385
|
* currentFixture = "test-scenario-name";
|
|
383
386
|
*
|
|
384
|
-
* // Use with server
|
|
385
|
-
* const
|
|
387
|
+
* // Use with server:
|
|
388
|
+
* const platform = await createNodePlatform({...});
|
|
389
|
+
* const server = await createServer({
|
|
390
|
+
* platform,
|
|
386
391
|
* createDriver: vcrCreateDriver,
|
|
387
392
|
* });
|
|
388
393
|
* ```
|
|
389
394
|
*/
|
|
390
395
|
export function createVcrCreateDriver(config: VcrCreateDriverConfig): CreateDriver {
|
|
391
|
-
const {
|
|
392
|
-
|
|
393
|
-
getFixtureName,
|
|
394
|
-
apiKey,
|
|
395
|
-
baseUrl,
|
|
396
|
-
model,
|
|
397
|
-
onPlayback,
|
|
398
|
-
onRecording,
|
|
399
|
-
onSaved,
|
|
400
|
-
} = config;
|
|
396
|
+
const { fixturesDir, getFixtureName, apiKey, baseUrl, model, onPlayback, onRecording, onSaved } =
|
|
397
|
+
config;
|
|
401
398
|
|
|
402
399
|
// Real driver factory (must be provided or pre-loaded)
|
|
403
400
|
const realCreateDriver: CreateDriver | null = config.createRealDriver || null;
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
2
|
+
import { env } from "../env";
|
|
3
|
+
|
|
4
|
+
// System prompt for the documentation-reviewer agent: the three evaluation
// dimensions followed by the output rules (a single-line PASS/FAIL verdict).
const SYSTEM_PROMPT = [
  "You are a documentation reviewer evaluating documents from the reader's experience.",
  "",
  "EVALUATION DIMENSIONS:",
  "1. Completeness — All required information is present. Nothing critical is missing.",
  "2. Logic — Structure flows naturally. Concepts build on each other without jumps.",
  "3. Readability — A newcomer can follow without confusion. No unexplained jargon.",
  "",
  "RULES:",
  "- Read the provided document carefully",
  "- Evaluate each requirement listed in the prompt against ALL three dimensions",
  "- Be strict but fair — the document should genuinely help the reader achieve the stated goal",
  "- Output your result as a single line: PASS or FAIL followed by a brief reason",
  "- If FAIL, list which specific requirements are not met and which dimension they violate",
].join("\n");
|
|
17
|
+
|
|
18
|
+
/** Result of a documentation evaluation run. */
export interface DocTestResult {
  /** Whether the evaluation counted as a pass. */
  passed: boolean;
  /** Text produced by the run (reviewer reply or error message). */
  output: string;
}
|
|
22
|
+
|
|
23
|
+
/** Optional overrides for the documentation tester. */
export interface DocTesterOptions {
  /** LLM provider (default: "anthropic"). */
  provider?: string;
  /** Model name. */
  model?: string;
  /** API key; read from env when omitted. */
  apiKey?: string;
  /** Base URL; read from env when omitted. */
  baseUrl?: string;
  /** Timeout in milliseconds. */
  timeout?: number;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Evaluate one or more documents against requirements using AgentX.
 *
 * Uses agentxjs local mode — no subprocess, no CLI, no auth issues.
 * Requires `agentxjs` as a peer dependency (loaded via dynamic import).
 *
 * Each file is read from disk; a missing file is included in the prompt as a
 * "[FILE NOT FOUND]" placeholder instead of aborting. Streamed `text_delta`
 * events are concatenated to form the reviewer's reply.
 *
 * @param options - `files`: paths of the documents to evaluate;
 *   `requirements`: the requirements text to check them against.
 * @param testerOptions - Provider/model/credential/timeout overrides. Unset
 *   fields fall back to `AGENTX_PROVIDER` / the `env` module, and the timeout
 *   defaults to 120 000 ms.
 * @returns `passed` is true only when the reply contains a standalone PASS
 *   and no standalone FAIL. `output` is the trimmed reply, or the error
 *   message if anything inside the session throws (including the timeout).
 * @throws If reading a file fails or `agentxjs` cannot be imported; both
 *   happen before the guarded section.
 */
export async function agentDocTester(
  options: {
    files: string[];
    requirements: string;
  },
  testerOptions: DocTesterOptions = {}
): Promise<DocTestResult> {
  const {
    provider = process.env.AGENTX_PROVIDER || "anthropic",
    model = env.model,
    apiKey = env.apiKey || "",
    baseUrl = env.baseUrl,
    timeout = 120_000,
  } = testerOptions;

  const docContents = options.files
    .map((filePath) => {
      if (!existsSync(filePath)) {
        return `--- ${filePath} ---\n[FILE NOT FOUND]`;
      }
      return `--- ${filePath} ---\n${readFileSync(filePath, "utf-8")}`;
    })
    .join("\n\n");

  const userPrompt = [
    "Evaluate the following document(s) against the requirements below.",
    "",
    "DOCUMENTS:",
    docContents,
    "",
    "REQUIREMENTS:",
    options.requirements,
    "",
    "Evaluate each requirement. Output PASS if all are met, FAIL if any are not.",
  ].join("\n");

  // Dynamic import to avoid circular dependency (devtools ↔ agentxjs)
  // Use variable to prevent TypeScript DTS from resolving the module
  const moduleName = "agentxjs";
  const agentxjs: any = await import(/* @vite-ignore */ moduleName);
  const createAgentX: (...args: any[]) => Promise<any> = agentxjs.createAgentX;

  let agentx: any = null;
  // Handle of the timeout timer, kept so it can be cancelled once the race settles.
  let timer: ReturnType<typeof setTimeout> | undefined;

  try {
    agentx = await createAgentX({
      apiKey,
      provider,
      model,
      baseUrl,
      logLevel: "silent",
    });

    await agentx.containers.create("doc-tester");

    const { record: image } = await agentx.images.create({
      containerId: "doc-tester",
      systemPrompt: SYSTEM_PROMPT,
    });

    const { agentId } = await agentx.agents.create({ imageId: image.imageId });

    // Collect response text
    let output = "";
    agentx.on("text_delta", (e: any) => {
      output += e.data.text;
    });

    // Send prompt and wait for completion, rejecting with "Timeout" if the
    // session takes longer than `timeout` ms.
    const timedOut = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error("Timeout")), timeout);
    });
    await Promise.race([agentx.sessions.send(agentId, userPrompt), timedOut]);

    output = output.trim();
    // A reply that mentions PASS somewhere but also reports FAIL (or merely
    // contains "PASS" inside another word) must not count as a pass.
    const passed = /\bPASS\b/.test(output) && !/\bFAIL\b/.test(output);
    return { passed, output };
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : undefined;
    return { passed: false, output: message || "Unknown error" };
  } finally {
    // Cancel the pending timeout so it does not outlive the call.
    if (timer !== undefined) {
      clearTimeout(timer);
    }
    if (agentx) {
      try {
        await agentx.shutdown();
      } catch {
        // ignore shutdown errors
      }
    }
  }
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
3
|
+
import { resolve, dirname } from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
|
|
6
|
+
// Directory of this module, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Location of the agent-browser skill reference, resolved relative to this
// module's directory.
const SKILL_PATH = resolve(__dirname, "../../../../.claude/skills/agent-browser/SKILL.md");
|
|
9
|
+
|
|
10
|
+
/**
 * Builds the system prompt for the UI-tester agent.
 *
 * The prompt lists the rules for driving the agent-browser CLI and, when the
 * skill file at SKILL_PATH can be read, appends its contents as a reference
 * section. If the file cannot be read the prompt is built without it.
 *
 * @param headed - When true, the rules tell the agent to pass `--headed`.
 * @returns The assembled prompt text.
 */
function loadSystemPrompt(headed = false): string {
  // Best effort: an unreadable skill file simply yields no reference text.
  const readSkillReference = (): string => {
    try {
      return readFileSync(SKILL_PATH, "utf-8");
    } catch {
      return "";
    }
  };

  const reference = readSkillReference();
  const headedFlag = headed ? "--headed " : "";

  const promptLines = [
    "You are a UI tester. You test web application scenarios using the agent-browser CLI.",
    "",
    "RULES:",
    "- ONLY use agent-browser commands via Bash tool",
    `- Use ${headedFlag}--executable-path "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" for all commands`,
    "- After each navigation or click, run: agent-browser snapshot -i",
    "- Refs (@e1, @e2) are invalidated after page changes — always re-snapshot",
    "- At the end, close the browser with: agent-browser close",
    "- Output your result as a single line: PASS or FAIL followed by a brief reason",
    "",
    reference ? `AGENT-BROWSER REFERENCE:\n${reference}` : "",
  ];

  return promptLines.join("\n");
}
|
|
30
|
+
|
|
31
|
+
/** Result of a UI test scenario run. */
export interface UiTestResult {
  /** Whether the scenario counted as a pass. */
  passed: boolean;
  /** Text produced by the run (CLI output or error text). */
  output: string;
}
|
|
35
|
+
|
|
36
|
+
/** Optional settings for the UI tester. */
export interface UiTesterOptions {
  /** Model name passed to the CLI. */
  model?: string;
  /** Base URL of the application under test; prepended to the prompt when set. */
  baseUrl?: string;
  /** Timeout in milliseconds for the CLI invocation. */
  timeout?: number;
  /** Show browser window (default: false). */
  headed?: boolean;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Run a UI test scenario using Claude Code CLI + agent-browser.
 *
 * BDD scripts must run under Node.js (not Bun) to avoid claude CLI auth bug.
 *
 * The `claude` CLI is invoked synchronously with the scenario prompt, the
 * UI-tester system prompt, and tool access restricted to agent-browser
 * commands.
 *
 * @param prompt - Scenario description for the agent to execute.
 * @param options - Model, base URL, timeout (default 300 000 ms) and headed
 *   mode overrides.
 * @returns `passed` is true only when the CLI output contains a standalone
 *   PASS and no standalone FAIL. If the CLI invocation throws, `passed` is
 *   false and `output` carries the captured stdout, stderr, or error message.
 */
export function agentUiTester(prompt: string, options: UiTesterOptions = {}): UiTestResult {
  const { model = "haiku", baseUrl, timeout = 300_000, headed = false } = options;

  const fullPrompt = baseUrl ? `Base URL: ${baseUrl}\n\n${prompt}` : prompt;

  const systemPrompt = loadSystemPrompt(headed);

  // Filter out CLAUDE* env vars to avoid auth conflicts when spawned from Claude Code
  const cleanEnv = Object.fromEntries(
    Object.entries(process.env).filter(([k]) => !k.startsWith("CLAUDE"))
  );

  try {
    const output = execFileSync(
      "claude",
      [
        "-p",
        fullPrompt,
        "--model",
        model,
        "--append-system-prompt",
        systemPrompt,
        "--allowedTools",
        "Bash(agent-browser:*)",
      ],
      {
        encoding: "utf-8",
        timeout,
        env: cleanEnv,
        maxBuffer: 10 * 1024 * 1024,
      }
    ).trim();

    // Output that mentions PASS somewhere but also reports FAIL (or merely
    // contains "PASS" inside another word) must not count as a pass.
    const passed = /\bPASS\b/.test(output) && !/\bFAIL\b/.test(output);
    return { passed, output };
  } catch (error: unknown) {
    // Prefer whatever the child process printed; fall back to the error text.
    const { stdout, stderr, message } = (error ?? {}) as {
      stdout?: unknown;
      stderr?: unknown;
      message?: unknown;
    };
    const output = stdout || stderr || message || "Unknown error";
    return { passed: false, output: String(output).trim() };
  }
}
|