@mnapoli/exspec 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -42
- package/dist/cli.js +1 -1
- package/dist/reporter.d.ts +1 -1
- package/dist/reporter.js +30 -3
- package/dist/reporter.test.js +2 -2
- package/dist/runner.js +6 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,8 +1,53 @@
|
|
|
1
|
-
#
|
|
1
|
+
# Executable specs
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
AI writes code. AI writes tests. But confidence comes from tests you actually read and write.
|
|
4
4
|
|
|
5
|
-
exspec
|
|
5
|
+
**exspec runs plain-text specs in a real browser using AI.**
|
|
6
|
+
No test code, no step definitions. Write specs as acceptance criteria, let agents build the feature, then run exspec to check that the specs pass.
|
|
7
|
+
|
|
8
|
+
## Example
|
|
9
|
+
|
|
10
|
+
```gherkin
|
|
11
|
+
Feature: Order management
|
|
12
|
+
|
|
13
|
+
Scenario: Place an order and check it appears in the dashboard
|
|
14
|
+
Given I am logged in as a store manager
|
|
15
|
+
When I create a new order for customer "Alice Martin" with 2 items
|
|
16
|
+
Then the order should appear in the orders list with status "Pending"
|
|
17
|
+
|
|
18
|
+
Scenario: Cancel an order
|
|
19
|
+
Given I am logged in as a store manager
|
|
20
|
+
And there is at least one pending order
|
|
21
|
+
When I open the most recent order and cancel it
|
|
22
|
+
Then the order status should change to "Cancelled"
|
|
23
|
+
And the customer should see a cancellation notice
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
$ npx exspec
|
|
28
|
+
|
|
29
|
+
Suite: 2 scenario(s) in 1 domain(s)
|
|
30
|
+
|
|
31
|
+
orders (2 scenarios)
|
|
32
|
+
· Place an order and check it appears in the dashboard
|
|
33
|
+
· Cancel an order
|
|
34
|
+
|
|
35
|
+
▶ orders...
|
|
36
|
+
2 passed, 0 failed
|
|
37
|
+
Cost: $0.0523
|
|
38
|
+
|
|
39
|
+
────────────────────────────────────────
|
|
40
|
+
Total: 2 passed, 0 failed, 0 skipped, 0 errors
|
|
41
|
+
Total cost: $0.0523
|
|
42
|
+
|
|
43
|
+
Results written to features/exspec/2026-03-20-1430.md
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Unlike [Cucumber](https://github.com/cucumber/cucumber-js) or [Behat](https://github.com/Behat/Behat), there's **no glue code** — no step definitions, no page objects, no regex matchers to wire up. The AI agent reads your specs and navigates the app like a real user would. It figures out where to click, what to fill in, and what to check on screen.
|
|
47
|
+
|
|
48
|
+
This also means specs aren't brittle. Traditional browser tests break when a CSS class changes or a button moves. The AI agent adapts to the actual UI — and if the UX is so broken that a human couldn't complete the task, the spec fails too. That's a feature, not a bug.
|
|
49
|
+
|
|
50
|
+
Specs are written in [Gherkin](https://cucumber.io/docs/gherkin/reference/), a simple Given/When/Then format. You can write them in [70+ languages](https://cucumber.io/docs/gherkin/languages/) (English, French, German, Spanish, etc.).
|
|
6
51
|
|
|
7
52
|
## Install
|
|
8
53
|
|
|
@@ -10,24 +55,52 @@ exspec parses `.feature` files, launches a Claude agent restricted to browser-on
|
|
|
10
55
|
npm install -D @mnapoli/exspec
|
|
11
56
|
```
|
|
12
57
|
|
|
13
|
-
|
|
58
|
+
### Prerequisites
|
|
14
59
|
|
|
15
60
|
- [Claude Code CLI](https://docs.anthropic.com/en/docs/claude-code) installed and authenticated
|
|
16
61
|
|
|
17
|
-
##
|
|
62
|
+
## Quick start
|
|
63
|
+
|
|
64
|
+
1. Create a `features/exspec.md` configuration file:
|
|
65
|
+
|
|
66
|
+
```markdown
|
|
67
|
+
URL: http://localhost:3000
|
|
68
|
+
|
|
69
|
+
Use the `test@example.com` / `password` credentials for authentication.
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
2. Write a feature file in `features/`:
|
|
73
|
+
|
|
74
|
+
```gherkin
|
|
75
|
+
Feature: Shopping cart
|
|
76
|
+
|
|
77
|
+
Scenario: Add a product to the cart
|
|
78
|
+
Given I am logged in
|
|
79
|
+
When I navigate to the product catalog
|
|
80
|
+
And I add the first product to my cart
|
|
81
|
+
Then the cart should show 1 item
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
3. Run:
|
|
18
85
|
|
|
19
86
|
```bash
|
|
20
|
-
# Run all feature files in features/
|
|
21
87
|
npx exspec
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
That's it. No step definitions to implement, no test code to write.
|
|
22
91
|
|
|
23
|
-
|
|
24
|
-
|
|
92
|
+
## Usage
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# Run all feature files
|
|
96
|
+
npx exspec
|
|
25
97
|
|
|
26
|
-
# Run
|
|
27
|
-
npx exspec features/
|
|
98
|
+
# Run a specific file or directory
|
|
99
|
+
npx exspec features/auth/login.feature
|
|
100
|
+
npx exspec features/auth/
|
|
28
101
|
|
|
29
102
|
# Filter by scenario name
|
|
30
|
-
npx exspec
|
|
103
|
+
npx exspec --filter "invalid password"
|
|
31
104
|
|
|
32
105
|
# Stop at first failure
|
|
33
106
|
npx exspec --fail-fast
|
|
@@ -38,18 +111,17 @@ npx exspec --headed
|
|
|
38
111
|
|
|
39
112
|
## Configuration
|
|
40
113
|
|
|
41
|
-
### `exspec.md`
|
|
114
|
+
### `features/exspec.md`
|
|
42
115
|
|
|
43
|
-
|
|
116
|
+
This file is passed to the AI agent as context. Describe your app, provide credentials, set the URL — anything the agent needs to know to test your application.
|
|
44
117
|
|
|
45
118
|
```markdown
|
|
46
|
-
|
|
119
|
+
URL: http://localhost:3000
|
|
47
120
|
|
|
48
121
|
## Application
|
|
49
122
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
This is an e-commerce app. The user is a store manager. For detailed feature documentation, see the `docs/` directory.
|
|
123
|
+
This is an e-commerce app. The user is a store manager.
|
|
124
|
+
For detailed feature documentation, see the `docs/` directory.
|
|
53
125
|
|
|
54
126
|
## Authentication
|
|
55
127
|
|
|
@@ -60,41 +132,25 @@ Use the `test@example.com` / `password` credentials for authentication.
|
|
|
60
132
|
Resolution: 1920x1080
|
|
61
133
|
```
|
|
62
134
|
|
|
63
|
-
The agent reads this file as context, so you can reference any project documentation here, or give it extra instructions.
|
|
64
|
-
|
|
65
135
|
### Environment variables
|
|
66
136
|
|
|
67
|
-
If your project has a `.env` file, exspec loads it automatically. You can
|
|
137
|
+
If your project has a `.env` file, exspec loads it automatically. You can reference variables in `features/exspec.md` with `$VAR` or `${VAR}` syntax:
|
|
68
138
|
|
|
69
139
|
```markdown
|
|
70
140
|
URL: $APP_URL
|
|
71
141
|
```
|
|
72
142
|
|
|
73
|
-
This is useful for dynamic URLs across environments (e.g. with git worktrees). If a variable is not defined, the reference is left as-is.
|
|
74
|
-
|
|
75
143
|
## How it works
|
|
76
144
|
|
|
77
|
-
1.
|
|
78
|
-
2.
|
|
79
|
-
3.
|
|
80
|
-
4.
|
|
81
|
-
- Only Playwright tools available (browser-only, no database or code access)
|
|
82
|
-
- Playwright in headless mode (or headed with `--headed`)
|
|
83
|
-
- Feature content + context docs + config as prompt
|
|
84
|
-
5. Parses results (PASS/FAIL/SKIP) and writes them to `features/exspec/`
|
|
145
|
+
1. Discovers `.feature` files in `features/` and groups them by subdirectory
|
|
146
|
+
2. For each group, launches a Claude agent with only Playwright browser tools (no database, no code, no shell access)
|
|
147
|
+
3. The agent reads your specs and interacts with the browser autonomously
|
|
148
|
+
4. Results (PASS/FAIL/SKIP) are written to `features/exspec/`
|
|
85
149
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
Results are written to `features/exspec/{YYYY-MM-DD-HHmm}.md` with failure screenshots in the corresponding directory.
|
|
89
|
-
|
|
90
|
-
The CLI exits with code `1` if any tests fail (CI-friendly).
|
|
150
|
+
The agent is sandboxed to browser-only interaction. If a scenario can't be verified through the browser, it's marked as FAIL.
|
|
91
151
|
|
|
92
|
-
##
|
|
93
|
-
|
|
94
|
-
The AI agent can ONLY use Playwright browser tools. It cannot:
|
|
152
|
+
## Results
|
|
95
153
|
|
|
96
|
-
-
|
|
97
|
-
- Read or modify source code
|
|
98
|
-
- Execute shell commands
|
|
154
|
+
Results are written to `features/exspec/{YYYY-MM-DD-HHmm}.md` with failure screenshots.
|
|
99
155
|
|
|
100
|
-
|
|
156
|
+
The CLI exits with code `1` on failures (CI-friendly).
|
package/dist/cli.js
CHANGED
|
@@ -119,7 +119,7 @@ for (const [domain, domainFeatures] of domains) {
|
|
|
119
119
|
}
|
|
120
120
|
}
|
|
121
121
|
// Summary
|
|
122
|
-
appendSummary(resultsPath, totals);
|
|
122
|
+
appendSummary(resultsPath, totals, screenshotsDir);
|
|
123
123
|
console.log("─".repeat(40));
|
|
124
124
|
console.log(`Total: ${totals.passed} passed, ${totals.failed} failed, ${totals.skipped} skipped, ${totals.errors} errors`);
|
|
125
125
|
if (totals.cost) {
|
package/dist/reporter.d.ts
CHANGED
|
@@ -6,4 +6,4 @@ export declare function initResultsFile(projectRoot: string, runId: string): {
|
|
|
6
6
|
screenshotsDir: string;
|
|
7
7
|
};
|
|
8
8
|
export declare function appendDomainResults(resultsPath: string, result: DomainResult): void;
|
|
9
|
-
export declare function appendSummary(resultsPath: string, totals: RunTotals): void;
|
|
9
|
+
export declare function appendSummary(resultsPath: string, totals: RunTotals, screenshotsDir: string): void;
|
package/dist/reporter.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { appendFileSync, existsSync, mkdirSync, writeFileSync } from "fs";
|
|
1
|
+
import { appendFileSync, existsSync, mkdirSync, readdirSync, rmSync, writeFileSync, } from "fs";
|
|
2
2
|
import { resolve, join } from "path";
|
|
3
|
+
const MAX_RUNS = 5;
|
|
3
4
|
const pad = (n) => String(n).padStart(2, "0");
|
|
4
5
|
export function generateRunId() {
|
|
5
6
|
const now = new Date();
|
|
@@ -13,12 +14,13 @@ export function initResultsFile(projectRoot, runId) {
|
|
|
13
14
|
const resultsDir = resolve(projectRoot, "features/exspec");
|
|
14
15
|
const screenshotsDir = resolve(resultsDir, runId);
|
|
15
16
|
const resultsPath = resolve(resultsDir, `${runId}.md`);
|
|
16
|
-
mkdirSync(
|
|
17
|
+
mkdirSync(resultsDir, { recursive: true });
|
|
17
18
|
// Create .gitignore on first run
|
|
18
19
|
const gitignorePath = join(resultsDir, ".gitignore");
|
|
19
20
|
if (!existsSync(gitignorePath)) {
|
|
20
21
|
writeFileSync(gitignorePath, "*\n!.gitignore\n");
|
|
21
22
|
}
|
|
23
|
+
pruneOldRuns(resultsDir);
|
|
22
24
|
writeFileSync(resultsPath, `# Test results — ${runId}\n\nStarted at ${formatTime()}\n`);
|
|
23
25
|
return { resultsPath, screenshotsDir };
|
|
24
26
|
}
|
|
@@ -60,7 +62,7 @@ export function appendDomainResults(resultsPath, result) {
|
|
|
60
62
|
}
|
|
61
63
|
appendFileSync(resultsPath, lines.join("\n"));
|
|
62
64
|
}
|
|
63
|
-
export function appendSummary(resultsPath, totals) {
|
|
65
|
+
export function appendSummary(resultsPath, totals, screenshotsDir) {
|
|
64
66
|
const content = [
|
|
65
67
|
"---\n",
|
|
66
68
|
"## Summary\n",
|
|
@@ -68,4 +70,29 @@ export function appendSummary(resultsPath, totals) {
|
|
|
68
70
|
`Finished at ${formatTime()}\n`,
|
|
69
71
|
].join("\n");
|
|
70
72
|
appendFileSync(resultsPath, content);
|
|
73
|
+
cleanupEmptyDir(screenshotsDir);
|
|
74
|
+
}
|
|
75
|
+
function cleanupEmptyDir(dir) {
|
|
76
|
+
if (!existsSync(dir))
|
|
77
|
+
return;
|
|
78
|
+
const entries = readdirSync(dir);
|
|
79
|
+
if (entries.length === 0) {
|
|
80
|
+
rmSync(dir);
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
function pruneOldRuns(resultsDir) {
|
|
84
|
+
const entries = readdirSync(resultsDir);
|
|
85
|
+
const runIds = entries
|
|
86
|
+
.filter((e) => e.match(/^\d{4}-\d{2}-\d{2}-\d{4}\.md$/))
|
|
87
|
+
.map((e) => e.replace(/\.md$/, ""))
|
|
88
|
+
.sort();
|
|
89
|
+
while (runIds.length >= MAX_RUNS) {
|
|
90
|
+
const oldest = runIds.shift();
|
|
91
|
+
const mdPath = join(resultsDir, `${oldest}.md`);
|
|
92
|
+
const dirPath = join(resultsDir, oldest);
|
|
93
|
+
if (existsSync(mdPath))
|
|
94
|
+
rmSync(mdPath);
|
|
95
|
+
if (existsSync(dirPath))
|
|
96
|
+
rmSync(dirPath, { recursive: true });
|
|
97
|
+
}
|
|
71
98
|
}
|
package/dist/reporter.test.js
CHANGED
|
@@ -70,14 +70,14 @@ describe("appendSummary", () => {
|
|
|
70
70
|
});
|
|
71
71
|
test("writes totals", () => {
|
|
72
72
|
mkdirSync(tmpRoot, { recursive: true });
|
|
73
|
-
const { resultsPath } = initResultsFile(tmpRoot, "test-run");
|
|
73
|
+
const { resultsPath, screenshotsDir } = initResultsFile(tmpRoot, "test-run");
|
|
74
74
|
const totals = {
|
|
75
75
|
passed: 5,
|
|
76
76
|
failed: 2,
|
|
77
77
|
skipped: 1,
|
|
78
78
|
errors: 0,
|
|
79
79
|
};
|
|
80
|
-
appendSummary(resultsPath, totals);
|
|
80
|
+
appendSummary(resultsPath, totals, screenshotsDir);
|
|
81
81
|
const content = readFileSync(resultsPath, "utf-8");
|
|
82
82
|
expect(content).toContain("5 passed, 2 failed, 1 skipped, 0 errors");
|
|
83
83
|
});
|
package/dist/runner.js
CHANGED
|
@@ -56,6 +56,7 @@ function invokeClaude(prompt, cwd, mcpConfigPath) {
|
|
|
56
56
|
"mcp__playwright__*",
|
|
57
57
|
"--output-format",
|
|
58
58
|
"stream-json",
|
|
59
|
+
"--verbose",
|
|
59
60
|
"--model",
|
|
60
61
|
"sonnet",
|
|
61
62
|
"--mcp-config",
|
|
@@ -112,6 +113,9 @@ function invokeClaude(prompt, cwd, mcpConfigPath) {
|
|
|
112
113
|
resultText = event.result ?? "";
|
|
113
114
|
cost = event.cost_usd;
|
|
114
115
|
duration = event.duration_ms;
|
|
116
|
+
if (event.is_error) {
|
|
117
|
+
resultText = `Error: ${resultText}`;
|
|
118
|
+
}
|
|
115
119
|
break;
|
|
116
120
|
}
|
|
117
121
|
}
|
|
@@ -133,7 +137,8 @@ function invokeClaude(prompt, cwd, mcpConfigPath) {
|
|
|
133
137
|
}
|
|
134
138
|
process.stderr.write("\n");
|
|
135
139
|
if (code !== 0) {
|
|
136
|
-
|
|
140
|
+
const detail = resultText || stderr.slice(0, 500) || `exit code ${code}`;
|
|
141
|
+
reject(new Error(detail));
|
|
137
142
|
}
|
|
138
143
|
else {
|
|
139
144
|
resolve({ result: resultText, cost, duration });
|