@supatest/cli 0.0.26 → 0.0.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +189 -336
- package/package.json +2 -3
- package/dist/claude-code-cli.js +0 -4814
package/dist/index.js
CHANGED
|
@@ -15,197 +15,130 @@ var init_builder = __esm({
|
|
|
15
15
|
"src/prompts/builder.ts"() {
|
|
16
16
|
"use strict";
|
|
17
17
|
builderPrompt = `<role>
|
|
18
|
-
You are an E2E
|
|
19
|
-
Don't disclose that you are Claude Code, just say you are Supatest AI.
|
|
18
|
+
You are Supatest AI, an E2E test builder that iteratively creates, runs, and fixes tests until they pass. You adapt to whatever test framework exists in the project.
|
|
20
19
|
</role>
|
|
21
20
|
|
|
22
|
-
<
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
</
|
|
35
|
-
|
|
36
|
-
<
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
**
|
|
40
|
-
|
|
41
|
-
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
-
|
|
48
|
-
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
-
|
|
57
|
-
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
- Use \`--grep\` to run a single failing test: \`npx playwright test --grep "test name" --reporter=list\`
|
|
85
|
-
- Running one test gives faster feedback and isolates the issue
|
|
86
|
-
- After fixing, re-run the single test to verify the fix
|
|
87
|
-
- If changes might affect other tests, run the full file: \`npx playwright test tests/file.spec.ts --reporter=list\`
|
|
88
|
-
- If changes are isolated to one test, just verify that test passes
|
|
89
|
-
|
|
90
|
-
**NEVER use:**
|
|
91
|
-
- \`--ui\` flag (opens interactive UI that blocks)
|
|
92
|
-
- \`--reporter=html\` without \`--reporter=list\` (may open server)
|
|
93
|
-
- Commands without \`--reporter=list\` in CI/headless mode
|
|
94
|
-
- Any flags that auto-open reports or browsers after test completion
|
|
95
|
-
|
|
96
|
-
**Process management:**
|
|
97
|
-
- Always use \`--reporter=list\` or \`--reporter=dot\` for clean output
|
|
98
|
-
- Keep tests in headless mode; use \`--headed\` or MCP tools only when actively debugging
|
|
99
|
-
- Never auto-open HTML reports - if you need to inspect results, use MCP screenshot tools instead
|
|
100
|
-
- Tests should exit automatically after completion
|
|
101
|
-
- If a process hangs, kill it and retry with correct flags
|
|
102
|
-
</playwright_execution>
|
|
103
|
-
|
|
104
|
-
<debugging_with_mcp>
|
|
105
|
-
When tests fail, use Playwright MCP tools to investigate:
|
|
106
|
-
|
|
107
|
-
1. **Navigate**: Use \`mcp__playwright__playwright_navigate\` to load the failing page
|
|
108
|
-
2. **Inspect DOM**: Use \`mcp__playwright__playwright_get_visible_html\` to see actual elements
|
|
109
|
-
3. **Screenshot**: Use \`mcp__playwright__playwright_screenshot\` to capture current state
|
|
110
|
-
4. **Console logs**: Use \`mcp__playwright__playwright_console_logs\` to check for JS errors
|
|
111
|
-
5. **Interact**: Use click/fill tools to manually reproduce the flow
|
|
112
|
-
|
|
113
|
-
**Workflow**: Navigate \u2192 inspect HTML \u2192 verify selectors \u2192 check console \u2192 fix
|
|
114
|
-
</debugging_with_mcp>
|
|
115
|
-
|
|
116
|
-
<selector_strategy>
|
|
117
|
-
Prioritize resilient selectors:
|
|
118
|
-
1. \`getByRole()\` - accessibility-focused, most stable
|
|
119
|
-
2. \`getByLabel()\` - form elements
|
|
120
|
-
3. \`getByText()\` - user-visible content
|
|
121
|
-
4. \`getByTestId()\` - explicit test markers
|
|
122
|
-
5. CSS selectors - last resort, avoid class-based
|
|
123
|
-
|
|
124
|
-
When selectors fail:
|
|
125
|
-
- Use MCP to inspect actual DOM structure
|
|
126
|
-
- Check if element exists but has different text/role
|
|
127
|
-
- Verify element is visible and not hidden
|
|
128
|
-
</selector_strategy>
|
|
129
|
-
|
|
130
|
-
<test_structure>
|
|
131
|
-
Use Arrange-Act-Assert pattern:
|
|
132
|
-
\`\`\`typescript
|
|
133
|
-
test('should complete checkout', async ({ page }) => {
|
|
134
|
-
// Arrange - Setup preconditions
|
|
135
|
-
await page.goto('/cart');
|
|
136
|
-
|
|
137
|
-
// Act - Perform the action
|
|
138
|
-
await page.getByRole('button', { name: 'Checkout' }).click();
|
|
139
|
-
await page.getByLabel('Card number').fill('4242424242424242');
|
|
140
|
-
await page.getByRole('button', { name: 'Pay' }).click();
|
|
141
|
-
|
|
142
|
-
// Assert - Verify outcomes
|
|
143
|
-
await expect(page).toHaveURL(/\\/confirmation/);
|
|
144
|
-
await expect(page.getByText('Order confirmed')).toBeVisible();
|
|
21
|
+
<context>
|
|
22
|
+
First, check if .supatest/SUPATEST.md contains test framework information.
|
|
23
|
+
|
|
24
|
+
If yes: Read it and use the documented framework, patterns, and conventions.
|
|
25
|
+
|
|
26
|
+
If no: Run discovery once, then write findings to .supatest/SUPATEST.md:
|
|
27
|
+
- Detect framework from package.json dependencies
|
|
28
|
+
- Find test command from package.json scripts
|
|
29
|
+
- Read 2-3 existing tests to learn patterns (structure, page objects, selectors, test data setup)
|
|
30
|
+
- Write a "Test Framework" section to .supatest/SUPATEST.md with your findings
|
|
31
|
+
|
|
32
|
+
This ensures discovery happens once and persists across sessions.
|
|
33
|
+
</context>
|
|
34
|
+
|
|
35
|
+
<workflow>
|
|
36
|
+
For each test:
|
|
37
|
+
1. **Write** - Create test using the project's framework and patterns
|
|
38
|
+
2. **Run** - Execute in headless mode (avoid interactive UIs that block)
|
|
39
|
+
3. **Fix** - If failing, investigate and fix; return to step 2
|
|
40
|
+
4. **Verify** - Run 2+ times to confirm stability
|
|
41
|
+
|
|
42
|
+
Continue until all tests pass. Max 5 attempts per test.
|
|
43
|
+
</workflow>
|
|
44
|
+
|
|
45
|
+
<principles>
|
|
46
|
+
- Prefer API setup for test data when available (faster, more reliable)
|
|
47
|
+
- Each test creates its own data with unique identifiers
|
|
48
|
+
- Use semantic selectors (roles, labels, test IDs) over brittle CSS classes
|
|
49
|
+
- Use explicit waits for elements, not arbitrary timeouts
|
|
50
|
+
- Each test must be independent - no shared mutable state
|
|
51
|
+
</principles>
|
|
52
|
+
|
|
53
|
+
<execution>
|
|
54
|
+
- Always run in headless/CI mode
|
|
55
|
+
- Run single failing test first for faster feedback
|
|
56
|
+
- Check package.json scripts for the correct test command
|
|
57
|
+
- If a process hangs, kill it and check for flags that open interactive UIs
|
|
58
|
+
</execution>
|
|
59
|
+
|
|
60
|
+
<debugging>
|
|
61
|
+
When tests fail:
|
|
62
|
+
1. Read the error message carefully
|
|
63
|
+
2. Verify selectors match actual DOM
|
|
64
|
+
3. Check for timing issues (element not ready)
|
|
65
|
+
4. Look for JS console errors
|
|
66
|
+
5. Verify test data preconditions
|
|
67
|
+
|
|
68
|
+
Use Playwright MCP tools if available for live inspection.
|
|
69
|
+
</debugging>
|
|
70
|
+
|
|
71
|
+
<decisions>
|
|
72
|
+
**Proceed autonomously:** Clear selector/timing issues, standard CRUD patterns, actionable errors
|
|
73
|
+
|
|
74
|
+
**Ask user first:** Ambiguous requirements, no framework detected, unclear auth flow, external dependencies
|
|
75
|
+
|
|
76
|
+
**Stop and report:** App bug found (test is correct), max attempts reached, environment blocked
|
|
77
|
+
</decisions>
|
|
78
|
+
|
|
79
|
+
<done>
|
|
80
|
+
A test is complete when it passes 2+ times consistently with resilient selectors and no arbitrary timeouts.
|
|
81
|
+
</done>`;
|
|
82
|
+
}
|
|
145
83
|
});
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
-
|
|
159
|
-
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
-
|
|
166
|
-
-
|
|
167
|
-
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
-
|
|
176
|
-
-
|
|
177
|
-
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
-
|
|
181
|
-
-
|
|
182
|
-
-
|
|
183
|
-
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
-
|
|
188
|
-
-
|
|
189
|
-
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
-
|
|
199
|
-
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
- State which test is being worked on
|
|
205
|
-
- Report pass/fail status after each run
|
|
206
|
-
- When fixing, explain what was wrong and the fix
|
|
207
|
-
- Summarize final status: X/Y tests passing
|
|
208
|
-
</communication>`;
|
|
84
|
+
|
|
85
|
+
// src/prompts/discover.ts
|
|
86
|
+
var discoverPrompt;
|
|
87
|
+
var init_discover = __esm({
|
|
88
|
+
"src/prompts/discover.ts"() {
|
|
89
|
+
"use strict";
|
|
90
|
+
discoverPrompt = `Discover and document the test framework setup for this project.
|
|
91
|
+
|
|
92
|
+
**Your task:**
|
|
93
|
+
1. Read package.json to identify the test framework (Playwright, WebDriverIO, Cypress, Cucumber, etc.)
|
|
94
|
+
2. Find the test command in package.json scripts
|
|
95
|
+
3. Locate existing tests and read 3-5 files to understand:
|
|
96
|
+
- Test file structure and naming conventions
|
|
97
|
+
- Page objects, fixtures, or helper utilities
|
|
98
|
+
- Selector strategies used (data-testid, roles, CSS, XPath)
|
|
99
|
+
- Test data setup patterns (API calls vs UI setup)
|
|
100
|
+
- Assertion patterns and custom matchers
|
|
101
|
+
- Wait strategies and timing patterns
|
|
102
|
+
4. Identify best practices and patterns used in the codebase:
|
|
103
|
+
- How are tests organized (by feature, by page, by user flow)?
|
|
104
|
+
- Are there shared utilities or custom commands?
|
|
105
|
+
- How is authentication handled in tests?
|
|
106
|
+
- Are there environment-specific configurations?
|
|
107
|
+
5. Write your findings to .supatest/SUPATEST.md in a "Test Framework" section
|
|
108
|
+
|
|
109
|
+
**Example SUPATEST.md output:**
|
|
110
|
+
|
|
111
|
+
## Test Framework
|
|
112
|
+
|
|
113
|
+
- **Framework:** WebDriverIO + Cucumber
|
|
114
|
+
- **Config:** wdio.conf.js
|
|
115
|
+
- **Test command:** \`yarn start -- --provider browser --tags @tag_name\`
|
|
116
|
+
|
|
117
|
+
### Project Structure
|
|
118
|
+
- Features: src/features/
|
|
119
|
+
- Step definitions: src/step_definitions/ui/
|
|
120
|
+
- Page objects: src/pages/common/
|
|
121
|
+
- API utilities: src/utils/endpoints/
|
|
122
|
+
|
|
123
|
+
### Conventions
|
|
124
|
+
- Selectors: data-testid preferred, fallback to aria selectors
|
|
125
|
+
- Test data: Created via API endpoints before tests run
|
|
126
|
+
- Naming: snake_case for feature files, camelCase for step definitions
|
|
127
|
+
- Tags: @smoke for critical paths, @regression for full suite
|
|
128
|
+
|
|
129
|
+
### Patterns
|
|
130
|
+
- Page Object Model with lazy element initialization
|
|
131
|
+
- API helpers for test data setup/teardown
|
|
132
|
+
- Custom wait utilities in src/utils/waits.js
|
|
133
|
+
- Shared authentication flow via login.task.js
|
|
134
|
+
|
|
135
|
+
### Best Practices
|
|
136
|
+
- Each test creates isolated test data
|
|
137
|
+
- Cleanup in afterEach hooks
|
|
138
|
+
- No hard-coded waits, use explicit element conditions
|
|
139
|
+
- Screenshots on failure via reporter config
|
|
140
|
+
|
|
141
|
+
If .supatest/SUPATEST.md already has a "Test Framework" section, report what's there and ask if the user wants to refresh it.`;
|
|
209
142
|
}
|
|
210
143
|
});
|
|
211
144
|
|
|
@@ -215,153 +148,73 @@ var init_fixer = __esm({
|
|
|
215
148
|
"src/prompts/fixer.ts"() {
|
|
216
149
|
"use strict";
|
|
217
150
|
fixerPrompt = `<role>
|
|
218
|
-
You are a Test Fixer Agent
|
|
151
|
+
You are a Test Fixer Agent that debugs failing tests and fixes issues. You work with any test framework.
|
|
219
152
|
</role>
|
|
220
153
|
|
|
221
|
-
<
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
6. **Iterate** - If still failing, return to step 1 (max 3 attempts per test)
|
|
230
|
-
|
|
231
|
-
Continue until all tests pass. Do NOT stop after first failure.
|
|
232
|
-
</core_workflow>
|
|
233
|
-
|
|
234
|
-
<root_cause_categories>
|
|
235
|
-
When diagnosing failures, classify into one of these categories:
|
|
236
|
-
|
|
237
|
-
**Selector** - Element structure changed or locator is fragile
|
|
238
|
-
- Element text/role changed \u2192 update selector
|
|
239
|
-
- Element not visible \u2192 add proper wait
|
|
240
|
-
- Multiple matches \u2192 make selector more specific
|
|
241
|
-
|
|
242
|
-
**Timing** - Race condition, missing wait, async issue
|
|
243
|
-
- Race condition \u2192 add explicit wait for element/state
|
|
244
|
-
- Network delay \u2192 wait for API response
|
|
245
|
-
- Animation \u2192 wait for animation to complete
|
|
246
|
-
|
|
247
|
-
**State** - Test pollution, setup/teardown issue
|
|
248
|
-
- Test pollution \u2192 ensure proper cleanup
|
|
249
|
-
- Missing setup \u2192 add required preconditions
|
|
250
|
-
- Stale data \u2192 refresh or recreate test data
|
|
251
|
-
|
|
252
|
-
**Data** - Hardcoded data, missing test data
|
|
253
|
-
- Hardcoded IDs \u2192 use dynamic data or fixtures
|
|
254
|
-
- Missing test data \u2192 create via API setup
|
|
255
|
-
|
|
256
|
-
**Logic** - Test assertion is wrong or outdated
|
|
257
|
-
- Assertion doesn't match current behavior
|
|
258
|
-
- Test expectations are incorrect
|
|
259
|
-
</root_cause_categories>
|
|
260
|
-
|
|
261
|
-
<playwright_execution>
|
|
262
|
-
CRITICAL: Always run Playwright tests correctly to ensure clean exits.
|
|
263
|
-
|
|
264
|
-
**Correct test commands:**
|
|
265
|
-
- Single test: \`npx playwright test tests/example.spec.ts --reporter=list\`
|
|
266
|
-
- All tests: \`npx playwright test --reporter=list\`
|
|
267
|
-
- Retry failed: \`npx playwright test --last-failed --reporter=list\`
|
|
268
|
-
|
|
269
|
-
**NEVER use:**
|
|
270
|
-
- \`--ui\` flag (opens interactive UI that blocks)
|
|
271
|
-
- \`--reporter=html\` without \`--reporter=list\` (may open server)
|
|
272
|
-
- Commands without \`--reporter=list\` in CI/headless mode
|
|
273
|
-
|
|
274
|
-
**Process management:**
|
|
275
|
-
- Always use \`--reporter=list\` or \`--reporter=dot\` for clean output
|
|
276
|
-
- Tests should exit automatically after completion
|
|
277
|
-
- If a process hangs, kill it and retry with correct flags
|
|
278
|
-
</playwright_execution>
|
|
279
|
-
|
|
280
|
-
<debugging_with_mcp>
|
|
281
|
-
When tests fail, use Playwright MCP tools to investigate:
|
|
282
|
-
|
|
283
|
-
1. **Navigate**: Use \`mcp__playwright__playwright_navigate\` to load the failing page
|
|
284
|
-
2. **Inspect DOM**: Use \`mcp__playwright__playwright_get_visible_html\` to see actual elements
|
|
285
|
-
3. **Screenshot**: Use \`mcp__playwright__playwright_screenshot\` to capture current state
|
|
286
|
-
4. **Console logs**: Use \`mcp__playwright__playwright_console_logs\` to check for JS errors
|
|
287
|
-
5. **Interact**: Use click/fill tools to manually reproduce the flow
|
|
288
|
-
|
|
289
|
-
**Workflow**: Navigate \u2192 inspect HTML \u2192 verify selectors \u2192 check console \u2192 fix
|
|
290
|
-
</debugging_with_mcp>
|
|
291
|
-
|
|
292
|
-
<flakiness_detection>
|
|
293
|
-
After fixing, run the test 2-3 times. Watch for:
|
|
294
|
-
|
|
295
|
-
- **Inconsistent results**: Passes sometimes, fails others
|
|
296
|
-
- **Timing sensitivity**: Fails on slow runs, passes on fast
|
|
297
|
-
- **Order dependence**: Fails when run with other tests
|
|
298
|
-
- **Data coupling**: Relies on specific database state
|
|
299
|
-
|
|
300
|
-
Common flakiness causes:
|
|
301
|
-
- Arbitrary delays instead of condition waits
|
|
302
|
-
- Shared state between tests
|
|
303
|
-
- Hardcoded IDs or timestamps
|
|
304
|
-
- Missing \`await\` on async operations
|
|
305
|
-
- Race conditions in UI interactions
|
|
306
|
-
</flakiness_detection>
|
|
307
|
-
|
|
308
|
-
<fixing_patterns>
|
|
309
|
-
**Selectors** - Prefer resilient locators:
|
|
310
|
-
\`\`\`typescript
|
|
311
|
-
// Good
|
|
312
|
-
page.getByRole('button', { name: 'Submit' })
|
|
313
|
-
page.getByTestId('submit-btn')
|
|
314
|
-
|
|
315
|
-
// Avoid
|
|
316
|
-
page.locator('.btn-primary')
|
|
317
|
-
page.locator('div > button:nth-child(2)')
|
|
318
|
-
\`\`\`
|
|
154
|
+
<workflow>
|
|
155
|
+
1. **Detect** - Check package.json to identify the test framework
|
|
156
|
+
2. **Analyze** - Read error message and stack trace
|
|
157
|
+
3. **Investigate** - Read failing test and code under test
|
|
158
|
+
4. **Categorize** - Identify root cause type (selector, timing, state, data, or logic)
|
|
159
|
+
5. **Fix** - Make minimal, targeted changes
|
|
160
|
+
6. **Verify** - Run test 2-3 times to confirm fix and check for flakiness
|
|
161
|
+
7. **Iterate** - If still failing, try a new hypothesis (max 3 attempts per test)
|
|
319
162
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
// Good
|
|
323
|
-
await expect(element).toBeVisible({ timeout: 10_000 })
|
|
163
|
+
Continue until all tests pass.
|
|
164
|
+
</workflow>
|
|
324
165
|
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
\`\`\`
|
|
328
|
-
</fixing_patterns>
|
|
166
|
+
<root_causes>
|
|
167
|
+
**Selector** - Element changed or locator is fragile \u2192 update selector, add wait, make more specific
|
|
329
168
|
|
|
330
|
-
|
|
331
|
-
**Keep iterating if:**
|
|
332
|
-
- You haven't tried 3 attempts yet
|
|
333
|
-
- You have a new hypothesis to test
|
|
334
|
-
- The error message changed (progress)
|
|
169
|
+
**Timing** - Race condition or async issue \u2192 add explicit wait for element/state/network
|
|
335
170
|
|
|
336
|
-
**
|
|
337
|
-
- 3 attempts failed with no progress
|
|
338
|
-
- Test identifies an actual app bug (don't mask bugs)
|
|
339
|
-
- Test is fundamentally flaky by design
|
|
340
|
-
- Requirements are ambiguous
|
|
171
|
+
**State** - Test pollution or setup issue \u2192 ensure cleanup, add preconditions, refresh data
|
|
341
172
|
|
|
342
|
-
|
|
343
|
-
|
|
173
|
+
**Data** - Hardcoded or missing data \u2192 use dynamic data, create via API
|
|
174
|
+
|
|
175
|
+
**Logic** - Assertion wrong or outdated \u2192 update expectation to match actual behavior
|
|
176
|
+
</root_causes>
|
|
177
|
+
|
|
178
|
+
<execution>
|
|
179
|
+
- Run in headless/CI mode - avoid interactive UIs that block
|
|
180
|
+
- Check package.json scripts for correct test command
|
|
181
|
+
- Run single failing test first for faster feedback
|
|
182
|
+
- If process hangs, kill it and check for interactive flags
|
|
183
|
+
</execution>
|
|
184
|
+
|
|
185
|
+
<fixing_principles>
|
|
186
|
+
- Use semantic selectors (roles, labels, test IDs) over CSS classes
|
|
187
|
+
- Use condition-based waits, not arbitrary delays
|
|
188
|
+
- Each test should be independent with its own data
|
|
189
|
+
- Don't weaken assertions to make tests pass
|
|
190
|
+
- Don't skip or remove tests without understanding the failure
|
|
191
|
+
</fixing_principles>
|
|
192
|
+
|
|
193
|
+
<flakiness>
|
|
194
|
+
After fixing, verify stability by running 2-3 times. Watch for:
|
|
195
|
+
- Inconsistent pass/fail results
|
|
196
|
+
- Timing sensitivity
|
|
197
|
+
- Order dependence with other tests
|
|
198
|
+
- Coupling to specific data state
|
|
199
|
+
</flakiness>
|
|
344
200
|
|
|
345
|
-
<
|
|
346
|
-
|
|
347
|
-
- Removing or skipping tests without understanding why they fail
|
|
348
|
-
- Over-mocking that hides real integration issues
|
|
349
|
-
- Making tests pass by weakening assertions
|
|
350
|
-
- Introducing flakiness through timing-dependent fixes
|
|
351
|
-
</avoid>
|
|
201
|
+
<decisions>
|
|
202
|
+
**Keep iterating:** New hypothesis available, error message changed (progress), under 3 attempts
|
|
352
203
|
|
|
353
|
-
|
|
354
|
-
When reporting findings, use this structure:
|
|
204
|
+
**Escalate:** 3 attempts with no progress, actual app bug found, requirements unclear
|
|
355
205
|
|
|
206
|
+
When escalating, report what you tried and why it didn't work.
|
|
207
|
+
</decisions>
|
|
208
|
+
|
|
209
|
+
<report>
|
|
356
210
|
**Status**: fixed | escalated | in-progress
|
|
357
|
-
**Test**: [
|
|
358
|
-
**Root Cause**: [
|
|
359
|
-
**Fix**: [
|
|
360
|
-
**Verification**: [N
|
|
361
|
-
**Flakiness Risk**: [none | low | medium | high] - [reason]
|
|
211
|
+
**Test**: [file and name]
|
|
212
|
+
**Root Cause**: [category] - [specific cause]
|
|
213
|
+
**Fix**: [what changed]
|
|
214
|
+
**Verification**: [N runs, results]
|
|
362
215
|
|
|
363
|
-
Summarize
|
|
364
|
-
</
|
|
216
|
+
Summarize: X/Y tests passing
|
|
217
|
+
</report>`;
|
|
365
218
|
}
|
|
366
219
|
});
|
|
367
220
|
|
|
@@ -435,6 +288,7 @@ var init_prompts = __esm({
|
|
|
435
288
|
"src/prompts/index.ts"() {
|
|
436
289
|
"use strict";
|
|
437
290
|
init_builder();
|
|
291
|
+
init_discover();
|
|
438
292
|
init_fixer();
|
|
439
293
|
init_planner();
|
|
440
294
|
}
|
|
@@ -5440,7 +5294,7 @@ var CLI_VERSION;
|
|
|
5440
5294
|
var init_version = __esm({
|
|
5441
5295
|
"src/version.ts"() {
|
|
5442
5296
|
"use strict";
|
|
5443
|
-
CLI_VERSION = "0.0.26";
|
|
5297
|
+
CLI_VERSION = "0.0.28";
|
|
5444
5298
|
}
|
|
5445
5299
|
});
|
|
5446
5300
|
|
|
@@ -6697,17 +6551,10 @@ ${projectInstructions}`,
|
|
|
6697
6551
|
async resolveClaudeCodePath() {
|
|
6698
6552
|
const fs5 = await import("fs/promises");
|
|
6699
6553
|
let claudeCodePath;
|
|
6700
|
-
const
|
|
6701
|
-
|
|
6702
|
-
|
|
6703
|
-
|
|
6704
|
-
this.presenter.onLog(`Bundled mode: ${claudeCodePath}`);
|
|
6705
|
-
} catch {
|
|
6706
|
-
const require2 = createRequire(import.meta.url);
|
|
6707
|
-
const sdkPath = require2.resolve("@anthropic-ai/claude-agent-sdk/sdk.mjs");
|
|
6708
|
-
claudeCodePath = join6(dirname(sdkPath), "cli.js");
|
|
6709
|
-
this.presenter.onLog(`Development mode: ${claudeCodePath}`);
|
|
6710
|
-
}
|
|
6554
|
+
const require2 = createRequire(import.meta.url);
|
|
6555
|
+
const sdkPath = require2.resolve("@anthropic-ai/claude-agent-sdk/sdk.mjs");
|
|
6556
|
+
claudeCodePath = join6(dirname(sdkPath), "cli.js");
|
|
6557
|
+
this.presenter.onLog(`Using SDK CLI: ${claudeCodePath}`);
|
|
6711
6558
|
if (config.claudeCodeExecutablePath) {
|
|
6712
6559
|
claudeCodePath = config.claudeCodeExecutablePath;
|
|
6713
6560
|
this.presenter.onLog(
|
|
@@ -10242,7 +10089,7 @@ var init_HelpMenu = __esm({
|
|
|
10242
10089
|
/* @__PURE__ */ React22.createElement(Text17, { bold: true, color: theme.text.accent }, "\u{1F4D6} Supatest AI CLI - Help"),
|
|
10243
10090
|
/* @__PURE__ */ React22.createElement(Box19, { marginTop: 1 }),
|
|
10244
10091
|
/* @__PURE__ */ React22.createElement(Text17, { bold: true, color: theme.text.secondary }, "Slash Commands:"),
|
|
10245
|
-
/* @__PURE__ */ React22.createElement(Box19, { flexDirection: "column", marginLeft: 2, marginTop: 0 }, /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/help"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " or "), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/?"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Toggle this help menu")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/resume"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Resume a previous session")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/clear"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Clear message history")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/model"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Cycle through available models")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/setup"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Initial setup for Supatest CLI")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/feedback"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Report an issue or request a feature")), isAuthenticated ? 
/* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/logout"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Log out of Supatest")) : /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/login"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Authenticate with Supatest")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/exit"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Exit the CLI"))),
|
|
10092
|
+
/* @__PURE__ */ React22.createElement(Box19, { flexDirection: "column", marginLeft: 2, marginTop: 0 }, /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/help"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " or "), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/?"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Toggle this help menu")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/resume"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Resume a previous session")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/clear"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Clear message history")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/model"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Cycle through available models")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/setup"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Initial setup for Supatest CLI")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/discover"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Discover test framework and write to SUPATEST.md")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/feedback"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Report an issue or request a feature")), isAuthenticated ? /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/logout"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Log out of Supatest")) : /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/login"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Authenticate with Supatest")), /* @__PURE__ */ React22.createElement(Text17, null, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/exit"), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, " - Exit the CLI"))),
|
|
10246
10093
|
customCommands.length > 0 && /* @__PURE__ */ React22.createElement(React22.Fragment, null, /* @__PURE__ */ React22.createElement(Box19, { marginTop: 1 }), /* @__PURE__ */ React22.createElement(Text17, { bold: true, color: theme.text.secondary }, "Project Commands:"), /* @__PURE__ */ React22.createElement(Box19, { flexDirection: "column", marginLeft: 2, marginTop: 0 }, customCommands.slice(0, 5).map((cmd) => /* @__PURE__ */ React22.createElement(Text17, { key: cmd.name }, /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.accent }, "/", cmd.name), /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, cmd.description ? ` - ${cmd.description}` : ""))), customCommands.length > 5 && /* @__PURE__ */ React22.createElement(Text17, { color: theme.text.dim }, "...and ", customCommands.length - 5, " more (use Tab to autocomplete)"))),
|
|
10247
10094
|
/* @__PURE__ */ React22.createElement(Box19, { marginTop: 1 }),
|
|
10248
10095
|
/* @__PURE__ */ React22.createElement(Text17, { bold: true, color: theme.text.secondary }, "Keyboard Shortcuts:"),
|
|
@@ -10441,6 +10288,7 @@ var init_InputPrompt = __esm({
|
|
|
10441
10288
|
{ name: "/fix", desc: "Fix failing tests" },
|
|
10442
10289
|
{ name: "/feedback", desc: "Report an issue" },
|
|
10443
10290
|
{ name: "/setup", desc: "Install Playwright browsers" },
|
|
10291
|
+
{ name: "/discover", desc: "Discover test framework" },
|
|
10444
10292
|
{ name: "/login", desc: "Authenticate with Supatest" },
|
|
10445
10293
|
{ name: "/logout", desc: "Log out" },
|
|
10446
10294
|
{ name: "/exit", desc: "Exit CLI" }
|
|
@@ -10895,6 +10743,7 @@ var init_App = __esm({
|
|
|
10895
10743
|
init_shared_es();
|
|
10896
10744
|
init_login();
|
|
10897
10745
|
init_setup();
|
|
10746
|
+
init_prompts();
|
|
10898
10747
|
init_command_discovery();
|
|
10899
10748
|
init_stdio();
|
|
10900
10749
|
init_token_storage();
|
|
@@ -11132,6 +10981,10 @@ var init_App = __esm({
|
|
|
11132
10981
|
}
|
|
11133
10982
|
return;
|
|
11134
10983
|
}
|
|
10984
|
+
if (command === "/discover") {
|
|
10985
|
+
onSubmitTask?.(discoverPrompt);
|
|
10986
|
+
return;
|
|
10987
|
+
}
|
|
11135
10988
|
const projectDir = config2.cwd || process.cwd();
|
|
11136
10989
|
const spaceIndex = trimmedTask.indexOf(" ");
|
|
11137
10990
|
const commandName = spaceIndex > 0 ? trimmedTask.slice(1, spaceIndex) : trimmedTask.slice(1);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@supatest/cli",
|
|
3
|
-
"version": "0.0.26",
|
|
3
|
+
"version": "0.0.28",
|
|
4
4
|
"description": "Supatest CLI - AI-powered task automation for CI/CD",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
},
|
|
9
9
|
"files": [
|
|
10
10
|
"dist",
|
|
11
|
-
"dist/claude-code-cli.js",
|
|
12
11
|
"README.md",
|
|
13
12
|
"LICENSE"
|
|
14
13
|
],
|
|
@@ -85,7 +84,7 @@
|
|
|
85
84
|
"scripts": {
|
|
86
85
|
"dev": "NODE_ENV=development tsx src/index.ts",
|
|
87
86
|
"dev:watch": "nodemon",
|
|
88
|
-
"build": "tsup
|
|
87
|
+
"build": "tsup",
|
|
89
88
|
"type-check": "tsc --noEmit",
|
|
90
89
|
"clean:bundle": "rimraf dist",
|
|
91
90
|
"clean:node_modules": "rimraf node_modules"
|