@muggleai/works 4.5.0 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2FVSZ5LQ.js +2176 -0
- package/dist/{chunk-MNCBJEPQ.js → chunk-HDEZDEM6.js} +756 -2815
- package/dist/cli.js +2 -1
- package/dist/index.js +2 -1
- package/dist/plugin/.claude-plugin/plugin.json +1 -1
- package/dist/plugin/.cursor-plugin/plugin.json +1 -1
- package/dist/plugin/skills/do/e2e-acceptance.md +6 -3
- package/dist/plugin/skills/do/open-prs.md +2 -1
- package/dist/plugin/skills/muggle-pr-visual-walkthrough/SKILL.md +12 -2
- package/dist/plugin/skills/muggle-test/SKILL.md +111 -79
- package/dist/plugin/skills/muggle-test-feature-local/SKILL.md +18 -15
- package/dist/plugin/skills/muggle-test-import/SKILL.md +5 -2
- package/dist/plugin/skills/muggle-test-regenerate-missing/SKILL.md +7 -2
- package/dist/release-manifest.json +4 -4
- package/dist/src-TX2KXI26.js +1 -0
- package/package.json +6 -6
- package/plugin/.claude-plugin/plugin.json +1 -1
- package/plugin/.cursor-plugin/plugin.json +1 -1
- package/plugin/skills/do/e2e-acceptance.md +6 -3
- package/plugin/skills/do/open-prs.md +2 -1
- package/plugin/skills/muggle-pr-visual-walkthrough/SKILL.md +12 -2
- package/plugin/skills/muggle-test/SKILL.md +111 -79
- package/plugin/skills/muggle-test-feature-local/SKILL.md +18 -15
- package/plugin/skills/muggle-test-import/SKILL.md +5 -2
- package/plugin/skills/muggle-test-regenerate-missing/SKILL.md +7 -2
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@muggleai/works",
|
|
3
3
|
"mcpName": "io.github.multiplex-ai/muggle",
|
|
4
|
-
"version": "4.
|
|
4
|
+
"version": "4.6.1",
|
|
5
5
|
"description": "Ship quality products with AI-powered E2E acceptance testing that validates your web app like a real user — from Claude Code and Cursor to PR.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"main": "dist/index.js",
|
|
@@ -41,14 +41,14 @@
|
|
|
41
41
|
"test:watch": "vitest"
|
|
42
42
|
},
|
|
43
43
|
"muggleConfig": {
|
|
44
|
-
"electronAppVersion": "1.0.
|
|
44
|
+
"electronAppVersion": "1.0.51",
|
|
45
45
|
"downloadBaseUrl": "https://github.com/multiplex-ai/muggle-ai-works/releases/download",
|
|
46
46
|
"runtimeTargetDefault": "production",
|
|
47
47
|
"checksums": {
|
|
48
|
-
"darwin-arm64": "
|
|
49
|
-
"darwin-x64": "
|
|
50
|
-
"win32-x64": "
|
|
51
|
-
"linux-x64": "
|
|
48
|
+
"darwin-arm64": "6be5d2ff37541d9933e065f94f04348d7e4be63f01896b334a108a755a79f770",
|
|
49
|
+
"darwin-x64": "5c381e68829a330eecb8bd6edb9e5fba820e995acafe7fe78474fd7c43174f40",
|
|
50
|
+
"win32-x64": "2c101c467f75e8d60482aad16ad3c1a1e8edecac9ae58cdf7f1ad74cdf1141f7",
|
|
51
|
+
"linux-x64": "efeed3f2caf1cd301e8cc503a8ebae1f604ce73f7325c43202dee1c8a858a8a8"
|
|
52
52
|
}
|
|
53
53
|
},
|
|
54
54
|
"dependencies": {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "muggle",
|
|
3
3
|
"description": "Run real-browser end-to-end (E2E) acceptance tests on your web app from any AI coding agent. Generate test scripts from plain English, replay them on localhost, capture screenshots, and validate user flows like signup, checkout, and dashboards. Works across Claude Code, Cursor, Codex, and Windsurf.",
|
|
4
|
-
"version": "4.
|
|
4
|
+
"version": "4.6.1",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Muggle AI",
|
|
7
7
|
"email": "support@muggle-ai.com"
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "muggle",
|
|
3
3
|
"displayName": "Muggle AI",
|
|
4
4
|
"description": "Ship quality products with AI-powered end-to-end (E2E) acceptance testing that validates your web app like a real user — from Claude Code and Cursor to PR.",
|
|
5
|
-
"version": "4.
|
|
5
|
+
"version": "4.6.1",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Muggle AI",
|
|
8
8
|
"email": "support@muggle-ai.com"
|
|
@@ -33,7 +33,12 @@ Read `localUrl` for each repo from the context. If it is not provided, ask the u
|
|
|
33
33
|
### Step 1: Check Authentication
|
|
34
34
|
|
|
35
35
|
- `muggle-remote-auth-status`
|
|
36
|
-
- If
|
|
36
|
+
- If **authenticated**: print the logged-in email and ask via `AskQuestion`:
|
|
37
|
+
> "You're logged in as **{email}**. Continue with this account?"
|
|
38
|
+
- Option 1: "Yes, continue"
|
|
39
|
+
- Option 2: "No, switch account"
|
|
40
|
+
If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true` then `muggle-remote-auth-poll`.
|
|
41
|
+
- If **not signed in or expired**: `muggle-remote-auth-login` then `muggle-remote-auth-poll`
|
|
37
42
|
|
|
38
43
|
Do not skip or assume auth.
|
|
39
44
|
|
|
@@ -62,7 +67,6 @@ For each relevant test case:
|
|
|
62
67
|
- `testScript`: the full script object
|
|
63
68
|
- `actionScript`: the full action script object (from `muggle-remote-action-script-get`)
|
|
64
69
|
- `localUrl`: the resolved local URL
|
|
65
|
-
- `approveElectronAppLaunch`: `true` *(pipeline context — user starting `muggle-do` is implicit approval)*
|
|
66
70
|
- `timeoutMs`: `600000` (10 min) or `900000` (15 min) for complex flows
|
|
67
71
|
|
|
68
72
|
3. **If no script exists** (generation path):
|
|
@@ -70,7 +74,6 @@ For each relevant test case:
|
|
|
70
74
|
- `muggle-local-execute-test-generation` with:
|
|
71
75
|
- `testCase`: the full test case object
|
|
72
76
|
- `localUrl`: the resolved local URL
|
|
73
|
-
- `approveElectronAppLaunch`: `true`
|
|
74
77
|
- `timeoutMs`: `600000` (10 min) or `900000` (15 min) for complex flows
|
|
75
78
|
|
|
76
79
|
4. When execution completes, call `muggle-local-run-result-get` with the `runId` returned by the execute call.
|
|
@@ -13,6 +13,7 @@ You receive:
|
|
|
13
13
|
- `steps`: array of `{ stepIndex, action, screenshotUrl }`
|
|
14
14
|
- `failureStepIndex` and `error` (if failed)
|
|
15
15
|
- `artifactsDir` (for local debugging)
|
|
16
|
+
- `description` and `useCaseName` (optional but recommended) — test case one-liner and parent use case title; drive the grouped overview and the per-test collapsible headers in the rendered walkthrough. Prefer values already in the `e2e-acceptance.md` stage's conversation context; only call `muggle-remote-test-case-get` / `muggle-remote-use-case-get` for anything you don't already have.
|
|
16
17
|
|
|
17
18
|
## Your Job
|
|
18
19
|
|
|
@@ -46,7 +47,7 @@ For each repo with changes:
|
|
|
46
47
|
|
|
47
48
|
### Input — the `E2eReport` JSON
|
|
48
49
|
|
|
49
|
-
The `e2e-acceptance.md` stage already produces an `E2eReport` with the exact shape the skill expects (`projectId` + `tests[]` with per-test `name`, `testCaseId`, `testScriptId`, `runId`, `viewUrl`, `status`, and `steps[]` of `{stepIndex, action, screenshotUrl}`; failed tests additionally have `failureStepIndex`, `error`, and optionally `artifactsDir`). Pass it through unchanged — do not reshape it. The full schema is documented in the shared skill.
|
|
50
|
+
The `e2e-acceptance.md` stage already produces an `E2eReport` with the exact shape the skill expects (`projectId` + `tests[]` with per-test `name`, `testCaseId`, `testScriptId`, `runId`, `viewUrl`, `status`, and `steps[]` of `{stepIndex, action, screenshotUrl}`; failed tests additionally have `failureStepIndex`, `error`, and optionally `artifactsDir`; every test may additionally carry `description` and `useCaseName` — optional but recommended — which drive the grouped overview and per-test collapsible headers in the rendered walkthrough). Pass it through unchanged — do not reshape it. The full schema is documented in the shared skill.
|
|
50
51
|
|
|
51
52
|
### Invocation — Mode B (render-only)
|
|
52
53
|
|
|
@@ -26,7 +26,9 @@ Every caller must build an `E2eReport` JSON object and have it in conversation c
|
|
|
26
26
|
"projectId": "<UUID>",
|
|
27
27
|
"tests": [
|
|
28
28
|
{
|
|
29
|
-
"name": "<test case
|
|
29
|
+
"name": "<test case title>",
|
|
30
|
+
"description": "<one-line description of what this test verifies>",
|
|
31
|
+
"useCaseName": "<parent use case title>",
|
|
30
32
|
"testCaseId": "<UUID>",
|
|
31
33
|
"testScriptId": "<UUID>",
|
|
32
34
|
"runId": "<UUID>",
|
|
@@ -39,6 +41,8 @@ Every caller must build an `E2eReport` JSON object and have it in conversation c
|
|
|
39
41
|
},
|
|
40
42
|
{
|
|
41
43
|
"name": "Checkout flow",
|
|
44
|
+
"description": "Verify a shopper can complete checkout with a saved card.",
|
|
45
|
+
"useCaseName": "Purchase",
|
|
42
46
|
"testCaseId": "<UUID>",
|
|
43
47
|
"testScriptId": "<UUID>",
|
|
44
48
|
"runId": "<UUID>",
|
|
@@ -55,7 +59,12 @@ Every caller must build an `E2eReport` JSON object and have it in conversation c
|
|
|
55
59
|
}
|
|
56
60
|
```
|
|
57
61
|
|
|
58
|
-
Required fields per test: `name`, `testCaseId`, `runId`, `viewUrl`, `status`, `steps[]` with `{stepIndex, action, screenshotUrl}`. Failed tests additionally require `failureStepIndex` and `error`.
|
|
62
|
+
Required fields per test: `name`, `testCaseId`, `runId`, `viewUrl`, `status`, `steps[]` with `{stepIndex, action, screenshotUrl}`. Failed tests additionally require `failureStepIndex` and `error`.
|
|
63
|
+
|
|
64
|
+
**Optional but recommended** per test:
|
|
65
|
+
- `description` — a one-line summary of what the test case verifies. Shown in the collapsible header for each test and helps reviewers understand the test without expanding it. Pull from the test case's `title`/`description` via `muggle-remote-test-case-get`.
|
|
66
|
+
- `useCaseName` — the parent use case title. When present on any test, the overview list is grouped by use case; otherwise it is rendered as a flat list. Pull from `muggle-remote-use-case-get` using the test case's parent use-case id.
|
|
67
|
+
- `testScriptId` and `artifactsDir` are also optional.
|
|
59
68
|
|
|
60
69
|
If any required field is missing, stop and tell the caller exactly what's missing. Never fabricate data.
|
|
61
70
|
|
|
@@ -72,6 +81,7 @@ After `muggle-local-publish-test-script` returns `{testScriptId, viewUrl, ...}`
|
|
|
72
81
|
3. Determine `status` from the local run result (`muggle-local-run-result-get`).
|
|
73
82
|
4. For failures, read `failureStepIndex`, `error`, and `artifactsDir` from the run result.
|
|
74
83
|
5. Assemble the `E2eReport` with `projectId` from the test run.
|
|
84
|
+
6. Populate `description` (test case title/description) and `useCaseName` (parent use case title) on each report entry — optional but strongly recommended; they drive the grouped overview and the per-test collapsible headers. Prefer values already in your conversation context from earlier steps (e.g. a test case you just created or selected, or a use case you confirmed); only call `muggle-remote-test-case-get` / `muggle-remote-use-case-get` for anything you don't already have.
|
|
75
85
|
|
|
76
86
|
### From `muggle-do` (`open-prs.md`)
|
|
77
87
|
|
|
@@ -15,6 +15,22 @@ A router skill that detects code changes, resolves impacted test cases, executes
|
|
|
15
15
|
- **Multi-select** (use cases, test cases): Use `AskQuestion` with `allow_multiple: true`.
|
|
16
16
|
- **Free-text inputs** (URLs, descriptions): Only use plain text prompts when there is no finite set of options. Even then, offer a detected/default value when possible.
|
|
17
17
|
- **Batch related questions**: If two questions are independent, present them together in a single `AskQuestion` call rather than asking sequentially.
|
|
18
|
+
- **Parallelize job-creation calls**: Whenever you're kicking off N independent cloud jobs — creating multiple use cases, generating/creating multiple test cases, fetching details for multiple test cases, starting multiple remote workflows, publishing multiple local runs, or fetching per-step screenshots for multiple runs — issue all N tool calls in a single message so they run in parallel. Never loop them sequentially unless there is a real ordering constraint (e.g. a single local Electron browser that can only run one test at a time).
|
|
19
|
+
|
|
20
|
+
## Test Case Design: One Atomic Behavior Per Test Case
|
|
21
|
+
|
|
22
|
+
Every test case verifies exactly **one** user-observable behavior. Never bundle multiple concerns, sequential flows, or bootstrap/setup into a single test case — even if you think it would be "cleaner" or "more efficient."
|
|
23
|
+
|
|
24
|
+
**Ordering, dependencies, and bootstrap are Muggle's service responsibility, not yours.** Muggle's cloud handles test case dependencies, prerequisite state, and execution ordering. Your job is to describe the *atomic behavior to verify* — never the flow that gets there.
|
|
25
|
+
|
|
26
|
+
- ❌ Wrong: one test case that "signs up, logs in, navigates to the detail modal, verifies icon stacking, verifies tab order, verifies history format, and verifies reference layout."
|
|
27
|
+
- ✅ Right: four separate test cases — one per verifiable behavior — each with instruction text like "Verify the detail modal shows stacked pair of icons per card" with **no** signup / login / navigation / setup language.
|
|
28
|
+
|
|
29
|
+
**Never bake bootstrap into a test case description.** Signup, login, seed data, prerequisite navigation, tear-down — none of these belong inside the test case body. Write only the verification itself. The service will prepend whatever setup is needed based on its own dependency graph.
|
|
30
|
+
|
|
31
|
+
**Never consolidate the generator's output.** When `muggle-remote-test-case-generate-from-prompt` returns N micro-tests from a single prompt, that decomposition is the authoritative one. Do not "merge them into 1 for simplicity," do not "rewrite them to share bootstrap," do not "collapse them to match a 4 UC / 4 TC plan." Accept what the generator gave you.
|
|
32
|
+
|
|
33
|
+
**Never skip the generate→review cycle.** Even when you are 100% confident about the right shape, always present the generated test cases to the user before calling `muggle-remote-test-case-create`. "I'll skip the generate→review cycle and create directly" is a sign you're about to get it wrong.
|
|
18
34
|
|
|
19
35
|
## Step 1: Confirm Scope of Work (Always First)
|
|
20
36
|
|
|
@@ -41,8 +57,8 @@ If the user's intent is clear, state back what you understood and use `AskQuesti
|
|
|
41
57
|
- Option 2: "Switch to [the other mode]"
|
|
42
58
|
|
|
43
59
|
If ambiguous, use `AskQuestion` to let the user choose:
|
|
44
|
-
- Option 1: "
|
|
45
|
-
- Option 2: "
|
|
60
|
+
- Option 1: "On my computer — test your localhost dev server in a browser on your machine"
|
|
61
|
+
- Option 2: "In the cloud — test remotely targeting your deployed preview/staging URL"
|
|
46
62
|
|
|
47
63
|
Only proceed after the user selects an option.
|
|
48
64
|
|
|
@@ -66,8 +82,12 @@ If no changes detected (clean tree), tell the user and ask what they want to tes
|
|
|
66
82
|
## Step 3: Authenticate
|
|
67
83
|
|
|
68
84
|
1. Call `muggle-remote-auth-status`
|
|
69
|
-
2. If authenticated and not expired →
|
|
70
|
-
|
|
85
|
+
2. If **authenticated and not expired** → print the logged-in email and ask via `AskQuestion`:
|
|
86
|
+
> "You're logged in as **{email}**. Continue with this account?"
|
|
87
|
+
- Option 1: "Yes, continue"
|
|
88
|
+
- Option 2: "No, switch account"
|
|
89
|
+
If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true`, then `muggle-remote-auth-poll`.
|
|
90
|
+
3. If **not authenticated or expired** → call `muggle-remote-auth-login`
|
|
71
91
|
4. If login pending → call `muggle-remote-auth-poll`
|
|
72
92
|
|
|
73
93
|
If auth fails repeatedly, suggest: `muggle logout && muggle login` from terminal.
|
|
@@ -93,56 +113,68 @@ A **project** is where all your test results, use cases, and test scripts are gr
|
|
|
93
113
|
|
|
94
114
|
Store the `projectId` only after user confirms.
|
|
95
115
|
|
|
96
|
-
## Step 5: Select Use Case (
|
|
116
|
+
## Step 5: Select Use Case (Best-Effort Shortlist)
|
|
97
117
|
|
|
98
118
|
### 5a: List existing use cases
|
|
99
119
|
Call `muggle-remote-use-case-list` with the project ID.
|
|
100
120
|
|
|
101
|
-
### 5b:
|
|
121
|
+
### 5b: Best-effort match against the change summary
|
|
122
|
+
|
|
123
|
+
Using the change summary from Step 2, pick the use cases whose title/description most plausibly relate to the impacted areas. Produce a **short shortlist** (typically 1–5) — don't try to be exhaustive, and don't dump the full project list on the user. A confident best-effort match is the goal.
|
|
124
|
+
|
|
125
|
+
If nothing looks like a confident match, fall back to asking the user which use case(s) they have in mind.
|
|
102
126
|
|
|
103
|
-
|
|
127
|
+
### 5c: Present the shortlist for confirmation
|
|
104
128
|
|
|
105
|
-
|
|
129
|
+
Use `AskQuestion` with `allow_multiple: true`:
|
|
106
130
|
|
|
107
|
-
|
|
131
|
+
Prompt: "These use cases look most relevant to your changes — confirm which to test:"
|
|
108
132
|
|
|
109
|
-
|
|
110
|
-
-
|
|
111
|
-
-
|
|
112
|
-
- Any heuristic or inference
|
|
133
|
+
- Pre-check the shortlisted items so the user can accept with one click
|
|
134
|
+
- Include "Pick a different use case" to reveal the full project list
|
|
135
|
+
- Include "Create new use case" at the end
|
|
113
136
|
|
|
114
|
-
|
|
137
|
+
### 5d: If user picks "Pick a different use case"
|
|
138
|
+
Re-present the full list from 5a via `AskQuestion` with `allow_multiple: true`, then continue.
|
|
115
139
|
|
|
116
|
-
###
|
|
117
|
-
1. Ask the user to describe the use case in plain English
|
|
118
|
-
2. Call `muggle-remote-use-case-create-from-prompts
|
|
140
|
+
### 5e: If user chooses "Create new use case"
|
|
141
|
+
1. Ask the user to describe the use case(s) in plain English — they may want more than one
|
|
142
|
+
2. Call `muggle-remote-use-case-create-from-prompts` **once** with **all** descriptions batched into the `instructions` array (this endpoint natively fans out the jobs server-side — do NOT make one call per use case):
|
|
119
143
|
- `projectId`: The project ID
|
|
120
|
-
- `instructions`: A plain array of strings, one per use case — e.g. `["<
|
|
121
|
-
3. Present the created use
|
|
144
|
+
- `instructions`: A plain array of strings, one per use case — e.g. `["<description 1>", "<description 2>", ...]`
|
|
145
|
+
3. Present the created use cases and confirm they're correct
|
|
122
146
|
|
|
123
|
-
## Step 6: Select Test Case (
|
|
147
|
+
## Step 6: Select Test Case (Best-Effort Shortlist)
|
|
124
148
|
|
|
125
149
|
For the selected use case(s):
|
|
126
150
|
|
|
127
151
|
### 6a: List existing test cases
|
|
128
152
|
Call `muggle-remote-test-case-list-by-use-case` with each use case ID.
|
|
129
153
|
|
|
130
|
-
### 6b:
|
|
154
|
+
### 6b: Best-effort match against the change summary
|
|
131
155
|
|
|
132
|
-
|
|
156
|
+
Using the change summary from Step 2, pick the test cases that look most relevant to the impacted areas. Keep the shortlist small and confident — don't enumerate every test case attached to the use case(s).
|
|
133
157
|
|
|
134
|
-
|
|
158
|
+
If nothing looks like a confident match, fall back to offering to run all test cases for the selected use case(s), or ask the user what they had in mind.
|
|
135
159
|
|
|
136
|
-
### 6c:
|
|
160
|
+
### 6c: Present the shortlist for confirmation
|
|
137
161
|
|
|
138
|
-
|
|
162
|
+
Use `AskQuestion` with `allow_multiple: true`:
|
|
163
|
+
|
|
164
|
+
Prompt: "These test cases look most relevant — confirm which to run:"
|
|
165
|
+
|
|
166
|
+
- Pre-check the shortlisted items so the user can accept with one click
|
|
167
|
+
- Include "Show all test cases" to reveal the full list
|
|
168
|
+
- Include "Generate new test case" at the end
|
|
139
169
|
|
|
140
170
|
### 6d: If user chooses "Generate new test case"
|
|
141
|
-
1. Ask the user to describe what they want to test in plain English
|
|
142
|
-
2.
|
|
143
|
-
- `projectId`, `useCaseId`, `instruction` (
|
|
144
|
-
|
|
145
|
-
|
|
171
|
+
1. Ask the user to describe what they want to test in plain English — they may want more than one test case
|
|
172
|
+
2. For N descriptions, issue N `muggle-remote-test-case-generate-from-prompt` calls **in parallel** (single message, multiple tool calls — never loop sequentially):
|
|
173
|
+
- `projectId`, `useCaseId`, `instruction` (one description per call)
|
|
174
|
+
- Each `instruction` must describe **exactly one atomic behavior to verify**. No signup, no login, no "first navigate to X, then click Y, then verify Z" chains, no seed data, no cleanup. Just the verification. See **Test Case Design** above.
|
|
175
|
+
3. **Accept the generator's decomposition as-is.** If the generator returns 4 micro-tests from a single prompt, that's 4 correct test cases — never merge, consolidate, or rewrite them to bundle bootstrap.
|
|
176
|
+
4. Present the generated test case(s) for user review — **always do this review cycle**, even when you think you already know the right shape. Skipping straight to creation is the anti-pattern this skill most frequently gets wrong.
|
|
177
|
+
5. For the ones the user approves, issue `muggle-remote-test-case-create` calls **in parallel**
|
|
146
178
|
|
|
147
179
|
### 6e: Confirm final selection
|
|
148
180
|
|
|
@@ -154,9 +186,7 @@ Wait for user confirmation before moving to execution.
|
|
|
154
186
|
|
|
155
187
|
## Step 7A: Execute — Local Mode
|
|
156
188
|
|
|
157
|
-
### Pre-flight
|
|
158
|
-
|
|
159
|
-
**Question 1 — Local URL:**
|
|
189
|
+
### Pre-flight question — Local URL
|
|
160
190
|
|
|
161
191
|
Try to auto-detect the dev server URL by checking running terminals or common ports (e.g., `lsof -iTCP -sTCP:LISTEN -nP | grep -E ':(3000|3001|4200|5173|8080)'`). If a likely URL is found, present it as a clickable default via `AskQuestion`:
|
|
162
192
|
- Option 1: "http://localhost:3000" (or whatever was detected)
|
|
@@ -164,38 +194,31 @@ Try to auto-detect the dev server URL by checking running terminals or common po
|
|
|
164
194
|
|
|
165
195
|
If nothing detected, ask as free text: "Your local app should be running. What's the URL? (e.g., http://localhost:3000)"
|
|
166
196
|
|
|
167
|
-
**
|
|
168
|
-
|
|
169
|
-
After getting the URL, use a single `AskQuestion` call with two questions:
|
|
197
|
+
**No separate approval or visibility question.** The user picking Local mode in Step 1 *is* the approval — do not ask "ready to launch Electron?" before every run. The Electron browser defaults to visible; if the user wants headless, they will say so, otherwise let it run visible.
|
|
170
198
|
|
|
171
|
-
|
|
172
|
-
- "Yes, launch it (visible — I want to watch)"
|
|
173
|
-
- "Yes, launch it (headless — run in background)"
|
|
174
|
-
- "No, cancel"
|
|
199
|
+
### Fetch test case details (in parallel)
|
|
175
200
|
|
|
176
|
-
|
|
201
|
+
Before execution, fetch full test case details for all selected test cases by issuing **all** `muggle-remote-test-case-get` calls in parallel (single message, multiple tool calls).
|
|
177
202
|
|
|
178
|
-
### Run sequentially
|
|
203
|
+
### Run sequentially (Electron constraint)
|
|
179
204
|
|
|
180
|
-
For each test case:
|
|
205
|
+
Execution itself **must** be sequential because there is only one local Electron browser. For each test case, in order:
|
|
181
206
|
|
|
182
|
-
1. Call `muggle-
|
|
183
|
-
|
|
184
|
-
- `
|
|
185
|
-
- `
|
|
186
|
-
|
|
187
|
-
- `showUi`: `true` if user chose "visible", `false` if "headless" (from Question 2)
|
|
188
|
-
3. Store the returned `runId`
|
|
207
|
+
1. Call `muggle-local-execute-test-generation`:
|
|
208
|
+
- `testCase`: Full test case object from the parallel fetch above
|
|
209
|
+
- `localUrl`: User's local URL from the pre-flight question
|
|
210
|
+
- `showUi`: omit (default visible) unless the user explicitly asked for headless, then pass `false`
|
|
211
|
+
2. Store the returned `runId`
|
|
189
212
|
|
|
190
213
|
If a generation fails, log it and continue to the next. Do not abort the batch.
|
|
191
214
|
|
|
192
|
-
### Collect results
|
|
215
|
+
### Collect results (in parallel)
|
|
193
216
|
|
|
194
|
-
For
|
|
217
|
+
For every `runId`, issue all `muggle-local-run-result-get` calls in parallel. Extract: status, duration, step count, `artifactsDir`.
|
|
195
218
|
|
|
196
|
-
### Publish each run to cloud
|
|
219
|
+
### Publish each run to cloud (in parallel)
|
|
197
220
|
|
|
198
|
-
For
|
|
221
|
+
For every completed run, issue all `muggle-local-publish-test-script` calls in parallel (single message, multiple tool calls):
|
|
199
222
|
- `runId`: The local run ID
|
|
200
223
|
- `cloudTestCaseId`: The cloud test case ID
|
|
201
224
|
|
|
@@ -225,26 +248,29 @@ For failures: show which step failed, the local screenshot path, and a suggestio
|
|
|
225
248
|
|
|
226
249
|
> "What's the preview/staging URL to test against?"
|
|
227
250
|
|
|
228
|
-
###
|
|
251
|
+
### Fetch test case details (in parallel)
|
|
229
252
|
|
|
230
|
-
|
|
253
|
+
Issue all `muggle-remote-test-case-get` calls in parallel (single message, multiple tool calls) to hydrate the test case bodies.
|
|
231
254
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
255
|
+
### Trigger remote workflows (in parallel)
|
|
256
|
+
|
|
257
|
+
Once details are in hand, issue all `muggle-remote-workflow-start-test-script-generation` calls in parallel — never loop them sequentially. For each test case:
|
|
258
|
+
|
|
259
|
+
- `projectId`: The project ID
|
|
260
|
+
- `useCaseId`: The use case ID
|
|
261
|
+
- `testCaseId`: The test case ID
|
|
262
|
+
- `name`: `"muggle-test: {test case title}"`
|
|
263
|
+
- `url`: The preview/staging URL
|
|
264
|
+
- `goal`: From the test case
|
|
265
|
+
- `precondition`: From the test case (use `"None"` if empty)
|
|
266
|
+
- `instructions`: From the test case
|
|
267
|
+
- `expectedResult`: From the test case
|
|
268
|
+
|
|
269
|
+
Store each returned workflow runtime ID.
|
|
244
270
|
|
|
245
|
-
### Monitor and report
|
|
271
|
+
### Monitor and report (in parallel)
|
|
246
272
|
|
|
247
|
-
|
|
273
|
+
Issue all `muggle-remote-wf-get-ts-gen-latest-run` calls in parallel, one per runtime ID.
|
|
248
274
|
|
|
249
275
|
```
|
|
250
276
|
Test Case Workflow Status Runtime ID
|
|
@@ -287,12 +313,12 @@ After reporting results, ask the user if they want to attach a **visual walkthro
|
|
|
287
313
|
|
|
288
314
|
The shared skill takes an **`E2eReport` JSON** that includes per-step screenshot URLs. You already have `projectId`, `testCaseId`, `runId`, `viewUrl`, and `status` from earlier steps — you still need the step-level data.
|
|
289
315
|
|
|
290
|
-
For
|
|
316
|
+
For the published runs from Step 7A, issue **all** `muggle-remote-test-script-get` calls in parallel (single message, multiple tool calls) — one per `testScriptId` returned by `muggle-local-publish-test-script`. Then, for each response:
|
|
291
317
|
|
|
292
|
-
1.
|
|
293
|
-
2.
|
|
294
|
-
3.
|
|
295
|
-
4.
|
|
318
|
+
1. Extract `steps[].operation.action` (description) and `steps[].operation.screenshotUrl` (cloud URL).
|
|
319
|
+
2. Build a `steps` array: `[{ stepIndex: 0, action: "...", screenshotUrl: "..." }, ...]`.
|
|
320
|
+
3. If the run failed, also capture `failureStepIndex`, `error`, and the local `artifactsDir` from `muggle-local-run-result-get`.
|
|
321
|
+
4. Populate `description` (test case title/description) and `useCaseName` (parent use case title) on each report entry — optional but strongly recommended; they drive the grouped overview and the per-test collapsible headers. Prefer values already in your conversation context from earlier steps (e.g. the test case you just created or selected, or the use case you confirmed); only call `muggle-remote-test-case-get` / `muggle-remote-use-case-get` for anything you don't already have.
|
|
296
322
|
|
|
297
323
|
Assemble the report:
|
|
298
324
|
|
|
@@ -302,6 +328,8 @@ Assemble the report:
|
|
|
302
328
|
"tests": [
|
|
303
329
|
{
|
|
304
330
|
"name": "<test case title>",
|
|
331
|
+
"description": "<one-line description of what this test verifies (optional but recommended)>",
|
|
332
|
+
"useCaseName": "<parent use case title (optional but recommended)>",
|
|
305
333
|
"testCaseId": "<id>",
|
|
306
334
|
"testScriptId": "<id>",
|
|
307
335
|
"runId": "<id>",
|
|
@@ -364,11 +392,15 @@ This skill always uses **Mode A** (post to an existing PR); `muggle-do` is the o
|
|
|
364
392
|
|
|
365
393
|
- **Always confirm intent first** — never assume local vs remote without asking
|
|
366
394
|
- **User MUST select project** — present clickable options via `AskQuestion`, wait for explicit choice, never auto-select
|
|
367
|
-
- **
|
|
368
|
-
- **
|
|
395
|
+
- **Best-effort shortlist use cases** — use the change summary to narrow the list to the most relevant 1–5 use cases and pre-check them; never dump every use case in the project on the user. Always leave an escape hatch to reveal the full list.
|
|
396
|
+
- **Best-effort shortlist test cases** — same idea: pre-check the test cases most relevant to the change summary; never enumerate every test case attached to a use case. Always leave an escape hatch to reveal the full list.
|
|
369
397
|
- **Use `AskQuestion` for every selection** — never ask the user to type a number; always present clickable options
|
|
370
|
-
- **
|
|
371
|
-
- **
|
|
398
|
+
- **Auto-detect localhost URL when possible**; only fall back to free-text when nothing is listening on a common port
|
|
399
|
+
- **Parallelize independent cloud jobs** — when creating N use cases, generating/creating N test cases, fetching N test case details, starting N remote workflows, polling N workflow runtimes, publishing N local runs, or fetching N per-step test scripts, issue all N calls in a single message so they fan out in parallel. The only tolerated sequential loop is local Electron execution (one browser, one test at a time). For use case creation specifically, use the native batch form of `muggle-remote-use-case-create-from-prompts` (all descriptions in one `instructions` array) instead of parallel calls.
|
|
400
|
+
- **One atomic behavior per test case** — every test case verifies exactly one user-observable behavior. Never bundle signup/login/navigation/bootstrap/teardown into a test case body. Ordering and dependencies are Muggle's service responsibility, not the skill's.
|
|
401
|
+
- **Never consolidate the generator's output** — if `muggle-remote-test-case-generate-from-prompt` returns N micro-tests, accept all N; never merge them into fewer test cases, even if "the plan" says 4 UC / 4 TC.
|
|
402
|
+
- **Never skip the generate→review cycle** — always present generated test cases to the user before calling `muggle-remote-test-case-create`, even when you're confident. "I'll skip the review and create directly" is always wrong.
|
|
403
|
+
- **Never ask for Electron launch approval before each run** — the user picking Local mode is the approval. Don't prompt "Ready to launch Electron?" before execution; just run.
|
|
372
404
|
- **Never silently drop test cases** — log failures and continue, then report them
|
|
373
405
|
- **Never guess the URL** — always ask the user for localhost or preview URL
|
|
374
406
|
- **Always publish before opening browser** — the dashboard needs the published data to show results
|
|
@@ -19,7 +19,7 @@ The local URL only changes where the browser opens; it does not change the remot
|
|
|
19
19
|
|
|
20
20
|
**Every selection-based question MUST use the `AskQuestion` tool** (or the platform's equivalent structured selection tool). Never ask the user to "reply with a number" in a plain text message — always present clickable options.
|
|
21
21
|
|
|
22
|
-
- **Selections** (project, use case, test case, script
|
|
22
|
+
- **Selections** (project, use case, test case, script): Use `AskQuestion` with labeled options the user can click.
|
|
23
23
|
- **Free-text inputs** (URLs, descriptions): Only use plain text prompts when there is no finite set of options. Even then, offer a detected/default value when possible.
|
|
24
24
|
|
|
25
25
|
## Workflow
|
|
@@ -27,7 +27,12 @@ The local URL only changes where the browser opens; it does not change the remot
|
|
|
27
27
|
### 1. Auth
|
|
28
28
|
|
|
29
29
|
- `muggle-remote-auth-status`
|
|
30
|
-
- If
|
|
30
|
+
- If **authenticated**: print the logged-in email and ask via `AskQuestion`:
|
|
31
|
+
> "You're logged in as **{email}**. Continue with this account?"
|
|
32
|
+
- Option 1: "Yes, continue"
|
|
33
|
+
- Option 2: "No, switch account"
|
|
34
|
+
If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true`, then `muggle-remote-auth-poll`.
|
|
35
|
+
- If **not signed in or expired**: call `muggle-remote-auth-login` then `muggle-remote-auth-poll`.
|
|
31
36
|
Do not skip or assume auth.
|
|
32
37
|
|
|
33
38
|
### 2. Targets (user must confirm)
|
|
@@ -84,21 +89,21 @@ Remind them: local URL is only the execution target, not tied to cloud project c
|
|
|
84
89
|
**Generate**
|
|
85
90
|
|
|
86
91
|
1. `muggle-remote-test-case-get`
|
|
87
|
-
2. `muggle-local-execute-test-generation`
|
|
92
|
+
2. `muggle-local-execute-test-generation` with that test case + `localUrl` (optional: `showUi: false` for headless — defaults to visible; **`timeoutMs`** — see below)
|
|
88
93
|
|
|
89
94
|
**Replay**
|
|
90
95
|
|
|
91
96
|
1. `muggle-remote-test-script-get` — note `actionScriptId`
|
|
92
97
|
2. `muggle-remote-action-script-get` with that id — full `actionScript`
|
|
93
98
|
**Use the API response as-is.** Do not edit, shorten, or rebuild `actionScript`; replay needs full `label` paths for element lookup.
|
|
94
|
-
3. `muggle-local-execute-replay`
|
|
99
|
+
3. `muggle-local-execute-replay` with `testScript`, `actionScript`, `localUrl` (optional: `showUi: false` for headless — defaults to visible; **`timeoutMs`** — see below)
|
|
95
100
|
|
|
96
101
|
### Local execution timeout (`timeoutMs`)
|
|
97
102
|
|
|
98
103
|
The MCP client often uses a **default wait of 300000 ms (5 minutes)** for `muggle-local-execute-test-generation` and `muggle-local-execute-replay`. **Exploratory script generation** (Auth0 login, dashboards, multi-step wizards, many LLM iterations) routinely **runs longer than 5 minutes** while Electron is still healthy.
|
|
99
104
|
|
|
100
105
|
- **Always pass `timeoutMs`** for flows that may be long — for example **`600000` (10 min)** or **`900000` (15 min)** — unless the user explicitly wants a short cap.
|
|
101
|
-
- If the tool reports **`Electron execution timed out after 300000ms`** (or similar) **but** Electron logs show the run still progressing (steps, screenshots, LLM calls), treat it as **orchestration timeout**, not an Electron app defect: **increase `timeoutMs` and retry
|
|
106
|
+
- If the tool reports **`Electron execution timed out after 300000ms`** (or similar) **but** Electron logs show the run still progressing (steps, screenshots, LLM calls), treat it as **orchestration timeout**, not an Electron app defect: **increase `timeoutMs` and retry**.
|
|
102
107
|
- **Test case design:** Preconditions like "a test run has already completed" on an **empty account** can force many steps (sign-up, new project, crawl). Prefer an account/project that **already has** the needed state, or narrow the test goal so generation does not try to create a full project from scratch unless that is intentional.
|
|
103
108
|
|
|
104
109
|
### Interpreting `failed` / non-zero Electron exit
|
|
@@ -106,15 +111,9 @@ The MCP client often uses a **default wait of 300000 ms (5 minutes)** for `muggl
|
|
|
106
111
|
- **`Electron execution timed out after 300000ms`:** Orchestration wait too short — see **`timeoutMs`** above.
|
|
107
112
|
- **Exit code 26** (and messages like **LLM failed to generate / replay action script**): Often corresponds to a completed exploration whose **outcome was goal not achievable** (`goal_not_achievable`, summary with `halt`) — e.g. verifying "view script after a successful run" when **no run or script exists yet** in the UI. Use `muggle-local-run-result-get` and read the **summary / structured summary**; do not assume an Electron crash. **Fix:** choose a **project that already has** completed runs and scripts, or **change the test case** so preconditions match what localhost can satisfy (e.g. include steps to create and run a test first, or assert only empty-state UI when no runs exist).
|
|
108
113
|
|
|
109
|
-
### 6.
|
|
114
|
+
### 6. Execute (no approval prompt)
|
|
110
115
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
- "Yes, launch Electron (visible — I want to watch)"
|
|
114
|
-
- "Yes, launch Electron (headless — run in background)"
|
|
115
|
-
- "No, cancel"
|
|
116
|
-
|
|
117
|
-
Only call local execute tools with `approveElectronAppLaunch: true` after the user selects a "Yes" option. Map visible to `showUi: true`, headless to `showUi: false`.
|
|
116
|
+
Call `muggle-local-execute-test-generation` or `muggle-local-execute-replay` directly. **Do not** ask the user to re-approve the Electron launch — the user choosing this skill in the first place is the approval. The browser defaults to visible; only pass `showUi: false` if the user explicitly asked for headless.
|
|
118
117
|
|
|
119
118
|
### 7. After successful generation only
|
|
120
119
|
|
|
@@ -138,6 +137,7 @@ The shared skill takes an **`E2eReport` JSON** that includes per-step screenshot
|
|
|
138
137
|
2. Extract per step: `steps[].operation.action` and `steps[].operation.screenshotUrl`.
|
|
139
138
|
3. Build the `steps` array: `[{ stepIndex: 0, action: "...", screenshotUrl: "..." }, ...]`.
|
|
140
139
|
4. If the run failed, capture `failureStepIndex`, `error`, and the local `artifactsDir` from the run result in step 8.
|
|
140
|
+
5. Populate `description` (test case title/description) and `useCaseName` (parent use case title) on the report entry — optional but strongly recommended; they drive the grouped overview and the per-test collapsible headers. Prefer values already in your conversation context from earlier steps (e.g. the test case you just created or selected, or the use case you confirmed); only call `muggle-remote-test-case-get` / `muggle-remote-use-case-get` for anything you don't already have.
|
|
141
141
|
|
|
142
142
|
Assemble the `E2eReport`:
|
|
143
143
|
|
|
@@ -147,6 +147,8 @@ Assemble the `E2eReport`:
|
|
|
147
147
|
"tests": [
|
|
148
148
|
{
|
|
149
149
|
"name": "<test case title>",
|
|
150
|
+
"description": "<one-line description of what this test verifies (optional but recommended)>",
|
|
151
|
+
"useCaseName": "<parent use case title (optional but recommended)>",
|
|
150
152
|
"testCaseId": "<id>",
|
|
151
153
|
"testScriptId": "<id from publish>",
|
|
152
154
|
"runId": "<runId from execute>",
|
|
@@ -177,10 +179,11 @@ Always use **Mode A** (post to existing PR) from this skill. Never hand-write th
|
|
|
177
179
|
|
|
178
180
|
## Non-negotiables
|
|
179
181
|
|
|
180
|
-
- No silent auth skip
|
|
182
|
+
- No silent auth skip.
|
|
183
|
+
- **Never prompt for Electron launch approval** before execution — invoking this skill is the approval. Just run.
|
|
181
184
|
- If replayable scripts exist, do not default to generation without user choice.
|
|
182
185
|
- No hiding failures: surface errors and artifact paths.
|
|
183
186
|
- Replay: never hand-built or simplified `actionScript` — only from `muggle-remote-action-script-get`.
|
|
184
|
-
- Use `AskQuestion` for every selection — project, use case, test case, script
|
|
187
|
+
- Use `AskQuestion` for every selection — project, use case, test case, script. Never ask the user to type a number.
|
|
185
188
|
- Project, use case, and test case selection lists must always include "Create new ...". Include "Show full list" whenever the API returned at least one row for that step; omit "Show full list" when the list is empty (offer "Create new ..." only). For creates, use preview tools (`muggle-remote-use-case-prompt-preview`, `muggle-remote-test-case-generate-from-prompt`) before persisting.
|
|
186
189
|
- PR posting is always optional and always delegated to the `muggle-pr-visual-walkthrough` skill — never inline the walkthrough markdown or call `gh pr comment` directly from this skill.
|
|
@@ -130,9 +130,12 @@ If the user wants changes, incorporate feedback, then ask again. Only proceed af
|
|
|
130
130
|
|
|
131
131
|
Call `muggle-remote-auth-status` first.
|
|
132
132
|
|
|
133
|
-
If already authenticated →
|
|
133
|
+
If **already authenticated** → print the logged-in email and ask via `AskQuestion`:
|
|
134
|
+
> "You're logged in as **{email}**. Continue with this account?"
|
|
135
|
+
- Option 1: "Yes, continue" → skip to Step 5.
|
|
136
|
+
- Option 2: "No, switch account" → call `muggle-remote-auth-login` with `forceNewSession: true`, then `muggle-remote-auth-poll`.
|
|
134
137
|
|
|
135
|
-
If not authenticated
|
|
138
|
+
If **not authenticated**:
|
|
136
139
|
1. Tell the user a browser window is about to open.
|
|
137
140
|
2. Call `muggle-remote-auth-login` (opens browser automatically).
|
|
138
141
|
3. Tell the user to complete login in the browser.
|
|
@@ -40,8 +40,13 @@ Treat this filter as a default, not a law. If the user explicitly says "include
|
|
|
40
40
|
### Step 1 — Authenticate
|
|
41
41
|
|
|
42
42
|
1. Call `muggle-remote-auth-status`.
|
|
43
|
-
2. If
|
|
44
|
-
|
|
43
|
+
2. If **authenticated and not expired** → print the logged-in email and ask via `AskQuestion`:
|
|
44
|
+
> "You're logged in as **{email}**. Continue with this account?"
|
|
45
|
+
- Option 1: "Yes, continue"
|
|
46
|
+
- Option 2: "No, switch account"
|
|
47
|
+
If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true`, then poll with `muggle-remote-auth-poll`.
|
|
48
|
+
3. If **not authenticated or expired** → call `muggle-remote-auth-login`, then poll with `muggle-remote-auth-poll`.
|
|
49
|
+
4. Do not skip auth and do not assume a stale token still works.
|
|
45
50
|
|
|
46
51
|
If auth keeps failing, suggest the user run `muggle logout && muggle login` from a terminal.
|
|
47
52
|
|