@muggleai/works 4.5.0 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -723,8 +723,9 @@ var AuthService = class {
723
723
  }
724
724
  /**
725
725
  * Start the device code flow.
726
+ * @param options.forceNewSession - Clear existing Auth0 browser session before login to allow account switching.
726
727
  */
727
- async startDeviceCodeFlow() {
728
+ async startDeviceCodeFlow(options) {
728
729
  const logger14 = getLogger();
729
730
  const config = getConfig();
730
731
  const { domain, clientId, audience, scopes } = config.localQa.auth0;
@@ -761,15 +762,25 @@ var AuthService = class {
761
762
  userCode: data.user_code,
762
763
  expiresAt: new Date(Date.now() + data.expires_in * 1e3).toISOString()
763
764
  });
765
+ let browserUrl = data.verification_uri_complete;
766
+ if (options?.forceNewSession) {
767
+ const logoutUrl = new URL(`https://${domain}/v2/logout`);
768
+ logoutUrl.searchParams.set("client_id", clientId);
769
+ logoutUrl.searchParams.set("returnTo", data.verification_uri_complete);
770
+ browserUrl = logoutUrl.toString();
771
+ logger14.info("Force new session: opening logout-redirect URL", {
772
+ logoutUrl: browserUrl
773
+ });
774
+ }
764
775
  const browserOpenResult = await openBrowserUrl({
765
- url: data.verification_uri_complete
776
+ url: browserUrl
766
777
  });
767
778
  if (browserOpenResult.opened) {
768
779
  logger14.info("Browser opened for device code login");
769
780
  } else {
770
781
  logger14.warn("Failed to open browser for device code login", {
771
782
  error: browserOpenResult.error,
772
- verificationUriComplete: data.verification_uri_complete
783
+ url: browserUrl
773
784
  });
774
785
  }
775
786
  return {
@@ -2556,7 +2567,7 @@ var getCredentialsFilePath = getApiKeyFilePath;
2556
2567
 
2557
2568
  // packages/mcps/src/shared/auth.ts
2558
2569
  var logger4 = getLogger();
2559
- async function startDeviceCodeFlow(config) {
2570
+ async function startDeviceCodeFlow(config, options) {
2560
2571
  const deviceCodeUrl = `https://${config.domain}/oauth/device/code`;
2561
2572
  try {
2562
2573
  logger4.info("[Auth] Starting device code flow", {
@@ -2581,8 +2592,16 @@ async function startDeviceCodeFlow(config) {
2581
2592
  userCode: data.user_code,
2582
2593
  expiresIn: data.expires_in
2583
2594
  });
2595
+ let browserUrl = data.verification_uri_complete;
2596
+ if (options?.forceNewSession) {
2597
+ const logoutUrl = new URL(`https://${config.domain}/v2/logout`);
2598
+ logoutUrl.searchParams.set("client_id", config.clientId);
2599
+ logoutUrl.searchParams.set("returnTo", data.verification_uri_complete);
2600
+ browserUrl = logoutUrl.toString();
2601
+ logger4.info("[Auth] Force new session: opening logout-redirect URL");
2602
+ }
2584
2603
  const browserOpenResult = await openBrowserUrl({
2585
- url: data.verification_uri_complete
2604
+ url: browserUrl
2586
2605
  });
2587
2606
  if (browserOpenResult.opened) {
2588
2607
  logger4.info("[Auth] Browser opened for device code login");
@@ -3251,7 +3270,8 @@ var ApiKeyRevokeInputSchema = z.object({
3251
3270
  });
3252
3271
  var AuthLoginInputSchema = z.object({
3253
3272
  waitForCompletion: z.boolean().optional().describe("Whether to wait for browser login completion before returning. Default: true"),
3254
- timeoutMs: z.number().int().positive().min(1e3).max(9e5).optional().describe("Maximum time to wait for login completion in milliseconds. Default: 120000")
3273
+ timeoutMs: z.number().int().positive().min(1e3).max(9e5).optional().describe("Maximum time to wait for login completion in milliseconds. Default: 120000"),
3274
+ forceNewSession: z.boolean().optional().describe("Force a fresh login by clearing any existing Auth0 browser session before redirecting to the device activation page. Use this to switch accounts. Default: false")
3255
3275
  });
3256
3276
  var AuthPollInputSchema = z.object({
3257
3277
  deviceCode: z.string().optional().describe("Device code from the login response. Optional if a login was recently started.")
@@ -4880,7 +4900,9 @@ var authTools = [
4880
4900
  localHandler: async (input) => {
4881
4901
  const data = input;
4882
4902
  const authService = getAuthService();
4883
- const deviceCodeResponse = await authService.startDeviceCodeFlow();
4903
+ const deviceCodeResponse = await authService.startDeviceCodeFlow({
4904
+ forceNewSession: data.forceNewSession
4905
+ });
4884
4906
  const waitForCompletion = data.waitForCompletion ?? true;
4885
4907
  if (!waitForCompletion) {
4886
4908
  return {
@@ -5173,12 +5195,10 @@ var ExecuteTestGenerationInputSchema = z.object({
5173
5195
  testCase: TestCaseDetailsSchema.describe("Test case details obtained from muggle-remote-test-case-get"),
5174
5196
  /** Local URL to test against. */
5175
5197
  localUrl: z.string().url().describe("Local URL to test against (e.g., http://localhost:3000)"),
5176
- /** Explicit approval to launch electron-app. */
5177
- approveElectronAppLaunch: z.boolean().describe("Set to true after the user explicitly approves launching electron-app"),
5178
5198
  /** Optional timeout. */
5179
5199
  timeoutMs: z.number().int().positive().optional().describe("Timeout in milliseconds (default: 300000 = 5 min)"),
5180
- /** Show the electron-app UI during execution. Ask the user before approving; true = visible window, false or omit = headless. */
5181
- showUi: z.boolean().optional().describe("Show the electron-app UI during generation. Ask the user: true to watch the window, false or omit for headless.")
5200
+ /** Show the electron-app UI during execution. Default: visible window. Pass false to run headless. */
5201
+ showUi: z.boolean().optional().describe("Show the electron-app UI during generation. Defaults to visible; pass false to run headless.")
5182
5202
  });
5183
5203
  var ExecuteReplayInputSchema = z.object({
5184
5204
  /** Test script metadata from muggle-remote-test-script-get. */
@@ -5187,12 +5207,10 @@ var ExecuteReplayInputSchema = z.object({
5187
5207
  actionScript: z.array(z.unknown()).describe("Action script steps from muggle-remote-action-script-get"),
5188
5208
  /** Local URL to test against. */
5189
5209
  localUrl: z.string().url().describe("Local URL to test against (e.g., http://localhost:3000)"),
5190
- /** Explicit approval to launch electron-app. */
5191
- approveElectronAppLaunch: z.boolean().describe("Set to true after the user explicitly approves launching electron-app"),
5192
5210
  /** Optional timeout. */
5193
5211
  timeoutMs: z.number().int().positive().optional().describe("Timeout in milliseconds (default: 180000 = 3 min)"),
5194
- /** Show the electron-app UI during execution. Ask the user before approving; true = visible window, false or omit = headless. */
5195
- showUi: z.boolean().optional().describe("Show the electron-app UI during replay. Ask the user: true to watch the window, false or omit for headless.")
5212
+ /** Show the electron-app UI during execution. Default: visible window. Pass false to run headless. */
5213
+ showUi: z.boolean().optional().describe("Show the electron-app UI during replay. Defaults to visible; pass false to run headless.")
5196
5214
  });
5197
5215
  var CancelExecutionInputSchema = z.object({
5198
5216
  runId: MuggleEntityIdSchema.describe("Run ID (UUID) to cancel")
@@ -5432,38 +5450,13 @@ var testScriptGetTool = {
5432
5450
  };
5433
5451
  var executeTestGenerationTool = {
5434
5452
  name: "muggle-local-execute-test-generation",
5435
- description: "Generate an end-to-end (E2E) acceptance test script by launching a real browser against your web app. The browser navigates your app, executes the test case steps (like signing up, filling forms, clicking through flows), and produces a replayable test script with screenshots. Use this to create new browser tests for any user flow. Requires a test case (from muggle-remote-test-case-get) and a localhost URL. Launches an Electron browser \u2014 requires explicit approval via approveElectronAppLaunch. Before approving, ask the user whether they want a visible GUI; pass showUi: true to watch the window or showUi: false for headless (default when omitted).",
5453
+ description: "Generate an end-to-end (E2E) acceptance test script by launching a real browser against your web app. The browser navigates your app, executes the test case steps (like signing up, filling forms, clicking through flows), and produces a replayable test script with screenshots. Use this to create new browser tests for any user flow. Requires a test case (from muggle-remote-test-case-get) and a localhost URL. Launches an Electron browser \u2014 defaults to a visible window; pass showUi: false to run headless.",
5436
5454
  inputSchema: ExecuteTestGenerationInputSchema,
5437
5455
  execute: async (ctx) => {
5438
5456
  const logger14 = createChildLogger2(ctx.correlationId);
5439
5457
  logger14.info("Executing muggle-local-execute-test-generation");
5440
5458
  const input = ExecuteTestGenerationInputSchema.parse(ctx.input);
5441
- if (!input.approveElectronAppLaunch) {
5442
- const showUiExplicit = input.showUi !== void 0;
5443
- const uiMode = input.showUi === true ? "visible GUI (showUi: true)" : "headless (showUi: false or omitted)";
5444
- return {
5445
- content: [
5446
- "## Electron App Launch Required",
5447
- "",
5448
- "This tool will launch the electron-app to generate a test script.",
5449
- "Please set `approveElectronAppLaunch: true` to proceed.",
5450
- "",
5451
- "**Visible GUI:** Ask the user whether they want to watch the Electron window during generation.",
5452
- "- If **yes** \u2192 when approving, pass `showUi: true`.",
5453
- "- If **no** \u2192 when approving, pass `showUi: false` (or omit `showUi`; generation runs headless).",
5454
- "",
5455
- showUiExplicit ? `**Current choice:** ${uiMode}` : "**Current choice:** not set \u2014 default on approval is headless unless you pass `showUi: true`.",
5456
- "",
5457
- `**Test Case:** ${input.testCase.title}`,
5458
- `**Local URL:** ${input.localUrl}`,
5459
- "",
5460
- "**Note:** The electron-app will navigate your test URL and record steps."
5461
- ].join("\n"),
5462
- isError: false,
5463
- data: { requiresApproval: true }
5464
- };
5465
- }
5466
- const showUi = input.showUi === true;
5459
+ const showUi = input.showUi !== false;
5467
5460
  try {
5468
5461
  const result = await executeTestGeneration({
5469
5462
  testCase: input.testCase,
@@ -5495,39 +5488,13 @@ var executeTestGenerationTool = {
5495
5488
  };
5496
5489
  var executeReplayTool = {
5497
5490
  name: "muggle-local-execute-replay",
5498
- description: "Replay an existing E2E acceptance test script in a real browser to verify your app still works correctly \u2014 use this for regression testing after code changes. The browser executes each saved step and captures screenshots so you can see what happened. Requires: (1) test script metadata from muggle-remote-test-script-get, (2) actionScript content from muggle-remote-action-script-get using the testScript.actionScriptId, and (3) a localhost URL. Launches an Electron browser \u2014 requires explicit approval via approveElectronAppLaunch. Before approving, ask the user whether they want a visible GUI; pass showUi: true to watch the window or showUi: false for headless (default when omitted).",
5491
+ description: "Replay an existing E2E acceptance test script in a real browser to verify your app still works correctly \u2014 use this for regression testing after code changes. The browser executes each saved step and captures screenshots so you can see what happened. Requires: (1) test script metadata from muggle-remote-test-script-get, (2) actionScript content from muggle-remote-action-script-get using the testScript.actionScriptId, and (3) a localhost URL. Launches an Electron browser \u2014 defaults to a visible window; pass showUi: false to run headless.",
5499
5492
  inputSchema: ExecuteReplayInputSchema,
5500
5493
  execute: async (ctx) => {
5501
5494
  const logger14 = createChildLogger2(ctx.correlationId);
5502
5495
  logger14.info("Executing muggle-local-execute-replay");
5503
5496
  const input = ExecuteReplayInputSchema.parse(ctx.input);
5504
- if (!input.approveElectronAppLaunch) {
5505
- const showUiExplicit = input.showUi !== void 0;
5506
- const uiMode = input.showUi === true ? "visible GUI (showUi: true)" : "headless (showUi: false or omitted)";
5507
- return {
5508
- content: [
5509
- "## Electron App Launch Required",
5510
- "",
5511
- "This tool will launch the electron-app to replay a test script.",
5512
- "Please set `approveElectronAppLaunch: true` to proceed.",
5513
- "",
5514
- "**Visible GUI:** Ask the user whether they want to watch the Electron window during replay.",
5515
- "- If **yes** \u2192 when approving, pass `showUi: true`.",
5516
- "- If **no** \u2192 when approving, pass `showUi: false` (or omit `showUi`; replay runs headless).",
5517
- "",
5518
- showUiExplicit ? `**Current choice:** ${uiMode}` : "**Current choice:** not set \u2014 default on approval is headless unless you pass `showUi: true`.",
5519
- "",
5520
- `**Test Script:** ${input.testScript.name}`,
5521
- `**Local URL:** ${input.localUrl}`,
5522
- `**Steps:** ${input.actionScript.length}`,
5523
- "",
5524
- "**Note:** The electron-app will execute the test steps against your local URL."
5525
- ].join("\n"),
5526
- isError: false,
5527
- data: { requiresApproval: true }
5528
- };
5529
- }
5530
- const showUi = input.showUi === true;
5497
+ const showUi = input.showUi !== false;
5531
5498
  try {
5532
5499
  const result = await executeReplay({
5533
5500
  testScript: input.testScript,
package/dist/cli.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { runCli } from './chunk-MNCBJEPQ.js';
2
+ import { runCli } from './chunk-TP4T4T2Z.js';
3
3
 
4
4
  // src/cli/main.ts
5
5
  runCli().catch((error) => {
package/dist/index.js CHANGED
@@ -1 +1 @@
1
- export { src_exports2 as commands, createChildLogger, createUnifiedMcpServer, e2e_exports as e2e, getConfig, getLocalQaTools, getLogger, getQaTools, local_exports as localQa, mcp_exports as mcp, e2e_exports as qa, server_exports as server, src_exports as shared } from './chunk-MNCBJEPQ.js';
1
+ export { src_exports2 as commands, createChildLogger, createUnifiedMcpServer, e2e_exports as e2e, getConfig, getLocalQaTools, getLogger, getQaTools, local_exports as localQa, mcp_exports as mcp, e2e_exports as qa, server_exports as server, src_exports as shared } from './chunk-TP4T4T2Z.js';
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "muggle",
3
3
  "description": "Run real-browser end-to-end (E2E) acceptance tests on your web app from any AI coding agent. Generate test scripts from plain English, replay them on localhost, capture screenshots, and validate user flows like signup, checkout, and dashboards. Works across Claude Code, Cursor, Codex, and Windsurf.",
4
- "version": "4.5.0",
4
+ "version": "4.6.0",
5
5
  "author": {
6
6
  "name": "Muggle AI",
7
7
  "email": "support@muggle-ai.com"
@@ -2,7 +2,7 @@
2
2
  "name": "muggle",
3
3
  "displayName": "Muggle AI",
4
4
  "description": "Ship quality products with AI-powered end-to-end (E2E) acceptance testing that validates your web app like a real user — from Claude Code and Cursor to PR.",
5
- "version": "4.5.0",
5
+ "version": "4.6.0",
6
6
  "author": {
7
7
  "name": "Muggle AI",
8
8
  "email": "support@muggle-ai.com"
@@ -33,7 +33,12 @@ Read `localUrl` for each repo from the context. If it is not provided, ask the u
33
33
  ### Step 1: Check Authentication
34
34
 
35
35
  - `muggle-remote-auth-status`
36
- - If not signed in: `muggle-remote-auth-login` then `muggle-remote-auth-poll`
36
+ - If **authenticated**: print the logged-in email and ask via `AskQuestion`:
37
+ > "You're logged in as **{email}**. Continue with this account?"
38
+ - Option 1: "Yes, continue"
39
+ - Option 2: "No, switch account"
40
+ If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true` then `muggle-remote-auth-poll`.
41
+ - If **not signed in or expired**: `muggle-remote-auth-login` then `muggle-remote-auth-poll`
37
42
 
38
43
  Do not skip or assume auth.
39
44
 
@@ -62,7 +67,6 @@ For each relevant test case:
62
67
  - `testScript`: the full script object
63
68
  - `actionScript`: the full action script object (from `muggle-remote-action-script-get`)
64
69
  - `localUrl`: the resolved local URL
65
- - `approveElectronAppLaunch`: `true` *(pipeline context — user starting `muggle-do` is implicit approval)*
66
70
  - `timeoutMs`: `600000` (10 min) or `900000` (15 min) for complex flows
67
71
 
68
72
  3. **If no script exists** (generation path):
@@ -70,7 +74,6 @@ For each relevant test case:
70
74
  - `muggle-local-execute-test-generation` with:
71
75
  - `testCase`: the full test case object
72
76
  - `localUrl`: the resolved local URL
73
- - `approveElectronAppLaunch`: `true`
74
77
  - `timeoutMs`: `600000` (10 min) or `900000` (15 min) for complex flows
75
78
 
76
79
  4. When execution completes, call `muggle-local-run-result-get` with the `runId` returned by the execute call.
@@ -15,6 +15,22 @@ A router skill that detects code changes, resolves impacted test cases, executes
15
15
  - **Multi-select** (use cases, test cases): Use `AskQuestion` with `allow_multiple: true`.
16
16
  - **Free-text inputs** (URLs, descriptions): Only use plain text prompts when there is no finite set of options. Even then, offer a detected/default value when possible.
17
17
  - **Batch related questions**: If two questions are independent, present them together in a single `AskQuestion` call rather than asking sequentially.
18
+ - **Parallelize job-creation calls**: Whenever you're kicking off N independent cloud jobs — creating multiple use cases, generating/creating multiple test cases, fetching details for multiple test cases, starting multiple remote workflows, publishing multiple local runs, or fetching per-step screenshots for multiple runs — issue all N tool calls in a single message so they run in parallel. Never loop them sequentially unless there is a real ordering constraint (e.g. a single local Electron browser that can only run one test at a time).
19
+
20
+ ## Test Case Design: One Atomic Behavior Per Test Case
21
+
22
+ Every test case verifies exactly **one** user-observable behavior. Never bundle multiple concerns, sequential flows, or bootstrap/setup into a single test case — even if you think it would be "cleaner" or "more efficient."
23
+
24
+ **Ordering, dependencies, and bootstrap are Muggle's service responsibility, not yours.** Muggle's cloud handles test case dependencies, prerequisite state, and execution ordering. Your job is to describe the *atomic behavior to verify* — never the flow that gets there.
25
+
26
+ - ❌ Wrong: one test case that "signs up, logs in, navigates to the detail modal, verifies icon stacking, verifies tab order, verifies history format, and verifies reference layout."
27
+ - ✅ Right: four separate test cases — one per verifiable behavior — each with instruction text like "Verify the detail modal shows stacked pair of icons per card" with **no** signup / login / navigation / setup language.
28
+
29
+ **Never bake bootstrap into a test case description.** Signup, login, seed data, prerequisite navigation, tear-down — none of these belong inside the test case body. Write only the verification itself. The service will prepend whatever setup is needed based on its own dependency graph.
30
+
31
+ **Never consolidate the generator's output.** When `muggle-remote-test-case-generate-from-prompt` returns N micro-tests from a single prompt, that decomposition is the authoritative one. Do not "merge them into 1 for simplicity," do not "rewrite them to share bootstrap," do not "collapse them to match a 4 UC / 4 TC plan." Accept what the generator gave you.
32
+
33
+ **Never skip the generate→review cycle.** Even when you are 100% confident about the right shape, always present the generated test cases to the user before calling `muggle-remote-test-case-create`. "I'll skip the generate→review cycle and create directly" is a sign you're about to get it wrong.
18
34
 
19
35
  ## Step 1: Confirm Scope of Work (Always First)
20
36
 
@@ -41,8 +57,8 @@ If the user's intent is clear, state back what you understood and use `AskQuesti
41
57
  - Option 2: "Switch to [the other mode]"
42
58
 
43
59
  If ambiguous, use `AskQuestion` to let the user choose:
44
- - Option 1: "Locallaunch browser on your machine against localhost"
45
- - Option 2: "RemoteMuggle cloud tests against a preview/staging URL"
60
+ - Option 1: "On my computer test your localhost dev server in a browser on your machine"
61
+ - Option 2: "In the cloud test remotely targeting your deployed preview/staging URL"
46
62
 
47
63
  Only proceed after the user selects an option.
48
64
 
@@ -66,8 +82,12 @@ If no changes detected (clean tree), tell the user and ask what they want to tes
66
82
  ## Step 3: Authenticate
67
83
 
68
84
  1. Call `muggle-remote-auth-status`
69
- 2. If authenticated and not expired → proceed
70
- 3. If not authenticated or expired → call `muggle-remote-auth-login`
85
+ 2. If **authenticated and not expired** → print the logged-in email and ask via `AskQuestion`:
86
+ > "You're logged in as **{email}**. Continue with this account?"
87
+ - Option 1: "Yes, continue"
88
+ - Option 2: "No, switch account"
89
+ If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true`, then `muggle-remote-auth-poll`.
90
+ 3. If **not authenticated or expired** → call `muggle-remote-auth-login`
71
91
  4. If login pending → call `muggle-remote-auth-poll`
72
92
 
73
93
  If auth fails repeatedly, suggest: `muggle logout && muggle login` from terminal.
@@ -93,56 +113,68 @@ A **project** is where all your test results, use cases, and test scripts are gr
93
113
 
94
114
  Store the `projectId` only after user confirms.
95
115
 
96
- ## Step 5: Select Use Case (User Must Choose)
116
+ ## Step 5: Select Use Case (Best-Effort Shortlist)
97
117
 
98
118
  ### 5a: List existing use cases
99
119
  Call `muggle-remote-use-case-list` with the project ID.
100
120
 
101
- ### 5b: Present ALL use cases for user selection
121
+ ### 5b: Best-effort match against the change summary
122
+
123
+ Using the change summary from Step 2, pick the use cases whose title/description most plausibly relate to the impacted areas. Produce a **short shortlist** (typically 1–5) — don't try to be exhaustive, and don't dump the full project list on the user. A confident best-effort match is the goal.
124
+
125
+ If nothing looks like a confident match, fall back to asking the user which use case(s) they have in mind.
102
126
 
103
- Use `AskQuestion` with `allow_multiple: true` to present all use cases as clickable options. Always include a "Create new use case" option at the end.
127
+ ### 5c: Present the shortlist for confirmation
104
128
 
105
- Prompt: "Which use case(s) do you want to test?"
129
+ Use `AskQuestion` with `allow_multiple: true`:
106
130
 
107
- ### 5c: Wait for explicit user selection
131
+ Prompt: "These use cases look most relevant to your changes — confirm which to test:"
108
132
 
109
- **CRITICAL: Do NOT auto-select use cases** based on:
110
- - Git changes analysis
111
- - Use case title/description matching
112
- - Any heuristic or inference
133
+ - Pre-check the shortlisted items so the user can accept with one click
134
+ - Include "Pick a different use case" to reveal the full project list
135
+ - Include "Create new use case" at the end
113
136
 
114
- The user MUST explicitly tell you which use case(s) to use.
137
+ ### 5d: If user picks "Pick a different use case"
138
+ Re-present the full list from 5a via `AskQuestion` with `allow_multiple: true`, then continue.
115
139
 
116
- ### 5d: If user chooses "Create new use case"
117
- 1. Ask the user to describe the use case in plain English
118
- 2. Call `muggle-remote-use-case-create-from-prompts`:
140
+ ### 5e: If user chooses "Create new use case"
141
+ 1. Ask the user to describe the use case(s) in plain English — they may want more than one
142
+ 2. Call `muggle-remote-use-case-create-from-prompts` **once** with **all** descriptions batched into the `instructions` array (this endpoint natively fans out the jobs server-side — do NOT make one call per use case):
119
143
  - `projectId`: The project ID
120
- - `instructions`: A plain array of strings, one per use case — e.g. `["<user's description>"]`
121
- 3. Present the created use case and confirm it's correct
144
+ - `instructions`: A plain array of strings, one per use case — e.g. `["<description 1>", "<description 2>", ...]`
145
+ 3. Present the created use cases and confirm they're correct
122
146
 
123
- ## Step 6: Select Test Case (User Must Choose)
147
+ ## Step 6: Select Test Case (Best-Effort Shortlist)
124
148
 
125
149
  For the selected use case(s):
126
150
 
127
151
  ### 6a: List existing test cases
128
152
  Call `muggle-remote-test-case-list-by-use-case` with each use case ID.
129
153
 
130
- ### 6b: Present ALL test cases for user selection
154
+ ### 6b: Best-effort match against the change summary
131
155
 
132
- Use `AskQuestion` with `allow_multiple: true` to present all test cases as clickable options. Always include a "Generate new test case" option at the end.
156
+ Using the change summary from Step 2, pick the test cases that look most relevant to the impacted areas. Keep the shortlist small and confident — don't enumerate every test case attached to the use case(s).
133
157
 
134
- Prompt: "Which test case(s) do you want to run?"
158
+ If nothing looks like a confident match, fall back to offering to run all test cases for the selected use case(s), or ask the user what they had in mind.
135
159
 
136
- ### 6c: Wait for explicit user selection
160
+ ### 6c: Present the shortlist for confirmation
137
161
 
138
- **CRITICAL: Do NOT auto-select test cases** — the user MUST explicitly choose which test case(s) to execute.
162
+ Use `AskQuestion` with `allow_multiple: true`:
163
+
164
+ Prompt: "These test cases look most relevant — confirm which to run:"
165
+
166
+ - Pre-check the shortlisted items so the user can accept with one click
167
+ - Include "Show all test cases" to reveal the full list
168
+ - Include "Generate new test case" at the end
139
169
 
140
170
  ### 6d: If user chooses "Generate new test case"
141
- 1. Ask the user to describe what they want to test in plain English
142
- 2. Call `muggle-remote-test-case-generate-from-prompt`:
143
- - `projectId`, `useCaseId`, `instruction` (the user's description)
144
- 3. Present the generated test case(s) for review
145
- 4. Call `muggle-remote-test-case-create` to save the ones the user approves
171
+ 1. Ask the user to describe what they want to test in plain English — they may want more than one test case
172
+ 2. For N descriptions, issue N `muggle-remote-test-case-generate-from-prompt` calls **in parallel** (single message, multiple tool calls — never loop sequentially):
173
+ - `projectId`, `useCaseId`, `instruction` (one description per call)
174
+ - Each `instruction` must describe **exactly one atomic behavior to verify**. No signup, no login, no "first navigate to X, then click Y, then verify Z" chains, no seed data, no cleanup. Just the verification. See **Test Case Design** above.
175
+ 3. **Accept the generator's decomposition as-is.** If the generator returns 4 micro-tests from a single prompt, that's 4 correct test cases — never merge, consolidate, or rewrite them to bundle bootstrap.
176
+ 4. Present the generated test case(s) for user review — **always do this review cycle**, even when you think you already know the right shape. Skipping straight to creation is the anti-pattern this skill most frequently gets wrong.
177
+ 5. For the ones the user approves, issue `muggle-remote-test-case-create` calls **in parallel**
146
178
 
147
179
  ### 6e: Confirm final selection
148
180
 
@@ -154,9 +186,7 @@ Wait for user confirmation before moving to execution.
154
186
 
155
187
  ## Step 7A: Execute — Local Mode
156
188
 
157
- ### Pre-flight questions (batch where possible)
158
-
159
- **Question 1 — Local URL:**
189
+ ### Pre-flight question — Local URL
160
190
 
161
191
  Try to auto-detect the dev server URL by checking running terminals or common ports (e.g., `lsof -iTCP -sTCP:LISTEN -nP | grep -E ':(3000|3001|4200|5173|8080)'`). If a likely URL is found, present it as a clickable default via `AskQuestion`:
162
192
  - Option 1: "http://localhost:3000" (or whatever was detected)
@@ -164,38 +194,31 @@ Try to auto-detect the dev server URL by checking running terminals or common po
164
194
 
165
195
  If nothing detected, ask as free text: "Your local app should be running. What's the URL? (e.g., http://localhost:3000)"
166
196
 
167
- **Question 2 — Electron launch + window visibility (ask together):**
168
-
169
- After getting the URL, use a single `AskQuestion` call with two questions:
197
+ **No separate approval or visibility question.** The user picking Local mode in Step 1 *is* the approval — do not ask "ready to launch Electron?" before every run. The Electron browser defaults to visible; if the user wants headless, they will say so; otherwise let it run visible.
170
198
 
171
- 1. "Ready to launch the Muggle Electron browser for [N] test case(s)?"
172
- - "Yes, launch it (visible — I want to watch)"
173
- - "Yes, launch it (headless — run in background)"
174
- - "No, cancel"
199
+ ### Fetch test case details (in parallel)
175
200
 
176
- If user cancels, stop and ask what they want to do instead.
201
+ Before execution, fetch full test case details for all selected test cases by issuing **all** `muggle-remote-test-case-get` calls in parallel (single message, multiple tool calls).
177
202
 
178
- ### Run sequentially
203
+ ### Run sequentially (Electron constraint)
179
204
 
180
- For each test case:
205
+ Execution itself **must** be sequential because there is only one local Electron browser. For each test case, in order:
181
206
 
182
- 1. Call `muggle-remote-test-case-get` to fetch full details
183
- 2. Call `muggle-local-execute-test-generation`:
184
- - `testCase`: Full test case object from step 1
185
- - `localUrl`: User's local URL (from Question 1)
186
- - `approveElectronAppLaunch`: `true` (only if user approved in Question 2)
187
- - `showUi`: `true` if user chose "visible", `false` if "headless" (from Question 2)
188
- 3. Store the returned `runId`
207
+ 1. Call `muggle-local-execute-test-generation`:
208
+ - `testCase`: Full test case object from the parallel fetch above
209
+ - `localUrl`: User's local URL from the pre-flight question
210
+ - `showUi`: omit (default visible) unless the user explicitly asked for headless, then pass `false`
211
+ 2. Store the returned `runId`
189
212
 
190
213
  If a generation fails, log it and continue to the next. Do not abort the batch.
191
214
 
192
- ### Collect results
215
+ ### Collect results (in parallel)
193
216
 
194
- For each `runId`, call `muggle-local-run-result-get`. Extract: status, duration, step count, `artifactsDir`.
217
+ For every `runId`, issue all `muggle-local-run-result-get` calls in parallel. Extract: status, duration, step count, `artifactsDir`.
195
218
 
196
- ### Publish each run to cloud
219
+ ### Publish each run to cloud (in parallel)
197
220
 
198
- For each completed run, call `muggle-local-publish-test-script`:
221
+ For every completed run, issue all `muggle-local-publish-test-script` calls in parallel (single message, multiple tool calls):
199
222
  - `runId`: The local run ID
200
223
  - `cloudTestCaseId`: The cloud test case ID
201
224
 
@@ -225,26 +248,29 @@ For failures: show which step failed, the local screenshot path, and a suggestio
225
248
 
226
249
  > "What's the preview/staging URL to test against?"
227
250
 
228
- ### Trigger remote workflows
251
+ ### Fetch test case details (in parallel)
229
252
 
230
- For each test case:
253
+ Issue all `muggle-remote-test-case-get` calls in parallel (single message, multiple tool calls) to hydrate the test case bodies.
231
254
 
232
- 1. Call `muggle-remote-test-case-get` to fetch full details
233
- 2. Call `muggle-remote-workflow-start-test-script-generation`:
234
- - `projectId`: The project ID
235
- - `useCaseId`: The use case ID
236
- - `testCaseId`: The test case ID
237
- - `name`: `"muggle-test: {test case title}"`
238
- - `url`: The preview/staging URL
239
- - `goal`: From the test case
240
- - `precondition`: From the test case (use `"None"` if empty)
241
- - `instructions`: From the test case
242
- - `expectedResult`: From the test case
243
- 3. Store the returned workflow runtime ID
255
+ ### Trigger remote workflows (in parallel)
256
+
257
+ Once details are in hand, issue all `muggle-remote-workflow-start-test-script-generation` calls in parallel — never loop them sequentially. For each test case:
258
+
259
+ - `projectId`: The project ID
260
+ - `useCaseId`: The use case ID
261
+ - `testCaseId`: The test case ID
262
+ - `name`: `"muggle-test: {test case title}"`
263
+ - `url`: The preview/staging URL
264
+ - `goal`: From the test case
265
+ - `precondition`: From the test case (use `"None"` if empty)
266
+ - `instructions`: From the test case
267
+ - `expectedResult`: From the test case
268
+
269
+ Store each returned workflow runtime ID.
244
270
 
245
- ### Monitor and report
271
+ ### Monitor and report (in parallel)
246
272
 
247
- For each workflow, call `muggle-remote-wf-get-ts-gen-latest-run` with the runtime ID.
273
+ Issue all `muggle-remote-wf-get-ts-gen-latest-run` calls in parallel, one per runtime ID.
248
274
 
249
275
  ```
250
276
  Test Case Workflow Status Runtime ID
@@ -287,12 +313,11 @@ After reporting results, ask the user if they want to attach a **visual walkthro
287
313
 
288
314
  The shared skill takes an **`E2eReport` JSON** that includes per-step screenshot URLs. You already have `projectId`, `testCaseId`, `runId`, `viewUrl`, and `status` from earlier steps — you still need the step-level data.
289
315
 
290
- For each published run from Step 7A:
316
+ For the published runs from Step 7A, issue **all** `muggle-remote-test-script-get` calls in parallel (single message, multiple tool calls) — one per `testScriptId` returned by `muggle-local-publish-test-script`. Then, for each response:
291
317
 
292
- 1. Call `muggle-remote-test-script-get` with the `testScriptId` returned by `muggle-local-publish-test-script`.
293
- 2. Extract from the response: `steps[].operation.action` (description) and `steps[].operation.screenshotUrl` (cloud URL).
294
- 3. Build a `steps` array: `[{ stepIndex: 0, action: "...", screenshotUrl: "..." }, ...]`.
295
- 4. If the run failed, also capture `failureStepIndex`, `error`, and the local `artifactsDir` from `muggle-local-run-result-get`.
318
+ 1. Extract `steps[].operation.action` (description) and `steps[].operation.screenshotUrl` (cloud URL).
319
+ 2. Build a `steps` array: `[{ stepIndex: 0, action: "...", screenshotUrl: "..." }, ...]`.
320
+ 3. If the run failed, also capture `failureStepIndex`, `error`, and the local `artifactsDir` from `muggle-local-run-result-get`.
296
321
 
297
322
  Assemble the report:
298
323
 
@@ -364,11 +389,15 @@ This skill always uses **Mode A** (post to an existing PR); `muggle-do` is the o
364
389
 
365
390
  - **Always confirm intent first** — never assume local vs remote without asking
366
391
  - **User MUST select project** — present clickable options via `AskQuestion`, wait for explicit choice, never auto-select
367
- - **User MUST select use case(s)** — present clickable options via `AskQuestion`, wait for explicit choice, never auto-select based on git changes or heuristics
368
- - **User MUST select test case(s)** — present clickable options via `AskQuestion`, wait for explicit choice, never auto-select
392
+ - **Best-effort shortlist use cases** — use the change summary to narrow the list to the most relevant 1–5 use cases and pre-check them; never dump every use case in the project on the user. Always leave an escape hatch to reveal the full list.
393
+ - **Best-effort shortlist test cases** — same idea: pre-check the test cases most relevant to the change summary; never enumerate every test case attached to a use case. Always leave an escape hatch to reveal the full list.
369
394
  - **Use `AskQuestion` for every selection** — never ask the user to type a number; always present clickable options
370
- - **Batch related questions** combine Electron approval + visibility into one question; auto-detect localhost URL when possible
371
- - **Never launch Electron without explicit user approval** (`approveElectronAppLaunch`)
395
+ - **Auto-detect localhost URL when possible**; only fall back to free-text when nothing is listening on a common port
396
+ - **Parallelize independent cloud jobs** — when creating N use cases, generating/creating N test cases, fetching N test case details, starting N remote workflows, polling N workflow runtimes, publishing N local runs, or fetching N per-step test scripts, issue all N calls in a single message so they fan out in parallel. The only tolerated sequential loop is local Electron execution (one browser, one test at a time). For use case creation specifically, use the native batch form of `muggle-remote-use-case-create-from-prompts` (all descriptions in one `instructions` array) instead of parallel calls.
397
+ - **One atomic behavior per test case** — every test case verifies exactly one user-observable behavior. Never bundle signup/login/navigation/bootstrap/teardown into a test case body. Ordering and dependencies are Muggle's service responsibility, not the skill's.
398
+ - **Never consolidate the generator's output** — if `muggle-remote-test-case-generate-from-prompt` returns N micro-tests, accept all N; never merge them into fewer test cases, even if "the plan" says 4 UC / 4 TC.
399
+ - **Never skip the generate→review cycle** — always present generated test cases to the user before calling `muggle-remote-test-case-create`, even when you're confident. "I'll skip the review and create directly" is always wrong.
400
+ - **Never ask for Electron launch approval before each run** — the user picking Local mode is the approval. Don't prompt "Ready to launch Electron?" before execution; just run.
372
401
  - **Never silently drop test cases** — log failures and continue, then report them
373
402
  - **Never guess the URL** — always ask the user for localhost or preview URL
374
403
  - **Always publish before opening browser** — the dashboard needs the published data to show results
@@ -19,7 +19,7 @@ The local URL only changes where the browser opens; it does not change the remot
19
19
 
20
20
  **Every selection-based question MUST use the `AskQuestion` tool** (or the platform's equivalent structured selection tool). Never ask the user to "reply with a number" in a plain text message — always present clickable options.
21
21
 
22
- - **Selections** (project, use case, test case, script, approval): Use `AskQuestion` with labeled options the user can click.
22
+ - **Selections** (project, use case, test case, script): Use `AskQuestion` with labeled options the user can click.
23
23
  - **Free-text inputs** (URLs, descriptions): Only use plain text prompts when there is no finite set of options. Even then, offer a detected/default value when possible.
24
24
 
25
25
  ## Workflow
@@ -27,7 +27,12 @@ The local URL only changes where the browser opens; it does not change the remot
27
27
  ### 1. Auth
28
28
 
29
29
  - `muggle-remote-auth-status`
30
- - If not signed in: `muggle-remote-auth-login` then `muggle-remote-auth-poll`
30
+ - If **authenticated**: print the logged-in email and ask via `AskQuestion`:
31
+ > "You're logged in as **{email}**. Continue with this account?"
32
+ - Option 1: "Yes, continue"
33
+ - Option 2: "No, switch account"
34
+ If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true` then `muggle-remote-auth-poll`.
35
+ - If **not signed in or expired**: call `muggle-remote-auth-login` then `muggle-remote-auth-poll`.
31
36
  Do not skip or assume auth.
32
37
 
33
38
  ### 2. Targets (user must confirm)
@@ -84,21 +89,21 @@ Remind them: local URL is only the execution target, not tied to cloud project c
84
89
  **Generate**
85
90
 
86
91
  1. `muggle-remote-test-case-get`
87
- 2. `muggle-local-execute-test-generation` (after approval in step 6) with that test case + `localUrl` + `approveElectronAppLaunch: true` (optional: `showUi: true`, **`timeoutMs`** — see below)
92
+ 2. `muggle-local-execute-test-generation` with that test case + `localUrl` (optional: `showUi: false` for headless — defaults to visible; **`timeoutMs`** — see below)
88
93
 
89
94
  **Replay**
90
95
 
91
96
  1. `muggle-remote-test-script-get` — note `actionScriptId`
92
97
  2. `muggle-remote-action-script-get` with that id — full `actionScript`
93
98
  **Use the API response as-is.** Do not edit, shorten, or rebuild `actionScript`; replay needs full `label` paths for element lookup.
94
- 3. `muggle-local-execute-replay` (after approval in step 6) with `testScript`, `actionScript`, `localUrl`, `approveElectronAppLaunch: true` (optional: `showUi: true`, **`timeoutMs`** — see below)
99
+ 3. `muggle-local-execute-replay` with `testScript`, `actionScript`, `localUrl` (optional: `showUi: false` for headless — defaults to visible; **`timeoutMs`** — see below)
95
100
 
96
101
  ### Local execution timeout (`timeoutMs`)
97
102
 
98
103
  The MCP client often uses a **default wait of 300000 ms (5 minutes)** for `muggle-local-execute-test-generation` and `muggle-local-execute-replay`. **Exploratory script generation** (Auth0 login, dashboards, multi-step wizards, many LLM iterations) routinely **runs longer than 5 minutes** while Electron is still healthy.
99
104
 
100
105
  - **Always pass `timeoutMs`** for flows that may be long — for example **`600000` (10 min)** or **`900000` (15 min)** — unless the user explicitly wants a short cap.
101
- - If the tool reports **`Electron execution timed out after 300000ms`** (or similar) **but** Electron logs show the run still progressing (steps, screenshots, LLM calls), treat it as **orchestration timeout**, not an Electron app defect: **increase `timeoutMs` and retry** (after user re-approves if your policy requires it).
106
+ - If the tool reports **`Electron execution timed out after 300000ms`** (or similar) **but** Electron logs show the run still progressing (steps, screenshots, LLM calls), treat it as **orchestration timeout**, not an Electron app defect: **increase `timeoutMs` and retry**.
102
107
  - **Test case design:** Preconditions like "a test run has already completed" on an **empty account** can force many steps (sign-up, new project, crawl). Prefer an account/project that **already has** the needed state, or narrow the test goal so generation does not try to create a full project from scratch unless that is intentional.
103
108
 
104
109
  ### Interpreting `failed` / non-zero Electron exit
@@ -106,15 +111,9 @@ The MCP client often uses a **default wait of 300000 ms (5 minutes)** for `muggl
106
111
  - **`Electron execution timed out after 300000ms`:** Orchestration wait too short — see **`timeoutMs`** above.
107
112
  - **Exit code 26** (and messages like **LLM failed to generate / replay action script**): Often corresponds to a completed exploration whose **outcome was goal not achievable** (`goal_not_achievable`, summary with `halt`) — e.g. verifying "view script after a successful run" when **no run or script exists yet** in the UI. Use `muggle-local-run-result-get` and read the **summary / structured summary**; do not assume an Electron crash. **Fix:** choose a **project that already has** completed runs and scripts, or **change the test case** so preconditions match what localhost can satisfy (e.g. include steps to create and run a test first, or assert only empty-state UI when no runs exist).
108
113
 
109
- ### 6. Approval before any local execution
114
+ ### 6. Execute (no approval prompt)
110
115
 
111
- Use `AskQuestion` to get explicit approval before launching Electron. State: replay vs generation, test case name, URL.
112
-
113
- - "Yes, launch Electron (visible — I want to watch)"
114
- - "Yes, launch Electron (headless — run in background)"
115
- - "No, cancel"
116
-
117
- Only call local execute tools with `approveElectronAppLaunch: true` after the user selects a "Yes" option. Map visible to `showUi: true`, headless to `showUi: false`.
116
+ Call `muggle-local-execute-test-generation` or `muggle-local-execute-replay` directly. **Do not** ask the user to re-approve the Electron launch the user choosing this skill in the first place is the approval. The browser defaults to visible; only pass `showUi: false` if the user explicitly asked for headless.
118
117
 
119
118
  ### 7. After successful generation only
120
119
 
@@ -177,10 +176,11 @@ Always use **Mode A** (post to existing PR) from this skill. Never hand-write th
177
176
 
178
177
  ## Non-negotiables
179
178
 
180
- - No silent auth skip; no launching Electron without approval via `AskQuestion`.
179
+ - No silent auth skip.
180
+ - **Never prompt for Electron launch approval** before execution — invoking this skill is the approval. Just run.
181
181
  - If replayable scripts exist, do not default to generation without user choice.
182
182
  - No hiding failures: surface errors and artifact paths.
183
183
  - Replay: never hand-built or simplified `actionScript` — only from `muggle-remote-action-script-get`.
184
- - Use `AskQuestion` for every selection — project, use case, test case, script, and approval. Never ask the user to type a number.
184
+ - Use `AskQuestion` for every selection — project, use case, test case, script. Never ask the user to type a number.
185
185
  - Project, use case, and test case selection lists must always include "Create new ...". Include "Show full list" whenever the API returned at least one row for that step; omit "Show full list" when the list is empty (offer "Create new ..." only). For creates, use preview tools (`muggle-remote-use-case-prompt-preview`, `muggle-remote-test-case-generate-from-prompt`) before persisting.
186
186
  - PR posting is always optional and always delegated to the `muggle-pr-visual-walkthrough` skill — never inline the walkthrough markdown or call `gh pr comment` directly from this skill.
@@ -130,9 +130,12 @@ If the user wants changes, incorporate feedback, then ask again. Only proceed af
130
130
 
131
131
  Call `muggle-remote-auth-status` first.
132
132
 
133
- If already authenticated → skip to Step 5.
133
+ If **already authenticated**print the logged-in email and ask via `AskQuestion`:
134
+ > "You're logged in as **{email}**. Continue with this account?"
135
+ - Option 1: "Yes, continue" → skip to Step 5.
136
+ - Option 2: "No, switch account" → call `muggle-remote-auth-login` with `forceNewSession: true`, then `muggle-remote-auth-poll`.
134
137
 
135
- If not authenticated:
138
+ If **not authenticated**:
136
139
  1. Tell the user a browser window is about to open.
137
140
  2. Call `muggle-remote-auth-login` (opens browser automatically).
138
141
  3. Tell the user to complete login in the browser.
@@ -40,8 +40,13 @@ Treat this filter as a default, not a law. If the user explicitly says "include
40
40
  ### Step 1 — Authenticate
41
41
 
42
42
  1. Call `muggle-remote-auth-status`.
43
- 2. If not authenticated or expired → call `muggle-remote-auth-login`, then poll with `muggle-remote-auth-poll`.
44
- 3. Do not skip auth and do not assume a stale token still works.
43
+ 2. If **authenticated and not expired**print the logged-in email and ask via `AskQuestion`:
44
+ > "You're logged in as **{email}**. Continue with this account?"
45
+ - Option 1: "Yes, continue"
46
+ - Option 2: "No, switch account"
47
+ If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true`, then poll with `muggle-remote-auth-poll`.
48
+ 3. If **not authenticated or expired** → call `muggle-remote-auth-login`, then poll with `muggle-remote-auth-poll`.
49
+ 4. Do not skip auth and do not assume a stale token still works.
45
50
 
46
51
  If auth keeps failing, suggest the user run `muggle logout && muggle login` from a terminal.
47
52
 
@@ -1,7 +1,7 @@
1
1
  {
2
- "release": "4.5.0",
3
- "buildId": "run-13-1",
4
- "commitSha": "bff100bb7229ea757db7154a9ef0c289da0124ef",
5
- "buildTime": "2026-04-09T22:06:42Z",
2
+ "release": "4.6.0",
3
+ "buildId": "run-14-1",
4
+ "commitSha": "6bb3f01991815677a37182923c26ba71d623a2b0",
5
+ "buildTime": "2026-04-10T20:31:08Z",
6
6
  "serviceName": "muggle-ai-works-mcp"
7
7
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@muggleai/works",
3
3
  "mcpName": "io.github.multiplex-ai/muggle",
4
- "version": "4.5.0",
4
+ "version": "4.6.0",
5
5
  "description": "Ship quality products with AI-powered E2E acceptance testing that validates your web app like a real user — from Claude Code and Cursor to PR.",
6
6
  "type": "module",
7
7
  "main": "dist/index.js",
@@ -41,14 +41,14 @@
41
41
  "test:watch": "vitest"
42
42
  },
43
43
  "muggleConfig": {
44
- "electronAppVersion": "1.0.47",
44
+ "electronAppVersion": "1.0.49",
45
45
  "downloadBaseUrl": "https://github.com/multiplex-ai/muggle-ai-works/releases/download",
46
46
  "runtimeTargetDefault": "production",
47
47
  "checksums": {
48
- "darwin-arm64": "f80b943ea5f05e7113d603ee8104c07be101a26092c4fa50ed6fcb37a0cbebff",
49
- "darwin-x64": "3189a5c07087f9ba2ef03f99e3735055c00a752b2421ae9cffc113f04933da61",
50
- "win32-x64": "d8e102fce024776262f856dfea0b12429e853689d29d03e27e54dbeffbe59725",
51
- "linux-x64": "d7218dddcbab47f78c64fe438cf5bd129740f20f6ad160b615a648415f8faffc"
48
+ "darwin-arm64": "0a93aa97ace4c2afbb8dd7e49c91673e352eefbc6df6bebb0ea0e7142ee5d63c",
49
+ "darwin-x64": "98e37f7ddf4b86fef15bfe6ab8275072d04857690de776b89611a1a4d83d4691",
50
+ "win32-x64": "5d72b318388ae45f8aad0a9f6363dd680f870effc69991ea8ab244786a6f9d31",
51
+ "linux-x64": "5275c5c8fca3435ac0294d3ded00d68ecf5cd7d602a4f58b6de8fef3568559ee"
52
52
  }
53
53
  },
54
54
  "dependencies": {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "muggle",
3
3
  "description": "Run real-browser end-to-end (E2E) acceptance tests on your web app from any AI coding agent. Generate test scripts from plain English, replay them on localhost, capture screenshots, and validate user flows like signup, checkout, and dashboards. Works across Claude Code, Cursor, Codex, and Windsurf.",
4
- "version": "4.5.0",
4
+ "version": "4.6.0",
5
5
  "author": {
6
6
  "name": "Muggle AI",
7
7
  "email": "support@muggle-ai.com"
@@ -2,7 +2,7 @@
2
2
  "name": "muggle",
3
3
  "displayName": "Muggle AI",
4
4
  "description": "Ship quality products with AI-powered end-to-end (E2E) acceptance testing that validates your web app like a real user — from Claude Code and Cursor to PR.",
5
- "version": "4.5.0",
5
+ "version": "4.6.0",
6
6
  "author": {
7
7
  "name": "Muggle AI",
8
8
  "email": "support@muggle-ai.com"
@@ -33,7 +33,12 @@ Read `localUrl` for each repo from the context. If it is not provided, ask the u
33
33
  ### Step 1: Check Authentication
34
34
 
35
35
  - `muggle-remote-auth-status`
36
- - If not signed in: `muggle-remote-auth-login` then `muggle-remote-auth-poll`
36
+ - If **authenticated**: print the logged-in email and ask via `AskQuestion`:
37
+ > "You're logged in as **{email}**. Continue with this account?"
38
+ - Option 1: "Yes, continue"
39
+ - Option 2: "No, switch account"
40
+ If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true` then `muggle-remote-auth-poll`.
41
+ - If **not signed in or expired**: `muggle-remote-auth-login` then `muggle-remote-auth-poll`
37
42
 
38
43
  Do not skip or assume auth.
39
44
 
@@ -62,7 +67,6 @@ For each relevant test case:
62
67
  - `testScript`: the full script object
63
68
  - `actionScript`: the full action script object (from `muggle-remote-action-script-get`)
64
69
  - `localUrl`: the resolved local URL
65
- - `approveElectronAppLaunch`: `true` *(pipeline context — user starting `muggle-do` is implicit approval)*
66
70
  - `timeoutMs`: `600000` (10 min) or `900000` (15 min) for complex flows
67
71
 
68
72
  3. **If no script exists** (generation path):
@@ -70,7 +74,6 @@ For each relevant test case:
70
74
  - `muggle-local-execute-test-generation` with:
71
75
  - `testCase`: the full test case object
72
76
  - `localUrl`: the resolved local URL
73
- - `approveElectronAppLaunch`: `true`
74
77
  - `timeoutMs`: `600000` (10 min) or `900000` (15 min) for complex flows
75
78
 
76
79
  4. When execution completes, call `muggle-local-run-result-get` with the `runId` returned by the execute call.
@@ -15,6 +15,22 @@ A router skill that detects code changes, resolves impacted test cases, executes
15
15
  - **Multi-select** (use cases, test cases): Use `AskQuestion` with `allow_multiple: true`.
16
16
  - **Free-text inputs** (URLs, descriptions): Only use plain text prompts when there is no finite set of options. Even then, offer a detected/default value when possible.
17
17
  - **Batch related questions**: If two questions are independent, present them together in a single `AskQuestion` call rather than asking sequentially.
18
+ - **Parallelize job-creation calls**: Whenever you're kicking off N independent cloud jobs — creating multiple use cases, generating/creating multiple test cases, fetching details for multiple test cases, starting multiple remote workflows, publishing multiple local runs, or fetching per-step screenshots for multiple runs — issue all N tool calls in a single message so they run in parallel. Never loop them sequentially unless there is a real ordering constraint (e.g. a single local Electron browser that can only run one test at a time).
19
+
20
+ ## Test Case Design: One Atomic Behavior Per Test Case
21
+
22
+ Every test case verifies exactly **one** user-observable behavior. Never bundle multiple concerns, sequential flows, or bootstrap/setup into a single test case — even if you think it would be "cleaner" or "more efficient."
23
+
24
+ **Ordering, dependencies, and bootstrap are Muggle's service responsibility, not yours.** Muggle's cloud handles test case dependencies, prerequisite state, and execution ordering. Your job is to describe the *atomic behavior to verify* — never the flow that gets there.
25
+
26
+ - ❌ Wrong: one test case that "signs up, logs in, navigates to the detail modal, verifies icon stacking, verifies tab order, verifies history format, and verifies reference layout."
27
+ - ✅ Right: four separate test cases — one per verifiable behavior — each with instruction text like "Verify the detail modal shows stacked pair of icons per card" with **no** signup / login / navigation / setup language.
28
+
29
+ **Never bake bootstrap into a test case description.** Signup, login, seed data, prerequisite navigation, tear-down — none of these belong inside the test case body. Write only the verification itself. The service will prepend whatever setup is needed based on its own dependency graph.
30
+
31
+ **Never consolidate the generator's output.** When `muggle-remote-test-case-generate-from-prompt` returns N micro-tests from a single prompt, that decomposition is the authoritative one. Do not "merge them into 1 for simplicity," do not "rewrite them to share bootstrap," do not "collapse them to match a 4 UC / 4 TC plan." Accept what the generator gave you.
32
+
33
+ **Never skip the generate→review cycle.** Even when you are 100% confident about the right shape, always present the generated test cases to the user before calling `muggle-remote-test-case-create`. "I'll skip the generate→review cycle and create directly" is a sign you're about to get it wrong.
18
34
 
19
35
  ## Step 1: Confirm Scope of Work (Always First)
20
36
 
@@ -41,8 +57,8 @@ If the user's intent is clear, state back what you understood and use `AskQuesti
41
57
  - Option 2: "Switch to [the other mode]"
42
58
 
43
59
  If ambiguous, use `AskQuestion` to let the user choose:
44
- - Option 1: "Locallaunch browser on your machine against localhost"
45
- - Option 2: "RemoteMuggle cloud tests against a preview/staging URL"
60
+ - Option 1: "On my computer test your localhost dev server in a browser on your machine"
61
+ - Option 2: "In the cloud test remotely targeting your deployed preview/staging URL"
46
62
 
47
63
  Only proceed after the user selects an option.
48
64
 
@@ -66,8 +82,12 @@ If no changes detected (clean tree), tell the user and ask what they want to tes
66
82
  ## Step 3: Authenticate
67
83
 
68
84
  1. Call `muggle-remote-auth-status`
69
- 2. If authenticated and not expired → proceed
70
- 3. If not authenticated or expired call `muggle-remote-auth-login`
85
+ 2. If **authenticated and not expired**print the logged-in email and ask via `AskQuestion`:
86
+ > "You're logged in as **{email}**. Continue with this account?"
87
+ - Option 1: "Yes, continue"
88
+ - Option 2: "No, switch account"
89
+ If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true`, then `muggle-remote-auth-poll`.
90
+ 3. If **not authenticated or expired** → call `muggle-remote-auth-login`
71
91
  4. If login pending → call `muggle-remote-auth-poll`
72
92
 
73
93
  If auth fails repeatedly, suggest: `muggle logout && muggle login` from terminal.
@@ -93,56 +113,68 @@ A **project** is where all your test results, use cases, and test scripts are gr
93
113
 
94
114
  Store the `projectId` only after user confirms.
95
115
 
96
- ## Step 5: Select Use Case (User Must Choose)
116
+ ## Step 5: Select Use Case (Best-Effort Shortlist)
97
117
 
98
118
  ### 5a: List existing use cases
99
119
  Call `muggle-remote-use-case-list` with the project ID.
100
120
 
101
- ### 5b: Present ALL use cases for user selection
121
+ ### 5b: Best-effort match against the change summary
122
+
123
+ Using the change summary from Step 2, pick the use cases whose title/description most plausibly relate to the impacted areas. Produce a **short shortlist** (typically 1–5) — don't try to be exhaustive, and don't dump the full project list on the user. A confident best-effort match is the goal.
124
+
125
+ If nothing looks like a confident match, fall back to asking the user which use case(s) they have in mind.
102
126
 
103
- Use `AskQuestion` with `allow_multiple: true` to present all use cases as clickable options. Always include a "Create new use case" option at the end.
127
+ ### 5c: Present the shortlist for confirmation
104
128
 
105
- Prompt: "Which use case(s) do you want to test?"
129
+ Use `AskQuestion` with `allow_multiple: true`:
106
130
 
107
- ### 5c: Wait for explicit user selection
131
+ Prompt: "These use cases look most relevant to your changes — confirm which to test:"
108
132
 
109
- **CRITICAL: Do NOT auto-select use cases** based on:
110
- - Git changes analysis
111
- - Use case title/description matching
112
- - Any heuristic or inference
133
+ - Pre-check the shortlisted items so the user can accept with one click
134
+ - Include "Pick a different use case" to reveal the full project list
135
+ - Include "Create new use case" at the end
113
136
 
114
- The user MUST explicitly tell you which use case(s) to use.
137
+ ### 5d: If user picks "Pick a different use case"
138
+ Re-present the full list from 5a via `AskQuestion` with `allow_multiple: true`, then continue.
115
139
 
116
- ### 5d: If user chooses "Create new use case"
117
- 1. Ask the user to describe the use case in plain English
118
- 2. Call `muggle-remote-use-case-create-from-prompts`:
140
+ ### 5e: If user chooses "Create new use case"
141
+ 1. Ask the user to describe the use case(s) in plain English — they may want more than one
142
+ 2. Call `muggle-remote-use-case-create-from-prompts` **once** with **all** descriptions batched into the `instructions` array (this endpoint natively fans out the jobs server-side — do NOT make one call per use case):
119
143
  - `projectId`: The project ID
120
- - `instructions`: A plain array of strings, one per use case — e.g. `["<user's description>"]`
121
- 3. Present the created use case and confirm it's correct
144
+ - `instructions`: A plain array of strings, one per use case — e.g. `["<description 1>", "<description 2>", ...]`
145
+ 3. Present the created use cases and confirm they're correct
122
146
 
123
- ## Step 6: Select Test Case (User Must Choose)
147
+ ## Step 6: Select Test Case (Best-Effort Shortlist)
124
148
 
125
149
  For the selected use case(s):
126
150
 
127
151
  ### 6a: List existing test cases
128
152
  Call `muggle-remote-test-case-list-by-use-case` with each use case ID.
129
153
 
130
- ### 6b: Present ALL test cases for user selection
154
+ ### 6b: Best-effort match against the change summary
131
155
 
132
- Use `AskQuestion` with `allow_multiple: true` to present all test cases as clickable options. Always include a "Generate new test case" option at the end.
156
+ Using the change summary from Step 2, pick the test cases that look most relevant to the impacted areas. Keep the shortlist small and confident — don't enumerate every test case attached to the use case(s).
133
157
 
134
- Prompt: "Which test case(s) do you want to run?"
158
+ If nothing looks like a confident match, fall back to offering to run all test cases for the selected use case(s), or ask the user what they had in mind.
135
159
 
136
- ### 6c: Wait for explicit user selection
160
+ ### 6c: Present the shortlist for confirmation
137
161
 
138
- **CRITICAL: Do NOT auto-select test cases** — the user MUST explicitly choose which test case(s) to execute.
162
+ Use `AskQuestion` with `allow_multiple: true`:
163
+
164
+ Prompt: "These test cases look most relevant — confirm which to run:"
165
+
166
+ - Pre-check the shortlisted items so the user can accept with one click
167
+ - Include "Show all test cases" to reveal the full list
168
+ - Include "Generate new test case" at the end
139
169
 
140
170
  ### 6d: If user chooses "Generate new test case"
141
- 1. Ask the user to describe what they want to test in plain English
142
- 2. Call `muggle-remote-test-case-generate-from-prompt`:
143
- - `projectId`, `useCaseId`, `instruction` (the user's description)
144
- 3. Present the generated test case(s) for review
145
- 4. Call `muggle-remote-test-case-create` to save the ones the user approves
171
+ 1. Ask the user to describe what they want to test in plain English — they may want more than one test case
172
+ 2. For N descriptions, issue N `muggle-remote-test-case-generate-from-prompt` calls **in parallel** (single message, multiple tool calls — never loop sequentially):
173
+ - `projectId`, `useCaseId`, `instruction` (one description per call)
174
+ - Each `instruction` must describe **exactly one atomic behavior to verify**. No signup, no login, no "first navigate to X, then click Y, then verify Z" chains, no seed data, no cleanup. Just the verification. See **Test Case Design** above.
175
+ 3. **Accept the generator's decomposition as-is.** If the generator returns 4 micro-tests from a single prompt, that's 4 correct test cases never merge, consolidate, or rewrite them to bundle bootstrap.
176
+ 4. Present the generated test case(s) for user review — **always do this review cycle**, even when you think you already know the right shape. Skipping straight to creation is the anti-pattern this skill most frequently gets wrong.
177
+ 5. For the ones the user approves, issue `muggle-remote-test-case-create` calls **in parallel**
146
178
 
147
179
  ### 6e: Confirm final selection
148
180
 
@@ -154,9 +186,7 @@ Wait for user confirmation before moving to execution.
154
186
 
155
187
  ## Step 7A: Execute — Local Mode
156
188
 
157
- ### Pre-flight questions (batch where possible)
158
-
159
- **Question 1 — Local URL:**
189
+ ### Pre-flight question — Local URL
160
190
 
161
191
  Try to auto-detect the dev server URL by checking running terminals or common ports (e.g., `lsof -iTCP -sTCP:LISTEN -nP | grep -E ':(3000|3001|4200|5173|8080)'`). If a likely URL is found, present it as a clickable default via `AskQuestion`:
162
192
  - Option 1: "http://localhost:3000" (or whatever was detected)
@@ -164,38 +194,31 @@ Try to auto-detect the dev server URL by checking running terminals or common po
164
194
 
165
195
  If nothing detected, ask as free text: "Your local app should be running. What's the URL? (e.g., http://localhost:3000)"
166
196
 
167
- **Question 2Electron launch + window visibility (ask together):**
168
-
169
- After getting the URL, use a single `AskQuestion` call with two questions:
197
+ **No separate approval or visibility question.** The user picking Local mode in Step 1 *is* the approval — do not ask "ready to launch Electron?" before every run. The Electron browser defaults to visible; if the user wants headless, they will say so, otherwise let it run visible.
170
198
 
171
- 1. "Ready to launch the Muggle Electron browser for [N] test case(s)?"
172
- - "Yes, launch it (visible — I want to watch)"
173
- - "Yes, launch it (headless — run in background)"
174
- - "No, cancel"
199
+ ### Fetch test case details (in parallel)
175
200
 
176
- If user cancels, stop and ask what they want to do instead.
201
+ Before execution, fetch full test case details for all selected test cases by issuing **all** `muggle-remote-test-case-get` calls in parallel (single message, multiple tool calls).
177
202
 
178
- ### Run sequentially
203
+ ### Run sequentially (Electron constraint)
179
204
 
180
- For each test case:
205
+ Execution itself **must** be sequential because there is only one local Electron browser. For each test case, in order:
181
206
 
182
- 1. Call `muggle-remote-test-case-get` to fetch full details
183
- 2. Call `muggle-local-execute-test-generation`:
184
- - `testCase`: Full test case object from step 1
185
- - `localUrl`: User's local URL (from Question 1)
186
- - `approveElectronAppLaunch`: `true` (only if user approved in Question 2)
187
- - `showUi`: `true` if user chose "visible", `false` if "headless" (from Question 2)
188
- 3. Store the returned `runId`
207
+ 1. Call `muggle-local-execute-test-generation`:
208
+ - `testCase`: Full test case object from the parallel fetch above
209
+ - `localUrl`: User's local URL from the pre-flight question
210
+ - `showUi`: omit (default visible) unless the user explicitly asked for headless, then pass `false`
211
+ 2. Store the returned `runId`
189
212
 
190
213
  If a generation fails, log it and continue to the next. Do not abort the batch.
191
214
 
192
- ### Collect results
215
+ ### Collect results (in parallel)
193
216
 
194
- For each `runId`, call `muggle-local-run-result-get`. Extract: status, duration, step count, `artifactsDir`.
217
+ For every `runId`, issue all `muggle-local-run-result-get` calls in parallel. Extract: status, duration, step count, `artifactsDir`.
195
218
 
196
- ### Publish each run to cloud
219
+ ### Publish each run to cloud (in parallel)
197
220
 
198
- For each completed run, call `muggle-local-publish-test-script`:
221
+ For every completed run, issue all `muggle-local-publish-test-script` calls in parallel (single message, multiple tool calls):
199
222
  - `runId`: The local run ID
200
223
  - `cloudTestCaseId`: The cloud test case ID
201
224
 
@@ -225,26 +248,29 @@ For failures: show which step failed, the local screenshot path, and a suggestio
225
248
 
226
249
  > "What's the preview/staging URL to test against?"
227
250
 
228
- ### Trigger remote workflows
251
+ ### Fetch test case details (in parallel)
229
252
 
230
- For each test case:
253
+ Issue all `muggle-remote-test-case-get` calls in parallel (single message, multiple tool calls) to hydrate the test case bodies.
231
254
 
232
- 1. Call `muggle-remote-test-case-get` to fetch full details
233
- 2. Call `muggle-remote-workflow-start-test-script-generation`:
234
- - `projectId`: The project ID
235
- - `useCaseId`: The use case ID
236
- - `testCaseId`: The test case ID
237
- - `name`: `"muggle-test: {test case title}"`
238
- - `url`: The preview/staging URL
239
- - `goal`: From the test case
240
- - `precondition`: From the test case (use `"None"` if empty)
241
- - `instructions`: From the test case
242
- - `expectedResult`: From the test case
243
- 3. Store the returned workflow runtime ID
255
+ ### Trigger remote workflows (in parallel)
256
+
257
+ Once details are in hand, issue all `muggle-remote-workflow-start-test-script-generation` calls in parallel — never loop them sequentially. For each test case:
258
+
259
+ - `projectId`: The project ID
260
+ - `useCaseId`: The use case ID
261
+ - `testCaseId`: The test case ID
262
+ - `name`: `"muggle-test: {test case title}"`
263
+ - `url`: The preview/staging URL
264
+ - `goal`: From the test case
265
+ - `precondition`: From the test case (use `"None"` if empty)
266
+ - `instructions`: From the test case
267
+ - `expectedResult`: From the test case
268
+
269
+ Store each returned workflow runtime ID.
244
270
 
245
- ### Monitor and report
271
+ ### Monitor and report (in parallel)
246
272
 
247
- For each workflow, call `muggle-remote-wf-get-ts-gen-latest-run` with the runtime ID.
273
+ Issue all `muggle-remote-wf-get-ts-gen-latest-run` calls in parallel, one per runtime ID.
248
274
 
249
275
  ```
250
276
  Test Case Workflow Status Runtime ID
@@ -287,12 +313,11 @@ After reporting results, ask the user if they want to attach a **visual walkthro
287
313
 
288
314
  The shared skill takes an **`E2eReport` JSON** that includes per-step screenshot URLs. You already have `projectId`, `testCaseId`, `runId`, `viewUrl`, and `status` from earlier steps — you still need the step-level data.
289
315
 
290
- For each published run from Step 7A:
316
+ For the published runs from Step 7A, issue **all** `muggle-remote-test-script-get` calls in parallel (single message, multiple tool calls) — one per `testScriptId` returned by `muggle-local-publish-test-script`. Then, for each response:
291
317
 
292
- 1. Call `muggle-remote-test-script-get` with the `testScriptId` returned by `muggle-local-publish-test-script`.
293
- 2. Extract from the response: `steps[].operation.action` (description) and `steps[].operation.screenshotUrl` (cloud URL).
294
- 3. Build a `steps` array: `[{ stepIndex: 0, action: "...", screenshotUrl: "..." }, ...]`.
295
- 4. If the run failed, also capture `failureStepIndex`, `error`, and the local `artifactsDir` from `muggle-local-run-result-get`.
318
+ 1. Extract `steps[].operation.action` (description) and `steps[].operation.screenshotUrl` (cloud URL).
319
+ 2. Build a `steps` array: `[{ stepIndex: 0, action: "...", screenshotUrl: "..." }, ...]`.
320
+ 3. If the run failed, also capture `failureStepIndex`, `error`, and the local `artifactsDir` from `muggle-local-run-result-get`.
296
321
 
297
322
  Assemble the report:
298
323
 
@@ -364,11 +389,15 @@ This skill always uses **Mode A** (post to an existing PR); `muggle-do` is the o
364
389
 
365
390
  - **Always confirm intent first** — never assume local vs remote without asking
366
391
  - **User MUST select project** — present clickable options via `AskQuestion`, wait for explicit choice, never auto-select
367
- - **User MUST select use case(s)** — present clickable options via `AskQuestion`, wait for explicit choice, never auto-select based on git changes or heuristics
368
- - **User MUST select test case(s)** — present clickable options via `AskQuestion`, wait for explicit choice, never auto-select
392
+ - **Best-effort shortlist use cases** — use the change summary to narrow the list to the most relevant 1–5 use cases and pre-check them; never dump every use case in the project on the user. Always leave an escape hatch to reveal the full list.
393
+ - **Best-effort shortlist test cases** — same idea: pre-check the test cases most relevant to the change summary; never enumerate every test case attached to a use case. Always leave an escape hatch to reveal the full list.
369
394
  - **Use `AskQuestion` for every selection** — never ask the user to type a number; always present clickable options
370
- - **Batch related questions** combine Electron approval + visibility into one question; auto-detect localhost URL when possible
371
- - **Never launch Electron without explicit user approval** (`approveElectronAppLaunch`)
395
+ - **Auto-detect localhost URL when possible**; only fall back to free-text when nothing is listening on a common port
396
+ - **Parallelize independent cloud jobs** — when creating N use cases, generating/creating N test cases, fetching N test case details, starting N remote workflows, polling N workflow runtimes, publishing N local runs, or fetching N per-step test scripts, issue all N calls in a single message so they fan out in parallel. The only tolerated sequential loop is local Electron execution (one browser, one test at a time). For use case creation specifically, use the native batch form of `muggle-remote-use-case-create-from-prompts` (all descriptions in one `instructions` array) instead of parallel calls.
397
+ - **One atomic behavior per test case** — every test case verifies exactly one user-observable behavior. Never bundle signup/login/navigation/bootstrap/teardown into a test case body. Ordering and dependencies are Muggle's service responsibility, not the skill's.
398
+ - **Never consolidate the generator's output** — if `muggle-remote-test-case-generate-from-prompt` returns N micro-tests, accept all N; never merge them into fewer test cases, even if "the plan" says 4 UC / 4 TC.
399
+ - **Never skip the generate→review cycle** — always present generated test cases to the user before calling `muggle-remote-test-case-create`, even when you're confident. "I'll skip the review and create directly" is always wrong.
400
+ - **Never ask for Electron launch approval before each run** — the user picking Local mode is the approval. Don't prompt "Ready to launch Electron?" before execution; just run.
372
401
  - **Never silently drop test cases** — log failures and continue, then report them
373
402
  - **Never guess the URL** — always ask the user for localhost or preview URL
374
403
  - **Always publish before opening browser** — the dashboard needs the published data to show results
@@ -19,7 +19,7 @@ The local URL only changes where the browser opens; it does not change the remot
19
19
 
20
20
  **Every selection-based question MUST use the `AskQuestion` tool** (or the platform's equivalent structured selection tool). Never ask the user to "reply with a number" in a plain text message — always present clickable options.
21
21
 
22
- - **Selections** (project, use case, test case, script, approval): Use `AskQuestion` with labeled options the user can click.
22
+ - **Selections** (project, use case, test case, script): Use `AskQuestion` with labeled options the user can click.
23
23
  - **Free-text inputs** (URLs, descriptions): Only use plain text prompts when there is no finite set of options. Even then, offer a detected/default value when possible.
24
24
 
25
25
  ## Workflow
@@ -27,7 +27,12 @@ The local URL only changes where the browser opens; it does not change the remot
27
27
  ### 1. Auth
28
28
 
29
29
  - `muggle-remote-auth-status`
30
- - If not signed in: `muggle-remote-auth-login` then `muggle-remote-auth-poll`
30
+ - If **authenticated**: print the logged-in email and ask via `AskQuestion`:
31
+ > "You're logged in as **{email}**. Continue with this account?"
32
+ - Option 1: "Yes, continue"
33
+ - Option 2: "No, switch account"
34
+ If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true` then `muggle-remote-auth-poll`.
35
+ - If **not signed in or expired**: call `muggle-remote-auth-login` then `muggle-remote-auth-poll`.
31
36
  Do not skip or assume auth.
32
37
 
33
38
  ### 2. Targets (user must confirm)
@@ -84,21 +89,21 @@ Remind them: local URL is only the execution target, not tied to cloud project c
84
89
  **Generate**
85
90
 
86
91
  1. `muggle-remote-test-case-get`
87
- 2. `muggle-local-execute-test-generation` (after approval in step 6) with that test case + `localUrl` + `approveElectronAppLaunch: true` (optional: `showUi: true`, **`timeoutMs`** — see below)
92
+ 2. `muggle-local-execute-test-generation` with that test case + `localUrl` (optional: `showUi: false` for headless — defaults to visible; **`timeoutMs`** — see below)
88
93
 
89
94
  **Replay**
90
95
 
91
96
  1. `muggle-remote-test-script-get` — note `actionScriptId`
92
97
  2. `muggle-remote-action-script-get` with that id — full `actionScript`
93
98
  **Use the API response as-is.** Do not edit, shorten, or rebuild `actionScript`; replay needs full `label` paths for element lookup.
94
- 3. `muggle-local-execute-replay` (after approval in step 6) with `testScript`, `actionScript`, `localUrl`, `approveElectronAppLaunch: true` (optional: `showUi: true`, **`timeoutMs`** — see below)
99
+ 3. `muggle-local-execute-replay` with `testScript`, `actionScript`, `localUrl` (optional: `showUi: false` for headless — defaults to visible; **`timeoutMs`** — see below)
95
100
 
96
101
  ### Local execution timeout (`timeoutMs`)
97
102
 
98
103
  The MCP client often uses a **default wait of 300000 ms (5 minutes)** for `muggle-local-execute-test-generation` and `muggle-local-execute-replay`. **Exploratory script generation** (Auth0 login, dashboards, multi-step wizards, many LLM iterations) routinely **runs longer than 5 minutes** while Electron is still healthy.
99
104
 
100
105
  - **Always pass `timeoutMs`** for flows that may be long — for example **`600000` (10 min)** or **`900000` (15 min)** — unless the user explicitly wants a short cap.
101
- - If the tool reports **`Electron execution timed out after 300000ms`** (or similar) **but** Electron logs show the run still progressing (steps, screenshots, LLM calls), treat it as **orchestration timeout**, not an Electron app defect: **increase `timeoutMs` and retry** (after user re-approves if your policy requires it).
106
+ - If the tool reports **`Electron execution timed out after 300000ms`** (or similar) **but** Electron logs show the run still progressing (steps, screenshots, LLM calls), treat it as **orchestration timeout**, not an Electron app defect: **increase `timeoutMs` and retry**.
102
107
  - **Test case design:** Preconditions like "a test run has already completed" on an **empty account** can force many steps (sign-up, new project, crawl). Prefer an account/project that **already has** the needed state, or narrow the test goal so generation does not try to create a full project from scratch unless that is intentional.
103
108
 
104
109
  ### Interpreting `failed` / non-zero Electron exit
@@ -106,15 +111,9 @@ The MCP client often uses a **default wait of 300000 ms (5 minutes)** for `muggl
106
111
  - **`Electron execution timed out after 300000ms`:** Orchestration wait too short — see **`timeoutMs`** above.
107
112
  - **Exit code 26** (and messages like **LLM failed to generate / replay action script**): Often corresponds to a completed exploration whose **outcome was goal not achievable** (`goal_not_achievable`, summary with `halt`) — e.g. verifying "view script after a successful run" when **no run or script exists yet** in the UI. Use `muggle-local-run-result-get` and read the **summary / structured summary**; do not assume an Electron crash. **Fix:** choose a **project that already has** completed runs and scripts, or **change the test case** so preconditions match what localhost can satisfy (e.g. include steps to create and run a test first, or assert only empty-state UI when no runs exist).
108
113
 
109
- ### 6. Approval before any local execution
114
+ ### 6. Execute (no approval prompt)
110
115
 
111
- Use `AskQuestion` to get explicit approval before launching Electron. State: replay vs generation, test case name, URL.
112
-
113
- - "Yes, launch Electron (visible — I want to watch)"
114
- - "Yes, launch Electron (headless — run in background)"
115
- - "No, cancel"
116
-
117
- Only call local execute tools with `approveElectronAppLaunch: true` after the user selects a "Yes" option. Map visible to `showUi: true`, headless to `showUi: false`.
116
+ Call `muggle-local-execute-test-generation` or `muggle-local-execute-replay` directly. **Do not** ask the user to re-approve the Electron launch — the user choosing this skill in the first place is the approval. The browser defaults to visible; only pass `showUi: false` if the user explicitly asked for headless.
118
117
 
119
118
  ### 7. After successful generation only
120
119
 
@@ -177,10 +176,11 @@ Always use **Mode A** (post to existing PR) from this skill. Never hand-write th
177
176
 
178
177
  ## Non-negotiables
179
178
 
180
- - No silent auth skip; no launching Electron without approval via `AskQuestion`.
179
+ - No silent auth skip.
180
+ - **Never prompt for Electron launch approval** before execution — invoking this skill is the approval. Just run.
181
181
  - If replayable scripts exist, do not default to generation without user choice.
182
182
  - No hiding failures: surface errors and artifact paths.
183
183
  - Replay: never hand-built or simplified `actionScript` — only from `muggle-remote-action-script-get`.
184
- - Use `AskQuestion` for every selection — project, use case, test case, script, and approval. Never ask the user to type a number.
184
+ - Use `AskQuestion` for every selection — project, use case, test case, script. Never ask the user to type a number.
185
185
  - Project, use case, and test case selection lists must always include "Create new ...". Include "Show full list" whenever the API returned at least one row for that step; omit "Show full list" when the list is empty (offer "Create new ..." only). For creates, use preview tools (`muggle-remote-use-case-prompt-preview`, `muggle-remote-test-case-generate-from-prompt`) before persisting.
186
186
  - PR posting is always optional and always delegated to the `muggle-pr-visual-walkthrough` skill — never inline the walkthrough markdown or call `gh pr comment` directly from this skill.
@@ -130,9 +130,12 @@ If the user wants changes, incorporate feedback, then ask again. Only proceed af
130
130
 
131
131
  Call `muggle-remote-auth-status` first.
132
132
 
133
- If already authenticated → skip to Step 5.
133
+ If **already authenticated** — print the logged-in email and ask via `AskQuestion`:
134
+ > "You're logged in as **{email}**. Continue with this account?"
135
+ - Option 1: "Yes, continue" → skip to Step 5.
136
+ - Option 2: "No, switch account" → call `muggle-remote-auth-login` with `forceNewSession: true`, then `muggle-remote-auth-poll`.
134
137
 
135
- If not authenticated:
138
+ If **not authenticated**:
136
139
  1. Tell the user a browser window is about to open.
137
140
  2. Call `muggle-remote-auth-login` (opens browser automatically).
138
141
  3. Tell the user to complete login in the browser.
@@ -40,8 +40,13 @@ Treat this filter as a default, not a law. If the user explicitly says "include
40
40
  ### Step 1 — Authenticate
41
41
 
42
42
  1. Call `muggle-remote-auth-status`.
43
- 2. If not authenticated or expired → call `muggle-remote-auth-login`, then poll with `muggle-remote-auth-poll`.
44
- 3. Do not skip auth and do not assume a stale token still works.
43
+ 2. If **authenticated and not expired** — print the logged-in email and ask via `AskQuestion`:
44
+ > "You're logged in as **{email}**. Continue with this account?"
45
+ - Option 1: "Yes, continue"
46
+ - Option 2: "No, switch account"
47
+ If the user picks "switch account", call `muggle-remote-auth-login` with `forceNewSession: true`, then poll with `muggle-remote-auth-poll`.
48
+ 3. If **not authenticated or expired** → call `muggle-remote-auth-login`, then poll with `muggle-remote-auth-poll`.
49
+ 4. Do not skip auth and do not assume a stale token still works.
45
50
 
46
51
  If auth keeps failing, suggest the user run `muggle logout && muggle login` from a terminal.
47
52