npm - @mindstudio-ai/remy - Versions diffs - 0.1.155 → 0.1.156 - Mend

@mindstudio-ai/remy 0.1.155 → 0.1.156

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/headless.js +54 -32
package/dist/index.js +55 -35
package/dist/prompt/compiled/auth.md +4 -0
package/dist/prompt/static/coding.md +3 -1
package/package.json +1 -1

package/dist/headless.js CHANGED Viewed

@@ -2676,6 +2676,21 @@ ${opts.styleMap}
 ${TEXT_WRAP_DISCLAIMER}`;
   return p;
 }
+async function streamScreenshotAnalysis(opts) {
+  const { url, prompt, styleMap, onLog } = opts;
+  onLog?.(JSON.stringify({ url, analysis: null }));
+  const analysisPrompt = buildScreenshotAnalysisPrompt({ prompt, styleMap });
+  let accumulated = "";
+  const analysis = await analyzeImage({
+    prompt: analysisPrompt,
+    imageUrl: url,
+    onLog: (chunk) => {
+      accumulated += chunk;
+      onLog?.(JSON.stringify({ url, analysis: accumulated }));
+    }
+  });
+  return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
+}
 async function captureAndAnalyzeScreenshot(promptOrOptions) {
   let prompt;
   let existingUrl;
@@ -2710,16 +2725,12 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
   if (prompt === false) {
     return url;
   }
-  const analysisPrompt = buildScreenshotAnalysisPrompt({
+  return streamScreenshotAnalysis({
+    url,
     prompt: prompt || void 0,
-    styleMap
-  });
-  const analysis = await analyzeImage({
-    prompt: analysisPrompt,
-    imageUrl: url,
+    styleMap,
     onLog
   });
-  return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
 }
 // src/tools/_helpers/browserLock.ts
@@ -2739,9 +2750,10 @@ function startStatusWatcher(config) {
   const { apiConfig, getContext, onStatus, interval = 5e3, signal } = config;
   let inflight = false;
   let stopped = false;
+  let pauseCount = 0;
   const url = `${apiConfig.baseUrl}/_internal/v2/agent/remy/generate-status`;
   async function tick() {
-    if (stopped || signal?.aborted || inflight) {
+    if (stopped || signal?.aborted || inflight || pauseCount > 0) {
       return;
     }
     inflight = true;
@@ -2766,6 +2778,9 @@ function startStatusWatcher(config) {
       if (!data.label) {
         return;
       }
+      if (pauseCount > 0) {
+        return;
+      }
       onStatus(data.label);
     } catch {
     } finally {
@@ -2779,6 +2794,12 @@ function startStatusWatcher(config) {
     stop() {
       stopped = true;
       clearInterval(timer);
+    },
+    pause() {
+      pauseCount++;
+    },
+    resume() {
+      pauseCount = Math.max(0, pauseCount - 1);
     }
   };
 }
@@ -3634,7 +3655,7 @@ var screenshotTool = {
         },
         instructions: {
           type: "string",
-          description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Only use instructions when you need to trigger stateful changes. Never describe what names or values to use when applying the isntructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing."
+          description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing. Use only when interaction is required to *reach* the state you want to capture \u2014 log in, dismiss a modal, switch a tab, follow a route. If your steps are exercising the app's functionality across multiple states (running flows, asserting behavior under interaction, multi-step QA), use `runAutomatedBrowserTest` instead."
         }
       }
     }
@@ -3663,20 +3684,12 @@ var screenshotTool = {
         if (!url) {
           return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
         }
-        const analysisPrompt = buildScreenshotAnalysisPrompt({
+        return await streamScreenshotAnalysis({
+          url,
           prompt: input.prompt,
-          styleMap
-        });
-        const analysis = await analyzeImage({
-          prompt: analysisPrompt,
-          imageUrl: url,
+          styleMap,
           onLog: context?.onLog
         });
-        return JSON.stringify({
-          url,
-          analysis,
-          ...styleMap ? { styleMap } : {}
-        });
       }
       const release = await acquireBrowserLock();
       try {
@@ -3994,20 +4007,12 @@ async function execute5(input, onLog, context) {
       if (!url) {
         return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
       }
-      const analysisPrompt = buildScreenshotAnalysisPrompt({
+      return await streamScreenshotAnalysis({
+        url,
         prompt: input.prompt,
-        styleMap
-      });
-      const analysis = await analyzeImage({
-        prompt: analysisPrompt,
-        imageUrl: url,
+        styleMap,
         onLog
       });
-      return JSON.stringify({
-        url,
-        analysis,
-        ...styleMap ? { styleMap } : {}
-      });
     } catch (err) {
       return `Error taking interactive screenshot: ${err.message}`;
     }
@@ -5477,6 +5482,11 @@ var EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
   "browserCommand",
   "setProjectMetadata"
 ]);
+var USER_BLOCKING_EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
+  "promptUser",
+  "presentPublishPlan",
+  "confirmDestructiveAction"
+]);
 function createAgentState() {
   return { messages: [] };
 }
@@ -5566,6 +5576,8 @@ async function runTurn(params) {
     let subAgentText = "";
     let currentToolNames = "";
     const statusWatcher = isFirstMessage ? { stop() {
+    }, pause() {
+    }, resume() {
     } } : startStatusWatcher({
       apiConfig,
       getContext: () => {
@@ -5870,7 +5882,17 @@ async function runTurn(params) {
                 toolCallId: tc.id,
                 name: tc.name
               });
-              result = await resolveExternalTool(tc.id, tc.name, input);
+              const blocksUser = USER_BLOCKING_EXTERNAL_TOOLS.has(tc.name);
+              if (blocksUser) {
+                statusWatcher.pause();
+              }
+              try {
+                result = await resolveExternalTool(tc.id, tc.name, input);
+              } finally {
+                if (blocksUser) {
+                  statusWatcher.resume();
+                }
+              }
             } else {
               result = await executeTool(tc.name, input, {
                 apiConfig,

package/dist/index.js CHANGED Viewed

@@ -2943,6 +2943,21 @@ ${opts.styleMap}
 ${TEXT_WRAP_DISCLAIMER}`;
   return p;
 }
+async function streamScreenshotAnalysis(opts) {
+  const { url, prompt, styleMap, onLog } = opts;
+  onLog?.(JSON.stringify({ url, analysis: null }));
+  const analysisPrompt = buildScreenshotAnalysisPrompt({ prompt, styleMap });
+  let accumulated = "";
+  const analysis = await analyzeImage({
+    prompt: analysisPrompt,
+    imageUrl: url,
+    onLog: (chunk) => {
+      accumulated += chunk;
+      onLog?.(JSON.stringify({ url, analysis: accumulated }));
+    }
+  });
+  return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
+}
 async function captureAndAnalyzeScreenshot(promptOrOptions) {
   let prompt;
   let existingUrl;
@@ -2977,16 +2992,12 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
   if (prompt === false) {
     return url;
   }
-  const analysisPrompt = buildScreenshotAnalysisPrompt({
+  return streamScreenshotAnalysis({
+    url,
     prompt: prompt || void 0,
-    styleMap
-  });
-  const analysis = await analyzeImage({
-    prompt: analysisPrompt,
-    imageUrl: url,
+    styleMap,
     onLog
   });
-  return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
 }
 var SCREENSHOT_ANALYSIS_PROMPT, TEXT_WRAP_DISCLAIMER;
 var init_screenshot = __esm({
@@ -3024,9 +3035,10 @@ function startStatusWatcher(config) {
   const { apiConfig, getContext, onStatus, interval = 5e3, signal } = config;
   let inflight = false;
   let stopped = false;
+  let pauseCount = 0;
   const url = `${apiConfig.baseUrl}/_internal/v2/agent/remy/generate-status`;
   async function tick() {
-    if (stopped || signal?.aborted || inflight) {
+    if (stopped || signal?.aborted || inflight || pauseCount > 0) {
       return;
     }
     inflight = true;
@@ -3051,6 +3063,9 @@ function startStatusWatcher(config) {
       if (!data.label) {
         return;
       }
+      if (pauseCount > 0) {
+        return;
+      }
       onStatus(data.label);
     } catch {
     } finally {
@@ -3064,6 +3079,12 @@ function startStatusWatcher(config) {
     stop() {
       stopped = true;
       clearInterval(timer);
+    },
+    pause() {
+      pauseCount++;
+    },
+    resume() {
+      pauseCount = Math.max(0, pauseCount - 1);
     }
   };
 }
@@ -3956,7 +3977,6 @@ var init_screenshot2 = __esm({
     "use strict";
     init_screenshot();
     init_browserLock();
-    init_analyzeImage();
     init_browserAutomation();
     screenshotTool = {
       clearable: true,
@@ -3980,7 +4000,7 @@ var init_screenshot2 = __esm({
             },
             instructions: {
               type: "string",
-              description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Only use instructions when you need to trigger stateful changes. Never describe what names or values to use when applying the isntructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing."
+              description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing. Use only when interaction is required to *reach* the state you want to capture \u2014 log in, dismiss a modal, switch a tab, follow a route. If your steps are exercising the app's functionality across multiple states (running flows, asserting behavior under interaction, multi-step QA), use `runAutomatedBrowserTest` instead."
             }
           }
         }
@@ -4009,20 +4029,12 @@ var init_screenshot2 = __esm({
             if (!url) {
               return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
             }
-            const analysisPrompt = buildScreenshotAnalysisPrompt({
+            return await streamScreenshotAnalysis({
+              url,
               prompt: input.prompt,
-              styleMap
-            });
-            const analysis = await analyzeImage({
-              prompt: analysisPrompt,
-              imageUrl: url,
+              styleMap,
               onLog: context?.onLog
             });
-            return JSON.stringify({
-              url,
-              analysis,
-              ...styleMap ? { styleMap } : {}
-            });
           }
           const release = await acquireBrowserLock();
           try {
@@ -4356,20 +4368,12 @@ async function execute5(input, onLog, context) {
       if (!url) {
         return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
       }
-      const analysisPrompt = buildScreenshotAnalysisPrompt({
+      return await streamScreenshotAnalysis({
+        url,
         prompt: input.prompt,
-        styleMap
-      });
-      const analysis = await analyzeImage({
-        prompt: analysisPrompt,
-        imageUrl: url,
+        styleMap,
         onLog
       });
-      return JSON.stringify({
-        url,
-        analysis,
-        ...styleMap ? { styleMap } : {}
-      });
     } catch (err) {
       return `Error taking interactive screenshot: ${err.message}`;
     }
@@ -4393,7 +4397,6 @@ var init_screenshot3 = __esm({
     "use strict";
     init_screenshot();
     init_browserLock();
-    init_analyzeImage();
     init_browserAutomation();
     definition5 = {
       clearable: true,
@@ -6156,6 +6159,8 @@ async function runTurn(params) {
     let subAgentText = "";
     let currentToolNames = "";
     const statusWatcher = isFirstMessage ? { stop() {
+    }, pause() {
+    }, resume() {
     } } : startStatusWatcher({
       apiConfig,
       getContext: () => {
@@ -6460,7 +6465,17 @@ async function runTurn(params) {
                 toolCallId: tc.id,
                 name: tc.name
               });
-              result = await resolveExternalTool(tc.id, tc.name, input);
+              const blocksUser = USER_BLOCKING_EXTERNAL_TOOLS.has(tc.name);
+              if (blocksUser) {
+                statusWatcher.pause();
+              }
+              try {
+                result = await resolveExternalTool(tc.id, tc.name, input);
+              } finally {
+                if (blocksUser) {
+                  statusWatcher.resume();
+                }
+              }
             } else {
               result = await executeTool(tc.name, input, {
                 apiConfig,
@@ -6565,7 +6580,7 @@ async function runTurn(params) {
     }
   }
 }
-var log8, EXTERNAL_TOOLS;
+var log8, EXTERNAL_TOOLS, USER_BLOCKING_EXTERNAL_TOOLS;
 var init_agent = __esm({
   "src/agent.ts"() {
     "use strict";
@@ -6591,6 +6606,11 @@ var init_agent = __esm({
       "browserCommand",
       "setProjectMetadata"
     ]);
+    USER_BLOCKING_EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
+      "promptUser",
+      "presentPublishPlan",
+      "confirmDestructiveAction"
+    ]);
   }
 });

package/dist/prompt/compiled/auth.md CHANGED Viewed

@@ -208,6 +208,8 @@ auth.requireRole('admin');
 auth.requireRole('admin', 'approver');  // any of these
 ```
+**Require login: check `auth.userId`. Roles are RBAC** — only declare roles that map to real business distinctions (vendor/buyer/admin), and only check them when behavior should differ. Newly verified users have `roles: []` until your code assigns them.
 ### `auth.hasRole(...roles)`
 Returns `boolean`. Same logic as `requireRole` but doesn't throw.
@@ -375,4 +377,6 @@ Auth works the same in dev/preview as in production — real verification codes
 All other emails and phone numbers receive real codes. There is no dev-mode bypass, no fake code, and no way to skip verification. When testing auth flows in the preview, use one of the test bypasses above or a real email/phone.
+The `runMethod` tool's `userId: "testUser"` shortcut resolves to this same dev-bypass identity. The platform find-or-creates a real users-table row for it on first call and caches the row's UUID for the rest of the dev session. **`auth.userId` inside the method is that UUID — not the literal string `"testUser"`.** The user row already exists, so don't try to insert it. If you need the UUID to seed app-specific rows that reference it (profiles, preferences, foreign keys), read it from any method response or query the users table directly: `SELECT id FROM users WHERE email = 'remy@mindstudio.ai'` (or `phone = '+15555555555'` for SMS-auth apps).
 Browser automation tools (screenshots, automated browser tests) handle their own auth sessions. Scenarios seed database data but do not create browser auth sessions.

package/dist/prompt/static/coding.md CHANGED Viewed

@@ -11,11 +11,13 @@ Run `lspDiagnostics` after every turn where you have edited code in any meaningf
 - Spot-check methods with `runMethod`. The dev database is a disposable snapshot that will have been seeded with scenario data, so don't worry about being destructive.
 - For frontend work, take a single `screenshot` to confirm the main view renders correctly or look at the browser log for any console errors in the user's preview.
-- Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, or when the user reports something broken that you can't identify from code alone.
+- Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, when the user reports something broken that you can't identify from code alone, or whenever the verification involves driving the app through multiple interactions.
 - If the browser is unavailable, skip the visual check and verify through methods, logs, and code instead. Browser unavailability is an infrastructure issue, not a code problem — don't try to diagnose or fix it.
 Aim for confidence that the core happy paths work. If the 80% case is solid, the remaining edge cases are likely fine and the user can surface them in chat. Don't screenshot every page, test every permutation, or verify every secondary flow. One or two runtime checks that confirm the app loads and data flows through is enough.
+When making mechanical edits as part of iterating with the user (e.g., moving elements, changing labels, small redesigns and refactors), don't re-screenshot to confirm, simply trust your code. Re-screenshot only when changes are structural enough that the visual outcome is genuinely uncertain (new layout, new component composition, new route), or when the user reports something visible that you can't see in the code.
 ### Process Logs
 Process logs are available at .logs/ in NDJSON format (one JSON object per line) for debugging. Each line has at minimum ts (unix millis) and msg fields, plus structured context like level, module, requestId, toolCallId where available. You can use `jq` to examine logs and debug failures. Tools like run method or run scenario execute synchronously, so log data will be available by the time those tools return their results to you, there is no need to `sleep` before querying logfiles.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mindstudio-ai/remy",
-  "version": "0.1.155",
+  "version": "0.1.156",
   "description": "MindStudio coding agent",
   "repository": {
     "type": "git",