npm - @mindstudio-ai/remy - Versions diffs - 0.1.191 → 0.1.193 - Mend

@mindstudio-ai/remy 0.1.191 → 0.1.193

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/headless.js +58 -17
package/dist/index.js +58 -17
package/dist/prompt/static/coding.md +2 -2
package/dist/subagents/browserAutomation/prompt.md +2 -2
package/package.json +1 -1

package/dist/headless.js CHANGED

@@ -2846,10 +2846,14 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
   let onLog;
   let model;
   let path12;
+  let fullPage = true;
   if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
     prompt = promptOrOptions.prompt;
     existingUrl = promptOrOptions.imageUrl;
     path12 = promptOrOptions.path;
+    if (promptOrOptions.fullPage !== void 0) {
+      fullPage = promptOrOptions.fullPage;
+    }
     onLog = promptOrOptions.onLog;
     model = promptOrOptions.model;
   } else {
@@ -2861,9 +2865,9 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
     url = existingUrl;
   } else {
     const ssResult = await sidecarRequest(
-      "/screenshot-full-page",
+      fullPage ? "/screenshot-full-page" : "/screenshot-viewport",
       path12 ? { path: path12 } : void 0,
-      { timeout: 12e4 }
+      { timeout: fullPage ? 12e4 : 3e4 }
     );
     url = ssResult?.url || ssResult?.screenshotUrl;
     if (!url) {
@@ -3773,6 +3777,20 @@ var BROWSER_TOOLS = [
         }
       }
     }
+  },
+  {
+    clearable: true,
+    name: "screenshotViewport",
+    description: "Capture a screenshot of just the visible viewport (no full-page scroll/stitch). Returns a CDN URL with full text analysis and description. Use this when the goal is a specific section the page is currently scrolled to, rather than the whole page.",
+    inputSchema: {
+      type: "object",
+      properties: {
+        path: {
+          type: "string",
+          description: 'Navigate to this path before capturing (e.g. "/settings"). If omitted, screenshots the current page.'
+        }
+      }
+    }
   }
 ];
 var BROWSER_EXTERNAL_TOOLS = /* @__PURE__ */ new Set(["browserCommand"]);
@@ -3892,7 +3910,7 @@ function resolveModel(surfaceId, models, fallback) {
 // src/subagents/browserAutomation/index.ts
 var log7 = createLogger("browser-automation");
-async function runBrowserAutomation(task, context) {
+async function runBrowserAutomation(task, context, opts) {
   const release = await acquireBrowserLock();
   try {
     const result = await runSubAgent({
@@ -3916,10 +3934,11 @@ async function runBrowserAutomation(task, context) {
             return `Error setting up browser: ${err.message}`;
           }
         }
-        if (name === "screenshotFullPage") {
+        if (name === "screenshotFullPage" || name === "screenshotViewport") {
           try {
             return await captureAndAnalyzeScreenshot({
               path: _input.path,
+              fullPage: name === "screenshotFullPage",
               onLog,
               model: resolveModel(
                 "imageAnalysis",
@@ -3995,13 +4014,15 @@ async function runBrowserAutomation(task, context) {
         return result2;
       },
       toolRegistry: context.toolRegistry,
-      captureArtifacts: ["screenshotFullPage"]
+      captureArtifacts: ["screenshotFullPage", "screenshotViewport"]
     });
     context.subAgentMessages?.set(context.toolCallId, result.messages);
-    const ss = result.artifacts?.screenshotFullPage;
+    const viewport = result.artifacts?.screenshotViewport;
+    const fullPage = result.artifacts?.screenshotFullPage;
+    const preferred = opts?.capture === "viewport" ? viewport ?? fullPage : fullPage ?? viewport;
     return {
       text: result.text,
-      ...ss?.url ? { screenshot: { url: ss.url, styleMap: ss.styleMap } } : {}
+      ...preferred?.url ? { screenshot: { url: preferred.url, styleMap: preferred.styleMap } } : {}
     };
   } finally {
     release();
@@ -4042,10 +4063,14 @@ var screenshotTool = {
   clearable: true,
   definition: {
     name: "screenshot",
-    description: "Capture a full-height screenshot of the app preview and get a description of what's on screen. Captures the settled page state \u2014 it cannot catch animations, transitions, or transient state. Optionally provide specific questions about what you're looking for. Use a bulleted list to ask many questions at once. To ask additional questions about a screenshot you have already captured, pass its URL as imageUrl to skip recapture. If the screenshot requires interaction first (logging in, clicking a tab, dismissing a modal), use the instructions param to describe the steps.",
+    description: "Capture a screenshot of the app preview and get a description of what's on screen. Choose `fullPage`: `false` captures just the visible viewport (fast \u2014 for a specific section the page is scrolled to), `true` captures the entire page top-to-bottom (slower \u2014 for overall composition or content past the fold). Captures the settled page state \u2014 it cannot catch animations, transitions, or transient state. Optionally provide specific questions about what you're looking for. Use a bulleted list to ask many questions at once. To ask additional questions about a screenshot you have already captured, pass its URL as imageUrl to skip recapture. If the screenshot requires interaction first (logging in, clicking a tab, dismissing a modal, scrolling to a section), use the instructions param to describe the steps.",
     inputSchema: {
       type: "object",
       properties: {
+        fullPage: {
+          type: "boolean",
+          description: "true = full-height capture of the entire page; false = just the visible viewport. Pick based on whether you need the whole page or a specific section."
+        },
         prompt: {
           type: "string",
           description: "Optional question about the screenshot. If omitted, returns a general description of what's visible."
@@ -4060,12 +4085,15 @@ var screenshotTool = {
         },
         instructions: {
           type: "string",
-          description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing. Use only when interaction is required to *reach* the state you want to capture \u2014 log in, dismiss a modal, switch a tab, follow a route. If your steps are exercising the app's functionality across multiple states (running flows, asserting behavior under interaction, multi-step QA), use `runAutomatedBrowserTest` instead."
+          description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, scrolling to a section, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions, then capture per your `fullPage` choice \u2014 so with `fullPage: false` you can scroll to a section and capture just that viewport. It can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing. Use only when interaction is required to *reach* the state you want to capture \u2014 log in, dismiss a modal, switch a tab, follow a route, scroll to a section. If your steps are exercising the app's functionality across multiple states (running flows, asserting behavior under interaction, multi-step QA), use `runAutomatedBrowserTest` instead."
         }
-      }
+      },
+      required: ["fullPage"]
     }
   },
   async execute(input, context) {
+    const fullPage = input.fullPage === true;
+    const shotKind = fullPage ? "full-page" : "viewport";
     try {
       if (input.imageUrl) {
         return await captureAndAnalyzeScreenshot({
@@ -4076,8 +4104,10 @@ var screenshotTool = {
         });
       }
       if (input.instructions && context) {
-        const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
-        const result = await runBrowserAutomation(task, context);
+        const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a ${shotKind} screenshot.` : `${input.instructions}. After completing these steps, take a ${shotKind} screenshot.`;
+        const result = await runBrowserAutomation(task, context, {
+          capture: fullPage ? "fullPage" : "viewport"
+        });
         if (!result.screenshot) {
           return result.text;
         }
@@ -4094,6 +4124,7 @@ var screenshotTool = {
         return await captureAndAnalyzeScreenshot({
           prompt: input.prompt,
           path: input.path,
+          fullPage,
           onLog: context?.onLog,
           model: resolveModel("imageAnalysis", context?.models, context?.model)
         });
@@ -4393,10 +4424,14 @@ __export(screenshot_exports, {
 var definition5 = {
   clearable: true,
   name: "screenshot",
-  description: "Capture a full-height screenshot of the current app preview. Returns a CDN URL along with visual analysis. Use to review the current state of the UI being built. Remember, the screenshot analysis is not overly precise - for example, it cannot reliably identify specific fonts by name \u2014 it can only describe what letterforms look like.",
+  description: "Capture a screenshot of the current app preview and get it back with visual analysis. Choose `fullPage`: `false` captures just the visible viewport (fast \u2014 use it to review a specific section the page is scrolled to), `true` captures the entire page top-to-bottom (slower \u2014 use it to review overall composition or a layout you can't see in one screen). Use to review the current state of the UI being built. Remember, the screenshot analysis is not overly precise - for example, it cannot reliably identify specific fonts by name \u2014 it can only describe what letterforms look like.",
   inputSchema: {
     type: "object",
     properties: {
+      fullPage: {
+        type: "boolean",
+        description: "true = full-height capture of the entire page; false = just the visible viewport. Pick based on whether you need the whole page or a specific section."
+      },
       prompt: {
         type: "string",
         description: "Optional specific question about the screenshot. Use a bulleted list to ask many questions at once."
@@ -4407,16 +4442,21 @@ var definition5 = {
       },
       instructions: {
         type: "string",
-        description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Only use instructions when you need to trigger stateful changes. Never describe what names or values to use when applying the isntructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing."
+        description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, scrolling to a specific section, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions, then capture per your `fullPage` choice \u2014 so with `fullPage: false` you can scroll to a section and capture just that viewport. It can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start at. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing."
       }
-    }
+    },
+    required: ["fullPage"]
   }
 };
 async function execute5(input, onLog, context) {
+  const fullPage = input.fullPage === true;
+  const shotKind = fullPage ? "full-page" : "viewport";
   if (input.instructions && context) {
     try {
-      const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
-      const result = await runBrowserAutomation(task, context);
+      const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a ${shotKind} screenshot.` : `${input.instructions}. After completing these steps, take a ${shotKind} screenshot.`;
+      const result = await runBrowserAutomation(task, context, {
+        capture: fullPage ? "fullPage" : "viewport"
+      });
       if (!result.screenshot) {
         return result.text;
       }
@@ -4436,6 +4476,7 @@ async function execute5(input, onLog, context) {
     return await captureAndAnalyzeScreenshot({
       prompt: input.prompt,
       path: input.path,
+      fullPage,
       onLog,
       model: resolveModel("imageAnalysis", context?.models, context?.model)
     });

package/dist/index.js CHANGED

@@ -3247,10 +3247,14 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
   let onLog;
   let model;
   let path13;
+  let fullPage = true;
   if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
     prompt = promptOrOptions.prompt;
     existingUrl = promptOrOptions.imageUrl;
     path13 = promptOrOptions.path;
+    if (promptOrOptions.fullPage !== void 0) {
+      fullPage = promptOrOptions.fullPage;
+    }
     onLog = promptOrOptions.onLog;
     model = promptOrOptions.model;
   } else {
@@ -3262,9 +3266,9 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
     url = existingUrl;
   } else {
     const ssResult = await sidecarRequest(
-      "/screenshot-full-page",
+      fullPage ? "/screenshot-full-page" : "/screenshot-viewport",
       path13 ? { path: path13 } : void 0,
-      { timeout: 12e4 }
+      { timeout: fullPage ? 12e4 : 3e4 }
     );
     url = ssResult?.url || ssResult?.screenshotUrl;
     if (!url) {
@@ -4231,6 +4235,20 @@ var init_tools = __esm({
             }
           }
         }
+      },
+      {
+        clearable: true,
+        name: "screenshotViewport",
+        description: "Capture a screenshot of just the visible viewport (no full-page scroll/stitch). Returns a CDN URL with full text analysis and description. Use this when the goal is a specific section the page is currently scrolled to, rather than the whole page.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            path: {
+              type: "string",
+              description: 'Navigate to this path before capturing (e.g. "/settings"). If omitted, screenshots the current page.'
+            }
+          }
+        }
       }
     ];
     BROWSER_EXTERNAL_TOOLS = /* @__PURE__ */ new Set(["browserCommand"]);
@@ -4263,7 +4281,7 @@ var init_prompt2 = __esm({
 });
 // src/subagents/browserAutomation/index.ts
-async function runBrowserAutomation(task, context) {
+async function runBrowserAutomation(task, context, opts) {
   const release = await acquireBrowserLock();
   try {
     const result = await runSubAgent({
@@ -4287,10 +4305,11 @@ async function runBrowserAutomation(task, context) {
             return `Error setting up browser: ${err.message}`;
           }
         }
-        if (name === "screenshotFullPage") {
+        if (name === "screenshotFullPage" || name === "screenshotViewport") {
           try {
             return await captureAndAnalyzeScreenshot({
               path: _input.path,
+              fullPage: name === "screenshotFullPage",
               onLog,
               model: resolveModel(
                 "imageAnalysis",
@@ -4366,13 +4385,15 @@ async function runBrowserAutomation(task, context) {
         return result2;
       },
       toolRegistry: context.toolRegistry,
-      captureArtifacts: ["screenshotFullPage"]
+      captureArtifacts: ["screenshotFullPage", "screenshotViewport"]
     });
     context.subAgentMessages?.set(context.toolCallId, result.messages);
-    const ss = result.artifacts?.screenshotFullPage;
+    const viewport = result.artifacts?.screenshotViewport;
+    const fullPage = result.artifacts?.screenshotFullPage;
+    const preferred = opts?.capture === "viewport" ? viewport ?? fullPage : fullPage ?? viewport;
     return {
       text: result.text,
-      ...ss?.url ? { screenshot: { url: ss.url, styleMap: ss.styleMap } } : {}
+      ...preferred?.url ? { screenshot: { url: preferred.url, styleMap: preferred.styleMap } } : {}
     };
   } finally {
     release();
@@ -4437,10 +4458,14 @@ var init_screenshot2 = __esm({
       clearable: true,
       definition: {
         name: "screenshot",
-        description: "Capture a full-height screenshot of the app preview and get a description of what's on screen. Captures the settled page state \u2014 it cannot catch animations, transitions, or transient state. Optionally provide specific questions about what you're looking for. Use a bulleted list to ask many questions at once. To ask additional questions about a screenshot you have already captured, pass its URL as imageUrl to skip recapture. If the screenshot requires interaction first (logging in, clicking a tab, dismissing a modal), use the instructions param to describe the steps.",
+        description: "Capture a screenshot of the app preview and get a description of what's on screen. Choose `fullPage`: `false` captures just the visible viewport (fast \u2014 for a specific section the page is scrolled to), `true` captures the entire page top-to-bottom (slower \u2014 for overall composition or content past the fold). Captures the settled page state \u2014 it cannot catch animations, transitions, or transient state. Optionally provide specific questions about what you're looking for. Use a bulleted list to ask many questions at once. To ask additional questions about a screenshot you have already captured, pass its URL as imageUrl to skip recapture. If the screenshot requires interaction first (logging in, clicking a tab, dismissing a modal, scrolling to a section), use the instructions param to describe the steps.",
         inputSchema: {
           type: "object",
           properties: {
+            fullPage: {
+              type: "boolean",
+              description: "true = full-height capture of the entire page; false = just the visible viewport. Pick based on whether you need the whole page or a specific section."
+            },
             prompt: {
               type: "string",
               description: "Optional question about the screenshot. If omitted, returns a general description of what's visible."
@@ -4455,12 +4480,15 @@ var init_screenshot2 = __esm({
             },
             instructions: {
               type: "string",
-              description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing. Use only when interaction is required to *reach* the state you want to capture \u2014 log in, dismiss a modal, switch a tab, follow a route. If your steps are exercising the app's functionality across multiple states (running flows, asserting behavior under interaction, multi-step QA), use `runAutomatedBrowserTest` instead."
+              description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, scrolling to a section, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions, then capture per your `fullPage` choice \u2014 so with `fullPage: false` you can scroll to a section and capture just that viewport. It can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing. Use only when interaction is required to *reach* the state you want to capture \u2014 log in, dismiss a modal, switch a tab, follow a route, scroll to a section. If your steps are exercising the app's functionality across multiple states (running flows, asserting behavior under interaction, multi-step QA), use `runAutomatedBrowserTest` instead."
             }
-          }
+          },
+          required: ["fullPage"]
         }
       },
       async execute(input, context) {
+        const fullPage = input.fullPage === true;
+        const shotKind = fullPage ? "full-page" : "viewport";
         try {
           if (input.imageUrl) {
             return await captureAndAnalyzeScreenshot({
@@ -4471,8 +4499,10 @@ var init_screenshot2 = __esm({
             });
           }
           if (input.instructions && context) {
-            const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
-            const result = await runBrowserAutomation(task, context);
+            const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a ${shotKind} screenshot.` : `${input.instructions}. After completing these steps, take a ${shotKind} screenshot.`;
+            const result = await runBrowserAutomation(task, context, {
+              capture: fullPage ? "fullPage" : "viewport"
+            });
             if (!result.screenshot) {
               return result.text;
             }
@@ -4489,6 +4519,7 @@ var init_screenshot2 = __esm({
             return await captureAndAnalyzeScreenshot({
               prompt: input.prompt,
               path: input.path,
+              fullPage,
               onLog: context?.onLog,
               model: resolveModel("imageAnalysis", context?.models, context?.model)
             });
@@ -4826,10 +4857,14 @@ __export(screenshot_exports, {
   execute: () => execute5
 });
 async function execute5(input, onLog, context) {
+  const fullPage = input.fullPage === true;
+  const shotKind = fullPage ? "full-page" : "viewport";
   if (input.instructions && context) {
     try {
-      const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
-      const result = await runBrowserAutomation(task, context);
+      const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a ${shotKind} screenshot.` : `${input.instructions}. After completing these steps, take a ${shotKind} screenshot.`;
+      const result = await runBrowserAutomation(task, context, {
+        capture: fullPage ? "fullPage" : "viewport"
+      });
       if (!result.screenshot) {
         return result.text;
       }
@@ -4849,6 +4884,7 @@ async function execute5(input, onLog, context) {
     return await captureAndAnalyzeScreenshot({
       prompt: input.prompt,
       path: input.path,
+      fullPage,
       onLog,
       model: resolveModel("imageAnalysis", context?.models, context?.model)
     });
@@ -4869,10 +4905,14 @@ var init_screenshot3 = __esm({
     definition5 = {
       clearable: true,
       name: "screenshot",
-      description: "Capture a full-height screenshot of the current app preview. Returns a CDN URL along with visual analysis. Use to review the current state of the UI being built. Remember, the screenshot analysis is not overly precise - for example, it cannot reliably identify specific fonts by name \u2014 it can only describe what letterforms look like.",
+      description: "Capture a screenshot of the current app preview and get it back with visual analysis. Choose `fullPage`: `false` captures just the visible viewport (fast \u2014 use it to review a specific section the page is scrolled to), `true` captures the entire page top-to-bottom (slower \u2014 use it to review overall composition or a layout you can't see in one screen). Use to review the current state of the UI being built. Remember, the screenshot analysis is not overly precise - for example, it cannot reliably identify specific fonts by name \u2014 it can only describe what letterforms look like.",
       inputSchema: {
         type: "object",
         properties: {
+          fullPage: {
+            type: "boolean",
+            description: "true = full-height capture of the entire page; false = just the visible viewport. Pick based on whether you need the whole page or a specific section."
+          },
           prompt: {
             type: "string",
             description: "Optional specific question about the screenshot. Use a bulleted list to ask many questions at once."
@@ -4883,9 +4923,10 @@ var init_screenshot3 = __esm({
           },
           instructions: {
             type: "string",
-            description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Only use instructions when you need to trigger stateful changes. Never describe what names or values to use when applying the isntructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing."
+            description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, scrolling to a specific section, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions, then capture per your `fullPage` choice \u2014 so with `fullPage: false` you can scroll to a section and capture just that viewport. It can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start at. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing."
           }
-        }
+        },
+        required: ["fullPage"]
       }
     };
   }

package/dist/prompt/static/coding.md CHANGED

@@ -9,12 +9,12 @@
 ### Verification
 Run `lspDiagnostics` after every turn where you have edited code in any meaningful way. You don't need to run it for things like changing copy or CSS colors, but you should run it after any structural changes to code. It catches syntax errors, broken imports, and type mismatches instantly. After a big build or significant changes, also do a lightweight runtime check to catch the things static analysis misses (schema mismatches, missing imports, bad queries). Your runtime check can include:
 - Spot-checking methods with `runMethod`. The dev database is a disposable snapshot that will have been seeded with scenario data, so don't worry about being destructive.
-- For frontend work, taking a `screenshot` to confirm the main view renders correctly or look at the browser log for any console errors in the user's preview.
+- For frontend work, checking the browser log for any console errors in the user's preview, and — when a change's visual outcome is genuinely uncertain — taking a `screenshot` to confirm the main view renders correctly.
 - Using `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, when the user reports something broken that you can't identify from code alone, or whenever the verification involves driving the app through multiple interactions.
 Aim for confidence that the core happy paths work. If the 80% case is solid, the remaining edge cases are likely fine and the user can surface them in chat. Don't screenshot every page, test every permutation, or verify every secondary flow. One or two runtime checks that confirm the app loads and data flows through is enough.
-When making mechanical edits as part of iterating with the user (e.g., moving elements, changing labels, small redesigns and refactors), don't screenshot to confirm, simply trust your code. Re-screenshot only when changes are structural enough that the visual outcome is genuinely uncertain (new layout, new component composition, new route), or when the user reports something visible that you can't see in the code. The screenshot tool captures static/settled state - don't try to hack it with different instructions to capture transient states or animations or things like that. If what you need is not avaialble via screenshot, fall back to static analysis by tracing code.
+Default to trusting your code. The test is whether you can predict the rendered result from the diff: for copy changes, color and spacing tweaks, swapping classes, and most style edits, you can — the diff already tells you the outcome, so don't screenshot. Reach for a `screenshot` only when the visual result is genuinely uncertain and you can't trace it from the code (a new layout, a new component composition, a new route), or when the user reports something visible that you can't see in the code. And when you're iterating live with the user on a page they're previewing, the user is your viewport — make the edit and let them react, rather than confirming what they can already see. The screenshot tool captures static/settled state - don't try to hack it with different instructions to capture transient states or animations or things like that. If what you need is not available via screenshot, fall back to static analysis by tracing code.
 ### Process Logs
 Process logs are available at .logs/ in NDJSON format (one JSON object per line) for debugging. Each line has at minimum ts (unix millis) and msg fields, plus structured context like level, module, requestId, toolCallId where available. You can use `jq` to examine logs and debug failures. Tools like run method or run scenario execute synchronously, so log data will be available by the time those tools return their results to you, there is no need to `sleep` before querying logfiles.

package/dist/subagents/browserAutomation/prompt.md CHANGED

@@ -139,8 +139,8 @@ Check a count with evaluate:
 ```
 </examples>
-### Full Page Screenshot
-You can use the `screenshotFullPage` tool to take a full-height screenshot of the current page. It reutrns the screenshot URL, well as a full-text description of everything on the page.
+### Final Screenshot
+You can use the `screenshotFullPage` tool to take a full-height screenshot of the current page, or the `screenshotViewport` tool to capture just the visible viewport (faster, and the right choice when the task is about a specific section you've scrolled to). Both return the screenshot URL plus a full-text description. If the task asked for a viewport/section view, end with `screenshotViewport`; if it asked for the whole page, end with `screenshotFullPage`.
 <rules>
   - Always batch steps into a single browserCommand call. Don't send one step per turn. Type + click + wait should be one call, not three separate turns.

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@mindstudio-ai/remy",
-  "version": "0.1.191",
+  "version": "0.1.193",
   "description": "MindStudio coding agent",
   "repository": {
     "type": "git",