npm - @mindstudio-ai/remy - Versions diffs - 0.1.150 → 0.1.152 - Mend

@mindstudio-ai/remy 0.1.150 → 0.1.152

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/headless.js +51 -62
package/dist/index.js +53 -65
package/dist/prompt/.notes.md +1 -3
package/dist/prompt/static/coding.md +1 -1
package/dist/prompt/static/instructions.md +2 -1
package/dist/prompt/static/team.md +1 -1
package/dist/subagents/browserAutomation/prompt.md +2 -2
package/package.json +1 -1

package/dist/headless.js CHANGED Viewed

@@ -537,6 +537,12 @@ async function* streamChat(params) {
       }
     }
   }
+  if (buffer.startsWith("data: ")) {
+    try {
+      yield JSON.parse(buffer.slice(6));
+    } catch {
+    }
+  }
   if (!receivedDone) {
     log2.warn("Stream ended without done event", {
       requestId,
@@ -544,12 +550,10 @@ async function* streamChat(params) {
       durationMs: Date.now() - startTime,
       remainingBuffer: buffer.slice(0, 200)
     });
-  }
-  if (buffer.startsWith("data: ")) {
-    try {
-      yield JSON.parse(buffer.slice(6));
-    } catch {
-    }
+    yield {
+      type: "error",
+      error: "Network error: stream ended before completion"
+    };
   }
 }
 var MAX_RETRIES = 5;
@@ -786,11 +790,40 @@ function serializeForSummary(messages) {
     return `[${msg.role}]: ${parts.join("\n")}`;
   }).join("\n\n");
 }
+var CHUNK_CHAR_LIMIT = 24e5;
 async function generateSummary(apiConfig, name, compactionPrompt, messagesToSummarize, mainSystem, mainTools) {
   const serialized = serializeForSummary(messagesToSummarize);
   if (!serialized.trim()) {
     return null;
   }
+  if (serialized.length > CHUNK_CHAR_LIMIT && messagesToSummarize.length > 1) {
+    const mid = Math.floor(messagesToSummarize.length / 2);
+    log3.info("Chunking summary", {
+      name,
+      messageCount: messagesToSummarize.length,
+      serializedLength: serialized.length
+    });
+    const [first, second] = await Promise.all([
+      generateSummary(
+        apiConfig,
+        `${name} [pt1]`,
+        compactionPrompt,
+        messagesToSummarize.slice(0, mid),
+        mainSystem,
+        mainTools
+      ),
+      generateSummary(
+        apiConfig,
+        `${name} [pt2]`,
+        compactionPrompt,
+        messagesToSummarize.slice(mid),
+        mainSystem,
+        mainTools
+      )
+    ]);
+    const parts = [first, second].filter((p) => !!p);
+    return parts.length > 0 ? parts.join("\n\n---\n\n") : null;
+  }
   log3.info("Generating summary", {
     name,
     messageCount: messagesToSummarize.length,
@@ -2676,28 +2709,6 @@ function acquireBrowserLock() {
   lockQueue = next;
   return wait.then(() => release);
 }
-async function checkBrowserConnected() {
-  try {
-    const status = await sidecarRequest(
-      "/browser-status",
-      {},
-      { timeout: 5e3 }
-    );
-    if (!status.connected) {
-      return {
-        connected: false,
-        reason: BROWSER_UNAVAILABLE_MESSAGE
-      };
-    }
-    return { connected: true };
-  } catch {
-    return {
-      connected: false,
-      reason: BROWSER_UNAVAILABLE_MESSAGE
-    };
-  }
-}
-var BROWSER_UNAVAILABLE_MESSAGE = "Browser preview unavailable \u2014 the user has closed their browser and we are continuing to work in the background. This is not a code failure and not something to diagnose. Do not tell the user to click or open anything. Skip the visual check and verify your work through other means: runMethod for backend behavior, queryDatabase for data checks, .logs/devServer.ndjson for build errors, .logs/browser.ndjson for runtime errors, lspDiagnostics for type/syntax, or read the code directly.";
 // src/statusWatcher.ts
 function startStatusWatcher(config) {
@@ -2818,9 +2829,9 @@ function fixOrphanedToolCalls(messages) {
       toolResultIds.add(msg.toolCallId);
     }
   }
-  const result = [...messages];
-  for (let i = result.length - 1; i >= 0; i--) {
-    const msg = result[i];
+  const result = [];
+  for (const msg of messages) {
+    result.push(msg);
     if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
       continue;
     }
@@ -2828,17 +2839,15 @@ function fixOrphanedToolCalls(messages) {
       (b) => b.type === "tool"
     );
     const orphans = toolBlocks.filter((tc) => !toolResultIds.has(tc.id));
-    if (orphans.length === 0) {
-      continue;
+    for (const tc of orphans) {
+      result.push({
+        role: "user",
+        content: "Error: tool result lost (session recovered)",
+        toolCallId: tc.id,
+        isToolError: true
+      });
+      toolResultIds.add(tc.id);
     }
-    const synthetics = orphans.map((tc) => ({
-      role: "user",
-      content: "Error: tool result lost (session recovered)",
-      toolCallId: tc.id,
-      isToolError: true
-    }));
-    result.splice(i + 1, 0, ...synthetics);
-    break;
   }
   return result;
 }
@@ -3336,7 +3345,7 @@ var BROWSER_TOOLS = [
   {
     clearable: true,
     name: "browserCommand",
-    description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Timeout: 120s.",
+    description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recordingUrl` \u2014 an rrweb session recording for visual replay. Timeout: 120s.",
     inputSchema: {
       type: "object",
       properties: {
@@ -3473,14 +3482,6 @@ var browserAutomationTool = {
     }
     const release = await acquireBrowserLock();
     try {
-      const browserStatus = await checkBrowserConnected();
-      if (!browserStatus.connected) {
-        return browserStatus.reason ?? "Browser preview unavailable.";
-      }
-      try {
-        await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
-      } catch {
-      }
       const result = await runSubAgent({
         system: getBrowserAutomationPrompt(),
         task: input.task,
@@ -3570,10 +3571,6 @@ var browserAutomationTool = {
         toolRegistry: context.toolRegistry,
         captureArtifacts: ["screenshotFullPage"]
       });
-      try {
-        await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
-      } catch {
-      }
       context.subAgentMessages?.set(context.toolCallId, result.messages);
       const ss = result.artifacts?.screenshotFullPage;
       if (ss?.url) {
@@ -3659,10 +3656,6 @@ var screenshotTool = {
       }
       const release = await acquireBrowserLock();
       try {
-        const browserStatus = await checkBrowserConnected();
-        if (!browserStatus.connected) {
-          return browserStatus.reason ?? "Browser preview unavailable.";
-        }
         return await captureAndAnalyzeScreenshot({
           prompt: input.prompt,
           path: input.path,
@@ -3997,10 +3990,6 @@ async function execute5(input, onLog, context) {
   }
   const release = await acquireBrowserLock();
   try {
-    const browserStatus = await checkBrowserConnected();
-    if (!browserStatus.connected) {
-      return browserStatus.reason ?? "Browser preview unavailable.";
-    }
     return await captureAndAnalyzeScreenshot({
       prompt: input.prompt,
       path: input.path,

package/dist/index.js CHANGED Viewed

@@ -222,6 +222,12 @@ async function* streamChat(params) {
       }
     }
   }
+  if (buffer.startsWith("data: ")) {
+    try {
+      yield JSON.parse(buffer.slice(6));
+    } catch {
+    }
+  }
   if (!receivedDone) {
     log.warn("Stream ended without done event", {
       requestId,
@@ -229,12 +235,10 @@ async function* streamChat(params) {
       durationMs: Date.now() - startTime,
       remainingBuffer: buffer.slice(0, 200)
     });
-  }
-  if (buffer.startsWith("data: ")) {
-    try {
-      yield JSON.parse(buffer.slice(6));
-    } catch {
-    }
+    yield {
+      type: "error",
+      error: "Network error: stream ended before completion"
+    };
   }
 }
 function isRetryableError(error) {
@@ -1507,6 +1511,34 @@ async function generateSummary(apiConfig, name, compactionPrompt, messagesToSumm
   if (!serialized.trim()) {
     return null;
   }
+  if (serialized.length > CHUNK_CHAR_LIMIT && messagesToSummarize.length > 1) {
+    const mid = Math.floor(messagesToSummarize.length / 2);
+    log2.info("Chunking summary", {
+      name,
+      messageCount: messagesToSummarize.length,
+      serializedLength: serialized.length
+    });
+    const [first, second] = await Promise.all([
+      generateSummary(
+        apiConfig,
+        `${name} [pt1]`,
+        compactionPrompt,
+        messagesToSummarize.slice(0, mid),
+        mainSystem,
+        mainTools
+      ),
+      generateSummary(
+        apiConfig,
+        `${name} [pt2]`,
+        compactionPrompt,
+        messagesToSummarize.slice(mid),
+        mainSystem,
+        mainTools
+      )
+    ]);
+    const parts = [first, second].filter((p) => !!p);
+    return parts.length > 0 ? parts.join("\n\n---\n\n") : null;
+  }
   log2.info("Generating summary", {
     name,
     messageCount: messagesToSummarize.length,
@@ -1544,7 +1576,7 @@ ${serialized}` : serialized;
   log2.info("Summary generated", { name, summaryLength: summaryText.length });
   return summaryText.trim();
 }
-var log2, CONVERSATION_SUMMARY_PROMPT, SUBAGENT_SUMMARY_PROMPT, SUMMARIZABLE_SUBAGENTS;
+var log2, CONVERSATION_SUMMARY_PROMPT, SUBAGENT_SUMMARY_PROMPT, SUMMARIZABLE_SUBAGENTS, CHUNK_CHAR_LIMIT;
 var init_compaction = __esm({
   "src/compaction/index.ts"() {
     "use strict";
@@ -1555,6 +1587,7 @@ var init_compaction = __esm({
     CONVERSATION_SUMMARY_PROMPT = readAsset("compaction", "conversation.md");
     SUBAGENT_SUMMARY_PROMPT = readAsset("compaction", "subagent.md");
     SUMMARIZABLE_SUBAGENTS = ["visualDesignExpert", "productVision"];
+    CHUNK_CHAR_LIMIT = 24e5;
   }
 });
@@ -2954,34 +2987,11 @@ function acquireBrowserLock() {
   lockQueue = next;
   return wait.then(() => release);
 }
-async function checkBrowserConnected() {
-  try {
-    const status = await sidecarRequest(
-      "/browser-status",
-      {},
-      { timeout: 5e3 }
-    );
-    if (!status.connected) {
-      return {
-        connected: false,
-        reason: BROWSER_UNAVAILABLE_MESSAGE
-      };
-    }
-    return { connected: true };
-  } catch {
-    return {
-      connected: false,
-      reason: BROWSER_UNAVAILABLE_MESSAGE
-    };
-  }
-}
-var lockQueue, BROWSER_UNAVAILABLE_MESSAGE;
+var lockQueue;
 var init_browserLock = __esm({
   "src/tools/_helpers/browserLock.ts"() {
     "use strict";
-    init_sidecar();
     lockQueue = Promise.resolve();
-    BROWSER_UNAVAILABLE_MESSAGE = "Browser preview unavailable \u2014 the user has closed their browser and we are continuing to work in the background. This is not a code failure and not something to diagnose. Do not tell the user to click or open anything. Skip the visual check and verify your work through other means: runMethod for backend behavior, queryDatabase for data checks, .logs/devServer.ndjson for build errors, .logs/browser.ndjson for runtime errors, lspDiagnostics for type/syntax, or read the code directly.";
   }
 });
@@ -3114,9 +3124,9 @@ function fixOrphanedToolCalls(messages) {
       toolResultIds.add(msg.toolCallId);
     }
   }
-  const result = [...messages];
-  for (let i = result.length - 1; i >= 0; i--) {
-    const msg = result[i];
+  const result = [];
+  for (const msg of messages) {
+    result.push(msg);
     if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
       continue;
     }
@@ -3124,17 +3134,15 @@ function fixOrphanedToolCalls(messages) {
       (b) => b.type === "tool"
     );
     const orphans = toolBlocks.filter((tc) => !toolResultIds.has(tc.id));
-    if (orphans.length === 0) {
-      continue;
+    for (const tc of orphans) {
+      result.push({
+        role: "user",
+        content: "Error: tool result lost (session recovered)",
+        toolCallId: tc.id,
+        isToolError: true
+      });
+      toolResultIds.add(tc.id);
     }
-    const synthetics = orphans.map((tc) => ({
-      role: "user",
-      content: "Error: tool result lost (session recovered)",
-      toolCallId: tc.id,
-      isToolError: true
-    }));
-    result.splice(i + 1, 0, ...synthetics);
-    break;
   }
   return result;
 }
@@ -3652,7 +3660,7 @@ var init_tools = __esm({
       {
         clearable: true,
         name: "browserCommand",
-        description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Timeout: 120s.",
+        description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recordingUrl` \u2014 an rrweb session recording for visual replay. Timeout: 120s.",
         inputSchema: {
           type: "object",
           properties: {
@@ -3810,14 +3818,6 @@ var init_browserAutomation = __esm({
         }
         const release = await acquireBrowserLock();
         try {
-          const browserStatus = await checkBrowserConnected();
-          if (!browserStatus.connected) {
-            return browserStatus.reason ?? "Browser preview unavailable.";
-          }
-          try {
-            await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
-          } catch {
-          }
           const result = await runSubAgent({
             system: getBrowserAutomationPrompt(),
             task: input.task,
@@ -3907,10 +3907,6 @@ var init_browserAutomation = __esm({
             toolRegistry: context.toolRegistry,
             captureArtifacts: ["screenshotFullPage"]
           });
-          try {
-            await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
-          } catch {
-          }
           context.subAgentMessages?.set(context.toolCallId, result.messages);
           const ss = result.artifacts?.screenshotFullPage;
           if (ss?.url) {
@@ -4006,10 +4002,6 @@ var init_screenshot2 = __esm({
           }
           const release = await acquireBrowserLock();
           try {
-            const browserStatus = await checkBrowserConnected();
-            if (!browserStatus.connected) {
-              return browserStatus.reason ?? "Browser preview unavailable.";
-            }
             return await captureAndAnalyzeScreenshot({
               prompt: input.prompt,
               path: input.path,
@@ -4360,10 +4352,6 @@ async function execute5(input, onLog, context) {
   }
   const release = await acquireBrowserLock();
   try {
-    const browserStatus = await checkBrowserConnected();
-    if (!browserStatus.connected) {
-      return browserStatus.reason ?? "Browser preview unavailable.";
-    }
     return await captureAndAnalyzeScreenshot({
       prompt: input.prompt,
       path: input.path,

package/dist/prompt/.notes.md CHANGED Viewed

@@ -145,7 +145,7 @@ The intro framing ("you have a lot on your plate") gives the model permission to
 | `productVision` | Roadmap ownership & product strategy | writeRoadmapItem, updateRoadmapItem, deleteRoadmapItem | Spec files + current roadmap |
 | `sdkConsultant` | MindStudio SDK architecture | None (shells out to `mindstudio ask` CLI) | None (external agent) |
 | `codeSanityCheck` | Pre-build review | readFile, grep, glob, searchGoogle, fetchUrl, askMindStudioSdk, bash (readonly) | Spec files |
-| `browserAutomation` | Interactive UI testing | browserCommand, screenshot, resetBrowser | None (interacts with live preview) |
+| `browserAutomation` | Interactive UI testing | browserCommand, screenshotFullPage, setupBrowser | None (interacts with live preview) |
 ### Shared infrastructure
@@ -177,8 +177,6 @@ New `type: roadmap` for MSFM files in `src/roadmap/`. Each item has frontmatter
 - **Automated message sentinel** — `@@automated::{tag}@@` prefix on user messages, stripped before sending to LLM. Frontend uses for custom rendering.
 - **Project naming** — `setProjectName` tool for setting display name after intake.
 - **Dynamic status labels** — `statusWatcher.ts` periodically calls a lightweight endpoint to generate descriptive labels during agent work.
-- **Browser status check** — agent checks `/browser-status` before starting browser automation to fail fast if preview isn't connected.
-- **Browser reset** — `resetBrowser` tool restores preview to clean state after testing.
 - **Asset bundling** — `tsup.config.ts` copies .md/.json/.sh files from src/ to dist/ on build.
 ## What's Not Done

package/dist/prompt/static/coding.md CHANGED Viewed

@@ -12,7 +12,7 @@ Run `lspDiagnostics` after every turn where you have edited code in any meaningf
 - Spot-check methods with `runMethod`. The dev database is a disposable snapshot that will have been seeded with scenario data, so don't worry about being destructive.
 - For frontend work, take a single `screenshot` to confirm the main view renders correctly or look at the browser log for any console errors in the user's preview.
 - Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, or when the user reports something broken that you can't identify from code alone.
-- If the browser preview isn't connected, skip the visual check and verify through methods, logs, and code instead. Preview unavailability is an environmental state, not a code issue — the user might have closed their browser and we are continuing to work in the background.
+- If the browser is unavailable, skip the visual check and verify through methods, logs, and code instead. Browser unavailability is an infrastructure issue, not a code problem — don't try to diagnose or fix it.
 Aim for confidence that the core happy paths work. If the 80% case is solid, the remaining edge cases are likely fine and the user can surface them in chat. Don't screenshot every page, test every permutation, or verify every secondary flow. One or two runtime checks that confirm the app loads and data flows through is enough.

package/dist/prompt/static/instructions.md CHANGED Viewed

@@ -39,4 +39,5 @@ You will occasionally receive automated messages prefixed with `@@automated_mess
 - Keep language accessible. Describe what the app *does*, not how it's implemented, unless the user demonstrates technical fluency.
 - Always use full paths relative to the project root when mentioning files (`dist/interfaces/web/src/App.tsx`, not `App.tsx`). Paths will be rendered as clickable links for the user.
 - Use inline `code` formatting only for things the user needs to type or search for.
-- When writing prose or communicating with the user, avoid em dashes (and especially when writing specs); use periods, commas, colons, or parentheses instead. Do not use emojis.
+- When writing prose or communicating with the user, avoid em dashes (and especially when writing specs); use periods, commas, colons, or parentheses instead.
+- Never use emojis when responding to the user.

package/dist/prompt/static/team.md CHANGED Viewed

@@ -44,7 +44,7 @@ The QA agent can see the screen. Describe what to test, not how — it will figu
 Never tell QA what names to use when testing or what values to input - it will use its own judgment.
-If the browser preview is unavailable, QA can't run. Treat that as an environmental limit, not a problem with the app — the user has closed their browser and we are continuing to work in the background. Do not guide the user to open or click anything. Verify through methods, logs, and code inspection instead, and just note that visual QA was skipped.
+If the browser is unavailable, QA can't run. That's an infrastructure issue, not a problem with the app — don't try to diagnose or fix it. Verify through methods, logs, and code inspection instead, and note that visual QA was skipped.
 ### Background Execution

package/dist/subagents/browserAutomation/prompt.md CHANGED Viewed

@@ -3,7 +3,7 @@ You are a browser smoke test agent. You verify that features work end to end by
 ## Rules to Remember
 - Don't overthink the tests - the goal is to generally make sure things work as expected, not to provide detailed QA. If something seems mostly okay, note it and move on. Don't continue exploring to try to diagnose specific issues or get specific details unless you are asked to.
 - Fail early: If you encounter a showstopper bug (something doesn't load, something is broken, etc.) do not attempt to diagnose it or work around it. We need core common user paths to work - if they don't the app is broken and testing should not continue until it is fixed. Return early with a report to let the developer fix it, they'll run another test when they're ready.
-- Browser disconnection is environmental, not a test failure. If `browserCommand` returns `BROWSER_DISCONNECTED` or the browser otherwise drops mid-test, the test is **inconclusive** — the user has closed their browser and we are continuing to work in the background. Do not retry, do not attribute it to app brokenness, do not tell the user to open or click anything. Report "test inconclusive: browser disconnected" and stop.
+- Browser unavailability is an infrastructure issue, not a test failure. If `browserCommand` reports the browser is unavailable or drops mid-test, the test is **inconclusive** — do not retry, do not attribute it to app brokenness. Report "test inconclusive: browser unavailable" and stop.
 ## Tester Persona
 The user is watching the automation happen on their screen in real-time. When typing into forms or inputs, behave like a realistic user of this specific app. Use the app context (if provided) to understand the audience and tone. Type the way that audience would actually type — not formal, not robotic. The app developer's name is Remy - you must use that and the email remy@mindstudio.ai as the basis for any testing that requires a persona.
@@ -39,7 +39,6 @@ Each interactive element has a `[ref=eN]` you can use to target it.
 - `select`: Select a dropdown option by text. Target the `<select>` element, set `option` to the option text.
 - `wait`: Wait for an element to appear (polls every 100ms, default 5s timeout). Also waits for network to settle after the element is found.
 - `navigate`: Navigate to a new URL within the app. Waits for the new page to load before continuing with subsequent steps. Use this instead of evaluate with `window.location.href` when you need to navigate and then continue interacting with the new page. Steps after navigate execute on the new page automatically.
-- `reload`: Reload the current page. Useful if something has crashed, you can not exit some dynamic screen, or you need to clear stale data or some stale app state. Waits for the page to reload before continuting with subsequent steps. Use this instead of using evaluate to reload a page.
 - `evaluate`: Run arbitrary JavaScript in the page and return the result.
 - `styles`: Read computed CSS styles from page elements. Pass a `properties` array with camelCase CSS property names (e.g., `["backgroundColor", "borderRadius", "fontSize"]`). Omit `properties` for a default set covering colors, typography, spacing, borders, shadows, dimensions, and layout. Uses the same targeting as click/type (ref, text, role, label, selector). Omit the target to get styles for all elements from the last snapshot.
 - `screenshotViewport`: Take a screenshot of the current viewport. Returns CDN url with full text analysis and dimensions. Useful at the end of an action batch to visually see things like layout shift or overflow. Do not use if you can get what you need with other tools - only use when you need to visually see the viewport.
@@ -61,6 +60,7 @@ Each browserCommand returns:
 - `snapshot`: the final page state after all steps complete (always present, even without an explicit snapshot step)
 - `logs`: array of browser-side events that fired during the batch (console output, network failures, JS errors, user interactions). Check this for errors before reporting pass.
 - `duration`: total execution time in ms
+- `recordingUrl` (optional): URL to an rrweb session recording of the tool call. Present whenever the batch contained an interactive step (click, type, select). Include it in your failure reports so the main agent can share it — it's the fastest way to reproduce a bug visually.
 On error, the failing step has an `error` field and execution stops. Remaining steps are skipped.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mindstudio-ai/remy",
-  "version": "0.1.150",
+  "version": "0.1.152",
   "description": "MindStudio coding agent",
   "repository": {
     "type": "git",