@mindstudio-ai/remy 0.1.27 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -2076,16 +2076,16 @@ var runMethodTool = {
2076
2076
  var SCREENSHOT_ANALYSIS_PROMPT = "Describe everything visible on screen from top to bottom \u2014 every element, its position, its size relative to the viewport, its colors, its content. Be thorough and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components).";
2077
2077
  async function captureAndAnalyzeScreenshot(promptOrOptions) {
2078
2078
  let prompt;
2079
- let viewportOnly = false;
2079
+ let fullPage = false;
2080
2080
  if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
2081
2081
  prompt = promptOrOptions.prompt;
2082
- viewportOnly = promptOrOptions.viewportOnly ?? false;
2082
+ fullPage = promptOrOptions.fullPage ?? false;
2083
2083
  } else {
2084
2084
  prompt = promptOrOptions;
2085
2085
  }
2086
2086
  const ssResult = await sidecarRequest(
2087
2087
  "/screenshot",
2088
- { fullPage: !viewportOnly },
2088
+ { fullPage },
2089
2089
  { timeout: 12e4 }
2090
2090
  );
2091
2091
  log.debug("Screenshot response", { ssResult });
@@ -2109,7 +2109,7 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2109
2109
  var screenshotTool = {
2110
2110
  definition: {
2111
2111
  name: "screenshot",
2112
- description: "Capture a screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for. Set viewportOnly to capture just what the user sees on screen.",
2112
+ description: "Capture a screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for. By default captures the viewport (what the user sees). Set fullPage to capture the entire scrollable page.",
2113
2113
  inputSchema: {
2114
2114
  type: "object",
2115
2115
  properties: {
@@ -2117,9 +2117,9 @@ var screenshotTool = {
2117
2117
  type: "string",
2118
2118
  description: "Optional question about the screenshot. If omitted, returns a general description of what's visible."
2119
2119
  },
2120
- viewportOnly: {
2120
+ fullPage: {
2121
2121
  type: "boolean",
2122
- description: "Capture only the visible viewport instead of the full scrollable page. Use when checking above-the-fold layout or viewport-relative sizing like 100vh."
2122
+ description: "Capture the full scrollable page instead of just the viewport. Use when you need to see below-the-fold content."
2123
2123
  }
2124
2124
  }
2125
2125
  }
@@ -2128,7 +2128,7 @@ var screenshotTool = {
2128
2128
  try {
2129
2129
  return await captureAndAnalyzeScreenshot({
2130
2130
  prompt: input.prompt,
2131
- viewportOnly: input.viewportOnly
2131
+ fullPage: input.fullPage
2132
2132
  });
2133
2133
  } catch (err) {
2134
2134
  return `Error taking screenshot: ${err.message}`;
@@ -2294,7 +2294,7 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
2294
2294
  if (externalTools.has(tc.name) && resolveExternalTool) {
2295
2295
  result = await resolveExternalTool(tc.id, tc.name, tc.input);
2296
2296
  } else {
2297
- result = await executeTool2(tc.name, tc.input);
2297
+ result = await executeTool2(tc.name, tc.input, tc.id);
2298
2298
  }
2299
2299
  const isError = result.startsWith("Error");
2300
2300
  emit2({
@@ -2629,7 +2629,7 @@ var DESIGN_EXPERT_TOOLS = [
2629
2629
  },
2630
2630
  {
2631
2631
  name: "screenshot",
2632
- description: "Capture a screenshot of the app preview. Returns a CDN URL with visual analysis. Use to review the current state of the UI being built. Set viewportOnly to capture just what the user sees on screen.",
2632
+ description: "Capture a screenshot of the app preview. Returns a CDN URL with visual analysis. Use to review the current state of the UI being built. By default captures the viewport. Set fullPage to capture the entire scrollable page.",
2633
2633
  inputSchema: {
2634
2634
  type: "object",
2635
2635
  properties: {
@@ -2637,22 +2637,22 @@ var DESIGN_EXPERT_TOOLS = [
2637
2637
  type: "string",
2638
2638
  description: "Optional specific question about the screenshot."
2639
2639
  },
2640
- viewportOnly: {
2640
+ fullPage: {
2641
2641
  type: "boolean",
2642
- description: "Capture only the visible viewport instead of the full scrollable page. Use when checking above-the-fold layout or viewport-relative sizing like 100vh."
2642
+ description: "Capture the full scrollable page instead of just the viewport. Use when you need to see below-the-fold content."
2643
2643
  }
2644
2644
  }
2645
2645
  }
2646
2646
  },
2647
2647
  {
2648
2648
  name: "runBrowserTest",
2649
- description: "Run an automated browser test against the live app preview. Use to verify visual implementation: check computed styles, navigate between pages, take analyzed screenshots. Describe what you want to verify and the browser agent handles the interaction.",
2649
+ description: "Run an automated browser test against the live app preview. Use to verify implementation details via getComputedStyle: font-family names, exact colors, spacing, borders, shadows, font sizes, transforms. Also supports navigation between pages and screenshots. Use this to confirm the right fonts are loaded and CSS values match the spec.",
2650
2650
  inputSchema: {
2651
2651
  type: "object",
2652
2652
  properties: {
2653
2653
  task: {
2654
2654
  type: "string",
2655
- description: 'What to verify, in natural language. E.g., "Check that the hero section cards have border-radius: 24px and the correct rotation angles" or "Navigate to /about and screenshot it".'
2655
+ description: 'What to verify, in natural language. Focus on measurable properties: "Check the hero cards have border-radius: 24px and box-shadow" or "Verify the background color of the CTA section is #C4FF0D".'
2656
2656
  }
2657
2657
  },
2658
2658
  required: ["task"]
@@ -2684,13 +2684,13 @@ var DESIGN_EXPERT_TOOLS = [
2684
2684
  }
2685
2685
  }
2686
2686
  ];
2687
- async function executeDesignExpertTool(name, input, context) {
2687
+ async function executeDesignExpertTool(name, input, context, toolCallId) {
2688
2688
  switch (name) {
2689
2689
  case "screenshot": {
2690
2690
  try {
2691
2691
  return await captureAndAnalyzeScreenshot({
2692
2692
  prompt: input.prompt,
2693
- viewportOnly: input.viewportOnly
2693
+ fullPage: input.fullPage
2694
2694
  });
2695
2695
  } catch (err) {
2696
2696
  return `Error taking screenshot: ${err.message}`;
@@ -2790,7 +2790,13 @@ ${analysis}`;
2790
2790
  if (!context) {
2791
2791
  return "Error: browser testing requires execution context (only available in headless mode)";
2792
2792
  }
2793
- return browserAutomationTool.execute({ task: input.task }, context);
2793
+ return browserAutomationTool.execute(
2794
+ { task: input.task },
2795
+ {
2796
+ ...context,
2797
+ toolCallId: toolCallId || context.toolCallId
2798
+ }
2799
+ );
2794
2800
  }
2795
2801
  default:
2796
2802
  return `Error: unknown tool "${name}"`;
@@ -3020,7 +3026,7 @@ var designExpertTool = {
3020
3026
  task: input.task,
3021
3027
  tools: DESIGN_EXPERT_TOOLS,
3022
3028
  externalTools: /* @__PURE__ */ new Set(),
3023
- executeTool: (name, input2) => executeDesignExpertTool(name, input2, context),
3029
+ executeTool: (name, input2, toolCallId) => executeDesignExpertTool(name, input2, context, toolCallId),
3024
3030
  apiConfig: context.apiConfig,
3025
3031
  model: context.model,
3026
3032
  subAgentId: "visualDesignExpert",
package/dist/index.js CHANGED
@@ -2017,16 +2017,16 @@ var init_runMethod = __esm({
2017
2017
  // src/tools/_helpers/screenshot.ts
2018
2018
  async function captureAndAnalyzeScreenshot(promptOrOptions) {
2019
2019
  let prompt;
2020
- let viewportOnly = false;
2020
+ let fullPage = false;
2021
2021
  if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
2022
2022
  prompt = promptOrOptions.prompt;
2023
- viewportOnly = promptOrOptions.viewportOnly ?? false;
2023
+ fullPage = promptOrOptions.fullPage ?? false;
2024
2024
  } else {
2025
2025
  prompt = promptOrOptions;
2026
2026
  }
2027
2027
  const ssResult = await sidecarRequest(
2028
2028
  "/screenshot",
2029
- { fullPage: !viewportOnly },
2029
+ { fullPage },
2030
2030
  { timeout: 12e4 }
2031
2031
  );
2032
2032
  log.debug("Screenshot response", { ssResult });
@@ -2065,7 +2065,7 @@ var init_screenshot2 = __esm({
2065
2065
  screenshotTool = {
2066
2066
  definition: {
2067
2067
  name: "screenshot",
2068
- description: "Capture a screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for. Set viewportOnly to capture just what the user sees on screen.",
2068
+ description: "Capture a screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for. By default captures the viewport (what the user sees). Set fullPage to capture the entire scrollable page.",
2069
2069
  inputSchema: {
2070
2070
  type: "object",
2071
2071
  properties: {
@@ -2073,9 +2073,9 @@ var init_screenshot2 = __esm({
2073
2073
  type: "string",
2074
2074
  description: "Optional question about the screenshot. If omitted, returns a general description of what's visible."
2075
2075
  },
2076
- viewportOnly: {
2076
+ fullPage: {
2077
2077
  type: "boolean",
2078
- description: "Capture only the visible viewport instead of the full scrollable page. Use when checking above-the-fold layout or viewport-relative sizing like 100vh."
2078
+ description: "Capture the full scrollable page instead of just the viewport. Use when you need to see below-the-fold content."
2079
2079
  }
2080
2080
  }
2081
2081
  }
@@ -2084,7 +2084,7 @@ var init_screenshot2 = __esm({
2084
2084
  try {
2085
2085
  return await captureAndAnalyzeScreenshot({
2086
2086
  prompt: input.prompt,
2087
- viewportOnly: input.viewportOnly
2087
+ fullPage: input.fullPage
2088
2088
  });
2089
2089
  } catch (err) {
2090
2090
  return `Error taking screenshot: ${err.message}`;
@@ -2257,7 +2257,7 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
2257
2257
  if (externalTools.has(tc.name) && resolveExternalTool) {
2258
2258
  result = await resolveExternalTool(tc.id, tc.name, tc.input);
2259
2259
  } else {
2260
- result = await executeTool2(tc.name, tc.input);
2260
+ result = await executeTool2(tc.name, tc.input, tc.id);
2261
2261
  }
2262
2262
  const isError = result.startsWith("Error");
2263
2263
  emit2({
@@ -2570,13 +2570,13 @@ function resolvePath(filename) {
2570
2570
  const local4 = path5.join(base2, filename);
2571
2571
  return fs11.existsSync(local4) ? local4 : path5.join(base2, "subagents", "designExpert", filename);
2572
2572
  }
2573
- async function executeDesignExpertTool(name, input, context) {
2573
+ async function executeDesignExpertTool(name, input, context, toolCallId) {
2574
2574
  switch (name) {
2575
2575
  case "screenshot": {
2576
2576
  try {
2577
2577
  return await captureAndAnalyzeScreenshot({
2578
2578
  prompt: input.prompt,
2579
- viewportOnly: input.viewportOnly
2579
+ fullPage: input.fullPage
2580
2580
  });
2581
2581
  } catch (err) {
2582
2582
  return `Error taking screenshot: ${err.message}`;
@@ -2676,7 +2676,13 @@ ${analysis}`;
2676
2676
  if (!context) {
2677
2677
  return "Error: browser testing requires execution context (only available in headless mode)";
2678
2678
  }
2679
- return browserAutomationTool.execute({ task: input.task }, context);
2679
+ return browserAutomationTool.execute(
2680
+ { task: input.task },
2681
+ {
2682
+ ...context,
2683
+ toolCallId: toolCallId || context.toolCallId
2684
+ }
2685
+ );
2680
2686
  }
2681
2687
  default:
2682
2688
  return `Error: unknown tool "${name}"`;
@@ -2744,7 +2750,7 @@ var init_tools2 = __esm({
2744
2750
  },
2745
2751
  {
2746
2752
  name: "screenshot",
2747
- description: "Capture a screenshot of the app preview. Returns a CDN URL with visual analysis. Use to review the current state of the UI being built. Set viewportOnly to capture just what the user sees on screen.",
2753
+ description: "Capture a screenshot of the app preview. Returns a CDN URL with visual analysis. Use to review the current state of the UI being built. By default captures the viewport. Set fullPage to capture the entire scrollable page.",
2748
2754
  inputSchema: {
2749
2755
  type: "object",
2750
2756
  properties: {
@@ -2752,22 +2758,22 @@ var init_tools2 = __esm({
2752
2758
  type: "string",
2753
2759
  description: "Optional specific question about the screenshot."
2754
2760
  },
2755
- viewportOnly: {
2761
+ fullPage: {
2756
2762
  type: "boolean",
2757
- description: "Capture only the visible viewport instead of the full scrollable page. Use when checking above-the-fold layout or viewport-relative sizing like 100vh."
2763
+ description: "Capture the full scrollable page instead of just the viewport. Use when you need to see below-the-fold content."
2758
2764
  }
2759
2765
  }
2760
2766
  }
2761
2767
  },
2762
2768
  {
2763
2769
  name: "runBrowserTest",
2764
- description: "Run an automated browser test against the live app preview. Use to verify visual implementation: check computed styles, navigate between pages, take analyzed screenshots. Describe what you want to verify and the browser agent handles the interaction.",
2770
+ description: "Run an automated browser test against the live app preview. Use to verify implementation details via getComputedStyle: font-family names, exact colors, spacing, borders, shadows, font sizes, transforms. Also supports navigation between pages and screenshots. Use this to confirm the right fonts are loaded and CSS values match the spec.",
2765
2771
  inputSchema: {
2766
2772
  type: "object",
2767
2773
  properties: {
2768
2774
  task: {
2769
2775
  type: "string",
2770
- description: 'What to verify, in natural language. E.g., "Check that the hero section cards have border-radius: 24px and the correct rotation angles" or "Navigate to /about and screenshot it".'
2776
+ description: 'What to verify, in natural language. Focus on measurable properties: "Check the hero cards have border-radius: 24px and box-shadow" or "Verify the background color of the CTA section is #C4FF0D".'
2771
2777
  }
2772
2778
  },
2773
2779
  required: ["task"]
@@ -3042,7 +3048,7 @@ Visual design expert. Describe the situation and what you need \u2014 the agent
3042
3048
  task: input.task,
3043
3049
  tools: DESIGN_EXPERT_TOOLS,
3044
3050
  externalTools: /* @__PURE__ */ new Set(),
3045
- executeTool: (name, input2) => executeDesignExpertTool(name, input2, context),
3051
+ executeTool: (name, input2, toolCallId) => executeDesignExpertTool(name, input2, context, toolCallId),
3046
3052
  apiConfig: context.apiConfig,
3047
3053
  model: context.model,
3048
3054
  subAgentId: "visualDesignExpert",
@@ -18,6 +18,10 @@ These are things we already know about and have decided to accept:
18
18
 
19
19
  - Limited browser support for `oklch` gradients using `in <colorspace>` syntax — we accept the compatibility tradeoff for better color quality
20
20
  - Limited browser support for CSS scroll-driven animations (`animation-timeline: scroll()` / `view()`) - we accept this tradeoff
21
+ - Libraries we know are actively maintained — don't bother checking these:
22
+ - swr
23
+ - framer-motion
24
+ - styled-components
21
25
 
22
26
  ### Common pitfalls (always flag these)
23
27
 
@@ -1,7 +1,9 @@
1
1
  ## Tool usage
2
2
 
3
3
  - Use `analyzeReferenceImageOrUrl` to analyze any image URL or website URL. Websites are automatically screenshotted. Omit the prompt for a standard design analysis, or provide a custom prompt for specific questions. Do not screenshot font specimen pages, documentation, or other text-heavy pages — use `fetchUrl` for those instead.
4
- - Use `screenshot` to capture the current state of the app preview. Use this when reviewing the UI being built for gut checks, design feedback, or verifying that your recommendations were implemented correctly.
4
+ - Use `screenshot` to see the current state of the app preview. This is your primary tool for visual review. Use `fullPage: true` to see the entire page at once. Keep in mind that the screenshot analysis is approximate — for example, it cannot reliably identify specific fonts by name; it can only describe what the letterforms look like.
5
+ - Use `runBrowserTest` only to verify specific computed values that a screenshot can't tell you: exact font-family names, hex colors, pixel measurements, border-radius, box-shadow, transforms. Keep requests focused — ask it to check specific properties on specific elements, not to take screenshots or scroll around.
6
+ - **screenshot vs runBrowserTest**: Screenshot to *see* the page. Browser test to *measure* specific CSS values. Never use the browser agent for tasks your screenshot tool can handle, or vice versa.
5
7
  - Use `searchGoogle` and `fetchUrl` only when the user references something specific: a particular website to match, a brand to look up, a company whose identity you need to research. You already have curated fonts, inspiration references, and strong internal knowledge — don't search the web for generic inspiration or "best X apps." The web is for specific lookups, not creative direction.
6
8
  - When proposing multiple options, make them genuinely different directions (dark + bold vs. light + editorial) rather than minor variations.
7
9
  - When multiple tool calls are independent, make them all in a single turn. Searching for three different products, or fetching two reference sites: batch them instead of doing one per turn.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.27",
3
+ "version": "0.1.29",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",