@mindstudio-ai/remy 0.1.27 → 0.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js
CHANGED
|
@@ -2076,16 +2076,16 @@ var runMethodTool = {
|
|
|
2076
2076
|
var SCREENSHOT_ANALYSIS_PROMPT = "Describe everything visible on screen from top to bottom \u2014 every element, its position, its size relative to the viewport, its colors, its content. Be thorough and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components).";
|
|
2077
2077
|
async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
2078
2078
|
let prompt;
|
|
2079
|
-
let
|
|
2079
|
+
let fullPage = false;
|
|
2080
2080
|
if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
|
|
2081
2081
|
prompt = promptOrOptions.prompt;
|
|
2082
|
-
|
|
2082
|
+
fullPage = promptOrOptions.fullPage ?? false;
|
|
2083
2083
|
} else {
|
|
2084
2084
|
prompt = promptOrOptions;
|
|
2085
2085
|
}
|
|
2086
2086
|
const ssResult = await sidecarRequest(
|
|
2087
2087
|
"/screenshot",
|
|
2088
|
-
{ fullPage
|
|
2088
|
+
{ fullPage },
|
|
2089
2089
|
{ timeout: 12e4 }
|
|
2090
2090
|
);
|
|
2091
2091
|
log.debug("Screenshot response", { ssResult });
|
|
@@ -2109,7 +2109,7 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
|
2109
2109
|
var screenshotTool = {
|
|
2110
2110
|
definition: {
|
|
2111
2111
|
name: "screenshot",
|
|
2112
|
-
description: "Capture a screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for.
|
|
2112
|
+
description: "Capture a screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for. By default captures the viewport (what the user sees). Set fullPage to capture the entire scrollable page.",
|
|
2113
2113
|
inputSchema: {
|
|
2114
2114
|
type: "object",
|
|
2115
2115
|
properties: {
|
|
@@ -2117,9 +2117,9 @@ var screenshotTool = {
|
|
|
2117
2117
|
type: "string",
|
|
2118
2118
|
description: "Optional question about the screenshot. If omitted, returns a general description of what's visible."
|
|
2119
2119
|
},
|
|
2120
|
-
|
|
2120
|
+
fullPage: {
|
|
2121
2121
|
type: "boolean",
|
|
2122
|
-
description: "Capture
|
|
2122
|
+
description: "Capture the full scrollable page instead of just the viewport. Use when you need to see below-the-fold content."
|
|
2123
2123
|
}
|
|
2124
2124
|
}
|
|
2125
2125
|
}
|
|
@@ -2128,7 +2128,7 @@ var screenshotTool = {
|
|
|
2128
2128
|
try {
|
|
2129
2129
|
return await captureAndAnalyzeScreenshot({
|
|
2130
2130
|
prompt: input.prompt,
|
|
2131
|
-
|
|
2131
|
+
fullPage: input.fullPage
|
|
2132
2132
|
});
|
|
2133
2133
|
} catch (err) {
|
|
2134
2134
|
return `Error taking screenshot: ${err.message}`;
|
|
@@ -2294,7 +2294,7 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
|
|
|
2294
2294
|
if (externalTools.has(tc.name) && resolveExternalTool) {
|
|
2295
2295
|
result = await resolveExternalTool(tc.id, tc.name, tc.input);
|
|
2296
2296
|
} else {
|
|
2297
|
-
result = await executeTool2(tc.name, tc.input);
|
|
2297
|
+
result = await executeTool2(tc.name, tc.input, tc.id);
|
|
2298
2298
|
}
|
|
2299
2299
|
const isError = result.startsWith("Error");
|
|
2300
2300
|
emit2({
|
|
@@ -2629,7 +2629,7 @@ var DESIGN_EXPERT_TOOLS = [
|
|
|
2629
2629
|
},
|
|
2630
2630
|
{
|
|
2631
2631
|
name: "screenshot",
|
|
2632
|
-
description: "Capture a screenshot of the app preview. Returns a CDN URL with visual analysis. Use to review the current state of the UI being built. Set
|
|
2632
|
+
description: "Capture a screenshot of the app preview. Returns a CDN URL with visual analysis. Use to review the current state of the UI being built. By default captures the viewport. Set fullPage to capture the entire scrollable page.",
|
|
2633
2633
|
inputSchema: {
|
|
2634
2634
|
type: "object",
|
|
2635
2635
|
properties: {
|
|
@@ -2637,22 +2637,22 @@ var DESIGN_EXPERT_TOOLS = [
|
|
|
2637
2637
|
type: "string",
|
|
2638
2638
|
description: "Optional specific question about the screenshot."
|
|
2639
2639
|
},
|
|
2640
|
-
|
|
2640
|
+
fullPage: {
|
|
2641
2641
|
type: "boolean",
|
|
2642
|
-
description: "Capture
|
|
2642
|
+
description: "Capture the full scrollable page instead of just the viewport. Use when you need to see below-the-fold content."
|
|
2643
2643
|
}
|
|
2644
2644
|
}
|
|
2645
2645
|
}
|
|
2646
2646
|
},
|
|
2647
2647
|
{
|
|
2648
2648
|
name: "runBrowserTest",
|
|
2649
|
-
description: "Run an automated browser test against the live app preview. Use to verify
|
|
2649
|
+
description: "Run an automated browser test against the live app preview. Use to verify implementation details via getComputedStyle: font-family names, exact colors, spacing, borders, shadows, font sizes, transforms. Also supports navigation between pages and screenshots. Use this to confirm the right fonts are loaded and CSS values match the spec.",
|
|
2650
2650
|
inputSchema: {
|
|
2651
2651
|
type: "object",
|
|
2652
2652
|
properties: {
|
|
2653
2653
|
task: {
|
|
2654
2654
|
type: "string",
|
|
2655
|
-
description: 'What to verify, in natural language.
|
|
2655
|
+
description: 'What to verify, in natural language. Focus on measurable properties: "Check the hero cards have border-radius: 24px and box-shadow" or "Verify the background color of the CTA section is #C4FF0D".'
|
|
2656
2656
|
}
|
|
2657
2657
|
},
|
|
2658
2658
|
required: ["task"]
|
|
@@ -2684,13 +2684,13 @@ var DESIGN_EXPERT_TOOLS = [
|
|
|
2684
2684
|
}
|
|
2685
2685
|
}
|
|
2686
2686
|
];
|
|
2687
|
-
async function executeDesignExpertTool(name, input, context) {
|
|
2687
|
+
async function executeDesignExpertTool(name, input, context, toolCallId) {
|
|
2688
2688
|
switch (name) {
|
|
2689
2689
|
case "screenshot": {
|
|
2690
2690
|
try {
|
|
2691
2691
|
return await captureAndAnalyzeScreenshot({
|
|
2692
2692
|
prompt: input.prompt,
|
|
2693
|
-
|
|
2693
|
+
fullPage: input.fullPage
|
|
2694
2694
|
});
|
|
2695
2695
|
} catch (err) {
|
|
2696
2696
|
return `Error taking screenshot: ${err.message}`;
|
|
@@ -2790,7 +2790,13 @@ ${analysis}`;
|
|
|
2790
2790
|
if (!context) {
|
|
2791
2791
|
return "Error: browser testing requires execution context (only available in headless mode)";
|
|
2792
2792
|
}
|
|
2793
|
-
return browserAutomationTool.execute(
|
|
2793
|
+
return browserAutomationTool.execute(
|
|
2794
|
+
{ task: input.task },
|
|
2795
|
+
{
|
|
2796
|
+
...context,
|
|
2797
|
+
toolCallId: toolCallId || context.toolCallId
|
|
2798
|
+
}
|
|
2799
|
+
);
|
|
2794
2800
|
}
|
|
2795
2801
|
default:
|
|
2796
2802
|
return `Error: unknown tool "${name}"`;
|
|
@@ -3020,7 +3026,7 @@ var designExpertTool = {
|
|
|
3020
3026
|
task: input.task,
|
|
3021
3027
|
tools: DESIGN_EXPERT_TOOLS,
|
|
3022
3028
|
externalTools: /* @__PURE__ */ new Set(),
|
|
3023
|
-
executeTool: (name, input2) => executeDesignExpertTool(name, input2, context),
|
|
3029
|
+
executeTool: (name, input2, toolCallId) => executeDesignExpertTool(name, input2, context, toolCallId),
|
|
3024
3030
|
apiConfig: context.apiConfig,
|
|
3025
3031
|
model: context.model,
|
|
3026
3032
|
subAgentId: "visualDesignExpert",
|
package/dist/index.js
CHANGED
|
@@ -2017,16 +2017,16 @@ var init_runMethod = __esm({
|
|
|
2017
2017
|
// src/tools/_helpers/screenshot.ts
|
|
2018
2018
|
async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
2019
2019
|
let prompt;
|
|
2020
|
-
let
|
|
2020
|
+
let fullPage = false;
|
|
2021
2021
|
if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
|
|
2022
2022
|
prompt = promptOrOptions.prompt;
|
|
2023
|
-
|
|
2023
|
+
fullPage = promptOrOptions.fullPage ?? false;
|
|
2024
2024
|
} else {
|
|
2025
2025
|
prompt = promptOrOptions;
|
|
2026
2026
|
}
|
|
2027
2027
|
const ssResult = await sidecarRequest(
|
|
2028
2028
|
"/screenshot",
|
|
2029
|
-
{ fullPage
|
|
2029
|
+
{ fullPage },
|
|
2030
2030
|
{ timeout: 12e4 }
|
|
2031
2031
|
);
|
|
2032
2032
|
log.debug("Screenshot response", { ssResult });
|
|
@@ -2065,7 +2065,7 @@ var init_screenshot2 = __esm({
|
|
|
2065
2065
|
screenshotTool = {
|
|
2066
2066
|
definition: {
|
|
2067
2067
|
name: "screenshot",
|
|
2068
|
-
description: "Capture a screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for.
|
|
2068
|
+
description: "Capture a screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for. By default captures the viewport (what the user sees). Set fullPage to capture the entire scrollable page.",
|
|
2069
2069
|
inputSchema: {
|
|
2070
2070
|
type: "object",
|
|
2071
2071
|
properties: {
|
|
@@ -2073,9 +2073,9 @@ var init_screenshot2 = __esm({
|
|
|
2073
2073
|
type: "string",
|
|
2074
2074
|
description: "Optional question about the screenshot. If omitted, returns a general description of what's visible."
|
|
2075
2075
|
},
|
|
2076
|
-
|
|
2076
|
+
fullPage: {
|
|
2077
2077
|
type: "boolean",
|
|
2078
|
-
description: "Capture
|
|
2078
|
+
description: "Capture the full scrollable page instead of just the viewport. Use when you need to see below-the-fold content."
|
|
2079
2079
|
}
|
|
2080
2080
|
}
|
|
2081
2081
|
}
|
|
@@ -2084,7 +2084,7 @@ var init_screenshot2 = __esm({
|
|
|
2084
2084
|
try {
|
|
2085
2085
|
return await captureAndAnalyzeScreenshot({
|
|
2086
2086
|
prompt: input.prompt,
|
|
2087
|
-
|
|
2087
|
+
fullPage: input.fullPage
|
|
2088
2088
|
});
|
|
2089
2089
|
} catch (err) {
|
|
2090
2090
|
return `Error taking screenshot: ${err.message}`;
|
|
@@ -2257,7 +2257,7 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
|
|
|
2257
2257
|
if (externalTools.has(tc.name) && resolveExternalTool) {
|
|
2258
2258
|
result = await resolveExternalTool(tc.id, tc.name, tc.input);
|
|
2259
2259
|
} else {
|
|
2260
|
-
result = await executeTool2(tc.name, tc.input);
|
|
2260
|
+
result = await executeTool2(tc.name, tc.input, tc.id);
|
|
2261
2261
|
}
|
|
2262
2262
|
const isError = result.startsWith("Error");
|
|
2263
2263
|
emit2({
|
|
@@ -2570,13 +2570,13 @@ function resolvePath(filename) {
|
|
|
2570
2570
|
const local4 = path5.join(base2, filename);
|
|
2571
2571
|
return fs11.existsSync(local4) ? local4 : path5.join(base2, "subagents", "designExpert", filename);
|
|
2572
2572
|
}
|
|
2573
|
-
async function executeDesignExpertTool(name, input, context) {
|
|
2573
|
+
async function executeDesignExpertTool(name, input, context, toolCallId) {
|
|
2574
2574
|
switch (name) {
|
|
2575
2575
|
case "screenshot": {
|
|
2576
2576
|
try {
|
|
2577
2577
|
return await captureAndAnalyzeScreenshot({
|
|
2578
2578
|
prompt: input.prompt,
|
|
2579
|
-
|
|
2579
|
+
fullPage: input.fullPage
|
|
2580
2580
|
});
|
|
2581
2581
|
} catch (err) {
|
|
2582
2582
|
return `Error taking screenshot: ${err.message}`;
|
|
@@ -2676,7 +2676,13 @@ ${analysis}`;
|
|
|
2676
2676
|
if (!context) {
|
|
2677
2677
|
return "Error: browser testing requires execution context (only available in headless mode)";
|
|
2678
2678
|
}
|
|
2679
|
-
return browserAutomationTool.execute(
|
|
2679
|
+
return browserAutomationTool.execute(
|
|
2680
|
+
{ task: input.task },
|
|
2681
|
+
{
|
|
2682
|
+
...context,
|
|
2683
|
+
toolCallId: toolCallId || context.toolCallId
|
|
2684
|
+
}
|
|
2685
|
+
);
|
|
2680
2686
|
}
|
|
2681
2687
|
default:
|
|
2682
2688
|
return `Error: unknown tool "${name}"`;
|
|
@@ -2744,7 +2750,7 @@ var init_tools2 = __esm({
|
|
|
2744
2750
|
},
|
|
2745
2751
|
{
|
|
2746
2752
|
name: "screenshot",
|
|
2747
|
-
description: "Capture a screenshot of the app preview. Returns a CDN URL with visual analysis. Use to review the current state of the UI being built. Set
|
|
2753
|
+
description: "Capture a screenshot of the app preview. Returns a CDN URL with visual analysis. Use to review the current state of the UI being built. By default captures the viewport. Set fullPage to capture the entire scrollable page.",
|
|
2748
2754
|
inputSchema: {
|
|
2749
2755
|
type: "object",
|
|
2750
2756
|
properties: {
|
|
@@ -2752,22 +2758,22 @@ var init_tools2 = __esm({
|
|
|
2752
2758
|
type: "string",
|
|
2753
2759
|
description: "Optional specific question about the screenshot."
|
|
2754
2760
|
},
|
|
2755
|
-
|
|
2761
|
+
fullPage: {
|
|
2756
2762
|
type: "boolean",
|
|
2757
|
-
description: "Capture
|
|
2763
|
+
description: "Capture the full scrollable page instead of just the viewport. Use when you need to see below-the-fold content."
|
|
2758
2764
|
}
|
|
2759
2765
|
}
|
|
2760
2766
|
}
|
|
2761
2767
|
},
|
|
2762
2768
|
{
|
|
2763
2769
|
name: "runBrowserTest",
|
|
2764
|
-
description: "Run an automated browser test against the live app preview. Use to verify
|
|
2770
|
+
description: "Run an automated browser test against the live app preview. Use to verify implementation details via getComputedStyle: font-family names, exact colors, spacing, borders, shadows, font sizes, transforms. Also supports navigation between pages and screenshots. Use this to confirm the right fonts are loaded and CSS values match the spec.",
|
|
2765
2771
|
inputSchema: {
|
|
2766
2772
|
type: "object",
|
|
2767
2773
|
properties: {
|
|
2768
2774
|
task: {
|
|
2769
2775
|
type: "string",
|
|
2770
|
-
description: 'What to verify, in natural language.
|
|
2776
|
+
description: 'What to verify, in natural language. Focus on measurable properties: "Check the hero cards have border-radius: 24px and box-shadow" or "Verify the background color of the CTA section is #C4FF0D".'
|
|
2771
2777
|
}
|
|
2772
2778
|
},
|
|
2773
2779
|
required: ["task"]
|
|
@@ -3042,7 +3048,7 @@ Visual design expert. Describe the situation and what you need \u2014 the agent
|
|
|
3042
3048
|
task: input.task,
|
|
3043
3049
|
tools: DESIGN_EXPERT_TOOLS,
|
|
3044
3050
|
externalTools: /* @__PURE__ */ new Set(),
|
|
3045
|
-
executeTool: (name, input2) => executeDesignExpertTool(name, input2, context),
|
|
3051
|
+
executeTool: (name, input2, toolCallId) => executeDesignExpertTool(name, input2, context, toolCallId),
|
|
3046
3052
|
apiConfig: context.apiConfig,
|
|
3047
3053
|
model: context.model,
|
|
3048
3054
|
subAgentId: "visualDesignExpert",
|
|
@@ -18,6 +18,10 @@ These are things we already know about and have decided to accept:
|
|
|
18
18
|
|
|
19
19
|
- Limited browser support for `oklch` gradients using `in <colorspace>` syntax — we accept the compatibility tradeoff for better color quality
|
|
20
20
|
- Limited browser support for CSS scroll-driven animations (`animation-timeline: scroll()` / `view()`) — we accept this tradeoff
|
|
21
|
+
- Libraries we know are actively maintained — don't bother checking these:
|
|
22
|
+
- swr
|
|
23
|
+
- framer-motion
|
|
24
|
+
- styled-components
|
|
21
25
|
|
|
22
26
|
### Common pitfalls (always flag these)
|
|
23
27
|
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
## Tool usage
|
|
2
2
|
|
|
3
3
|
- Use `analyzeReferenceImageOrUrl` to analyze any image URL or website URL. Websites are automatically screenshotted. Omit the prompt for a standard design analysis, or provide a custom prompt for specific questions. Do not screenshot font specimen pages, documentation, or other text-heavy pages — use `fetchUrl` for those instead.
|
|
4
|
-
- Use `screenshot` to
|
|
4
|
+
- Use `screenshot` to see the current state of the app preview. This is your primary tool for visual review. Use `fullPage: true` to see the entire page at once. Remember that the screenshot analysis is not highly precise: for example, it cannot reliably identify specific fonts by name — it can only describe what the letterforms look like.
|
|
5
|
+
- Use `runBrowserTest` only to verify specific computed values that a screenshot can't tell you: exact font-family names, hex colors, pixel measurements, border-radius, box-shadow, transforms. Keep requests focused — ask it to check specific properties on specific elements, not to take screenshots or scroll around.
|
|
6
|
+
- **screenshot vs runBrowserTest**: Screenshot to *see* the page. Browser test to *measure* specific CSS values. Never use the browser agent for tasks your screenshot tool can handle, or vice versa.
|
|
5
7
|
- Use `searchGoogle` and `fetchUrl` only when the user references something specific: a particular website to match, a brand to look up, a company whose identity you need to research. You already have curated fonts, inspiration references, and strong internal knowledge — don't search the web for generic inspiration or "best X apps." The web is for specific lookups, not creative direction.
|
|
6
8
|
- When proposing multiple options, make them genuinely different directions (dark + bold vs. light + editorial) rather than minor variations.
|
|
7
9
|
- When multiple tool calls are independent, make them all in a single turn. Searching for three different products, or fetching two reference sites: batch them instead of doing one per turn.
|