@mindstudio-ai/remy 0.1.154 → 0.1.156
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
This is an automated follow-up after the initial build. The code is written and verified. Now it's time to polish and finalize so we can deliver something beautiful and magical as the user's first experience with our work.
|
|
6
6
|
|
|
7
7
|
## Polishing
|
|
8
|
-
Take a step back and do an explicit polish pass. Re-read the spec files and the design expert's guidance, then walk through each frontend file looking for
|
|
8
|
+
Take a step back and do an explicit polish pass focused on UX and interaction quality. Re-read the spec files and the design expert's guidance, then walk through each frontend file looking for behavioral details that got skipped in the initial build: layout animations, transitions, hover states, micro-interactions, spring physics, entrance reveals, gesture handling, responsiveness across breakpoints, focus and keyboard handling, and loading/empty/error states.
|
|
9
9
|
|
|
10
10
|
The initial build prioritizes getting everything connected and functional, but this pass closes the gap between "it works" and "it feels great." In many ways this is *the* most important part of the initial build, as the user's first experience of the deliverable will set their expectations for every iteration that follows. Don't mess this up.
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
The visual assets — photography, generated images, brand colors, typography — were already locked in upstream by the design expert during intake. Treat them as fixed inputs to this pass. Polish the *behavior* of the page, not the pixels of generated imagery.
|
|
13
13
|
|
|
14
14
|
## Finalizing
|
|
15
15
|
When everything is working and polished:
|
package/dist/headless.js
CHANGED
|
@@ -835,7 +835,7 @@ async function generateSummary(apiConfig, name, compactionPrompt, messagesToSumm
|
|
|
835
835
|
let summaryText = "";
|
|
836
836
|
const useMainCache = !!mainSystem;
|
|
837
837
|
const system = useMainCache ? mainSystem : compactionPrompt;
|
|
838
|
-
const tools2 =
|
|
838
|
+
const tools2 = [];
|
|
839
839
|
const userContent = useMainCache ? `${compactionPrompt}
|
|
840
840
|
|
|
841
841
|
---
|
|
@@ -2078,11 +2078,12 @@ ${unifiedDiff(input.path, content, updated)}`;
|
|
|
2078
2078
|
import { spawn as spawn2 } from "child_process";
|
|
2079
2079
|
var DEFAULT_TIMEOUT_MS = 12e4;
|
|
2080
2080
|
var DEFAULT_MAX_LINES3 = 500;
|
|
2081
|
+
var MAX_OUTPUT_BYTES = 3e4;
|
|
2081
2082
|
var bashTool = {
|
|
2082
2083
|
clearable: true,
|
|
2083
2084
|
definition: {
|
|
2084
2085
|
name: "bash",
|
|
2085
|
-
description: "Run a shell command and return stdout + stderr. 120-second timeout by default (configurable). Use for: npm install/build/test, git operations, tsc --noEmit, or any CLI tool. Prefer dedicated tools over bash when available (use grep instead of bash + rg, readFile instead of bash + cat). Output is truncated to 500 lines
|
|
2086
|
+
description: "Run a shell command and return stdout + stderr. 120-second timeout by default (configurable). Use for: npm install/build/test, git operations, tsc --noEmit, or any CLI tool. Prefer dedicated tools over bash when available (use grep instead of bash + rg, readFile instead of bash + cat). Output is truncated to 500 lines or 30KB, whichever comes first. If a command would emit a lot of data, narrow it down (grep, head/tail, --short flags) rather than reading everything.",
|
|
2086
2087
|
inputSchema: {
|
|
2087
2088
|
type: "object",
|
|
2088
2089
|
properties: {
|
|
@@ -2138,12 +2139,32 @@ var bashTool = {
|
|
|
2138
2139
|
}
|
|
2139
2140
|
return;
|
|
2140
2141
|
}
|
|
2141
|
-
const
|
|
2142
|
-
|
|
2142
|
+
const totalBytes = Buffer.byteLength(output, "utf-8");
|
|
2143
|
+
let truncated = output;
|
|
2144
|
+
let byteTruncated = false;
|
|
2145
|
+
if (totalBytes > MAX_OUTPUT_BYTES) {
|
|
2146
|
+
truncated = Buffer.from(output, "utf-8").subarray(0, MAX_OUTPUT_BYTES).toString("utf-8");
|
|
2147
|
+
byteTruncated = true;
|
|
2148
|
+
}
|
|
2149
|
+
const lines = truncated.split("\n");
|
|
2150
|
+
const lineTruncated = lines.length > maxLines;
|
|
2151
|
+
if (lineTruncated) {
|
|
2152
|
+
truncated = lines.slice(0, maxLines).join("\n");
|
|
2153
|
+
}
|
|
2154
|
+
if (byteTruncated || lineTruncated) {
|
|
2155
|
+
const reasons = [];
|
|
2156
|
+
if (lineTruncated) {
|
|
2157
|
+
reasons.push(`${maxLines} lines`);
|
|
2158
|
+
}
|
|
2159
|
+
if (byteTruncated) {
|
|
2160
|
+
reasons.push(
|
|
2161
|
+
`${(MAX_OUTPUT_BYTES / 1024).toFixed(0)}KB of ${(totalBytes / 1024).toFixed(0)}KB`
|
|
2162
|
+
);
|
|
2163
|
+
}
|
|
2143
2164
|
resolve2(
|
|
2144
|
-
|
|
2165
|
+
truncated + `
|
|
2145
2166
|
|
|
2146
|
-
(truncated at ${
|
|
2167
|
+
(truncated at ${reasons.join(" / ")} \u2014 narrow the command (grep, head/tail, smaller paths) instead of increasing limits)`
|
|
2147
2168
|
);
|
|
2148
2169
|
} else {
|
|
2149
2170
|
resolve2(output);
|
|
@@ -2655,6 +2676,21 @@ ${opts.styleMap}
|
|
|
2655
2676
|
${TEXT_WRAP_DISCLAIMER}`;
|
|
2656
2677
|
return p;
|
|
2657
2678
|
}
|
|
2679
|
+
async function streamScreenshotAnalysis(opts) {
|
|
2680
|
+
const { url, prompt, styleMap, onLog } = opts;
|
|
2681
|
+
onLog?.(JSON.stringify({ url, analysis: null }));
|
|
2682
|
+
const analysisPrompt = buildScreenshotAnalysisPrompt({ prompt, styleMap });
|
|
2683
|
+
let accumulated = "";
|
|
2684
|
+
const analysis = await analyzeImage({
|
|
2685
|
+
prompt: analysisPrompt,
|
|
2686
|
+
imageUrl: url,
|
|
2687
|
+
onLog: (chunk) => {
|
|
2688
|
+
accumulated += chunk;
|
|
2689
|
+
onLog?.(JSON.stringify({ url, analysis: accumulated }));
|
|
2690
|
+
}
|
|
2691
|
+
});
|
|
2692
|
+
return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
|
|
2693
|
+
}
|
|
2658
2694
|
async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
2659
2695
|
let prompt;
|
|
2660
2696
|
let existingUrl;
|
|
@@ -2689,16 +2725,12 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
|
2689
2725
|
if (prompt === false) {
|
|
2690
2726
|
return url;
|
|
2691
2727
|
}
|
|
2692
|
-
|
|
2728
|
+
return streamScreenshotAnalysis({
|
|
2729
|
+
url,
|
|
2693
2730
|
prompt: prompt || void 0,
|
|
2694
|
-
styleMap
|
|
2695
|
-
});
|
|
2696
|
-
const analysis = await analyzeImage({
|
|
2697
|
-
prompt: analysisPrompt,
|
|
2698
|
-
imageUrl: url,
|
|
2731
|
+
styleMap,
|
|
2699
2732
|
onLog
|
|
2700
2733
|
});
|
|
2701
|
-
return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
|
|
2702
2734
|
}
|
|
2703
2735
|
|
|
2704
2736
|
// src/tools/_helpers/browserLock.ts
|
|
@@ -2718,9 +2750,10 @@ function startStatusWatcher(config) {
|
|
|
2718
2750
|
const { apiConfig, getContext, onStatus, interval = 5e3, signal } = config;
|
|
2719
2751
|
let inflight = false;
|
|
2720
2752
|
let stopped = false;
|
|
2753
|
+
let pauseCount = 0;
|
|
2721
2754
|
const url = `${apiConfig.baseUrl}/_internal/v2/agent/remy/generate-status`;
|
|
2722
2755
|
async function tick() {
|
|
2723
|
-
if (stopped || signal?.aborted || inflight) {
|
|
2756
|
+
if (stopped || signal?.aborted || inflight || pauseCount > 0) {
|
|
2724
2757
|
return;
|
|
2725
2758
|
}
|
|
2726
2759
|
inflight = true;
|
|
@@ -2745,6 +2778,9 @@ function startStatusWatcher(config) {
|
|
|
2745
2778
|
if (!data.label) {
|
|
2746
2779
|
return;
|
|
2747
2780
|
}
|
|
2781
|
+
if (pauseCount > 0) {
|
|
2782
|
+
return;
|
|
2783
|
+
}
|
|
2748
2784
|
onStatus(data.label);
|
|
2749
2785
|
} catch {
|
|
2750
2786
|
} finally {
|
|
@@ -2758,6 +2794,12 @@ function startStatusWatcher(config) {
|
|
|
2758
2794
|
stop() {
|
|
2759
2795
|
stopped = true;
|
|
2760
2796
|
clearInterval(timer);
|
|
2797
|
+
},
|
|
2798
|
+
pause() {
|
|
2799
|
+
pauseCount++;
|
|
2800
|
+
},
|
|
2801
|
+
resume() {
|
|
2802
|
+
pauseCount = Math.max(0, pauseCount - 1);
|
|
2761
2803
|
}
|
|
2762
2804
|
};
|
|
2763
2805
|
}
|
|
@@ -3613,7 +3655,7 @@ var screenshotTool = {
|
|
|
3613
3655
|
},
|
|
3614
3656
|
instructions: {
|
|
3615
3657
|
type: "string",
|
|
3616
|
-
description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas.
|
|
3658
|
+
description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing. Use only when interaction is required to *reach* the state you want to capture \u2014 log in, dismiss a modal, switch a tab, follow a route. If your steps are exercising the app's functionality across multiple states (running flows, asserting behavior under interaction, multi-step QA), use `runAutomatedBrowserTest` instead."
|
|
3617
3659
|
}
|
|
3618
3660
|
}
|
|
3619
3661
|
}
|
|
@@ -3642,20 +3684,12 @@ var screenshotTool = {
|
|
|
3642
3684
|
if (!url) {
|
|
3643
3685
|
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
3644
3686
|
}
|
|
3645
|
-
|
|
3687
|
+
return await streamScreenshotAnalysis({
|
|
3688
|
+
url,
|
|
3646
3689
|
prompt: input.prompt,
|
|
3647
|
-
styleMap
|
|
3648
|
-
});
|
|
3649
|
-
const analysis = await analyzeImage({
|
|
3650
|
-
prompt: analysisPrompt,
|
|
3651
|
-
imageUrl: url,
|
|
3690
|
+
styleMap,
|
|
3652
3691
|
onLog: context?.onLog
|
|
3653
3692
|
});
|
|
3654
|
-
return JSON.stringify({
|
|
3655
|
-
url,
|
|
3656
|
-
analysis,
|
|
3657
|
-
...styleMap ? { styleMap } : {}
|
|
3658
|
-
});
|
|
3659
3693
|
}
|
|
3660
3694
|
const release = await acquireBrowserLock();
|
|
3661
3695
|
try {
|
|
@@ -3973,20 +4007,12 @@ async function execute5(input, onLog, context) {
|
|
|
3973
4007
|
if (!url) {
|
|
3974
4008
|
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
3975
4009
|
}
|
|
3976
|
-
|
|
4010
|
+
return await streamScreenshotAnalysis({
|
|
4011
|
+
url,
|
|
3977
4012
|
prompt: input.prompt,
|
|
3978
|
-
styleMap
|
|
3979
|
-
});
|
|
3980
|
-
const analysis = await analyzeImage({
|
|
3981
|
-
prompt: analysisPrompt,
|
|
3982
|
-
imageUrl: url,
|
|
4013
|
+
styleMap,
|
|
3983
4014
|
onLog
|
|
3984
4015
|
});
|
|
3985
|
-
return JSON.stringify({
|
|
3986
|
-
url,
|
|
3987
|
-
analysis,
|
|
3988
|
-
...styleMap ? { styleMap } : {}
|
|
3989
|
-
});
|
|
3990
4016
|
} catch (err) {
|
|
3991
4017
|
return `Error taking interactive screenshot: ${err.message}`;
|
|
3992
4018
|
}
|
|
@@ -5456,6 +5482,11 @@ var EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
|
|
|
5456
5482
|
"browserCommand",
|
|
5457
5483
|
"setProjectMetadata"
|
|
5458
5484
|
]);
|
|
5485
|
+
var USER_BLOCKING_EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
|
|
5486
|
+
"promptUser",
|
|
5487
|
+
"presentPublishPlan",
|
|
5488
|
+
"confirmDestructiveAction"
|
|
5489
|
+
]);
|
|
5459
5490
|
function createAgentState() {
|
|
5460
5491
|
return { messages: [] };
|
|
5461
5492
|
}
|
|
@@ -5545,6 +5576,8 @@ async function runTurn(params) {
|
|
|
5545
5576
|
let subAgentText = "";
|
|
5546
5577
|
let currentToolNames = "";
|
|
5547
5578
|
const statusWatcher = isFirstMessage ? { stop() {
|
|
5579
|
+
}, pause() {
|
|
5580
|
+
}, resume() {
|
|
5548
5581
|
} } : startStatusWatcher({
|
|
5549
5582
|
apiConfig,
|
|
5550
5583
|
getContext: () => {
|
|
@@ -5849,7 +5882,17 @@ async function runTurn(params) {
|
|
|
5849
5882
|
toolCallId: tc.id,
|
|
5850
5883
|
name: tc.name
|
|
5851
5884
|
});
|
|
5852
|
-
|
|
5885
|
+
const blocksUser = USER_BLOCKING_EXTERNAL_TOOLS.has(tc.name);
|
|
5886
|
+
if (blocksUser) {
|
|
5887
|
+
statusWatcher.pause();
|
|
5888
|
+
}
|
|
5889
|
+
try {
|
|
5890
|
+
result = await resolveExternalTool(tc.id, tc.name, input);
|
|
5891
|
+
} finally {
|
|
5892
|
+
if (blocksUser) {
|
|
5893
|
+
statusWatcher.resume();
|
|
5894
|
+
}
|
|
5895
|
+
}
|
|
5853
5896
|
} else {
|
|
5854
5897
|
result = await executeTool(tc.name, input, {
|
|
5855
5898
|
apiConfig,
|
package/dist/index.js
CHANGED
|
@@ -1550,7 +1550,7 @@ async function generateSummary(apiConfig, name, compactionPrompt, messagesToSumm
|
|
|
1550
1550
|
let summaryText = "";
|
|
1551
1551
|
const useMainCache = !!mainSystem;
|
|
1552
1552
|
const system = useMainCache ? mainSystem : compactionPrompt;
|
|
1553
|
-
const tools2 =
|
|
1553
|
+
const tools2 = [];
|
|
1554
1554
|
const userContent = useMainCache ? `${compactionPrompt}
|
|
1555
1555
|
|
|
1556
1556
|
---
|
|
@@ -2264,17 +2264,18 @@ ${unifiedDiff(input.path, content, updated)}`;
|
|
|
2264
2264
|
|
|
2265
2265
|
// src/tools/code/bash.ts
|
|
2266
2266
|
import { spawn as spawn2 } from "child_process";
|
|
2267
|
-
var DEFAULT_TIMEOUT_MS, DEFAULT_MAX_LINES3, bashTool;
|
|
2267
|
+
var DEFAULT_TIMEOUT_MS, DEFAULT_MAX_LINES3, MAX_OUTPUT_BYTES, bashTool;
|
|
2268
2268
|
var init_bash = __esm({
|
|
2269
2269
|
"src/tools/code/bash.ts"() {
|
|
2270
2270
|
"use strict";
|
|
2271
2271
|
DEFAULT_TIMEOUT_MS = 12e4;
|
|
2272
2272
|
DEFAULT_MAX_LINES3 = 500;
|
|
2273
|
+
MAX_OUTPUT_BYTES = 3e4;
|
|
2273
2274
|
bashTool = {
|
|
2274
2275
|
clearable: true,
|
|
2275
2276
|
definition: {
|
|
2276
2277
|
name: "bash",
|
|
2277
|
-
description: "Run a shell command and return stdout + stderr. 120-second timeout by default (configurable). Use for: npm install/build/test, git operations, tsc --noEmit, or any CLI tool. Prefer dedicated tools over bash when available (use grep instead of bash + rg, readFile instead of bash + cat). Output is truncated to 500 lines
|
|
2278
|
+
description: "Run a shell command and return stdout + stderr. 120-second timeout by default (configurable). Use for: npm install/build/test, git operations, tsc --noEmit, or any CLI tool. Prefer dedicated tools over bash when available (use grep instead of bash + rg, readFile instead of bash + cat). Output is truncated to 500 lines or 30KB, whichever comes first. If a command would emit a lot of data, narrow it down (grep, head/tail, --short flags) rather than reading everything.",
|
|
2278
2279
|
inputSchema: {
|
|
2279
2280
|
type: "object",
|
|
2280
2281
|
properties: {
|
|
@@ -2330,12 +2331,32 @@ var init_bash = __esm({
|
|
|
2330
2331
|
}
|
|
2331
2332
|
return;
|
|
2332
2333
|
}
|
|
2333
|
-
const
|
|
2334
|
-
|
|
2334
|
+
const totalBytes = Buffer.byteLength(output, "utf-8");
|
|
2335
|
+
let truncated = output;
|
|
2336
|
+
let byteTruncated = false;
|
|
2337
|
+
if (totalBytes > MAX_OUTPUT_BYTES) {
|
|
2338
|
+
truncated = Buffer.from(output, "utf-8").subarray(0, MAX_OUTPUT_BYTES).toString("utf-8");
|
|
2339
|
+
byteTruncated = true;
|
|
2340
|
+
}
|
|
2341
|
+
const lines = truncated.split("\n");
|
|
2342
|
+
const lineTruncated = lines.length > maxLines;
|
|
2343
|
+
if (lineTruncated) {
|
|
2344
|
+
truncated = lines.slice(0, maxLines).join("\n");
|
|
2345
|
+
}
|
|
2346
|
+
if (byteTruncated || lineTruncated) {
|
|
2347
|
+
const reasons = [];
|
|
2348
|
+
if (lineTruncated) {
|
|
2349
|
+
reasons.push(`${maxLines} lines`);
|
|
2350
|
+
}
|
|
2351
|
+
if (byteTruncated) {
|
|
2352
|
+
reasons.push(
|
|
2353
|
+
`${(MAX_OUTPUT_BYTES / 1024).toFixed(0)}KB of ${(totalBytes / 1024).toFixed(0)}KB`
|
|
2354
|
+
);
|
|
2355
|
+
}
|
|
2335
2356
|
resolve2(
|
|
2336
|
-
|
|
2357
|
+
truncated + `
|
|
2337
2358
|
|
|
2338
|
-
(truncated at ${
|
|
2359
|
+
(truncated at ${reasons.join(" / ")} \u2014 narrow the command (grep, head/tail, smaller paths) instead of increasing limits)`
|
|
2339
2360
|
);
|
|
2340
2361
|
} else {
|
|
2341
2362
|
resolve2(output);
|
|
@@ -2922,6 +2943,21 @@ ${opts.styleMap}
|
|
|
2922
2943
|
${TEXT_WRAP_DISCLAIMER}`;
|
|
2923
2944
|
return p;
|
|
2924
2945
|
}
|
|
2946
|
+
async function streamScreenshotAnalysis(opts) {
|
|
2947
|
+
const { url, prompt, styleMap, onLog } = opts;
|
|
2948
|
+
onLog?.(JSON.stringify({ url, analysis: null }));
|
|
2949
|
+
const analysisPrompt = buildScreenshotAnalysisPrompt({ prompt, styleMap });
|
|
2950
|
+
let accumulated = "";
|
|
2951
|
+
const analysis = await analyzeImage({
|
|
2952
|
+
prompt: analysisPrompt,
|
|
2953
|
+
imageUrl: url,
|
|
2954
|
+
onLog: (chunk) => {
|
|
2955
|
+
accumulated += chunk;
|
|
2956
|
+
onLog?.(JSON.stringify({ url, analysis: accumulated }));
|
|
2957
|
+
}
|
|
2958
|
+
});
|
|
2959
|
+
return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
|
|
2960
|
+
}
|
|
2925
2961
|
async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
2926
2962
|
let prompt;
|
|
2927
2963
|
let existingUrl;
|
|
@@ -2956,16 +2992,12 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
|
2956
2992
|
if (prompt === false) {
|
|
2957
2993
|
return url;
|
|
2958
2994
|
}
|
|
2959
|
-
|
|
2995
|
+
return streamScreenshotAnalysis({
|
|
2996
|
+
url,
|
|
2960
2997
|
prompt: prompt || void 0,
|
|
2961
|
-
styleMap
|
|
2962
|
-
});
|
|
2963
|
-
const analysis = await analyzeImage({
|
|
2964
|
-
prompt: analysisPrompt,
|
|
2965
|
-
imageUrl: url,
|
|
2998
|
+
styleMap,
|
|
2966
2999
|
onLog
|
|
2967
3000
|
});
|
|
2968
|
-
return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
|
|
2969
3001
|
}
|
|
2970
3002
|
var SCREENSHOT_ANALYSIS_PROMPT, TEXT_WRAP_DISCLAIMER;
|
|
2971
3003
|
var init_screenshot = __esm({
|
|
@@ -3003,9 +3035,10 @@ function startStatusWatcher(config) {
|
|
|
3003
3035
|
const { apiConfig, getContext, onStatus, interval = 5e3, signal } = config;
|
|
3004
3036
|
let inflight = false;
|
|
3005
3037
|
let stopped = false;
|
|
3038
|
+
let pauseCount = 0;
|
|
3006
3039
|
const url = `${apiConfig.baseUrl}/_internal/v2/agent/remy/generate-status`;
|
|
3007
3040
|
async function tick() {
|
|
3008
|
-
if (stopped || signal?.aborted || inflight) {
|
|
3041
|
+
if (stopped || signal?.aborted || inflight || pauseCount > 0) {
|
|
3009
3042
|
return;
|
|
3010
3043
|
}
|
|
3011
3044
|
inflight = true;
|
|
@@ -3030,6 +3063,9 @@ function startStatusWatcher(config) {
|
|
|
3030
3063
|
if (!data.label) {
|
|
3031
3064
|
return;
|
|
3032
3065
|
}
|
|
3066
|
+
if (pauseCount > 0) {
|
|
3067
|
+
return;
|
|
3068
|
+
}
|
|
3033
3069
|
onStatus(data.label);
|
|
3034
3070
|
} catch {
|
|
3035
3071
|
} finally {
|
|
@@ -3043,6 +3079,12 @@ function startStatusWatcher(config) {
|
|
|
3043
3079
|
stop() {
|
|
3044
3080
|
stopped = true;
|
|
3045
3081
|
clearInterval(timer);
|
|
3082
|
+
},
|
|
3083
|
+
pause() {
|
|
3084
|
+
pauseCount++;
|
|
3085
|
+
},
|
|
3086
|
+
resume() {
|
|
3087
|
+
pauseCount = Math.max(0, pauseCount - 1);
|
|
3046
3088
|
}
|
|
3047
3089
|
};
|
|
3048
3090
|
}
|
|
@@ -3935,7 +3977,6 @@ var init_screenshot2 = __esm({
|
|
|
3935
3977
|
"use strict";
|
|
3936
3978
|
init_screenshot();
|
|
3937
3979
|
init_browserLock();
|
|
3938
|
-
init_analyzeImage();
|
|
3939
3980
|
init_browserAutomation();
|
|
3940
3981
|
screenshotTool = {
|
|
3941
3982
|
clearable: true,
|
|
@@ -3959,7 +4000,7 @@ var init_screenshot2 = __esm({
|
|
|
3959
4000
|
},
|
|
3960
4001
|
instructions: {
|
|
3961
4002
|
type: "string",
|
|
3962
|
-
description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas.
|
|
4003
|
+
description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing. Use only when interaction is required to *reach* the state you want to capture \u2014 log in, dismiss a modal, switch a tab, follow a route. If your steps are exercising the app's functionality across multiple states (running flows, asserting behavior under interaction, multi-step QA), use `runAutomatedBrowserTest` instead."
|
|
3963
4004
|
}
|
|
3964
4005
|
}
|
|
3965
4006
|
}
|
|
@@ -3988,20 +4029,12 @@ var init_screenshot2 = __esm({
|
|
|
3988
4029
|
if (!url) {
|
|
3989
4030
|
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
3990
4031
|
}
|
|
3991
|
-
|
|
4032
|
+
return await streamScreenshotAnalysis({
|
|
4033
|
+
url,
|
|
3992
4034
|
prompt: input.prompt,
|
|
3993
|
-
styleMap
|
|
3994
|
-
});
|
|
3995
|
-
const analysis = await analyzeImage({
|
|
3996
|
-
prompt: analysisPrompt,
|
|
3997
|
-
imageUrl: url,
|
|
4035
|
+
styleMap,
|
|
3998
4036
|
onLog: context?.onLog
|
|
3999
4037
|
});
|
|
4000
|
-
return JSON.stringify({
|
|
4001
|
-
url,
|
|
4002
|
-
analysis,
|
|
4003
|
-
...styleMap ? { styleMap } : {}
|
|
4004
|
-
});
|
|
4005
4038
|
}
|
|
4006
4039
|
const release = await acquireBrowserLock();
|
|
4007
4040
|
try {
|
|
@@ -4335,20 +4368,12 @@ async function execute5(input, onLog, context) {
|
|
|
4335
4368
|
if (!url) {
|
|
4336
4369
|
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
4337
4370
|
}
|
|
4338
|
-
|
|
4371
|
+
return await streamScreenshotAnalysis({
|
|
4372
|
+
url,
|
|
4339
4373
|
prompt: input.prompt,
|
|
4340
|
-
styleMap
|
|
4341
|
-
});
|
|
4342
|
-
const analysis = await analyzeImage({
|
|
4343
|
-
prompt: analysisPrompt,
|
|
4344
|
-
imageUrl: url,
|
|
4374
|
+
styleMap,
|
|
4345
4375
|
onLog
|
|
4346
4376
|
});
|
|
4347
|
-
return JSON.stringify({
|
|
4348
|
-
url,
|
|
4349
|
-
analysis,
|
|
4350
|
-
...styleMap ? { styleMap } : {}
|
|
4351
|
-
});
|
|
4352
4377
|
} catch (err) {
|
|
4353
4378
|
return `Error taking interactive screenshot: ${err.message}`;
|
|
4354
4379
|
}
|
|
@@ -4372,7 +4397,6 @@ var init_screenshot3 = __esm({
|
|
|
4372
4397
|
"use strict";
|
|
4373
4398
|
init_screenshot();
|
|
4374
4399
|
init_browserLock();
|
|
4375
|
-
init_analyzeImage();
|
|
4376
4400
|
init_browserAutomation();
|
|
4377
4401
|
definition5 = {
|
|
4378
4402
|
clearable: true,
|
|
@@ -6135,6 +6159,8 @@ async function runTurn(params) {
|
|
|
6135
6159
|
let subAgentText = "";
|
|
6136
6160
|
let currentToolNames = "";
|
|
6137
6161
|
const statusWatcher = isFirstMessage ? { stop() {
|
|
6162
|
+
}, pause() {
|
|
6163
|
+
}, resume() {
|
|
6138
6164
|
} } : startStatusWatcher({
|
|
6139
6165
|
apiConfig,
|
|
6140
6166
|
getContext: () => {
|
|
@@ -6439,7 +6465,17 @@ async function runTurn(params) {
|
|
|
6439
6465
|
toolCallId: tc.id,
|
|
6440
6466
|
name: tc.name
|
|
6441
6467
|
});
|
|
6442
|
-
|
|
6468
|
+
const blocksUser = USER_BLOCKING_EXTERNAL_TOOLS.has(tc.name);
|
|
6469
|
+
if (blocksUser) {
|
|
6470
|
+
statusWatcher.pause();
|
|
6471
|
+
}
|
|
6472
|
+
try {
|
|
6473
|
+
result = await resolveExternalTool(tc.id, tc.name, input);
|
|
6474
|
+
} finally {
|
|
6475
|
+
if (blocksUser) {
|
|
6476
|
+
statusWatcher.resume();
|
|
6477
|
+
}
|
|
6478
|
+
}
|
|
6443
6479
|
} else {
|
|
6444
6480
|
result = await executeTool(tc.name, input, {
|
|
6445
6481
|
apiConfig,
|
|
@@ -6544,7 +6580,7 @@ async function runTurn(params) {
|
|
|
6544
6580
|
}
|
|
6545
6581
|
}
|
|
6546
6582
|
}
|
|
6547
|
-
var log8, EXTERNAL_TOOLS;
|
|
6583
|
+
var log8, EXTERNAL_TOOLS, USER_BLOCKING_EXTERNAL_TOOLS;
|
|
6548
6584
|
var init_agent = __esm({
|
|
6549
6585
|
"src/agent.ts"() {
|
|
6550
6586
|
"use strict";
|
|
@@ -6570,6 +6606,11 @@ var init_agent = __esm({
|
|
|
6570
6606
|
"browserCommand",
|
|
6571
6607
|
"setProjectMetadata"
|
|
6572
6608
|
]);
|
|
6609
|
+
USER_BLOCKING_EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
|
|
6610
|
+
"promptUser",
|
|
6611
|
+
"presentPublishPlan",
|
|
6612
|
+
"confirmDestructiveAction"
|
|
6613
|
+
]);
|
|
6573
6614
|
}
|
|
6574
6615
|
});
|
|
6575
6616
|
|
|
@@ -208,6 +208,8 @@ auth.requireRole('admin');
|
|
|
208
208
|
auth.requireRole('admin', 'approver'); // any of these
|
|
209
209
|
```
|
|
210
210
|
|
|
211
|
+
**Require login: check `auth.userId`. Roles are RBAC** — only declare roles that map to real business distinctions (vendor/buyer/admin), and only check them when behavior should differ. Newly verified users have `roles: []` until your code assigns them.
|
|
212
|
+
|
|
211
213
|
### `auth.hasRole(...roles)`
|
|
212
214
|
|
|
213
215
|
Returns `boolean`. Same logic as `requireRole` but doesn't throw.
|
|
@@ -375,4 +377,6 @@ Auth works the same in dev/preview as in production — real verification codes
|
|
|
375
377
|
|
|
376
378
|
All other emails and phone numbers receive real codes. There is no dev-mode bypass, no fake code, and no way to skip verification. When testing auth flows in the preview, use one of the test bypasses above or a real email/phone.
|
|
377
379
|
|
|
380
|
+
The `runMethod` tool's `userId: "testUser"` shortcut resolves to this same dev-bypass identity. The platform find-or-creates a real users-table row for it on first call and caches the row's UUID for the rest of the dev session. **`auth.userId` inside the method is that UUID — not the literal string `"testUser"`.** The user row already exists, so don't try to insert it. If you need the UUID to seed app-specific rows that reference it (profiles, preferences, foreign keys), read it from any method response or query the users table directly: `SELECT id FROM users WHERE email = 'remy@mindstudio.ai'` (or `phone = '+15555555555'` for SMS-auth apps).
|
|
381
|
+
|
|
378
382
|
Browser automation tools (screenshots, automated browser tests) handle their own auth sessions. Scenarios seed database data but do not create browser auth sessions.
|
|
@@ -11,11 +11,13 @@ Run `lspDiagnostics` after every turn where you have edited code in any meaningf
|
|
|
11
11
|
|
|
12
12
|
- Spot-check methods with `runMethod`. The dev database is a disposable snapshot that will have been seeded with scenario data, so don't worry about being destructive.
|
|
13
13
|
- For frontend work, take a single `screenshot` to confirm the main view renders correctly or look at the browser log for any console errors in the user's preview.
|
|
14
|
-
- Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot,
|
|
14
|
+
- Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, when the user reports something broken that you can't identify from code alone, or whenever the verification involves driving the app through multiple interactions.
|
|
15
15
|
- If the browser is unavailable, skip the visual check and verify through methods, logs, and code instead. Browser unavailability is an infrastructure issue, not a code problem — don't try to diagnose or fix it.
|
|
16
16
|
|
|
17
17
|
Aim for confidence that the core happy paths work. If the 80% case is solid, the remaining edge cases are likely fine and the user can surface them in chat. Don't screenshot every page, test every permutation, or verify every secondary flow. One or two runtime checks that confirm the app loads and data flows through is enough.
|
|
18
18
|
|
|
19
|
+
When making mechanical edits as part of iterating with the user (e.g., moving elements, changing labels, small redesigns and refactors), don't re-screenshot to confirm; simply trust your code. Re-screenshot only when changes are structural enough that the visual outcome is genuinely uncertain (new layout, new component composition, new route), or when the user reports something visible that you can't see in the code.
|
|
20
|
+
|
|
19
21
|
### Process Logs
|
|
20
22
|
|
|
21
23
|
Process logs are available at .logs/ in NDJSON format (one JSON object per line) for debugging. Each line has at minimum ts (unix millis) and msg fields, plus structured context like level, module, requestId, toolCallId where available. You can use `jq` to examine logs and debug failures. Tools like run method or run scenario execute synchronously, so log data will be available by the time those tools return their results to you; there is no need to `sleep` before querying log files.
|