@mindstudio-ai/remy 0.1.58 → 0.1.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/headless.js +43 -6
- package/dist/index.js +43 -6
- package/dist/prompt/compiled/interfaces.md +9 -5
- package/dist/prompt/static/team.md +3 -1
- package/package.json +1 -1
package/dist/headless.js
CHANGED
|
@@ -2350,9 +2350,11 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
|
2350
2350
|
let prompt;
|
|
2351
2351
|
let existingUrl;
|
|
2352
2352
|
let onLog;
|
|
2353
|
+
let path9;
|
|
2353
2354
|
if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
|
|
2354
2355
|
prompt = promptOrOptions.prompt;
|
|
2355
2356
|
existingUrl = promptOrOptions.imageUrl;
|
|
2357
|
+
path9 = promptOrOptions.path;
|
|
2356
2358
|
onLog = promptOrOptions.onLog;
|
|
2357
2359
|
} else {
|
|
2358
2360
|
prompt = promptOrOptions;
|
|
@@ -2361,9 +2363,11 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
|
2361
2363
|
if (existingUrl) {
|
|
2362
2364
|
url = existingUrl;
|
|
2363
2365
|
} else {
|
|
2364
|
-
const ssResult = await sidecarRequest(
|
|
2365
|
-
|
|
2366
|
-
|
|
2366
|
+
const ssResult = await sidecarRequest(
|
|
2367
|
+
"/screenshot-full-page",
|
|
2368
|
+
path9 ? { path: path9 } : void 0,
|
|
2369
|
+
{ timeout: 12e4 }
|
|
2370
|
+
);
|
|
2367
2371
|
url = ssResult?.url || ssResult?.screenshotUrl;
|
|
2368
2372
|
if (!url) {
|
|
2369
2373
|
throw new Error(
|
|
@@ -2398,6 +2402,10 @@ var screenshotTool = {
|
|
|
2398
2402
|
imageUrl: {
|
|
2399
2403
|
type: "string",
|
|
2400
2404
|
description: "URL of an existing screenshot to analyze instead of capturing a new one. Use this for additional questions about a previous screenshot."
|
|
2405
|
+
},
|
|
2406
|
+
path: {
|
|
2407
|
+
type: "string",
|
|
2408
|
+
description: 'Navigate to this path before capturing (e.g. "/settings", "/dashboard"). If omitted, screenshots the current page.'
|
|
2401
2409
|
}
|
|
2402
2410
|
}
|
|
2403
2411
|
}
|
|
@@ -2413,6 +2421,7 @@ var screenshotTool = {
|
|
|
2413
2421
|
}
|
|
2414
2422
|
return await captureAndAnalyzeScreenshot({
|
|
2415
2423
|
prompt: input.prompt,
|
|
2424
|
+
path: input.path,
|
|
2416
2425
|
onLog: context?.onLog
|
|
2417
2426
|
});
|
|
2418
2427
|
} catch (err) {
|
|
@@ -2425,6 +2434,7 @@ var screenshotTool = {
|
|
|
2425
2434
|
function startStatusWatcher(config) {
|
|
2426
2435
|
const { apiConfig, getContext, onStatus, interval = 3e3, signal } = config;
|
|
2427
2436
|
let lastLabel = "";
|
|
2437
|
+
let lastContext = "";
|
|
2428
2438
|
let inflight = false;
|
|
2429
2439
|
let stopped = false;
|
|
2430
2440
|
const url = `${apiConfig.baseUrl}/_internal/v2/agent/remy/generate-status`;
|
|
@@ -2435,9 +2445,10 @@ function startStatusWatcher(config) {
|
|
|
2435
2445
|
inflight = true;
|
|
2436
2446
|
try {
|
|
2437
2447
|
const context = getContext();
|
|
2438
|
-
if (!context) {
|
|
2448
|
+
if (!context || context === lastContext) {
|
|
2439
2449
|
return;
|
|
2440
2450
|
}
|
|
2451
|
+
lastContext = context;
|
|
2441
2452
|
const res = await fetch(url, {
|
|
2442
2453
|
method: "POST",
|
|
2443
2454
|
headers: {
|
|
@@ -2512,6 +2523,16 @@ ${summaryBlock.text}
|
|
|
2512
2523
|
startIdx = checkpointIdx + 1;
|
|
2513
2524
|
}
|
|
2514
2525
|
const messagesToProcess = messages.slice(startIdx);
|
|
2526
|
+
const toolUseIds = /* @__PURE__ */ new Set();
|
|
2527
|
+
for (const msg of messagesToProcess) {
|
|
2528
|
+
if (msg.role === "assistant" && Array.isArray(msg.content)) {
|
|
2529
|
+
for (const block of msg.content) {
|
|
2530
|
+
if (block.type === "tool") {
|
|
2531
|
+
toolUseIds.add(block.id);
|
|
2532
|
+
}
|
|
2533
|
+
}
|
|
2534
|
+
}
|
|
2535
|
+
}
|
|
2515
2536
|
const cleaned = messagesToProcess.filter((msg) => {
|
|
2516
2537
|
if (Array.isArray(msg.content)) {
|
|
2517
2538
|
const blocks = msg.content;
|
|
@@ -2519,6 +2540,9 @@ ${summaryBlock.text}
|
|
|
2519
2540
|
return false;
|
|
2520
2541
|
}
|
|
2521
2542
|
}
|
|
2543
|
+
if (msg.role === "user" && msg.toolCallId && !toolUseIds.has(msg.toolCallId)) {
|
|
2544
|
+
return false;
|
|
2545
|
+
}
|
|
2522
2546
|
return true;
|
|
2523
2547
|
}).map((msg) => {
|
|
2524
2548
|
if (msg.role === "user" && typeof msg.content === "string" && msg.content.startsWith("@@automated::")) {
|
|
@@ -2983,7 +3007,12 @@ var BROWSER_TOOLS = [
|
|
|
2983
3007
|
description: "Capture a full-height screenshot of the current page. Returns a CDN URL with full text analysis and description.",
|
|
2984
3008
|
inputSchema: {
|
|
2985
3009
|
type: "object",
|
|
2986
|
-
properties: {
|
|
3010
|
+
properties: {
|
|
3011
|
+
path: {
|
|
3012
|
+
type: "string",
|
|
3013
|
+
description: 'Navigate to this path before capturing (e.g. "/settings"). If omitted, screenshots the current page.'
|
|
3014
|
+
}
|
|
3015
|
+
}
|
|
2987
3016
|
}
|
|
2988
3017
|
},
|
|
2989
3018
|
{
|
|
@@ -3056,7 +3085,10 @@ var browserAutomationTool = {
|
|
|
3056
3085
|
executeTool: async (name, _input, _toolCallId, onLog) => {
|
|
3057
3086
|
if (name === "screenshotFullPage") {
|
|
3058
3087
|
try {
|
|
3059
|
-
return await captureAndAnalyzeScreenshot({
|
|
3088
|
+
return await captureAndAnalyzeScreenshot({
|
|
3089
|
+
path: _input.path,
|
|
3090
|
+
onLog
|
|
3091
|
+
});
|
|
3060
3092
|
} catch (err) {
|
|
3061
3093
|
return `Error taking screenshot: ${err.message}`;
|
|
3062
3094
|
}
|
|
@@ -3308,6 +3340,10 @@ var definition5 = {
|
|
|
3308
3340
|
prompt: {
|
|
3309
3341
|
type: "string",
|
|
3310
3342
|
description: "Optional specific question about the screenshot."
|
|
3343
|
+
},
|
|
3344
|
+
path: {
|
|
3345
|
+
type: "string",
|
|
3346
|
+
description: 'Navigate to this path before capturing (e.g. "/settings"). If omitted, screenshots the current page.'
|
|
3311
3347
|
}
|
|
3312
3348
|
}
|
|
3313
3349
|
}
|
|
@@ -3316,6 +3352,7 @@ async function execute5(input, onLog) {
|
|
|
3316
3352
|
try {
|
|
3317
3353
|
return await captureAndAnalyzeScreenshot({
|
|
3318
3354
|
prompt: input.prompt,
|
|
3355
|
+
path: input.path,
|
|
3319
3356
|
onLog
|
|
3320
3357
|
});
|
|
3321
3358
|
} catch (err) {
|
package/dist/index.js
CHANGED
|
@@ -2056,9 +2056,11 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
|
2056
2056
|
let prompt;
|
|
2057
2057
|
let existingUrl;
|
|
2058
2058
|
let onLog;
|
|
2059
|
+
let path10;
|
|
2059
2060
|
if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
|
|
2060
2061
|
prompt = promptOrOptions.prompt;
|
|
2061
2062
|
existingUrl = promptOrOptions.imageUrl;
|
|
2063
|
+
path10 = promptOrOptions.path;
|
|
2062
2064
|
onLog = promptOrOptions.onLog;
|
|
2063
2065
|
} else {
|
|
2064
2066
|
prompt = promptOrOptions;
|
|
@@ -2067,9 +2069,11 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
|
2067
2069
|
if (existingUrl) {
|
|
2068
2070
|
url = existingUrl;
|
|
2069
2071
|
} else {
|
|
2070
|
-
const ssResult = await sidecarRequest(
|
|
2071
|
-
|
|
2072
|
-
|
|
2072
|
+
const ssResult = await sidecarRequest(
|
|
2073
|
+
"/screenshot-full-page",
|
|
2074
|
+
path10 ? { path: path10 } : void 0,
|
|
2075
|
+
{ timeout: 12e4 }
|
|
2076
|
+
);
|
|
2073
2077
|
url = ssResult?.url || ssResult?.screenshotUrl;
|
|
2074
2078
|
if (!url) {
|
|
2075
2079
|
throw new Error(
|
|
@@ -2118,6 +2122,10 @@ var init_screenshot2 = __esm({
|
|
|
2118
2122
|
imageUrl: {
|
|
2119
2123
|
type: "string",
|
|
2120
2124
|
description: "URL of an existing screenshot to analyze instead of capturing a new one. Use this for additional questions about a previous screenshot."
|
|
2125
|
+
},
|
|
2126
|
+
path: {
|
|
2127
|
+
type: "string",
|
|
2128
|
+
description: 'Navigate to this path before capturing (e.g. "/settings", "/dashboard"). If omitted, screenshots the current page.'
|
|
2121
2129
|
}
|
|
2122
2130
|
}
|
|
2123
2131
|
}
|
|
@@ -2133,6 +2141,7 @@ var init_screenshot2 = __esm({
|
|
|
2133
2141
|
}
|
|
2134
2142
|
return await captureAndAnalyzeScreenshot({
|
|
2135
2143
|
prompt: input.prompt,
|
|
2144
|
+
path: input.path,
|
|
2136
2145
|
onLog: context?.onLog
|
|
2137
2146
|
});
|
|
2138
2147
|
} catch (err) {
|
|
@@ -2147,6 +2156,7 @@ var init_screenshot2 = __esm({
|
|
|
2147
2156
|
function startStatusWatcher(config) {
|
|
2148
2157
|
const { apiConfig, getContext, onStatus, interval = 3e3, signal } = config;
|
|
2149
2158
|
let lastLabel = "";
|
|
2159
|
+
let lastContext = "";
|
|
2150
2160
|
let inflight = false;
|
|
2151
2161
|
let stopped = false;
|
|
2152
2162
|
const url = `${apiConfig.baseUrl}/_internal/v2/agent/remy/generate-status`;
|
|
@@ -2157,9 +2167,10 @@ function startStatusWatcher(config) {
|
|
|
2157
2167
|
inflight = true;
|
|
2158
2168
|
try {
|
|
2159
2169
|
const context = getContext();
|
|
2160
|
-
if (!context) {
|
|
2170
|
+
if (!context || context === lastContext) {
|
|
2161
2171
|
return;
|
|
2162
2172
|
}
|
|
2173
|
+
lastContext = context;
|
|
2163
2174
|
const res = await fetch(url, {
|
|
2164
2175
|
method: "POST",
|
|
2165
2176
|
headers: {
|
|
@@ -2239,6 +2250,16 @@ ${summaryBlock.text}
|
|
|
2239
2250
|
startIdx = checkpointIdx + 1;
|
|
2240
2251
|
}
|
|
2241
2252
|
const messagesToProcess = messages.slice(startIdx);
|
|
2253
|
+
const toolUseIds = /* @__PURE__ */ new Set();
|
|
2254
|
+
for (const msg of messagesToProcess) {
|
|
2255
|
+
if (msg.role === "assistant" && Array.isArray(msg.content)) {
|
|
2256
|
+
for (const block of msg.content) {
|
|
2257
|
+
if (block.type === "tool") {
|
|
2258
|
+
toolUseIds.add(block.id);
|
|
2259
|
+
}
|
|
2260
|
+
}
|
|
2261
|
+
}
|
|
2262
|
+
}
|
|
2242
2263
|
const cleaned = messagesToProcess.filter((msg) => {
|
|
2243
2264
|
if (Array.isArray(msg.content)) {
|
|
2244
2265
|
const blocks = msg.content;
|
|
@@ -2246,6 +2267,9 @@ ${summaryBlock.text}
|
|
|
2246
2267
|
return false;
|
|
2247
2268
|
}
|
|
2248
2269
|
}
|
|
2270
|
+
if (msg.role === "user" && msg.toolCallId && !toolUseIds.has(msg.toolCallId)) {
|
|
2271
|
+
return false;
|
|
2272
|
+
}
|
|
2249
2273
|
return true;
|
|
2250
2274
|
}).map((msg) => {
|
|
2251
2275
|
if (msg.role === "user" && typeof msg.content === "string" && msg.content.startsWith("@@automated::")) {
|
|
@@ -2729,7 +2753,12 @@ var init_tools = __esm({
|
|
|
2729
2753
|
description: "Capture a full-height screenshot of the current page. Returns a CDN URL with full text analysis and description.",
|
|
2730
2754
|
inputSchema: {
|
|
2731
2755
|
type: "object",
|
|
2732
|
-
properties: {
|
|
2756
|
+
properties: {
|
|
2757
|
+
path: {
|
|
2758
|
+
type: "string",
|
|
2759
|
+
description: 'Navigate to this path before capturing (e.g. "/settings"). If omitted, screenshots the current page.'
|
|
2760
|
+
}
|
|
2761
|
+
}
|
|
2733
2762
|
}
|
|
2734
2763
|
},
|
|
2735
2764
|
{
|
|
@@ -2865,7 +2894,10 @@ var init_browserAutomation = __esm({
|
|
|
2865
2894
|
executeTool: async (name, _input, _toolCallId, onLog) => {
|
|
2866
2895
|
if (name === "screenshotFullPage") {
|
|
2867
2896
|
try {
|
|
2868
|
-
return await captureAndAnalyzeScreenshot({
|
|
2897
|
+
return await captureAndAnalyzeScreenshot({
|
|
2898
|
+
path: _input.path,
|
|
2899
|
+
onLog
|
|
2900
|
+
});
|
|
2869
2901
|
} catch (err) {
|
|
2870
2902
|
return `Error taking screenshot: ${err.message}`;
|
|
2871
2903
|
}
|
|
@@ -3143,6 +3175,7 @@ async function execute5(input, onLog) {
|
|
|
3143
3175
|
try {
|
|
3144
3176
|
return await captureAndAnalyzeScreenshot({
|
|
3145
3177
|
prompt: input.prompt,
|
|
3178
|
+
path: input.path,
|
|
3146
3179
|
onLog
|
|
3147
3180
|
});
|
|
3148
3181
|
} catch (err) {
|
|
@@ -3163,6 +3196,10 @@ var init_screenshot3 = __esm({
|
|
|
3163
3196
|
prompt: {
|
|
3164
3197
|
type: "string",
|
|
3165
3198
|
description: "Optional specific question about the screenshot."
|
|
3199
|
+
},
|
|
3200
|
+
path: {
|
|
3201
|
+
type: "string",
|
|
3202
|
+
description: 'Navigate to this path before capturing (e.g. "/settings"). If omitted, screenshots the current page.'
|
|
3166
3203
|
}
|
|
3167
3204
|
}
|
|
3168
3205
|
}
|
|
@@ -26,12 +26,16 @@ dist/interfaces/web/
|
|
|
26
26
|
|
|
27
27
|
```json
|
|
28
28
|
{
|
|
29
|
-
"
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
"web": {
|
|
30
|
+
"devPort": 5173,
|
|
31
|
+
"devCommand": "npm run dev",
|
|
32
|
+
"defaultPreviewMode": "desktop"
|
|
33
|
+
}
|
|
32
34
|
}
|
|
33
35
|
```
|
|
34
36
|
|
|
37
|
+
All fields are nested under the `"web"` key.
|
|
38
|
+
|
|
35
39
|
| Field | Type | Default | Description |
|
|
36
40
|
|-------|------|---------|-------------|
|
|
37
41
|
| `devPort` | `number` | `5173` | Port for the dev server |
|
|
@@ -99,7 +103,7 @@ Omit the `methods` field (or the config entirely) to expose all methods.
|
|
|
99
103
|
### Usage
|
|
100
104
|
|
|
101
105
|
```bash
|
|
102
|
-
curl -X POST https://
|
|
106
|
+
curl -X POST https://{app-subdomain}.mindstudio.ai/_/methods/submit-vendor-request/invoke \
|
|
103
107
|
-H "Authorization: Bearer sk..." \
|
|
104
108
|
-H "Content-Type: application/json" \
|
|
105
109
|
-d '{ "input": { "name": "Acme" } }'
|
|
@@ -200,7 +204,7 @@ Inbound HTTP endpoints that invoke methods.
|
|
|
200
204
|
}
|
|
201
205
|
```
|
|
202
206
|
|
|
203
|
-
Endpoint URL: `https://
|
|
207
|
+
Endpoint URL: `https://{app-subdomain}.mindstudio.ai/_/webhook/{secret}`
|
|
204
208
|
|
|
205
209
|
Accepts any HTTP method. The method receives `{ method, headers, query, body }` as input.
|
|
206
210
|
|
|
@@ -38,7 +38,9 @@ Always consult the code sanity check before writing code in initialCodegen with
|
|
|
38
38
|
|
|
39
39
|
### QA (`runAutomatedBrowserTest`)
|
|
40
40
|
|
|
41
|
-
For verifying complex stateful interactions: multi-step form submissions, auth flows, real-time updates, flows that require specific data/role setup. This spins up a full chrome browser automation — it's heavyweight. Do not use it for basic rendering or navigation checks. If you can verify something with a screenshot or by reading the code, do that instead. Run a scenario first to seed test data and set user roles. The user is able to watch QA work on their screen via a live browser preview - the cursor will move, type, etc - so you can also use this to demo functionality to the user and help them understand how to use their app.
|
|
41
|
+
For verifying complex stateful interactions: multi-step form submissions, auth flows, real-time updates, flows that require specific data/role setup. This spins up a full chrome browser automation — it's heavyweight. Do not use it for basic rendering or navigation checks. If you can verify something with a screenshot or by reading the code, do that instead. Run a scenario first to seed test data and set user roles. The user is able to watch QA work on their screen via a live browser preview - the cursor will move, type, etc - so you can also use this to demo functionality to the user and help them understand how to use their app.
|
|
42
|
+
|
|
43
|
+
The QA agent can see the screen. Describe what to test, not how — it will figure out what to click, what to check, and what values to use.
|
|
42
44
|
|
|
43
45
|
### Background Execution
|
|
44
46
|
|