@mindstudio-ai/remy 0.1.150 → 0.1.152

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -537,6 +537,12 @@ async function* streamChat(params) {
537
537
  }
538
538
  }
539
539
  }
540
+ if (buffer.startsWith("data: ")) {
541
+ try {
542
+ yield JSON.parse(buffer.slice(6));
543
+ } catch {
544
+ }
545
+ }
540
546
  if (!receivedDone) {
541
547
  log2.warn("Stream ended without done event", {
542
548
  requestId,
@@ -544,12 +550,10 @@ async function* streamChat(params) {
544
550
  durationMs: Date.now() - startTime,
545
551
  remainingBuffer: buffer.slice(0, 200)
546
552
  });
547
- }
548
- if (buffer.startsWith("data: ")) {
549
- try {
550
- yield JSON.parse(buffer.slice(6));
551
- } catch {
552
- }
553
+ yield {
554
+ type: "error",
555
+ error: "Network error: stream ended before completion"
556
+ };
553
557
  }
554
558
  }
555
559
  var MAX_RETRIES = 5;
@@ -786,11 +790,40 @@ function serializeForSummary(messages) {
786
790
  return `[${msg.role}]: ${parts.join("\n")}`;
787
791
  }).join("\n\n");
788
792
  }
793
+ var CHUNK_CHAR_LIMIT = 24e5;
789
794
  async function generateSummary(apiConfig, name, compactionPrompt, messagesToSummarize, mainSystem, mainTools) {
790
795
  const serialized = serializeForSummary(messagesToSummarize);
791
796
  if (!serialized.trim()) {
792
797
  return null;
793
798
  }
799
+ if (serialized.length > CHUNK_CHAR_LIMIT && messagesToSummarize.length > 1) {
800
+ const mid = Math.floor(messagesToSummarize.length / 2);
801
+ log3.info("Chunking summary", {
802
+ name,
803
+ messageCount: messagesToSummarize.length,
804
+ serializedLength: serialized.length
805
+ });
806
+ const [first, second] = await Promise.all([
807
+ generateSummary(
808
+ apiConfig,
809
+ `${name} [pt1]`,
810
+ compactionPrompt,
811
+ messagesToSummarize.slice(0, mid),
812
+ mainSystem,
813
+ mainTools
814
+ ),
815
+ generateSummary(
816
+ apiConfig,
817
+ `${name} [pt2]`,
818
+ compactionPrompt,
819
+ messagesToSummarize.slice(mid),
820
+ mainSystem,
821
+ mainTools
822
+ )
823
+ ]);
824
+ const parts = [first, second].filter((p) => !!p);
825
+ return parts.length > 0 ? parts.join("\n\n---\n\n") : null;
826
+ }
794
827
  log3.info("Generating summary", {
795
828
  name,
796
829
  messageCount: messagesToSummarize.length,
@@ -2676,28 +2709,6 @@ function acquireBrowserLock() {
2676
2709
  lockQueue = next;
2677
2710
  return wait.then(() => release);
2678
2711
  }
2679
- async function checkBrowserConnected() {
2680
- try {
2681
- const status = await sidecarRequest(
2682
- "/browser-status",
2683
- {},
2684
- { timeout: 5e3 }
2685
- );
2686
- if (!status.connected) {
2687
- return {
2688
- connected: false,
2689
- reason: BROWSER_UNAVAILABLE_MESSAGE
2690
- };
2691
- }
2692
- return { connected: true };
2693
- } catch {
2694
- return {
2695
- connected: false,
2696
- reason: BROWSER_UNAVAILABLE_MESSAGE
2697
- };
2698
- }
2699
- }
2700
- var BROWSER_UNAVAILABLE_MESSAGE = "Browser preview unavailable \u2014 the user has closed their browser and we are continuing to work in the background. This is not a code failure and not something to diagnose. Do not tell the user to click or open anything. Skip the visual check and verify your work through other means: runMethod for backend behavior, queryDatabase for data checks, .logs/devServer.ndjson for build errors, .logs/browser.ndjson for runtime errors, lspDiagnostics for type/syntax, or read the code directly.";
2701
2712
 
2702
2713
  // src/statusWatcher.ts
2703
2714
  function startStatusWatcher(config) {
@@ -2818,9 +2829,9 @@ function fixOrphanedToolCalls(messages) {
2818
2829
  toolResultIds.add(msg.toolCallId);
2819
2830
  }
2820
2831
  }
2821
- const result = [...messages];
2822
- for (let i = result.length - 1; i >= 0; i--) {
2823
- const msg = result[i];
2832
+ const result = [];
2833
+ for (const msg of messages) {
2834
+ result.push(msg);
2824
2835
  if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
2825
2836
  continue;
2826
2837
  }
@@ -2828,17 +2839,15 @@ function fixOrphanedToolCalls(messages) {
2828
2839
  (b) => b.type === "tool"
2829
2840
  );
2830
2841
  const orphans = toolBlocks.filter((tc) => !toolResultIds.has(tc.id));
2831
- if (orphans.length === 0) {
2832
- continue;
2842
+ for (const tc of orphans) {
2843
+ result.push({
2844
+ role: "user",
2845
+ content: "Error: tool result lost (session recovered)",
2846
+ toolCallId: tc.id,
2847
+ isToolError: true
2848
+ });
2849
+ toolResultIds.add(tc.id);
2833
2850
  }
2834
- const synthetics = orphans.map((tc) => ({
2835
- role: "user",
2836
- content: "Error: tool result lost (session recovered)",
2837
- toolCallId: tc.id,
2838
- isToolError: true
2839
- }));
2840
- result.splice(i + 1, 0, ...synthetics);
2841
- break;
2842
2851
  }
2843
2852
  return result;
2844
2853
  }
@@ -3336,7 +3345,7 @@ var BROWSER_TOOLS = [
3336
3345
  {
3337
3346
  clearable: true,
3338
3347
  name: "browserCommand",
3339
- description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Timeout: 120s.",
3348
+ description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recordingUrl` \u2014 an rrweb session recording for visual replay. Timeout: 120s.",
3340
3349
  inputSchema: {
3341
3350
  type: "object",
3342
3351
  properties: {
@@ -3473,14 +3482,6 @@ var browserAutomationTool = {
3473
3482
  }
3474
3483
  const release = await acquireBrowserLock();
3475
3484
  try {
3476
- const browserStatus = await checkBrowserConnected();
3477
- if (!browserStatus.connected) {
3478
- return browserStatus.reason ?? "Browser preview unavailable.";
3479
- }
3480
- try {
3481
- await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3482
- } catch {
3483
- }
3484
3485
  const result = await runSubAgent({
3485
3486
  system: getBrowserAutomationPrompt(),
3486
3487
  task: input.task,
@@ -3570,10 +3571,6 @@ var browserAutomationTool = {
3570
3571
  toolRegistry: context.toolRegistry,
3571
3572
  captureArtifacts: ["screenshotFullPage"]
3572
3573
  });
3573
- try {
3574
- await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3575
- } catch {
3576
- }
3577
3574
  context.subAgentMessages?.set(context.toolCallId, result.messages);
3578
3575
  const ss = result.artifacts?.screenshotFullPage;
3579
3576
  if (ss?.url) {
@@ -3659,10 +3656,6 @@ var screenshotTool = {
3659
3656
  }
3660
3657
  const release = await acquireBrowserLock();
3661
3658
  try {
3662
- const browserStatus = await checkBrowserConnected();
3663
- if (!browserStatus.connected) {
3664
- return browserStatus.reason ?? "Browser preview unavailable.";
3665
- }
3666
3659
  return await captureAndAnalyzeScreenshot({
3667
3660
  prompt: input.prompt,
3668
3661
  path: input.path,
@@ -3997,10 +3990,6 @@ async function execute5(input, onLog, context) {
3997
3990
  }
3998
3991
  const release = await acquireBrowserLock();
3999
3992
  try {
4000
- const browserStatus = await checkBrowserConnected();
4001
- if (!browserStatus.connected) {
4002
- return browserStatus.reason ?? "Browser preview unavailable.";
4003
- }
4004
3993
  return await captureAndAnalyzeScreenshot({
4005
3994
  prompt: input.prompt,
4006
3995
  path: input.path,
package/dist/index.js CHANGED
@@ -222,6 +222,12 @@ async function* streamChat(params) {
222
222
  }
223
223
  }
224
224
  }
225
+ if (buffer.startsWith("data: ")) {
226
+ try {
227
+ yield JSON.parse(buffer.slice(6));
228
+ } catch {
229
+ }
230
+ }
225
231
  if (!receivedDone) {
226
232
  log.warn("Stream ended without done event", {
227
233
  requestId,
@@ -229,12 +235,10 @@ async function* streamChat(params) {
229
235
  durationMs: Date.now() - startTime,
230
236
  remainingBuffer: buffer.slice(0, 200)
231
237
  });
232
- }
233
- if (buffer.startsWith("data: ")) {
234
- try {
235
- yield JSON.parse(buffer.slice(6));
236
- } catch {
237
- }
238
+ yield {
239
+ type: "error",
240
+ error: "Network error: stream ended before completion"
241
+ };
238
242
  }
239
243
  }
240
244
  function isRetryableError(error) {
@@ -1507,6 +1511,34 @@ async function generateSummary(apiConfig, name, compactionPrompt, messagesToSumm
1507
1511
  if (!serialized.trim()) {
1508
1512
  return null;
1509
1513
  }
1514
+ if (serialized.length > CHUNK_CHAR_LIMIT && messagesToSummarize.length > 1) {
1515
+ const mid = Math.floor(messagesToSummarize.length / 2);
1516
+ log2.info("Chunking summary", {
1517
+ name,
1518
+ messageCount: messagesToSummarize.length,
1519
+ serializedLength: serialized.length
1520
+ });
1521
+ const [first, second] = await Promise.all([
1522
+ generateSummary(
1523
+ apiConfig,
1524
+ `${name} [pt1]`,
1525
+ compactionPrompt,
1526
+ messagesToSummarize.slice(0, mid),
1527
+ mainSystem,
1528
+ mainTools
1529
+ ),
1530
+ generateSummary(
1531
+ apiConfig,
1532
+ `${name} [pt2]`,
1533
+ compactionPrompt,
1534
+ messagesToSummarize.slice(mid),
1535
+ mainSystem,
1536
+ mainTools
1537
+ )
1538
+ ]);
1539
+ const parts = [first, second].filter((p) => !!p);
1540
+ return parts.length > 0 ? parts.join("\n\n---\n\n") : null;
1541
+ }
1510
1542
  log2.info("Generating summary", {
1511
1543
  name,
1512
1544
  messageCount: messagesToSummarize.length,
@@ -1544,7 +1576,7 @@ ${serialized}` : serialized;
1544
1576
  log2.info("Summary generated", { name, summaryLength: summaryText.length });
1545
1577
  return summaryText.trim();
1546
1578
  }
1547
- var log2, CONVERSATION_SUMMARY_PROMPT, SUBAGENT_SUMMARY_PROMPT, SUMMARIZABLE_SUBAGENTS;
1579
+ var log2, CONVERSATION_SUMMARY_PROMPT, SUBAGENT_SUMMARY_PROMPT, SUMMARIZABLE_SUBAGENTS, CHUNK_CHAR_LIMIT;
1548
1580
  var init_compaction = __esm({
1549
1581
  "src/compaction/index.ts"() {
1550
1582
  "use strict";
@@ -1555,6 +1587,7 @@ var init_compaction = __esm({
1555
1587
  CONVERSATION_SUMMARY_PROMPT = readAsset("compaction", "conversation.md");
1556
1588
  SUBAGENT_SUMMARY_PROMPT = readAsset("compaction", "subagent.md");
1557
1589
  SUMMARIZABLE_SUBAGENTS = ["visualDesignExpert", "productVision"];
1590
+ CHUNK_CHAR_LIMIT = 24e5;
1558
1591
  }
1559
1592
  });
1560
1593
 
@@ -2954,34 +2987,11 @@ function acquireBrowserLock() {
2954
2987
  lockQueue = next;
2955
2988
  return wait.then(() => release);
2956
2989
  }
2957
- async function checkBrowserConnected() {
2958
- try {
2959
- const status = await sidecarRequest(
2960
- "/browser-status",
2961
- {},
2962
- { timeout: 5e3 }
2963
- );
2964
- if (!status.connected) {
2965
- return {
2966
- connected: false,
2967
- reason: BROWSER_UNAVAILABLE_MESSAGE
2968
- };
2969
- }
2970
- return { connected: true };
2971
- } catch {
2972
- return {
2973
- connected: false,
2974
- reason: BROWSER_UNAVAILABLE_MESSAGE
2975
- };
2976
- }
2977
- }
2978
- var lockQueue, BROWSER_UNAVAILABLE_MESSAGE;
2990
+ var lockQueue;
2979
2991
  var init_browserLock = __esm({
2980
2992
  "src/tools/_helpers/browserLock.ts"() {
2981
2993
  "use strict";
2982
- init_sidecar();
2983
2994
  lockQueue = Promise.resolve();
2984
- BROWSER_UNAVAILABLE_MESSAGE = "Browser preview unavailable \u2014 the user has closed their browser and we are continuing to work in the background. This is not a code failure and not something to diagnose. Do not tell the user to click or open anything. Skip the visual check and verify your work through other means: runMethod for backend behavior, queryDatabase for data checks, .logs/devServer.ndjson for build errors, .logs/browser.ndjson for runtime errors, lspDiagnostics for type/syntax, or read the code directly.";
2985
2995
  }
2986
2996
  });
2987
2997
 
@@ -3114,9 +3124,9 @@ function fixOrphanedToolCalls(messages) {
3114
3124
  toolResultIds.add(msg.toolCallId);
3115
3125
  }
3116
3126
  }
3117
- const result = [...messages];
3118
- for (let i = result.length - 1; i >= 0; i--) {
3119
- const msg = result[i];
3127
+ const result = [];
3128
+ for (const msg of messages) {
3129
+ result.push(msg);
3120
3130
  if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
3121
3131
  continue;
3122
3132
  }
@@ -3124,17 +3134,15 @@ function fixOrphanedToolCalls(messages) {
3124
3134
  (b) => b.type === "tool"
3125
3135
  );
3126
3136
  const orphans = toolBlocks.filter((tc) => !toolResultIds.has(tc.id));
3127
- if (orphans.length === 0) {
3128
- continue;
3137
+ for (const tc of orphans) {
3138
+ result.push({
3139
+ role: "user",
3140
+ content: "Error: tool result lost (session recovered)",
3141
+ toolCallId: tc.id,
3142
+ isToolError: true
3143
+ });
3144
+ toolResultIds.add(tc.id);
3129
3145
  }
3130
- const synthetics = orphans.map((tc) => ({
3131
- role: "user",
3132
- content: "Error: tool result lost (session recovered)",
3133
- toolCallId: tc.id,
3134
- isToolError: true
3135
- }));
3136
- result.splice(i + 1, 0, ...synthetics);
3137
- break;
3138
3146
  }
3139
3147
  return result;
3140
3148
  }
@@ -3652,7 +3660,7 @@ var init_tools = __esm({
3652
3660
  {
3653
3661
  clearable: true,
3654
3662
  name: "browserCommand",
3655
- description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Timeout: 120s.",
3663
+ description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recordingUrl` \u2014 an rrweb session recording for visual replay. Timeout: 120s.",
3656
3664
  inputSchema: {
3657
3665
  type: "object",
3658
3666
  properties: {
@@ -3810,14 +3818,6 @@ var init_browserAutomation = __esm({
3810
3818
  }
3811
3819
  const release = await acquireBrowserLock();
3812
3820
  try {
3813
- const browserStatus = await checkBrowserConnected();
3814
- if (!browserStatus.connected) {
3815
- return browserStatus.reason ?? "Browser preview unavailable.";
3816
- }
3817
- try {
3818
- await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3819
- } catch {
3820
- }
3821
3821
  const result = await runSubAgent({
3822
3822
  system: getBrowserAutomationPrompt(),
3823
3823
  task: input.task,
@@ -3907,10 +3907,6 @@ var init_browserAutomation = __esm({
3907
3907
  toolRegistry: context.toolRegistry,
3908
3908
  captureArtifacts: ["screenshotFullPage"]
3909
3909
  });
3910
- try {
3911
- await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3912
- } catch {
3913
- }
3914
3910
  context.subAgentMessages?.set(context.toolCallId, result.messages);
3915
3911
  const ss = result.artifacts?.screenshotFullPage;
3916
3912
  if (ss?.url) {
@@ -4006,10 +4002,6 @@ var init_screenshot2 = __esm({
4006
4002
  }
4007
4003
  const release = await acquireBrowserLock();
4008
4004
  try {
4009
- const browserStatus = await checkBrowserConnected();
4010
- if (!browserStatus.connected) {
4011
- return browserStatus.reason ?? "Browser preview unavailable.";
4012
- }
4013
4005
  return await captureAndAnalyzeScreenshot({
4014
4006
  prompt: input.prompt,
4015
4007
  path: input.path,
@@ -4360,10 +4352,6 @@ async function execute5(input, onLog, context) {
4360
4352
  }
4361
4353
  const release = await acquireBrowserLock();
4362
4354
  try {
4363
- const browserStatus = await checkBrowserConnected();
4364
- if (!browserStatus.connected) {
4365
- return browserStatus.reason ?? "Browser preview unavailable.";
4366
- }
4367
4355
  return await captureAndAnalyzeScreenshot({
4368
4356
  prompt: input.prompt,
4369
4357
  path: input.path,
@@ -145,7 +145,7 @@ The intro framing ("you have a lot on your plate") gives the model permission to
145
145
  | `productVision` | Roadmap ownership & product strategy | writeRoadmapItem, updateRoadmapItem, deleteRoadmapItem | Spec files + current roadmap |
146
146
  | `sdkConsultant` | MindStudio SDK architecture | None (shells out to `mindstudio ask` CLI) | None (external agent) |
147
147
  | `codeSanityCheck` | Pre-build review | readFile, grep, glob, searchGoogle, fetchUrl, askMindStudioSdk, bash (readonly) | Spec files |
148
- | `browserAutomation` | Interactive UI testing | browserCommand, screenshot, resetBrowser | None (interacts with live preview) |
148
+ | `browserAutomation` | Interactive UI testing | browserCommand, screenshotFullPage, setupBrowser | None (interacts with live preview) |
149
149
 
150
150
  ### Shared infrastructure
151
151
 
@@ -177,8 +177,6 @@ New `type: roadmap` for MSFM files in `src/roadmap/`. Each item has frontmatter
177
177
  - **Automated message sentinel** — `@@automated::{tag}@@` prefix on user messages, stripped before sending to LLM. Frontend uses for custom rendering.
178
178
  - **Project naming** — `setProjectName` tool for setting display name after intake.
179
179
  - **Dynamic status labels** — `statusWatcher.ts` periodically calls a lightweight endpoint to generate descriptive labels during agent work.
180
- - **Browser status check** — agent checks `/browser-status` before starting browser automation to fail fast if preview isn't connected.
181
- - **Browser reset** — `resetBrowser` tool restores preview to clean state after testing.
182
180
  - **Asset bundling** — `tsup.config.ts` copies .md/.json/.sh files from src/ to dist/ on build.
183
181
 
184
182
  ## What's Not Done
@@ -12,7 +12,7 @@ Run `lspDiagnostics` after every turn where you have edited code in any meaningf
12
12
  - Spot-check methods with `runMethod`. The dev database is a disposable snapshot that will have been seeded with scenario data, so don't worry about being destructive.
13
13
  - For frontend work, take a single `screenshot` to confirm the main view renders correctly or look at the browser log for any console errors in the user's preview.
14
14
  - Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, or when the user reports something broken that you can't identify from code alone.
15
- - If the browser preview isn't connected, skip the visual check and verify through methods, logs, and code instead. Preview unavailability is an environmental state, not a code issue — the user might have closed their browser and we are continuing to work in the background.
15
+ - If the browser is unavailable, skip the visual check and verify through methods, logs, and code instead. Browser unavailability is an infrastructure issue, not a code problem — don't try to diagnose or fix it.
16
16
 
17
17
  Aim for confidence that the core happy paths work. If the 80% case is solid, the remaining edge cases are likely fine and the user can surface them in chat. Don't screenshot every page, test every permutation, or verify every secondary flow. One or two runtime checks that confirm the app loads and data flows through is enough.
18
18
 
@@ -39,4 +39,5 @@ You will occasionally receive automated messages prefixed with `@@automated_mess
39
39
  - Keep language accessible. Describe what the app *does*, not how it's implemented, unless the user demonstrates technical fluency.
40
40
  - Always use full paths relative to the project root when mentioning files (`dist/interfaces/web/src/App.tsx`, not `App.tsx`). Paths will be rendered as clickable links for the user.
41
41
  - Use inline `code` formatting only for things the user needs to type or search for.
42
- - When writing prose or communicating with the user, avoid em dashes (and especially when writing specs); use periods, commas, colons, or parentheses instead. Do not use emojis.
42
+ - When writing prose or communicating with the user, avoid em dashes (and especially when writing specs); use periods, commas, colons, or parentheses instead.
43
+ - Never use emojis when responding to the user.
@@ -44,7 +44,7 @@ The QA agent can see the screen. Describe what to test, not how — it will figu
44
44
 
45
45
  Never tell QA what names to use when testing or what values to input - it will use its own judgment.
46
46
 
47
- If the browser preview is unavailable, QA can't run. Treat that as an environmental limit, not a problem with the app — the user has closed their browser and we are continuing to work in the background. Do not guide the user to open or click anything. Verify through methods, logs, and code inspection instead, and just note that visual QA was skipped.
47
+ If the browser is unavailable, QA can't run. That's an infrastructure issue, not a problem with the app — don't try to diagnose or fix it. Verify through methods, logs, and code inspection instead, and note that visual QA was skipped.
48
48
 
49
49
  ### Background Execution
50
50
 
@@ -3,7 +3,7 @@ You are a browser smoke test agent. You verify that features work end to end by
3
3
  ## Rules to Remember
4
4
  - Don't overthink the tests - the goal is to generally make sure things work as expected, not to provide detailed QA. If something seems mostly okay, note it and move on. Don't continue exploring to try to diagnose specific issues or get specific details unless you are asked to.
5
5
  - Fail early: If you encounter a showstopper bug (something doesn't load, something is broken, etc.) do not attempt to diagnose it or work around it. We need core common user paths to work - if they don't the app is broken and testing should not continue until it is fixed. Return early with a report to let the developer fix it, they'll run another test when they're ready.
6
- - Browser disconnection is environmental, not a test failure. If `browserCommand` returns `BROWSER_DISCONNECTED` or the browser otherwise drops mid-test, the test is **inconclusive** — the user has closed their browser and we are continuing to work in the background. Do not retry, do not attribute it to app brokenness, do not tell the user to open or click anything. Report "test inconclusive: browser disconnected" and stop.
6
+ - Browser unavailability is an infrastructure issue, not a test failure. If `browserCommand` reports the browser is unavailable or drops mid-test, the test is **inconclusive** — do not retry, do not attribute it to app brokenness. Report "test inconclusive: browser unavailable" and stop.
7
7
 
8
8
  ## Tester Persona
9
9
  The user is watching the automation happen on their screen in real-time. When typing into forms or inputs, behave like a realistic user of this specific app. Use the app context (if provided) to understand the audience and tone. Type the way that audience would actually type — not formal, not robotic. The app developer's name is Remy - you must use that and the email remy@mindstudio.ai as the basis for any testing that requires a persona.
@@ -39,7 +39,6 @@ Each interactive element has a `[ref=eN]` you can use to target it.
39
39
  - `select`: Select a dropdown option by text. Target the `<select>` element, set `option` to the option text.
40
40
  - `wait`: Wait for an element to appear (polls every 100ms, default 5s timeout). Also waits for network to settle after the element is found.
41
41
  - `navigate`: Navigate to a new URL within the app. Waits for the new page to load before continuing with subsequent steps. Use this instead of evaluate with `window.location.href` when you need to navigate and then continue interacting with the new page. Steps after navigate execute on the new page automatically.
42
- - `reload`: Reload the current page. Useful if something has crashed, you can not exit some dynamic screen, or you need to clear stale data or some stale app state. Waits for the page to reload before continuting with subsequent steps. Use this instead of using evaluate to reload a page.
43
42
  - `evaluate`: Run arbitrary JavaScript in the page and return the result.
44
43
  - `styles`: Read computed CSS styles from page elements. Pass a `properties` array with camelCase CSS property names (e.g., `["backgroundColor", "borderRadius", "fontSize"]`). Omit `properties` for a default set covering colors, typography, spacing, borders, shadows, dimensions, and layout. Uses the same targeting as click/type (ref, text, role, label, selector). Omit the target to get styles for all elements from the last snapshot.
45
44
  - `screenshotViewport`: Take a screenshot of the current viewport. Returns CDN url with full text analysis and dimensions. Useful at the end of an action batch to visually see things like layout shift or overflow. Do not use if you can get what you need with other tools - only use when you need to visually see the viewport.
@@ -61,6 +60,7 @@ Each browserCommand returns:
61
60
  - `snapshot`: the final page state after all steps complete (always present, even without an explicit snapshot step)
62
61
  - `logs`: array of browser-side events that fired during the batch (console output, network failures, JS errors, user interactions). Check this for errors before reporting pass.
63
62
  - `duration`: total execution time in ms
63
+ - `recordingUrl` (optional): URL to an rrweb session recording of the tool call. Present whenever the batch contained an interactive step (click, type, select). Include it in your failure reports so the main agent can share it — it's the fastest way to reproduce a bug visually.
64
64
 
65
65
  On error, the failing step has an `error` field and execution stops. Remaining steps are skipped.
66
66
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.150",
3
+ "version": "0.1.152",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",