@mindstudio-ai/remy 0.1.155 → 0.1.156

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -2676,6 +2676,21 @@ ${opts.styleMap}
2676
2676
  ${TEXT_WRAP_DISCLAIMER}`;
2677
2677
  return p;
2678
2678
  }
2679
+ async function streamScreenshotAnalysis(opts) {
2680
+ const { url, prompt, styleMap, onLog } = opts;
2681
+ onLog?.(JSON.stringify({ url, analysis: null }));
2682
+ const analysisPrompt = buildScreenshotAnalysisPrompt({ prompt, styleMap });
2683
+ let accumulated = "";
2684
+ const analysis = await analyzeImage({
2685
+ prompt: analysisPrompt,
2686
+ imageUrl: url,
2687
+ onLog: (chunk) => {
2688
+ accumulated += chunk;
2689
+ onLog?.(JSON.stringify({ url, analysis: accumulated }));
2690
+ }
2691
+ });
2692
+ return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
2693
+ }
2679
2694
  async function captureAndAnalyzeScreenshot(promptOrOptions) {
2680
2695
  let prompt;
2681
2696
  let existingUrl;
@@ -2710,16 +2725,12 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2710
2725
  if (prompt === false) {
2711
2726
  return url;
2712
2727
  }
2713
- const analysisPrompt = buildScreenshotAnalysisPrompt({
2728
+ return streamScreenshotAnalysis({
2729
+ url,
2714
2730
  prompt: prompt || void 0,
2715
- styleMap
2716
- });
2717
- const analysis = await analyzeImage({
2718
- prompt: analysisPrompt,
2719
- imageUrl: url,
2731
+ styleMap,
2720
2732
  onLog
2721
2733
  });
2722
- return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
2723
2734
  }
2724
2735
 
2725
2736
  // src/tools/_helpers/browserLock.ts
@@ -2739,9 +2750,10 @@ function startStatusWatcher(config) {
2739
2750
  const { apiConfig, getContext, onStatus, interval = 5e3, signal } = config;
2740
2751
  let inflight = false;
2741
2752
  let stopped = false;
2753
+ let pauseCount = 0;
2742
2754
  const url = `${apiConfig.baseUrl}/_internal/v2/agent/remy/generate-status`;
2743
2755
  async function tick() {
2744
- if (stopped || signal?.aborted || inflight) {
2756
+ if (stopped || signal?.aborted || inflight || pauseCount > 0) {
2745
2757
  return;
2746
2758
  }
2747
2759
  inflight = true;
@@ -2766,6 +2778,9 @@ function startStatusWatcher(config) {
2766
2778
  if (!data.label) {
2767
2779
  return;
2768
2780
  }
2781
+ if (pauseCount > 0) {
2782
+ return;
2783
+ }
2769
2784
  onStatus(data.label);
2770
2785
  } catch {
2771
2786
  } finally {
@@ -2779,6 +2794,12 @@ function startStatusWatcher(config) {
2779
2794
  stop() {
2780
2795
  stopped = true;
2781
2796
  clearInterval(timer);
2797
+ },
2798
+ pause() {
2799
+ pauseCount++;
2800
+ },
2801
+ resume() {
2802
+ pauseCount = Math.max(0, pauseCount - 1);
2782
2803
  }
2783
2804
  };
2784
2805
  }
@@ -3634,7 +3655,7 @@ var screenshotTool = {
3634
3655
  },
3635
3656
  instructions: {
3636
3657
  type: "string",
3637
- description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Only use instructions when you need to trigger stateful changes. Never describe what names or values to use when applying the isntructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing."
3658
+ description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing. Use only when interaction is required to *reach* the state you want to capture \u2014 log in, dismiss a modal, switch a tab, follow a route. If your steps are exercising the app's functionality across multiple states (running flows, asserting behavior under interaction, multi-step QA), use `runAutomatedBrowserTest` instead."
3638
3659
  }
3639
3660
  }
3640
3661
  }
@@ -3663,20 +3684,12 @@ var screenshotTool = {
3663
3684
  if (!url) {
3664
3685
  return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
3665
3686
  }
3666
- const analysisPrompt = buildScreenshotAnalysisPrompt({
3687
+ return await streamScreenshotAnalysis({
3688
+ url,
3667
3689
  prompt: input.prompt,
3668
- styleMap
3669
- });
3670
- const analysis = await analyzeImage({
3671
- prompt: analysisPrompt,
3672
- imageUrl: url,
3690
+ styleMap,
3673
3691
  onLog: context?.onLog
3674
3692
  });
3675
- return JSON.stringify({
3676
- url,
3677
- analysis,
3678
- ...styleMap ? { styleMap } : {}
3679
- });
3680
3693
  }
3681
3694
  const release = await acquireBrowserLock();
3682
3695
  try {
@@ -3994,20 +4007,12 @@ async function execute5(input, onLog, context) {
3994
4007
  if (!url) {
3995
4008
  return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
3996
4009
  }
3997
- const analysisPrompt = buildScreenshotAnalysisPrompt({
4010
+ return await streamScreenshotAnalysis({
4011
+ url,
3998
4012
  prompt: input.prompt,
3999
- styleMap
4000
- });
4001
- const analysis = await analyzeImage({
4002
- prompt: analysisPrompt,
4003
- imageUrl: url,
4013
+ styleMap,
4004
4014
  onLog
4005
4015
  });
4006
- return JSON.stringify({
4007
- url,
4008
- analysis,
4009
- ...styleMap ? { styleMap } : {}
4010
- });
4011
4016
  } catch (err) {
4012
4017
  return `Error taking interactive screenshot: ${err.message}`;
4013
4018
  }
@@ -5477,6 +5482,11 @@ var EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
5477
5482
  "browserCommand",
5478
5483
  "setProjectMetadata"
5479
5484
  ]);
5485
+ var USER_BLOCKING_EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
5486
+ "promptUser",
5487
+ "presentPublishPlan",
5488
+ "confirmDestructiveAction"
5489
+ ]);
5480
5490
  function createAgentState() {
5481
5491
  return { messages: [] };
5482
5492
  }
@@ -5566,6 +5576,8 @@ async function runTurn(params) {
5566
5576
  let subAgentText = "";
5567
5577
  let currentToolNames = "";
5568
5578
  const statusWatcher = isFirstMessage ? { stop() {
5579
+ }, pause() {
5580
+ }, resume() {
5569
5581
  } } : startStatusWatcher({
5570
5582
  apiConfig,
5571
5583
  getContext: () => {
@@ -5870,7 +5882,17 @@ async function runTurn(params) {
5870
5882
  toolCallId: tc.id,
5871
5883
  name: tc.name
5872
5884
  });
5873
- result = await resolveExternalTool(tc.id, tc.name, input);
5885
+ const blocksUser = USER_BLOCKING_EXTERNAL_TOOLS.has(tc.name);
5886
+ if (blocksUser) {
5887
+ statusWatcher.pause();
5888
+ }
5889
+ try {
5890
+ result = await resolveExternalTool(tc.id, tc.name, input);
5891
+ } finally {
5892
+ if (blocksUser) {
5893
+ statusWatcher.resume();
5894
+ }
5895
+ }
5874
5896
  } else {
5875
5897
  result = await executeTool(tc.name, input, {
5876
5898
  apiConfig,
package/dist/index.js CHANGED
@@ -2943,6 +2943,21 @@ ${opts.styleMap}
2943
2943
  ${TEXT_WRAP_DISCLAIMER}`;
2944
2944
  return p;
2945
2945
  }
2946
+ async function streamScreenshotAnalysis(opts) {
2947
+ const { url, prompt, styleMap, onLog } = opts;
2948
+ onLog?.(JSON.stringify({ url, analysis: null }));
2949
+ const analysisPrompt = buildScreenshotAnalysisPrompt({ prompt, styleMap });
2950
+ let accumulated = "";
2951
+ const analysis = await analyzeImage({
2952
+ prompt: analysisPrompt,
2953
+ imageUrl: url,
2954
+ onLog: (chunk) => {
2955
+ accumulated += chunk;
2956
+ onLog?.(JSON.stringify({ url, analysis: accumulated }));
2957
+ }
2958
+ });
2959
+ return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
2960
+ }
2946
2961
  async function captureAndAnalyzeScreenshot(promptOrOptions) {
2947
2962
  let prompt;
2948
2963
  let existingUrl;
@@ -2977,16 +2992,12 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2977
2992
  if (prompt === false) {
2978
2993
  return url;
2979
2994
  }
2980
- const analysisPrompt = buildScreenshotAnalysisPrompt({
2995
+ return streamScreenshotAnalysis({
2996
+ url,
2981
2997
  prompt: prompt || void 0,
2982
- styleMap
2983
- });
2984
- const analysis = await analyzeImage({
2985
- prompt: analysisPrompt,
2986
- imageUrl: url,
2998
+ styleMap,
2987
2999
  onLog
2988
3000
  });
2989
- return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
2990
3001
  }
2991
3002
  var SCREENSHOT_ANALYSIS_PROMPT, TEXT_WRAP_DISCLAIMER;
2992
3003
  var init_screenshot = __esm({
@@ -3024,9 +3035,10 @@ function startStatusWatcher(config) {
3024
3035
  const { apiConfig, getContext, onStatus, interval = 5e3, signal } = config;
3025
3036
  let inflight = false;
3026
3037
  let stopped = false;
3038
+ let pauseCount = 0;
3027
3039
  const url = `${apiConfig.baseUrl}/_internal/v2/agent/remy/generate-status`;
3028
3040
  async function tick() {
3029
- if (stopped || signal?.aborted || inflight) {
3041
+ if (stopped || signal?.aborted || inflight || pauseCount > 0) {
3030
3042
  return;
3031
3043
  }
3032
3044
  inflight = true;
@@ -3051,6 +3063,9 @@ function startStatusWatcher(config) {
3051
3063
  if (!data.label) {
3052
3064
  return;
3053
3065
  }
3066
+ if (pauseCount > 0) {
3067
+ return;
3068
+ }
3054
3069
  onStatus(data.label);
3055
3070
  } catch {
3056
3071
  } finally {
@@ -3064,6 +3079,12 @@ function startStatusWatcher(config) {
3064
3079
  stop() {
3065
3080
  stopped = true;
3066
3081
  clearInterval(timer);
3082
+ },
3083
+ pause() {
3084
+ pauseCount++;
3085
+ },
3086
+ resume() {
3087
+ pauseCount = Math.max(0, pauseCount - 1);
3067
3088
  }
3068
3089
  };
3069
3090
  }
@@ -3956,7 +3977,6 @@ var init_screenshot2 = __esm({
3956
3977
  "use strict";
3957
3978
  init_screenshot();
3958
3979
  init_browserLock();
3959
- init_analyzeImage();
3960
3980
  init_browserAutomation();
3961
3981
  screenshotTool = {
3962
3982
  clearable: true,
@@ -3980,7 +4000,7 @@ var init_screenshot2 = __esm({
3980
4000
  },
3981
4001
  instructions: {
3982
4002
  type: "string",
3983
- description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Only use instructions when you need to trigger stateful changes. Never describe what names or values to use when applying the isntructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing."
4003
+ description: "If the screenshot you need requires interaction first (dismissing a modal, clicking a tab, filling out a form, navigating a flow, getting through a login/auth checkpoint), describe the steps to get there. A browser automation agent will follow these instructions before capturing the screenshot - it can bypass auth and get right to where it needs to be if you tell it to authenticate as a test user and give it the path/screen to start its test at. You will always get back a full-height screenshot of the entire page. Do not attempt to scroll or capture specific areas. Never describe what names or values to use when applying the instructions - the browser automation agent must use its own values for it to work properly. If a specific auth role is required to access the content, be sure to note that - it can automatically assume it for the purpose of testing. Use only when interaction is required to *reach* the state you want to capture \u2014 log in, dismiss a modal, switch a tab, follow a route. If your steps are exercising the app's functionality across multiple states (running flows, asserting behavior under interaction, multi-step QA), use `runAutomatedBrowserTest` instead."
3984
4004
  }
3985
4005
  }
3986
4006
  }
@@ -4009,20 +4029,12 @@ var init_screenshot2 = __esm({
4009
4029
  if (!url) {
4010
4030
  return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
4011
4031
  }
4012
- const analysisPrompt = buildScreenshotAnalysisPrompt({
4032
+ return await streamScreenshotAnalysis({
4033
+ url,
4013
4034
  prompt: input.prompt,
4014
- styleMap
4015
- });
4016
- const analysis = await analyzeImage({
4017
- prompt: analysisPrompt,
4018
- imageUrl: url,
4035
+ styleMap,
4019
4036
  onLog: context?.onLog
4020
4037
  });
4021
- return JSON.stringify({
4022
- url,
4023
- analysis,
4024
- ...styleMap ? { styleMap } : {}
4025
- });
4026
4038
  }
4027
4039
  const release = await acquireBrowserLock();
4028
4040
  try {
@@ -4356,20 +4368,12 @@ async function execute5(input, onLog, context) {
4356
4368
  if (!url) {
4357
4369
  return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
4358
4370
  }
4359
- const analysisPrompt = buildScreenshotAnalysisPrompt({
4371
+ return await streamScreenshotAnalysis({
4372
+ url,
4360
4373
  prompt: input.prompt,
4361
- styleMap
4362
- });
4363
- const analysis = await analyzeImage({
4364
- prompt: analysisPrompt,
4365
- imageUrl: url,
4374
+ styleMap,
4366
4375
  onLog
4367
4376
  });
4368
- return JSON.stringify({
4369
- url,
4370
- analysis,
4371
- ...styleMap ? { styleMap } : {}
4372
- });
4373
4377
  } catch (err) {
4374
4378
  return `Error taking interactive screenshot: ${err.message}`;
4375
4379
  }
@@ -4393,7 +4397,6 @@ var init_screenshot3 = __esm({
4393
4397
  "use strict";
4394
4398
  init_screenshot();
4395
4399
  init_browserLock();
4396
- init_analyzeImage();
4397
4400
  init_browserAutomation();
4398
4401
  definition5 = {
4399
4402
  clearable: true,
@@ -6156,6 +6159,8 @@ async function runTurn(params) {
6156
6159
  let subAgentText = "";
6157
6160
  let currentToolNames = "";
6158
6161
  const statusWatcher = isFirstMessage ? { stop() {
6162
+ }, pause() {
6163
+ }, resume() {
6159
6164
  } } : startStatusWatcher({
6160
6165
  apiConfig,
6161
6166
  getContext: () => {
@@ -6460,7 +6465,17 @@ async function runTurn(params) {
6460
6465
  toolCallId: tc.id,
6461
6466
  name: tc.name
6462
6467
  });
6463
- result = await resolveExternalTool(tc.id, tc.name, input);
6468
+ const blocksUser = USER_BLOCKING_EXTERNAL_TOOLS.has(tc.name);
6469
+ if (blocksUser) {
6470
+ statusWatcher.pause();
6471
+ }
6472
+ try {
6473
+ result = await resolveExternalTool(tc.id, tc.name, input);
6474
+ } finally {
6475
+ if (blocksUser) {
6476
+ statusWatcher.resume();
6477
+ }
6478
+ }
6464
6479
  } else {
6465
6480
  result = await executeTool(tc.name, input, {
6466
6481
  apiConfig,
@@ -6565,7 +6580,7 @@ async function runTurn(params) {
6565
6580
  }
6566
6581
  }
6567
6582
  }
6568
- var log8, EXTERNAL_TOOLS;
6583
+ var log8, EXTERNAL_TOOLS, USER_BLOCKING_EXTERNAL_TOOLS;
6569
6584
  var init_agent = __esm({
6570
6585
  "src/agent.ts"() {
6571
6586
  "use strict";
@@ -6591,6 +6606,11 @@ var init_agent = __esm({
6591
6606
  "browserCommand",
6592
6607
  "setProjectMetadata"
6593
6608
  ]);
6609
+ USER_BLOCKING_EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
6610
+ "promptUser",
6611
+ "presentPublishPlan",
6612
+ "confirmDestructiveAction"
6613
+ ]);
6594
6614
  }
6595
6615
  });
6596
6616
 
@@ -208,6 +208,8 @@ auth.requireRole('admin');
208
208
  auth.requireRole('admin', 'approver'); // any of these
209
209
  ```
210
210
 
211
+ **To require login, check `auth.userId`. Roles are for RBAC** — declare only roles that map to real business distinctions (vendor/buyer/admin), and check them only when behavior should differ by role. Newly verified users have `roles: []` until your code assigns them.
212
+
211
213
  ### `auth.hasRole(...roles)`
212
214
 
213
215
  Returns `boolean`. Same logic as `requireRole` but doesn't throw.
@@ -375,4 +377,6 @@ Auth works the same in dev/preview as in production — real verification codes
375
377
 
376
378
  All other emails and phone numbers receive real codes. There is no dev-mode bypass, no fake code, and no way to skip verification. When testing auth flows in the preview, use one of the test bypasses above or a real email/phone.
377
379
 
380
+ The `runMethod` tool's `userId: "testUser"` shortcut resolves to this same dev-bypass identity. The platform find-or-creates a real users-table row for it on first call and caches the row's UUID for the rest of the dev session. **`auth.userId` inside the method is that UUID — not the literal string `"testUser"`.** The user row already exists, so don't try to insert it. If you need the UUID to seed app-specific rows that reference it (profiles, preferences, foreign keys), read it from any method response or query the users table directly: `SELECT id FROM users WHERE email = 'remy@mindstudio.ai'` (or `phone = '+15555555555'` for SMS-auth apps).
381
+
378
382
  Browser automation tools (screenshots, automated browser tests) handle their own auth sessions. Scenarios seed database data but do not create browser auth sessions.
@@ -11,11 +11,13 @@ Run `lspDiagnostics` after every turn where you have edited code in any meaningf
11
11
 
12
12
  - Spot-check methods with `runMethod`. The dev database is a disposable snapshot that will have been seeded with scenario data, so don't worry about being destructive.
13
13
  - For frontend work, take a single `screenshot` to confirm the main view renders correctly or look at the browser log for any console errors in the user's preview.
14
- - Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, or when the user reports something broken that you can't identify from code alone.
14
+ - Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, when the user reports something broken that you can't identify from code alone, or whenever the verification involves driving the app through multiple interactions.
15
15
  - If the browser is unavailable, skip the visual check and verify through methods, logs, and code instead. Browser unavailability is an infrastructure issue, not a code problem — don't try to diagnose or fix it.
16
16
 
17
17
  Aim for confidence that the core happy paths work. If the 80% case is solid, the remaining edge cases are likely fine and the user can surface them in chat. Don't screenshot every page, test every permutation, or verify every secondary flow. One or two runtime checks that confirm the app loads and data flows through is enough.
18
18
 
19
+ When making mechanical edits as part of iterating with the user (e.g., moving elements, changing labels, small redesigns and refactors), don't re-screenshot to confirm; simply trust your code. Re-screenshot only when changes are structural enough that the visual outcome is genuinely uncertain (new layout, new component composition, new route), or when the user reports something visible that you can't see in the code.
20
+
19
21
  ### Process Logs
20
22
 
21
23
  Process logs are available at .logs/ in NDJSON format (one JSON object per line) for debugging. Each line has at minimum ts (unix millis) and msg fields, plus structured context like level, module, requestId, toolCallId where available. You can use `jq` to examine logs and debug failures. Tools like run method or run scenario execute synchronously, so log data will be available by the time those tools return their results to you; there is no need to `sleep` before querying log files.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.155",
3
+ "version": "0.1.156",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",