@mindstudio-ai/remy 0.1.83 → 0.1.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,9 @@ This is an automated action triggered by the user pressing "Build" in the editor
6
6
 
7
7
  The user has reviewed the spec and is ready to build.
8
8
 
9
- Think about your approach and then get a quick sanity check from `codeSanityCheck` to make sure you aren't missing anything. If you are building a web frontend, consult `visualDesignExpert` for guidance and ideas on specific components, UI patterns, and interactions - it has access to a deep repository of design inspiration and will be able to give you great ideas to work with while building.
9
+ Think about your approach and then get a quick sanity check from `codeSanityCheck` to make sure you aren't missing anything.
10
+
11
+ If you are building a web frontend, consult `visualDesignExpert` for guidance and ideas on specific component design, UI patterns, and interactions - it has access to a deep repository of design inspiration and will be able to give you great ideas to work with while building. Don't ask it to design full screens - focus on specific components, moments, and concepts where its ideas can be additive and transformative. You already have the basic design and layout guidance from the spec.
10
12
 
11
13
  Then, build everything in one turn: methods, tables, interfaces, manifest updates, and scenarios, using the spec as the master plan. Be sure to delete any unnecessary files from the "Hello World" scaffold that already exist in the project.
12
14
 
package/dist/headless.js CHANGED
@@ -3123,15 +3123,6 @@ var BROWSER_TOOLS = [
3123
3123
  }
3124
3124
  }
3125
3125
  }
3126
- },
3127
- {
3128
- clearable: false,
3129
- name: "resetBrowser",
3130
- description: "Reset the browser to a clean state. Call this once after all tests are complete to restore the preview for the user. Fire and forget \u2014 does not wait for the reload to finish.",
3131
- inputSchema: {
3132
- type: "object",
3133
- properties: {}
3134
- }
3135
3126
  }
3136
3127
  ];
3137
3128
  var BROWSER_EXTERNAL_TOOLS = /* @__PURE__ */ new Set(["browserCommand"]);
@@ -3156,11 +3147,21 @@ ${appSpec}
3156
3147
 
3157
3148
  // src/subagents/browserAutomation/index.ts
3158
3149
  var log6 = createLogger("browser-automation");
3150
+ var lockQueue = Promise.resolve();
3151
+ function acquireBrowserLock() {
3152
+ let release;
3153
+ const next = new Promise((res) => {
3154
+ release = res;
3155
+ });
3156
+ const wait = lockQueue;
3157
+ lockQueue = next;
3158
+ return wait.then(() => release);
3159
+ }
3159
3160
  var browserAutomationTool = {
3160
3161
  clearable: true,
3161
3162
  definition: {
3162
3163
  name: "runAutomatedBrowserTest",
3163
- description: "Run an automated browser test against the live preview. Describe what to test \u2014 the agent figures out how. Use after meaningful changes frontend code, to reproduce user-reported issues, or to test end-to-end flows.",
3164
+ description: "Run an automated browser test against the live preview. Describe what to test \u2014 the agent figures out how. Use after meaningful changes frontend code, to reproduce user-reported issues, or to test end-to-end flows. Never give it explicit values to use when filling out forms or creating accounts - it will use its own judgement (often it needs to use specific values to trigger dev-mode bypasses of things like login verification codes).",
3164
3165
  inputSchema: {
3165
3166
  type: "object",
3166
3167
  properties: {
@@ -3176,99 +3177,104 @@ var browserAutomationTool = {
3176
3177
  if (!context) {
3177
3178
  return "Error: browser automation requires execution context (only available in headless mode)";
3178
3179
  }
3180
+ const release = await acquireBrowserLock();
3179
3181
  try {
3180
- const status = await sidecarRequest(
3181
- "/browser-status",
3182
- {},
3183
- { timeout: 5e3 }
3184
- );
3185
- if (!status.connected) {
3186
- return "Error: the browser preview is not connected. The user needs to open the preview before browser tests can run.";
3182
+ try {
3183
+ const status = await sidecarRequest(
3184
+ "/browser-status",
3185
+ {},
3186
+ { timeout: 5e3 }
3187
+ );
3188
+ if (!status.connected) {
3189
+ return "Error: the browser preview is not connected. The user needs to open the preview before browser tests can run.";
3190
+ }
3191
+ } catch {
3192
+ return "Error: could not check browser status. The dev environment may not be running.";
3187
3193
  }
3188
- } catch {
3189
- return "Error: could not check browser status. The dev environment may not be running.";
3190
- }
3191
- const result = await runSubAgent({
3192
- system: getBrowserAutomationPrompt(),
3193
- task: input.task,
3194
- tools: BROWSER_TOOLS,
3195
- externalTools: BROWSER_EXTERNAL_TOOLS,
3196
- executeTool: async (name, _input, _toolCallId, onLog) => {
3197
- if (name === "screenshotFullPage") {
3198
- try {
3199
- return await captureAndAnalyzeScreenshot({
3200
- path: _input.path,
3201
- onLog
3202
- });
3203
- } catch (err) {
3204
- return `Error taking screenshot: ${err.message}`;
3194
+ try {
3195
+ await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3196
+ } catch {
3197
+ }
3198
+ const result = await runSubAgent({
3199
+ system: getBrowserAutomationPrompt(),
3200
+ task: input.task,
3201
+ tools: BROWSER_TOOLS,
3202
+ externalTools: BROWSER_EXTERNAL_TOOLS,
3203
+ executeTool: async (name, _input, _toolCallId, onLog) => {
3204
+ if (name === "screenshotFullPage") {
3205
+ try {
3206
+ return await captureAndAnalyzeScreenshot({
3207
+ path: _input.path,
3208
+ onLog
3209
+ });
3210
+ } catch (err) {
3211
+ return `Error taking screenshot: ${err.message}`;
3212
+ }
3205
3213
  }
3206
- }
3207
- if (name === "resetBrowser") {
3208
- try {
3209
- await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3210
- return "Browser reset triggered.";
3211
- } catch {
3212
- return "Error: could not reset browser.";
3214
+ return `Error: unknown local tool "${name}"`;
3215
+ },
3216
+ apiConfig: context.apiConfig,
3217
+ model: context.model,
3218
+ subAgentId: "browserAutomation",
3219
+ signal: context.signal,
3220
+ parentToolId: context.toolCallId,
3221
+ requestId: context.requestId,
3222
+ onEvent: context.onEvent,
3223
+ resolveExternalTool: async (id, name, input2) => {
3224
+ if (!context.resolveExternalTool) {
3225
+ return "Error: no external tool resolver";
3213
3226
  }
3214
- }
3215
- return `Error: unknown local tool "${name}"`;
3216
- },
3217
- apiConfig: context.apiConfig,
3218
- model: context.model,
3219
- subAgentId: "browserAutomation",
3220
- signal: context.signal,
3221
- parentToolId: context.toolCallId,
3222
- requestId: context.requestId,
3223
- onEvent: context.onEvent,
3224
- resolveExternalTool: async (id, name, input2) => {
3225
- if (!context.resolveExternalTool) {
3226
- return "Error: no external tool resolver";
3227
- }
3228
- const result2 = await context.resolveExternalTool(id, name, input2);
3229
- if (name === "browserCommand") {
3230
- try {
3231
- const parsed = JSON.parse(result2);
3232
- const screenshotSteps = (parsed.steps || []).filter(
3233
- (s) => s.command === "screenshotViewport" && s.result?.url
3234
- );
3235
- if (screenshotSteps.length > 0) {
3236
- const batchInput = screenshotSteps.map((s) => ({
3237
- stepType: "analyzeImage",
3238
- step: {
3239
- imageUrl: s.result.url,
3240
- prompt: SCREENSHOT_ANALYSIS_PROMPT
3241
- }
3242
- }));
3243
- const batchResult = await runCli(
3244
- `mindstudio batch --no-meta ${JSON.stringify(JSON.stringify(batchInput))}`,
3245
- { timeout: 2e5 }
3227
+ const result2 = await context.resolveExternalTool(id, name, input2);
3228
+ if (name === "browserCommand") {
3229
+ try {
3230
+ const parsed = JSON.parse(result2);
3231
+ const screenshotSteps = (parsed.steps || []).filter(
3232
+ (s) => s.command === "screenshotViewport" && s.result?.url
3246
3233
  );
3247
- try {
3248
- const analyses = JSON.parse(batchResult);
3249
- let ai = 0;
3250
- for (const step of parsed.steps) {
3251
- if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
3252
- step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
3253
- ai++;
3234
+ if (screenshotSteps.length > 0) {
3235
+ const batchInput = screenshotSteps.map((s) => ({
3236
+ stepType: "analyzeImage",
3237
+ step: {
3238
+ imageUrl: s.result.url,
3239
+ prompt: SCREENSHOT_ANALYSIS_PROMPT
3240
+ }
3241
+ }));
3242
+ const batchResult = await runCli(
3243
+ `mindstudio batch --no-meta ${JSON.stringify(JSON.stringify(batchInput))}`,
3244
+ { timeout: 2e5 }
3245
+ );
3246
+ try {
3247
+ const analyses = JSON.parse(batchResult);
3248
+ let ai = 0;
3249
+ for (const step of parsed.steps) {
3250
+ if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
3251
+ step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
3252
+ ai++;
3253
+ }
3254
3254
  }
3255
+ } catch {
3256
+ log6.debug("Failed to parse batch analysis result", {
3257
+ batchResult
3258
+ });
3255
3259
  }
3256
- } catch {
3257
- log6.debug("Failed to parse batch analysis result", {
3258
- batchResult
3259
- });
3260
+ return JSON.stringify(parsed);
3260
3261
  }
3261
- return JSON.stringify(parsed);
3262
+ } catch {
3262
3263
  }
3263
- } catch {
3264
3264
  }
3265
- }
3266
- return result2;
3267
- },
3268
- toolRegistry: context.toolRegistry
3269
- });
3270
- context.subAgentMessages?.set(context.toolCallId, result.messages);
3271
- return result.text;
3265
+ return result2;
3266
+ },
3267
+ toolRegistry: context.toolRegistry
3268
+ });
3269
+ try {
3270
+ await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3271
+ } catch {
3272
+ }
3273
+ context.subAgentMessages?.set(context.toolCallId, result.messages);
3274
+ return result.text;
3275
+ } finally {
3276
+ release();
3277
+ }
3272
3278
  }
3273
3279
  };
3274
3280
 
@@ -4960,6 +4966,7 @@ var EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
4960
4966
  "confirmDestructiveAction",
4961
4967
  "runScenario",
4962
4968
  "runMethod",
4969
+ "queryDatabase",
4963
4970
  "browserCommand",
4964
4971
  "setProjectMetadata"
4965
4972
  ]);
package/dist/index.js CHANGED
@@ -2856,15 +2856,6 @@ var init_tools = __esm({
2856
2856
  }
2857
2857
  }
2858
2858
  }
2859
- },
2860
- {
2861
- clearable: false,
2862
- name: "resetBrowser",
2863
- description: "Reset the browser to a clean state. Call this once after all tests are complete to restore the preview for the user. Fire and forget \u2014 does not wait for the reload to finish.",
2864
- inputSchema: {
2865
- type: "object",
2866
- properties: {}
2867
- }
2868
2859
  }
2869
2860
  ];
2870
2861
  BROWSER_EXTERNAL_TOOLS = /* @__PURE__ */ new Set(["browserCommand"]);
@@ -2940,7 +2931,16 @@ var init_prompt = __esm({
2940
2931
  });
2941
2932
 
2942
2933
  // src/subagents/browserAutomation/index.ts
2943
- var log4, browserAutomationTool;
2934
+ function acquireBrowserLock() {
2935
+ let release;
2936
+ const next = new Promise((res) => {
2937
+ release = res;
2938
+ });
2939
+ const wait = lockQueue;
2940
+ lockQueue = next;
2941
+ return wait.then(() => release);
2942
+ }
2943
+ var log4, lockQueue, browserAutomationTool;
2944
2944
  var init_browserAutomation = __esm({
2945
2945
  "src/subagents/browserAutomation/index.ts"() {
2946
2946
  "use strict";
@@ -2952,11 +2952,12 @@ var init_browserAutomation = __esm({
2952
2952
  init_runCli();
2953
2953
  init_logger();
2954
2954
  log4 = createLogger("browser-automation");
2955
+ lockQueue = Promise.resolve();
2955
2956
  browserAutomationTool = {
2956
2957
  clearable: true,
2957
2958
  definition: {
2958
2959
  name: "runAutomatedBrowserTest",
2959
- description: "Run an automated browser test against the live preview. Describe what to test \u2014 the agent figures out how. Use after meaningful changes frontend code, to reproduce user-reported issues, or to test end-to-end flows.",
2960
+ description: "Run an automated browser test against the live preview. Describe what to test \u2014 the agent figures out how. Use after meaningful changes frontend code, to reproduce user-reported issues, or to test end-to-end flows. Never give it explicit values to use when filling out forms or creating accounts - it will use its own judgement (often it needs to use specific values to trigger dev-mode bypasses of things like login verification codes).",
2960
2961
  inputSchema: {
2961
2962
  type: "object",
2962
2963
  properties: {
@@ -2972,99 +2973,104 @@ var init_browserAutomation = __esm({
2972
2973
  if (!context) {
2973
2974
  return "Error: browser automation requires execution context (only available in headless mode)";
2974
2975
  }
2976
+ const release = await acquireBrowserLock();
2975
2977
  try {
2976
- const status = await sidecarRequest(
2977
- "/browser-status",
2978
- {},
2979
- { timeout: 5e3 }
2980
- );
2981
- if (!status.connected) {
2982
- return "Error: the browser preview is not connected. The user needs to open the preview before browser tests can run.";
2978
+ try {
2979
+ const status = await sidecarRequest(
2980
+ "/browser-status",
2981
+ {},
2982
+ { timeout: 5e3 }
2983
+ );
2984
+ if (!status.connected) {
2985
+ return "Error: the browser preview is not connected. The user needs to open the preview before browser tests can run.";
2986
+ }
2987
+ } catch {
2988
+ return "Error: could not check browser status. The dev environment may not be running.";
2983
2989
  }
2984
- } catch {
2985
- return "Error: could not check browser status. The dev environment may not be running.";
2986
- }
2987
- const result = await runSubAgent({
2988
- system: getBrowserAutomationPrompt(),
2989
- task: input.task,
2990
- tools: BROWSER_TOOLS,
2991
- externalTools: BROWSER_EXTERNAL_TOOLS,
2992
- executeTool: async (name, _input, _toolCallId, onLog) => {
2993
- if (name === "screenshotFullPage") {
2994
- try {
2995
- return await captureAndAnalyzeScreenshot({
2996
- path: _input.path,
2997
- onLog
2998
- });
2999
- } catch (err) {
3000
- return `Error taking screenshot: ${err.message}`;
2990
+ try {
2991
+ await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
2992
+ } catch {
2993
+ }
2994
+ const result = await runSubAgent({
2995
+ system: getBrowserAutomationPrompt(),
2996
+ task: input.task,
2997
+ tools: BROWSER_TOOLS,
2998
+ externalTools: BROWSER_EXTERNAL_TOOLS,
2999
+ executeTool: async (name, _input, _toolCallId, onLog) => {
3000
+ if (name === "screenshotFullPage") {
3001
+ try {
3002
+ return await captureAndAnalyzeScreenshot({
3003
+ path: _input.path,
3004
+ onLog
3005
+ });
3006
+ } catch (err) {
3007
+ return `Error taking screenshot: ${err.message}`;
3008
+ }
3001
3009
  }
3002
- }
3003
- if (name === "resetBrowser") {
3004
- try {
3005
- await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3006
- return "Browser reset triggered.";
3007
- } catch {
3008
- return "Error: could not reset browser.";
3010
+ return `Error: unknown local tool "${name}"`;
3011
+ },
3012
+ apiConfig: context.apiConfig,
3013
+ model: context.model,
3014
+ subAgentId: "browserAutomation",
3015
+ signal: context.signal,
3016
+ parentToolId: context.toolCallId,
3017
+ requestId: context.requestId,
3018
+ onEvent: context.onEvent,
3019
+ resolveExternalTool: async (id, name, input2) => {
3020
+ if (!context.resolveExternalTool) {
3021
+ return "Error: no external tool resolver";
3009
3022
  }
3010
- }
3011
- return `Error: unknown local tool "${name}"`;
3012
- },
3013
- apiConfig: context.apiConfig,
3014
- model: context.model,
3015
- subAgentId: "browserAutomation",
3016
- signal: context.signal,
3017
- parentToolId: context.toolCallId,
3018
- requestId: context.requestId,
3019
- onEvent: context.onEvent,
3020
- resolveExternalTool: async (id, name, input2) => {
3021
- if (!context.resolveExternalTool) {
3022
- return "Error: no external tool resolver";
3023
- }
3024
- const result2 = await context.resolveExternalTool(id, name, input2);
3025
- if (name === "browserCommand") {
3026
- try {
3027
- const parsed = JSON.parse(result2);
3028
- const screenshotSteps = (parsed.steps || []).filter(
3029
- (s) => s.command === "screenshotViewport" && s.result?.url
3030
- );
3031
- if (screenshotSteps.length > 0) {
3032
- const batchInput = screenshotSteps.map((s) => ({
3033
- stepType: "analyzeImage",
3034
- step: {
3035
- imageUrl: s.result.url,
3036
- prompt: SCREENSHOT_ANALYSIS_PROMPT
3037
- }
3038
- }));
3039
- const batchResult = await runCli(
3040
- `mindstudio batch --no-meta ${JSON.stringify(JSON.stringify(batchInput))}`,
3041
- { timeout: 2e5 }
3023
+ const result2 = await context.resolveExternalTool(id, name, input2);
3024
+ if (name === "browserCommand") {
3025
+ try {
3026
+ const parsed = JSON.parse(result2);
3027
+ const screenshotSteps = (parsed.steps || []).filter(
3028
+ (s) => s.command === "screenshotViewport" && s.result?.url
3042
3029
  );
3043
- try {
3044
- const analyses = JSON.parse(batchResult);
3045
- let ai = 0;
3046
- for (const step of parsed.steps) {
3047
- if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
3048
- step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
3049
- ai++;
3030
+ if (screenshotSteps.length > 0) {
3031
+ const batchInput = screenshotSteps.map((s) => ({
3032
+ stepType: "analyzeImage",
3033
+ step: {
3034
+ imageUrl: s.result.url,
3035
+ prompt: SCREENSHOT_ANALYSIS_PROMPT
3036
+ }
3037
+ }));
3038
+ const batchResult = await runCli(
3039
+ `mindstudio batch --no-meta ${JSON.stringify(JSON.stringify(batchInput))}`,
3040
+ { timeout: 2e5 }
3041
+ );
3042
+ try {
3043
+ const analyses = JSON.parse(batchResult);
3044
+ let ai = 0;
3045
+ for (const step of parsed.steps) {
3046
+ if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
3047
+ step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
3048
+ ai++;
3049
+ }
3050
3050
  }
3051
+ } catch {
3052
+ log4.debug("Failed to parse batch analysis result", {
3053
+ batchResult
3054
+ });
3051
3055
  }
3052
- } catch {
3053
- log4.debug("Failed to parse batch analysis result", {
3054
- batchResult
3055
- });
3056
+ return JSON.stringify(parsed);
3056
3057
  }
3057
- return JSON.stringify(parsed);
3058
+ } catch {
3058
3059
  }
3059
- } catch {
3060
3060
  }
3061
- }
3062
- return result2;
3063
- },
3064
- toolRegistry: context.toolRegistry
3065
- });
3066
- context.subAgentMessages?.set(context.toolCallId, result.messages);
3067
- return result.text;
3061
+ return result2;
3062
+ },
3063
+ toolRegistry: context.toolRegistry
3064
+ });
3065
+ try {
3066
+ await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
3067
+ } catch {
3068
+ }
3069
+ context.subAgentMessages?.set(context.toolCallId, result.messages);
3070
+ return result.text;
3071
+ } finally {
3072
+ release();
3073
+ }
3068
3074
  }
3069
3075
  };
3070
3076
  }
@@ -5503,6 +5509,7 @@ var init_agent = __esm({
5503
5509
  "confirmDestructiveAction",
5504
5510
  "runScenario",
5505
5511
  "runMethod",
5512
+ "queryDatabase",
5506
5513
  "browserCommand",
5507
5514
  "setProjectMetadata"
5508
5515
  ]);
@@ -80,6 +80,10 @@ The UI should feel instant. Never make the user wait for a server round-trip to
80
80
  - **Mutate after actions.** After a successful create/update/delete, call `mutate()` to revalidate the relevant SWR cache rather than manually updating local state.
81
81
  - **Skeleton loading.** Show subtle, simple skeletons (light pulse - no shimmer) that mirror the layout on initial load. Never show a blank page or centered spinner while data is loading.
82
82
 
83
+ ### Errors
84
+
85
+ Handle errors gracefully. You don't need to design for every error case, but if remote API requests fail, make sure to show them nicely in a toast or some other appropriate view with a human-friendly label - don't just drop "Error 500 XYZ" inline in a form.
86
+
83
87
  ## Auth
84
88
 
85
89
  Login and signup screens set the tone for the user's entire experience with the app and are important to get right - they should feel like exciting entry points into the next level of the user journey. A janky login form with misaligned inputs and no feedback diminishes excitement and undermines trust before the user even gets in.
@@ -19,7 +19,7 @@ The dev session gets its own database — a snapshot of the live database at ses
19
19
  - **Truncate** — keep the schema, delete all row data (used by scenarios for a clean canvas)
20
20
  - **Schema sync** — add a field to a table interface and it's immediately available in dev
21
21
 
22
- The dev database is disposable. Experiment freely — there's no risk of breaking anything.
22
+ The dev database is disposable. Experiment freely — there's no risk of breaking anything. Just be considerate that the user may have created their own data (user rows or other data) while testing, and it might be frustrating for them to have it wiped.
23
23
 
24
24
  ### Debugging
25
25
 
@@ -96,12 +96,14 @@ Shared setup code can go in `dist/methods/.scenarios/_helpers/`.
96
96
  ## How Scenarios Run
97
97
 
98
98
  When a scenario runs, the platform:
99
- 1. **Truncates** all tables (deletes all rows, preserves schema)
99
+ 1. **Truncates** all tables (deletes all rows, preserves schema - unless skipTruncate is true)
100
100
  2. **Executes** the seed function (your `db.push()` calls populate the clean database)
101
101
  3. **Impersonates** the roles from the scenario's `roles` field (the app renders from that user's perspective)
102
102
 
103
103
  This is deterministic — same scenario always produces the same state.
104
104
 
105
+ Scenarios are useful for seeding initial app state after build for testing, as well as to give the user a first impression of an app that is already filled with data and looks and feels usable. The user can choose to run further scenarios after initial build by clicking the Scenarios tab and selecting a scenario to run.
106
+
105
107
  ## Scenario Data
106
108
 
107
109
  Align scenario data to the vibe of the app - construct data that feels like it fits.
@@ -9,7 +9,7 @@
9
9
  ### Verification
10
10
  Run `lspDiagnostics` after every turn where you have edited code in any meaningful way. You don't need to run it for things like changing copy or CSS colors, but you should run it after any structural changes to code. It catches syntax errors, broken imports, and type mismatches instantly. After a big build or significant changes, also do a lightweight runtime check to catch the things static analysis misses (schema mismatches, missing imports, bad queries):
11
11
 
12
- - Seed test data with `runScenario`, then spot-check the primary method or two with `runMethod`. The dev database is a disposable snapshot, so don't worry about being destructive.
12
+ - Spot-check methods with `runMethod`. The dev database is a disposable snapshot that will have been seeded with scenario data, so don't worry about being destructive.
13
13
  - For frontend work, take a single `screenshot` to confirm the main view renders correctly or look at the browser log for any console errors in the user's preview.
14
14
  - Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, or when the user reports something broken that you can't identify from code alone.
15
15
 
@@ -19,7 +19,7 @@ Aim for confidence that the core happy paths work. If the 80% case is solid, the
19
19
 
20
20
  Process logs are available at .logs/ in NDJSON format (one JSON object per line) for debugging. Each line has at minimum ts (unix millis) and msg fields, plus structured context like level, module, requestId, toolCallId where available. You can use `jq` to examine logs and debug failures. Tools like run method or run scenario execute synchronously, so log data will be available by the time those tools return their results to you; there is no need to `sleep` before querying logfiles.
21
21
  - `.logs/tunnel.ndjson`: method execution, schema sync, session lifecycle, platform connection
22
- - `.logs/devServer.ndjson`: frontend build errors, HMR, module resolution failures
22
+ - `.logs/devServer.ndjson`: frontend build errors, HMR, module resolution failures - check this to see if compilation is broken on web frontends.
23
23
  - `.logs/system.ndjson`: sandbox server logs — agent lifecycle, tool dispatch, file watching, process management
24
24
  - `.logs/agent.ndjson`: coding agent protocol events and errors
25
25
  - `.logs/requests.ndjson`: structured log of every method and scenario execution with full input, output, errors (including stack traces), console output, and duration
@@ -38,8 +38,5 @@ For any work involving AI models, external actions (web scraping, email, SMS), o
38
38
  ### State Management
39
39
  - Calls to methods introduce latency. When building web frontends that load data from methods, consider front-loading as much data as you can in a single API request - e.g., when possible, load a large data object into a central store and use that to render sub-screens in an app, rather than an API call on every screen.
40
40
 
41
- ### Build Notes
42
- For complex builds that span many files — especially an initial buildout from a spec — write a `.remy-notes.md` scratchpad in the project root. Use it to record decisions, keep a checklist of tasks, and reference data you'll need across multiple tool calls: design tokens, color values, typography specs, image URLs, what's been built so far, what's left. Read it back instead of restating everything in your messages. Delete it when the build is done. Don't use this for small changes or single-file edits.
43
-
44
41
  ### Dependencies
45
42
  Before installing a package you haven't used in this project, do a quick web search to confirm it's still the best option. The JavaScript ecosystem moves fast — the package you remember from training may have been superseded by something smaller, faster, or better maintained. A 10-second search beats debugging a deprecated library.
@@ -15,6 +15,9 @@
15
15
  - After two failed attempts at the same approach, tell the user what's going wrong.
16
16
  - Pushing to main branch will trigger a deploy. The user presses the publish button in the interface to request publishing.
17
17
 
18
+ ### Build Notes
19
+ For complex tasks — especially an initial buildout from a spec or making multiple changes in a single turn — write a `.remy-notes.md` scratchpad in the project root. Use it to record decisions, keep a checklist of tasks, and reference data you'll need across multiple tool calls: design tokens, color values, typography specs, image URLs, what's been built so far, what's left. Read it back instead of restating everything in your messages. Delete it when your work is done.
20
+
18
21
  ## Communication
19
22
  The user can already see your tool calls, so most of your work is visible without narration. Focus text output on three things:
20
23
  - **Decisions that need input.** Questions, tradeoffs, ambiguity that blocks progress.
@@ -40,7 +40,9 @@ Always consult the code sanity check before writing code in initialCodegen with
40
40
 
41
41
  For verifying complex stateful interactions: multi-step form submissions, auth flows, real-time updates, flows that require specific data/role setup. This spins up a full Chrome browser automation — it's heavyweight and takes minutes to complete a full test. Do not use it for basic rendering or navigation checks. If you can verify something with a screenshot or by reading the code, do that instead. Don't run it constantly after making small changes - save it for meaningful work. Run a scenario first to seed test data and set user roles. The user is able to watch QA work on their screen via a live browser preview - the cursor will move, type, etc - so you can also use this to demo functionality to the user and help them understand how to use their app.
42
42
 
43
- The QA agent can see the screen. Describe what to test, not how — it will figure out what to click, what to check, and what values to use. Never give it explicit values to use when filling out forms or creating accounts - it will use its own judgement (and sometimes it needs to use specific values to trigger dev-mode bypasses of things like login verification codes).
43
+ The QA agent can see the screen. Describe what to test, not how — it will figure out what to click, what to check, and what values to use. It always starts its tests logged out/unauthenticated on "/" root. After every test session, the browser is reset to / and any authentication used or created by the tester is cleared and reset.
44
+
45
+ Never tell QA what names to use when testing or what values to input - it will use its own judgment.
44
46
 
45
47
  ### Background Execution
46
48
 
@@ -11,6 +11,9 @@ The user is watching the automation happen on their screen in real-time. When ty
11
11
  When the app has a login or signup flow, you must use `remy@mindstudio.ai` for email and `+15551234567` for phone number. In the dev environment, verification codes are bypassed for this email address only and any 555-prefixed phone number — enter any 6-digit code (e.g., `123456`) and it will be accepted. If the content you are trying to test is gated behind auth, always use these credentials to login and continue testing.
12
12
 
13
13
  ## Browser Commands
14
+
15
+ Your session always starts on the app root / in a logged out/unauthenticated state.
16
+
14
17
  ### Snapshot format
15
18
 
16
19
  The snapshot command returns a compact accessibility tree:
@@ -143,7 +146,6 @@ You can use the `screenshotFullPage` tool to take a full-height screenshot of th
143
146
  - evaluate auto-returns simple expressions. `"script": "document.title"` works directly. For multi-statement scripts, use explicit return.
144
147
  - The snapshot in the response is always the most current page state. Even if a wait times out, check the snapshot field; the content you were waiting for may have appeared by then.
145
148
  - Execution stops on first error. If step 2 of 5 fails, steps 3-5 don't run. The response will contain results for steps 0-2 (with step 2 having an error field) plus the current snapshot. Adjust and retry from the failed step.
146
- - Always call `resetBrowser` as your final action after all tests are complete. This restores the preview to a clean state for the user.
147
149
  - If something fails, bail early. Do not attempt to diagnose why; do not do things like attempt different inputs to try to work around an error - just report the failure and early return.
148
150
  </rules>
149
151
 
@@ -35,6 +35,10 @@ These are recurring mistakes the coding agent makes. If you see the conditions f
35
35
 
36
36
  - **CSS Module animation scoping.** If the agent defines `@keyframes` in a global CSS file but references the animation name from a CSS Module, the animation will silently fail. CSS Modules scope animation names, so a keyframe defined globally can't be found by a scoped class. The fix: define keyframes in the same CSS Module that uses them, or use `:global()` to escape the scoping.
37
37
 
38
+ - **Too many granular API calls.** These apps are MVPs with small datasets. If the plan has separate method calls for every screen or sub-view (load profile, then load posts, then load post detail, then load comments), flag it. Favor fewer, fatter requests — a profile page that loads posts with full content means tapping a post is instant. A feed that includes comment previews and like state means the detail view renders from memory. Over-fetching at this scale is almost always the right call — users notice instant transitions, they don't notice a slightly larger payload.
39
+
40
+ - **Wouter is not React Router.** The agent defaults to React Router patterns which silently break in wouter. Key differences: no `useNavigate()` (use `const [, setLocation] = useLocation()`), no `navigate(-1)` for back (use `window.history.back()`), no `element` prop on Route (use `component={Foo}` or children), no `<Routes>` (use `<Switch>` — without it all matching routes render simultaneously), no `<Navigate>` (use `<Redirect>`), no `<Outlet>` for nested routes (use `nest` prop on Route), and no `useSearchParams()` from react-router (wouter has its own version with a different setter API). If you see any of these React Router patterns in a wouter project, flag it.
41
+
38
42
  ## When to stay quiet
39
43
 
40
44
  Nits, style preferences, missing edge cases, things the agent will figure out as it goes, patterns that are "not ideal but fine," minor code smells. Let them slide. The agent is busy.
@@ -69,9 +69,9 @@ The developer should never need to source their own imagery. Always provide URLs
69
69
 
70
70
  ### Icons and logos
71
71
 
72
- App icons and logos require work and thinking to get right. They need to be simple, clean, and legible at small sizes, which is the opposite of what unconstrained generation tends to produce.
72
+ App icons and logos require work and thinking to get right.
73
73
 
74
- **What works:** Smooth 3D rendering in the style of current macOS/iOS app icons. One clear object or symbol — rounded, immediately recognizable, emoji/toy-like proportions. Clean surfaces with soft lighting and gentle shadows. Two or three accent colors, not a rainbow. Always generate with `transparentBackground: true`.
74
+ **What works:** Smooth 3D rendering in the style of 2026-era macOS/iOS app icons. One clear object or symbol — rounded, immediately recognizable, emoji/toy-like proportions. Clean surfaces with soft lighting and gentle shadows. Two or three accent colors, not a rainbow. Always full bleed.
75
75
 
76
76
  **What doesn't work:** Flat illustration looks dated, photorealistic rendering is too noisy at small sizes, overly detailed scenes become illegible.
77
77
 
@@ -29,3 +29,89 @@ Authentication moments must feel natural and intuitive - they should not feel ja
29
29
  If the app includes an AI chat interface, take care to make it beautiful and intentional. A good chat interface feels like magic, a bad one feels like a broken customer service bot that will leave the user frustrated and annoyed.
30
30
 
31
31
  Pay close attention to text streaming when the AI replies - it should feel natural, smooth, and beautiful. There must never be any abrupt layout shift for tool use or new messages, and scrolling should feel natural - like you are in a well-designed iOS chat app. Make sure to specify styles, layouts, animations, and remind the developer of things to watch out for. Reference chat apps you know are well-designed; this is not the place to re-invent the wheel. Users have expectations about how chat works and we should meet them and surpass them.
32
+
33
+ ### Wireframes
34
+
35
+ When a pattern or interaction is hard to convey in words alone — a core component, an animation sequence, a swipe gesture, a layout grid — you can include a small interactive wireframe to demonstrate it. Use a markdown code fence with `wireframe` as the type. Start with a YAML frontmatter block (`name` and `description`) to identify the component, then the self-contained HTML+CSS prototype.
36
+
37
+ Use wireframes instead of ASCII art and code-block diagrams you might otherwise reach for when trying to show a layout or interaction. Wireframes are better because the developer can actually see and interact with the result. Like those diagrams, they isolate one small piece: a single card component, a button animation, a transition, a grid layout. Each wireframe should be around 60-80 lines of HTML+CSS — if you're past 100 lines, you're building too much. These are not screens, flows, or multi-step prototypes. They render in a small iframe and should look complete at that scale. Most of your communication should be in words - wireframes are simply another tool when you need them. Never build out full screens or pages in wireframes, even if you are asked to - this is critically important.
38
+
39
+ Remember, never use ASCII art or code-block diagrams to describe layouts - always use wireframes.
40
+
41
+ The wireframe code will be rendered in a transparent iframe. Don't fill the viewport or add a background color to the body. Place the component at a natural size in a card with a background color that is centered vertically and horizontally in the viewport. Keep the component tight and self-contained. The iframe is for the component only — no annotations, labels, or explanatory text inside it. Put your notes and implementation guidance in the markdown around the wireframe. Wireframes can be interactive and are especially useful for demonstrating states, animations, effects, and transitions. If your wireframe has triggers or states, include a small "play" control button within the frame (make sure to allow replay/reset for all interactivity). No images - these are functional prototypes meant to demonstrate feel and behavior, not visual comps.
42
+
43
+ ```wireframe
44
+ ---
45
+ name: Feed Post Card
46
+ description: Photo post card with header, image frame, action row (like/comment/share/bookmark), like count, and caption. Shows spacing, typography hierarchy, and icon placement.
47
+ ---
48
+ <html lang="en"><head>
49
+ <meta charset="utf-8"/>
50
+ <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
51
+ <link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600&display=swap" rel="stylesheet"/>
52
+ <link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&display=swap" rel="stylesheet"/>
53
+ <style>
54
+ * { margin: 0; padding: 0; box-sizing: border-box; }
55
+ body {
56
+ font-family: 'Plus Jakarta Sans', sans-serif; background: transparent;
57
+ display: flex; align-items: center; justify-content: center;
58
+ }
59
+ .material-symbols-outlined { font-variation-settings: 'FILL' 0, 'wght' 300; }
60
+
61
+ .card {
62
+ width: 340px;
63
+ background: #fff; border-radius: 20px; overflow: hidden;
64
+ box-shadow: 0 8px 32px rgba(0,0,0,0.06);
65
+ }
66
+ .card-header {
67
+ padding: 20px 24px; display: flex; align-items: center; gap: 12px;
68
+ }
69
+ .avatar {
70
+ width: 40px; height: 40px; border-radius: 50%;
71
+ background: linear-gradient(135deg, #98a68e, #55624d);
72
+ }
73
+ .card-header .name { font-weight: 600; font-size: 14px; color: #191c18; }
74
+ .card-header .meta { font-size: 12px; color: #757870; margin-top: 2px; }
75
+ .card-image {
76
+ width: 100%; aspect-ratio: 4/5; background: linear-gradient(180deg, #d9e7cd 0%, #fed7d2 100%);
77
+ }
78
+ .card-actions {
79
+ padding: 16px 24px; display: flex; gap: 16px; align-items: center;
80
+ }
81
+ .card-actions button {
82
+ background: none; border: none; cursor: pointer; color: #444841;
83
+ display: flex; align-items: center; transition: color 0.15s;
84
+ }
85
+ .card-actions button:hover { color: #55624d; }
86
+ .card-actions .spacer { flex: 1; }
87
+ .card-body { padding: 0 24px 20px; }
88
+ .card-body .likes { font-weight: 600; font-size: 13px; color: #191c18; margin-bottom: 6px; }
89
+ .card-body .caption { font-size: 13px; color: #444841; line-height: 1.5; }
90
+ .card-body .caption strong { font-weight: 600; color: #191c18; }
91
+ </style>
92
+ </head>
93
+ <body>
94
+ <div class="card">
95
+ <div class="card-header">
96
+ <div class="avatar"></div>
97
+ <div>
98
+ <div class="name">sarah.chen</div>
99
+ <div class="meta">Golden Gate Park · 2h</div>
100
+ </div>
101
+ </div>
102
+ <div class="card-image"></div>
103
+ <div class="card-actions">
104
+ <button><span class="material-symbols-outlined">favorite</span></button>
105
+ <button><span class="material-symbols-outlined">chat_bubble</span></button>
106
+ <button><span class="material-symbols-outlined">send</span></button>
107
+ <span class="spacer"></span>
108
+ <button><span class="material-symbols-outlined">bookmark</span></button>
109
+ </div>
110
+ <div class="card-body">
111
+ <div class="likes">2,847 likes</div>
112
+ <div class="caption"><strong>sarah.chen</strong> Morning light through the eucalyptus grove</div>
113
+ </div>
114
+ </div>
115
+ </body>
116
+ </html>
117
+ ```
@@ -50,12 +50,12 @@ For photorealistic images, be specific about:
50
50
 
51
51
  For app icons and logos, the goal is something that reads clearly at small sizes and feels polished enough to sit on a home screen or in an app header.
52
52
 
53
- - Frame as "A 3D icon against a white background:" followed by the subject. Do NOT use the phrase "app icon" — it triggers mockup framing (the model renders an icon inset on a phone screen or mounted on a wall). "3D icon" works.
53
+ - Frame as "A 3D icon against a XYZ background" followed by the subject. Do NOT use the phrase "app icon" — it triggers mockup framing (the model renders an icon inset on a phone screen or mounted on a wall). "3D icon" works.
54
54
  - Describe smooth, rounded emoji-type 3D objects — think current macOS/iOS app icon design language. Clean surfaces, soft lighting, gentle shadows. Not flat illustration, not photorealistic, not clay/matte.
55
55
  - Subjects should be simplified and immediately recognizable. Prefer one clear object or symbol, not a scene.
56
56
  - Specify "reads well at small sizes" as an explicit constraint.
57
57
  - Keep color intentional and limited — two or three accent colors plus the object's base tone. Colors should complement the app's brand if known.
58
- - Always use transparent background for icons and logos.
58
+ - Make sure to specify full bleed - never say anything about rounded corners or there is a high likelihood that the image will come back as a rounded rectangle on a white background!
59
59
 
60
60
  ## Output
61
61
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.83",
3
+ "version": "0.1.85",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",