@mindstudio-ai/remy 0.1.83 → 0.1.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/automatedActions/buildFromInitialSpec.md +3 -1
- package/dist/headless.js +100 -93
- package/dist/index.js +101 -94
- package/dist/prompt/compiled/design.md +4 -0
- package/dist/prompt/compiled/dev-and-deploy.md +1 -1
- package/dist/prompt/compiled/scenarios.md +3 -1
- package/dist/prompt/static/coding.md +2 -5
- package/dist/prompt/static/instructions.md +3 -0
- package/dist/prompt/static/team.md +3 -1
- package/dist/subagents/browserAutomation/prompt.md +3 -1
- package/dist/subagents/codeSanityCheck/prompt.md +4 -0
- package/dist/subagents/designExpert/prompts/images.md +2 -2
- package/dist/subagents/designExpert/prompts/ui-patterns.md +86 -0
- package/dist/subagents/designExpert/tools/images/enhance-image-prompt.md +2 -2
- package/package.json +1 -1
|
@@ -6,7 +6,9 @@ This is an automated action triggered by the user pressing "Build" in the editor
|
|
|
6
6
|
|
|
7
7
|
The user has reviewed the spec and is ready to build.
|
|
8
8
|
|
|
9
|
-
Think about your approach and then get a quick sanity check from `codeSanityCheck` to make sure you aren't missing anything.
|
|
9
|
+
Think about your approach and then get a quick sanity check from `codeSanityCheck` to make sure you aren't missing anything.
|
|
10
|
+
|
|
11
|
+
If you are building a web frontend, consult `visualDesignExpert` for guidance and ideas on specific component design, UI patterns, and interactions - it has access to a deep repository of design inspiration and will be able to give you great ideas to work with while building. Don't ask it to design full screens - focus on specific components, moments, and concepts where its ideas can be additive and transformative; you already have the basic design and layout guidance from the spec.
|
|
10
12
|
|
|
11
13
|
Then, build everything in one turn: methods, tables, interfaces, manifest updates, and scenarios, using the spec as the master plan. Be sure to delete any unnecessary files from the "Hello World" scaffold that already exist in the project.
|
|
12
14
|
|
package/dist/headless.js
CHANGED
|
@@ -3123,15 +3123,6 @@ var BROWSER_TOOLS = [
|
|
|
3123
3123
|
}
|
|
3124
3124
|
}
|
|
3125
3125
|
}
|
|
3126
|
-
},
|
|
3127
|
-
{
|
|
3128
|
-
clearable: false,
|
|
3129
|
-
name: "resetBrowser",
|
|
3130
|
-
description: "Reset the browser to a clean state. Call this once after all tests are complete to restore the preview for the user. Fire and forget \u2014 does not wait for the reload to finish.",
|
|
3131
|
-
inputSchema: {
|
|
3132
|
-
type: "object",
|
|
3133
|
-
properties: {}
|
|
3134
|
-
}
|
|
3135
3126
|
}
|
|
3136
3127
|
];
|
|
3137
3128
|
var BROWSER_EXTERNAL_TOOLS = /* @__PURE__ */ new Set(["browserCommand"]);
|
|
@@ -3156,11 +3147,21 @@ ${appSpec}
|
|
|
3156
3147
|
|
|
3157
3148
|
// src/subagents/browserAutomation/index.ts
|
|
3158
3149
|
var log6 = createLogger("browser-automation");
|
|
3150
|
+
var lockQueue = Promise.resolve();
|
|
3151
|
+
function acquireBrowserLock() {
|
|
3152
|
+
let release;
|
|
3153
|
+
const next = new Promise((res) => {
|
|
3154
|
+
release = res;
|
|
3155
|
+
});
|
|
3156
|
+
const wait = lockQueue;
|
|
3157
|
+
lockQueue = next;
|
|
3158
|
+
return wait.then(() => release);
|
|
3159
|
+
}
|
|
3159
3160
|
var browserAutomationTool = {
|
|
3160
3161
|
clearable: true,
|
|
3161
3162
|
definition: {
|
|
3162
3163
|
name: "runAutomatedBrowserTest",
|
|
3163
|
-
description: "Run an automated browser test against the live preview. Describe what to test \u2014 the agent figures out how. Use after meaningful changes frontend code, to reproduce user-reported issues, or to test end-to-end flows.",
|
|
3164
|
+
description: "Run an automated browser test against the live preview. Describe what to test \u2014 the agent figures out how. Use after meaningful changes frontend code, to reproduce user-reported issues, or to test end-to-end flows. Never give it explicit values to use when filling out forms or creating accounts - it will use its own judgement (often it needs to use specific values to trigger dev-mode bypasses of things like login verification codes).",
|
|
3164
3165
|
inputSchema: {
|
|
3165
3166
|
type: "object",
|
|
3166
3167
|
properties: {
|
|
@@ -3176,99 +3177,104 @@ var browserAutomationTool = {
|
|
|
3176
3177
|
if (!context) {
|
|
3177
3178
|
return "Error: browser automation requires execution context (only available in headless mode)";
|
|
3178
3179
|
}
|
|
3180
|
+
const release = await acquireBrowserLock();
|
|
3179
3181
|
try {
|
|
3180
|
-
|
|
3181
|
-
|
|
3182
|
-
|
|
3183
|
-
|
|
3184
|
-
|
|
3185
|
-
|
|
3186
|
-
|
|
3182
|
+
try {
|
|
3183
|
+
const status = await sidecarRequest(
|
|
3184
|
+
"/browser-status",
|
|
3185
|
+
{},
|
|
3186
|
+
{ timeout: 5e3 }
|
|
3187
|
+
);
|
|
3188
|
+
if (!status.connected) {
|
|
3189
|
+
return "Error: the browser preview is not connected. The user needs to open the preview before browser tests can run.";
|
|
3190
|
+
}
|
|
3191
|
+
} catch {
|
|
3192
|
+
return "Error: could not check browser status. The dev environment may not be running.";
|
|
3187
3193
|
}
|
|
3188
|
-
|
|
3189
|
-
|
|
3190
|
-
|
|
3191
|
-
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
|
|
3196
|
-
|
|
3197
|
-
|
|
3198
|
-
|
|
3199
|
-
|
|
3200
|
-
|
|
3201
|
-
|
|
3202
|
-
|
|
3203
|
-
|
|
3204
|
-
|
|
3194
|
+
try {
|
|
3195
|
+
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
3196
|
+
} catch {
|
|
3197
|
+
}
|
|
3198
|
+
const result = await runSubAgent({
|
|
3199
|
+
system: getBrowserAutomationPrompt(),
|
|
3200
|
+
task: input.task,
|
|
3201
|
+
tools: BROWSER_TOOLS,
|
|
3202
|
+
externalTools: BROWSER_EXTERNAL_TOOLS,
|
|
3203
|
+
executeTool: async (name, _input, _toolCallId, onLog) => {
|
|
3204
|
+
if (name === "screenshotFullPage") {
|
|
3205
|
+
try {
|
|
3206
|
+
return await captureAndAnalyzeScreenshot({
|
|
3207
|
+
path: _input.path,
|
|
3208
|
+
onLog
|
|
3209
|
+
});
|
|
3210
|
+
} catch (err) {
|
|
3211
|
+
return `Error taking screenshot: ${err.message}`;
|
|
3212
|
+
}
|
|
3205
3213
|
}
|
|
3206
|
-
|
|
3207
|
-
|
|
3208
|
-
|
|
3209
|
-
|
|
3210
|
-
|
|
3211
|
-
|
|
3212
|
-
|
|
3214
|
+
return `Error: unknown local tool "${name}"`;
|
|
3215
|
+
},
|
|
3216
|
+
apiConfig: context.apiConfig,
|
|
3217
|
+
model: context.model,
|
|
3218
|
+
subAgentId: "browserAutomation",
|
|
3219
|
+
signal: context.signal,
|
|
3220
|
+
parentToolId: context.toolCallId,
|
|
3221
|
+
requestId: context.requestId,
|
|
3222
|
+
onEvent: context.onEvent,
|
|
3223
|
+
resolveExternalTool: async (id, name, input2) => {
|
|
3224
|
+
if (!context.resolveExternalTool) {
|
|
3225
|
+
return "Error: no external tool resolver";
|
|
3213
3226
|
}
|
|
3214
|
-
|
|
3215
|
-
|
|
3216
|
-
|
|
3217
|
-
|
|
3218
|
-
|
|
3219
|
-
|
|
3220
|
-
signal: context.signal,
|
|
3221
|
-
parentToolId: context.toolCallId,
|
|
3222
|
-
requestId: context.requestId,
|
|
3223
|
-
onEvent: context.onEvent,
|
|
3224
|
-
resolveExternalTool: async (id, name, input2) => {
|
|
3225
|
-
if (!context.resolveExternalTool) {
|
|
3226
|
-
return "Error: no external tool resolver";
|
|
3227
|
-
}
|
|
3228
|
-
const result2 = await context.resolveExternalTool(id, name, input2);
|
|
3229
|
-
if (name === "browserCommand") {
|
|
3230
|
-
try {
|
|
3231
|
-
const parsed = JSON.parse(result2);
|
|
3232
|
-
const screenshotSteps = (parsed.steps || []).filter(
|
|
3233
|
-
(s) => s.command === "screenshotViewport" && s.result?.url
|
|
3234
|
-
);
|
|
3235
|
-
if (screenshotSteps.length > 0) {
|
|
3236
|
-
const batchInput = screenshotSteps.map((s) => ({
|
|
3237
|
-
stepType: "analyzeImage",
|
|
3238
|
-
step: {
|
|
3239
|
-
imageUrl: s.result.url,
|
|
3240
|
-
prompt: SCREENSHOT_ANALYSIS_PROMPT
|
|
3241
|
-
}
|
|
3242
|
-
}));
|
|
3243
|
-
const batchResult = await runCli(
|
|
3244
|
-
`mindstudio batch --no-meta ${JSON.stringify(JSON.stringify(batchInput))}`,
|
|
3245
|
-
{ timeout: 2e5 }
|
|
3227
|
+
const result2 = await context.resolveExternalTool(id, name, input2);
|
|
3228
|
+
if (name === "browserCommand") {
|
|
3229
|
+
try {
|
|
3230
|
+
const parsed = JSON.parse(result2);
|
|
3231
|
+
const screenshotSteps = (parsed.steps || []).filter(
|
|
3232
|
+
(s) => s.command === "screenshotViewport" && s.result?.url
|
|
3246
3233
|
);
|
|
3247
|
-
|
|
3248
|
-
const
|
|
3249
|
-
|
|
3250
|
-
|
|
3251
|
-
|
|
3252
|
-
|
|
3253
|
-
|
|
3234
|
+
if (screenshotSteps.length > 0) {
|
|
3235
|
+
const batchInput = screenshotSteps.map((s) => ({
|
|
3236
|
+
stepType: "analyzeImage",
|
|
3237
|
+
step: {
|
|
3238
|
+
imageUrl: s.result.url,
|
|
3239
|
+
prompt: SCREENSHOT_ANALYSIS_PROMPT
|
|
3240
|
+
}
|
|
3241
|
+
}));
|
|
3242
|
+
const batchResult = await runCli(
|
|
3243
|
+
`mindstudio batch --no-meta ${JSON.stringify(JSON.stringify(batchInput))}`,
|
|
3244
|
+
{ timeout: 2e5 }
|
|
3245
|
+
);
|
|
3246
|
+
try {
|
|
3247
|
+
const analyses = JSON.parse(batchResult);
|
|
3248
|
+
let ai = 0;
|
|
3249
|
+
for (const step of parsed.steps) {
|
|
3250
|
+
if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
|
|
3251
|
+
step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
|
|
3252
|
+
ai++;
|
|
3253
|
+
}
|
|
3254
3254
|
}
|
|
3255
|
+
} catch {
|
|
3256
|
+
log6.debug("Failed to parse batch analysis result", {
|
|
3257
|
+
batchResult
|
|
3258
|
+
});
|
|
3255
3259
|
}
|
|
3256
|
-
|
|
3257
|
-
log6.debug("Failed to parse batch analysis result", {
|
|
3258
|
-
batchResult
|
|
3259
|
-
});
|
|
3260
|
+
return JSON.stringify(parsed);
|
|
3260
3261
|
}
|
|
3261
|
-
|
|
3262
|
+
} catch {
|
|
3262
3263
|
}
|
|
3263
|
-
} catch {
|
|
3264
3264
|
}
|
|
3265
|
-
|
|
3266
|
-
|
|
3267
|
-
|
|
3268
|
-
|
|
3269
|
-
|
|
3270
|
-
|
|
3271
|
-
|
|
3265
|
+
return result2;
|
|
3266
|
+
},
|
|
3267
|
+
toolRegistry: context.toolRegistry
|
|
3268
|
+
});
|
|
3269
|
+
try {
|
|
3270
|
+
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
3271
|
+
} catch {
|
|
3272
|
+
}
|
|
3273
|
+
context.subAgentMessages?.set(context.toolCallId, result.messages);
|
|
3274
|
+
return result.text;
|
|
3275
|
+
} finally {
|
|
3276
|
+
release();
|
|
3277
|
+
}
|
|
3272
3278
|
}
|
|
3273
3279
|
};
|
|
3274
3280
|
|
|
@@ -4960,6 +4966,7 @@ var EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
|
|
|
4960
4966
|
"confirmDestructiveAction",
|
|
4961
4967
|
"runScenario",
|
|
4962
4968
|
"runMethod",
|
|
4969
|
+
"queryDatabase",
|
|
4963
4970
|
"browserCommand",
|
|
4964
4971
|
"setProjectMetadata"
|
|
4965
4972
|
]);
|
package/dist/index.js
CHANGED
|
@@ -2856,15 +2856,6 @@ var init_tools = __esm({
|
|
|
2856
2856
|
}
|
|
2857
2857
|
}
|
|
2858
2858
|
}
|
|
2859
|
-
},
|
|
2860
|
-
{
|
|
2861
|
-
clearable: false,
|
|
2862
|
-
name: "resetBrowser",
|
|
2863
|
-
description: "Reset the browser to a clean state. Call this once after all tests are complete to restore the preview for the user. Fire and forget \u2014 does not wait for the reload to finish.",
|
|
2864
|
-
inputSchema: {
|
|
2865
|
-
type: "object",
|
|
2866
|
-
properties: {}
|
|
2867
|
-
}
|
|
2868
2859
|
}
|
|
2869
2860
|
];
|
|
2870
2861
|
BROWSER_EXTERNAL_TOOLS = /* @__PURE__ */ new Set(["browserCommand"]);
|
|
@@ -2940,7 +2931,16 @@ var init_prompt = __esm({
|
|
|
2940
2931
|
});
|
|
2941
2932
|
|
|
2942
2933
|
// src/subagents/browserAutomation/index.ts
|
|
2943
|
-
|
|
2934
|
+
function acquireBrowserLock() {
|
|
2935
|
+
let release;
|
|
2936
|
+
const next = new Promise((res) => {
|
|
2937
|
+
release = res;
|
|
2938
|
+
});
|
|
2939
|
+
const wait = lockQueue;
|
|
2940
|
+
lockQueue = next;
|
|
2941
|
+
return wait.then(() => release);
|
|
2942
|
+
}
|
|
2943
|
+
var log4, lockQueue, browserAutomationTool;
|
|
2944
2944
|
var init_browserAutomation = __esm({
|
|
2945
2945
|
"src/subagents/browserAutomation/index.ts"() {
|
|
2946
2946
|
"use strict";
|
|
@@ -2952,11 +2952,12 @@ var init_browserAutomation = __esm({
|
|
|
2952
2952
|
init_runCli();
|
|
2953
2953
|
init_logger();
|
|
2954
2954
|
log4 = createLogger("browser-automation");
|
|
2955
|
+
lockQueue = Promise.resolve();
|
|
2955
2956
|
browserAutomationTool = {
|
|
2956
2957
|
clearable: true,
|
|
2957
2958
|
definition: {
|
|
2958
2959
|
name: "runAutomatedBrowserTest",
|
|
2959
|
-
description: "Run an automated browser test against the live preview. Describe what to test \u2014 the agent figures out how. Use after meaningful changes frontend code, to reproduce user-reported issues, or to test end-to-end flows.",
|
|
2960
|
+
description: "Run an automated browser test against the live preview. Describe what to test \u2014 the agent figures out how. Use after meaningful changes frontend code, to reproduce user-reported issues, or to test end-to-end flows. Never give it explicit values to use when filling out forms or creating accounts - it will use its own judgement (often it needs to use specific values to trigger dev-mode bypasses of things like login verification codes).",
|
|
2960
2961
|
inputSchema: {
|
|
2961
2962
|
type: "object",
|
|
2962
2963
|
properties: {
|
|
@@ -2972,99 +2973,104 @@ var init_browserAutomation = __esm({
|
|
|
2972
2973
|
if (!context) {
|
|
2973
2974
|
return "Error: browser automation requires execution context (only available in headless mode)";
|
|
2974
2975
|
}
|
|
2976
|
+
const release = await acquireBrowserLock();
|
|
2975
2977
|
try {
|
|
2976
|
-
|
|
2977
|
-
|
|
2978
|
-
|
|
2979
|
-
|
|
2980
|
-
|
|
2981
|
-
|
|
2982
|
-
|
|
2978
|
+
try {
|
|
2979
|
+
const status = await sidecarRequest(
|
|
2980
|
+
"/browser-status",
|
|
2981
|
+
{},
|
|
2982
|
+
{ timeout: 5e3 }
|
|
2983
|
+
);
|
|
2984
|
+
if (!status.connected) {
|
|
2985
|
+
return "Error: the browser preview is not connected. The user needs to open the preview before browser tests can run.";
|
|
2986
|
+
}
|
|
2987
|
+
} catch {
|
|
2988
|
+
return "Error: could not check browser status. The dev environment may not be running.";
|
|
2983
2989
|
}
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
|
|
2999
|
-
|
|
3000
|
-
|
|
2990
|
+
try {
|
|
2991
|
+
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
2992
|
+
} catch {
|
|
2993
|
+
}
|
|
2994
|
+
const result = await runSubAgent({
|
|
2995
|
+
system: getBrowserAutomationPrompt(),
|
|
2996
|
+
task: input.task,
|
|
2997
|
+
tools: BROWSER_TOOLS,
|
|
2998
|
+
externalTools: BROWSER_EXTERNAL_TOOLS,
|
|
2999
|
+
executeTool: async (name, _input, _toolCallId, onLog) => {
|
|
3000
|
+
if (name === "screenshotFullPage") {
|
|
3001
|
+
try {
|
|
3002
|
+
return await captureAndAnalyzeScreenshot({
|
|
3003
|
+
path: _input.path,
|
|
3004
|
+
onLog
|
|
3005
|
+
});
|
|
3006
|
+
} catch (err) {
|
|
3007
|
+
return `Error taking screenshot: ${err.message}`;
|
|
3008
|
+
}
|
|
3001
3009
|
}
|
|
3002
|
-
|
|
3003
|
-
|
|
3004
|
-
|
|
3005
|
-
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
|
|
3010
|
+
return `Error: unknown local tool "${name}"`;
|
|
3011
|
+
},
|
|
3012
|
+
apiConfig: context.apiConfig,
|
|
3013
|
+
model: context.model,
|
|
3014
|
+
subAgentId: "browserAutomation",
|
|
3015
|
+
signal: context.signal,
|
|
3016
|
+
parentToolId: context.toolCallId,
|
|
3017
|
+
requestId: context.requestId,
|
|
3018
|
+
onEvent: context.onEvent,
|
|
3019
|
+
resolveExternalTool: async (id, name, input2) => {
|
|
3020
|
+
if (!context.resolveExternalTool) {
|
|
3021
|
+
return "Error: no external tool resolver";
|
|
3009
3022
|
}
|
|
3010
|
-
|
|
3011
|
-
|
|
3012
|
-
|
|
3013
|
-
|
|
3014
|
-
|
|
3015
|
-
|
|
3016
|
-
signal: context.signal,
|
|
3017
|
-
parentToolId: context.toolCallId,
|
|
3018
|
-
requestId: context.requestId,
|
|
3019
|
-
onEvent: context.onEvent,
|
|
3020
|
-
resolveExternalTool: async (id, name, input2) => {
|
|
3021
|
-
if (!context.resolveExternalTool) {
|
|
3022
|
-
return "Error: no external tool resolver";
|
|
3023
|
-
}
|
|
3024
|
-
const result2 = await context.resolveExternalTool(id, name, input2);
|
|
3025
|
-
if (name === "browserCommand") {
|
|
3026
|
-
try {
|
|
3027
|
-
const parsed = JSON.parse(result2);
|
|
3028
|
-
const screenshotSteps = (parsed.steps || []).filter(
|
|
3029
|
-
(s) => s.command === "screenshotViewport" && s.result?.url
|
|
3030
|
-
);
|
|
3031
|
-
if (screenshotSteps.length > 0) {
|
|
3032
|
-
const batchInput = screenshotSteps.map((s) => ({
|
|
3033
|
-
stepType: "analyzeImage",
|
|
3034
|
-
step: {
|
|
3035
|
-
imageUrl: s.result.url,
|
|
3036
|
-
prompt: SCREENSHOT_ANALYSIS_PROMPT
|
|
3037
|
-
}
|
|
3038
|
-
}));
|
|
3039
|
-
const batchResult = await runCli(
|
|
3040
|
-
`mindstudio batch --no-meta ${JSON.stringify(JSON.stringify(batchInput))}`,
|
|
3041
|
-
{ timeout: 2e5 }
|
|
3023
|
+
const result2 = await context.resolveExternalTool(id, name, input2);
|
|
3024
|
+
if (name === "browserCommand") {
|
|
3025
|
+
try {
|
|
3026
|
+
const parsed = JSON.parse(result2);
|
|
3027
|
+
const screenshotSteps = (parsed.steps || []).filter(
|
|
3028
|
+
(s) => s.command === "screenshotViewport" && s.result?.url
|
|
3042
3029
|
);
|
|
3043
|
-
|
|
3044
|
-
const
|
|
3045
|
-
|
|
3046
|
-
|
|
3047
|
-
|
|
3048
|
-
|
|
3049
|
-
|
|
3030
|
+
if (screenshotSteps.length > 0) {
|
|
3031
|
+
const batchInput = screenshotSteps.map((s) => ({
|
|
3032
|
+
stepType: "analyzeImage",
|
|
3033
|
+
step: {
|
|
3034
|
+
imageUrl: s.result.url,
|
|
3035
|
+
prompt: SCREENSHOT_ANALYSIS_PROMPT
|
|
3036
|
+
}
|
|
3037
|
+
}));
|
|
3038
|
+
const batchResult = await runCli(
|
|
3039
|
+
`mindstudio batch --no-meta ${JSON.stringify(JSON.stringify(batchInput))}`,
|
|
3040
|
+
{ timeout: 2e5 }
|
|
3041
|
+
);
|
|
3042
|
+
try {
|
|
3043
|
+
const analyses = JSON.parse(batchResult);
|
|
3044
|
+
let ai = 0;
|
|
3045
|
+
for (const step of parsed.steps) {
|
|
3046
|
+
if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
|
|
3047
|
+
step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
|
|
3048
|
+
ai++;
|
|
3049
|
+
}
|
|
3050
3050
|
}
|
|
3051
|
+
} catch {
|
|
3052
|
+
log4.debug("Failed to parse batch analysis result", {
|
|
3053
|
+
batchResult
|
|
3054
|
+
});
|
|
3051
3055
|
}
|
|
3052
|
-
|
|
3053
|
-
log4.debug("Failed to parse batch analysis result", {
|
|
3054
|
-
batchResult
|
|
3055
|
-
});
|
|
3056
|
+
return JSON.stringify(parsed);
|
|
3056
3057
|
}
|
|
3057
|
-
|
|
3058
|
+
} catch {
|
|
3058
3059
|
}
|
|
3059
|
-
} catch {
|
|
3060
3060
|
}
|
|
3061
|
-
|
|
3062
|
-
|
|
3063
|
-
|
|
3064
|
-
|
|
3065
|
-
|
|
3066
|
-
|
|
3067
|
-
|
|
3061
|
+
return result2;
|
|
3062
|
+
},
|
|
3063
|
+
toolRegistry: context.toolRegistry
|
|
3064
|
+
});
|
|
3065
|
+
try {
|
|
3066
|
+
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
3067
|
+
} catch {
|
|
3068
|
+
}
|
|
3069
|
+
context.subAgentMessages?.set(context.toolCallId, result.messages);
|
|
3070
|
+
return result.text;
|
|
3071
|
+
} finally {
|
|
3072
|
+
release();
|
|
3073
|
+
}
|
|
3068
3074
|
}
|
|
3069
3075
|
};
|
|
3070
3076
|
}
|
|
@@ -5503,6 +5509,7 @@ var init_agent = __esm({
|
|
|
5503
5509
|
"confirmDestructiveAction",
|
|
5504
5510
|
"runScenario",
|
|
5505
5511
|
"runMethod",
|
|
5512
|
+
"queryDatabase",
|
|
5506
5513
|
"browserCommand",
|
|
5507
5514
|
"setProjectMetadata"
|
|
5508
5515
|
]);
|
|
@@ -80,6 +80,10 @@ The UI should feel instant. Never make the user wait for a server round-trip to
|
|
|
80
80
|
- **Mutate after actions.** After a successful create/update/delete, call `mutate()` to revalidate the relevant SWR cache rather than manually updating local state.
|
|
81
81
|
- **Skeleton loading.** Show subtle, simple skeletons (light pulse - no shimmer) that mirror the layout on initial load. Never show a blank page or centered spinner while data is loading.
|
|
82
82
|
|
|
83
|
+
### Errors
|
|
84
|
+
|
|
85
|
+
Handle errors gracefully. You don't need to design for every error case, but if remote API requests fail, make sure to show the errors nicely in a toast or some other appropriate view with a human-friendly label - don't just drop "Error 500 XYZ" inline in a form.
|
|
86
|
+
|
|
83
87
|
## Auth
|
|
84
88
|
|
|
85
89
|
Login and signup screens set the tone for the user's entire experience with the app and are important to get right - they should feel like exciting entry points into the next level of the user journey. A janky login form with misaligned inputs and no feedback diminishes excitement and undermines trust before the user even gets in.
|
|
@@ -19,7 +19,7 @@ The dev session gets its own database — a snapshot of the live database at ses
|
|
|
19
19
|
- **Truncate** — keep the schema, delete all row data (used by scenarios for a clean canvas)
|
|
20
20
|
- **Schema sync** — add a field to a table interface and it's immediately available in dev
|
|
21
21
|
|
|
22
|
-
The dev database is disposable. Experiment freely — there's no risk of breaking anything.
|
|
22
|
+
The dev database is disposable. Experiment freely — there's no risk of breaking anything. Just be considerate that the user may have created their own data (user rows or other data) while testing, and it might be frustrating for them to have it wiped.
|
|
23
23
|
|
|
24
24
|
### Debugging
|
|
25
25
|
|
|
@@ -96,12 +96,14 @@ Shared setup code can go in `dist/methods/.scenarios/_helpers/`.
|
|
|
96
96
|
## How Scenarios Run
|
|
97
97
|
|
|
98
98
|
When a scenario runs, the platform:
|
|
99
|
-
1. **Truncates** all tables (deletes all rows, preserves schema)
|
|
99
|
+
1. **Truncates** all tables (deletes all rows, preserves schema - unless skipTruncate is true)
|
|
100
100
|
2. **Executes** the seed function (your `db.push()` calls populate the clean database)
|
|
101
101
|
3. **Impersonates** the roles from the scenario's `roles` field (the app renders from that user's perspective)
|
|
102
102
|
|
|
103
103
|
This is deterministic — same scenario always produces the same state.
|
|
104
104
|
|
|
105
|
+
Scenarios are useful for seeding initial app state after build for testing, as well as to give the user a first impression of an app that is already filled with data and looks and feels usable. The user can choose to run further scenarios after initial build by clicking the Scenarios tab and selecting a scenario to run.
|
|
106
|
+
|
|
105
107
|
## Scenario Data
|
|
106
108
|
|
|
107
109
|
Align scenario data to the vibe of the app - construct data that feels like it fits.
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
### Verification
|
|
10
10
|
Run `lspDiagnostics` after every turn where you have edited code in any meaningful way. You don't need to run it for things like changing copy or CSS colors, but you should run it after any structural changes to code. It catches syntax errors, broken imports, and type mismatches instantly. After a big build or significant changes, also do a lightweight runtime check to catch the things static analysis misses (schema mismatches, missing imports, bad queries):
|
|
11
11
|
|
|
12
|
-
-
|
|
12
|
+
- Spot-check methods with `runMethod`. The dev database is a disposable snapshot that will have been seeded with scenario data, so don't worry about being destructive.
|
|
13
13
|
- For frontend work, take a single `screenshot` to confirm the main view renders correctly or look at the browser log for any console errors in the user's preview.
|
|
14
14
|
- Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, or when the user reports something broken that you can't identify from code alone.
|
|
15
15
|
|
|
@@ -19,7 +19,7 @@ Aim for confidence that the core happy paths work. If the 80% case is solid, the
|
|
|
19
19
|
|
|
20
20
|
Process logs are available at .logs/ in NDJSON format (one JSON object per line) for debugging. Each line has at minimum ts (unix millis) and msg fields, plus structured context like level, module, requestId, toolCallId where available. You can use `jq` to examine logs and debug failures. Tools like run method or run scenario execute synchronously, so log data will be available by the time those tools return their results to you; there is no need to `sleep` before querying logfiles.
|
|
21
21
|
- `.logs/tunnel.ndjson`: method execution, schema sync, session lifecycle, platform connection
|
|
22
|
-
- `.logs/devServer.ndjson`: frontend build errors, HMR, module resolution failures
|
|
22
|
+
- `.logs/devServer.ndjson`: frontend build errors, HMR, module resolution failures - check this to see if compilation is broken on web frontends.
|
|
23
23
|
- `.logs/system.ndjson`: sandbox server logs — agent lifecycle, tool dispatch, file watching, process management
|
|
24
24
|
- `.logs/agent.ndjson`: coding agent protocol events and errors
|
|
25
25
|
- `.logs/requests.ndjson`: structured log of every method and scenario execution with full input, output, errors (including stack traces), console output, and duration
|
|
@@ -38,8 +38,5 @@ For any work involving AI models, external actions (web scraping, email, SMS), o
|
|
|
38
38
|
### State Management
|
|
39
39
|
- Calls to methods introduce latency. When building web frontends that load data from methods, consider front-loading as much data as you can in a single API request - e.g., when possible, load a large data object into a central store and use that to render sub-screens in an app, rather than an API call on every screen.
|
|
40
40
|
|
|
41
|
-
### Build Notes
|
|
42
|
-
For complex builds that span many files — especially an initial buildout from a spec — write a `.remy-notes.md` scratchpad in the project root. Use it to record decisions, keep a checklist of tasks, and reference data you'll need across multiple tool calls: design tokens, color values, typography specs, image URLs, what's been built so far, what's left. Read it back instead of restating everything in your messages. Delete it when the build is done. Don't use this for small changes or single-file edits.
|
|
43
|
-
|
|
44
41
|
### Dependencies
|
|
45
42
|
Before installing a package you haven't used in this project, do a quick web search to confirm it's still the best option. The JavaScript ecosystem moves fast — the package you remember from training may have been superseded by something smaller, faster, or better maintained. A 10-second search beats debugging a deprecated library.
|
|
@@ -15,6 +15,9 @@
|
|
|
15
15
|
- After two failed attempts at the same approach, tell the user what's going wrong.
|
|
16
16
|
- Pushing to main branch will trigger a deploy. The user presses the publish button in the interface to request publishing.
|
|
17
17
|
|
|
18
|
+
### Build Notes
|
|
19
|
+
For complex tasks — especially an initial buildout from a spec or making multiple changes in a single turn — write a `.remy-notes.md` scratchpad in the project root. Use it to record decisions, keep a checklist of tasks, and reference data you'll need across multiple tool calls: design tokens, color values, typography specs, image URLs, what's been built so far, what's left. Read it back instead of restating everything in your messages. Delete it when your work is done.
|
|
20
|
+
|
|
18
21
|
## Communication
|
|
19
22
|
The user can already see your tool calls, so most of your work is visible without narration. Focus text output on three things:
|
|
20
23
|
- **Decisions that need input.** Questions, tradeoffs, ambiguity that blocks progress.
|
|
@@ -40,7 +40,9 @@ Always consult the code sanity check before writing code in initialCodegen with
|
|
|
40
40
|
|
|
41
41
|
For verifying complex stateful interactions: multi-step form submissions, auth flows, real-time updates, flows that require specific data/role setup. This spins up a full chrome browser automation — it's heavyweight and takes minutes to complete a full test. Do not use it for basic rendering or navigation checks. If you can verify something with a screenshot or by reading the code, do that instead. Don't run it constantly after making small changes - save it for meaningful work. Run a scenario first to seed test data and set user roles. The user is able to watch QA work on their screen via a live browser preview - the cursor will move, type, etc - so you can also use this to demo functionality to the user and help them understand how to use their app.
|
|
42
42
|
|
|
43
|
-
The QA agent can see the screen. Describe what to test, not how — it will figure out what to click, what to check, and what values to use.
|
|
43
|
+
The QA agent can see the screen. Describe what to test, not how — it will figure out what to click, what to check, and what values to use. It always starts its tests logged out/unauthenticated on "/" root. After every test session, the browser is reset to / and any authentication used or created by the tester is cleared and reset.
|
|
44
|
+
|
|
45
|
+
Never tell QA what names to use when testing or what values to input - it will use its own judgment.
|
|
44
46
|
|
|
45
47
|
### Background Execution
|
|
46
48
|
|
|
@@ -11,6 +11,9 @@ The user is watching the automation happen on their screen in real-time. When ty
|
|
|
11
11
|
When the app has a login or signup flow, you must use `remy@mindstudio.ai` for email and `+15551234567` for phone number. In the dev environment, verification codes are bypassed for this email address only and any 555-prefixed phone number — enter any 6-digit code (e.g., `123456`) and it will be accepted. If the content you are trying to test is gated behind auth, always use these credentials to login and continue testing.
|
|
12
12
|
|
|
13
13
|
## Browser Commands
|
|
14
|
+
|
|
15
|
+
Your session always starts on the app root / in a logged out/unauthenticated state.
|
|
16
|
+
|
|
14
17
|
### Snapshot format
|
|
15
18
|
|
|
16
19
|
The snapshot command returns a compact accessibility tree:
|
|
@@ -143,7 +146,6 @@ You can use the `screenshotFullPage` tool to take a full-height screenshot of th
|
|
|
143
146
|
- evaluate auto-returns simple expressions. `"script": "document.title"` works directly. For multi-statement scripts, use explicit return.
|
|
144
147
|
- The snapshot in the response is always the most current page state. Even if a wait times out, check the snapshot field; the content you were waiting for may have appeared by then.
|
|
145
148
|
- Execution stops on first error. If step 2 of 5 fails, steps 3-5 don't run. The response will contain results for steps 0-2 (with step 2 having an error field) plus the current snapshot. Adjust and retry from the failed step.
|
|
146
|
-
- Always call `resetBrowser` as your final action after all tests are complete. This restores the preview to a clean state for the user.
|
|
147
149
|
- If something fails, bail early. Do not attempt to diagnose why; do not do things like attempt different inputs to try to work around an error - just report the failure and early return.
|
|
148
150
|
</rules>
|
|
149
151
|
|
|
@@ -35,6 +35,10 @@ These are recurring mistakes the coding agent makes. If you see the conditions f
|
|
|
35
35
|
|
|
36
36
|
- **CSS Module animation scoping.** If the agent defines `@keyframes` in a global CSS file but references the animation name from a CSS Module, the animation will silently fail. CSS Modules scope animation names, so a keyframe defined globally can't be found by a scoped class. The fix: define keyframes in the same CSS Module that uses them, or use `:global()` to escape the scoping.
|
|
37
37
|
|
|
38
|
+
- **Too many granular API calls.** These apps are MVPs with small datasets. If the plan has separate method calls for every screen or sub-view (load profile, then load posts, then load post detail, then load comments), flag it. Favor fewer, fatter requests — a profile page that loads posts with full content means tapping a post is instant. A feed that includes comment previews and like state means the detail view renders from memory. Over-fetching at this scale is almost always the right call — users notice instant transitions, they don't notice a slightly larger payload.
|
|
39
|
+
|
|
40
|
+
- **Wouter is not React Router.** The agent defaults to React Router patterns which silently break in wouter. Key differences: no `useNavigate()` (use `const [, setLocation] = useLocation()`), no `navigate(-1)` for back (use `window.history.back()`), no `element` prop on Route (use `component={Foo}` or children), no `<Routes>` (use `<Switch>` — without it all matching routes render simultaneously), no `<Navigate>` (use `<Redirect>`), no `<Outlet>` for nested routes (use `nest` prop on Route), and no `useSearchParams()` from react-router (wouter has its own version with a different setter API). If you see any of these React Router patterns in a wouter project, flag it.
|
|
41
|
+
|
|
38
42
|
## When to stay quiet
|
|
39
43
|
|
|
40
44
|
Nits, style preferences, missing edge cases, things the agent will figure out as it goes, patterns that are "not ideal but fine," minor code smells. Let them slide. The agent is busy.
|
|
@@ -69,9 +69,9 @@ The developer should never need to source their own imagery. Always provide URLs
|
|
|
69
69
|
|
|
70
70
|
### Icons and logos
|
|
71
71
|
|
|
72
|
-
App icons and logos require work and thinking to get right.
|
|
72
|
+
App icons and logos require work and thinking to get right.
|
|
73
73
|
|
|
74
|
-
**What works:** Smooth 3D rendering in the style of
|
|
74
|
+
**What works:** Smooth 3D rendering in the style of 2026-era macOS/iOS app icons. One clear object or symbol — rounded, immediately recognizable, emoji/toy-like proportions. Clean surfaces with soft lighting and gentle shadows. Two or three accent colors, not a rainbow. Always full bleed.
|
|
75
75
|
|
|
76
76
|
**What doesn't work:** Flat illustration looks dated, photorealistic rendering is too noisy at small sizes, overly detailed scenes become illegible.
|
|
77
77
|
|
|
@@ -29,3 +29,89 @@ Authentication moments must feel natural and intuitive - they should not feel ja
|
|
|
29
29
|
If the app includes an AI chat interface, take care to make it beautiful and intentional. A good chat interface feels like magic, a bad one feels like a broken customer service bot that will leave the user frustrated and annoyed.
|
|
30
30
|
|
|
31
31
|
Pay close attention to text streaming when the AI replies - it should feel natural, smooth, and beautiful. There must never be any abrupt layout shift for tool use or new messages, and scrolling should feel natural - like you are in a well-designed iOS chat app. Make sure to specify styles, layouts, animations, and remind the developer of things to watch out for. Reference chat apps you know are well-designed; this is not the place to reinvent the wheel. Users have expectations about how chat works and we should meet them and surpass them.
|
|
32
|
+
|
|
33
|
+
### Wireframes
|
|
34
|
+
|
|
35
|
+
When a pattern or interaction is hard to convey in words alone — a core component, an animation sequence, a swipe gesture, a layout grid — you can include a small interactive wireframe to demonstrate it. Use a markdown code fence with `wireframe` as the type. Start with a YAML frontmatter block (`name` and `description`) to identify the component, then the self-contained HTML+CSS prototype.
|
|
36
|
+
|
|
37
|
+
Use wireframes instead of ASCII art and code-block diagrams you might otherwise reach for when trying to show a layout or interaction. Wireframes are better because the developer can actually see and interact with the result. Like those diagrams, they isolate one small piece: a single card component, a button animation, a transition, a grid layout. Each wireframe should be around 60-80 lines of HTML+CSS — if you're past 100 lines, you're building too much. These are not screens, flows, or multi-step prototypes. They render in a small iframe and should look complete at that scale. Most of your communication should be in words - wireframes are simply another tool when you need them. Never build out full screens or pages in wireframes, even if you are asked to - this is critically important.
|
|
38
|
+
|
|
39
|
+
Remember, never use ASCII art or code-block diagrams to describe layouts - always use wireframes.
|
|
40
|
+
|
|
41
|
+
The wireframe code will be rendered in a transparent iframe. Don't fill the viewport or add a background color to the body. Place the component at a natural size in a card with a background color that is centered vertically and horizontally in the viewport. Keep the component tight and self-contained. The iframe is for the component only — no annotations, labels, or explanatory text inside it. Put your notes and implementation guidance in the markdown around the wireframe. Wireframes can be interactive and are especially useful for demonstrating states, animations, effects, and transitions. If your wireframe has triggers or states, include a small "play" control button within the frame (make sure to allow replay/reset for all interactivity). No images - these are functional prototypes meant to demonstrate feel and behavior, not visual comps.
|
|
42
|
+
|
|
43
|
+
```wireframe
|
|
44
|
+
---
|
|
45
|
+
name: Feed Post Card
|
|
46
|
+
description: Photo post card with header, image frame, action row (like/comment/share/bookmark), like count, and caption. Shows spacing, typography hierarchy, and icon placement.
|
|
47
|
+
---
|
|
48
|
+
<html lang="en"><head>
|
|
49
|
+
<meta charset="utf-8"/>
|
|
50
|
+
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
|
|
51
|
+
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600&display=swap" rel="stylesheet"/>
|
|
52
|
+
<link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:wght,FILL@100..700,0..1&display=swap" rel="stylesheet"/>
|
|
53
|
+
<style>
|
|
54
|
+
* { margin: 0; padding: 0; box-sizing: border-box; }
|
|
55
|
+
body {
|
|
56
|
+
font-family: 'Plus Jakarta Sans', sans-serif; background: transparent;
|
|
57
|
+
display: flex; align-items: center; justify-content: center;
|
|
58
|
+
}
|
|
59
|
+
.material-symbols-outlined { font-variation-settings: 'FILL' 0, 'wght' 300; }
|
|
60
|
+
|
|
61
|
+
.card {
|
|
62
|
+
width: 340px;
|
|
63
|
+
background: #fff; border-radius: 20px; overflow: hidden;
|
|
64
|
+
box-shadow: 0 8px 32px rgba(0,0,0,0.06);
|
|
65
|
+
}
|
|
66
|
+
.card-header {
|
|
67
|
+
padding: 20px 24px; display: flex; align-items: center; gap: 12px;
|
|
68
|
+
}
|
|
69
|
+
.avatar {
|
|
70
|
+
width: 40px; height: 40px; border-radius: 50%;
|
|
71
|
+
background: linear-gradient(135deg, #98a68e, #55624d);
|
|
72
|
+
}
|
|
73
|
+
.card-header .name { font-weight: 600; font-size: 14px; color: #191c18; }
|
|
74
|
+
.card-header .meta { font-size: 12px; color: #757870; margin-top: 2px; }
|
|
75
|
+
.card-image {
|
|
76
|
+
width: 100%; aspect-ratio: 4/5; background: linear-gradient(180deg, #d9e7cd 0%, #fed7d2 100%);
|
|
77
|
+
}
|
|
78
|
+
.card-actions {
|
|
79
|
+
padding: 16px 24px; display: flex; gap: 16px; align-items: center;
|
|
80
|
+
}
|
|
81
|
+
.card-actions button {
|
|
82
|
+
background: none; border: none; cursor: pointer; color: #444841;
|
|
83
|
+
display: flex; align-items: center; transition: color 0.15s;
|
|
84
|
+
}
|
|
85
|
+
.card-actions button:hover { color: #55624d; }
|
|
86
|
+
.card-actions .spacer { flex: 1; }
|
|
87
|
+
.card-body { padding: 0 24px 20px; }
|
|
88
|
+
.card-body .likes { font-weight: 600; font-size: 13px; color: #191c18; margin-bottom: 6px; }
|
|
89
|
+
.card-body .caption { font-size: 13px; color: #444841; line-height: 1.5; }
|
|
90
|
+
.card-body .caption strong { font-weight: 600; color: #191c18; }
|
|
91
|
+
</style>
|
|
92
|
+
</head>
|
|
93
|
+
<body>
|
|
94
|
+
<div class="card">
|
|
95
|
+
<div class="card-header">
|
|
96
|
+
<div class="avatar"></div>
|
|
97
|
+
<div>
|
|
98
|
+
<div class="name">sarah.chen</div>
|
|
99
|
+
<div class="meta">Golden Gate Park · 2h</div>
|
|
100
|
+
</div>
|
|
101
|
+
</div>
|
|
102
|
+
<div class="card-image"></div>
|
|
103
|
+
<div class="card-actions">
|
|
104
|
+
<button><span class="material-symbols-outlined">favorite</span></button>
|
|
105
|
+
<button><span class="material-symbols-outlined">chat_bubble</span></button>
|
|
106
|
+
<button><span class="material-symbols-outlined">send</span></button>
|
|
107
|
+
<span class="spacer"></span>
|
|
108
|
+
<button><span class="material-symbols-outlined">bookmark</span></button>
|
|
109
|
+
</div>
|
|
110
|
+
<div class="card-body">
|
|
111
|
+
<div class="likes">2,847 likes</div>
|
|
112
|
+
<div class="caption"><strong>sarah.chen</strong> Morning light through the eucalyptus grove</div>
|
|
113
|
+
</div>
|
|
114
|
+
</div>
|
|
115
|
+
</body>
|
|
116
|
+
</html>
|
|
117
|
+
```
|
|
@@ -50,12 +50,12 @@ For photorealistic images, be specific about:
|
|
|
50
50
|
|
|
51
51
|
For app icons and logos, the goal is something that reads clearly at small sizes and feels polished enough to sit on a home screen or in an app header.
|
|
52
52
|
|
|
53
|
-
- Frame as "A 3D icon against a
|
|
53
|
+
- Frame as "A 3D icon against a XYZ background" followed by the subject. Do NOT use the phrase "app icon" — it triggers mockup framing (the model renders an icon inset on a phone screen or mounted on a wall). "3D icon" works.
|
|
54
54
|
- Describe smooth, rounded emoji-type 3D objects — think current macOS/iOS app icon design language. Clean surfaces, soft lighting, gentle shadows. Not flat illustration, not photorealistic, not clay/matte.
|
|
55
55
|
- Subjects should be simplified and immediately recognizable. Prefer one clear object or symbol, not a scene.
|
|
56
56
|
- Specify "reads well at small sizes" as an explicit constraint.
|
|
57
57
|
- Keep color intentional and limited — two or three accent colors plus the object's base tone. Colors should complement the app's brand if known.
|
|
58
|
-
-
|
|
58
|
+
- Make sure to specify full bleed - never say anything about rounded corners or there is a high likelihood that the image will come back as a rounded rectangle on a white background!
|
|
59
59
|
|
|
60
60
|
## Output
|
|
61
61
|
|