@mindstudio-ai/remy 0.1.151 → 0.1.153
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/headless.js +42 -53
- package/dist/index.js +44 -56
- package/dist/prompt/.notes.md +1 -3
- package/dist/prompt/static/coding.md +1 -1
- package/dist/prompt/static/instructions.md +3 -2
- package/dist/prompt/static/team.md +1 -1
- package/dist/subagents/browserAutomation/prompt.md +2 -2
- package/package.json +1 -1
package/dist/headless.js
CHANGED
|
@@ -486,18 +486,21 @@ async function* streamChat(params) {
|
|
|
486
486
|
})
|
|
487
487
|
]);
|
|
488
488
|
clearTimeout(stallTimer);
|
|
489
|
-
} catch {
|
|
489
|
+
} catch (err) {
|
|
490
490
|
clearTimeout(stallTimer);
|
|
491
|
-
|
|
492
|
-
|
|
491
|
+
try {
|
|
492
|
+
await reader.cancel();
|
|
493
|
+
} catch {
|
|
494
|
+
}
|
|
495
|
+
const isStall = err?.message === "stream_stall";
|
|
496
|
+
const errorMessage = isStall ? "Stream stalled \u2014 no data received for 5 minutes" : `Network error: stream interrupted \u2014 ${err?.message ?? "unknown"}`;
|
|
497
|
+
log2.error(isStall ? "Stream stalled" : "Stream interrupted", {
|
|
493
498
|
requestId,
|
|
494
499
|
...subAgentId && { subAgentId },
|
|
495
|
-
durationMs: Date.now() - startTime
|
|
500
|
+
durationMs: Date.now() - startTime,
|
|
501
|
+
error: errorMessage
|
|
496
502
|
});
|
|
497
|
-
yield {
|
|
498
|
-
type: "error",
|
|
499
|
-
error: "Stream stalled \u2014 no data received for 5 minutes"
|
|
500
|
-
};
|
|
503
|
+
yield { type: "error", error: errorMessage };
|
|
501
504
|
return;
|
|
502
505
|
}
|
|
503
506
|
const { done, value } = readResult;
|
|
@@ -790,11 +793,40 @@ function serializeForSummary(messages) {
|
|
|
790
793
|
return `[${msg.role}]: ${parts.join("\n")}`;
|
|
791
794
|
}).join("\n\n");
|
|
792
795
|
}
|
|
796
|
+
var CHUNK_CHAR_LIMIT = 24e5;
|
|
793
797
|
async function generateSummary(apiConfig, name, compactionPrompt, messagesToSummarize, mainSystem, mainTools) {
|
|
794
798
|
const serialized = serializeForSummary(messagesToSummarize);
|
|
795
799
|
if (!serialized.trim()) {
|
|
796
800
|
return null;
|
|
797
801
|
}
|
|
802
|
+
if (serialized.length > CHUNK_CHAR_LIMIT && messagesToSummarize.length > 1) {
|
|
803
|
+
const mid = Math.floor(messagesToSummarize.length / 2);
|
|
804
|
+
log3.info("Chunking summary", {
|
|
805
|
+
name,
|
|
806
|
+
messageCount: messagesToSummarize.length,
|
|
807
|
+
serializedLength: serialized.length
|
|
808
|
+
});
|
|
809
|
+
const [first, second] = await Promise.all([
|
|
810
|
+
generateSummary(
|
|
811
|
+
apiConfig,
|
|
812
|
+
`${name} [pt1]`,
|
|
813
|
+
compactionPrompt,
|
|
814
|
+
messagesToSummarize.slice(0, mid),
|
|
815
|
+
mainSystem,
|
|
816
|
+
mainTools
|
|
817
|
+
),
|
|
818
|
+
generateSummary(
|
|
819
|
+
apiConfig,
|
|
820
|
+
`${name} [pt2]`,
|
|
821
|
+
compactionPrompt,
|
|
822
|
+
messagesToSummarize.slice(mid),
|
|
823
|
+
mainSystem,
|
|
824
|
+
mainTools
|
|
825
|
+
)
|
|
826
|
+
]);
|
|
827
|
+
const parts = [first, second].filter((p) => !!p);
|
|
828
|
+
return parts.length > 0 ? parts.join("\n\n---\n\n") : null;
|
|
829
|
+
}
|
|
798
830
|
log3.info("Generating summary", {
|
|
799
831
|
name,
|
|
800
832
|
messageCount: messagesToSummarize.length,
|
|
@@ -1293,7 +1325,7 @@ var writePlanTool = {
|
|
|
1293
1325
|
clearable: false,
|
|
1294
1326
|
definition: {
|
|
1295
1327
|
name: "writePlan",
|
|
1296
|
-
description: "Write an implementation plan for user approval before making changes. Use this only for large, multi-step changes like new features, new interface types, or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. The plan is displayed standalone in the UI with approve/reject buttons
|
|
1328
|
+
description: "Write an implementation plan for user approval before making changes. Use this only for large, multi-step changes like new features, new interface types, or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. The plan is displayed standalone in the UI with approve/reject buttons attached; the plan content itself is just the substantive sections and ends when those do. Any wrap-up to the user \u2014 sign-offs, prompts for approval, offers to revise \u2014 belongs in your chat message alongside the plan, not inside it. If the user asks for revisions, call this tool again with updated content to overwrite the plan.",
|
|
1297
1329
|
inputSchema: {
|
|
1298
1330
|
type: "object",
|
|
1299
1331
|
properties: {
|
|
@@ -2680,28 +2712,6 @@ function acquireBrowserLock() {
|
|
|
2680
2712
|
lockQueue = next;
|
|
2681
2713
|
return wait.then(() => release);
|
|
2682
2714
|
}
|
|
2683
|
-
async function checkBrowserConnected() {
|
|
2684
|
-
try {
|
|
2685
|
-
const status = await sidecarRequest(
|
|
2686
|
-
"/browser-status",
|
|
2687
|
-
{},
|
|
2688
|
-
{ timeout: 5e3 }
|
|
2689
|
-
);
|
|
2690
|
-
if (!status.connected) {
|
|
2691
|
-
return {
|
|
2692
|
-
connected: false,
|
|
2693
|
-
reason: BROWSER_UNAVAILABLE_MESSAGE
|
|
2694
|
-
};
|
|
2695
|
-
}
|
|
2696
|
-
return { connected: true };
|
|
2697
|
-
} catch {
|
|
2698
|
-
return {
|
|
2699
|
-
connected: false,
|
|
2700
|
-
reason: BROWSER_UNAVAILABLE_MESSAGE
|
|
2701
|
-
};
|
|
2702
|
-
}
|
|
2703
|
-
}
|
|
2704
|
-
var BROWSER_UNAVAILABLE_MESSAGE = "Browser preview unavailable \u2014 the user has closed their browser and we are continuing to work in the background. This is not a code failure and not something to diagnose. Do not tell the user to click or open anything. Skip the visual check and verify your work through other means: runMethod for backend behavior, queryDatabase for data checks, .logs/devServer.ndjson for build errors, .logs/browser.ndjson for runtime errors, lspDiagnostics for type/syntax, or read the code directly.";
|
|
2705
2715
|
|
|
2706
2716
|
// src/statusWatcher.ts
|
|
2707
2717
|
function startStatusWatcher(config) {
|
|
@@ -3338,7 +3348,7 @@ var BROWSER_TOOLS = [
|
|
|
3338
3348
|
{
|
|
3339
3349
|
clearable: true,
|
|
3340
3350
|
name: "browserCommand",
|
|
3341
|
-
description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Timeout: 120s.",
|
|
3351
|
+
description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recordingUrl` \u2014 an rrweb session recording for visual replay. Timeout: 120s.",
|
|
3342
3352
|
inputSchema: {
|
|
3343
3353
|
type: "object",
|
|
3344
3354
|
properties: {
|
|
@@ -3475,14 +3485,6 @@ var browserAutomationTool = {
|
|
|
3475
3485
|
}
|
|
3476
3486
|
const release = await acquireBrowserLock();
|
|
3477
3487
|
try {
|
|
3478
|
-
const browserStatus = await checkBrowserConnected();
|
|
3479
|
-
if (!browserStatus.connected) {
|
|
3480
|
-
return browserStatus.reason ?? "Browser preview unavailable.";
|
|
3481
|
-
}
|
|
3482
|
-
try {
|
|
3483
|
-
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
3484
|
-
} catch {
|
|
3485
|
-
}
|
|
3486
3488
|
const result = await runSubAgent({
|
|
3487
3489
|
system: getBrowserAutomationPrompt(),
|
|
3488
3490
|
task: input.task,
|
|
@@ -3572,10 +3574,6 @@ var browserAutomationTool = {
|
|
|
3572
3574
|
toolRegistry: context.toolRegistry,
|
|
3573
3575
|
captureArtifacts: ["screenshotFullPage"]
|
|
3574
3576
|
});
|
|
3575
|
-
try {
|
|
3576
|
-
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
3577
|
-
} catch {
|
|
3578
|
-
}
|
|
3579
3577
|
context.subAgentMessages?.set(context.toolCallId, result.messages);
|
|
3580
3578
|
const ss = result.artifacts?.screenshotFullPage;
|
|
3581
3579
|
if (ss?.url) {
|
|
@@ -3661,10 +3659,6 @@ var screenshotTool = {
|
|
|
3661
3659
|
}
|
|
3662
3660
|
const release = await acquireBrowserLock();
|
|
3663
3661
|
try {
|
|
3664
|
-
const browserStatus = await checkBrowserConnected();
|
|
3665
|
-
if (!browserStatus.connected) {
|
|
3666
|
-
return browserStatus.reason ?? "Browser preview unavailable.";
|
|
3667
|
-
}
|
|
3668
3662
|
return await captureAndAnalyzeScreenshot({
|
|
3669
3663
|
prompt: input.prompt,
|
|
3670
3664
|
path: input.path,
|
|
@@ -3999,10 +3993,6 @@ async function execute5(input, onLog, context) {
|
|
|
3999
3993
|
}
|
|
4000
3994
|
const release = await acquireBrowserLock();
|
|
4001
3995
|
try {
|
|
4002
|
-
const browserStatus = await checkBrowserConnected();
|
|
4003
|
-
if (!browserStatus.connected) {
|
|
4004
|
-
return browserStatus.reason ?? "Browser preview unavailable.";
|
|
4005
|
-
}
|
|
4006
3996
|
return await captureAndAnalyzeScreenshot({
|
|
4007
3997
|
prompt: input.prompt,
|
|
4008
3998
|
path: input.path,
|
|
@@ -6791,7 +6781,6 @@ ${userMessage}` : header;
|
|
|
6791
6781
|
requestId,
|
|
6792
6782
|
error: err.message
|
|
6793
6783
|
});
|
|
6794
|
-
this.queue.drain();
|
|
6795
6784
|
}
|
|
6796
6785
|
this.applyPendingSummaries();
|
|
6797
6786
|
this.applyPendingBlockUpdates();
|
package/dist/index.js
CHANGED
|
@@ -171,18 +171,21 @@ async function* streamChat(params) {
|
|
|
171
171
|
})
|
|
172
172
|
]);
|
|
173
173
|
clearTimeout(stallTimer);
|
|
174
|
-
} catch {
|
|
174
|
+
} catch (err) {
|
|
175
175
|
clearTimeout(stallTimer);
|
|
176
|
-
|
|
177
|
-
|
|
176
|
+
try {
|
|
177
|
+
await reader.cancel();
|
|
178
|
+
} catch {
|
|
179
|
+
}
|
|
180
|
+
const isStall = err?.message === "stream_stall";
|
|
181
|
+
const errorMessage = isStall ? "Stream stalled \u2014 no data received for 5 minutes" : `Network error: stream interrupted \u2014 ${err?.message ?? "unknown"}`;
|
|
182
|
+
log.error(isStall ? "Stream stalled" : "Stream interrupted", {
|
|
178
183
|
requestId,
|
|
179
184
|
...subAgentId && { subAgentId },
|
|
180
|
-
durationMs: Date.now() - startTime
|
|
185
|
+
durationMs: Date.now() - startTime,
|
|
186
|
+
error: errorMessage
|
|
181
187
|
});
|
|
182
|
-
yield {
|
|
183
|
-
type: "error",
|
|
184
|
-
error: "Stream stalled \u2014 no data received for 5 minutes"
|
|
185
|
-
};
|
|
188
|
+
yield { type: "error", error: errorMessage };
|
|
186
189
|
return;
|
|
187
190
|
}
|
|
188
191
|
const { done, value } = readResult;
|
|
@@ -834,7 +837,7 @@ var init_writePlan = __esm({
|
|
|
834
837
|
clearable: false,
|
|
835
838
|
definition: {
|
|
836
839
|
name: "writePlan",
|
|
837
|
-
description: "Write an implementation plan for user approval before making changes. Use this only for large, multi-step changes like new features, new interface types, or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. The plan is displayed standalone in the UI with approve/reject buttons
|
|
840
|
+
description: "Write an implementation plan for user approval before making changes. Use this only for large, multi-step changes like new features, new interface types, or when the user explicitly asks to see a plan. Most work should be done autonomously without a plan. Write a clear markdown summary of what you intend to do in plain language \u2014 describe the changes from the user's perspective, not as a list of files and code paths. The plan is displayed standalone in the UI with approve/reject buttons attached; the plan content itself is just the substantive sections and ends when those do. Any wrap-up to the user \u2014 sign-offs, prompts for approval, offers to revise \u2014 belongs in your chat message alongside the plan, not inside it. If the user asks for revisions, call this tool again with updated content to overwrite the plan.",
|
|
838
841
|
inputSchema: {
|
|
839
842
|
type: "object",
|
|
840
843
|
properties: {
|
|
@@ -1511,6 +1514,34 @@ async function generateSummary(apiConfig, name, compactionPrompt, messagesToSumm
|
|
|
1511
1514
|
if (!serialized.trim()) {
|
|
1512
1515
|
return null;
|
|
1513
1516
|
}
|
|
1517
|
+
if (serialized.length > CHUNK_CHAR_LIMIT && messagesToSummarize.length > 1) {
|
|
1518
|
+
const mid = Math.floor(messagesToSummarize.length / 2);
|
|
1519
|
+
log2.info("Chunking summary", {
|
|
1520
|
+
name,
|
|
1521
|
+
messageCount: messagesToSummarize.length,
|
|
1522
|
+
serializedLength: serialized.length
|
|
1523
|
+
});
|
|
1524
|
+
const [first, second] = await Promise.all([
|
|
1525
|
+
generateSummary(
|
|
1526
|
+
apiConfig,
|
|
1527
|
+
`${name} [pt1]`,
|
|
1528
|
+
compactionPrompt,
|
|
1529
|
+
messagesToSummarize.slice(0, mid),
|
|
1530
|
+
mainSystem,
|
|
1531
|
+
mainTools
|
|
1532
|
+
),
|
|
1533
|
+
generateSummary(
|
|
1534
|
+
apiConfig,
|
|
1535
|
+
`${name} [pt2]`,
|
|
1536
|
+
compactionPrompt,
|
|
1537
|
+
messagesToSummarize.slice(mid),
|
|
1538
|
+
mainSystem,
|
|
1539
|
+
mainTools
|
|
1540
|
+
)
|
|
1541
|
+
]);
|
|
1542
|
+
const parts = [first, second].filter((p) => !!p);
|
|
1543
|
+
return parts.length > 0 ? parts.join("\n\n---\n\n") : null;
|
|
1544
|
+
}
|
|
1514
1545
|
log2.info("Generating summary", {
|
|
1515
1546
|
name,
|
|
1516
1547
|
messageCount: messagesToSummarize.length,
|
|
@@ -1548,7 +1579,7 @@ ${serialized}` : serialized;
|
|
|
1548
1579
|
log2.info("Summary generated", { name, summaryLength: summaryText.length });
|
|
1549
1580
|
return summaryText.trim();
|
|
1550
1581
|
}
|
|
1551
|
-
var log2, CONVERSATION_SUMMARY_PROMPT, SUBAGENT_SUMMARY_PROMPT, SUMMARIZABLE_SUBAGENTS;
|
|
1582
|
+
var log2, CONVERSATION_SUMMARY_PROMPT, SUBAGENT_SUMMARY_PROMPT, SUMMARIZABLE_SUBAGENTS, CHUNK_CHAR_LIMIT;
|
|
1552
1583
|
var init_compaction = __esm({
|
|
1553
1584
|
"src/compaction/index.ts"() {
|
|
1554
1585
|
"use strict";
|
|
@@ -1559,6 +1590,7 @@ var init_compaction = __esm({
|
|
|
1559
1590
|
CONVERSATION_SUMMARY_PROMPT = readAsset("compaction", "conversation.md");
|
|
1560
1591
|
SUBAGENT_SUMMARY_PROMPT = readAsset("compaction", "subagent.md");
|
|
1561
1592
|
SUMMARIZABLE_SUBAGENTS = ["visualDesignExpert", "productVision"];
|
|
1593
|
+
CHUNK_CHAR_LIMIT = 24e5;
|
|
1562
1594
|
}
|
|
1563
1595
|
});
|
|
1564
1596
|
|
|
@@ -2958,34 +2990,11 @@ function acquireBrowserLock() {
|
|
|
2958
2990
|
lockQueue = next;
|
|
2959
2991
|
return wait.then(() => release);
|
|
2960
2992
|
}
|
|
2961
|
-
|
|
2962
|
-
try {
|
|
2963
|
-
const status = await sidecarRequest(
|
|
2964
|
-
"/browser-status",
|
|
2965
|
-
{},
|
|
2966
|
-
{ timeout: 5e3 }
|
|
2967
|
-
);
|
|
2968
|
-
if (!status.connected) {
|
|
2969
|
-
return {
|
|
2970
|
-
connected: false,
|
|
2971
|
-
reason: BROWSER_UNAVAILABLE_MESSAGE
|
|
2972
|
-
};
|
|
2973
|
-
}
|
|
2974
|
-
return { connected: true };
|
|
2975
|
-
} catch {
|
|
2976
|
-
return {
|
|
2977
|
-
connected: false,
|
|
2978
|
-
reason: BROWSER_UNAVAILABLE_MESSAGE
|
|
2979
|
-
};
|
|
2980
|
-
}
|
|
2981
|
-
}
|
|
2982
|
-
var lockQueue, BROWSER_UNAVAILABLE_MESSAGE;
|
|
2993
|
+
var lockQueue;
|
|
2983
2994
|
var init_browserLock = __esm({
|
|
2984
2995
|
"src/tools/_helpers/browserLock.ts"() {
|
|
2985
2996
|
"use strict";
|
|
2986
|
-
init_sidecar();
|
|
2987
2997
|
lockQueue = Promise.resolve();
|
|
2988
|
-
BROWSER_UNAVAILABLE_MESSAGE = "Browser preview unavailable \u2014 the user has closed their browser and we are continuing to work in the background. This is not a code failure and not something to diagnose. Do not tell the user to click or open anything. Skip the visual check and verify your work through other means: runMethod for backend behavior, queryDatabase for data checks, .logs/devServer.ndjson for build errors, .logs/browser.ndjson for runtime errors, lspDiagnostics for type/syntax, or read the code directly.";
|
|
2989
2998
|
}
|
|
2990
2999
|
});
|
|
2991
3000
|
|
|
@@ -3654,7 +3663,7 @@ var init_tools = __esm({
|
|
|
3654
3663
|
{
|
|
3655
3664
|
clearable: true,
|
|
3656
3665
|
name: "browserCommand",
|
|
3657
|
-
description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Timeout: 120s.",
|
|
3666
|
+
description: "Interact with the app's live preview by sending browser commands. Commands execute sequentially with an animated cursor. Always start with a snapshot to see the current state and get ref identifiers. The result includes a snapshot field with the final page state after all steps complete. On error, the failing step has an error field and execution stops. Batches that contain an interactive step (click, type, select) also return a `recordingUrl` \u2014 an rrweb session recording for visual replay. Timeout: 120s.",
|
|
3658
3667
|
inputSchema: {
|
|
3659
3668
|
type: "object",
|
|
3660
3669
|
properties: {
|
|
@@ -3812,14 +3821,6 @@ var init_browserAutomation = __esm({
|
|
|
3812
3821
|
}
|
|
3813
3822
|
const release = await acquireBrowserLock();
|
|
3814
3823
|
try {
|
|
3815
|
-
const browserStatus = await checkBrowserConnected();
|
|
3816
|
-
if (!browserStatus.connected) {
|
|
3817
|
-
return browserStatus.reason ?? "Browser preview unavailable.";
|
|
3818
|
-
}
|
|
3819
|
-
try {
|
|
3820
|
-
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
3821
|
-
} catch {
|
|
3822
|
-
}
|
|
3823
3824
|
const result = await runSubAgent({
|
|
3824
3825
|
system: getBrowserAutomationPrompt(),
|
|
3825
3826
|
task: input.task,
|
|
@@ -3909,10 +3910,6 @@ var init_browserAutomation = __esm({
|
|
|
3909
3910
|
toolRegistry: context.toolRegistry,
|
|
3910
3911
|
captureArtifacts: ["screenshotFullPage"]
|
|
3911
3912
|
});
|
|
3912
|
-
try {
|
|
3913
|
-
await sidecarRequest("/reset-browser", {}, { timeout: 5e3 });
|
|
3914
|
-
} catch {
|
|
3915
|
-
}
|
|
3916
3913
|
context.subAgentMessages?.set(context.toolCallId, result.messages);
|
|
3917
3914
|
const ss = result.artifacts?.screenshotFullPage;
|
|
3918
3915
|
if (ss?.url) {
|
|
@@ -4008,10 +4005,6 @@ var init_screenshot2 = __esm({
|
|
|
4008
4005
|
}
|
|
4009
4006
|
const release = await acquireBrowserLock();
|
|
4010
4007
|
try {
|
|
4011
|
-
const browserStatus = await checkBrowserConnected();
|
|
4012
|
-
if (!browserStatus.connected) {
|
|
4013
|
-
return browserStatus.reason ?? "Browser preview unavailable.";
|
|
4014
|
-
}
|
|
4015
4008
|
return await captureAndAnalyzeScreenshot({
|
|
4016
4009
|
prompt: input.prompt,
|
|
4017
4010
|
path: input.path,
|
|
@@ -4362,10 +4355,6 @@ async function execute5(input, onLog, context) {
|
|
|
4362
4355
|
}
|
|
4363
4356
|
const release = await acquireBrowserLock();
|
|
4364
4357
|
try {
|
|
4365
|
-
const browserStatus = await checkBrowserConnected();
|
|
4366
|
-
if (!browserStatus.connected) {
|
|
4367
|
-
return browserStatus.reason ?? "Browser preview unavailable.";
|
|
4368
|
-
}
|
|
4369
4358
|
return await captureAndAnalyzeScreenshot({
|
|
4370
4359
|
prompt: input.prompt,
|
|
4371
4360
|
path: input.path,
|
|
@@ -7527,7 +7516,6 @@ ${userMessage}` : header;
|
|
|
7527
7516
|
requestId,
|
|
7528
7517
|
error: err.message
|
|
7529
7518
|
});
|
|
7530
|
-
this.queue.drain();
|
|
7531
7519
|
}
|
|
7532
7520
|
this.applyPendingSummaries();
|
|
7533
7521
|
this.applyPendingBlockUpdates();
|
package/dist/prompt/.notes.md
CHANGED
|
@@ -145,7 +145,7 @@ The intro framing ("you have a lot on your plate") gives the model permission to
|
|
|
145
145
|
| `productVision` | Roadmap ownership & product strategy | writeRoadmapItem, updateRoadmapItem, deleteRoadmapItem | Spec files + current roadmap |
|
|
146
146
|
| `sdkConsultant` | MindStudio SDK architecture | None (shells out to `mindstudio ask` CLI) | None (external agent) |
|
|
147
147
|
| `codeSanityCheck` | Pre-build review | readFile, grep, glob, searchGoogle, fetchUrl, askMindStudioSdk, bash (readonly) | Spec files |
|
|
148
|
-
| `browserAutomation` | Interactive UI testing | browserCommand,
|
|
148
|
+
| `browserAutomation` | Interactive UI testing | browserCommand, screenshotFullPage, setupBrowser | None (interacts with live preview) |
|
|
149
149
|
|
|
150
150
|
### Shared infrastructure
|
|
151
151
|
|
|
@@ -177,8 +177,6 @@ New `type: roadmap` for MSFM files in `src/roadmap/`. Each item has frontmatter
|
|
|
177
177
|
- **Automated message sentinel** — `@@automated::{tag}@@` prefix on user messages, stripped before sending to LLM. Frontend uses for custom rendering.
|
|
178
178
|
- **Project naming** — `setProjectName` tool for setting display name after intake.
|
|
179
179
|
- **Dynamic status labels** — `statusWatcher.ts` periodically calls a lightweight endpoint to generate descriptive labels during agent work.
|
|
180
|
-
- **Browser status check** — agent checks `/browser-status` before starting browser automation to fail fast if preview isn't connected.
|
|
181
|
-
- **Browser reset** — `resetBrowser` tool restores preview to clean state after testing.
|
|
182
180
|
- **Asset bundling** — `tsup.config.ts` copies .md/.json/.sh files from src/ to dist/ on build.
|
|
183
181
|
|
|
184
182
|
## What's Not Done
|
|
@@ -12,7 +12,7 @@ Run `lspDiagnostics` after every turn where you have edited code in any meaningf
|
|
|
12
12
|
- Spot-check methods with `runMethod`. The dev database is a disposable snapshot that will have been seeded with scenario data, so don't worry about being destructive.
|
|
13
13
|
- For frontend work, take a single `screenshot` to confirm the main view renders correctly or look at the browser log for any console errors in the user's preview.
|
|
14
14
|
- Use `runAutomatedBrowserTest` to verify an interactive flow that you can't confirm from a screenshot, or when the user reports something broken that you can't identify from code alone.
|
|
15
|
-
- If the browser
|
|
15
|
+
- If the browser is unavailable, skip the visual check and verify through methods, logs, and code instead. Browser unavailability is an infrastructure issue, not a code problem — don't try to diagnose or fix it.
|
|
16
16
|
|
|
17
17
|
Aim for confidence that the core happy paths work. If the 80% case is solid, the remaining edge cases are likely fine and the user can surface them in chat. Don't screenshot every page, test every permutation, or verify every secondary flow. One or two runtime checks that confirm the app loads and data flows through is enough.
|
|
18
18
|
|
|
@@ -35,8 +35,9 @@ You will occasionally receive automated messages prefixed with `@@automated_mess
|
|
|
35
35
|
|
|
36
36
|
## Style
|
|
37
37
|
- Your messages are rendered as markdown. Use formatting (headers, bold, lists, code blocks) when it helps readability. You can include images using `![alt text](url)` — use this to show the user screenshots, generated images, or other visual references inline in your messages.
|
|
38
|
-
- When offering suggestions or options the user might want to quickly select in a conversation, format them as clickable suggestion links: `[
|
|
38
|
+
- When offering suggestions or options the user might want to quickly select in a conversation, format them as clickable suggestion links: `[label](suggest:message sent on click)`. The label renders as a tappable chip and should be a few words — chip-sized, not sentence-sized. The `suggest:` payload can be longer; that's what gets sent as the user's next message when clicked. Use these liberally: when brainstorming, offering directions, listing options, or any time you're asking a question the user could answer with a quick tap. When explicitly gathering information from the user, however, always use the `promptUser` tool instead.
|
|
39
39
|
- Keep language accessible. Describe what the app *does*, not how it's implemented, unless the user demonstrates technical fluency.
|
|
40
40
|
- Always use full paths relative to the project root when mentioning files (`dist/interfaces/web/src/App.tsx`, not `App.tsx`). Paths will be rendered as clickable links for the user.
|
|
41
41
|
- Use inline `code` formatting only for things the user needs to type or search for.
|
|
42
|
-
- When writing prose or communicating with the user, avoid em dashes (and especially when writing specs); use periods, commas, colons, or parentheses instead.
|
|
42
|
+
- When writing prose or communicating with the user, avoid em dashes (and especially when writing specs); use periods, commas, colons, or parentheses instead.
|
|
43
|
+
- Never use emojis when responding to the user.
|
|
@@ -44,7 +44,7 @@ The QA agent can see the screen. Describe what to test, not how — it will figu
|
|
|
44
44
|
|
|
45
45
|
Never tell QA what names to use when testing or what values to input - it will use its own judgment.
|
|
46
46
|
|
|
47
|
-
If the browser
|
|
47
|
+
If the browser is unavailable, QA can't run. That's an infrastructure issue, not a problem with the app — don't try to diagnose or fix it. Verify through methods, logs, and code inspection instead, and note that visual QA was skipped.
|
|
48
48
|
|
|
49
49
|
### Background Execution
|
|
50
50
|
|
|
@@ -3,7 +3,7 @@ You are a browser smoke test agent. You verify that features work end to end by
|
|
|
3
3
|
## Rules to Remember
|
|
4
4
|
- Don't overthink the tests - the goal is to generally make sure things work as expected, not to provide detailed QA. If something seems mostly okay, note it and move on. Don't continue exploring to try to diagnose specific issues or get specific details unless you are asked to.
|
|
5
5
|
- Fail early: If you encounter a showstopper bug (something doesn't load, something is broken, etc.) do not attempt to diagnose it or work around it. We need core common user paths to work - if they don't, the app is broken and testing should not continue until it is fixed. Return early with a report to let the developer fix it; they'll run another test when they're ready.
|
|
6
|
-
- Browser
|
|
6
|
+
- Browser unavailability is an infrastructure issue, not a test failure. If `browserCommand` reports the browser is unavailable or drops mid-test, the test is **inconclusive** — do not retry, do not attribute it to app brokenness. Report "test inconclusive: browser unavailable" and stop.
|
|
7
7
|
|
|
8
8
|
## Tester Persona
|
|
9
9
|
The user is watching the automation happen on their screen in real-time. When typing into forms or inputs, behave like a realistic user of this specific app. Use the app context (if provided) to understand the audience and tone. Type the way that audience would actually type — not formal, not robotic. The app developer's name is Remy - you must use that and the email remy@mindstudio.ai as the basis for any testing that requires a persona.
|
|
@@ -39,7 +39,6 @@ Each interactive element has a `[ref=eN]` you can use to target it.
|
|
|
39
39
|
- `select`: Select a dropdown option by text. Target the `<select>` element, set `option` to the option text.
|
|
40
40
|
- `wait`: Wait for an element to appear (polls every 100ms, default 5s timeout). Also waits for network to settle after the element is found.
|
|
41
41
|
- `navigate`: Navigate to a new URL within the app. Waits for the new page to load before continuing with subsequent steps. Use this instead of evaluate with `window.location.href` when you need to navigate and then continue interacting with the new page. Steps after navigate execute on the new page automatically.
|
|
42
|
-
- `reload`: Reload the current page. Useful if something has crashed, you cannot exit some dynamic screen, or you need to clear stale data or some stale app state. Waits for the page to reload before continuing with subsequent steps. Use this instead of using evaluate to reload a page.
|
|
43
42
|
- `evaluate`: Run arbitrary JavaScript in the page and return the result.
|
|
44
43
|
- `styles`: Read computed CSS styles from page elements. Pass a `properties` array with camelCase CSS property names (e.g., `["backgroundColor", "borderRadius", "fontSize"]`). Omit `properties` for a default set covering colors, typography, spacing, borders, shadows, dimensions, and layout. Uses the same targeting as click/type (ref, text, role, label, selector). Omit the target to get styles for all elements from the last snapshot.
|
|
45
44
|
- `screenshotViewport`: Take a screenshot of the current viewport. Returns CDN url with full text analysis and dimensions. Useful at the end of an action batch to visually see things like layout shift or overflow. Do not use if you can get what you need with other tools - only use when you need to visually see the viewport.
|
|
@@ -61,6 +60,7 @@ Each browserCommand returns:
|
|
|
61
60
|
- `snapshot`: the final page state after all steps complete (always present, even without an explicit snapshot step)
|
|
62
61
|
- `logs`: array of browser-side events that fired during the batch (console output, network failures, JS errors, user interactions). Check this for errors before reporting pass.
|
|
63
62
|
- `duration`: total execution time in ms
|
|
63
|
+
- `recordingUrl` (optional): URL to an rrweb session recording of the tool call. Present whenever the batch contained an interactive step (click, type, select). Include it in your failure reports so the main agent can share it — it's the fastest way to reproduce a bug visually.
|
|
64
64
|
|
|
65
65
|
On error, the failing step has an `error` field and execution stops. Remaining steps are skipped.
|
|
66
66
|
|