@mindstudio-ai/remy 0.1.109 → 0.1.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -414,7 +414,7 @@ ${isLspConfigured() ? `<typescript_lsp>
414
414
  <conversation_summaries>
415
415
  Your conversation history may include <prior_conversation_summary> blocks in the user's messages. These are automated summaries of earlier messages that have been compacted to save context space. The user does not see this summary, they see the full conversation history in their UI. Treat the summary as ground truth for what happened before, but do not reference it directly to the user ("as mentioned in the summary..."). Just continue naturally as if you remember the prior work.
416
416
 
417
- Old tool results are periodically cleared from the conversation to save context space. This is automatic and expected \u2014 you don't need to note down or preserve information from tool results. If you need to reference something from an earlier tool call, just re-read the file or re-run the query, or use your .remy-notes.md file.
417
+ Old tool results are periodically cleared from the conversation to save context space. This is automatic and expected \u2014 you don't need to note down or preserve information from tool results. If you need to reference something from an earlier tool call, just re-read the file or re-run the query.
418
418
  </conversation_summaries>
419
419
 
420
420
  <project_onboarding>
@@ -2455,6 +2455,23 @@ async function analyzeImage(params) {
2455
2455
  var SCREENSHOT_ANALYSIS_PROMPT = `Describe everything visible on screen from top to bottom \u2014 every element, its position, its size relative to the viewport, its colors, its content. Be comprehensive, thorough, and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components).
2456
2456
 
2457
2457
  Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.`;
2458
// Disclaimer telling the image-analysis model to ignore capture-only text
// wrapping, so it is not reported as a layout defect.
var TEXT_WRAP_DISCLAIMER = `Note: ignore text wrapping issues. Screenshots occasionally show text wrapping onto an extra line compared to the live page \u2014 most noticeable in buttons, badges, and headings. This is a known limitation of SVG foreignObject rendering used by the DOM-to-image capture library that took the screenshot. The browser's SVG renderer computes slightly wider text metrics than the HTML layout engine, so text that fits on one line in the live DOM can overflow by a fraction of a pixel in the capture - this is not a real issue.`;
2459
/**
 * Builds the prompt passed to the image-analysis step for a screenshot.
 *
 * The result is, in order: the caller's prompt (or SCREENSHOT_ANALYSIS_PROMPT
 * when none is given), an optional <style_map> section, and finally
 * TEXT_WRAP_DISCLAIMER, each separated by a blank line.
 *
 * @param {{ prompt?: string, styleMap?: unknown }} [opts]
 *   prompt   - custom analysis instructions; any falsy value uses the default.
 *   styleMap - layout data captured with the screenshot. Strings are embedded
 *              as-is; any other truthy value is serialized as JSON.
 *              NOTE(review): the exact shape produced by the capture side is
 *              not visible here - confirm whether it is ever a non-string.
 * @returns {string} the assembled prompt.
 */
function buildScreenshotAnalysisPrompt(opts) {
  // `||` rather than `??` on purpose: an empty-string prompt also falls back.
  let p = opts?.prompt || SCREENSHOT_ANALYSIS_PROMPT;
  const styleMap = opts?.styleMap;
  if (styleMap) {
    // Implicit template coercion would render an object as "[object Object]",
    // so serialize anything that is not already text. JSON.stringify returns
    // undefined for values such as functions, hence the String() fallback.
    const styleMapText = typeof styleMap === "string"
      ? styleMap
      : JSON.stringify(styleMap, null, 2) ?? String(styleMap);
    p += `

The following styleMap describes the computed layout state at the moment of capture. Use it to verify typography, spacing, overflow, and element dimensions \u2014 it is more accurate than visual estimation from the image.

<style_map>
${styleMapText}
</style_map>`;
  }
  p += `

${TEXT_WRAP_DISCLAIMER}`;
  return p;
}
2458
2475
  async function captureAndAnalyzeScreenshot(promptOrOptions) {
2459
2476
  let prompt;
2460
2477
  let existingUrl;
@@ -2469,6 +2486,7 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2469
2486
  prompt = promptOrOptions;
2470
2487
  }
2471
2488
  let url;
2489
+ let styleMap;
2472
2490
  if (existingUrl) {
2473
2491
  url = existingUrl;
2474
2492
  } else {
@@ -2483,20 +2501,21 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2483
2501
  `No URL in sidecar response. The browser may not be ready yet. Response: ${JSON.stringify(ssResult)}`
2484
2502
  );
2485
2503
  }
2504
+ styleMap = ssResult?.styleMap;
2486
2505
  }
2487
2506
  if (prompt === false) {
2488
2507
  return url;
2489
2508
  }
2490
- let analysisPrompt = prompt || SCREENSHOT_ANALYSIS_PROMPT;
2491
- analysisPrompt += `
2492
- Note: ignore text wrapping issues. Screenshots occasionally show text wrapping onto an extra line compared to the live page \u2014 most noticeable in buttons, badges, and headings. This is a known limitation of SVG foreignObject rendering used the DOM-to-image capture library that took the screenshot. The browser's SVG renderer computes slightly wider text metrics than the HTML layout engine, so text that fits on one line in the live DOM can overflow by a fraction of a pixel in the capture - this is not a real issue.
2493
- `;
2509
+ const analysisPrompt = buildScreenshotAnalysisPrompt({
2510
+ prompt: prompt || void 0,
2511
+ styleMap
2512
+ });
2494
2513
  const analysis = await analyzeImage({
2495
2514
  prompt: analysisPrompt,
2496
2515
  imageUrl: url,
2497
2516
  onLog
2498
2517
  });
2499
- return JSON.stringify({ url, analysis });
2518
+ return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
2500
2519
  }
2501
2520
 
2502
2521
  // src/tools/code/screenshot.ts
@@ -3281,7 +3300,9 @@ var browserAutomationTool = {
3281
3300
  stepType: "analyzeImage",
3282
3301
  step: {
3283
3302
  imageUrl: s.result.url,
3284
- prompt: SCREENSHOT_ANALYSIS_PROMPT
3303
+ prompt: buildScreenshotAnalysisPrompt({
3304
+ styleMap: s.result.styleMap
3305
+ })
3285
3306
  }
3286
3307
  }));
3287
3308
  const batchResult = await runCli(
@@ -3460,10 +3481,6 @@ __export(analyzeImage_exports, {
3460
3481
  definition: () => definition4,
3461
3482
  execute: () => execute4
3462
3483
  });
3463
- var DEFAULT_PROMPT = `
3464
- Describe everything visible in this image \u2014 every element, its position, its size relative to the frame, its colors, its content. Be comprehensive, thorough and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components).
3465
-
3466
- Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.`;
3467
3484
  var definition4 = {
3468
3485
  clearable: true,
3469
3486
  name: "analyzeImage",
@@ -3485,11 +3502,9 @@ var definition4 = {
3485
3502
  };
3486
3503
  async function execute4(input, onLog) {
3487
3504
  const imageUrl = input.imageUrl;
3488
- let prompt = input.prompt || DEFAULT_PROMPT;
3489
- prompt += `
3490
-
3491
- Note: ignore text wrapping issues. Screenshots occasionally show text wrapping onto an extra line compared to the live page \u2014 most noticeable in buttons, badges, and headings. This is a known limitation of SVG foreignObject rendering used the DOM-to-image capture library that took the screenshot. The browser's SVG renderer computes slightly wider text metrics than the HTML layout engine, so text that fits on one line in the live DOM can overflow by a fraction of a pixel in the capture - this is not a real issue.
3492
- `;
3505
+ const prompt = buildScreenshotAnalysisPrompt({
3506
+ prompt: input.prompt
3507
+ });
3493
3508
  const analysis = await analyzeImage({
3494
3509
  prompt,
3495
3510
  imageUrl,
@@ -3538,13 +3553,26 @@ async function execute5(input, onLog, context) {
3538
3553
  return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${result}`;
3539
3554
  }
3540
3555
  const url = urlMatch[0];
3541
- const analysisPrompt = input.prompt || SCREENSHOT_ANALYSIS_PROMPT;
3556
+ let styleMap;
3557
+ try {
3558
+ const parsed = JSON.parse(result);
3559
+ styleMap = parsed?.styleMap;
3560
+ } catch {
3561
+ }
3562
+ const analysisPrompt = buildScreenshotAnalysisPrompt({
3563
+ prompt: input.prompt,
3564
+ styleMap
3565
+ });
3542
3566
  const analysis = await analyzeImage({
3543
3567
  prompt: analysisPrompt,
3544
3568
  imageUrl: url,
3545
3569
  onLog
3546
3570
  });
3547
- return JSON.stringify({ url, analysis });
3571
+ return JSON.stringify({
3572
+ url,
3573
+ analysis,
3574
+ ...styleMap ? { styleMap } : {}
3575
+ });
3548
3576
  } catch (err) {
3549
3577
  return `Error taking interactive screenshot: ${err.message}`;
3550
3578
  }
@@ -6044,6 +6072,9 @@ ${xmlParts}
6044
6072
  if (pending) {
6045
6073
  pendingTools.delete(id);
6046
6074
  pending.resolve(result);
6075
+ } else if (!running) {
6076
+ log10.info("Late tool_result while idle, dismissing", { id });
6077
+ emit("completed", { success: true }, requestId);
6047
6078
  } else {
6048
6079
  earlyResults.set(id, result);
6049
6080
  }
package/dist/index.js CHANGED
@@ -2154,6 +2154,22 @@ var init_analyzeImage = __esm({
2154
2154
  });
2155
2155
 
2156
2156
  // src/tools/_helpers/screenshot.ts
2157
/**
 * Assembles the screenshot-analysis prompt from its parts.
 *
 * Order of the parts, separated by blank lines: the supplied prompt (falling
 * back to SCREENSHOT_ANALYSIS_PROMPT), an optional <style_map> section, then
 * TEXT_WRAP_DISCLAIMER, which is always appended last.
 *
 * @param {{ prompt?: string, styleMap?: unknown }} [opts]
 *   prompt   - custom analysis instructions; any falsy value uses the default.
 *   styleMap - layout data captured with the screenshot. Strings are embedded
 *              unchanged; other truthy values are serialized as JSON.
 *              NOTE(review): the producer of styleMap is outside this view -
 *              confirm its actual type.
 * @returns {string} the assembled prompt.
 */
function buildScreenshotAnalysisPrompt(opts) {
  // `||` rather than `??` on purpose: an empty-string prompt also falls back.
  let p = opts?.prompt || SCREENSHOT_ANALYSIS_PROMPT;
  const styleMap = opts?.styleMap;
  if (styleMap) {
    // Avoid "[object Object]" in the prompt when styleMap is not text.
    // JSON.stringify yields undefined for e.g. functions, so fall back to
    // String() to keep the section non-empty.
    const styleMapText = typeof styleMap === "string"
      ? styleMap
      : JSON.stringify(styleMap, null, 2) ?? String(styleMap);
    p += `

The following styleMap describes the computed layout state at the moment of capture. Use it to verify typography, spacing, overflow, and element dimensions \u2014 it is more accurate than visual estimation from the image.

<style_map>
${styleMapText}
</style_map>`;
  }
  p += `

${TEXT_WRAP_DISCLAIMER}`;
  return p;
}
2157
2173
  async function captureAndAnalyzeScreenshot(promptOrOptions) {
2158
2174
  let prompt;
2159
2175
  let existingUrl;
@@ -2168,6 +2184,7 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2168
2184
  prompt = promptOrOptions;
2169
2185
  }
2170
2186
  let url;
2187
+ let styleMap;
2171
2188
  if (existingUrl) {
2172
2189
  url = existingUrl;
2173
2190
  } else {
@@ -2182,22 +2199,23 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2182
2199
  `No URL in sidecar response. The browser may not be ready yet. Response: ${JSON.stringify(ssResult)}`
2183
2200
  );
2184
2201
  }
2202
+ styleMap = ssResult?.styleMap;
2185
2203
  }
2186
2204
  if (prompt === false) {
2187
2205
  return url;
2188
2206
  }
2189
- let analysisPrompt = prompt || SCREENSHOT_ANALYSIS_PROMPT;
2190
- analysisPrompt += `
2191
- Note: ignore text wrapping issues. Screenshots occasionally show text wrapping onto an extra line compared to the live page \u2014 most noticeable in buttons, badges, and headings. This is a known limitation of SVG foreignObject rendering used the DOM-to-image capture library that took the screenshot. The browser's SVG renderer computes slightly wider text metrics than the HTML layout engine, so text that fits on one line in the live DOM can overflow by a fraction of a pixel in the capture - this is not a real issue.
2192
- `;
2207
+ const analysisPrompt = buildScreenshotAnalysisPrompt({
2208
+ prompt: prompt || void 0,
2209
+ styleMap
2210
+ });
2193
2211
  const analysis = await analyzeImage({
2194
2212
  prompt: analysisPrompt,
2195
2213
  imageUrl: url,
2196
2214
  onLog
2197
2215
  });
2198
- return JSON.stringify({ url, analysis });
2216
+ return JSON.stringify({ url, analysis, ...styleMap ? { styleMap } : {} });
2199
2217
  }
2200
- var SCREENSHOT_ANALYSIS_PROMPT;
2218
+ var SCREENSHOT_ANALYSIS_PROMPT, TEXT_WRAP_DISCLAIMER;
2201
2219
  var init_screenshot = __esm({
2202
2220
  "src/tools/_helpers/screenshot.ts"() {
2203
2221
  "use strict";
@@ -2206,6 +2224,7 @@ var init_screenshot = __esm({
2206
2224
  SCREENSHOT_ANALYSIS_PROMPT = `Describe everything visible on screen from top to bottom \u2014 every element, its position, its size relative to the viewport, its colors, its content. Be comprehensive, thorough, and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components).
2207
2225
 
2208
2226
  Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.`;
2227
+ TEXT_WRAP_DISCLAIMER = `Note: ignore text wrapping issues. Screenshots occasionally show text wrapping onto an extra line compared to the live page \u2014 most noticeable in buttons, badges, and headings. This is a known limitation of SVG foreignObject rendering used the DOM-to-image capture library that took the screenshot. The browser's SVG renderer computes slightly wider text metrics than the HTML layout engine, so text that fits on one line in the live DOM can overflow by a fraction of a pixel in the capture - this is not a real issue.`;
2209
2228
  }
2210
2229
  });
2211
2230
 
@@ -3085,7 +3104,9 @@ var init_browserAutomation = __esm({
3085
3104
  stepType: "analyzeImage",
3086
3105
  step: {
3087
3106
  imageUrl: s.result.url,
3088
- prompt: SCREENSHOT_ANALYSIS_PROMPT
3107
+ prompt: buildScreenshotAnalysisPrompt({
3108
+ styleMap: s.result.styleMap
3109
+ })
3089
3110
  }
3090
3111
  }));
3091
3112
  const batchResult = await runCli(
@@ -3290,11 +3311,9 @@ __export(analyzeImage_exports, {
3290
3311
  });
3291
3312
  async function execute4(input, onLog) {
3292
3313
  const imageUrl = input.imageUrl;
3293
- let prompt = input.prompt || DEFAULT_PROMPT;
3294
- prompt += `
3295
-
3296
- Note: ignore text wrapping issues. Screenshots occasionally show text wrapping onto an extra line compared to the live page \u2014 most noticeable in buttons, badges, and headings. This is a known limitation of SVG foreignObject rendering used the DOM-to-image capture library that took the screenshot. The browser's SVG renderer computes slightly wider text metrics than the HTML layout engine, so text that fits on one line in the live DOM can overflow by a fraction of a pixel in the capture - this is not a real issue.
3297
- `;
3314
+ const prompt = buildScreenshotAnalysisPrompt({
3315
+ prompt: input.prompt
3316
+ });
3298
3317
  const analysis = await analyzeImage({
3299
3318
  prompt,
3300
3319
  imageUrl,
@@ -3302,15 +3321,12 @@ async function execute4(input, onLog) {
3302
3321
  });
3303
3322
  return JSON.stringify({ url: imageUrl, analysis });
3304
3323
  }
3305
- var DEFAULT_PROMPT, definition4;
3324
+ var definition4;
3306
3325
  var init_analyzeImage2 = __esm({
3307
3326
  "src/subagents/designExpert/tools/analyzeImage.ts"() {
3308
3327
  "use strict";
3309
3328
  init_analyzeImage();
3310
- DEFAULT_PROMPT = `
3311
- Describe everything visible in this image \u2014 every element, its position, its size relative to the frame, its colors, its content. Be comprehensive, thorough and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components).
3312
-
3313
- Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.`;
3329
+ init_screenshot();
3314
3330
  definition4 = {
3315
3331
  clearable: true,
3316
3332
  name: "analyzeImage",
@@ -3351,13 +3367,26 @@ async function execute5(input, onLog, context) {
3351
3367
  return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${result}`;
3352
3368
  }
3353
3369
  const url = urlMatch[0];
3354
- const analysisPrompt = input.prompt || SCREENSHOT_ANALYSIS_PROMPT;
3370
+ let styleMap;
3371
+ try {
3372
+ const parsed = JSON.parse(result);
3373
+ styleMap = parsed?.styleMap;
3374
+ } catch {
3375
+ }
3376
+ const analysisPrompt = buildScreenshotAnalysisPrompt({
3377
+ prompt: input.prompt,
3378
+ styleMap
3379
+ });
3355
3380
  const analysis = await analyzeImage({
3356
3381
  prompt: analysisPrompt,
3357
3382
  imageUrl: url,
3358
3383
  onLog
3359
3384
  });
3360
- return JSON.stringify({ url, analysis });
3385
+ return JSON.stringify({
3386
+ url,
3387
+ analysis,
3388
+ ...styleMap ? { styleMap } : {}
3389
+ });
3361
3390
  } catch (err) {
3362
3391
  return `Error taking interactive screenshot: ${err.message}`;
3363
3392
  }
@@ -5828,7 +5857,7 @@ ${isLspConfigured() ? `<typescript_lsp>
5828
5857
  <conversation_summaries>
5829
5858
  Your conversation history may include <prior_conversation_summary> blocks in the user's messages. These are automated summaries of earlier messages that have been compacted to save context space. The user does not see this summary, they see the full conversation history in their UI. Treat the summary as ground truth for what happened before, but do not reference it directly to the user ("as mentioned in the summary..."). Just continue naturally as if you remember the prior work.
5830
5859
 
5831
- Old tool results are periodically cleared from the conversation to save context space. This is automatic and expected \u2014 you don't need to note down or preserve information from tool results. If you need to reference something from an earlier tool call, just re-read the file or re-run the query, or use your .remy-notes.md file.
5860
+ Old tool results are periodically cleared from the conversation to save context space. This is automatic and expected \u2014 you don't need to note down or preserve information from tool results. If you need to reference something from an earlier tool call, just re-read the file or re-run the query.
5832
5861
  </conversation_summaries>
5833
5862
 
5834
5863
  <project_onboarding>
@@ -6688,6 +6717,9 @@ ${xmlParts}
6688
6717
  if (pending) {
6689
6718
  pendingTools.delete(id);
6690
6719
  pending.resolve(result);
6720
+ } else if (!running) {
6721
+ log10.info("Late tool_result while idle, dismissing", { id });
6722
+ emit("completed", { success: true }, requestId);
6691
6723
  } else {
6692
6724
  earlyResults.set(id, result);
6693
6725
  }
@@ -251,7 +251,7 @@ The human-readable spec. Frontmatter contains structured fields; the prose body
251
251
  ```yaml
252
252
  ---
253
253
  name: Todo Assistant
254
- model: {"model": "claude-4-5-haiku", "temperature": 0.5, "maxResponseTokens": 15000}
254
+ model: {"model": "claude-4-5-haiku", "temperature": 0.5, "maxResponseTokens": 16000}
255
255
  description: Conversational agent that helps users manage their to-do list.
256
256
  ---
257
257
  ```
@@ -282,7 +282,7 @@ dist/interfaces/agent/
282
282
  "agent": {
283
283
  "model": "claude-4-5-haiku",
284
284
  "temperature": 0.5,
285
- "maxTokens": 15000,
285
+ "maxTokens": 16000,
286
286
  "systemPrompt": "system.md",
287
287
  "tools": [
288
288
  { "method": "create-todo", "description": "tools/createTodo.md" },
@@ -127,6 +127,7 @@ const { content } = await agent.generateText({
127
127
  modelOverride: {
128
128
  model: 'claude-sonnet-4-6',
129
129
  temperature: 0.7,
130
+ maxResponseTokens: 16000,
130
131
  },
131
132
  });
132
133
  ```
@@ -136,6 +136,7 @@ const { content } = await agent.generateText({
136
136
  modelOverride: {
137
137
  model: model.id,
138
138
  temperature: 0.7,
139
+ maxResponseTokens: 16000,
139
140
  },
140
141
  });
141
142
  ```
@@ -17,7 +17,7 @@
17
17
  - Pushing to main branch will trigger a deploy. The user presses the publish button in the interface to request publishing.
18
18
 
19
19
  ### Build Notes
20
- For complex tasks — especially an initial buildout from a spec or making multiple changes in a single turn — write a `.remy-notes.md` scratchpad in the project root. Use it to track progress: a checklist of what's been built and what's remaining. Never include implementation details or other decisions in the notes - it is solely for keeping track of tasks. Read the spec files directly when you need design details, implementation decisions, or other reference materials. Delete the notes file when your work is done.
20
+ For complex tasks — especially an initial buildout from a spec or making multiple changes in a single turn — write a `.remy-notes.md` scratchpad in the project root. Use it to track progress: a checklist of what's been built and what's remaining. Do not include implementation details or other decisions in the notes - it is solely for keeping track of tasks. Read the spec files directly when you need design details, implementation decisions, or other reference materials. Delete the notes file when your work is done.
21
21
 
22
22
  ## Communication
23
23
  The user can already see your tool calls, so most of your work is visible without narration. Focus text output on three things:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mindstudio-ai/remy",
3
- "version": "0.1.109",
3
+ "version": "0.1.110",
4
4
  "description": "MindStudio coding agent",
5
5
  "repository": {
6
6
  "type": "git",