@mindstudio-ai/remy 0.1.53 → 0.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/headless.js CHANGED
@@ -6,6 +6,7 @@ var __export = (target, all) => {
6
6
 
7
7
  // src/headless.ts
8
8
  import { createInterface } from "readline";
9
+ import { writeFileSync } from "fs";
9
10
 
10
11
  // src/logger.ts
11
12
  import fs from "fs";
@@ -406,6 +407,10 @@ ${isLspConfigured() ? `<typescript_lsp>
406
407
 
407
408
  {{static/instructions.md}}
408
409
 
410
+ <conversation_summaries>
411
+ Your conversation history may include <prior_conversation_summary> blocks in the user's messages. These are automated summaries of earlier messages that have been compacted to save context space. The user does not see this summary, they see the full conversation history in their UI. Treat the summary as ground truth for what happened before, but do not reference it directly to the user ("as mentioned in the summary..."). Just continue naturally as if you remember the prior work.
412
+ </conversation_summaries>
413
+
409
414
  <!-- cache_breakpoint -->
410
415
 
411
416
  <project_onboarding>
@@ -636,6 +641,181 @@ async function generateBackgroundAck(params) {
636
641
  }
637
642
  }
638
643
 
644
+ // src/compaction/index.ts
645
+ var log4 = createLogger("compaction");
646
+ var CONVERSATION_SUMMARY_PROMPT = readAsset("compaction", "conversation.md");
647
+ var SUBAGENT_SUMMARY_PROMPT = readAsset("compaction", "subagent.md");
648
+ var SUMMARIZABLE_SUBAGENTS = ["visualDesignExpert", "productVision"];
649
+ async function compactConversation(state, apiConfig) {
650
+ const insertionIndex = state.messages.length;
651
+ const summaries = [];
652
+ const tasks = [];
653
+ const conversationMessages = getConversationMessagesForSummary(
654
+ state.messages,
655
+ insertionIndex
656
+ );
657
+ if (conversationMessages.length > 0) {
658
+ tasks.push(
659
+ generateSummary(
660
+ apiConfig,
661
+ "conversation",
662
+ CONVERSATION_SUMMARY_PROMPT,
663
+ conversationMessages
664
+ ).then((text) => {
665
+ if (text) {
666
+ summaries.push({ name: "conversation", text });
667
+ }
668
+ })
669
+ );
670
+ }
671
+ for (const name of SUMMARIZABLE_SUBAGENTS) {
672
+ const subagentMessages = getSubAgentMessagesForSummary(
673
+ state.messages,
674
+ name,
675
+ insertionIndex
676
+ );
677
+ if (subagentMessages.length > 0) {
678
+ tasks.push(
679
+ generateSummary(
680
+ apiConfig,
681
+ name,
682
+ SUBAGENT_SUMMARY_PROMPT,
683
+ subagentMessages
684
+ ).then((text) => {
685
+ if (text) {
686
+ summaries.push({ name, text });
687
+ }
688
+ })
689
+ );
690
+ }
691
+ }
692
+ await Promise.all(tasks);
693
+ const checkpointMessages = summaries.map((s) => ({
694
+ role: "user",
695
+ hidden: true,
696
+ content: [
697
+ {
698
+ type: "summary",
699
+ name: s.name,
700
+ text: s.text,
701
+ startedAt: Date.now()
702
+ }
703
+ ]
704
+ }));
705
+ if (checkpointMessages.length > 0) {
706
+ state.messages.splice(insertionIndex, 0, ...checkpointMessages);
707
+ }
708
+ log4.info("Compaction complete", {
709
+ summaries: summaries.length,
710
+ insertionIndex,
711
+ messagesAfter: state.messages.length - insertionIndex - checkpointMessages.length
712
+ });
713
+ }
714
+ function getConversationMessagesForSummary(messages, endIndex) {
715
+ let startIdx = 0;
716
+ for (let i = endIndex - 1; i >= 0; i--) {
717
+ const msg = messages[i];
718
+ if (!Array.isArray(msg.content)) {
719
+ continue;
720
+ }
721
+ for (const block of msg.content) {
722
+ if (block.type === "summary" && block.name === "conversation") {
723
+ startIdx = i + 1;
724
+ break;
725
+ }
726
+ }
727
+ if (startIdx > 0) {
728
+ break;
729
+ }
730
+ }
731
+ return messages.slice(startIdx, endIndex);
732
+ }
733
+ function getSubAgentMessagesForSummary(messages, subAgentName, endIndex) {
734
+ let checkpointIdx = -1;
735
+ for (let i = endIndex - 1; i >= 0; i--) {
736
+ const msg = messages[i];
737
+ if (!Array.isArray(msg.content)) {
738
+ continue;
739
+ }
740
+ for (const block of msg.content) {
741
+ if (block.type === "summary" && block.name === subAgentName) {
742
+ checkpointIdx = i;
743
+ break;
744
+ }
745
+ }
746
+ if (checkpointIdx !== -1) {
747
+ break;
748
+ }
749
+ }
750
+ const startIdx = checkpointIdx !== -1 ? checkpointIdx + 1 : 0;
751
+ const collected = [];
752
+ for (let i = startIdx; i < endIndex; i++) {
753
+ const msg = messages[i];
754
+ if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
755
+ continue;
756
+ }
757
+ for (const block of msg.content) {
758
+ if (block.type === "tool" && block.name === subAgentName && block.subAgentMessages?.length) {
759
+ collected.push(...block.subAgentMessages);
760
+ }
761
+ }
762
+ }
763
+ return collected;
764
+ }
765
+ function serializeForSummary(messages) {
766
+ return messages.map((msg) => {
767
+ if (typeof msg.content === "string") {
768
+ return `[${msg.role}]: ${msg.content}`;
769
+ }
770
+ if (!Array.isArray(msg.content)) {
771
+ return `[${msg.role}]: (empty)`;
772
+ }
773
+ const blocks = msg.content;
774
+ const parts = [];
775
+ for (const block of blocks) {
776
+ if (block.type === "text") {
777
+ parts.push(block.text);
778
+ } else if (block.type === "tool") {
779
+ parts.push(
780
+ `[tool: ${block.name}(${JSON.stringify(block.input).slice(0, 200)})] \u2192 ${(block.result ?? "").slice(0, 500)}`
781
+ );
782
+ }
783
+ }
784
+ return `[${msg.role}]: ${parts.join("\n")}`;
785
+ }).join("\n\n");
786
+ }
787
+ async function generateSummary(apiConfig, name, systemPrompt, messagesToSummarize) {
788
+ const serialized = serializeForSummary(messagesToSummarize);
789
+ if (!serialized.trim()) {
790
+ return null;
791
+ }
792
+ log4.info("Generating summary", {
793
+ name,
794
+ messageCount: messagesToSummarize.length
795
+ });
796
+ let summaryText = "";
797
+ for await (const event of streamChat({
798
+ ...apiConfig,
799
+ subAgentId: "conversationSummarizer",
800
+ system: systemPrompt,
801
+ messages: [{ role: "user", content: serialized }],
802
+ tools: []
803
+ })) {
804
+ if (event.type === "text") {
805
+ summaryText += event.text;
806
+ } else if (event.type === "error") {
807
+ log4.error("Summary generation failed", { name, error: event.error });
808
+ return null;
809
+ }
810
+ }
811
+ if (!summaryText.trim()) {
812
+ log4.warn("Empty summary generated", { name });
813
+ return null;
814
+ }
815
+ log4.info("Summary generated", { name, summaryLength: summaryText.length });
816
+ return summaryText.trim();
817
+ }
818
+
639
819
  // src/tools/spec/readSpec.ts
640
820
  import fs5 from "fs/promises";
641
821
 
@@ -1417,40 +1597,6 @@ var askMindStudioSdkTool = {
1417
1597
  }
1418
1598
  };
1419
1599
 
1420
- // src/tools/common/fetchUrl.ts
1421
- var fetchUrlTool = {
1422
- definition: {
1423
- name: "scapeWebUrl",
1424
- description: "Scrape the content of a web page. Returns the HTML of the page as markdown text. Optionally capture a screenshot if you need see the visual design. Use this when you need to fetch or analyze content from a website",
1425
- inputSchema: {
1426
- type: "object",
1427
- properties: {
1428
- url: {
1429
- type: "string",
1430
- description: "The URL to fetch."
1431
- },
1432
- screenshot: {
1433
- type: "boolean",
1434
- description: "Capture a screenshot of the page in addition to the text content. Adds latency; only use when you need to see the visual design."
1435
- }
1436
- },
1437
- required: ["url"]
1438
- }
1439
- },
1440
- async execute(input, context) {
1441
- const url = input.url;
1442
- const screenshot = input.screenshot;
1443
- const pageOptions = { onlyMainContent: true };
1444
- if (screenshot) {
1445
- pageOptions.screenshot = true;
1446
- }
1447
- return runCli(
1448
- `mindstudio scrape-url --url ${JSON.stringify(url)} --page-options ${JSON.stringify(JSON.stringify(pageOptions))} --no-meta`,
1449
- { onLog: context?.onLog }
1450
- );
1451
- }
1452
- };
1453
-
1454
1600
  // src/tools/common/searchGoogle.ts
1455
1601
  var searchGoogleTool = {
1456
1602
  definition: {
@@ -2179,21 +2325,28 @@ async function analyzeImage(params) {
2179
2325
  var SCREENSHOT_ANALYSIS_PROMPT = "Describe everything visible on screen from top to bottom \u2014 every element, its position, its size relative to the viewport, its colors, its content. Be comprehensive, thorough, and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components). Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
2180
2326
  async function captureAndAnalyzeScreenshot(promptOrOptions) {
2181
2327
  let prompt;
2328
+ let existingUrl;
2182
2329
  let onLog;
2183
2330
  if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
2184
2331
  prompt = promptOrOptions.prompt;
2332
+ existingUrl = promptOrOptions.imageUrl;
2185
2333
  onLog = promptOrOptions.onLog;
2186
2334
  } else {
2187
2335
  prompt = promptOrOptions;
2188
2336
  }
2189
- const ssResult = await sidecarRequest("/screenshot-full-page", void 0, {
2190
- timeout: 12e4
2191
- });
2192
- const url = ssResult?.url || ssResult?.screenshotUrl;
2193
- if (!url) {
2194
- throw new Error(
2195
- `No URL in sidecar response. The browser may not be ready yet. Response: ${JSON.stringify(ssResult)}`
2196
- );
2337
+ let url;
2338
+ if (existingUrl) {
2339
+ url = existingUrl;
2340
+ } else {
2341
+ const ssResult = await sidecarRequest("/screenshot-full-page", void 0, {
2342
+ timeout: 12e4
2343
+ });
2344
+ url = ssResult?.url || ssResult?.screenshotUrl;
2345
+ if (!url) {
2346
+ throw new Error(
2347
+ `No URL in sidecar response. The browser may not be ready yet. Response: ${JSON.stringify(ssResult)}`
2348
+ );
2349
+ }
2197
2350
  }
2198
2351
  if (prompt === false) {
2199
2352
  return url;
@@ -2211,19 +2364,30 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2211
2364
  var screenshotTool = {
2212
2365
  definition: {
2213
2366
  name: "screenshot",
2214
- description: "Capture a full-height screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for..",
2367
+ description: "Capture a full-height screenshot of the app preview and get a description of what's on screen. Provides static image analysis only, will not capture animations or video. Optionally provide specific questions about what you're looking for. Use a bulleted list to ask many questions at once. To ask additional questions about a screenshot you have already captured, pass its URL as imageUrl to skip recapture.",
2215
2368
  inputSchema: {
2216
2369
  type: "object",
2217
2370
  properties: {
2218
2371
  prompt: {
2219
2372
  type: "string",
2220
2373
  description: "Optional question about the screenshot. If omitted, returns a general description of what's visible."
2374
+ },
2375
+ imageUrl: {
2376
+ type: "string",
2377
+ description: "URL of an existing screenshot to analyze instead of capturing a new one. Use this for additional questions about a previous screenshot."
2221
2378
  }
2222
2379
  }
2223
2380
  }
2224
2381
  },
2225
2382
  async execute(input, context) {
2226
2383
  try {
2384
+ if (input.imageUrl) {
2385
+ return await captureAndAnalyzeScreenshot({
2386
+ prompt: input.prompt,
2387
+ imageUrl: input.imageUrl,
2388
+ onLog: context?.onLog
2389
+ });
2390
+ }
2227
2391
  return await captureAndAnalyzeScreenshot({
2228
2392
  prompt: input.prompt,
2229
2393
  onLog: context?.onLog
@@ -2289,8 +2453,51 @@ function startStatusWatcher(config) {
2289
2453
  }
2290
2454
 
2291
2455
  // src/subagents/common/cleanMessages.ts
2456
+ function findLastSummaryCheckpoint(messages, name) {
2457
+ for (let i = messages.length - 1; i >= 0; i--) {
2458
+ const msg = messages[i];
2459
+ if (!Array.isArray(msg.content)) {
2460
+ continue;
2461
+ }
2462
+ for (const block of msg.content) {
2463
+ if (block.type === "summary" && block.name === name) {
2464
+ return i;
2465
+ }
2466
+ }
2467
+ }
2468
+ return -1;
2469
+ }
2292
2470
  function cleanMessagesForApi(messages) {
2293
- return messages.map((msg) => {
2471
+ const checkpointIdx = findLastSummaryCheckpoint(messages, "conversation");
2472
+ let startIdx = 0;
2473
+ const prefix = [];
2474
+ if (checkpointIdx !== -1) {
2475
+ const checkpointMsg = messages[checkpointIdx];
2476
+ const blocks = checkpointMsg.content;
2477
+ const summaryBlock = blocks.find(
2478
+ (b) => b.type === "summary" && b.name === "conversation"
2479
+ );
2480
+ if (summaryBlock && summaryBlock.type === "summary") {
2481
+ prefix.push({
2482
+ role: "user",
2483
+ content: `<conversation_summary>
2484
+ ${summaryBlock.text}
2485
+ </conversation_summary>`,
2486
+ hidden: true
2487
+ });
2488
+ }
2489
+ startIdx = checkpointIdx + 1;
2490
+ }
2491
+ const messagesToProcess = messages.slice(startIdx);
2492
+ const cleaned = messagesToProcess.filter((msg) => {
2493
+ if (Array.isArray(msg.content)) {
2494
+ const blocks = msg.content;
2495
+ if (blocks.some((b) => b.type === "summary")) {
2496
+ return false;
2497
+ }
2498
+ }
2499
+ return true;
2500
+ }).map((msg) => {
2294
2501
  if (msg.role === "user" && typeof msg.content === "string" && msg.content.startsWith("@@automated::")) {
2295
2502
  return {
2296
2503
  ...msg,
@@ -2306,25 +2513,26 @@ function cleanMessagesForApi(messages) {
2306
2513
  const thinking = blocks.filter(
2307
2514
  (b) => b.type === "thinking"
2308
2515
  ).map((b) => ({ thinking: b.thinking, signature: b.signature }));
2309
- const cleaned = {
2516
+ const cleaned2 = {
2310
2517
  role: msg.role,
2311
2518
  content: text
2312
2519
  };
2313
2520
  if (toolCalls.length > 0) {
2314
- cleaned.toolCalls = toolCalls;
2521
+ cleaned2.toolCalls = toolCalls;
2315
2522
  }
2316
2523
  if (thinking.length > 0) {
2317
- cleaned.thinking = thinking;
2524
+ cleaned2.thinking = thinking;
2318
2525
  }
2319
2526
  if (msg.hidden) {
2320
- cleaned.hidden = true;
2527
+ cleaned2.hidden = true;
2321
2528
  }
2322
- return cleaned;
2529
+ return cleaned2;
2323
2530
  });
2531
+ return [...prefix, ...cleaned];
2324
2532
  }
2325
2533
 
2326
2534
  // src/subagents/runner.ts
2327
- var log4 = createLogger("sub-agent");
2535
+ var log5 = createLogger("sub-agent");
2328
2536
  async function runSubAgent(config) {
2329
2537
  const {
2330
2538
  system,
@@ -2341,6 +2549,7 @@ async function runSubAgent(config) {
2341
2549
  resolveExternalTool,
2342
2550
  toolRegistry,
2343
2551
  requestId,
2552
+ history,
2344
2553
  background,
2345
2554
  onBackgroundComplete
2346
2555
  } = config;
@@ -2348,13 +2557,16 @@ async function runSubAgent(config) {
2348
2557
  const signal = background ? bgAbort.signal : parentSignal;
2349
2558
  const agentName = subAgentId || "sub-agent";
2350
2559
  const runStart = Date.now();
2351
- log4.info("Sub-agent started", { requestId, parentToolId, agentName });
2560
+ log5.info("Sub-agent started", { requestId, parentToolId, agentName });
2352
2561
  const emit2 = (e) => {
2353
2562
  onEvent({ ...e, parentToolId });
2354
2563
  };
2355
2564
  let turns = 0;
2356
2565
  const run = async () => {
2357
- const messages = [{ role: "user", content: task }];
2566
+ const messages = [
2567
+ ...history ?? [],
2568
+ { role: "user", content: task }
2569
+ ];
2358
2570
  function getPartialText(blocks) {
2359
2571
  return blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
2360
2572
  }
@@ -2501,7 +2713,7 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
2501
2713
  const text = getPartialText(contentBlocks);
2502
2714
  return { text, messages };
2503
2715
  }
2504
- log4.info("Tools executing", {
2716
+ log5.info("Tools executing", {
2505
2717
  requestId,
2506
2718
  parentToolId,
2507
2719
  count: toolCalls.length,
@@ -2572,7 +2784,7 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
2572
2784
  run2(tc.input);
2573
2785
  const r = await resultPromise;
2574
2786
  toolRegistry?.unregister(tc.id);
2575
- log4.info("Tool completed", {
2787
+ log5.info("Tool completed", {
2576
2788
  requestId,
2577
2789
  parentToolId,
2578
2790
  toolCallId: tc.id,
@@ -2613,7 +2825,7 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
2613
2825
  const wrapRun = async () => {
2614
2826
  try {
2615
2827
  const result = await run();
2616
- log4.info("Sub-agent complete", {
2828
+ log5.info("Sub-agent complete", {
2617
2829
  requestId,
2618
2830
  parentToolId,
2619
2831
  agentName,
@@ -2622,7 +2834,7 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
2622
2834
  });
2623
2835
  return result;
2624
2836
  } catch (err) {
2625
- log4.warn("Sub-agent error", {
2837
+ log5.warn("Sub-agent error", {
2626
2838
  requestId,
2627
2839
  parentToolId,
2628
2840
  agentName,
@@ -2634,7 +2846,7 @@ Current date/time: ${(/* @__PURE__ */ new Date()).toISOString().replace("T", " "
2634
2846
  if (!background) {
2635
2847
  return wrapRun();
2636
2848
  }
2637
- log4.info("Sub-agent backgrounded", { requestId, parentToolId, agentName });
2849
+ log5.info("Sub-agent backgrounded", { requestId, parentToolId, agentName });
2638
2850
  toolRegistry?.register({
2639
2851
  id: parentToolId,
2640
2852
  name: agentName,
@@ -2781,7 +2993,7 @@ ${appSpec}
2781
2993
  }
2782
2994
 
2783
2995
  // src/subagents/browserAutomation/index.ts
2784
- var log5 = createLogger("browser-automation");
2996
+ var log6 = createLogger("browser-automation");
2785
2997
  var browserAutomationTool = {
2786
2998
  definition: {
2787
2999
  name: "runAutomatedBrowserTest",
@@ -2876,7 +3088,7 @@ var browserAutomationTool = {
2876
3088
  }
2877
3089
  }
2878
3090
  } catch {
2879
- log5.debug("Failed to parse batch analysis result", {
3091
+ log6.debug("Failed to parse batch analysis result", {
2880
3092
  batchResult
2881
3093
  });
2882
3094
  }
@@ -2902,7 +3114,7 @@ __export(searchGoogle_exports, {
2902
3114
  });
2903
3115
  var definition = {
2904
3116
  name: "searchGoogle",
2905
- description: 'Search Google for web results. Reserch modern design trends in industries or verticals, "best [domain] apps 2026", ui patterns, or find something specific if the the user has an explicit reference. Prioritize authoritative sources like Figma and other design leaders, avoid random blog spam. Pick one or more URLs from the results and then use `fetchUrl` to get their text content.',
3117
+ description: 'Search Google for web results. Reserch modern design trends in industries or verticals, "best [domain] apps 2026", ui patterns, or find something specific if the the user has an explicit reference. Searching for and reading case studies is a great way to get information and context about a project\'s domain. Prioritize authoritative sources like Figma and other design leaders, avoid random blog spam. Pick one or more URLs from the results and then use `scrapeWebUrl` to get their text content.',
2906
3118
  inputSchema: {
2907
3119
  type: "object",
2908
3120
  properties: {
@@ -2921,14 +3133,14 @@ async function execute(input, onLog) {
2921
3133
  );
2922
3134
  }
2923
3135
 
2924
- // src/subagents/designExpert/tools/fetchUrl.ts
2925
- var fetchUrl_exports = {};
2926
- __export(fetchUrl_exports, {
3136
+ // src/subagents/designExpert/tools/scrapeWebUrl.ts
3137
+ var scrapeWebUrl_exports = {};
3138
+ __export(scrapeWebUrl_exports, {
2927
3139
  definition: () => definition2,
2928
3140
  execute: () => execute2
2929
3141
  });
2930
3142
  var definition2 = {
2931
- name: "fetchUrl",
3143
+ name: "scrapeWebUrl",
2932
3144
  description: "Fetch the content of a web page as markdown. Use when reading sites from search results or specific things the user wants to incorporate.",
2933
3145
  inputSchema: {
2934
3146
  type: "object",
@@ -2983,7 +3195,7 @@ Respond only with your analysis as Markdown and absolutely no other text. Do not
2983
3195
  `;
2984
3196
  var definition3 = {
2985
3197
  name: "analyzeDesign",
2986
- description: "Analyze the visual design of a website or image URL. Websites are automatically screenshotted first. If no prompt is provided, performs a full design reference analysis (mood, color, typography, layout, distinctiveness). Provide a custom prompt to ask a specific design question instead.",
3198
+ description: "Analyze the visual design of a website or image URL. Websites are automatically screenshotted first. Provides static image analysis only, will not capture animations or video. If no prompt is provided, performs a full design reference analysis (mood, color, typography, layout, distinctiveness). Provide a custom prompt to ask a specific design question instead. Use a bulleted list to ask many questions at once.",
2987
3199
  inputSchema: {
2988
3200
  type: "object",
2989
3201
  properties: {
@@ -3028,10 +3240,10 @@ __export(analyzeImage_exports, {
3028
3240
  definition: () => definition4,
3029
3241
  execute: () => execute4
3030
3242
  });
3031
- var DEFAULT_PROMPT = "Describe everything visible in this image \u2014 every element, its position, its size relative to the frame, its colors, its content. Be comprhensive, thorough and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components). Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
3243
+ var DEFAULT_PROMPT = "Describe everything visible in this image \u2014 every element, its position, its size relative to the frame, its colors, its content. Be comprehensive, thorough and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components). Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
3032
3244
  var definition4 = {
3033
3245
  name: "analyzeImage",
3034
- description: "Analyze an image by URL using a vision model. Returns an objective description of what is visible \u2014 shapes, colors, layout, text, artifacts. Use for factual inventory of image contents, not for subjective design judgment - the vision model providing the analysis has no sense of design. You are the design expert - use the analysis tool for factual inventory, then apply your own expertise for quality and suitability assessments.",
3246
+ description: "Analyze an image by URL using a vision model. Provides static image analysis only, will not capture animations or video. Returns an objective description of what is visible \u2014 shapes, colors, layout, text, artifacts. Use for factual inventory of image contents, not for subjective design judgment - the vision model providing the analysis has no sense of design. You are the design expert - use the analysis tool for factual inventory, then apply your own expertise for quality and suitability assessments. Optionally provide specific questions about what you're looking for. Use a bulleted list to ask many questions at once. If you are analyzing a screenshot of the app preview, you can reuse the same screenshot URL multiple times to ask multiple questions.",
3035
3247
  inputSchema: {
3036
3248
  type: "object",
3037
3249
  properties: {
@@ -3330,7 +3542,7 @@ async function execute7(input, onLog) {
3330
3542
  // src/subagents/designExpert/tools/index.ts
3331
3543
  var tools = {
3332
3544
  searchGoogle: searchGoogle_exports,
3333
- fetchUrl: fetchUrl_exports,
3545
+ scrapeWebUrl: scrapeWebUrl_exports,
3334
3546
  analyzeDesign: analyzeDesign_exports,
3335
3547
  analyzeImage: analyzeImage_exports,
3336
3548
  screenshot: screenshot_exports,
@@ -3587,6 +3799,50 @@ ${specContext}`;
3587
3799
  return prompt;
3588
3800
  }
3589
3801
 
3802
+ // src/subagents/common/history.ts
3803
+ function getSubAgentHistory(messages, subAgentName) {
3804
+ let checkpointIdx = -1;
3805
+ let summaryText = "";
3806
+ for (let i = messages.length - 1; i >= 0; i--) {
3807
+ const msg = messages[i];
3808
+ if (!Array.isArray(msg.content)) {
3809
+ continue;
3810
+ }
3811
+ for (const block of msg.content) {
3812
+ if (block.type === "summary" && block.name === subAgentName) {
3813
+ checkpointIdx = i;
3814
+ summaryText = block.text;
3815
+ break;
3816
+ }
3817
+ }
3818
+ if (checkpointIdx !== -1) {
3819
+ break;
3820
+ }
3821
+ }
3822
+ const history = [];
3823
+ if (checkpointIdx !== -1 && summaryText) {
3824
+ history.push({
3825
+ role: "user",
3826
+ content: `<prior_conversation_summary>
3827
+ ${summaryText}
3828
+ </prior_conversation_summary>`
3829
+ });
3830
+ }
3831
+ const startIdx = checkpointIdx !== -1 ? checkpointIdx + 1 : 0;
3832
+ for (let i = startIdx; i < messages.length; i++) {
3833
+ const msg = messages[i];
3834
+ if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
3835
+ continue;
3836
+ }
3837
+ for (const block of msg.content) {
3838
+ if (block.type === "tool" && block.name === subAgentName && block.subAgentMessages?.length) {
3839
+ history.push(...block.subAgentMessages);
3840
+ }
3841
+ }
3842
+ }
3843
+ return history;
3844
+ }
3845
+
3590
3846
  // src/subagents/designExpert/index.ts
3591
3847
  var DESCRIPTION = `
3592
3848
  Visual design expert. Describe the situation and what you need \u2014 the agent decides what to deliver. It reads the spec files automatically. Include relevant user requirements and context it can't get from the spec, but do not list specific deliverables or tell it how to do its job. Do not suggest implementation details or ideas - only relay what is needed.
@@ -3614,9 +3870,11 @@ var designExpertTool = {
3614
3870
  if (!context) {
3615
3871
  return "Error: visual design expert requires execution context";
3616
3872
  }
3873
+ const history = context.conversationMessages ? getSubAgentHistory(context.conversationMessages, "visualDesignExpert") : [];
3617
3874
  const result = await runSubAgent({
3618
3875
  system: getDesignExpertPrompt(),
3619
3876
  task: input.task,
3877
+ history: history.length > 0 ? history : void 0,
3620
3878
  tools: DESIGN_EXPERT_TOOLS,
3621
3879
  externalTools: /* @__PURE__ */ new Set(),
3622
3880
  executeTool: (name, input2, toolCallId, onLog) => executeDesignExpertTool(name, input2, context, toolCallId, onLog),
@@ -3912,9 +4170,11 @@ var productVisionTool = {
3912
4170
  if (!context) {
3913
4171
  return "Error: product vision requires execution context";
3914
4172
  }
4173
+ const history = context.conversationMessages ? getSubAgentHistory(context.conversationMessages, "productVision") : [];
3915
4174
  const result = await runSubAgent({
3916
4175
  system: getProductVisionPrompt(),
3917
4176
  task: input.task,
4177
+ history: history.length > 0 ? history : void 0,
3918
4178
  tools: VISION_TOOLS,
3919
4179
  externalTools: /* @__PURE__ */ new Set(),
3920
4180
  executeTool: executeVisionTool,
@@ -3999,7 +4259,7 @@ var SANITY_CHECK_TOOLS = [
3999
4259
  }
4000
4260
  },
4001
4261
  {
4002
- name: "fetchUrl",
4262
+ name: "scrapeWebUrl",
4003
4263
  description: "Fetch a web page as markdown. Use to read package docs, changelogs, npm pages.",
4004
4264
  inputSchema: {
4005
4265
  type: "object",
@@ -4082,6 +4342,40 @@ var codeSanityCheckTool = {
4082
4342
  }
4083
4343
  };
4084
4344
 
4345
+ // src/tools/common/scrapeWebUrl.ts
4346
+ var scrapeWebUrlTool = {
4347
+ definition: {
4348
+ name: "scrapeWebUrl",
4349
+ description: "Scrape the content of a web page. Returns the HTML of the page as markdown text. Optionally capture a screenshot if you need see the visual design. Use this when you need to fetch or analyze content from a website",
4350
+ inputSchema: {
4351
+ type: "object",
4352
+ properties: {
4353
+ url: {
4354
+ type: "string",
4355
+ description: "The URL to fetch."
4356
+ },
4357
+ screenshot: {
4358
+ type: "boolean",
4359
+ description: "Capture a screenshot of the page in addition to the text content. Adds latency; only use when you need to see the visual design."
4360
+ }
4361
+ },
4362
+ required: ["url"]
4363
+ }
4364
+ },
4365
+ async execute(input, context) {
4366
+ const url = input.url;
4367
+ const screenshot = input.screenshot;
4368
+ const pageOptions = { onlyMainContent: true };
4369
+ if (screenshot) {
4370
+ pageOptions.screenshot = true;
4371
+ }
4372
+ return runCli(
4373
+ `mindstudio scrape-url --url ${JSON.stringify(url)} --page-options ${JSON.stringify(JSON.stringify(pageOptions))} --no-meta`,
4374
+ { onLog: context?.onLog }
4375
+ );
4376
+ }
4377
+ };
4378
+
4085
4379
  // src/tools/index.ts
4086
4380
  function getSpecTools() {
4087
4381
  return [readSpecTool, writeSpecTool, editSpecTool, listSpecFilesTool];
@@ -4112,7 +4406,7 @@ function getCommonTools() {
4112
4406
  promptUserTool,
4113
4407
  confirmDestructiveActionTool,
4114
4408
  askMindStudioSdkTool,
4115
- fetchUrlTool,
4409
+ scrapeWebUrlTool,
4116
4410
  searchGoogleTool,
4117
4411
  setProjectMetadataTool,
4118
4412
  designExpertTool,
@@ -4165,7 +4459,7 @@ function executeTool(name, input, context) {
4165
4459
 
4166
4460
  // src/session.ts
4167
4461
  import fs17 from "fs";
4168
- var log6 = createLogger("session");
4462
+ var log7 = createLogger("session");
4169
4463
  var SESSION_FILE = ".remy-session.json";
4170
4464
  function loadSession(state) {
4171
4465
  try {
@@ -4173,7 +4467,7 @@ function loadSession(state) {
4173
4467
  const data = JSON.parse(raw);
4174
4468
  if (Array.isArray(data.messages) && data.messages.length > 0) {
4175
4469
  state.messages = sanitizeMessages(data.messages);
4176
- log6.info("Session loaded", { messageCount: state.messages.length });
4470
+ log7.info("Session loaded", { messageCount: state.messages.length });
4177
4471
  return true;
4178
4472
  }
4179
4473
  } catch {
@@ -4223,9 +4517,9 @@ function saveSession(state) {
4223
4517
  JSON.stringify({ messages: state.messages }, null, 2),
4224
4518
  "utf-8"
4225
4519
  );
4226
- log6.info("Session saved", { messageCount: state.messages.length });
4520
+ log7.info("Session saved", { messageCount: state.messages.length });
4227
4521
  } catch (err) {
4228
- log6.warn("Session save failed", { error: err.message });
4522
+ log7.warn("Session save failed", { error: err.message });
4229
4523
  }
4230
4524
  }
4231
4525
  function clearSession(state) {
@@ -4426,7 +4720,7 @@ function friendlyError(raw) {
4426
4720
  }
4427
4721
 
4428
4722
  // src/agent.ts
4429
- var log7 = createLogger("agent");
4723
+ var log8 = createLogger("agent");
4430
4724
  function getTextContent(blocks) {
4431
4725
  return blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
4432
4726
  }
@@ -4469,7 +4763,7 @@ async function runTurn(params) {
4469
4763
  onBackgroundComplete
4470
4764
  } = params;
4471
4765
  const tools2 = getToolDefinitions(onboardingState);
4472
- log7.info("Turn started", {
4766
+ log8.info("Turn started", {
4473
4767
  requestId,
4474
4768
  model,
4475
4769
  toolCount: tools2.length,
@@ -4495,6 +4789,11 @@ async function runTurn(params) {
4495
4789
  let lastCompletedTools = "";
4496
4790
  let lastCompletedInput = "";
4497
4791
  let lastCompletedResult = "";
4792
+ let turnInputTokens = 0;
4793
+ let turnOutputTokens = 0;
4794
+ let turnCacheCreation = 0;
4795
+ let turnCacheRead = 0;
4796
+ let turnLlmCalls = 0;
4498
4797
  while (true) {
4499
4798
  let getOrCreateAccumulator2 = function(id, name) {
4500
4799
  let acc = toolInputAccumulators.get(id);
@@ -4671,7 +4970,7 @@ async function runTurn(params) {
4671
4970
  const tool = getToolByName(event.name);
4672
4971
  const wasStreamed = acc?.started ?? false;
4673
4972
  const isInputStreaming = !!tool?.streaming?.partialInput;
4674
- log7.info("Tool received", {
4973
+ log8.info("Tool received", {
4675
4974
  requestId,
4676
4975
  toolCallId: event.id,
4677
4976
  name: event.name
@@ -4688,6 +4987,11 @@ async function runTurn(params) {
4688
4987
  }
4689
4988
  case "done":
4690
4989
  stopReason = event.stopReason;
4990
+ turnLlmCalls++;
4991
+ turnInputTokens += event.usage.inputTokens;
4992
+ turnOutputTokens += event.usage.outputTokens;
4993
+ turnCacheCreation += event.usage.cacheCreationTokens ?? 0;
4994
+ turnCacheRead += event.usage.cacheReadTokens ?? 0;
4691
4995
  break;
4692
4996
  case "error":
4693
4997
  onEvent({ type: "error", error: friendlyError(event.error) });
@@ -4725,10 +5029,19 @@ async function runTurn(params) {
4725
5029
  if (stopReason !== "tool_use" || toolCalls.length === 0) {
4726
5030
  statusWatcher.stop();
4727
5031
  saveSession(state);
4728
- onEvent({ type: "turn_done" });
5032
+ onEvent({
5033
+ type: "turn_done",
5034
+ stats: {
5035
+ inputTokens: turnInputTokens,
5036
+ outputTokens: turnOutputTokens,
5037
+ cacheCreationTokens: turnCacheCreation || void 0,
5038
+ cacheReadTokens: turnCacheRead || void 0,
5039
+ llmCalls: turnLlmCalls
5040
+ }
5041
+ });
4729
5042
  return;
4730
5043
  }
4731
- log7.info("Tools executing", {
5044
+ log8.info("Tools executing", {
4732
5045
  requestId,
4733
5046
  count: toolCalls.length,
4734
5047
  tools: toolCalls.map((tc) => tc.name)
@@ -4772,7 +5085,7 @@ async function runTurn(params) {
4772
5085
  let result;
4773
5086
  if (EXTERNAL_TOOLS.has(tc.name) && resolveExternalTool) {
4774
5087
  saveSession(state);
4775
- log7.info("Waiting for external tool result", {
5088
+ log8.info("Waiting for external tool result", {
4776
5089
  requestId,
4777
5090
  toolCallId: tc.id,
4778
5091
  name: tc.name
@@ -4788,6 +5101,7 @@ async function runTurn(params) {
4788
5101
  toolCallId: tc.id,
4789
5102
  requestId,
4790
5103
  subAgentMessages,
5104
+ conversationMessages: state.messages,
4791
5105
  toolRegistry,
4792
5106
  onBackgroundComplete,
4793
5107
  onLog: (line) => wrappedOnEvent({
@@ -4827,7 +5141,7 @@ async function runTurn(params) {
4827
5141
  if (!tc.input.background) {
4828
5142
  toolRegistry?.unregister(tc.id);
4829
5143
  }
4830
- log7.info("Tool completed", {
5144
+ log8.info("Tool completed", {
4831
5145
  requestId,
4832
5146
  toolCallId: tc.id,
4833
5147
  name: tc.name,
@@ -4882,7 +5196,7 @@ async function runTurn(params) {
4882
5196
  }
4883
5197
 
4884
5198
  // src/toolRegistry.ts
4885
- var log8 = createLogger("tool-registry");
5199
+ var log9 = createLogger("tool-registry");
4886
5200
  var ToolRegistry = class {
4887
5201
  entries = /* @__PURE__ */ new Map();
4888
5202
  onEvent;
@@ -4908,7 +5222,7 @@ var ToolRegistry = class {
4908
5222
  if (!entry) {
4909
5223
  return false;
4910
5224
  }
4911
- log8.info("Tool stopped", { toolCallId: id, name: entry.name, mode });
5225
+ log9.info("Tool stopped", { toolCallId: id, name: entry.name, mode });
4912
5226
  entry.abortController.abort(mode);
4913
5227
  if (mode === "graceful") {
4914
5228
  const partial = entry.getPartialResult?.() ?? "";
@@ -4941,7 +5255,7 @@ ${partial}` : "[INTERRUPTED] Tool execution was stopped.";
4941
5255
  if (!entry) {
4942
5256
  return false;
4943
5257
  }
4944
- log8.info("Tool restarted", { toolCallId: id, name: entry.name });
5258
+ log9.info("Tool restarted", { toolCallId: id, name: entry.name });
4945
5259
  entry.abortController.abort("restart");
4946
5260
  const newInput = patchedInput ? { ...entry.input, ...patchedInput } : entry.input;
4947
5261
  this.onEvent?.({
@@ -4985,7 +5299,7 @@ ${body}`;
4985
5299
  }
4986
5300
 
4987
5301
  // src/headless.ts
4988
- var log9 = createLogger("headless");
5302
+ var log10 = createLogger("headless");
4989
5303
  function emit(event, data, requestId) {
4990
5304
  const payload = { event, ...data };
4991
5305
  if (requestId) {
@@ -5049,6 +5363,17 @@ async function startHeadless(opts = {}) {
5049
5363
  const pendingTools = /* @__PURE__ */ new Map();
5050
5364
  const earlyResults = /* @__PURE__ */ new Map();
5051
5365
  const toolRegistry = new ToolRegistry();
5366
+ const sessionStats = {
5367
+ messageCount: 0,
5368
+ turns: 0,
5369
+ totalInputTokens: 0,
5370
+ totalOutputTokens: 0,
5371
+ totalCacheCreationTokens: 0,
5372
+ totalCacheReadTokens: 0,
5373
+ lastContextSize: 0,
5374
+ compactionInProgress: false,
5375
+ updatedAt: 0
5376
+ };
5052
5377
  const backgroundQueue = [];
5053
5378
  function flushBackgroundQueue() {
5054
5379
  if (backgroundQueue.length === 0) {
@@ -5066,22 +5391,32 @@ ${xmlParts}
5066
5391
  </background_results>`;
5067
5392
  handleMessage({ action: "message", text: message }, void 0);
5068
5393
  }
5069
- function onBackgroundComplete(toolCallId, name, result, subAgentMessages) {
5070
- for (const msg of state.messages) {
5071
- if (!Array.isArray(msg.content)) {
5072
- continue;
5073
- }
5074
- for (const block of msg.content) {
5075
- if (block.type === "tool" && block.id === toolCallId) {
5076
- block.backgroundResult = result;
5077
- block.completedAt = Date.now();
5078
- if (subAgentMessages) {
5079
- block.subAgentMessages = subAgentMessages;
5394
+ const pendingBlockUpdates = [];
5395
+ function applyPendingBlockUpdates() {
5396
+ if (pendingBlockUpdates.length === 0) {
5397
+ return;
5398
+ }
5399
+ const updates = pendingBlockUpdates.splice(0);
5400
+ for (const update of updates) {
5401
+ for (const msg of state.messages) {
5402
+ if (!Array.isArray(msg.content)) {
5403
+ continue;
5404
+ }
5405
+ for (const block of msg.content) {
5406
+ if (block.type === "tool" && block.id === update.toolCallId) {
5407
+ block.backgroundResult = update.result;
5408
+ block.completedAt = Date.now();
5409
+ if (update.subAgentMessages) {
5410
+ block.subAgentMessages = update.subAgentMessages;
5411
+ }
5080
5412
  }
5081
5413
  }
5082
5414
  }
5083
5415
  }
5084
- log9.info("Background complete", {
5416
+ }
5417
+ function onBackgroundComplete(toolCallId, name, result, subAgentMessages) {
5418
+ pendingBlockUpdates.push({ toolCallId, result, subAgentMessages });
5419
+ log10.info("Background complete", {
5085
5420
  toolCallId,
5086
5421
  name,
5087
5422
  requestId: currentRequestId
@@ -5099,6 +5434,7 @@ ${xmlParts}
5099
5434
  completedAt: Date.now()
5100
5435
  });
5101
5436
  if (!running) {
5437
+ applyPendingBlockUpdates();
5102
5438
  flushBackgroundQueue();
5103
5439
  }
5104
5440
  }
@@ -5141,8 +5477,25 @@ ${xmlParts}
5141
5477
  // Terminal events — translate to `completed`
5142
5478
  case "turn_done":
5143
5479
  completedEmitted = true;
5480
+ if (e.stats) {
5481
+ sessionStats.turns++;
5482
+ sessionStats.totalInputTokens += e.stats.inputTokens;
5483
+ sessionStats.totalOutputTokens += e.stats.outputTokens;
5484
+ sessionStats.totalCacheCreationTokens += e.stats.cacheCreationTokens ?? 0;
5485
+ sessionStats.totalCacheReadTokens += e.stats.cacheReadTokens ?? 0;
5486
+ sessionStats.lastContextSize = e.stats.inputTokens;
5487
+ }
5488
+ sessionStats.messageCount = state.messages.length;
5489
+ sessionStats.updatedAt = Date.now();
5490
+ try {
5491
+ writeFileSync(".remy-stats.json", JSON.stringify(sessionStats));
5492
+ } catch {
5493
+ }
5144
5494
  emit("completed", { success: true }, rid);
5145
- setTimeout(() => flushBackgroundQueue(), 0);
5495
+ setTimeout(() => {
5496
+ applyPendingBlockUpdates();
5497
+ flushBackgroundQueue();
5498
+ }, 0);
5146
5499
  return;
5147
5500
  case "turn_cancelled":
5148
5501
  completedEmitted = true;
@@ -5331,7 +5684,7 @@ ${xmlParts}
5331
5684
  requestId
5332
5685
  );
5333
5686
  }
5334
- log9.info("Turn complete", {
5687
+ log10.info("Turn complete", {
5335
5688
  requestId,
5336
5689
  durationMs: Date.now() - turnStart
5337
5690
  });
@@ -5340,7 +5693,7 @@ ${xmlParts}
5340
5693
  emit("error", { error: err.message }, requestId);
5341
5694
  emit("completed", { success: false, error: err.message }, requestId);
5342
5695
  }
5343
- log9.warn("Command failed", {
5696
+ log10.warn("Command failed", {
5344
5697
  action: "message",
5345
5698
  requestId,
5346
5699
  error: err.message
@@ -5360,7 +5713,7 @@ ${xmlParts}
5360
5713
  return;
5361
5714
  }
5362
5715
  const { action, requestId } = parsed;
5363
- log9.info("Command received", { action, requestId });
5716
+ log10.info("Command received", { action, requestId });
5364
5717
  if (action === "tool_result" && parsed.id) {
5365
5718
  const id = parsed.id;
5366
5719
  const result = parsed.result ?? "";
@@ -5416,6 +5769,39 @@ ${xmlParts}
5416
5769
  }
5417
5770
  return;
5418
5771
  }
5772
+ if (action === "compact") {
5773
+ sessionStats.compactionInProgress = true;
5774
+ sessionStats.updatedAt = Date.now();
5775
+ try {
5776
+ writeFileSync(".remy-stats.json", JSON.stringify(sessionStats));
5777
+ } catch {
5778
+ }
5779
+ compactConversation(state, config).then(() => {
5780
+ saveSession(state);
5781
+ emit("compaction_complete", {}, requestId);
5782
+ emit("completed", { success: true }, requestId);
5783
+ }).catch((err) => {
5784
+ emit(
5785
+ "compaction_complete",
5786
+ { error: err.message || "Compaction failed" },
5787
+ requestId
5788
+ );
5789
+ emit(
5790
+ "completed",
5791
+ { success: false, error: err.message || "Compaction failed" },
5792
+ requestId
5793
+ );
5794
+ }).finally(() => {
5795
+ sessionStats.compactionInProgress = false;
5796
+ sessionStats.messageCount = state.messages.length;
5797
+ sessionStats.updatedAt = Date.now();
5798
+ try {
5799
+ writeFileSync(".remy-stats.json", JSON.stringify(sessionStats));
5800
+ } catch {
5801
+ }
5802
+ });
5803
+ return;
5804
+ }
5419
5805
  if (action === "message") {
5420
5806
  await handleMessage(parsed, requestId);
5421
5807
  return;