@mindstudio-ai/remy 0.1.53 → 0.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1170,47 +1170,6 @@ var init_sdkConsultant = __esm({
1170
1170
  }
1171
1171
  });
1172
1172
 
1173
- // src/tools/common/fetchUrl.ts
1174
- var fetchUrlTool;
1175
- var init_fetchUrl = __esm({
1176
- "src/tools/common/fetchUrl.ts"() {
1177
- "use strict";
1178
- init_runCli();
1179
- fetchUrlTool = {
1180
- definition: {
1181
- name: "scapeWebUrl",
1182
- description: "Scrape the content of a web page. Returns the HTML of the page as markdown text. Optionally capture a screenshot if you need see the visual design. Use this when you need to fetch or analyze content from a website",
1183
- inputSchema: {
1184
- type: "object",
1185
- properties: {
1186
- url: {
1187
- type: "string",
1188
- description: "The URL to fetch."
1189
- },
1190
- screenshot: {
1191
- type: "boolean",
1192
- description: "Capture a screenshot of the page in addition to the text content. Adds latency; only use when you need to see the visual design."
1193
- }
1194
- },
1195
- required: ["url"]
1196
- }
1197
- },
1198
- async execute(input, context) {
1199
- const url = input.url;
1200
- const screenshot = input.screenshot;
1201
- const pageOptions = { onlyMainContent: true };
1202
- if (screenshot) {
1203
- pageOptions.screenshot = true;
1204
- }
1205
- return runCli(
1206
- `mindstudio scrape-url --url ${JSON.stringify(url)} --page-options ${JSON.stringify(JSON.stringify(pageOptions))} --no-meta`,
1207
- { onLog: context?.onLog }
1208
- );
1209
- }
1210
- };
1211
- }
1212
- });
1213
-
1214
1173
  // src/tools/common/searchGoogle.ts
1215
1174
  var searchGoogleTool;
1216
1175
  var init_searchGoogle = __esm({
@@ -2095,21 +2054,28 @@ var init_analyzeImage = __esm({
2095
2054
  // src/tools/_helpers/screenshot.ts
2096
2055
  async function captureAndAnalyzeScreenshot(promptOrOptions) {
2097
2056
  let prompt;
2057
+ let existingUrl;
2098
2058
  let onLog;
2099
2059
  if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
2100
2060
  prompt = promptOrOptions.prompt;
2061
+ existingUrl = promptOrOptions.imageUrl;
2101
2062
  onLog = promptOrOptions.onLog;
2102
2063
  } else {
2103
2064
  prompt = promptOrOptions;
2104
2065
  }
2105
- const ssResult = await sidecarRequest("/screenshot-full-page", void 0, {
2106
- timeout: 12e4
2107
- });
2108
- const url = ssResult?.url || ssResult?.screenshotUrl;
2109
- if (!url) {
2110
- throw new Error(
2111
- `No URL in sidecar response. The browser may not be ready yet. Response: ${JSON.stringify(ssResult)}`
2112
- );
2066
+ let url;
2067
+ if (existingUrl) {
2068
+ url = existingUrl;
2069
+ } else {
2070
+ const ssResult = await sidecarRequest("/screenshot-full-page", void 0, {
2071
+ timeout: 12e4
2072
+ });
2073
+ url = ssResult?.url || ssResult?.screenshotUrl;
2074
+ if (!url) {
2075
+ throw new Error(
2076
+ `No URL in sidecar response. The browser may not be ready yet. Response: ${JSON.stringify(ssResult)}`
2077
+ );
2078
+ }
2113
2079
  }
2114
2080
  if (prompt === false) {
2115
2081
  return url;
@@ -2141,19 +2107,30 @@ var init_screenshot2 = __esm({
2141
2107
  screenshotTool = {
2142
2108
  definition: {
2143
2109
  name: "screenshot",
2144
- description: "Capture a full-height screenshot of the app preview and get a description of what's on screen. Optionally provide a specific question about what you're looking for..",
2110
+ description: "Capture a full-height screenshot of the app preview and get a description of what's on screen. Provides static image analysis only, will not capture animations or video. Optionally provide specific questions about what you're looking for. Use a bulleted list to ask many questions at once. To ask additional questions about a screenshot you have already captured, pass its URL as imageUrl to skip recapture.",
2145
2111
  inputSchema: {
2146
2112
  type: "object",
2147
2113
  properties: {
2148
2114
  prompt: {
2149
2115
  type: "string",
2150
2116
  description: "Optional question about the screenshot. If omitted, returns a general description of what's visible."
2117
+ },
2118
+ imageUrl: {
2119
+ type: "string",
2120
+ description: "URL of an existing screenshot to analyze instead of capturing a new one. Use this for additional questions about a previous screenshot."
2151
2121
  }
2152
2122
  }
2153
2123
  }
2154
2124
  },
2155
2125
  async execute(input, context) {
2156
2126
  try {
2127
+ if (input.imageUrl) {
2128
+ return await captureAndAnalyzeScreenshot({
2129
+ prompt: input.prompt,
2130
+ imageUrl: input.imageUrl,
2131
+ onLog: context?.onLog
2132
+ });
2133
+ }
2157
2134
  return await captureAndAnalyzeScreenshot({
2158
2135
  prompt: input.prompt,
2159
2136
  onLog: context?.onLog
@@ -2226,8 +2203,51 @@ var init_statusWatcher = __esm({
2226
2203
  });
2227
2204
 
2228
2205
  // src/subagents/common/cleanMessages.ts
2206
+ function findLastSummaryCheckpoint(messages, name) {
2207
+ for (let i = messages.length - 1; i >= 0; i--) {
2208
+ const msg = messages[i];
2209
+ if (!Array.isArray(msg.content)) {
2210
+ continue;
2211
+ }
2212
+ for (const block of msg.content) {
2213
+ if (block.type === "summary" && block.name === name) {
2214
+ return i;
2215
+ }
2216
+ }
2217
+ }
2218
+ return -1;
2219
+ }
2229
2220
  function cleanMessagesForApi(messages) {
2230
- return messages.map((msg) => {
2221
+ const checkpointIdx = findLastSummaryCheckpoint(messages, "conversation");
2222
+ let startIdx = 0;
2223
+ const prefix = [];
2224
+ if (checkpointIdx !== -1) {
2225
+ const checkpointMsg = messages[checkpointIdx];
2226
+ const blocks = checkpointMsg.content;
2227
+ const summaryBlock = blocks.find(
2228
+ (b) => b.type === "summary" && b.name === "conversation"
2229
+ );
2230
+ if (summaryBlock && summaryBlock.type === "summary") {
2231
+ prefix.push({
2232
+ role: "user",
2233
+ content: `<conversation_summary>
2234
+ ${summaryBlock.text}
2235
+ </conversation_summary>`,
2236
+ hidden: true
2237
+ });
2238
+ }
2239
+ startIdx = checkpointIdx + 1;
2240
+ }
2241
+ const messagesToProcess = messages.slice(startIdx);
2242
+ const cleaned = messagesToProcess.filter((msg) => {
2243
+ if (Array.isArray(msg.content)) {
2244
+ const blocks = msg.content;
2245
+ if (blocks.some((b) => b.type === "summary")) {
2246
+ return false;
2247
+ }
2248
+ }
2249
+ return true;
2250
+ }).map((msg) => {
2231
2251
  if (msg.role === "user" && typeof msg.content === "string" && msg.content.startsWith("@@automated::")) {
2232
2252
  return {
2233
2253
  ...msg,
@@ -2243,21 +2263,22 @@ function cleanMessagesForApi(messages) {
2243
2263
  const thinking = blocks.filter(
2244
2264
  (b) => b.type === "thinking"
2245
2265
  ).map((b) => ({ thinking: b.thinking, signature: b.signature }));
2246
- const cleaned = {
2266
+ const cleaned2 = {
2247
2267
  role: msg.role,
2248
2268
  content: text
2249
2269
  };
2250
2270
  if (toolCalls.length > 0) {
2251
- cleaned.toolCalls = toolCalls;
2271
+ cleaned2.toolCalls = toolCalls;
2252
2272
  }
2253
2273
  if (thinking.length > 0) {
2254
- cleaned.thinking = thinking;
2274
+ cleaned2.thinking = thinking;
2255
2275
  }
2256
2276
  if (msg.hidden) {
2257
- cleaned.hidden = true;
2277
+ cleaned2.hidden = true;
2258
2278
  }
2259
- return cleaned;
2279
+ return cleaned2;
2260
2280
  });
2281
+ return [...prefix, ...cleaned];
2261
2282
  }
2262
2283
  var init_cleanMessages = __esm({
2263
2284
  "src/subagents/common/cleanMessages.ts"() {
@@ -2282,6 +2303,7 @@ async function runSubAgent(config) {
2282
2303
  resolveExternalTool,
2283
2304
  toolRegistry,
2284
2305
  requestId,
2306
+ history,
2285
2307
  background,
2286
2308
  onBackgroundComplete
2287
2309
  } = config;
@@ -2295,7 +2317,10 @@ async function runSubAgent(config) {
2295
2317
  };
2296
2318
  let turns = 0;
2297
2319
  const run = async () => {
2298
- const messages = [{ role: "user", content: task }];
2320
+ const messages = [
2321
+ ...history ?? [],
2322
+ { role: "user", content: task }
2323
+ ];
2299
2324
  function getPartialText(blocks) {
2300
2325
  return blocks.filter((b) => b.type === "text").map((b) => b.text).join("");
2301
2326
  }
@@ -2934,7 +2959,7 @@ var init_searchGoogle2 = __esm({
2934
2959
  init_runCli();
2935
2960
  definition = {
2936
2961
  name: "searchGoogle",
2937
- description: 'Search Google for web results. Reserch modern design trends in industries or verticals, "best [domain] apps 2026", ui patterns, or find something specific if the the user has an explicit reference. Prioritize authoritative sources like Figma and other design leaders, avoid random blog spam. Pick one or more URLs from the results and then use `fetchUrl` to get their text content.',
2962
+ description: 'Search Google for web results. Reserch modern design trends in industries or verticals, "best [domain] apps 2026", ui patterns, or find something specific if the the user has an explicit reference. Searching for and reading case studies is a great way to get information and context about a project\'s domain. Prioritize authoritative sources like Figma and other design leaders, avoid random blog spam. Pick one or more URLs from the results and then use `scrapeWebUrl` to get their text content.',
2938
2963
  inputSchema: {
2939
2964
  type: "object",
2940
2965
  properties: {
@@ -2949,9 +2974,9 @@ var init_searchGoogle2 = __esm({
2949
2974
  }
2950
2975
  });
2951
2976
 
2952
- // src/subagents/designExpert/tools/fetchUrl.ts
2953
- var fetchUrl_exports = {};
2954
- __export(fetchUrl_exports, {
2977
+ // src/subagents/designExpert/tools/scrapeWebUrl.ts
2978
+ var scrapeWebUrl_exports = {};
2979
+ __export(scrapeWebUrl_exports, {
2955
2980
  definition: () => definition2,
2956
2981
  execute: () => execute2
2957
2982
  });
@@ -2966,12 +2991,12 @@ async function execute2(input, onLog) {
2966
2991
  );
2967
2992
  }
2968
2993
  var definition2;
2969
- var init_fetchUrl2 = __esm({
2970
- "src/subagents/designExpert/tools/fetchUrl.ts"() {
2994
+ var init_scrapeWebUrl = __esm({
2995
+ "src/subagents/designExpert/tools/scrapeWebUrl.ts"() {
2971
2996
  "use strict";
2972
2997
  init_runCli();
2973
2998
  definition2 = {
2974
- name: "fetchUrl",
2999
+ name: "scrapeWebUrl",
2975
3000
  description: "Fetch the content of a web page as markdown. Use when reading sites from search results or specific things the user wants to incorporate.",
2976
3001
  inputSchema: {
2977
3002
  type: "object",
@@ -3046,7 +3071,7 @@ Respond only with your analysis as Markdown and absolutely no other text. Do not
3046
3071
  `;
3047
3072
  definition3 = {
3048
3073
  name: "analyzeDesign",
3049
- description: "Analyze the visual design of a website or image URL. Websites are automatically screenshotted first. If no prompt is provided, performs a full design reference analysis (mood, color, typography, layout, distinctiveness). Provide a custom prompt to ask a specific design question instead.",
3074
+ description: "Analyze the visual design of a website or image URL. Websites are automatically screenshotted first. Provides static image analysis only, will not capture animations or video. If no prompt is provided, performs a full design reference analysis (mood, color, typography, layout, distinctiveness). Provide a custom prompt to ask a specific design question instead. Use a bulleted list to ask many questions at once.",
3050
3075
  inputSchema: {
3051
3076
  type: "object",
3052
3077
  properties: {
@@ -3086,10 +3111,10 @@ var init_analyzeImage2 = __esm({
3086
3111
  "src/subagents/designExpert/tools/analyzeImage.ts"() {
3087
3112
  "use strict";
3088
3113
  init_analyzeImage();
3089
- DEFAULT_PROMPT = "Describe everything visible in this image \u2014 every element, its position, its size relative to the frame, its colors, its content. Be comprhensive, thorough and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components). Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
3114
+ DEFAULT_PROMPT = "Describe everything visible in this image \u2014 every element, its position, its size relative to the frame, its colors, its content. Be comprehensive, thorough and spatial. After the inventory, note anything that looks visually broken (overlapping elements, clipped text, misaligned components). Respond only with your analysis as Markdown and absolutely no other text. Do not use emojis - use unicode if you need symbols.";
3090
3115
  definition4 = {
3091
3116
  name: "analyzeImage",
3092
- description: "Analyze an image by URL using a vision model. Returns an objective description of what is visible \u2014 shapes, colors, layout, text, artifacts. Use for factual inventory of image contents, not for subjective design judgment - the vision model providing the analysis has no sense of design. You are the design expert - use the analysis tool for factual inventory, then apply your own expertise for quality and suitability assessments.",
3117
+ description: "Analyze an image by URL using a vision model. Provides static image analysis only, will not capture animations or video. Returns an objective description of what is visible \u2014 shapes, colors, layout, text, artifacts. Use for factual inventory of image contents, not for subjective design judgment - the vision model providing the analysis has no sense of design. You are the design expert - use the analysis tool for factual inventory, then apply your own expertise for quality and suitability assessments. Optionally provide specific questions about what you're looking for. Use a bulleted list to ask many questions at once. If you are analyzing a screenshot of the app preview, you can reuse the same screenshot URL multiple times to ask multiple questions.",
3093
3118
  inputSchema: {
3094
3119
  type: "object",
3095
3120
  properties: {
@@ -3426,7 +3451,7 @@ var init_tools2 = __esm({
3426
3451
  "src/subagents/designExpert/tools/index.ts"() {
3427
3452
  "use strict";
3428
3453
  init_searchGoogle2();
3429
- init_fetchUrl2();
3454
+ init_scrapeWebUrl();
3430
3455
  init_analyzeDesign();
3431
3456
  init_analyzeImage2();
3432
3457
  init_screenshot3();
@@ -3434,7 +3459,7 @@ var init_tools2 = __esm({
3434
3459
  init_editImages();
3435
3460
  tools = {
3436
3461
  searchGoogle: searchGoogle_exports,
3437
- fetchUrl: fetchUrl_exports,
3462
+ scrapeWebUrl: scrapeWebUrl_exports,
3438
3463
  analyzeDesign: analyzeDesign_exports,
3439
3464
  analyzeImage: analyzeImage_exports,
3440
3465
  screenshot: screenshot_exports,
@@ -3721,6 +3746,55 @@ var init_prompt2 = __esm({
3721
3746
  }
3722
3747
  });
3723
3748
 
3749
+ // src/subagents/common/history.ts
3750
+ function getSubAgentHistory(messages, subAgentName) {
3751
+ let checkpointIdx = -1;
3752
+ let summaryText = "";
3753
+ for (let i = messages.length - 1; i >= 0; i--) {
3754
+ const msg = messages[i];
3755
+ if (!Array.isArray(msg.content)) {
3756
+ continue;
3757
+ }
3758
+ for (const block of msg.content) {
3759
+ if (block.type === "summary" && block.name === subAgentName) {
3760
+ checkpointIdx = i;
3761
+ summaryText = block.text;
3762
+ break;
3763
+ }
3764
+ }
3765
+ if (checkpointIdx !== -1) {
3766
+ break;
3767
+ }
3768
+ }
3769
+ const history = [];
3770
+ if (checkpointIdx !== -1 && summaryText) {
3771
+ history.push({
3772
+ role: "user",
3773
+ content: `<prior_conversation_summary>
3774
+ ${summaryText}
3775
+ </prior_conversation_summary>`
3776
+ });
3777
+ }
3778
+ const startIdx = checkpointIdx !== -1 ? checkpointIdx + 1 : 0;
3779
+ for (let i = startIdx; i < messages.length; i++) {
3780
+ const msg = messages[i];
3781
+ if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
3782
+ continue;
3783
+ }
3784
+ for (const block of msg.content) {
3785
+ if (block.type === "tool" && block.name === subAgentName && block.subAgentMessages?.length) {
3786
+ history.push(...block.subAgentMessages);
3787
+ }
3788
+ }
3789
+ }
3790
+ return history;
3791
+ }
3792
+ var init_history = __esm({
3793
+ "src/subagents/common/history.ts"() {
3794
+ "use strict";
3795
+ }
3796
+ });
3797
+
3724
3798
  // src/subagents/designExpert/index.ts
3725
3799
  var DESCRIPTION, designExpertTool;
3726
3800
  var init_designExpert = __esm({
@@ -3729,6 +3803,7 @@ var init_designExpert = __esm({
3729
3803
  init_runner();
3730
3804
  init_tools2();
3731
3805
  init_prompt2();
3806
+ init_history();
3732
3807
  DESCRIPTION = `
3733
3808
  Visual design expert. Describe the situation and what you need \u2014 the agent decides what to deliver. It reads the spec files automatically. Include relevant user requirements and context it can't get from the spec, but do not list specific deliverables or tell it how to do its job. Do not suggest implementation details or ideas - only relay what is needed.
3734
3809
  `.trim();
@@ -3755,9 +3830,11 @@ Visual design expert. Describe the situation and what you need \u2014 the agent
3755
3830
  if (!context) {
3756
3831
  return "Error: visual design expert requires execution context";
3757
3832
  }
3833
+ const history = context.conversationMessages ? getSubAgentHistory(context.conversationMessages, "visualDesignExpert") : [];
3758
3834
  const result = await runSubAgent({
3759
3835
  system: getDesignExpertPrompt(),
3760
3836
  task: input.task,
3837
+ history: history.length > 0 ? history : void 0,
3761
3838
  tools: DESIGN_EXPERT_TOOLS,
3762
3839
  externalTools: /* @__PURE__ */ new Set(),
3763
3840
  executeTool: (name, input2, toolCallId, onLog) => executeDesignExpertTool(name, input2, context, toolCallId, onLog),
@@ -4063,6 +4140,7 @@ var init_productVision = __esm({
4063
4140
  init_tools3();
4064
4141
  init_executor();
4065
4142
  init_prompt3();
4143
+ init_history();
4066
4144
  productVisionTool = {
4067
4145
  definition: {
4068
4146
  name: "productVision",
@@ -4086,9 +4164,11 @@ var init_productVision = __esm({
4086
4164
  if (!context) {
4087
4165
  return "Error: product vision requires execution context";
4088
4166
  }
4167
+ const history = context.conversationMessages ? getSubAgentHistory(context.conversationMessages, "productVision") : [];
4089
4168
  const result = await runSubAgent({
4090
4169
  system: getProductVisionPrompt(),
4091
4170
  task: input.task,
4171
+ history: history.length > 0 ? history : void 0,
4092
4172
  tools: VISION_TOOLS,
4093
4173
  externalTools: /* @__PURE__ */ new Set(),
4094
4174
  executeTool: executeVisionTool,
@@ -4179,7 +4259,7 @@ var init_tools4 = __esm({
4179
4259
  }
4180
4260
  },
4181
4261
  {
4182
- name: "fetchUrl",
4262
+ name: "scrapeWebUrl",
4183
4263
  description: "Fetch a web page as markdown. Use to read package docs, changelogs, npm pages.",
4184
4264
  inputSchema: {
4185
4265
  type: "object",
@@ -4275,6 +4355,47 @@ var init_codeSanityCheck = __esm({
4275
4355
  }
4276
4356
  });
4277
4357
 
4358
+ // src/tools/common/scrapeWebUrl.ts
4359
+ var scrapeWebUrlTool;
4360
+ var init_scrapeWebUrl2 = __esm({
4361
+ "src/tools/common/scrapeWebUrl.ts"() {
4362
+ "use strict";
4363
+ init_runCli();
4364
+ scrapeWebUrlTool = {
4365
+ definition: {
4366
+ name: "scrapeWebUrl",
4367
+ description: "Scrape the content of a web page. Returns the HTML of the page as markdown text. Optionally capture a screenshot if you need see the visual design. Use this when you need to fetch or analyze content from a website",
4368
+ inputSchema: {
4369
+ type: "object",
4370
+ properties: {
4371
+ url: {
4372
+ type: "string",
4373
+ description: "The URL to fetch."
4374
+ },
4375
+ screenshot: {
4376
+ type: "boolean",
4377
+ description: "Capture a screenshot of the page in addition to the text content. Adds latency; only use when you need to see the visual design."
4378
+ }
4379
+ },
4380
+ required: ["url"]
4381
+ }
4382
+ },
4383
+ async execute(input, context) {
4384
+ const url = input.url;
4385
+ const screenshot = input.screenshot;
4386
+ const pageOptions = { onlyMainContent: true };
4387
+ if (screenshot) {
4388
+ pageOptions.screenshot = true;
4389
+ }
4390
+ return runCli(
4391
+ `mindstudio scrape-url --url ${JSON.stringify(url)} --page-options ${JSON.stringify(JSON.stringify(pageOptions))} --no-meta`,
4392
+ { onLog: context?.onLog }
4393
+ );
4394
+ }
4395
+ };
4396
+ }
4397
+ });
4398
+
4278
4399
  // src/tools/index.ts
4279
4400
  function getSpecTools() {
4280
4401
  return [readSpecTool, writeSpecTool, editSpecTool, listSpecFilesTool];
@@ -4305,7 +4426,7 @@ function getCommonTools() {
4305
4426
  promptUserTool,
4306
4427
  confirmDestructiveActionTool,
4307
4428
  askMindStudioSdkTool,
4308
- fetchUrlTool,
4429
+ scrapeWebUrlTool,
4309
4430
  searchGoogleTool,
4310
4431
  setProjectMetadataTool,
4311
4432
  designExpertTool,
@@ -4370,7 +4491,6 @@ var init_tools5 = __esm({
4370
4491
  init_promptUser();
4371
4492
  init_confirmDestructiveAction();
4372
4493
  init_sdkConsultant();
4373
- init_fetchUrl();
4374
4494
  init_searchGoogle();
4375
4495
  init_setProjectMetadata();
4376
4496
  init_readFile();
@@ -4391,6 +4511,7 @@ var init_tools5 = __esm({
4391
4511
  init_designExpert();
4392
4512
  init_productVision();
4393
4513
  init_codeSanityCheck();
4514
+ init_scrapeWebUrl2();
4394
4515
  }
4395
4516
  });
4396
4517
 
@@ -4731,6 +4852,11 @@ async function runTurn(params) {
4731
4852
  let lastCompletedTools = "";
4732
4853
  let lastCompletedInput = "";
4733
4854
  let lastCompletedResult = "";
4855
+ let turnInputTokens = 0;
4856
+ let turnOutputTokens = 0;
4857
+ let turnCacheCreation = 0;
4858
+ let turnCacheRead = 0;
4859
+ let turnLlmCalls = 0;
4734
4860
  while (true) {
4735
4861
  let getOrCreateAccumulator2 = function(id, name) {
4736
4862
  let acc = toolInputAccumulators.get(id);
@@ -4924,6 +5050,11 @@ async function runTurn(params) {
4924
5050
  }
4925
5051
  case "done":
4926
5052
  stopReason = event.stopReason;
5053
+ turnLlmCalls++;
5054
+ turnInputTokens += event.usage.inputTokens;
5055
+ turnOutputTokens += event.usage.outputTokens;
5056
+ turnCacheCreation += event.usage.cacheCreationTokens ?? 0;
5057
+ turnCacheRead += event.usage.cacheReadTokens ?? 0;
4927
5058
  break;
4928
5059
  case "error":
4929
5060
  onEvent({ type: "error", error: friendlyError(event.error) });
@@ -4961,7 +5092,16 @@ async function runTurn(params) {
4961
5092
  if (stopReason !== "tool_use" || toolCalls.length === 0) {
4962
5093
  statusWatcher.stop();
4963
5094
  saveSession(state);
4964
- onEvent({ type: "turn_done" });
5095
+ onEvent({
5096
+ type: "turn_done",
5097
+ stats: {
5098
+ inputTokens: turnInputTokens,
5099
+ outputTokens: turnOutputTokens,
5100
+ cacheCreationTokens: turnCacheCreation || void 0,
5101
+ cacheReadTokens: turnCacheRead || void 0,
5102
+ llmCalls: turnLlmCalls
5103
+ }
5104
+ });
4965
5105
  return;
4966
5106
  }
4967
5107
  log6.info("Tools executing", {
@@ -5024,6 +5164,7 @@ async function runTurn(params) {
5024
5164
  toolCallId: tc.id,
5025
5165
  requestId,
5026
5166
  subAgentMessages,
5167
+ conversationMessages: state.messages,
5027
5168
  toolRegistry,
5028
5169
  onBackgroundComplete,
5029
5170
  onLog: (line) => wrappedOnEvent({
@@ -5376,6 +5517,10 @@ ${isLspConfigured() ? `<typescript_lsp>
5376
5517
 
5377
5518
  {{static/instructions.md}}
5378
5519
 
5520
+ <conversation_summaries>
5521
+ Your conversation history may include <prior_conversation_summary> blocks in the user's messages. These are automated summaries of earlier messages that have been compacted to save context space. The user does not see this summary, they see the full conversation history in their UI. Treat the summary as ground truth for what happened before, but do not reference it directly to the user ("as mentioned in the summary..."). Just continue naturally as if you remember the prior work.
5522
+ </conversation_summaries>
5523
+
5379
5524
  <!-- cache_breakpoint -->
5380
5525
 
5381
5526
  <project_onboarding>
@@ -5459,13 +5604,197 @@ var init_config = __esm({
5459
5604
  }
5460
5605
  });
5461
5606
 
5607
+ // src/compaction/index.ts
5608
+ async function compactConversation(state, apiConfig) {
5609
+ const insertionIndex = state.messages.length;
5610
+ const summaries = [];
5611
+ const tasks = [];
5612
+ const conversationMessages = getConversationMessagesForSummary(
5613
+ state.messages,
5614
+ insertionIndex
5615
+ );
5616
+ if (conversationMessages.length > 0) {
5617
+ tasks.push(
5618
+ generateSummary(
5619
+ apiConfig,
5620
+ "conversation",
5621
+ CONVERSATION_SUMMARY_PROMPT,
5622
+ conversationMessages
5623
+ ).then((text) => {
5624
+ if (text) {
5625
+ summaries.push({ name: "conversation", text });
5626
+ }
5627
+ })
5628
+ );
5629
+ }
5630
+ for (const name of SUMMARIZABLE_SUBAGENTS) {
5631
+ const subagentMessages = getSubAgentMessagesForSummary(
5632
+ state.messages,
5633
+ name,
5634
+ insertionIndex
5635
+ );
5636
+ if (subagentMessages.length > 0) {
5637
+ tasks.push(
5638
+ generateSummary(
5639
+ apiConfig,
5640
+ name,
5641
+ SUBAGENT_SUMMARY_PROMPT,
5642
+ subagentMessages
5643
+ ).then((text) => {
5644
+ if (text) {
5645
+ summaries.push({ name, text });
5646
+ }
5647
+ })
5648
+ );
5649
+ }
5650
+ }
5651
+ await Promise.all(tasks);
5652
+ const checkpointMessages = summaries.map((s) => ({
5653
+ role: "user",
5654
+ hidden: true,
5655
+ content: [
5656
+ {
5657
+ type: "summary",
5658
+ name: s.name,
5659
+ text: s.text,
5660
+ startedAt: Date.now()
5661
+ }
5662
+ ]
5663
+ }));
5664
+ if (checkpointMessages.length > 0) {
5665
+ state.messages.splice(insertionIndex, 0, ...checkpointMessages);
5666
+ }
5667
+ log8.info("Compaction complete", {
5668
+ summaries: summaries.length,
5669
+ insertionIndex,
5670
+ messagesAfter: state.messages.length - insertionIndex - checkpointMessages.length
5671
+ });
5672
+ }
5673
+ function getConversationMessagesForSummary(messages, endIndex) {
5674
+ let startIdx = 0;
5675
+ for (let i = endIndex - 1; i >= 0; i--) {
5676
+ const msg = messages[i];
5677
+ if (!Array.isArray(msg.content)) {
5678
+ continue;
5679
+ }
5680
+ for (const block of msg.content) {
5681
+ if (block.type === "summary" && block.name === "conversation") {
5682
+ startIdx = i + 1;
5683
+ break;
5684
+ }
5685
+ }
5686
+ if (startIdx > 0) {
5687
+ break;
5688
+ }
5689
+ }
5690
+ return messages.slice(startIdx, endIndex);
5691
+ }
5692
+ function getSubAgentMessagesForSummary(messages, subAgentName, endIndex) {
5693
+ let checkpointIdx = -1;
5694
+ for (let i = endIndex - 1; i >= 0; i--) {
5695
+ const msg = messages[i];
5696
+ if (!Array.isArray(msg.content)) {
5697
+ continue;
5698
+ }
5699
+ for (const block of msg.content) {
5700
+ if (block.type === "summary" && block.name === subAgentName) {
5701
+ checkpointIdx = i;
5702
+ break;
5703
+ }
5704
+ }
5705
+ if (checkpointIdx !== -1) {
5706
+ break;
5707
+ }
5708
+ }
5709
+ const startIdx = checkpointIdx !== -1 ? checkpointIdx + 1 : 0;
5710
+ const collected = [];
5711
+ for (let i = startIdx; i < endIndex; i++) {
5712
+ const msg = messages[i];
5713
+ if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
5714
+ continue;
5715
+ }
5716
+ for (const block of msg.content) {
5717
+ if (block.type === "tool" && block.name === subAgentName && block.subAgentMessages?.length) {
5718
+ collected.push(...block.subAgentMessages);
5719
+ }
5720
+ }
5721
+ }
5722
+ return collected;
5723
+ }
5724
+ function serializeForSummary(messages) {
5725
+ return messages.map((msg) => {
5726
+ if (typeof msg.content === "string") {
5727
+ return `[${msg.role}]: ${msg.content}`;
5728
+ }
5729
+ if (!Array.isArray(msg.content)) {
5730
+ return `[${msg.role}]: (empty)`;
5731
+ }
5732
+ const blocks = msg.content;
5733
+ const parts = [];
5734
+ for (const block of blocks) {
5735
+ if (block.type === "text") {
5736
+ parts.push(block.text);
5737
+ } else if (block.type === "tool") {
5738
+ parts.push(
5739
+ `[tool: ${block.name}(${JSON.stringify(block.input).slice(0, 200)})] \u2192 ${(block.result ?? "").slice(0, 500)}`
5740
+ );
5741
+ }
5742
+ }
5743
+ return `[${msg.role}]: ${parts.join("\n")}`;
5744
+ }).join("\n\n");
5745
+ }
5746
+ async function generateSummary(apiConfig, name, systemPrompt, messagesToSummarize) {
5747
+ const serialized = serializeForSummary(messagesToSummarize);
5748
+ if (!serialized.trim()) {
5749
+ return null;
5750
+ }
5751
+ log8.info("Generating summary", {
5752
+ name,
5753
+ messageCount: messagesToSummarize.length
5754
+ });
5755
+ let summaryText = "";
5756
+ for await (const event of streamChat({
5757
+ ...apiConfig,
5758
+ subAgentId: "conversationSummarizer",
5759
+ system: systemPrompt,
5760
+ messages: [{ role: "user", content: serialized }],
5761
+ tools: []
5762
+ })) {
5763
+ if (event.type === "text") {
5764
+ summaryText += event.text;
5765
+ } else if (event.type === "error") {
5766
+ log8.error("Summary generation failed", { name, error: event.error });
5767
+ return null;
5768
+ }
5769
+ }
5770
+ if (!summaryText.trim()) {
5771
+ log8.warn("Empty summary generated", { name });
5772
+ return null;
5773
+ }
5774
+ log8.info("Summary generated", { name, summaryLength: summaryText.length });
5775
+ return summaryText.trim();
5776
+ }
5777
+ var log8, CONVERSATION_SUMMARY_PROMPT, SUBAGENT_SUMMARY_PROMPT, SUMMARIZABLE_SUBAGENTS;
5778
+ var init_compaction = __esm({
5779
+ "src/compaction/index.ts"() {
5780
+ "use strict";
5781
+ init_api();
5782
+ init_assets();
5783
+ init_logger();
5784
+ log8 = createLogger("compaction");
5785
+ CONVERSATION_SUMMARY_PROMPT = readAsset("compaction", "conversation.md");
5786
+ SUBAGENT_SUMMARY_PROMPT = readAsset("compaction", "subagent.md");
5787
+ SUMMARIZABLE_SUBAGENTS = ["visualDesignExpert", "productVision"];
5788
+ }
5789
+ });
5790
+
5462
5791
  // src/toolRegistry.ts
5463
- var log8, ToolRegistry;
5792
+ var log9, ToolRegistry;
5464
5793
  var init_toolRegistry = __esm({
5465
5794
  "src/toolRegistry.ts"() {
5466
5795
  "use strict";
5467
5796
  init_logger();
5468
- log8 = createLogger("tool-registry");
5797
+ log9 = createLogger("tool-registry");
5469
5798
  ToolRegistry = class {
5470
5799
  entries = /* @__PURE__ */ new Map();
5471
5800
  onEvent;
@@ -5491,7 +5820,7 @@ var init_toolRegistry = __esm({
5491
5820
  if (!entry) {
5492
5821
  return false;
5493
5822
  }
5494
- log8.info("Tool stopped", { toolCallId: id, name: entry.name, mode });
5823
+ log9.info("Tool stopped", { toolCallId: id, name: entry.name, mode });
5495
5824
  entry.abortController.abort(mode);
5496
5825
  if (mode === "graceful") {
5497
5826
  const partial = entry.getPartialResult?.() ?? "";
@@ -5524,7 +5853,7 @@ ${partial}` : "[INTERRUPTED] Tool execution was stopped.";
5524
5853
  if (!entry) {
5525
5854
  return false;
5526
5855
  }
5527
- log8.info("Tool restarted", { toolCallId: id, name: entry.name });
5856
+ log9.info("Tool restarted", { toolCallId: id, name: entry.name });
5528
5857
  entry.abortController.abort("restart");
5529
5858
  const newInput = patchedInput ? { ...entry.input, ...patchedInput } : entry.input;
5530
5859
  this.onEvent?.({
@@ -5582,6 +5911,7 @@ __export(headless_exports, {
5582
5911
  startHeadless: () => startHeadless
5583
5912
  });
5584
5913
  import { createInterface } from "readline";
5914
+ import { writeFileSync } from "fs";
5585
5915
  function emit(event, data, requestId) {
5586
5916
  const payload = { event, ...data };
5587
5917
  if (requestId) {
@@ -5645,6 +5975,17 @@ async function startHeadless(opts = {}) {
5645
5975
  const pendingTools = /* @__PURE__ */ new Map();
5646
5976
  const earlyResults = /* @__PURE__ */ new Map();
5647
5977
  const toolRegistry = new ToolRegistry();
5978
+ const sessionStats = {
5979
+ messageCount: 0,
5980
+ turns: 0,
5981
+ totalInputTokens: 0,
5982
+ totalOutputTokens: 0,
5983
+ totalCacheCreationTokens: 0,
5984
+ totalCacheReadTokens: 0,
5985
+ lastContextSize: 0,
5986
+ compactionInProgress: false,
5987
+ updatedAt: 0
5988
+ };
5648
5989
  const backgroundQueue = [];
5649
5990
  function flushBackgroundQueue() {
5650
5991
  if (backgroundQueue.length === 0) {
@@ -5662,22 +6003,32 @@ ${xmlParts}
5662
6003
  </background_results>`;
5663
6004
  handleMessage({ action: "message", text: message }, void 0);
5664
6005
  }
5665
- function onBackgroundComplete(toolCallId, name, result, subAgentMessages) {
5666
- for (const msg of state.messages) {
5667
- if (!Array.isArray(msg.content)) {
5668
- continue;
5669
- }
5670
- for (const block of msg.content) {
5671
- if (block.type === "tool" && block.id === toolCallId) {
5672
- block.backgroundResult = result;
5673
- block.completedAt = Date.now();
5674
- if (subAgentMessages) {
5675
- block.subAgentMessages = subAgentMessages;
6006
+ const pendingBlockUpdates = [];
6007
+ function applyPendingBlockUpdates() {
6008
+ if (pendingBlockUpdates.length === 0) {
6009
+ return;
6010
+ }
6011
+ const updates = pendingBlockUpdates.splice(0);
6012
+ for (const update of updates) {
6013
+ for (const msg of state.messages) {
6014
+ if (!Array.isArray(msg.content)) {
6015
+ continue;
6016
+ }
6017
+ for (const block of msg.content) {
6018
+ if (block.type === "tool" && block.id === update.toolCallId) {
6019
+ block.backgroundResult = update.result;
6020
+ block.completedAt = Date.now();
6021
+ if (update.subAgentMessages) {
6022
+ block.subAgentMessages = update.subAgentMessages;
6023
+ }
5676
6024
  }
5677
6025
  }
5678
6026
  }
5679
6027
  }
5680
- log9.info("Background complete", {
6028
+ }
6029
+ function onBackgroundComplete(toolCallId, name, result, subAgentMessages) {
6030
+ pendingBlockUpdates.push({ toolCallId, result, subAgentMessages });
6031
+ log10.info("Background complete", {
5681
6032
  toolCallId,
5682
6033
  name,
5683
6034
  requestId: currentRequestId
@@ -5695,6 +6046,7 @@ ${xmlParts}
5695
6046
  completedAt: Date.now()
5696
6047
  });
5697
6048
  if (!running) {
6049
+ applyPendingBlockUpdates();
5698
6050
  flushBackgroundQueue();
5699
6051
  }
5700
6052
  }
@@ -5737,8 +6089,25 @@ ${xmlParts}
5737
6089
  // Terminal events — translate to `completed`
5738
6090
  case "turn_done":
5739
6091
  completedEmitted = true;
6092
+ if (e.stats) {
6093
+ sessionStats.turns++;
6094
+ sessionStats.totalInputTokens += e.stats.inputTokens;
6095
+ sessionStats.totalOutputTokens += e.stats.outputTokens;
6096
+ sessionStats.totalCacheCreationTokens += e.stats.cacheCreationTokens ?? 0;
6097
+ sessionStats.totalCacheReadTokens += e.stats.cacheReadTokens ?? 0;
6098
+ sessionStats.lastContextSize = e.stats.inputTokens;
6099
+ }
6100
+ sessionStats.messageCount = state.messages.length;
6101
+ sessionStats.updatedAt = Date.now();
6102
+ try {
6103
+ writeFileSync(".remy-stats.json", JSON.stringify(sessionStats));
6104
+ } catch {
6105
+ }
5740
6106
  emit("completed", { success: true }, rid);
5741
- setTimeout(() => flushBackgroundQueue(), 0);
6107
+ setTimeout(() => {
6108
+ applyPendingBlockUpdates();
6109
+ flushBackgroundQueue();
6110
+ }, 0);
5742
6111
  return;
5743
6112
  case "turn_cancelled":
5744
6113
  completedEmitted = true;
@@ -5927,7 +6296,7 @@ ${xmlParts}
5927
6296
  requestId
5928
6297
  );
5929
6298
  }
5930
- log9.info("Turn complete", {
6299
+ log10.info("Turn complete", {
5931
6300
  requestId,
5932
6301
  durationMs: Date.now() - turnStart
5933
6302
  });
@@ -5936,7 +6305,7 @@ ${xmlParts}
5936
6305
  emit("error", { error: err.message }, requestId);
5937
6306
  emit("completed", { success: false, error: err.message }, requestId);
5938
6307
  }
5939
- log9.warn("Command failed", {
6308
+ log10.warn("Command failed", {
5940
6309
  action: "message",
5941
6310
  requestId,
5942
6311
  error: err.message
@@ -5956,7 +6325,7 @@ ${xmlParts}
5956
6325
  return;
5957
6326
  }
5958
6327
  const { action, requestId } = parsed;
5959
- log9.info("Command received", { action, requestId });
6328
+ log10.info("Command received", { action, requestId });
5960
6329
  if (action === "tool_result" && parsed.id) {
5961
6330
  const id = parsed.id;
5962
6331
  const result = parsed.result ?? "";
@@ -6012,6 +6381,39 @@ ${xmlParts}
6012
6381
  }
6013
6382
  return;
6014
6383
  }
6384
+ if (action === "compact") {
6385
+ sessionStats.compactionInProgress = true;
6386
+ sessionStats.updatedAt = Date.now();
6387
+ try {
6388
+ writeFileSync(".remy-stats.json", JSON.stringify(sessionStats));
6389
+ } catch {
6390
+ }
6391
+ compactConversation(state, config).then(() => {
6392
+ saveSession(state);
6393
+ emit("compaction_complete", {}, requestId);
6394
+ emit("completed", { success: true }, requestId);
6395
+ }).catch((err) => {
6396
+ emit(
6397
+ "compaction_complete",
6398
+ { error: err.message || "Compaction failed" },
6399
+ requestId
6400
+ );
6401
+ emit(
6402
+ "completed",
6403
+ { success: false, error: err.message || "Compaction failed" },
6404
+ requestId
6405
+ );
6406
+ }).finally(() => {
6407
+ sessionStats.compactionInProgress = false;
6408
+ sessionStats.messageCount = state.messages.length;
6409
+ sessionStats.updatedAt = Date.now();
6410
+ try {
6411
+ writeFileSync(".remy-stats.json", JSON.stringify(sessionStats));
6412
+ } catch {
6413
+ }
6414
+ });
6415
+ return;
6416
+ }
6015
6417
  if (action === "message") {
6016
6418
  await handleMessage(parsed, requestId);
6017
6419
  return;
@@ -6037,19 +6439,20 @@ ${xmlParts}
6037
6439
  process.on("SIGINT", shutdown);
6038
6440
  emit("ready");
6039
6441
  }
6040
- var log9;
6442
+ var log10;
6041
6443
  var init_headless = __esm({
6042
6444
  "src/headless.ts"() {
6043
6445
  "use strict";
6044
6446
  init_logger();
6045
6447
  init_config();
6046
6448
  init_prompt4();
6449
+ init_compaction();
6047
6450
  init_lsp();
6048
6451
  init_agent();
6049
6452
  init_session();
6050
6453
  init_toolRegistry();
6051
6454
  init_resolve();
6052
- log9 = createLogger("headless");
6455
+ log10 = createLogger("headless");
6053
6456
  }
6054
6457
  });
6055
6458