@mindstudio-ai/remy 0.1.178 → 0.1.180

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/headless.js +317 -171
  2. package/dist/index.js +337 -174
  3. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -90,7 +90,7 @@ async function* streamChat(params) {
90
90
  const url = `${baseUrl2}/_internal/v2/agent/remy/chat`;
91
91
  const startTime = Date.now();
92
92
  const subAgentId = rest.subAgentId;
93
- const requestBody = { ...rest, ...model && { modelId: model } };
93
+ const requestBody = { ...rest, modelId: model };
94
94
  log.info("API request", {
95
95
  requestId,
96
96
  ...subAgentId && { subAgentId },
@@ -2024,6 +2024,104 @@ var init_prompt = __esm({
2024
2024
  }
2025
2025
  });
2026
2026
 
2027
+ // src/models/surfaces.ts
2028
+ function resolveModel(surfaceId, models, fallback) {
2029
+ return models?.[surfaceId] ?? fallback ?? MODEL_SURFACES[surfaceId].default;
2030
+ }
2031
+ var MODEL_SURFACES, ALLOWED_MODELS_BY_TYPE;
2032
+ var init_surfaces = __esm({
2033
+ "src/models/surfaces.ts"() {
2034
+ "use strict";
2035
+ MODEL_SURFACES = {
2036
+ parent: {
2037
+ default: "claude-4-7-opus",
2038
+ label: "Remy",
2039
+ description: "The main Remy agent you chat with about your product. Writes code and manages delegation to other agents.",
2040
+ modelType: "text",
2041
+ userPickable: true
2042
+ },
2043
+ visualDesignExpert: {
2044
+ default: "claude-4-7-opus",
2045
+ label: "Design Agent",
2046
+ description: "Designs your product's interfaces, including components, layouts, typography, color, and visual identity.",
2047
+ modelType: "text",
2048
+ userPickable: true
2049
+ },
2050
+ productVision: {
2051
+ default: "claude-4-6-sonnet",
2052
+ label: "Roadmap Agent",
2053
+ description: "Owns your product's roadmap and pitch deck. Helps decide what to build next and how to frame the big picture.",
2054
+ modelType: "text",
2055
+ userPickable: true
2056
+ },
2057
+ browserAutomation: {
2058
+ default: "claude-4-6-sonnet",
2059
+ label: "QA Agent",
2060
+ description: "Tests features and UI flows in an automated browser to verify they work end to end.",
2061
+ modelType: "text",
2062
+ userPickable: true
2063
+ },
2064
+ codeSanityCheck: {
2065
+ default: "claude-4-6-sonnet",
2066
+ label: "Architecture Agent",
2067
+ description: "Reviews the architecture and structure of code changes to avoid technical debt.",
2068
+ modelType: "text",
2069
+ userPickable: true
2070
+ },
2071
+ imageGeneration: {
2072
+ default: "seedream-4.5",
2073
+ label: "Image Generation",
2074
+ description: "Creates images for your product \u2014 icons, illustrations, photos, and any other visual assets.",
2075
+ modelType: "image_generation",
2076
+ userPickable: true
2077
+ },
2078
+ imageAnalysis: {
2079
+ default: "claude-4-6-sonnet",
2080
+ label: "Image Analysis",
2081
+ description: "Reads screenshots taken by the QA agent during automated browser tests. Other agents use their own built-in image analysis when they need to read images.",
2082
+ modelType: "vision",
2083
+ userPickable: true
2084
+ },
2085
+ conversationSummarizer: {
2086
+ default: "claude-4-6-sonnet",
2087
+ label: "Compaction Utility",
2088
+ description: "Compresses long conversations into summaries to keep things responsive.",
2089
+ modelType: "text",
2090
+ userPickable: true
2091
+ },
2092
+ brandExtractor: {
2093
+ default: "claude-4-6-sonnet",
2094
+ label: "Brand Utility",
2095
+ description: "Extracts your product's name, colors, and fonts from your spec for use in branded documents.",
2096
+ modelType: "text",
2097
+ userPickable: true
2098
+ },
2099
+ // Internal surface — not user-pickable. Remy uses this to rewrite design
2100
+ // briefs into model-optimized image prompts before image generation.
2101
+ imagePromptEnhancer: {
2102
+ default: "claude-4-6-sonnet",
2103
+ label: "Image Prompt Enhancer",
2104
+ description: "Rewrites image briefs into model-optimized prompts before image generation.",
2105
+ modelType: "text",
2106
+ userPickable: false
2107
+ }
2108
+ };
2109
+ ALLOWED_MODELS_BY_TYPE = {
2110
+ text: [
2111
+ "claude-4-7-opus",
2112
+ "claude-4-6-opus",
2113
+ "claude-4-6-sonnet",
2114
+ "gpt-5.5",
2115
+ "gemini-3-pro",
2116
+ "gemini-3.1-pro",
2117
+ "gemini-3-flash"
2118
+ ]
2119
+ // vision: undefined — unconstrained
2120
+ // image_generation: undefined — unconstrained
2121
+ };
2122
+ }
2123
+ });
2124
+
2027
2125
  // src/compaction/trigger.ts
2028
2126
  function getPendingSummaries() {
2029
2127
  return pendingSummaries.splice(0);
@@ -2044,7 +2142,7 @@ function triggerCompaction(state, apiConfig, opts = {}) {
2044
2142
  apiConfig,
2045
2143
  system,
2046
2144
  tools2,
2047
- state.models?.conversationSummarizer ?? model
2145
+ resolveModel("conversationSummarizer", state.models, model)
2048
2146
  ).then((summaries) => {
2049
2147
  pendingSummaries.push(...summaries);
2050
2148
  listener?.({ type: "complete", requestId });
@@ -2067,6 +2165,7 @@ var init_trigger = __esm({
2067
2165
  init_prompt();
2068
2166
  init_tools6();
2069
2167
  init_logger();
2168
+ init_surfaces();
2070
2169
  log3 = createLogger("compaction:trigger");
2071
2170
  pendingSummaries = [];
2072
2171
  inflightCompaction = null;
@@ -3085,7 +3184,7 @@ var init_queryDatabase = __esm({
3085
3184
 
3086
3185
  // src/subagents/common/analyzeImage.ts
3087
3186
  async function analyzeImage(params) {
3088
- const { prompt, imageUrl, timeout = 2e5, onLog } = params;
3187
+ const { prompt, imageUrl, model, timeout = 2e5, onLog } = params;
3089
3188
  return runMindstudioCli(
3090
3189
  [
3091
3190
  "analyze-image",
@@ -3094,21 +3193,15 @@ async function analyzeImage(params) {
3094
3193
  "--image-url",
3095
3194
  imageUrl,
3096
3195
  "--vision-model-override",
3097
- JSON.stringify(VISION_MODEL_OVERRIDE)
3196
+ JSON.stringify({ model })
3098
3197
  ],
3099
3198
  { outputKey: "analysis", timeout, onLog }
3100
3199
  );
3101
3200
  }
3102
- var VISION_MODEL, VISION_MODEL_OVERRIDE;
3103
3201
  var init_analyzeImage = __esm({
3104
3202
  "src/subagents/common/analyzeImage.ts"() {
3105
3203
  "use strict";
3106
3204
  init_runMindstudioCli();
3107
- VISION_MODEL = "claude-4-6-sonnet";
3108
- VISION_MODEL_OVERRIDE = {
3109
- model: VISION_MODEL,
3110
- config: { thinkingBudget: "off" }
3111
- };
3112
3205
  }
3113
3206
  });
3114
3207
 
@@ -3130,13 +3223,14 @@ ${TEXT_WRAP_DISCLAIMER}`;
3130
3223
  return p;
3131
3224
  }
3132
3225
  async function streamScreenshotAnalysis(opts) {
3133
- const { url, prompt, styleMap, onLog } = opts;
3226
+ const { url, prompt, styleMap, onLog, model } = opts;
3134
3227
  onLog?.(JSON.stringify({ url, analysis: null }));
3135
3228
  const analysisPrompt = buildScreenshotAnalysisPrompt({ prompt, styleMap });
3136
3229
  let accumulated = "";
3137
3230
  const analysis = await analyzeImage({
3138
3231
  prompt: analysisPrompt,
3139
3232
  imageUrl: url,
3233
+ model,
3140
3234
  onLog: (chunk) => {
3141
3235
  accumulated += chunk;
3142
3236
  onLog?.(JSON.stringify({ url, analysis: accumulated }));
@@ -3148,12 +3242,14 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
3148
3242
  let prompt;
3149
3243
  let existingUrl;
3150
3244
  let onLog;
3245
+ let model;
3151
3246
  let path13;
3152
3247
  if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
3153
3248
  prompt = promptOrOptions.prompt;
3154
3249
  existingUrl = promptOrOptions.imageUrl;
3155
3250
  path13 = promptOrOptions.path;
3156
3251
  onLog = promptOrOptions.onLog;
3252
+ model = promptOrOptions.model;
3157
3253
  } else {
3158
3254
  prompt = promptOrOptions;
3159
3255
  }
@@ -3178,11 +3274,17 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
3178
3274
  if (prompt === false) {
3179
3275
  return url;
3180
3276
  }
3277
+ if (!model) {
3278
+ throw new Error(
3279
+ "captureAndAnalyzeScreenshot: `model` is required when analysis is enabled"
3280
+ );
3281
+ }
3181
3282
  return streamScreenshotAnalysis({
3182
3283
  url,
3183
3284
  prompt: prompt || void 0,
3184
3285
  styleMap,
3185
- onLog
3286
+ onLog,
3287
+ model
3186
3288
  });
3187
3289
  }
3188
3290
  var SCREENSHOT_ANALYSIS_PROMPT, TEXT_WRAP_DISCLAIMER;
@@ -4074,6 +4176,121 @@ var init_prompt2 = __esm({
4074
4176
  });
4075
4177
 
4076
4178
  // src/subagents/browserAutomation/index.ts
4179
+ async function runBrowserAutomation(task, context) {
4180
+ const release = await acquireBrowserLock();
4181
+ try {
4182
+ const result = await runSubAgent({
4183
+ system: getBrowserAutomationPrompt(),
4184
+ task,
4185
+ tools: BROWSER_TOOLS,
4186
+ externalTools: BROWSER_EXTERNAL_TOOLS,
4187
+ executeTool: async (name, _input, _toolCallId, onLog) => {
4188
+ if (name === "setupBrowser") {
4189
+ try {
4190
+ const result2 = await sidecarRequest(
4191
+ "/setup-browser",
4192
+ {
4193
+ auth: _input.auth,
4194
+ path: _input.path
4195
+ },
4196
+ { timeout: 15e3 }
4197
+ );
4198
+ return JSON.stringify(result2);
4199
+ } catch (err) {
4200
+ return `Error setting up browser: ${err.message}`;
4201
+ }
4202
+ }
4203
+ if (name === "screenshotFullPage") {
4204
+ try {
4205
+ return await captureAndAnalyzeScreenshot({
4206
+ path: _input.path,
4207
+ onLog,
4208
+ model: resolveModel(
4209
+ "imageAnalysis",
4210
+ context.models,
4211
+ context.model
4212
+ )
4213
+ });
4214
+ } catch (err) {
4215
+ return `Error taking screenshot: ${err.message}`;
4216
+ }
4217
+ }
4218
+ return `Error: unknown local tool "${name}"`;
4219
+ },
4220
+ apiConfig: context.apiConfig,
4221
+ model: resolveModel("browserAutomation", context.models, context.model),
4222
+ subAgentId: "browserAutomation",
4223
+ signal: context.signal,
4224
+ parentToolId: context.toolCallId,
4225
+ requestId: context.requestId,
4226
+ onEvent: context.onEvent,
4227
+ resolveExternalTool: async (id, name, input) => {
4228
+ if (!context.resolveExternalTool) {
4229
+ return "Error: no external tool resolver";
4230
+ }
4231
+ const result2 = await context.resolveExternalTool(id, name, input);
4232
+ if (name === "browserCommand") {
4233
+ try {
4234
+ const parsed = JSON.parse(result2);
4235
+ const screenshotSteps = (parsed.steps || []).filter(
4236
+ (s) => s.command === "screenshotViewport" && s.result?.url
4237
+ );
4238
+ if (screenshotSteps.length > 0) {
4239
+ const visionOverride = {
4240
+ model: resolveModel(
4241
+ "imageAnalysis",
4242
+ context.models,
4243
+ context.model
4244
+ )
4245
+ };
4246
+ const batchInput = screenshotSteps.map((s) => ({
4247
+ stepType: "analyzeImage",
4248
+ step: {
4249
+ imageUrl: s.result.url,
4250
+ prompt: buildScreenshotAnalysisPrompt({
4251
+ styleMap: s.result.styleMap
4252
+ }),
4253
+ visionModelOverride: visionOverride
4254
+ }
4255
+ }));
4256
+ const batchResult = await runMindstudioCli(
4257
+ ["batch", JSON.stringify(batchInput)],
4258
+ { timeout: 2e5, caller: "browserAutomation" }
4259
+ );
4260
+ try {
4261
+ const analyses = JSON.parse(batchResult);
4262
+ let ai = 0;
4263
+ for (const step of parsed.steps) {
4264
+ if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
4265
+ step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
4266
+ ai++;
4267
+ }
4268
+ }
4269
+ } catch {
4270
+ log6.debug("Failed to parse batch analysis result", {
4271
+ batchResult
4272
+ });
4273
+ }
4274
+ return JSON.stringify(parsed);
4275
+ }
4276
+ } catch {
4277
+ }
4278
+ }
4279
+ return result2;
4280
+ },
4281
+ toolRegistry: context.toolRegistry,
4282
+ captureArtifacts: ["screenshotFullPage"]
4283
+ });
4284
+ context.subAgentMessages?.set(context.toolCallId, result.messages);
4285
+ const ss = result.artifacts?.screenshotFullPage;
4286
+ return {
4287
+ text: result.text,
4288
+ ...ss?.url ? { screenshot: { url: ss.url, styleMap: ss.styleMap } } : {}
4289
+ };
4290
+ } finally {
4291
+ release();
4292
+ }
4293
+ }
4077
4294
  var log6, browserAutomationTool;
4078
4295
  var init_browserAutomation = __esm({
4079
4296
  "src/subagents/browserAutomation/index.ts"() {
@@ -4085,7 +4302,7 @@ var init_browserAutomation = __esm({
4085
4302
  init_browserLock();
4086
4303
  init_screenshot();
4087
4304
  init_runMindstudioCli();
4088
- init_analyzeImage();
4305
+ init_surfaces();
4089
4306
  init_logger();
4090
4307
  log6 = createLogger("browser-automation");
4091
4308
  browserAutomationTool = {
@@ -4108,109 +4325,13 @@ var init_browserAutomation = __esm({
4108
4325
  if (!context) {
4109
4326
  return "Error: browser automation requires execution context (only available in headless mode)";
4110
4327
  }
4111
- const release = await acquireBrowserLock();
4112
- try {
4113
- const result = await runSubAgent({
4114
- system: getBrowserAutomationPrompt(),
4115
- task: input.task,
4116
- tools: BROWSER_TOOLS,
4117
- externalTools: BROWSER_EXTERNAL_TOOLS,
4118
- executeTool: async (name, _input, _toolCallId, onLog) => {
4119
- if (name === "setupBrowser") {
4120
- try {
4121
- const result2 = await sidecarRequest(
4122
- "/setup-browser",
4123
- {
4124
- auth: _input.auth,
4125
- path: _input.path
4126
- },
4127
- { timeout: 15e3 }
4128
- );
4129
- return JSON.stringify(result2);
4130
- } catch (err) {
4131
- return `Error setting up browser: ${err.message}`;
4132
- }
4133
- }
4134
- if (name === "screenshotFullPage") {
4135
- try {
4136
- return await captureAndAnalyzeScreenshot({
4137
- path: _input.path,
4138
- onLog
4139
- });
4140
- } catch (err) {
4141
- return `Error taking screenshot: ${err.message}`;
4142
- }
4143
- }
4144
- return `Error: unknown local tool "${name}"`;
4145
- },
4146
- apiConfig: context.apiConfig,
4147
- model: context.models?.browserAutomation ?? context.model,
4148
- subAgentId: "browserAutomation",
4149
- signal: context.signal,
4150
- parentToolId: context.toolCallId,
4151
- requestId: context.requestId,
4152
- onEvent: context.onEvent,
4153
- resolveExternalTool: async (id, name, input2) => {
4154
- if (!context.resolveExternalTool) {
4155
- return "Error: no external tool resolver";
4156
- }
4157
- const result2 = await context.resolveExternalTool(id, name, input2);
4158
- if (name === "browserCommand") {
4159
- try {
4160
- const parsed = JSON.parse(result2);
4161
- const screenshotSteps = (parsed.steps || []).filter(
4162
- (s) => s.command === "screenshotViewport" && s.result?.url
4163
- );
4164
- if (screenshotSteps.length > 0) {
4165
- const batchInput = screenshotSteps.map((s) => ({
4166
- stepType: "analyzeImage",
4167
- step: {
4168
- imageUrl: s.result.url,
4169
- prompt: buildScreenshotAnalysisPrompt({
4170
- styleMap: s.result.styleMap
4171
- }),
4172
- visionModelOverride: VISION_MODEL_OVERRIDE
4173
- }
4174
- }));
4175
- const batchResult = await runMindstudioCli(
4176
- ["batch", JSON.stringify(batchInput)],
4177
- { timeout: 2e5, caller: "browserAutomation" }
4178
- );
4179
- try {
4180
- const analyses = JSON.parse(batchResult);
4181
- let ai = 0;
4182
- for (const step of parsed.steps) {
4183
- if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
4184
- step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
4185
- ai++;
4186
- }
4187
- }
4188
- } catch {
4189
- log6.debug("Failed to parse batch analysis result", {
4190
- batchResult
4191
- });
4192
- }
4193
- return JSON.stringify(parsed);
4194
- }
4195
- } catch {
4196
- }
4197
- }
4198
- return result2;
4199
- },
4200
- toolRegistry: context.toolRegistry,
4201
- captureArtifacts: ["screenshotFullPage"]
4202
- });
4203
- context.subAgentMessages?.set(context.toolCallId, result.messages);
4204
- const ss = result.artifacts?.screenshotFullPage;
4205
- if (ss?.url) {
4206
- return `${result.text}
4328
+ const result = await runBrowserAutomation(input.task, context);
4329
+ if (result.screenshot) {
4330
+ return `${result.text}
4207
4331
 
4208
- ![Final state](${ss.url})`;
4209
- }
4210
- return result.text;
4211
- } finally {
4212
- release();
4332
+ ![Final state](${result.screenshot.url})`;
4213
4333
  }
4334
+ return result.text;
4214
4335
  }
4215
4336
  };
4216
4337
  }
@@ -4224,6 +4345,7 @@ var init_screenshot2 = __esm({
4224
4345
  init_screenshot();
4225
4346
  init_browserLock();
4226
4347
  init_browserAutomation();
4348
+ init_surfaces();
4227
4349
  screenshotTool = {
4228
4350
  clearable: true,
4229
4351
  definition: {
@@ -4257,29 +4379,22 @@ var init_screenshot2 = __esm({
4257
4379
  return await captureAndAnalyzeScreenshot({
4258
4380
  prompt: input.prompt,
4259
4381
  imageUrl: input.imageUrl,
4260
- onLog: context?.onLog
4382
+ onLog: context?.onLog,
4383
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4261
4384
  });
4262
4385
  }
4263
4386
  if (input.instructions && context) {
4264
4387
  const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
4265
- const result = await browserAutomationTool.execute({ task }, context);
4266
- const resultStr = result;
4267
- let url;
4268
- let styleMap;
4269
- try {
4270
- const parsed = JSON.parse(resultStr);
4271
- url = parsed.screenshotUrl;
4272
- styleMap = parsed.styleMap;
4273
- } catch {
4274
- }
4275
- if (!url) {
4276
- return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
4388
+ const result = await runBrowserAutomation(task, context);
4389
+ if (!result.screenshot) {
4390
+ return result.text;
4277
4391
  }
4278
4392
  return await streamScreenshotAnalysis({
4279
- url,
4393
+ url: result.screenshot.url,
4280
4394
  prompt: input.prompt,
4281
- styleMap,
4282
- onLog: context?.onLog
4395
+ styleMap: result.screenshot.styleMap,
4396
+ onLog: context?.onLog,
4397
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4283
4398
  });
4284
4399
  }
4285
4400
  const release = await acquireBrowserLock();
@@ -4287,7 +4402,8 @@ var init_screenshot2 = __esm({
4287
4402
  return await captureAndAnalyzeScreenshot({
4288
4403
  prompt: input.prompt,
4289
4404
  path: input.path,
4290
- onLog: context?.onLog
4405
+ onLog: context?.onLog,
4406
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4291
4407
  });
4292
4408
  } finally {
4293
4409
  release();
@@ -4478,7 +4594,7 @@ __export(analyzeDesign_exports, {
4478
4594
  definition: () => definition3,
4479
4595
  execute: () => execute3
4480
4596
  });
4481
- async function execute3(input, onLog) {
4597
+ async function execute3(input, onLog, context) {
4482
4598
  const url = input.url;
4483
4599
  const analysisPrompt = input.prompt || DESIGN_REFERENCE_PROMPT;
4484
4600
  const isImageUrl = /\.(png|jpe?g|webp|gif|svg|avif)(\?|$)/i.test(url);
@@ -4511,7 +4627,8 @@ async function execute3(input, onLog) {
4511
4627
  const analysis = await analyzeImage({
4512
4628
  prompt: analysisPrompt,
4513
4629
  imageUrl,
4514
- onLog
4630
+ onLog,
4631
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4515
4632
  });
4516
4633
  return JSON.stringify({ url: imageUrl, analysis });
4517
4634
  }
@@ -4521,6 +4638,7 @@ var init_analyzeDesign = __esm({
4521
4638
  "use strict";
4522
4639
  init_runMindstudioCli();
4523
4640
  init_analyzeImage();
4641
+ init_surfaces();
4524
4642
  DESIGN_REFERENCE_PROMPT = `
4525
4643
  You are analyzing a screenshot of a real website or app for a designer's personal technique/inspiration reference notes.
4526
4644
 
@@ -4572,7 +4690,7 @@ __export(analyzeImage_exports, {
4572
4690
  definition: () => definition4,
4573
4691
  execute: () => execute4
4574
4692
  });
4575
- async function execute4(input, onLog) {
4693
+ async function execute4(input, onLog, context) {
4576
4694
  const imageUrl = input.imageUrl;
4577
4695
  const prompt = buildScreenshotAnalysisPrompt({
4578
4696
  prompt: input.prompt
@@ -4580,7 +4698,8 @@ async function execute4(input, onLog) {
4580
4698
  const analysis = await analyzeImage({
4581
4699
  prompt,
4582
4700
  imageUrl,
4583
- onLog
4701
+ onLog,
4702
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4584
4703
  });
4585
4704
  return JSON.stringify({ url: imageUrl, analysis });
4586
4705
  }
@@ -4590,6 +4709,7 @@ var init_analyzeImage2 = __esm({
4590
4709
  "use strict";
4591
4710
  init_analyzeImage();
4592
4711
  init_screenshot();
4712
+ init_surfaces();
4593
4713
  definition4 = {
4594
4714
  clearable: true,
4595
4715
  name: "analyzeImage",
@@ -4622,24 +4742,16 @@ async function execute5(input, onLog, context) {
4622
4742
  if (input.instructions && context) {
4623
4743
  try {
4624
4744
  const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
4625
- const result = await browserAutomationTool.execute({ task }, context);
4626
- const resultStr = result;
4627
- let url;
4628
- let styleMap;
4629
- try {
4630
- const parsed = JSON.parse(resultStr);
4631
- url = parsed.screenshotUrl;
4632
- styleMap = parsed.styleMap;
4633
- } catch {
4634
- }
4635
- if (!url) {
4636
- return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
4745
+ const result = await runBrowserAutomation(task, context);
4746
+ if (!result.screenshot) {
4747
+ return result.text;
4637
4748
  }
4638
4749
  return await streamScreenshotAnalysis({
4639
- url,
4750
+ url: result.screenshot.url,
4640
4751
  prompt: input.prompt,
4641
- styleMap,
4642
- onLog
4752
+ styleMap: result.screenshot.styleMap,
4753
+ onLog,
4754
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4643
4755
  });
4644
4756
  } catch (err) {
4645
4757
  return `Error taking interactive screenshot: ${err.message}`;
@@ -4650,7 +4762,8 @@ async function execute5(input, onLog, context) {
4650
4762
  return await captureAndAnalyzeScreenshot({
4651
4763
  prompt: input.prompt,
4652
4764
  path: input.path,
4653
- onLog
4765
+ onLog,
4766
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4654
4767
  });
4655
4768
  } catch (err) {
4656
4769
  return `Error taking screenshot: ${err.message}`;
@@ -4665,6 +4778,7 @@ var init_screenshot3 = __esm({
4665
4778
  init_screenshot();
4666
4779
  init_browserLock();
4667
4780
  init_browserAutomation();
4781
+ init_surfaces();
4668
4782
  definition5 = {
4669
4783
  clearable: true,
4670
4784
  name: "screenshot",
@@ -4692,7 +4806,7 @@ var init_screenshot3 = __esm({
4692
4806
 
4693
4807
  // src/subagents/designExpert/tools/images/enhancePrompt.ts
4694
4808
  async function enhanceImagePrompt(params) {
4695
- const { brief, width, height, transparentBackground, onLog } = params;
4809
+ const { brief, width, height, transparentBackground, onLog, model } = params;
4696
4810
  const contextParts = [
4697
4811
  `Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
4698
4812
  ];
@@ -4717,23 +4831,18 @@ ${brief}
4717
4831
  "--message",
4718
4832
  message,
4719
4833
  "--model-override",
4720
- JSON.stringify(MODEL_OVERRIDE)
4834
+ JSON.stringify({ model, config: { reasoning: "false" } })
4721
4835
  ],
4722
4836
  { outputKey: "content", timeout: 6e4, onLog, caller: "designExpert" }
4723
4837
  );
4724
4838
  return enhanced.trim();
4725
4839
  }
4726
- var ENHANCE_MODEL, MODEL_OVERRIDE, SYSTEM_PROMPT;
4840
+ var SYSTEM_PROMPT;
4727
4841
  var init_enhancePrompt = __esm({
4728
4842
  "src/subagents/designExpert/tools/images/enhancePrompt.ts"() {
4729
4843
  "use strict";
4730
4844
  init_runMindstudioCli();
4731
4845
  init_assets();
4732
- ENHANCE_MODEL = "claude-4-6-sonnet";
4733
- MODEL_OVERRIDE = {
4734
- model: ENHANCE_MODEL,
4735
- config: { reasoning: "false" }
4736
- };
4737
4846
  SYSTEM_PROMPT = readAsset(
4738
4847
  "subagents/designExpert/tools/images/enhance-image-prompt.md"
4739
4848
  );
@@ -4742,7 +4851,15 @@ var init_enhancePrompt = __esm({
4742
4851
 
4743
4852
  // src/subagents/designExpert/tools/images/imageGenerator.ts
4744
4853
  async function generateImageAssets(opts) {
4745
- const { prompts, sourceImages, transparentBackground, onLog } = opts;
4854
+ const {
4855
+ prompts,
4856
+ sourceImages,
4857
+ transparentBackground,
4858
+ onLog,
4859
+ imageGenerationModel: genModel,
4860
+ imageAnalysisModel,
4861
+ imagePromptEnhancerModel
4862
+ } = opts;
4746
4863
  const width = opts.width || 2048;
4747
4864
  const height = opts.height || 2048;
4748
4865
  const config = { width, height };
@@ -4757,7 +4874,8 @@ async function generateImageAssets(opts) {
4757
4874
  width,
4758
4875
  height,
4759
4876
  transparentBackground,
4760
- onLog
4877
+ onLog,
4878
+ model: imagePromptEnhancerModel
4761
4879
  })
4762
4880
  )
4763
4881
  );
@@ -4766,7 +4884,7 @@ async function generateImageAssets(opts) {
4766
4884
  const step = JSON.stringify({
4767
4885
  prompt: enhancedPrompts[0],
4768
4886
  imageModelOverride: {
4769
- model: "seedream-4.5",
4887
+ model: genModel,
4770
4888
  config
4771
4889
  }
4772
4890
  });
@@ -4785,7 +4903,7 @@ async function generateImageAssets(opts) {
4785
4903
  step: {
4786
4904
  prompt,
4787
4905
  imageModelOverride: {
4788
- model: "seedream-4.5",
4906
+ model: genModel,
4789
4907
  config
4790
4908
  }
4791
4909
  }
@@ -4837,7 +4955,8 @@ async function generateImageAssets(opts) {
4837
4955
  const analysis = await analyzeImage({
4838
4956
  prompt: ANALYZE_PROMPT,
4839
4957
  imageUrl: url,
4840
- onLog
4958
+ onLog,
4959
+ model: imageAnalysisModel
4841
4960
  });
4842
4961
  return {
4843
4962
  url,
@@ -4868,13 +4987,28 @@ __export(generateImages_exports, {
4868
4987
  definition: () => definition6,
4869
4988
  execute: () => execute6
4870
4989
  });
4871
- async function execute6(input, onLog) {
4990
+ async function execute6(input, onLog, context) {
4872
4991
  return generateImageAssets({
4873
4992
  prompts: input.prompts,
4874
4993
  width: input.width,
4875
4994
  height: input.height,
4876
4995
  transparentBackground: input.transparentBackground,
4877
- onLog
4996
+ onLog,
4997
+ imageGenerationModel: resolveModel(
4998
+ "imageGeneration",
4999
+ context?.models,
5000
+ context?.model
5001
+ ),
5002
+ imageAnalysisModel: resolveModel(
5003
+ "imageAnalysis",
5004
+ context?.models,
5005
+ context?.model
5006
+ ),
5007
+ imagePromptEnhancerModel: resolveModel(
5008
+ "imagePromptEnhancer",
5009
+ context?.models,
5010
+ context?.model
5011
+ )
4878
5012
  });
4879
5013
  }
4880
5014
  var definition6;
@@ -4882,6 +5016,7 @@ var init_generateImages = __esm({
4882
5016
  "src/subagents/designExpert/tools/images/generateImages.ts"() {
4883
5017
  "use strict";
4884
5018
  init_imageGenerator();
5019
+ init_surfaces();
4885
5020
  definition6 = {
4886
5021
  clearable: false,
4887
5022
  name: "generateImages",
@@ -4921,14 +5056,29 @@ __export(editImages_exports, {
4921
5056
  definition: () => definition7,
4922
5057
  execute: () => execute7
4923
5058
  });
4924
- async function execute7(input, onLog) {
5059
+ async function execute7(input, onLog, context) {
4925
5060
  return generateImageAssets({
4926
5061
  prompts: input.prompts,
4927
5062
  sourceImages: input.sourceImages,
4928
5063
  width: input.width,
4929
5064
  height: input.height,
4930
5065
  transparentBackground: input.transparentBackground,
4931
- onLog
5066
+ onLog,
5067
+ imageGenerationModel: resolveModel(
5068
+ "imageGeneration",
5069
+ context?.models,
5070
+ context?.model
5071
+ ),
5072
+ imageAnalysisModel: resolveModel(
5073
+ "imageAnalysis",
5074
+ context?.models,
5075
+ context?.model
5076
+ ),
5077
+ imagePromptEnhancerModel: resolveModel(
5078
+ "imagePromptEnhancer",
5079
+ context?.models,
5080
+ context?.model
5081
+ )
4932
5082
  });
4933
5083
  }
4934
5084
  var definition7;
@@ -4936,6 +5086,7 @@ var init_editImages = __esm({
4936
5086
  "src/subagents/designExpert/tools/images/editImages.ts"() {
4937
5087
  "use strict";
4938
5088
  init_imageGenerator();
5089
+ init_surfaces();
4939
5090
  definition7 = {
4940
5091
  clearable: false,
4941
5092
  name: "editImages",
@@ -5474,6 +5625,7 @@ var init_designExpert = __esm({
5474
5625
  init_tools2();
5475
5626
  init_prompt3();
5476
5627
  init_history();
5628
+ init_surfaces();
5477
5629
  DESCRIPTION = `
5478
5630
  Visual design expert. Describe the situation and what you need \u2014 the agent decides what to deliver. It reads the spec files automatically. Include relevant user requirements and context it can't get from the spec, but do not list specific deliverables or tell it how to do its job. Do not suggest implementation details or ideas - only relay what is needed.
5479
5631
  `.trim();
@@ -5522,7 +5674,7 @@ Visual design expert. Describe the situation and what you need \u2014 the agent
5522
5674
  );
5523
5675
  },
5524
5676
  apiConfig: context.apiConfig,
5525
- model: context.models?.visualDesignExpert ?? context.model,
5677
+ model: resolveModel("visualDesignExpert", context.models, context.model),
5526
5678
  subAgentId: "visualDesignExpert",
5527
5679
  signal: context.signal,
5528
5680
  parentToolId: context.toolCallId,
@@ -5740,6 +5892,7 @@ var init_productVision = __esm({
5740
5892
  init_executor();
5741
5893
  init_prompt4();
5742
5894
  init_history();
5895
+ init_surfaces();
5743
5896
  productVisionTool = {
5744
5897
  clearable: false,
5745
5898
  definition: {
@@ -5779,7 +5932,7 @@ var init_productVision = __esm({
5779
5932
  return executeVisionTool(name, input2, childCtx);
5780
5933
  },
5781
5934
  apiConfig: context.apiConfig,
5782
- model: context.models?.productVision ?? context.model,
5935
+ model: resolveModel("productVision", context.models, context.model),
5783
5936
  subAgentId: "productVision",
5784
5937
  signal: context.signal,
5785
5938
  parentToolId: context.toolCallId,
@@ -5870,6 +6023,7 @@ var init_codeSanityCheck = __esm({
5870
6023
  init_context();
5871
6024
  init_tools6();
5872
6025
  init_tools5();
6026
+ init_surfaces();
5873
6027
  BASE_PROMPT3 = readAsset("subagents/codeSanityCheck", "prompt.md");
5874
6028
  codeSanityCheckTool = {
5875
6029
  clearable: false,
@@ -5905,7 +6059,7 @@ var init_codeSanityCheck = __esm({
5905
6059
  externalTools: /* @__PURE__ */ new Set(),
5906
6060
  executeTool: (name, toolInput) => executeTool(name, toolInput, context),
5907
6061
  apiConfig: context.apiConfig,
5908
- model: context.models?.codeSanityCheck ?? context.model,
6062
+ model: resolveModel("codeSanityCheck", context.models, context.model),
5909
6063
  subAgentId: "codeSanityCheck",
5910
6064
  signal: context.signal,
5911
6065
  parentToolId: context.toolCallId,
@@ -6869,7 +7023,7 @@ async function runTurn(params) {
6869
7023
  onEvent({ type: "tool_input_delta", id, name, result: content });
6870
7024
  }
6871
7025
  }
6872
- const parentModel = state.models?.parent ?? model;
7026
+ const parentModel = resolveModel("parent", state.models, model);
6873
7027
  try {
6874
7028
  for await (const event of streamChatWithRetry(
6875
7029
  {
@@ -7213,7 +7367,10 @@ async function runTurn(params) {
7213
7367
  isError: r.isError
7214
7368
  });
7215
7369
  if (!r.isError && BRAND_TRIGGERING_TOOLS.has(tc.name)) {
7216
- triggerBrandExtraction(apiConfig);
7370
+ triggerBrandExtraction(
7371
+ apiConfig,
7372
+ resolveModel("brandExtractor", state.models, model)
7373
+ );
7217
7374
  }
7218
7375
  return r;
7219
7376
  })
@@ -7270,6 +7427,7 @@ var init_agent = __esm({
7270
7427
  init_tools6();
7271
7428
  init_sentinel();
7272
7429
  init_trigger2();
7430
+ init_surfaces();
7273
7431
  log10 = createLogger("agent");
7274
7432
  BRAND_TRIGGERING_TOOLS = /* @__PURE__ */ new Set(["writeSpec", "editSpec"]);
7275
7433
  EXTERNAL_TOOLS = /* @__PURE__ */ new Set([
@@ -7731,6 +7889,7 @@ var init_headless = __esm({
7731
7889
  init_lsp();
7732
7890
  init_agent();
7733
7891
  init_session();
7892
+ init_surfaces();
7734
7893
  init_toolRegistry();
7735
7894
  init_attachments();
7736
7895
  init_planFile();
@@ -7811,12 +7970,14 @@ var init_headless = __esm({
7811
7970
  this.emit("session_restored", {
7812
7971
  messageCount: this.state.messages.length,
7813
7972
  ...this.state.models && { models: this.state.models },
7973
+ modelSurfaces: MODEL_SURFACES,
7974
+ allowedModelsByType: ALLOWED_MODELS_BY_TYPE,
7814
7975
  ...this.queueFields()
7815
7976
  });
7816
7977
  }
7817
7978
  triggerBrandExtraction(
7818
7979
  this.config,
7819
- this.state.models?.brandExtractor ?? this.opts.model
7980
+ resolveModel("brandExtractor", this.state.models, this.opts.model)
7820
7981
  );
7821
7982
  this.toolRegistry.onEvent = this.onEvent;
7822
7983
  setCompactionListener((event) => {
@@ -8494,6 +8655,8 @@ var init_headless = __esm({
8494
8655
  running: this.running,
8495
8656
  ...this.running && this.currentRequestId ? { currentRequestId: this.currentRequestId } : {},
8496
8657
  ...this.state.models && { models: this.state.models },
8658
+ modelSurfaces: MODEL_SURFACES,
8659
+ allowedModelsByType: ALLOWED_MODELS_BY_TYPE,
8497
8660
  ...this.queueFields()
8498
8661
  }));
8499
8662
  return;