@mindstudio-ai/remy 0.1.178 → 0.1.180

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/dist/headless.js +317 -171
  2. package/dist/index.js +337 -174
  3. package/package.json +1 -1
package/dist/headless.js CHANGED
@@ -404,7 +404,7 @@ async function* streamChat(params) {
404
404
  const url = `${baseUrl2}/_internal/v2/agent/remy/chat`;
405
405
  const startTime = Date.now();
406
406
  const subAgentId = rest.subAgentId;
407
- const requestBody = { ...rest, ...model && { modelId: model } };
407
+ const requestBody = { ...rest, modelId: model };
408
408
  log2.info("API request", {
409
409
  requestId,
410
410
  ...subAgentId && { subAgentId },
@@ -2787,13 +2787,8 @@ var queryDatabaseTool = {
2787
2787
  };
2788
2788
 
2789
2789
  // src/subagents/common/analyzeImage.ts
2790
- var VISION_MODEL = "claude-4-6-sonnet";
2791
- var VISION_MODEL_OVERRIDE = {
2792
- model: VISION_MODEL,
2793
- config: { thinkingBudget: "off" }
2794
- };
2795
2790
  async function analyzeImage(params) {
2796
- const { prompt, imageUrl, timeout = 2e5, onLog } = params;
2791
+ const { prompt, imageUrl, model, timeout = 2e5, onLog } = params;
2797
2792
  return runMindstudioCli(
2798
2793
  [
2799
2794
  "analyze-image",
@@ -2802,7 +2797,7 @@ async function analyzeImage(params) {
2802
2797
  "--image-url",
2803
2798
  imageUrl,
2804
2799
  "--vision-model-override",
2805
- JSON.stringify(VISION_MODEL_OVERRIDE)
2800
+ JSON.stringify({ model })
2806
2801
  ],
2807
2802
  { outputKey: "analysis", timeout, onLog }
2808
2803
  );
@@ -2830,13 +2825,14 @@ ${TEXT_WRAP_DISCLAIMER}`;
2830
2825
  return p;
2831
2826
  }
2832
2827
  async function streamScreenshotAnalysis(opts) {
2833
- const { url, prompt, styleMap, onLog } = opts;
2828
+ const { url, prompt, styleMap, onLog, model } = opts;
2834
2829
  onLog?.(JSON.stringify({ url, analysis: null }));
2835
2830
  const analysisPrompt = buildScreenshotAnalysisPrompt({ prompt, styleMap });
2836
2831
  let accumulated = "";
2837
2832
  const analysis = await analyzeImage({
2838
2833
  prompt: analysisPrompt,
2839
2834
  imageUrl: url,
2835
+ model,
2840
2836
  onLog: (chunk) => {
2841
2837
  accumulated += chunk;
2842
2838
  onLog?.(JSON.stringify({ url, analysis: accumulated }));
@@ -2848,12 +2844,14 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2848
2844
  let prompt;
2849
2845
  let existingUrl;
2850
2846
  let onLog;
2847
+ let model;
2851
2848
  let path12;
2852
2849
  if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
2853
2850
  prompt = promptOrOptions.prompt;
2854
2851
  existingUrl = promptOrOptions.imageUrl;
2855
2852
  path12 = promptOrOptions.path;
2856
2853
  onLog = promptOrOptions.onLog;
2854
+ model = promptOrOptions.model;
2857
2855
  } else {
2858
2856
  prompt = promptOrOptions;
2859
2857
  }
@@ -2878,11 +2876,17 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
2878
2876
  if (prompt === false) {
2879
2877
  return url;
2880
2878
  }
2879
+ if (!model) {
2880
+ throw new Error(
2881
+ "captureAndAnalyzeScreenshot: `model` is required when analysis is enabled"
2882
+ );
2883
+ }
2881
2884
  return streamScreenshotAnalysis({
2882
2885
  url,
2883
2886
  prompt: prompt || void 0,
2884
2887
  styleMap,
2885
- onLog
2888
+ onLog,
2889
+ model
2886
2890
  });
2887
2891
  }
2888
2892
 
@@ -3715,8 +3719,215 @@ ${appSpec}
3715
3719
  }
3716
3720
  }
3717
3721
 
3722
+ // src/models/surfaces.ts
3723
+ var MODEL_SURFACES = {
3724
+ parent: {
3725
+ default: "claude-4-7-opus",
3726
+ label: "Remy",
3727
+ description: "The main Remy agent you chat with about your product. Writes code and manages delegation to other agents.",
3728
+ modelType: "text",
3729
+ userPickable: true
3730
+ },
3731
+ visualDesignExpert: {
3732
+ default: "claude-4-7-opus",
3733
+ label: "Design Agent",
3734
+ description: "Designs your product's interfaces, including components, layouts, typography, color, and visual identity.",
3735
+ modelType: "text",
3736
+ userPickable: true
3737
+ },
3738
+ productVision: {
3739
+ default: "claude-4-6-sonnet",
3740
+ label: "Roadmap Agent",
3741
+ description: "Owns your product's roadmap and pitch deck. Helps decide what to build next and how to frame the big picture.",
3742
+ modelType: "text",
3743
+ userPickable: true
3744
+ },
3745
+ browserAutomation: {
3746
+ default: "claude-4-6-sonnet",
3747
+ label: "QA Agent",
3748
+ description: "Tests features and UI flows in an automated browser to verify they work end to end.",
3749
+ modelType: "text",
3750
+ userPickable: true
3751
+ },
3752
+ codeSanityCheck: {
3753
+ default: "claude-4-6-sonnet",
3754
+ label: "Architecture Agent",
3755
+ description: "Reviews the architecture and structure of code changes to avoid technical debt.",
3756
+ modelType: "text",
3757
+ userPickable: true
3758
+ },
3759
+ imageGeneration: {
3760
+ default: "seedream-4.5",
3761
+ label: "Image Generation",
3762
+ description: "Creates images for your product \u2014 icons, illustrations, photos, and any other visual assets.",
3763
+ modelType: "image_generation",
3764
+ userPickable: true
3765
+ },
3766
+ imageAnalysis: {
3767
+ default: "claude-4-6-sonnet",
3768
+ label: "Image Analysis",
3769
+ description: "Reads screenshots taken by the QA agent during automated browser tests. Other agents use their own built-in image analysis when they need to read images.",
3770
+ modelType: "vision",
3771
+ userPickable: true
3772
+ },
3773
+ conversationSummarizer: {
3774
+ default: "claude-4-6-sonnet",
3775
+ label: "Compaction Utility",
3776
+ description: "Compresses long conversations into summaries to keep things responsive.",
3777
+ modelType: "text",
3778
+ userPickable: true
3779
+ },
3780
+ brandExtractor: {
3781
+ default: "claude-4-6-sonnet",
3782
+ label: "Brand Utility",
3783
+ description: "Extracts your product's name, colors, and fonts from your spec for use in branded documents.",
3784
+ modelType: "text",
3785
+ userPickable: true
3786
+ },
3787
+ // Internal surface — not user-pickable. Remy uses this to rewrite design
3788
+ // briefs into model-optimized image prompts before image generation.
3789
+ imagePromptEnhancer: {
3790
+ default: "claude-4-6-sonnet",
3791
+ label: "Image Prompt Enhancer",
3792
+ description: "Rewrites image briefs into model-optimized prompts before image generation.",
3793
+ modelType: "text",
3794
+ userPickable: false
3795
+ }
3796
+ };
3797
+ var ALLOWED_MODELS_BY_TYPE = {
3798
+ text: [
3799
+ "claude-4-7-opus",
3800
+ "claude-4-6-opus",
3801
+ "claude-4-6-sonnet",
3802
+ "gpt-5.5",
3803
+ "gemini-3-pro",
3804
+ "gemini-3.1-pro",
3805
+ "gemini-3-flash"
3806
+ ]
3807
+ // vision: undefined — unconstrained
3808
+ // image_generation: undefined — unconstrained
3809
+ };
3810
+ function resolveModel(surfaceId, models, fallback) {
3811
+ return models?.[surfaceId] ?? fallback ?? MODEL_SURFACES[surfaceId].default;
3812
+ }
3813
+
3718
3814
  // src/subagents/browserAutomation/index.ts
3719
3815
  var log6 = createLogger("browser-automation");
3816
+ async function runBrowserAutomation(task, context) {
3817
+ const release = await acquireBrowserLock();
3818
+ try {
3819
+ const result = await runSubAgent({
3820
+ system: getBrowserAutomationPrompt(),
3821
+ task,
3822
+ tools: BROWSER_TOOLS,
3823
+ externalTools: BROWSER_EXTERNAL_TOOLS,
3824
+ executeTool: async (name, _input, _toolCallId, onLog) => {
3825
+ if (name === "setupBrowser") {
3826
+ try {
3827
+ const result2 = await sidecarRequest(
3828
+ "/setup-browser",
3829
+ {
3830
+ auth: _input.auth,
3831
+ path: _input.path
3832
+ },
3833
+ { timeout: 15e3 }
3834
+ );
3835
+ return JSON.stringify(result2);
3836
+ } catch (err) {
3837
+ return `Error setting up browser: ${err.message}`;
3838
+ }
3839
+ }
3840
+ if (name === "screenshotFullPage") {
3841
+ try {
3842
+ return await captureAndAnalyzeScreenshot({
3843
+ path: _input.path,
3844
+ onLog,
3845
+ model: resolveModel(
3846
+ "imageAnalysis",
3847
+ context.models,
3848
+ context.model
3849
+ )
3850
+ });
3851
+ } catch (err) {
3852
+ return `Error taking screenshot: ${err.message}`;
3853
+ }
3854
+ }
3855
+ return `Error: unknown local tool "${name}"`;
3856
+ },
3857
+ apiConfig: context.apiConfig,
3858
+ model: resolveModel("browserAutomation", context.models, context.model),
3859
+ subAgentId: "browserAutomation",
3860
+ signal: context.signal,
3861
+ parentToolId: context.toolCallId,
3862
+ requestId: context.requestId,
3863
+ onEvent: context.onEvent,
3864
+ resolveExternalTool: async (id, name, input) => {
3865
+ if (!context.resolveExternalTool) {
3866
+ return "Error: no external tool resolver";
3867
+ }
3868
+ const result2 = await context.resolveExternalTool(id, name, input);
3869
+ if (name === "browserCommand") {
3870
+ try {
3871
+ const parsed = JSON.parse(result2);
3872
+ const screenshotSteps = (parsed.steps || []).filter(
3873
+ (s) => s.command === "screenshotViewport" && s.result?.url
3874
+ );
3875
+ if (screenshotSteps.length > 0) {
3876
+ const visionOverride = {
3877
+ model: resolveModel(
3878
+ "imageAnalysis",
3879
+ context.models,
3880
+ context.model
3881
+ )
3882
+ };
3883
+ const batchInput = screenshotSteps.map((s) => ({
3884
+ stepType: "analyzeImage",
3885
+ step: {
3886
+ imageUrl: s.result.url,
3887
+ prompt: buildScreenshotAnalysisPrompt({
3888
+ styleMap: s.result.styleMap
3889
+ }),
3890
+ visionModelOverride: visionOverride
3891
+ }
3892
+ }));
3893
+ const batchResult = await runMindstudioCli(
3894
+ ["batch", JSON.stringify(batchInput)],
3895
+ { timeout: 2e5, caller: "browserAutomation" }
3896
+ );
3897
+ try {
3898
+ const analyses = JSON.parse(batchResult);
3899
+ let ai = 0;
3900
+ for (const step of parsed.steps) {
3901
+ if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
3902
+ step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
3903
+ ai++;
3904
+ }
3905
+ }
3906
+ } catch {
3907
+ log6.debug("Failed to parse batch analysis result", {
3908
+ batchResult
3909
+ });
3910
+ }
3911
+ return JSON.stringify(parsed);
3912
+ }
3913
+ } catch {
3914
+ }
3915
+ }
3916
+ return result2;
3917
+ },
3918
+ toolRegistry: context.toolRegistry,
3919
+ captureArtifacts: ["screenshotFullPage"]
3920
+ });
3921
+ context.subAgentMessages?.set(context.toolCallId, result.messages);
3922
+ const ss = result.artifacts?.screenshotFullPage;
3923
+ return {
3924
+ text: result.text,
3925
+ ...ss?.url ? { screenshot: { url: ss.url, styleMap: ss.styleMap } } : {}
3926
+ };
3927
+ } finally {
3928
+ release();
3929
+ }
3930
+ }
3720
3931
  var browserAutomationTool = {
3721
3932
  clearable: true,
3722
3933
  definition: {
@@ -3737,109 +3948,13 @@ var browserAutomationTool = {
3737
3948
  if (!context) {
3738
3949
  return "Error: browser automation requires execution context (only available in headless mode)";
3739
3950
  }
3740
- const release = await acquireBrowserLock();
3741
- try {
3742
- const result = await runSubAgent({
3743
- system: getBrowserAutomationPrompt(),
3744
- task: input.task,
3745
- tools: BROWSER_TOOLS,
3746
- externalTools: BROWSER_EXTERNAL_TOOLS,
3747
- executeTool: async (name, _input, _toolCallId, onLog) => {
3748
- if (name === "setupBrowser") {
3749
- try {
3750
- const result2 = await sidecarRequest(
3751
- "/setup-browser",
3752
- {
3753
- auth: _input.auth,
3754
- path: _input.path
3755
- },
3756
- { timeout: 15e3 }
3757
- );
3758
- return JSON.stringify(result2);
3759
- } catch (err) {
3760
- return `Error setting up browser: ${err.message}`;
3761
- }
3762
- }
3763
- if (name === "screenshotFullPage") {
3764
- try {
3765
- return await captureAndAnalyzeScreenshot({
3766
- path: _input.path,
3767
- onLog
3768
- });
3769
- } catch (err) {
3770
- return `Error taking screenshot: ${err.message}`;
3771
- }
3772
- }
3773
- return `Error: unknown local tool "${name}"`;
3774
- },
3775
- apiConfig: context.apiConfig,
3776
- model: context.models?.browserAutomation ?? context.model,
3777
- subAgentId: "browserAutomation",
3778
- signal: context.signal,
3779
- parentToolId: context.toolCallId,
3780
- requestId: context.requestId,
3781
- onEvent: context.onEvent,
3782
- resolveExternalTool: async (id, name, input2) => {
3783
- if (!context.resolveExternalTool) {
3784
- return "Error: no external tool resolver";
3785
- }
3786
- const result2 = await context.resolveExternalTool(id, name, input2);
3787
- if (name === "browserCommand") {
3788
- try {
3789
- const parsed = JSON.parse(result2);
3790
- const screenshotSteps = (parsed.steps || []).filter(
3791
- (s) => s.command === "screenshotViewport" && s.result?.url
3792
- );
3793
- if (screenshotSteps.length > 0) {
3794
- const batchInput = screenshotSteps.map((s) => ({
3795
- stepType: "analyzeImage",
3796
- step: {
3797
- imageUrl: s.result.url,
3798
- prompt: buildScreenshotAnalysisPrompt({
3799
- styleMap: s.result.styleMap
3800
- }),
3801
- visionModelOverride: VISION_MODEL_OVERRIDE
3802
- }
3803
- }));
3804
- const batchResult = await runMindstudioCli(
3805
- ["batch", JSON.stringify(batchInput)],
3806
- { timeout: 2e5, caller: "browserAutomation" }
3807
- );
3808
- try {
3809
- const analyses = JSON.parse(batchResult);
3810
- let ai = 0;
3811
- for (const step of parsed.steps) {
3812
- if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
3813
- step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
3814
- ai++;
3815
- }
3816
- }
3817
- } catch {
3818
- log6.debug("Failed to parse batch analysis result", {
3819
- batchResult
3820
- });
3821
- }
3822
- return JSON.stringify(parsed);
3823
- }
3824
- } catch {
3825
- }
3826
- }
3827
- return result2;
3828
- },
3829
- toolRegistry: context.toolRegistry,
3830
- captureArtifacts: ["screenshotFullPage"]
3831
- });
3832
- context.subAgentMessages?.set(context.toolCallId, result.messages);
3833
- const ss = result.artifacts?.screenshotFullPage;
3834
- if (ss?.url) {
3835
- return `${result.text}
3951
+ const result = await runBrowserAutomation(input.task, context);
3952
+ if (result.screenshot) {
3953
+ return `${result.text}
3836
3954
 
3837
- ![Final state](${ss.url})`;
3838
- }
3839
- return result.text;
3840
- } finally {
3841
- release();
3955
+ ![Final state](${result.screenshot.url})`;
3842
3956
  }
3957
+ return result.text;
3843
3958
  }
3844
3959
  };
3845
3960
 
@@ -3877,29 +3992,22 @@ var screenshotTool = {
3877
3992
  return await captureAndAnalyzeScreenshot({
3878
3993
  prompt: input.prompt,
3879
3994
  imageUrl: input.imageUrl,
3880
- onLog: context?.onLog
3995
+ onLog: context?.onLog,
3996
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
3881
3997
  });
3882
3998
  }
3883
3999
  if (input.instructions && context) {
3884
4000
  const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
3885
- const result = await browserAutomationTool.execute({ task }, context);
3886
- const resultStr = result;
3887
- let url;
3888
- let styleMap;
3889
- try {
3890
- const parsed = JSON.parse(resultStr);
3891
- url = parsed.screenshotUrl;
3892
- styleMap = parsed.styleMap;
3893
- } catch {
3894
- }
3895
- if (!url) {
3896
- return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
4001
+ const result = await runBrowserAutomation(task, context);
4002
+ if (!result.screenshot) {
4003
+ return result.text;
3897
4004
  }
3898
4005
  return await streamScreenshotAnalysis({
3899
- url,
4006
+ url: result.screenshot.url,
3900
4007
  prompt: input.prompt,
3901
- styleMap,
3902
- onLog: context?.onLog
4008
+ styleMap: result.screenshot.styleMap,
4009
+ onLog: context?.onLog,
4010
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
3903
4011
  });
3904
4012
  }
3905
4013
  const release = await acquireBrowserLock();
@@ -3907,7 +4015,8 @@ var screenshotTool = {
3907
4015
  return await captureAndAnalyzeScreenshot({
3908
4016
  prompt: input.prompt,
3909
4017
  path: input.path,
3910
- onLog: context?.onLog
4018
+ onLog: context?.onLog,
4019
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
3911
4020
  });
3912
4021
  } finally {
3913
4022
  release();
@@ -4118,7 +4227,7 @@ var definition3 = {
4118
4227
  required: ["url"]
4119
4228
  }
4120
4229
  };
4121
- async function execute3(input, onLog) {
4230
+ async function execute3(input, onLog, context) {
4122
4231
  const url = input.url;
4123
4232
  const analysisPrompt = input.prompt || DESIGN_REFERENCE_PROMPT;
4124
4233
  const isImageUrl = /\.(png|jpe?g|webp|gif|svg|avif)(\?|$)/i.test(url);
@@ -4151,7 +4260,8 @@ async function execute3(input, onLog) {
4151
4260
  const analysis = await analyzeImage({
4152
4261
  prompt: analysisPrompt,
4153
4262
  imageUrl,
4154
- onLog
4263
+ onLog,
4264
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4155
4265
  });
4156
4266
  return JSON.stringify({ url: imageUrl, analysis });
4157
4267
  }
@@ -4181,7 +4291,7 @@ var definition4 = {
4181
4291
  required: ["imageUrl"]
4182
4292
  }
4183
4293
  };
4184
- async function execute4(input, onLog) {
4294
+ async function execute4(input, onLog, context) {
4185
4295
  const imageUrl = input.imageUrl;
4186
4296
  const prompt = buildScreenshotAnalysisPrompt({
4187
4297
  prompt: input.prompt
@@ -4189,7 +4299,8 @@ async function execute4(input, onLog) {
4189
4299
  const analysis = await analyzeImage({
4190
4300
  prompt,
4191
4301
  imageUrl,
4192
- onLog
4302
+ onLog,
4303
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4193
4304
  });
4194
4305
  return JSON.stringify({ url: imageUrl, analysis });
4195
4306
  }
@@ -4226,24 +4337,16 @@ async function execute5(input, onLog, context) {
4226
4337
  if (input.instructions && context) {
4227
4338
  try {
4228
4339
  const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
4229
- const result = await browserAutomationTool.execute({ task }, context);
4230
- const resultStr = result;
4231
- let url;
4232
- let styleMap;
4233
- try {
4234
- const parsed = JSON.parse(resultStr);
4235
- url = parsed.screenshotUrl;
4236
- styleMap = parsed.styleMap;
4237
- } catch {
4238
- }
4239
- if (!url) {
4240
- return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
4340
+ const result = await runBrowserAutomation(task, context);
4341
+ if (!result.screenshot) {
4342
+ return result.text;
4241
4343
  }
4242
4344
  return await streamScreenshotAnalysis({
4243
- url,
4345
+ url: result.screenshot.url,
4244
4346
  prompt: input.prompt,
4245
- styleMap,
4246
- onLog
4347
+ styleMap: result.screenshot.styleMap,
4348
+ onLog,
4349
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4247
4350
  });
4248
4351
  } catch (err) {
4249
4352
  return `Error taking interactive screenshot: ${err.message}`;
@@ -4254,7 +4357,8 @@ async function execute5(input, onLog, context) {
4254
4357
  return await captureAndAnalyzeScreenshot({
4255
4358
  prompt: input.prompt,
4256
4359
  path: input.path,
4257
- onLog
4360
+ onLog,
4361
+ model: resolveModel("imageAnalysis", context?.models, context?.model)
4258
4362
  });
4259
4363
  } catch (err) {
4260
4364
  return `Error taking screenshot: ${err.message}`;
@@ -4271,16 +4375,11 @@ __export(generateImages_exports, {
4271
4375
  });
4272
4376
 
4273
4377
  // src/subagents/designExpert/tools/images/enhancePrompt.ts
4274
- var ENHANCE_MODEL = "claude-4-6-sonnet";
4275
- var MODEL_OVERRIDE = {
4276
- model: ENHANCE_MODEL,
4277
- config: { reasoning: "false" }
4278
- };
4279
4378
  var SYSTEM_PROMPT = readAsset(
4280
4379
  "subagents/designExpert/tools/images/enhance-image-prompt.md"
4281
4380
  );
4282
4381
  async function enhanceImagePrompt(params) {
4283
- const { brief, width, height, transparentBackground, onLog } = params;
4382
+ const { brief, width, height, transparentBackground, onLog, model } = params;
4284
4383
  const contextParts = [
4285
4384
  `Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
4286
4385
  ];
@@ -4305,7 +4404,7 @@ ${brief}
4305
4404
  "--message",
4306
4405
  message,
4307
4406
  "--model-override",
4308
- JSON.stringify(MODEL_OVERRIDE)
4407
+ JSON.stringify({ model, config: { reasoning: "false" } })
4309
4408
  ],
4310
4409
  { outputKey: "content", timeout: 6e4, onLog, caller: "designExpert" }
4311
4410
  );
@@ -4315,7 +4414,15 @@ ${brief}
4315
4414
  // src/subagents/designExpert/tools/images/imageGenerator.ts
4316
4415
  var ANALYZE_PROMPT = 'You are reviewing this image for a visual designer sourcing assets for a project. Describe: what the image depicts, the mood and color palette, how the lighting and composition work, any text present in the image, whether there are any issues (artifacts, distortions), and how it could be used in a layout for an app or website. Be concise and practical. Respond only with your analysis as Markdown (starting with the title "Asset Review") and absolutely no other text. Do not use emojis - use unicode if you need symbols.';
4317
4416
  async function generateImageAssets(opts) {
4318
- const { prompts, sourceImages, transparentBackground, onLog } = opts;
4417
+ const {
4418
+ prompts,
4419
+ sourceImages,
4420
+ transparentBackground,
4421
+ onLog,
4422
+ imageGenerationModel: genModel,
4423
+ imageAnalysisModel,
4424
+ imagePromptEnhancerModel
4425
+ } = opts;
4319
4426
  const width = opts.width || 2048;
4320
4427
  const height = opts.height || 2048;
4321
4428
  const config = { width, height };
@@ -4330,7 +4437,8 @@ async function generateImageAssets(opts) {
4330
4437
  width,
4331
4438
  height,
4332
4439
  transparentBackground,
4333
- onLog
4440
+ onLog,
4441
+ model: imagePromptEnhancerModel
4334
4442
  })
4335
4443
  )
4336
4444
  );
@@ -4339,7 +4447,7 @@ async function generateImageAssets(opts) {
4339
4447
  const step = JSON.stringify({
4340
4448
  prompt: enhancedPrompts[0],
4341
4449
  imageModelOverride: {
4342
- model: "seedream-4.5",
4450
+ model: genModel,
4343
4451
  config
4344
4452
  }
4345
4453
  });
@@ -4358,7 +4466,7 @@ async function generateImageAssets(opts) {
4358
4466
  step: {
4359
4467
  prompt,
4360
4468
  imageModelOverride: {
4361
- model: "seedream-4.5",
4469
+ model: genModel,
4362
4470
  config
4363
4471
  }
4364
4472
  }
@@ -4410,7 +4518,8 @@ async function generateImageAssets(opts) {
4410
4518
  const analysis = await analyzeImage({
4411
4519
  prompt: ANALYZE_PROMPT,
4412
4520
  imageUrl: url,
4413
- onLog
4521
+ onLog,
4522
+ model: imageAnalysisModel
4414
4523
  });
4415
4524
  return {
4416
4525
  url,
@@ -4456,13 +4565,28 @@ var definition6 = {
4456
4565
  required: ["prompts"]
4457
4566
  }
4458
4567
  };
4459
- async function execute6(input, onLog) {
4568
+ async function execute6(input, onLog, context) {
4460
4569
  return generateImageAssets({
4461
4570
  prompts: input.prompts,
4462
4571
  width: input.width,
4463
4572
  height: input.height,
4464
4573
  transparentBackground: input.transparentBackground,
4465
- onLog
4574
+ onLog,
4575
+ imageGenerationModel: resolveModel(
4576
+ "imageGeneration",
4577
+ context?.models,
4578
+ context?.model
4579
+ ),
4580
+ imageAnalysisModel: resolveModel(
4581
+ "imageAnalysis",
4582
+ context?.models,
4583
+ context?.model
4584
+ ),
4585
+ imagePromptEnhancerModel: resolveModel(
4586
+ "imagePromptEnhancer",
4587
+ context?.models,
4588
+ context?.model
4589
+ )
4466
4590
  });
4467
4591
  }
4468
4592
 
@@ -4509,14 +4633,29 @@ var definition7 = {
4509
4633
  required: ["prompts", "sourceImages"]
4510
4634
  }
4511
4635
  };
4512
- async function execute7(input, onLog) {
4636
+ async function execute7(input, onLog, context) {
4513
4637
  return generateImageAssets({
4514
4638
  prompts: input.prompts,
4515
4639
  sourceImages: input.sourceImages,
4516
4640
  width: input.width,
4517
4641
  height: input.height,
4518
4642
  transparentBackground: input.transparentBackground,
4519
- onLog
4643
+ onLog,
4644
+ imageGenerationModel: resolveModel(
4645
+ "imageGeneration",
4646
+ context?.models,
4647
+ context?.model
4648
+ ),
4649
+ imageAnalysisModel: resolveModel(
4650
+ "imageAnalysis",
4651
+ context?.models,
4652
+ context?.model
4653
+ ),
4654
+ imagePromptEnhancerModel: resolveModel(
4655
+ "imagePromptEnhancer",
4656
+ context?.models,
4657
+ context?.model
4658
+ )
4520
4659
  });
4521
4660
  }
4522
4661
 
@@ -4989,7 +5128,7 @@ var designExpertTool = {
4989
5128
  );
4990
5129
  },
4991
5130
  apiConfig: context.apiConfig,
4992
- model: context.models?.visualDesignExpert ?? context.model,
5131
+ model: resolveModel("visualDesignExpert", context.models, context.model),
4993
5132
  subAgentId: "visualDesignExpert",
4994
5133
  signal: context.signal,
4995
5134
  parentToolId: context.toolCallId,
@@ -5207,7 +5346,7 @@ var productVisionTool = {
5207
5346
  return executeVisionTool(name, input2, childCtx);
5208
5347
  },
5209
5348
  apiConfig: context.apiConfig,
5210
- model: context.models?.productVision ?? context.model,
5349
+ model: resolveModel("productVision", context.models, context.model),
5211
5350
  subAgentId: "productVision",
5212
5351
  signal: context.signal,
5213
5352
  parentToolId: context.toolCallId,
@@ -5315,7 +5454,7 @@ var codeSanityCheckTool = {
5315
5454
  externalTools: /* @__PURE__ */ new Set(),
5316
5455
  executeTool: (name, toolInput) => executeTool(name, toolInput, context),
5317
5456
  apiConfig: context.apiConfig,
5318
- model: context.models?.codeSanityCheck ?? context.model,
5457
+ model: resolveModel("codeSanityCheck", context.models, context.model),
5319
5458
  subAgentId: "codeSanityCheck",
5320
5459
  signal: context.signal,
5321
5460
  parentToolId: context.toolCallId,
@@ -5455,7 +5594,7 @@ function triggerCompaction(state, apiConfig, opts = {}) {
5455
5594
  apiConfig,
5456
5595
  system,
5457
5596
  tools2,
5458
- state.models?.conversationSummarizer ?? model
5597
+ resolveModel("conversationSummarizer", state.models, model)
5459
5598
  ).then((summaries) => {
5460
5599
  pendingSummaries.push(...summaries);
5461
5600
  listener?.({ type: "complete", requestId });
@@ -6252,7 +6391,7 @@ async function runTurn(params) {
6252
6391
  onEvent({ type: "tool_input_delta", id, name, result: content });
6253
6392
  }
6254
6393
  }
6255
- const parentModel = state.models?.parent ?? model;
6394
+ const parentModel = resolveModel("parent", state.models, model);
6256
6395
  try {
6257
6396
  for await (const event of streamChatWithRetry(
6258
6397
  {
@@ -6596,7 +6735,10 @@ async function runTurn(params) {
6596
6735
  isError: r.isError
6597
6736
  });
6598
6737
  if (!r.isError && BRAND_TRIGGERING_TOOLS.has(tc.name)) {
6599
- triggerBrandExtraction(apiConfig);
6738
+ triggerBrandExtraction(
6739
+ apiConfig,
6740
+ resolveModel("brandExtractor", state.models, model)
6741
+ );
6600
6742
  }
6601
6743
  return r;
6602
6744
  })
@@ -7039,12 +7181,14 @@ var HeadlessSession = class {
7039
7181
  this.emit("session_restored", {
7040
7182
  messageCount: this.state.messages.length,
7041
7183
  ...this.state.models && { models: this.state.models },
7184
+ modelSurfaces: MODEL_SURFACES,
7185
+ allowedModelsByType: ALLOWED_MODELS_BY_TYPE,
7042
7186
  ...this.queueFields()
7043
7187
  });
7044
7188
  }
7045
7189
  triggerBrandExtraction(
7046
7190
  this.config,
7047
- this.state.models?.brandExtractor ?? this.opts.model
7191
+ resolveModel("brandExtractor", this.state.models, this.opts.model)
7048
7192
  );
7049
7193
  this.toolRegistry.onEvent = this.onEvent;
7050
7194
  setCompactionListener((event) => {
@@ -7722,6 +7866,8 @@ var HeadlessSession = class {
7722
7866
  running: this.running,
7723
7867
  ...this.running && this.currentRequestId ? { currentRequestId: this.currentRequestId } : {},
7724
7868
  ...this.state.models && { models: this.state.models },
7869
+ modelSurfaces: MODEL_SURFACES,
7870
+ allowedModelsByType: ALLOWED_MODELS_BY_TYPE,
7725
7871
  ...this.queueFields()
7726
7872
  }));
7727
7873
  return;