@mindstudio-ai/remy 0.1.178 → 0.1.180
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/headless.js +317 -171
- package/dist/index.js +337 -174
- package/package.json +1 -1
package/dist/headless.js
CHANGED
|
@@ -404,7 +404,7 @@ async function* streamChat(params) {
|
|
|
404
404
|
const url = `${baseUrl2}/_internal/v2/agent/remy/chat`;
|
|
405
405
|
const startTime = Date.now();
|
|
406
406
|
const subAgentId = rest.subAgentId;
|
|
407
|
-
const requestBody = { ...rest,
|
|
407
|
+
const requestBody = { ...rest, modelId: model };
|
|
408
408
|
log2.info("API request", {
|
|
409
409
|
requestId,
|
|
410
410
|
...subAgentId && { subAgentId },
|
|
@@ -2787,13 +2787,8 @@ var queryDatabaseTool = {
|
|
|
2787
2787
|
};
|
|
2788
2788
|
|
|
2789
2789
|
// src/subagents/common/analyzeImage.ts
|
|
2790
|
-
var VISION_MODEL = "claude-4-6-sonnet";
|
|
2791
|
-
var VISION_MODEL_OVERRIDE = {
|
|
2792
|
-
model: VISION_MODEL,
|
|
2793
|
-
config: { thinkingBudget: "off" }
|
|
2794
|
-
};
|
|
2795
2790
|
async function analyzeImage(params) {
|
|
2796
|
-
const { prompt, imageUrl, timeout = 2e5, onLog } = params;
|
|
2791
|
+
const { prompt, imageUrl, model, timeout = 2e5, onLog } = params;
|
|
2797
2792
|
return runMindstudioCli(
|
|
2798
2793
|
[
|
|
2799
2794
|
"analyze-image",
|
|
@@ -2802,7 +2797,7 @@ async function analyzeImage(params) {
|
|
|
2802
2797
|
"--image-url",
|
|
2803
2798
|
imageUrl,
|
|
2804
2799
|
"--vision-model-override",
|
|
2805
|
-
JSON.stringify(
|
|
2800
|
+
JSON.stringify({ model })
|
|
2806
2801
|
],
|
|
2807
2802
|
{ outputKey: "analysis", timeout, onLog }
|
|
2808
2803
|
);
|
|
@@ -2830,13 +2825,14 @@ ${TEXT_WRAP_DISCLAIMER}`;
|
|
|
2830
2825
|
return p;
|
|
2831
2826
|
}
|
|
2832
2827
|
async function streamScreenshotAnalysis(opts) {
|
|
2833
|
-
const { url, prompt, styleMap, onLog } = opts;
|
|
2828
|
+
const { url, prompt, styleMap, onLog, model } = opts;
|
|
2834
2829
|
onLog?.(JSON.stringify({ url, analysis: null }));
|
|
2835
2830
|
const analysisPrompt = buildScreenshotAnalysisPrompt({ prompt, styleMap });
|
|
2836
2831
|
let accumulated = "";
|
|
2837
2832
|
const analysis = await analyzeImage({
|
|
2838
2833
|
prompt: analysisPrompt,
|
|
2839
2834
|
imageUrl: url,
|
|
2835
|
+
model,
|
|
2840
2836
|
onLog: (chunk) => {
|
|
2841
2837
|
accumulated += chunk;
|
|
2842
2838
|
onLog?.(JSON.stringify({ url, analysis: accumulated }));
|
|
@@ -2848,12 +2844,14 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
|
2848
2844
|
let prompt;
|
|
2849
2845
|
let existingUrl;
|
|
2850
2846
|
let onLog;
|
|
2847
|
+
let model;
|
|
2851
2848
|
let path12;
|
|
2852
2849
|
if (typeof promptOrOptions === "object" && promptOrOptions !== null) {
|
|
2853
2850
|
prompt = promptOrOptions.prompt;
|
|
2854
2851
|
existingUrl = promptOrOptions.imageUrl;
|
|
2855
2852
|
path12 = promptOrOptions.path;
|
|
2856
2853
|
onLog = promptOrOptions.onLog;
|
|
2854
|
+
model = promptOrOptions.model;
|
|
2857
2855
|
} else {
|
|
2858
2856
|
prompt = promptOrOptions;
|
|
2859
2857
|
}
|
|
@@ -2878,11 +2876,17 @@ async function captureAndAnalyzeScreenshot(promptOrOptions) {
|
|
|
2878
2876
|
if (prompt === false) {
|
|
2879
2877
|
return url;
|
|
2880
2878
|
}
|
|
2879
|
+
if (!model) {
|
|
2880
|
+
throw new Error(
|
|
2881
|
+
"captureAndAnalyzeScreenshot: `model` is required when analysis is enabled"
|
|
2882
|
+
);
|
|
2883
|
+
}
|
|
2881
2884
|
return streamScreenshotAnalysis({
|
|
2882
2885
|
url,
|
|
2883
2886
|
prompt: prompt || void 0,
|
|
2884
2887
|
styleMap,
|
|
2885
|
-
onLog
|
|
2888
|
+
onLog,
|
|
2889
|
+
model
|
|
2886
2890
|
});
|
|
2887
2891
|
}
|
|
2888
2892
|
|
|
@@ -3715,8 +3719,215 @@ ${appSpec}
|
|
|
3715
3719
|
}
|
|
3716
3720
|
}
|
|
3717
3721
|
|
|
3722
|
+
// src/models/surfaces.ts
|
|
3723
|
+
/**
 * Registry of every "model surface": a place where Remy calls a model.
 *
 * Each entry carries:
 *   - default:      model id used when nothing else selects one
 *   - label:        short display name
 *   - description:  display text describing the surface
 *   - modelType:    "text" | "image_generation" | "vision"
 *   - userPickable: whether the surface is flagged as user-selectable
 */
var MODEL_SURFACES = {
  parent: {
    default: "claude-4-7-opus",
    label: "Remy",
    description: "The main Remy agent you chat with about your product. Writes code and manages delegation to other agents.",
    modelType: "text",
    userPickable: true
  },
  visualDesignExpert: {
    default: "claude-4-7-opus",
    label: "Design Agent",
    description: "Designs your product's interfaces, including components, layouts, typography, color, and visual identity.",
    modelType: "text",
    userPickable: true
  },
  productVision: {
    default: "claude-4-6-sonnet",
    label: "Roadmap Agent",
    description: "Owns your product's roadmap and pitch deck. Helps decide what to build next and how to frame the big picture.",
    modelType: "text",
    userPickable: true
  },
  browserAutomation: {
    default: "claude-4-6-sonnet",
    label: "QA Agent",
    description: "Tests features and UI flows in an automated browser to verify they work end to end.",
    modelType: "text",
    userPickable: true
  },
  codeSanityCheck: {
    default: "claude-4-6-sonnet",
    label: "Architecture Agent",
    description: "Reviews the architecture and structure of code changes to avoid technical debt.",
    modelType: "text",
    userPickable: true
  },
  imageGeneration: {
    default: "seedream-4.5",
    label: "Image Generation",
    description: "Creates images for your product \u2014 icons, illustrations, photos, and any other visual assets.",
    modelType: "image_generation",
    userPickable: true
  },
  imageAnalysis: {
    default: "claude-4-6-sonnet",
    label: "Image Analysis",
    description: "Reads screenshots taken by the QA agent during automated browser tests. Other agents use their own built-in image analysis when they need to read images.",
    modelType: "vision",
    userPickable: true
  },
  conversationSummarizer: {
    default: "claude-4-6-sonnet",
    label: "Compaction Utility",
    description: "Compresses long conversations into summaries to keep things responsive.",
    modelType: "text",
    userPickable: true
  },
  brandExtractor: {
    default: "claude-4-6-sonnet",
    label: "Brand Utility",
    description: "Extracts your product's name, colors, and fonts from your spec for use in branded documents.",
    modelType: "text",
    userPickable: true
  },
  // Internal surface — not user-pickable. Remy uses this to rewrite design
  // briefs into model-optimized image prompts before image generation.
  imagePromptEnhancer: {
    default: "claude-4-6-sonnet",
    label: "Image Prompt Enhancer",
    description: "Rewrites image briefs into model-optimized prompts before image generation.",
    modelType: "text",
    userPickable: false
  }
};

/**
 * Allow-list of model ids per `modelType`. A type with no entry here is
 * unconstrained.
 */
var ALLOWED_MODELS_BY_TYPE = {
  text: [
    "claude-4-7-opus",
    "claude-4-6-opus",
    "claude-4-6-sonnet",
    "gpt-5.5",
    "gemini-3-pro",
    "gemini-3.1-pro",
    "gemini-3-flash"
  ]
  // vision: undefined — unconstrained
  // image_generation: undefined — unconstrained
};

/**
 * Picks the model id to use for a surface.
 *
 * Precedence:
 *   1. the per-surface override in `models[surfaceId]`
 *   2. `fallback`, except for surfaces whose modelType is "image_generation"
 *   3. the surface's built-in default from MODEL_SURFACES
 *
 * @param {string} surfaceId - Key of MODEL_SURFACES (e.g. "parent", "imageAnalysis").
 * @param {Object<string, string>|null|undefined} models - Per-surface overrides.
 * @param {string|null|undefined} [fallback] - Session-wide model id.
 * @returns {string} The resolved model id.
 * @throws {TypeError} If `surfaceId` is not a known surface and neither an
 *   override nor a fallback resolves it.
 */
function resolveModel(surfaceId, models, fallback) {
  const override = models?.[surfaceId];
  if (override != null) {
    return override;
  }

  // Own-property lookup so inherited keys ("constructor", "toString", ...)
  // are not mistaken for surfaces.
  const surface = Object.prototype.hasOwnProperty.call(MODEL_SURFACES, surfaceId) ? MODEL_SURFACES[surfaceId] : void 0;

  // The fallback is the session's chat model (the same value is used to
  // resolve the "parent" surface), so it must not be handed to an image
  // generator; that surface falls through to its own default instead.
  if (fallback != null && surface?.modelType !== "image_generation") {
    return fallback;
  }

  if (!surface) {
    throw new TypeError(`resolveModel: unknown model surface "${String(surfaceId)}"`);
  }
  return surface.default;
}
|
|
3813
|
+
|
|
3718
3814
|
// src/subagents/browserAutomation/index.ts
|
|
3719
3815
|
var log6 = createLogger("browser-automation");
|
|
3816
|
+
/**
 * Runs the browser-automation sub-agent on `task` while holding the browser
 * lock, and returns its final text plus the captured full-page screenshot
 * (if one was produced).
 *
 * Local tools handled here:
 *   - "setupBrowser":       forwarded to the sidecar's /setup-browser endpoint
 *   - "screenshotFullPage": captured and analyzed with the "imageAnalysis" model
 *
 * External "browserCommand" results are post-processed: every
 * "screenshotViewport" step that carries a URL is sent through one batched
 * analyzeImage call, and the analysis text is attached to the step as
 * `result.analysis`. That enrichment is best-effort; on any failure the
 * untouched external result is returned.
 *
 * @param {string} task - Instructions for the sub-agent.
 * @param {object} context - Execution context. Fields read here: models,
 *   model, apiConfig, signal, toolCallId, requestId, onEvent,
 *   resolveExternalTool, toolRegistry, subAgentMessages.
 * @returns {Promise<{text: string, screenshot?: {url: string, styleMap: *}}>}
 */
async function runBrowserAutomation(task, context) {
  const release = await acquireBrowserLock();
  try {
    const result = await runSubAgent({
      system: getBrowserAutomationPrompt(),
      task,
      tools: BROWSER_TOOLS,
      externalTools: BROWSER_EXTERNAL_TOOLS,
      executeTool: async (name, _input, _toolCallId, onLog) => {
        if (name === "setupBrowser") {
          try {
            const result2 = await sidecarRequest(
              "/setup-browser",
              {
                auth: _input.auth,
                path: _input.path
              },
              { timeout: 15e3 }
            );
            return JSON.stringify(result2);
          } catch (err) {
            // Errors are returned as tool output so the sub-agent can react.
            return `Error setting up browser: ${err.message}`;
          }
        }
        if (name === "screenshotFullPage") {
          try {
            return await captureAndAnalyzeScreenshot({
              path: _input.path,
              onLog,
              model: resolveModel(
                "imageAnalysis",
                context.models,
                context.model
              )
            });
          } catch (err) {
            return `Error taking screenshot: ${err.message}`;
          }
        }
        return `Error: unknown local tool "${name}"`;
      },
      apiConfig: context.apiConfig,
      model: resolveModel("browserAutomation", context.models, context.model),
      subAgentId: "browserAutomation",
      signal: context.signal,
      parentToolId: context.toolCallId,
      requestId: context.requestId,
      onEvent: context.onEvent,
      resolveExternalTool: async (id, name, input) => {
        if (!context.resolveExternalTool) {
          return "Error: no external tool resolver";
        }
        const result2 = await context.resolveExternalTool(id, name, input);
        if (name === "browserCommand") {
          try {
            const parsed = JSON.parse(result2);
            const screenshotSteps = (parsed.steps || []).filter(
              (s) => s.command === "screenshotViewport" && s.result?.url
            );
            if (screenshotSteps.length > 0) {
              const visionOverride = {
                model: resolveModel(
                  "imageAnalysis",
                  context.models,
                  context.model
                )
              };
              const batchInput = screenshotSteps.map((s) => ({
                stepType: "analyzeImage",
                step: {
                  imageUrl: s.result.url,
                  prompt: buildScreenshotAnalysisPrompt({
                    styleMap: s.result.styleMap
                  }),
                  visionModelOverride: visionOverride
                }
              }));
              const batchResult = await runMindstudioCli(
                ["batch", JSON.stringify(batchInput)],
                { timeout: 2e5, caller: "browserAutomation" }
              );
              try {
                const analyses = JSON.parse(batchResult);
                // Batch entries are in the same order as screenshotSteps, so
                // walk the steps with the same predicate and a running index.
                let ai = 0;
                for (const step of parsed.steps) {
                  if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
                    step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
                    ai++;
                  }
                }
              } catch {
                log6.debug("Failed to parse batch analysis result", {
                  batchResult
                });
              }
              return JSON.stringify(parsed);
            }
          } catch (err) {
            // Best-effort enrichment: keep returning the raw result below, but
            // leave a trace so a non-JSON result or a failed batch call is
            // diagnosable instead of vanishing silently.
            log6.debug("Skipping screenshot analysis for browserCommand result", {
              error: err?.message
            });
          }
        }
        return result2;
      },
      toolRegistry: context.toolRegistry,
      captureArtifacts: ["screenshotFullPage"]
    });
    context.subAgentMessages?.set(context.toolCallId, result.messages);
    const ss = result.artifacts?.screenshotFullPage;
    return {
      text: result.text,
      // Only expose a screenshot when the artifact actually has a URL.
      ...ss?.url ? { screenshot: { url: ss.url, styleMap: ss.styleMap } } : {}
    };
  } finally {
    // Always hand the browser back, even if the sub-agent throws.
    release();
  }
}
|
|
3720
3931
|
var browserAutomationTool = {
|
|
3721
3932
|
clearable: true,
|
|
3722
3933
|
definition: {
|
|
@@ -3737,109 +3948,13 @@ var browserAutomationTool = {
|
|
|
3737
3948
|
if (!context) {
|
|
3738
3949
|
return "Error: browser automation requires execution context (only available in headless mode)";
|
|
3739
3950
|
}
|
|
3740
|
-
const
|
|
3741
|
-
|
|
3742
|
-
|
|
3743
|
-
system: getBrowserAutomationPrompt(),
|
|
3744
|
-
task: input.task,
|
|
3745
|
-
tools: BROWSER_TOOLS,
|
|
3746
|
-
externalTools: BROWSER_EXTERNAL_TOOLS,
|
|
3747
|
-
executeTool: async (name, _input, _toolCallId, onLog) => {
|
|
3748
|
-
if (name === "setupBrowser") {
|
|
3749
|
-
try {
|
|
3750
|
-
const result2 = await sidecarRequest(
|
|
3751
|
-
"/setup-browser",
|
|
3752
|
-
{
|
|
3753
|
-
auth: _input.auth,
|
|
3754
|
-
path: _input.path
|
|
3755
|
-
},
|
|
3756
|
-
{ timeout: 15e3 }
|
|
3757
|
-
);
|
|
3758
|
-
return JSON.stringify(result2);
|
|
3759
|
-
} catch (err) {
|
|
3760
|
-
return `Error setting up browser: ${err.message}`;
|
|
3761
|
-
}
|
|
3762
|
-
}
|
|
3763
|
-
if (name === "screenshotFullPage") {
|
|
3764
|
-
try {
|
|
3765
|
-
return await captureAndAnalyzeScreenshot({
|
|
3766
|
-
path: _input.path,
|
|
3767
|
-
onLog
|
|
3768
|
-
});
|
|
3769
|
-
} catch (err) {
|
|
3770
|
-
return `Error taking screenshot: ${err.message}`;
|
|
3771
|
-
}
|
|
3772
|
-
}
|
|
3773
|
-
return `Error: unknown local tool "${name}"`;
|
|
3774
|
-
},
|
|
3775
|
-
apiConfig: context.apiConfig,
|
|
3776
|
-
model: context.models?.browserAutomation ?? context.model,
|
|
3777
|
-
subAgentId: "browserAutomation",
|
|
3778
|
-
signal: context.signal,
|
|
3779
|
-
parentToolId: context.toolCallId,
|
|
3780
|
-
requestId: context.requestId,
|
|
3781
|
-
onEvent: context.onEvent,
|
|
3782
|
-
resolveExternalTool: async (id, name, input2) => {
|
|
3783
|
-
if (!context.resolveExternalTool) {
|
|
3784
|
-
return "Error: no external tool resolver";
|
|
3785
|
-
}
|
|
3786
|
-
const result2 = await context.resolveExternalTool(id, name, input2);
|
|
3787
|
-
if (name === "browserCommand") {
|
|
3788
|
-
try {
|
|
3789
|
-
const parsed = JSON.parse(result2);
|
|
3790
|
-
const screenshotSteps = (parsed.steps || []).filter(
|
|
3791
|
-
(s) => s.command === "screenshotViewport" && s.result?.url
|
|
3792
|
-
);
|
|
3793
|
-
if (screenshotSteps.length > 0) {
|
|
3794
|
-
const batchInput = screenshotSteps.map((s) => ({
|
|
3795
|
-
stepType: "analyzeImage",
|
|
3796
|
-
step: {
|
|
3797
|
-
imageUrl: s.result.url,
|
|
3798
|
-
prompt: buildScreenshotAnalysisPrompt({
|
|
3799
|
-
styleMap: s.result.styleMap
|
|
3800
|
-
}),
|
|
3801
|
-
visionModelOverride: VISION_MODEL_OVERRIDE
|
|
3802
|
-
}
|
|
3803
|
-
}));
|
|
3804
|
-
const batchResult = await runMindstudioCli(
|
|
3805
|
-
["batch", JSON.stringify(batchInput)],
|
|
3806
|
-
{ timeout: 2e5, caller: "browserAutomation" }
|
|
3807
|
-
);
|
|
3808
|
-
try {
|
|
3809
|
-
const analyses = JSON.parse(batchResult);
|
|
3810
|
-
let ai = 0;
|
|
3811
|
-
for (const step of parsed.steps) {
|
|
3812
|
-
if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
|
|
3813
|
-
step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
|
|
3814
|
-
ai++;
|
|
3815
|
-
}
|
|
3816
|
-
}
|
|
3817
|
-
} catch {
|
|
3818
|
-
log6.debug("Failed to parse batch analysis result", {
|
|
3819
|
-
batchResult
|
|
3820
|
-
});
|
|
3821
|
-
}
|
|
3822
|
-
return JSON.stringify(parsed);
|
|
3823
|
-
}
|
|
3824
|
-
} catch {
|
|
3825
|
-
}
|
|
3826
|
-
}
|
|
3827
|
-
return result2;
|
|
3828
|
-
},
|
|
3829
|
-
toolRegistry: context.toolRegistry,
|
|
3830
|
-
captureArtifacts: ["screenshotFullPage"]
|
|
3831
|
-
});
|
|
3832
|
-
context.subAgentMessages?.set(context.toolCallId, result.messages);
|
|
3833
|
-
const ss = result.artifacts?.screenshotFullPage;
|
|
3834
|
-
if (ss?.url) {
|
|
3835
|
-
return `${result.text}
|
|
3951
|
+
const result = await runBrowserAutomation(input.task, context);
|
|
3952
|
+
if (result.screenshot) {
|
|
3953
|
+
return `${result.text}
|
|
3836
3954
|
|
|
3837
|
-
;
|
|
3955
|
+
`;
|
|
3842
3956
|
}
|
|
3957
|
+
return result.text;
|
|
3843
3958
|
}
|
|
3844
3959
|
};
|
|
3845
3960
|
|
|
@@ -3877,29 +3992,22 @@ var screenshotTool = {
|
|
|
3877
3992
|
return await captureAndAnalyzeScreenshot({
|
|
3878
3993
|
prompt: input.prompt,
|
|
3879
3994
|
imageUrl: input.imageUrl,
|
|
3880
|
-
onLog: context?.onLog
|
|
3995
|
+
onLog: context?.onLog,
|
|
3996
|
+
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
3881
3997
|
});
|
|
3882
3998
|
}
|
|
3883
3999
|
if (input.instructions && context) {
|
|
3884
4000
|
const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
|
|
3885
|
-
const result = await
|
|
3886
|
-
|
|
3887
|
-
|
|
3888
|
-
let styleMap;
|
|
3889
|
-
try {
|
|
3890
|
-
const parsed = JSON.parse(resultStr);
|
|
3891
|
-
url = parsed.screenshotUrl;
|
|
3892
|
-
styleMap = parsed.styleMap;
|
|
3893
|
-
} catch {
|
|
3894
|
-
}
|
|
3895
|
-
if (!url) {
|
|
3896
|
-
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
4001
|
+
const result = await runBrowserAutomation(task, context);
|
|
4002
|
+
if (!result.screenshot) {
|
|
4003
|
+
return result.text;
|
|
3897
4004
|
}
|
|
3898
4005
|
return await streamScreenshotAnalysis({
|
|
3899
|
-
url,
|
|
4006
|
+
url: result.screenshot.url,
|
|
3900
4007
|
prompt: input.prompt,
|
|
3901
|
-
styleMap,
|
|
3902
|
-
onLog: context?.onLog
|
|
4008
|
+
styleMap: result.screenshot.styleMap,
|
|
4009
|
+
onLog: context?.onLog,
|
|
4010
|
+
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
3903
4011
|
});
|
|
3904
4012
|
}
|
|
3905
4013
|
const release = await acquireBrowserLock();
|
|
@@ -3907,7 +4015,8 @@ var screenshotTool = {
|
|
|
3907
4015
|
return await captureAndAnalyzeScreenshot({
|
|
3908
4016
|
prompt: input.prompt,
|
|
3909
4017
|
path: input.path,
|
|
3910
|
-
onLog: context?.onLog
|
|
4018
|
+
onLog: context?.onLog,
|
|
4019
|
+
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
3911
4020
|
});
|
|
3912
4021
|
} finally {
|
|
3913
4022
|
release();
|
|
@@ -4118,7 +4227,7 @@ var definition3 = {
|
|
|
4118
4227
|
required: ["url"]
|
|
4119
4228
|
}
|
|
4120
4229
|
};
|
|
4121
|
-
async function execute3(input, onLog) {
|
|
4230
|
+
async function execute3(input, onLog, context) {
|
|
4122
4231
|
const url = input.url;
|
|
4123
4232
|
const analysisPrompt = input.prompt || DESIGN_REFERENCE_PROMPT;
|
|
4124
4233
|
const isImageUrl = /\.(png|jpe?g|webp|gif|svg|avif)(\?|$)/i.test(url);
|
|
@@ -4151,7 +4260,8 @@ async function execute3(input, onLog) {
|
|
|
4151
4260
|
const analysis = await analyzeImage({
|
|
4152
4261
|
prompt: analysisPrompt,
|
|
4153
4262
|
imageUrl,
|
|
4154
|
-
onLog
|
|
4263
|
+
onLog,
|
|
4264
|
+
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
4155
4265
|
});
|
|
4156
4266
|
return JSON.stringify({ url: imageUrl, analysis });
|
|
4157
4267
|
}
|
|
@@ -4181,7 +4291,7 @@ var definition4 = {
|
|
|
4181
4291
|
required: ["imageUrl"]
|
|
4182
4292
|
}
|
|
4183
4293
|
};
|
|
4184
|
-
async function execute4(input, onLog) {
|
|
4294
|
+
async function execute4(input, onLog, context) {
|
|
4185
4295
|
const imageUrl = input.imageUrl;
|
|
4186
4296
|
const prompt = buildScreenshotAnalysisPrompt({
|
|
4187
4297
|
prompt: input.prompt
|
|
@@ -4189,7 +4299,8 @@ async function execute4(input, onLog) {
|
|
|
4189
4299
|
const analysis = await analyzeImage({
|
|
4190
4300
|
prompt,
|
|
4191
4301
|
imageUrl,
|
|
4192
|
-
onLog
|
|
4302
|
+
onLog,
|
|
4303
|
+
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
4193
4304
|
});
|
|
4194
4305
|
return JSON.stringify({ url: imageUrl, analysis });
|
|
4195
4306
|
}
|
|
@@ -4226,24 +4337,16 @@ async function execute5(input, onLog, context) {
|
|
|
4226
4337
|
if (input.instructions && context) {
|
|
4227
4338
|
try {
|
|
4228
4339
|
const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
|
|
4229
|
-
const result = await
|
|
4230
|
-
|
|
4231
|
-
|
|
4232
|
-
let styleMap;
|
|
4233
|
-
try {
|
|
4234
|
-
const parsed = JSON.parse(resultStr);
|
|
4235
|
-
url = parsed.screenshotUrl;
|
|
4236
|
-
styleMap = parsed.styleMap;
|
|
4237
|
-
} catch {
|
|
4238
|
-
}
|
|
4239
|
-
if (!url) {
|
|
4240
|
-
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
4340
|
+
const result = await runBrowserAutomation(task, context);
|
|
4341
|
+
if (!result.screenshot) {
|
|
4342
|
+
return result.text;
|
|
4241
4343
|
}
|
|
4242
4344
|
return await streamScreenshotAnalysis({
|
|
4243
|
-
url,
|
|
4345
|
+
url: result.screenshot.url,
|
|
4244
4346
|
prompt: input.prompt,
|
|
4245
|
-
styleMap,
|
|
4246
|
-
onLog
|
|
4347
|
+
styleMap: result.screenshot.styleMap,
|
|
4348
|
+
onLog,
|
|
4349
|
+
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
4247
4350
|
});
|
|
4248
4351
|
} catch (err) {
|
|
4249
4352
|
return `Error taking interactive screenshot: ${err.message}`;
|
|
@@ -4254,7 +4357,8 @@ async function execute5(input, onLog, context) {
|
|
|
4254
4357
|
return await captureAndAnalyzeScreenshot({
|
|
4255
4358
|
prompt: input.prompt,
|
|
4256
4359
|
path: input.path,
|
|
4257
|
-
onLog
|
|
4360
|
+
onLog,
|
|
4361
|
+
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
4258
4362
|
});
|
|
4259
4363
|
} catch (err) {
|
|
4260
4364
|
return `Error taking screenshot: ${err.message}`;
|
|
@@ -4271,16 +4375,11 @@ __export(generateImages_exports, {
|
|
|
4271
4375
|
});
|
|
4272
4376
|
|
|
4273
4377
|
// src/subagents/designExpert/tools/images/enhancePrompt.ts
|
|
4274
|
-
var ENHANCE_MODEL = "claude-4-6-sonnet";
|
|
4275
|
-
var MODEL_OVERRIDE = {
|
|
4276
|
-
model: ENHANCE_MODEL,
|
|
4277
|
-
config: { reasoning: "false" }
|
|
4278
|
-
};
|
|
4279
4378
|
var SYSTEM_PROMPT = readAsset(
|
|
4280
4379
|
"subagents/designExpert/tools/images/enhance-image-prompt.md"
|
|
4281
4380
|
);
|
|
4282
4381
|
async function enhanceImagePrompt(params) {
|
|
4283
|
-
const { brief, width, height, transparentBackground, onLog } = params;
|
|
4382
|
+
const { brief, width, height, transparentBackground, onLog, model } = params;
|
|
4284
4383
|
const contextParts = [
|
|
4285
4384
|
`Dimensions: ${width}x${height}${width > height ? " (landscape)" : width < height ? " (portrait)" : " (square)"}`
|
|
4286
4385
|
];
|
|
@@ -4305,7 +4404,7 @@ ${brief}
|
|
|
4305
4404
|
"--message",
|
|
4306
4405
|
message,
|
|
4307
4406
|
"--model-override",
|
|
4308
|
-
JSON.stringify(
|
|
4407
|
+
JSON.stringify({ model, config: { reasoning: "false" } })
|
|
4309
4408
|
],
|
|
4310
4409
|
{ outputKey: "content", timeout: 6e4, onLog, caller: "designExpert" }
|
|
4311
4410
|
);
|
|
@@ -4315,7 +4414,15 @@ ${brief}
|
|
|
4315
4414
|
// src/subagents/designExpert/tools/images/imageGenerator.ts
|
|
4316
4415
|
var ANALYZE_PROMPT = 'You are reviewing this image for a visual designer sourcing assets for a project. Describe: what the image depicts, the mood and color palette, how the lighting and composition work, any text present in the image, whether there are any issues (artifacts, distortions), and how it could be used in a layout for an app or website. Be concise and practical. Respond only with your analysis as Markdown (starting with the title "Asset Review") and absolutely no other text. Do not use emojis - use unicode if you need symbols.';
|
|
4317
4416
|
async function generateImageAssets(opts) {
|
|
4318
|
-
const {
|
|
4417
|
+
const {
|
|
4418
|
+
prompts,
|
|
4419
|
+
sourceImages,
|
|
4420
|
+
transparentBackground,
|
|
4421
|
+
onLog,
|
|
4422
|
+
imageGenerationModel: genModel,
|
|
4423
|
+
imageAnalysisModel,
|
|
4424
|
+
imagePromptEnhancerModel
|
|
4425
|
+
} = opts;
|
|
4319
4426
|
const width = opts.width || 2048;
|
|
4320
4427
|
const height = opts.height || 2048;
|
|
4321
4428
|
const config = { width, height };
|
|
@@ -4330,7 +4437,8 @@ async function generateImageAssets(opts) {
|
|
|
4330
4437
|
width,
|
|
4331
4438
|
height,
|
|
4332
4439
|
transparentBackground,
|
|
4333
|
-
onLog
|
|
4440
|
+
onLog,
|
|
4441
|
+
model: imagePromptEnhancerModel
|
|
4334
4442
|
})
|
|
4335
4443
|
)
|
|
4336
4444
|
);
|
|
@@ -4339,7 +4447,7 @@ async function generateImageAssets(opts) {
|
|
|
4339
4447
|
const step = JSON.stringify({
|
|
4340
4448
|
prompt: enhancedPrompts[0],
|
|
4341
4449
|
imageModelOverride: {
|
|
4342
|
-
model:
|
|
4450
|
+
model: genModel,
|
|
4343
4451
|
config
|
|
4344
4452
|
}
|
|
4345
4453
|
});
|
|
@@ -4358,7 +4466,7 @@ async function generateImageAssets(opts) {
|
|
|
4358
4466
|
step: {
|
|
4359
4467
|
prompt,
|
|
4360
4468
|
imageModelOverride: {
|
|
4361
|
-
model:
|
|
4469
|
+
model: genModel,
|
|
4362
4470
|
config
|
|
4363
4471
|
}
|
|
4364
4472
|
}
|
|
@@ -4410,7 +4518,8 @@ async function generateImageAssets(opts) {
|
|
|
4410
4518
|
const analysis = await analyzeImage({
|
|
4411
4519
|
prompt: ANALYZE_PROMPT,
|
|
4412
4520
|
imageUrl: url,
|
|
4413
|
-
onLog
|
|
4521
|
+
onLog,
|
|
4522
|
+
model: imageAnalysisModel
|
|
4414
4523
|
});
|
|
4415
4524
|
return {
|
|
4416
4525
|
url,
|
|
@@ -4456,13 +4565,28 @@ var definition6 = {
|
|
|
4456
4565
|
required: ["prompts"]
|
|
4457
4566
|
}
|
|
4458
4567
|
};
|
|
4459
|
-
async function execute6(input, onLog) {
|
|
4568
|
+
async function execute6(input, onLog, context) {
|
|
4460
4569
|
return generateImageAssets({
|
|
4461
4570
|
prompts: input.prompts,
|
|
4462
4571
|
width: input.width,
|
|
4463
4572
|
height: input.height,
|
|
4464
4573
|
transparentBackground: input.transparentBackground,
|
|
4465
|
-
onLog
|
|
4574
|
+
onLog,
|
|
4575
|
+
imageGenerationModel: resolveModel(
|
|
4576
|
+
"imageGeneration",
|
|
4577
|
+
context?.models,
|
|
4578
|
+
context?.model
|
|
4579
|
+
),
|
|
4580
|
+
imageAnalysisModel: resolveModel(
|
|
4581
|
+
"imageAnalysis",
|
|
4582
|
+
context?.models,
|
|
4583
|
+
context?.model
|
|
4584
|
+
),
|
|
4585
|
+
imagePromptEnhancerModel: resolveModel(
|
|
4586
|
+
"imagePromptEnhancer",
|
|
4587
|
+
context?.models,
|
|
4588
|
+
context?.model
|
|
4589
|
+
)
|
|
4466
4590
|
});
|
|
4467
4591
|
}
|
|
4468
4592
|
|
|
@@ -4509,14 +4633,29 @@ var definition7 = {
|
|
|
4509
4633
|
required: ["prompts", "sourceImages"]
|
|
4510
4634
|
}
|
|
4511
4635
|
};
|
|
4512
|
-
async function execute7(input, onLog) {
|
|
4636
|
+
async function execute7(input, onLog, context) {
|
|
4513
4637
|
return generateImageAssets({
|
|
4514
4638
|
prompts: input.prompts,
|
|
4515
4639
|
sourceImages: input.sourceImages,
|
|
4516
4640
|
width: input.width,
|
|
4517
4641
|
height: input.height,
|
|
4518
4642
|
transparentBackground: input.transparentBackground,
|
|
4519
|
-
onLog
|
|
4643
|
+
onLog,
|
|
4644
|
+
imageGenerationModel: resolveModel(
|
|
4645
|
+
"imageGeneration",
|
|
4646
|
+
context?.models,
|
|
4647
|
+
context?.model
|
|
4648
|
+
),
|
|
4649
|
+
imageAnalysisModel: resolveModel(
|
|
4650
|
+
"imageAnalysis",
|
|
4651
|
+
context?.models,
|
|
4652
|
+
context?.model
|
|
4653
|
+
),
|
|
4654
|
+
imagePromptEnhancerModel: resolveModel(
|
|
4655
|
+
"imagePromptEnhancer",
|
|
4656
|
+
context?.models,
|
|
4657
|
+
context?.model
|
|
4658
|
+
)
|
|
4520
4659
|
});
|
|
4521
4660
|
}
|
|
4522
4661
|
|
|
@@ -4989,7 +5128,7 @@ var designExpertTool = {
|
|
|
4989
5128
|
);
|
|
4990
5129
|
},
|
|
4991
5130
|
apiConfig: context.apiConfig,
|
|
4992
|
-
model: context.models
|
|
5131
|
+
model: resolveModel("visualDesignExpert", context.models, context.model),
|
|
4993
5132
|
subAgentId: "visualDesignExpert",
|
|
4994
5133
|
signal: context.signal,
|
|
4995
5134
|
parentToolId: context.toolCallId,
|
|
@@ -5207,7 +5346,7 @@ var productVisionTool = {
|
|
|
5207
5346
|
return executeVisionTool(name, input2, childCtx);
|
|
5208
5347
|
},
|
|
5209
5348
|
apiConfig: context.apiConfig,
|
|
5210
|
-
model: context.models
|
|
5349
|
+
model: resolveModel("productVision", context.models, context.model),
|
|
5211
5350
|
subAgentId: "productVision",
|
|
5212
5351
|
signal: context.signal,
|
|
5213
5352
|
parentToolId: context.toolCallId,
|
|
@@ -5315,7 +5454,7 @@ var codeSanityCheckTool = {
|
|
|
5315
5454
|
externalTools: /* @__PURE__ */ new Set(),
|
|
5316
5455
|
executeTool: (name, toolInput) => executeTool(name, toolInput, context),
|
|
5317
5456
|
apiConfig: context.apiConfig,
|
|
5318
|
-
model: context.models
|
|
5457
|
+
model: resolveModel("codeSanityCheck", context.models, context.model),
|
|
5319
5458
|
subAgentId: "codeSanityCheck",
|
|
5320
5459
|
signal: context.signal,
|
|
5321
5460
|
parentToolId: context.toolCallId,
|
|
@@ -5455,7 +5594,7 @@ function triggerCompaction(state, apiConfig, opts = {}) {
|
|
|
5455
5594
|
apiConfig,
|
|
5456
5595
|
system,
|
|
5457
5596
|
tools2,
|
|
5458
|
-
state.models
|
|
5597
|
+
resolveModel("conversationSummarizer", state.models, model)
|
|
5459
5598
|
).then((summaries) => {
|
|
5460
5599
|
pendingSummaries.push(...summaries);
|
|
5461
5600
|
listener?.({ type: "complete", requestId });
|
|
@@ -6252,7 +6391,7 @@ async function runTurn(params) {
|
|
|
6252
6391
|
onEvent({ type: "tool_input_delta", id, name, result: content });
|
|
6253
6392
|
}
|
|
6254
6393
|
}
|
|
6255
|
-
const parentModel = state.models
|
|
6394
|
+
const parentModel = resolveModel("parent", state.models, model);
|
|
6256
6395
|
try {
|
|
6257
6396
|
for await (const event of streamChatWithRetry(
|
|
6258
6397
|
{
|
|
@@ -6596,7 +6735,10 @@ async function runTurn(params) {
|
|
|
6596
6735
|
isError: r.isError
|
|
6597
6736
|
});
|
|
6598
6737
|
if (!r.isError && BRAND_TRIGGERING_TOOLS.has(tc.name)) {
|
|
6599
|
-
triggerBrandExtraction(
|
|
6738
|
+
triggerBrandExtraction(
|
|
6739
|
+
apiConfig,
|
|
6740
|
+
resolveModel("brandExtractor", state.models, model)
|
|
6741
|
+
);
|
|
6600
6742
|
}
|
|
6601
6743
|
return r;
|
|
6602
6744
|
})
|
|
@@ -7039,12 +7181,14 @@ var HeadlessSession = class {
|
|
|
7039
7181
|
this.emit("session_restored", {
|
|
7040
7182
|
messageCount: this.state.messages.length,
|
|
7041
7183
|
...this.state.models && { models: this.state.models },
|
|
7184
|
+
modelSurfaces: MODEL_SURFACES,
|
|
7185
|
+
allowedModelsByType: ALLOWED_MODELS_BY_TYPE,
|
|
7042
7186
|
...this.queueFields()
|
|
7043
7187
|
});
|
|
7044
7188
|
}
|
|
7045
7189
|
triggerBrandExtraction(
|
|
7046
7190
|
this.config,
|
|
7047
|
-
this.state.models
|
|
7191
|
+
resolveModel("brandExtractor", this.state.models, this.opts.model)
|
|
7048
7192
|
);
|
|
7049
7193
|
this.toolRegistry.onEvent = this.onEvent;
|
|
7050
7194
|
setCompactionListener((event) => {
|
|
@@ -7722,6 +7866,8 @@ var HeadlessSession = class {
|
|
|
7722
7866
|
running: this.running,
|
|
7723
7867
|
...this.running && this.currentRequestId ? { currentRequestId: this.currentRequestId } : {},
|
|
7724
7868
|
...this.state.models && { models: this.state.models },
|
|
7869
|
+
modelSurfaces: MODEL_SURFACES,
|
|
7870
|
+
allowedModelsByType: ALLOWED_MODELS_BY_TYPE,
|
|
7725
7871
|
...this.queueFields()
|
|
7726
7872
|
}));
|
|
7727
7873
|
return;
|