@mindstudio-ai/remy 0.1.179 → 0.1.180
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/headless.js +130 -141
- package/dist/index.js +130 -141
- package/package.json +1 -1
package/dist/headless.js
CHANGED
|
@@ -3813,6 +3813,121 @@ function resolveModel(surfaceId, models, fallback) {
|
|
|
3813
3813
|
|
|
3814
3814
|
// src/subagents/browserAutomation/index.ts
|
|
3815
3815
|
var log6 = createLogger("browser-automation");
|
|
3816
|
+
/**
 * Turns a caught value into text for a tool-result string.
 * Falls back to String(err) when the thrown value has no `message`
 * (e.g. a thrown string), instead of rendering "undefined".
 */
function describeBrowserToolError(err) {
  return err?.message ?? String(err);
}

/**
 * Runs one of the sub-agent's local tools ("setupBrowser" or
 * "screenshotFullPage"). Failures are returned as "Error ..." strings rather
 * than thrown, so the sub-agent receives them as the tool result.
 *
 * @param {string} name - Local tool name requested by the sub-agent.
 * @param {object} input - Tool input; `auth` and `path` are read from it.
 * @param {Function} onLog - Log callback forwarded to the screenshot capture.
 * @param {object} context - Execution context (`models` / `model` are read).
 * @returns {Promise<string>} Tool result, or an error description.
 */
async function executeLocalBrowserTool(name, input, onLog, context) {
  switch (name) {
    case "setupBrowser":
      try {
        const response = await sidecarRequest(
          "/setup-browser",
          { auth: input.auth, path: input.path },
          { timeout: 15e3 } // presumably milliseconds — confirm against sidecarRequest
        );
        return JSON.stringify(response);
      } catch (err) {
        return `Error setting up browser: ${describeBrowserToolError(err)}`;
      }
    case "screenshotFullPage":
      try {
        return await captureAndAnalyzeScreenshot({
          path: input.path,
          onLog,
          model: resolveModel("imageAnalysis", context.models, context.model)
        });
      } catch (err) {
        return `Error taking screenshot: ${describeBrowserToolError(err)}`;
      }
    default:
      return `Error: unknown local tool "${name}"`;
  }
}

/**
 * Post-processes a "browserCommand" result: every "screenshotViewport" step
 * that carries a `result.url` is sent through one `batch` CLI call of
 * "analyzeImage" steps, and each returned analysis is written to that step's
 * `result.analysis`.
 *
 * This is best-effort. If the result is not JSON, contains no such steps, or
 * the batch call fails, the raw result is returned unchanged. Batch failures
 * are logged at debug level instead of being swallowed silently.
 *
 * @param {string} rawResult - Result string from the external tool resolver.
 * @param {object} context - Execution context (`models` / `model` are read).
 * @returns {Promise<string>} The annotated JSON string, or `rawResult`.
 */
async function annotateViewportScreenshots(rawResult, context) {
  let parsed;
  try {
    parsed = JSON.parse(rawResult);
  } catch {
    // Non-JSON results (for example plain error strings) pass through as-is.
    return rawResult;
  }

  const steps = Array.isArray(parsed?.steps) ? parsed.steps : [];
  const shots = steps.filter(
    (step) => step?.command === "screenshotViewport" && step.result?.url
  );
  if (shots.length === 0) {
    return rawResult;
  }

  let batchResult;
  try {
    const visionModelOverride = {
      model: resolveModel("imageAnalysis", context.models, context.model)
    };
    const batchInput = shots.map((step) => ({
      stepType: "analyzeImage",
      step: {
        imageUrl: step.result.url,
        prompt: buildScreenshotAnalysisPrompt({ styleMap: step.result.styleMap }),
        visionModelOverride
      }
    }));
    batchResult = await runMindstudioCli(
      ["batch", JSON.stringify(batchInput)],
      { timeout: 2e5, caller: "browserAutomation" }
    );
  } catch (err) {
    log6.debug("Failed to run batch screenshot analysis", {
      error: describeBrowserToolError(err)
    });
    return rawResult;
  }

  try {
    const analyses = JSON.parse(batchResult);
    // Analyses are matched to screenshot steps by position in the batch.
    for (const [index, step] of shots.entries()) {
      if (index >= analyses.length) {
        break;
      }
      step.result.analysis = analyses[index]?.output?.analysis || analyses[index]?.output || "";
    }
  } catch {
    log6.debug("Failed to parse batch analysis result", {
      batchResult
    });
  }
  return JSON.stringify(parsed);
}

/**
 * Runs the browser-automation sub-agent for `task`.
 *
 * Awaits acquireBrowserLock() first and always calls the returned release
 * function in `finally`, including when the sub-agent throws. The sub-agent's
 * messages are stored in `context.subAgentMessages` (when present) under
 * `context.toolCallId`.
 *
 * @param {string} task - Instruction text handed to the sub-agent.
 * @param {object} context - Execution context supplying apiConfig, model(s),
 *   signal, ids, event callback, tool registry and the optional
 *   `resolveExternalTool` callback.
 * @returns {Promise<{text: string, screenshot?: {url: string, styleMap: *}}>}
 *   The sub-agent's text, plus `screenshot` only when a "screenshotFullPage"
 *   artifact with a `url` was captured.
 */
async function runBrowserAutomation(task, context) {
  const release = await acquireBrowserLock();
  try {
    const result = await runSubAgent({
      system: getBrowserAutomationPrompt(),
      task,
      tools: BROWSER_TOOLS,
      externalTools: BROWSER_EXTERNAL_TOOLS,
      executeTool: (name, _input, _toolCallId, onLog) => executeLocalBrowserTool(name, _input, onLog, context),
      apiConfig: context.apiConfig,
      model: resolveModel("browserAutomation", context.models, context.model),
      subAgentId: "browserAutomation",
      signal: context.signal,
      parentToolId: context.toolCallId,
      requestId: context.requestId,
      onEvent: context.onEvent,
      resolveExternalTool: async (id, name, input) => {
        if (!context.resolveExternalTool) {
          return "Error: no external tool resolver";
        }
        const rawResult = await context.resolveExternalTool(id, name, input);
        if (name !== "browserCommand") {
          return rawResult;
        }
        return annotateViewportScreenshots(rawResult, context);
      },
      toolRegistry: context.toolRegistry,
      captureArtifacts: ["screenshotFullPage"]
    });
    context.subAgentMessages?.set(context.toolCallId, result.messages);
    const shot = result.artifacts?.screenshotFullPage;
    return {
      text: result.text,
      ...(shot?.url ? { screenshot: { url: shot.url, styleMap: shot.styleMap } } : {})
    };
  } finally {
    release();
  }
}
|
|
3816
3931
|
var browserAutomationTool = {
|
|
3817
3932
|
clearable: true,
|
|
3818
3933
|
definition: {
|
|
@@ -3833,121 +3948,13 @@ var browserAutomationTool = {
|
|
|
3833
3948
|
if (!context) {
|
|
3834
3949
|
return "Error: browser automation requires execution context (only available in headless mode)";
|
|
3835
3950
|
}
|
|
3836
|
-
const
|
|
3837
|
-
|
|
3838
|
-
|
|
3839
|
-
system: getBrowserAutomationPrompt(),
|
|
3840
|
-
task: input.task,
|
|
3841
|
-
tools: BROWSER_TOOLS,
|
|
3842
|
-
externalTools: BROWSER_EXTERNAL_TOOLS,
|
|
3843
|
-
executeTool: async (name, _input, _toolCallId, onLog) => {
|
|
3844
|
-
if (name === "setupBrowser") {
|
|
3845
|
-
try {
|
|
3846
|
-
const result2 = await sidecarRequest(
|
|
3847
|
-
"/setup-browser",
|
|
3848
|
-
{
|
|
3849
|
-
auth: _input.auth,
|
|
3850
|
-
path: _input.path
|
|
3851
|
-
},
|
|
3852
|
-
{ timeout: 15e3 }
|
|
3853
|
-
);
|
|
3854
|
-
return JSON.stringify(result2);
|
|
3855
|
-
} catch (err) {
|
|
3856
|
-
return `Error setting up browser: ${err.message}`;
|
|
3857
|
-
}
|
|
3858
|
-
}
|
|
3859
|
-
if (name === "screenshotFullPage") {
|
|
3860
|
-
try {
|
|
3861
|
-
return await captureAndAnalyzeScreenshot({
|
|
3862
|
-
path: _input.path,
|
|
3863
|
-
onLog,
|
|
3864
|
-
model: resolveModel(
|
|
3865
|
-
"imageAnalysis",
|
|
3866
|
-
context.models,
|
|
3867
|
-
context.model
|
|
3868
|
-
)
|
|
3869
|
-
});
|
|
3870
|
-
} catch (err) {
|
|
3871
|
-
return `Error taking screenshot: ${err.message}`;
|
|
3872
|
-
}
|
|
3873
|
-
}
|
|
3874
|
-
return `Error: unknown local tool "${name}"`;
|
|
3875
|
-
},
|
|
3876
|
-
apiConfig: context.apiConfig,
|
|
3877
|
-
model: resolveModel("browserAutomation", context.models, context.model),
|
|
3878
|
-
subAgentId: "browserAutomation",
|
|
3879
|
-
signal: context.signal,
|
|
3880
|
-
parentToolId: context.toolCallId,
|
|
3881
|
-
requestId: context.requestId,
|
|
3882
|
-
onEvent: context.onEvent,
|
|
3883
|
-
resolveExternalTool: async (id, name, input2) => {
|
|
3884
|
-
if (!context.resolveExternalTool) {
|
|
3885
|
-
return "Error: no external tool resolver";
|
|
3886
|
-
}
|
|
3887
|
-
const result2 = await context.resolveExternalTool(id, name, input2);
|
|
3888
|
-
if (name === "browserCommand") {
|
|
3889
|
-
try {
|
|
3890
|
-
const parsed = JSON.parse(result2);
|
|
3891
|
-
const screenshotSteps = (parsed.steps || []).filter(
|
|
3892
|
-
(s) => s.command === "screenshotViewport" && s.result?.url
|
|
3893
|
-
);
|
|
3894
|
-
if (screenshotSteps.length > 0) {
|
|
3895
|
-
const visionOverride = {
|
|
3896
|
-
model: resolveModel(
|
|
3897
|
-
"imageAnalysis",
|
|
3898
|
-
context.models,
|
|
3899
|
-
context.model
|
|
3900
|
-
)
|
|
3901
|
-
};
|
|
3902
|
-
const batchInput = screenshotSteps.map((s) => ({
|
|
3903
|
-
stepType: "analyzeImage",
|
|
3904
|
-
step: {
|
|
3905
|
-
imageUrl: s.result.url,
|
|
3906
|
-
prompt: buildScreenshotAnalysisPrompt({
|
|
3907
|
-
styleMap: s.result.styleMap
|
|
3908
|
-
}),
|
|
3909
|
-
visionModelOverride: visionOverride
|
|
3910
|
-
}
|
|
3911
|
-
}));
|
|
3912
|
-
const batchResult = await runMindstudioCli(
|
|
3913
|
-
["batch", JSON.stringify(batchInput)],
|
|
3914
|
-
{ timeout: 2e5, caller: "browserAutomation" }
|
|
3915
|
-
);
|
|
3916
|
-
try {
|
|
3917
|
-
const analyses = JSON.parse(batchResult);
|
|
3918
|
-
let ai = 0;
|
|
3919
|
-
for (const step of parsed.steps) {
|
|
3920
|
-
if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
|
|
3921
|
-
step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
|
|
3922
|
-
ai++;
|
|
3923
|
-
}
|
|
3924
|
-
}
|
|
3925
|
-
} catch {
|
|
3926
|
-
log6.debug("Failed to parse batch analysis result", {
|
|
3927
|
-
batchResult
|
|
3928
|
-
});
|
|
3929
|
-
}
|
|
3930
|
-
return JSON.stringify(parsed);
|
|
3931
|
-
}
|
|
3932
|
-
} catch {
|
|
3933
|
-
}
|
|
3934
|
-
}
|
|
3935
|
-
return result2;
|
|
3936
|
-
},
|
|
3937
|
-
toolRegistry: context.toolRegistry,
|
|
3938
|
-
captureArtifacts: ["screenshotFullPage"]
|
|
3939
|
-
});
|
|
3940
|
-
context.subAgentMessages?.set(context.toolCallId, result.messages);
|
|
3941
|
-
const ss = result.artifacts?.screenshotFullPage;
|
|
3942
|
-
if (ss?.url) {
|
|
3943
|
-
return `${result.text}
|
|
3951
|
+
const result = await runBrowserAutomation(input.task, context);
|
|
3952
|
+
if (result.screenshot) {
|
|
3953
|
+
return `${result.text}
|
|
3944
3954
|
|
|
3945
|
-
;
|
|
3955
|
+
`;
|
|
3950
3956
|
}
|
|
3957
|
+
return result.text;
|
|
3951
3958
|
}
|
|
3952
3959
|
};
|
|
3953
3960
|
|
|
@@ -3991,23 +3998,14 @@ var screenshotTool = {
|
|
|
3991
3998
|
}
|
|
3992
3999
|
if (input.instructions && context) {
|
|
3993
4000
|
const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
|
|
3994
|
-
const result = await
|
|
3995
|
-
|
|
3996
|
-
|
|
3997
|
-
let styleMap;
|
|
3998
|
-
try {
|
|
3999
|
-
const parsed = JSON.parse(resultStr);
|
|
4000
|
-
url = parsed.screenshotUrl;
|
|
4001
|
-
styleMap = parsed.styleMap;
|
|
4002
|
-
} catch {
|
|
4003
|
-
}
|
|
4004
|
-
if (!url) {
|
|
4005
|
-
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
4001
|
+
const result = await runBrowserAutomation(task, context);
|
|
4002
|
+
if (!result.screenshot) {
|
|
4003
|
+
return result.text;
|
|
4006
4004
|
}
|
|
4007
4005
|
return await streamScreenshotAnalysis({
|
|
4008
|
-
url,
|
|
4006
|
+
url: result.screenshot.url,
|
|
4009
4007
|
prompt: input.prompt,
|
|
4010
|
-
styleMap,
|
|
4008
|
+
styleMap: result.screenshot.styleMap,
|
|
4011
4009
|
onLog: context?.onLog,
|
|
4012
4010
|
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
4013
4011
|
});
|
|
@@ -4339,23 +4337,14 @@ async function execute5(input, onLog, context) {
|
|
|
4339
4337
|
if (input.instructions && context) {
|
|
4340
4338
|
try {
|
|
4341
4339
|
const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
|
|
4342
|
-
const result = await
|
|
4343
|
-
|
|
4344
|
-
|
|
4345
|
-
let styleMap;
|
|
4346
|
-
try {
|
|
4347
|
-
const parsed = JSON.parse(resultStr);
|
|
4348
|
-
url = parsed.screenshotUrl;
|
|
4349
|
-
styleMap = parsed.styleMap;
|
|
4350
|
-
} catch {
|
|
4351
|
-
}
|
|
4352
|
-
if (!url) {
|
|
4353
|
-
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
4340
|
+
const result = await runBrowserAutomation(task, context);
|
|
4341
|
+
if (!result.screenshot) {
|
|
4342
|
+
return result.text;
|
|
4354
4343
|
}
|
|
4355
4344
|
return await streamScreenshotAnalysis({
|
|
4356
|
-
url,
|
|
4345
|
+
url: result.screenshot.url,
|
|
4357
4346
|
prompt: input.prompt,
|
|
4358
|
-
styleMap,
|
|
4347
|
+
styleMap: result.screenshot.styleMap,
|
|
4359
4348
|
onLog,
|
|
4360
4349
|
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
4361
4350
|
});
|
package/dist/index.js
CHANGED
|
@@ -4176,6 +4176,121 @@ var init_prompt2 = __esm({
|
|
|
4176
4176
|
});
|
|
4177
4177
|
|
|
4178
4178
|
// src/subagents/browserAutomation/index.ts
|
|
4179
|
+
/**
 * Turns a caught value into text for a tool-result string.
 * Falls back to String(err) when the thrown value has no `message`
 * (e.g. a thrown string), instead of rendering "undefined".
 */
function describeBrowserToolError(err) {
  return err?.message ?? String(err);
}

/**
 * Runs one of the sub-agent's local tools ("setupBrowser" or
 * "screenshotFullPage"). Failures are returned as "Error ..." strings rather
 * than thrown, so the sub-agent receives them as the tool result.
 *
 * @param {string} name - Local tool name requested by the sub-agent.
 * @param {object} input - Tool input; `auth` and `path` are read from it.
 * @param {Function} onLog - Log callback forwarded to the screenshot capture.
 * @param {object} context - Execution context (`models` / `model` are read).
 * @returns {Promise<string>} Tool result, or an error description.
 */
async function executeLocalBrowserTool(name, input, onLog, context) {
  switch (name) {
    case "setupBrowser":
      try {
        const response = await sidecarRequest(
          "/setup-browser",
          { auth: input.auth, path: input.path },
          { timeout: 15e3 } // presumably milliseconds — confirm against sidecarRequest
        );
        return JSON.stringify(response);
      } catch (err) {
        return `Error setting up browser: ${describeBrowserToolError(err)}`;
      }
    case "screenshotFullPage":
      try {
        return await captureAndAnalyzeScreenshot({
          path: input.path,
          onLog,
          model: resolveModel("imageAnalysis", context.models, context.model)
        });
      } catch (err) {
        return `Error taking screenshot: ${describeBrowserToolError(err)}`;
      }
    default:
      return `Error: unknown local tool "${name}"`;
  }
}

/**
 * Post-processes a "browserCommand" result: every "screenshotViewport" step
 * that carries a `result.url` is sent through one `batch` CLI call of
 * "analyzeImage" steps, and each returned analysis is written to that step's
 * `result.analysis`.
 *
 * This is best-effort. If the result is not JSON, contains no such steps, or
 * the batch call fails, the raw result is returned unchanged. Batch failures
 * are logged at debug level instead of being swallowed silently.
 *
 * @param {string} rawResult - Result string from the external tool resolver.
 * @param {object} context - Execution context (`models` / `model` are read).
 * @returns {Promise<string>} The annotated JSON string, or `rawResult`.
 */
async function annotateViewportScreenshots(rawResult, context) {
  let parsed;
  try {
    parsed = JSON.parse(rawResult);
  } catch {
    // Non-JSON results (for example plain error strings) pass through as-is.
    return rawResult;
  }

  const steps = Array.isArray(parsed?.steps) ? parsed.steps : [];
  const shots = steps.filter(
    (step) => step?.command === "screenshotViewport" && step.result?.url
  );
  if (shots.length === 0) {
    return rawResult;
  }

  let batchResult;
  try {
    const visionModelOverride = {
      model: resolveModel("imageAnalysis", context.models, context.model)
    };
    const batchInput = shots.map((step) => ({
      stepType: "analyzeImage",
      step: {
        imageUrl: step.result.url,
        prompt: buildScreenshotAnalysisPrompt({ styleMap: step.result.styleMap }),
        visionModelOverride
      }
    }));
    batchResult = await runMindstudioCli(
      ["batch", JSON.stringify(batchInput)],
      { timeout: 2e5, caller: "browserAutomation" }
    );
  } catch (err) {
    log6.debug("Failed to run batch screenshot analysis", {
      error: describeBrowserToolError(err)
    });
    return rawResult;
  }

  try {
    const analyses = JSON.parse(batchResult);
    // Analyses are matched to screenshot steps by position in the batch.
    for (const [index, step] of shots.entries()) {
      if (index >= analyses.length) {
        break;
      }
      step.result.analysis = analyses[index]?.output?.analysis || analyses[index]?.output || "";
    }
  } catch {
    log6.debug("Failed to parse batch analysis result", {
      batchResult
    });
  }
  return JSON.stringify(parsed);
}

/**
 * Runs the browser-automation sub-agent for `task`.
 *
 * Awaits acquireBrowserLock() first and always calls the returned release
 * function in `finally`, including when the sub-agent throws. The sub-agent's
 * messages are stored in `context.subAgentMessages` (when present) under
 * `context.toolCallId`.
 *
 * @param {string} task - Instruction text handed to the sub-agent.
 * @param {object} context - Execution context supplying apiConfig, model(s),
 *   signal, ids, event callback, tool registry and the optional
 *   `resolveExternalTool` callback.
 * @returns {Promise<{text: string, screenshot?: {url: string, styleMap: *}}>}
 *   The sub-agent's text, plus `screenshot` only when a "screenshotFullPage"
 *   artifact with a `url` was captured.
 */
async function runBrowserAutomation(task, context) {
  const release = await acquireBrowserLock();
  try {
    const result = await runSubAgent({
      system: getBrowserAutomationPrompt(),
      task,
      tools: BROWSER_TOOLS,
      externalTools: BROWSER_EXTERNAL_TOOLS,
      executeTool: (name, _input, _toolCallId, onLog) => executeLocalBrowserTool(name, _input, onLog, context),
      apiConfig: context.apiConfig,
      model: resolveModel("browserAutomation", context.models, context.model),
      subAgentId: "browserAutomation",
      signal: context.signal,
      parentToolId: context.toolCallId,
      requestId: context.requestId,
      onEvent: context.onEvent,
      resolveExternalTool: async (id, name, input) => {
        if (!context.resolveExternalTool) {
          return "Error: no external tool resolver";
        }
        const rawResult = await context.resolveExternalTool(id, name, input);
        if (name !== "browserCommand") {
          return rawResult;
        }
        return annotateViewportScreenshots(rawResult, context);
      },
      toolRegistry: context.toolRegistry,
      captureArtifacts: ["screenshotFullPage"]
    });
    context.subAgentMessages?.set(context.toolCallId, result.messages);
    const shot = result.artifacts?.screenshotFullPage;
    return {
      text: result.text,
      ...(shot?.url ? { screenshot: { url: shot.url, styleMap: shot.styleMap } } : {})
    };
  } finally {
    release();
  }
}
|
|
4179
4294
|
var log6, browserAutomationTool;
|
|
4180
4295
|
var init_browserAutomation = __esm({
|
|
4181
4296
|
"src/subagents/browserAutomation/index.ts"() {
|
|
@@ -4210,121 +4325,13 @@ var init_browserAutomation = __esm({
|
|
|
4210
4325
|
if (!context) {
|
|
4211
4326
|
return "Error: browser automation requires execution context (only available in headless mode)";
|
|
4212
4327
|
}
|
|
4213
|
-
const
|
|
4214
|
-
|
|
4215
|
-
|
|
4216
|
-
system: getBrowserAutomationPrompt(),
|
|
4217
|
-
task: input.task,
|
|
4218
|
-
tools: BROWSER_TOOLS,
|
|
4219
|
-
externalTools: BROWSER_EXTERNAL_TOOLS,
|
|
4220
|
-
executeTool: async (name, _input, _toolCallId, onLog) => {
|
|
4221
|
-
if (name === "setupBrowser") {
|
|
4222
|
-
try {
|
|
4223
|
-
const result2 = await sidecarRequest(
|
|
4224
|
-
"/setup-browser",
|
|
4225
|
-
{
|
|
4226
|
-
auth: _input.auth,
|
|
4227
|
-
path: _input.path
|
|
4228
|
-
},
|
|
4229
|
-
{ timeout: 15e3 }
|
|
4230
|
-
);
|
|
4231
|
-
return JSON.stringify(result2);
|
|
4232
|
-
} catch (err) {
|
|
4233
|
-
return `Error setting up browser: ${err.message}`;
|
|
4234
|
-
}
|
|
4235
|
-
}
|
|
4236
|
-
if (name === "screenshotFullPage") {
|
|
4237
|
-
try {
|
|
4238
|
-
return await captureAndAnalyzeScreenshot({
|
|
4239
|
-
path: _input.path,
|
|
4240
|
-
onLog,
|
|
4241
|
-
model: resolveModel(
|
|
4242
|
-
"imageAnalysis",
|
|
4243
|
-
context.models,
|
|
4244
|
-
context.model
|
|
4245
|
-
)
|
|
4246
|
-
});
|
|
4247
|
-
} catch (err) {
|
|
4248
|
-
return `Error taking screenshot: ${err.message}`;
|
|
4249
|
-
}
|
|
4250
|
-
}
|
|
4251
|
-
return `Error: unknown local tool "${name}"`;
|
|
4252
|
-
},
|
|
4253
|
-
apiConfig: context.apiConfig,
|
|
4254
|
-
model: resolveModel("browserAutomation", context.models, context.model),
|
|
4255
|
-
subAgentId: "browserAutomation",
|
|
4256
|
-
signal: context.signal,
|
|
4257
|
-
parentToolId: context.toolCallId,
|
|
4258
|
-
requestId: context.requestId,
|
|
4259
|
-
onEvent: context.onEvent,
|
|
4260
|
-
resolveExternalTool: async (id, name, input2) => {
|
|
4261
|
-
if (!context.resolveExternalTool) {
|
|
4262
|
-
return "Error: no external tool resolver";
|
|
4263
|
-
}
|
|
4264
|
-
const result2 = await context.resolveExternalTool(id, name, input2);
|
|
4265
|
-
if (name === "browserCommand") {
|
|
4266
|
-
try {
|
|
4267
|
-
const parsed = JSON.parse(result2);
|
|
4268
|
-
const screenshotSteps = (parsed.steps || []).filter(
|
|
4269
|
-
(s) => s.command === "screenshotViewport" && s.result?.url
|
|
4270
|
-
);
|
|
4271
|
-
if (screenshotSteps.length > 0) {
|
|
4272
|
-
const visionOverride = {
|
|
4273
|
-
model: resolveModel(
|
|
4274
|
-
"imageAnalysis",
|
|
4275
|
-
context.models,
|
|
4276
|
-
context.model
|
|
4277
|
-
)
|
|
4278
|
-
};
|
|
4279
|
-
const batchInput = screenshotSteps.map((s) => ({
|
|
4280
|
-
stepType: "analyzeImage",
|
|
4281
|
-
step: {
|
|
4282
|
-
imageUrl: s.result.url,
|
|
4283
|
-
prompt: buildScreenshotAnalysisPrompt({
|
|
4284
|
-
styleMap: s.result.styleMap
|
|
4285
|
-
}),
|
|
4286
|
-
visionModelOverride: visionOverride
|
|
4287
|
-
}
|
|
4288
|
-
}));
|
|
4289
|
-
const batchResult = await runMindstudioCli(
|
|
4290
|
-
["batch", JSON.stringify(batchInput)],
|
|
4291
|
-
{ timeout: 2e5, caller: "browserAutomation" }
|
|
4292
|
-
);
|
|
4293
|
-
try {
|
|
4294
|
-
const analyses = JSON.parse(batchResult);
|
|
4295
|
-
let ai = 0;
|
|
4296
|
-
for (const step of parsed.steps) {
|
|
4297
|
-
if (step.command === "screenshotViewport" && step.result?.url && ai < analyses.length) {
|
|
4298
|
-
step.result.analysis = analyses[ai]?.output?.analysis || analyses[ai]?.output || "";
|
|
4299
|
-
ai++;
|
|
4300
|
-
}
|
|
4301
|
-
}
|
|
4302
|
-
} catch {
|
|
4303
|
-
log6.debug("Failed to parse batch analysis result", {
|
|
4304
|
-
batchResult
|
|
4305
|
-
});
|
|
4306
|
-
}
|
|
4307
|
-
return JSON.stringify(parsed);
|
|
4308
|
-
}
|
|
4309
|
-
} catch {
|
|
4310
|
-
}
|
|
4311
|
-
}
|
|
4312
|
-
return result2;
|
|
4313
|
-
},
|
|
4314
|
-
toolRegistry: context.toolRegistry,
|
|
4315
|
-
captureArtifacts: ["screenshotFullPage"]
|
|
4316
|
-
});
|
|
4317
|
-
context.subAgentMessages?.set(context.toolCallId, result.messages);
|
|
4318
|
-
const ss = result.artifacts?.screenshotFullPage;
|
|
4319
|
-
if (ss?.url) {
|
|
4320
|
-
return `${result.text}
|
|
4328
|
+
const result = await runBrowserAutomation(input.task, context);
|
|
4329
|
+
if (result.screenshot) {
|
|
4330
|
+
return `${result.text}
|
|
4321
4331
|
|
|
4322
|
-
;
|
|
4332
|
+
`;
|
|
4327
4333
|
}
|
|
4334
|
+
return result.text;
|
|
4328
4335
|
}
|
|
4329
4336
|
};
|
|
4330
4337
|
}
|
|
@@ -4378,23 +4385,14 @@ var init_screenshot2 = __esm({
|
|
|
4378
4385
|
}
|
|
4379
4386
|
if (input.instructions && context) {
|
|
4380
4387
|
const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
|
|
4381
|
-
const result = await
|
|
4382
|
-
|
|
4383
|
-
|
|
4384
|
-
let styleMap;
|
|
4385
|
-
try {
|
|
4386
|
-
const parsed = JSON.parse(resultStr);
|
|
4387
|
-
url = parsed.screenshotUrl;
|
|
4388
|
-
styleMap = parsed.styleMap;
|
|
4389
|
-
} catch {
|
|
4390
|
-
}
|
|
4391
|
-
if (!url) {
|
|
4392
|
-
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
4388
|
+
const result = await runBrowserAutomation(task, context);
|
|
4389
|
+
if (!result.screenshot) {
|
|
4390
|
+
return result.text;
|
|
4393
4391
|
}
|
|
4394
4392
|
return await streamScreenshotAnalysis({
|
|
4395
|
-
url,
|
|
4393
|
+
url: result.screenshot.url,
|
|
4396
4394
|
prompt: input.prompt,
|
|
4397
|
-
styleMap,
|
|
4395
|
+
styleMap: result.screenshot.styleMap,
|
|
4398
4396
|
onLog: context?.onLog,
|
|
4399
4397
|
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
4400
4398
|
});
|
|
@@ -4744,23 +4742,14 @@ async function execute5(input, onLog, context) {
|
|
|
4744
4742
|
if (input.instructions && context) {
|
|
4745
4743
|
try {
|
|
4746
4744
|
const task = input.path ? `Navigate to "${input.path}", then: ${input.instructions}. After completing these steps, take a full-page screenshot.` : `${input.instructions}. After completing these steps, take a full-page screenshot.`;
|
|
4747
|
-
const result = await
|
|
4748
|
-
|
|
4749
|
-
|
|
4750
|
-
let styleMap;
|
|
4751
|
-
try {
|
|
4752
|
-
const parsed = JSON.parse(resultStr);
|
|
4753
|
-
url = parsed.screenshotUrl;
|
|
4754
|
-
styleMap = parsed.styleMap;
|
|
4755
|
-
} catch {
|
|
4756
|
-
}
|
|
4757
|
-
if (!url) {
|
|
4758
|
-
return `Error: browser navigation completed but no screenshot URL was returned. Agent output: ${resultStr}`;
|
|
4745
|
+
const result = await runBrowserAutomation(task, context);
|
|
4746
|
+
if (!result.screenshot) {
|
|
4747
|
+
return result.text;
|
|
4759
4748
|
}
|
|
4760
4749
|
return await streamScreenshotAnalysis({
|
|
4761
|
-
url,
|
|
4750
|
+
url: result.screenshot.url,
|
|
4762
4751
|
prompt: input.prompt,
|
|
4763
|
-
styleMap,
|
|
4752
|
+
styleMap: result.screenshot.styleMap,
|
|
4764
4753
|
onLog,
|
|
4765
4754
|
model: resolveModel("imageAnalysis", context?.models, context?.model)
|
|
4766
4755
|
});
|