@midscene/web 0.11.3 → 0.11.4-beta-20250220011346.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/appium.js +109 -76
- package/dist/es/bridge-mode-browser.js +11 -5
- package/dist/es/bridge-mode.js +113 -80
- package/dist/es/chrome-extension.js +115 -76
- package/dist/es/index.js +113 -81
- package/dist/es/midscene-playground.js +106 -73
- package/dist/es/playground.js +106 -73
- package/dist/es/playwright.js +111 -81
- package/dist/es/puppeteer.js +111 -81
- package/dist/es/ui-utils.js +1 -1
- package/dist/es/utils.js +7 -5
- package/dist/es/yaml.js +6 -2
- package/dist/lib/appium.js +109 -76
- package/dist/lib/bridge-mode-browser.js +11 -5
- package/dist/lib/bridge-mode.js +113 -80
- package/dist/lib/chrome-extension.js +115 -76
- package/dist/lib/index.js +113 -81
- package/dist/lib/midscene-playground.js +106 -73
- package/dist/lib/playground.js +106 -73
- package/dist/lib/playwright.js +111 -81
- package/dist/lib/puppeteer.js +111 -81
- package/dist/lib/ui-utils.js +1 -1
- package/dist/lib/utils.js +7 -5
- package/dist/lib/yaml.js +6 -2
- package/dist/types/{agent-ae110e80.d.ts → agent-9164441e.d.ts} +3 -3
- package/dist/types/appium.d.ts +3 -3
- package/dist/types/bridge-mode-browser.d.ts +3 -3
- package/dist/types/bridge-mode.d.ts +4 -4
- package/dist/types/{browser-9d620553.d.ts → browser-1edfa087.d.ts} +1 -1
- package/dist/types/chrome-extension.d.ts +4 -4
- package/dist/types/index.d.ts +4 -4
- package/dist/types/{page-97720803.d.ts → page-7a38974a.d.ts} +4 -2
- package/dist/types/playground.d.ts +4 -4
- package/dist/types/playwright.d.ts +3 -3
- package/dist/types/puppeteer.d.ts +3 -3
- package/dist/types/{utils-93b3f5f3.d.ts → utils-3fd47dad.d.ts} +1 -1
- package/dist/types/utils.d.ts +2 -2
- package/dist/types/yaml.d.ts +4 -4
- package/package.json +3 -3
package/dist/es/appium.js
CHANGED
|
@@ -452,11 +452,15 @@ var ScriptPlayer = class {
|
|
|
452
452
|
} else if (flowItem.sleep) {
|
|
453
453
|
const sleepTask = flowItem;
|
|
454
454
|
const ms = sleepTask.sleep;
|
|
455
|
+
let msNumber = ms;
|
|
456
|
+
if (typeof ms === "string") {
|
|
457
|
+
msNumber = Number.parseInt(ms, 10);
|
|
458
|
+
}
|
|
455
459
|
(0, import_node_assert.default)(
|
|
456
|
-
|
|
460
|
+
msNumber && msNumber > 0,
|
|
457
461
|
`ms for sleep must be greater than 0, but got ${ms}`
|
|
458
462
|
);
|
|
459
|
-
await new Promise((resolve) => setTimeout(resolve,
|
|
463
|
+
await new Promise((resolve) => setTimeout(resolve, msNumber));
|
|
460
464
|
} else {
|
|
461
465
|
throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
|
|
462
466
|
}
|
|
@@ -3243,13 +3247,15 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
|
|
|
3243
3247
|
}
|
|
3244
3248
|
|
|
3245
3249
|
// src/common/agent.ts
|
|
3246
|
-
var
|
|
3250
|
+
var import_env5 = require("@midscene/core/env");
|
|
3247
3251
|
var import_utils6 = require("@midscene/core/utils");
|
|
3248
3252
|
|
|
3249
3253
|
// src/common/tasks.ts
|
|
3250
3254
|
var import_node_assert3 = __toESM(require("assert"));
|
|
3251
3255
|
var import_core = require("@midscene/core");
|
|
3252
3256
|
var import_ai_model = require("@midscene/core/ai-model");
|
|
3257
|
+
var import_env2 = require("@midscene/core/env");
|
|
3258
|
+
var import_env3 = require("@midscene/core/env");
|
|
3253
3259
|
var import_utils3 = require("@midscene/core/utils");
|
|
3254
3260
|
|
|
3255
3261
|
// src/common/task-cache.ts
|
|
@@ -3422,7 +3428,7 @@ function paramStr(task) {
|
|
|
3422
3428
|
var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k;
|
|
3423
3429
|
let value;
|
|
3424
3430
|
if (task.type === "Planning") {
|
|
3425
|
-
value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.
|
|
3431
|
+
value = (_a = task == null ? void 0 : task.param) == null ? void 0 : _a.userInstruction;
|
|
3426
3432
|
}
|
|
3427
3433
|
if (task.type === "Insight") {
|
|
3428
3434
|
value = ((_b = task == null ? void 0 : task.param) == null ? void 0 : _b.prompt) || ((_c = task == null ? void 0 : task.param) == null ? void 0 : _c.id) || ((_d = task == null ? void 0 : task.param) == null ? void 0 : _d.dataDemand) || ((_e = task == null ? void 0 : task.param) == null ? void 0 : _e.assertion);
|
|
@@ -3449,6 +3455,7 @@ function paramStr(task) {
|
|
|
3449
3455
|
}
|
|
3450
3456
|
|
|
3451
3457
|
// src/common/tasks.ts
|
|
3458
|
+
var replanningCountLimit = 10;
|
|
3452
3459
|
var PageTaskExecutor = class {
|
|
3453
3460
|
constructor(page, insight, opts) {
|
|
3454
3461
|
this.conversationHistory = [];
|
|
@@ -3507,9 +3514,9 @@ var PageTaskExecutor = class {
|
|
|
3507
3514
|
async convertPlanToExecutable(plans, cacheGroup) {
|
|
3508
3515
|
const tasks = [];
|
|
3509
3516
|
plans.forEach((plan2) => {
|
|
3510
|
-
var _a, _b;
|
|
3517
|
+
var _a, _b, _c;
|
|
3511
3518
|
if (plan2.type === "Locate") {
|
|
3512
|
-
if (((_a = plan2.locate) == null ? void 0 : _a.id) === null || ((_b = plan2.locate) == null ? void 0 : _b.id) === "null") {
|
|
3519
|
+
if (plan2.locate === null || ((_a = plan2.locate) == null ? void 0 : _a.id) === null || ((_b = plan2.locate) == null ? void 0 : _b.id) === "null") {
|
|
3513
3520
|
return;
|
|
3514
3521
|
}
|
|
3515
3522
|
const taskFind = {
|
|
@@ -3521,12 +3528,15 @@ var PageTaskExecutor = class {
|
|
|
3521
3528
|
executor: async (param, taskContext) => {
|
|
3522
3529
|
const { task } = taskContext;
|
|
3523
3530
|
(0, import_node_assert3.default)(
|
|
3524
|
-
(param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id) || (param == null ? void 0 : param.position),
|
|
3525
|
-
"No prompt or id or position to locate"
|
|
3531
|
+
(param == null ? void 0 : param.prompt) || (param == null ? void 0 : param.id) || (param == null ? void 0 : param.position) || (param == null ? void 0 : param.bbox),
|
|
3532
|
+
"No prompt or id or position or bbox to locate"
|
|
3526
3533
|
);
|
|
3527
3534
|
let insightDump;
|
|
3535
|
+
let usage;
|
|
3528
3536
|
const dumpCollector = (dump2) => {
|
|
3537
|
+
var _a2;
|
|
3529
3538
|
insightDump = dump2;
|
|
3539
|
+
usage = (_a2 = dump2 == null ? void 0 : dump2.taskInfo) == null ? void 0 : _a2.usage;
|
|
3530
3540
|
};
|
|
3531
3541
|
this.insight.onceDumpUpdatedFn = dumpCollector;
|
|
3532
3542
|
const shotTime = Date.now();
|
|
@@ -3546,7 +3556,8 @@ var PageTaskExecutor = class {
|
|
|
3546
3556
|
const callAI = this.insight.aiVendorFn;
|
|
3547
3557
|
const quickAnswer = {
|
|
3548
3558
|
id: param == null ? void 0 : param.id,
|
|
3549
|
-
position: param == null ? void 0 : param.position
|
|
3559
|
+
position: param == null ? void 0 : param.position,
|
|
3560
|
+
bbox: param == null ? void 0 : param.bbox
|
|
3550
3561
|
};
|
|
3551
3562
|
const startTime = Date.now();
|
|
3552
3563
|
const element = await this.insight.locate(param.prompt, {
|
|
@@ -3556,8 +3567,8 @@ var PageTaskExecutor = class {
|
|
|
3556
3567
|
locateResult = locateCache;
|
|
3557
3568
|
return Promise.resolve({ content: locateCache });
|
|
3558
3569
|
}
|
|
3559
|
-
const { content: aiResult, usage } = await callAI(...message);
|
|
3560
|
-
return { content: aiResult, usage };
|
|
3570
|
+
const { content: aiResult, usage: usage2 } = await callAI(...message);
|
|
3571
|
+
return { content: aiResult, usage: usage2 };
|
|
3561
3572
|
}
|
|
3562
3573
|
});
|
|
3563
3574
|
const aiCost = Date.now() - startTime;
|
|
@@ -3590,7 +3601,8 @@ var PageTaskExecutor = class {
|
|
|
3590
3601
|
hit: Boolean(locateCache)
|
|
3591
3602
|
},
|
|
3592
3603
|
recorder: [recordItem],
|
|
3593
|
-
aiCost
|
|
3604
|
+
aiCost,
|
|
3605
|
+
usage
|
|
3594
3606
|
};
|
|
3595
3607
|
}
|
|
3596
3608
|
};
|
|
@@ -3789,12 +3801,12 @@ var PageTaskExecutor = class {
|
|
|
3789
3801
|
}
|
|
3790
3802
|
};
|
|
3791
3803
|
tasks.push(taskActionError);
|
|
3792
|
-
} else if (plan2.type === "
|
|
3804
|
+
} else if (plan2.type === "ExpectedFalsyCondition") {
|
|
3793
3805
|
const taskActionFalsyConditionStatement = {
|
|
3794
3806
|
type: "Action",
|
|
3795
|
-
subType: "
|
|
3807
|
+
subType: "ExpectedFalsyCondition",
|
|
3796
3808
|
param: null,
|
|
3797
|
-
thought: plan2.
|
|
3809
|
+
thought: (_c = plan2.param) == null ? void 0 : _c.reason,
|
|
3798
3810
|
locate: plan2.locate,
|
|
3799
3811
|
executor: async () => {
|
|
3800
3812
|
}
|
|
@@ -3830,14 +3842,13 @@ var PageTaskExecutor = class {
|
|
|
3830
3842
|
tasks: wrappedTasks
|
|
3831
3843
|
};
|
|
3832
3844
|
}
|
|
3833
|
-
planningTaskFromPrompt(
|
|
3845
|
+
planningTaskFromPrompt(userInstruction, cacheGroup, log) {
|
|
3834
3846
|
const task = {
|
|
3835
3847
|
type: "Planning",
|
|
3836
3848
|
locate: null,
|
|
3837
3849
|
param: {
|
|
3838
|
-
|
|
3839
|
-
|
|
3840
|
-
originalPrompt
|
|
3850
|
+
userInstruction,
|
|
3851
|
+
log
|
|
3841
3852
|
},
|
|
3842
3853
|
executor: async (param, executorContext) => {
|
|
3843
3854
|
const shotTime = Date.now();
|
|
@@ -3850,80 +3861,101 @@ var PageTaskExecutor = class {
|
|
|
3850
3861
|
};
|
|
3851
3862
|
executorContext.task.recorder = [recordItem];
|
|
3852
3863
|
executorContext.task.pageContext = pageContext;
|
|
3853
|
-
const
|
|
3864
|
+
const qwenMode = (0, import_env2.getAIConfigInBoolean)(import_env3.MIDSCENE_USE_QWEN_VL);
|
|
3865
|
+
const planCache = cacheGroup.readCache(
|
|
3866
|
+
pageContext,
|
|
3867
|
+
"plan",
|
|
3868
|
+
param.userInstruction
|
|
3869
|
+
);
|
|
3854
3870
|
let planResult;
|
|
3855
|
-
if (planCache) {
|
|
3871
|
+
if (planCache && !qwenMode) {
|
|
3856
3872
|
planResult = planCache;
|
|
3857
3873
|
} else {
|
|
3858
|
-
planResult = await (0, import_core.plan)(param.
|
|
3874
|
+
planResult = await (0, import_core.plan)(param.userInstruction, {
|
|
3859
3875
|
context: pageContext,
|
|
3860
|
-
|
|
3861
|
-
originalPrompt: param.originalPrompt
|
|
3876
|
+
log: param.log
|
|
3862
3877
|
});
|
|
3863
3878
|
}
|
|
3864
|
-
const { actions,
|
|
3879
|
+
const { actions, log: log2, finish, error, usage, rawResponse, sleep: sleep3 } = planResult;
|
|
3865
3880
|
let stopCollecting = false;
|
|
3881
|
+
let bboxCollected = false;
|
|
3882
|
+
let planParsingError = "";
|
|
3866
3883
|
const finalActions = actions.reduce(
|
|
3867
3884
|
(acc, planningAction) => {
|
|
3868
3885
|
if (stopCollecting) {
|
|
3869
3886
|
return acc;
|
|
3870
3887
|
}
|
|
3871
3888
|
if (planningAction.locate) {
|
|
3889
|
+
if (bboxCollected && planningAction.locate.bbox) {
|
|
3890
|
+
delete planningAction.locate.bbox;
|
|
3891
|
+
}
|
|
3892
|
+
if (planningAction.locate.bbox) {
|
|
3893
|
+
bboxCollected = true;
|
|
3894
|
+
}
|
|
3872
3895
|
acc.push({
|
|
3873
3896
|
type: "Locate",
|
|
3874
3897
|
locate: planningAction.locate,
|
|
3875
|
-
// remove id from planning, since the result is not accurate
|
|
3876
|
-
// locate: {
|
|
3877
|
-
// prompt: planningAction.locate.prompt,
|
|
3878
|
-
// },
|
|
3879
3898
|
param: null,
|
|
3880
3899
|
thought: planningAction.locate.prompt
|
|
3881
3900
|
});
|
|
3882
3901
|
} else if (["Tap", "Hover", "Input"].includes(planningAction.type)) {
|
|
3902
|
+
planParsingError = `invalid planning response: ${JSON.stringify(planningAction)}`;
|
|
3883
3903
|
stopCollecting = true;
|
|
3884
3904
|
return acc;
|
|
3885
3905
|
}
|
|
3886
3906
|
acc.push(planningAction);
|
|
3907
|
+
if (planResult.sleep) {
|
|
3908
|
+
acc.push({
|
|
3909
|
+
type: "Sleep",
|
|
3910
|
+
param: {
|
|
3911
|
+
timeMs: planResult.sleep
|
|
3912
|
+
},
|
|
3913
|
+
locate: null
|
|
3914
|
+
});
|
|
3915
|
+
}
|
|
3887
3916
|
return acc;
|
|
3888
3917
|
},
|
|
3889
3918
|
[]
|
|
3890
3919
|
);
|
|
3891
|
-
(0
|
|
3892
|
-
|
|
3893
|
-
|
|
3894
|
-
|
|
3920
|
+
if (finalActions.length === 0) {
|
|
3921
|
+
(0, import_node_assert3.default)(
|
|
3922
|
+
finish,
|
|
3923
|
+
error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
|
|
3924
|
+
);
|
|
3925
|
+
}
|
|
3895
3926
|
cacheGroup.saveCache({
|
|
3896
3927
|
type: "plan",
|
|
3897
3928
|
pageContext: {
|
|
3898
3929
|
url: pageContext.url,
|
|
3899
3930
|
size: pageContext.size
|
|
3900
3931
|
},
|
|
3901
|
-
prompt:
|
|
3932
|
+
prompt: userInstruction,
|
|
3902
3933
|
response: planResult
|
|
3903
3934
|
});
|
|
3904
3935
|
return {
|
|
3905
3936
|
output: {
|
|
3906
3937
|
actions: finalActions,
|
|
3907
|
-
|
|
3908
|
-
|
|
3938
|
+
finish,
|
|
3939
|
+
log: log2
|
|
3909
3940
|
},
|
|
3910
3941
|
cache: {
|
|
3911
3942
|
hit: Boolean(planCache)
|
|
3912
3943
|
},
|
|
3913
3944
|
pageContext,
|
|
3914
|
-
|
|
3915
|
-
|
|
3945
|
+
recorder: [recordItem],
|
|
3946
|
+
usage,
|
|
3947
|
+
rawResponse
|
|
3916
3948
|
};
|
|
3917
3949
|
}
|
|
3918
3950
|
};
|
|
3919
3951
|
return task;
|
|
3920
3952
|
}
|
|
3921
|
-
planningTaskToGoal(
|
|
3953
|
+
planningTaskToGoal(userInstruction, cacheGroup) {
|
|
3922
3954
|
const task = {
|
|
3923
3955
|
type: "Planning",
|
|
3924
3956
|
locate: null,
|
|
3925
3957
|
param: {
|
|
3926
|
-
|
|
3958
|
+
userInstruction
|
|
3927
3959
|
},
|
|
3928
3960
|
executor: async (param, executorContext) => {
|
|
3929
3961
|
var _a;
|
|
@@ -3952,14 +3984,14 @@ var PageTaskExecutor = class {
|
|
|
3952
3984
|
const planCache = cacheGroup.readCache(
|
|
3953
3985
|
pageContext,
|
|
3954
3986
|
"ui-tars-plan",
|
|
3955
|
-
|
|
3987
|
+
userInstruction
|
|
3956
3988
|
);
|
|
3957
3989
|
let planResult;
|
|
3958
3990
|
if (planCache) {
|
|
3959
3991
|
planResult = planCache;
|
|
3960
3992
|
} else {
|
|
3961
3993
|
planResult = await (0, import_ai_model.vlmPlanning)({
|
|
3962
|
-
userInstruction: param.
|
|
3994
|
+
userInstruction: param.userInstruction,
|
|
3963
3995
|
conversationHistory: this.conversationHistory,
|
|
3964
3996
|
size: pageContext.size
|
|
3965
3997
|
});
|
|
@@ -3970,7 +4002,7 @@ var PageTaskExecutor = class {
|
|
|
3970
4002
|
url: pageContext.url,
|
|
3971
4003
|
size: pageContext.size
|
|
3972
4004
|
},
|
|
3973
|
-
prompt:
|
|
4005
|
+
prompt: userInstruction,
|
|
3974
4006
|
response: planResult
|
|
3975
4007
|
});
|
|
3976
4008
|
const aiCost = Date.now() - startTime;
|
|
@@ -3984,11 +4016,8 @@ var PageTaskExecutor = class {
|
|
|
3984
4016
|
actions,
|
|
3985
4017
|
thought: (_a = actions[0]) == null ? void 0 : _a.thought,
|
|
3986
4018
|
actionType: actions[0].type,
|
|
3987
|
-
|
|
3988
|
-
|
|
3989
|
-
whatToDoNext: "",
|
|
3990
|
-
whatHaveDone: ""
|
|
3991
|
-
}
|
|
4019
|
+
finish: false,
|
|
4020
|
+
log: ""
|
|
3992
4021
|
},
|
|
3993
4022
|
cache: {
|
|
3994
4023
|
hit: Boolean(planCache)
|
|
@@ -4000,23 +4029,19 @@ var PageTaskExecutor = class {
|
|
|
4000
4029
|
return task;
|
|
4001
4030
|
}
|
|
4002
4031
|
async action(userPrompt, options) {
|
|
4003
|
-
var _a;
|
|
4004
4032
|
const taskExecutor = new import_core.Executor(userPrompt, void 0, void 0, {
|
|
4005
4033
|
onTaskStart: options == null ? void 0 : options.onTaskStart
|
|
4006
4034
|
});
|
|
4007
4035
|
const cacheGroup = this.taskCache.getCacheGroupByPrompt(userPrompt);
|
|
4008
|
-
|
|
4009
|
-
let planningTask = this.planningTaskFromPrompt(originalPrompt, cacheGroup);
|
|
4036
|
+
let planningTask = this.planningTaskFromPrompt(userPrompt, cacheGroup);
|
|
4010
4037
|
let result;
|
|
4011
4038
|
let replanCount = 0;
|
|
4039
|
+
const logLog = [];
|
|
4012
4040
|
while (planningTask) {
|
|
4013
|
-
if (replanCount >
|
|
4041
|
+
if (replanCount > replanningCountLimit) {
|
|
4014
4042
|
const errorMsg = "Replanning too many times, please split the task into multiple steps";
|
|
4015
4043
|
return this.appendErrorPlan(taskExecutor, errorMsg);
|
|
4016
4044
|
}
|
|
4017
|
-
if (replanCount > 0) {
|
|
4018
|
-
await (0, import_utils3.sleep)(300);
|
|
4019
|
-
}
|
|
4020
4045
|
await taskExecutor.append(planningTask);
|
|
4021
4046
|
const planResult = await taskExecutor.flush();
|
|
4022
4047
|
if (taskExecutor.isInErrorState()) {
|
|
@@ -4045,18 +4070,19 @@ var PageTaskExecutor = class {
|
|
|
4045
4070
|
executor: taskExecutor
|
|
4046
4071
|
};
|
|
4047
4072
|
}
|
|
4048
|
-
if (
|
|
4049
|
-
|
|
4050
|
-
|
|
4051
|
-
|
|
4052
|
-
planResult.furtherPlan.whatHaveDone,
|
|
4053
|
-
originalPrompt
|
|
4054
|
-
);
|
|
4055
|
-
replanCount++;
|
|
4056
|
-
} else {
|
|
4073
|
+
if (planResult == null ? void 0 : planResult.log) {
|
|
4074
|
+
logLog.push(planResult.log);
|
|
4075
|
+
}
|
|
4076
|
+
if (planResult.finish) {
|
|
4057
4077
|
planningTask = null;
|
|
4058
4078
|
break;
|
|
4059
4079
|
}
|
|
4080
|
+
planningTask = this.planningTaskFromPrompt(
|
|
4081
|
+
userPrompt,
|
|
4082
|
+
cacheGroup,
|
|
4083
|
+
logLog.join("\n")
|
|
4084
|
+
);
|
|
4085
|
+
replanCount++;
|
|
4060
4086
|
}
|
|
4061
4087
|
return {
|
|
4062
4088
|
output: result,
|
|
@@ -4210,7 +4236,9 @@ var PageTaskExecutor = class {
|
|
|
4210
4236
|
}
|
|
4211
4237
|
async waitFor(assertion, opt) {
|
|
4212
4238
|
const description = `waitFor: ${assertion}`;
|
|
4213
|
-
const taskExecutor = new import_core.Executor(description
|
|
4239
|
+
const taskExecutor = new import_core.Executor(description, void 0, void 0, {
|
|
4240
|
+
onTaskStart: opt.onTaskStart
|
|
4241
|
+
});
|
|
4214
4242
|
const { timeoutMs, checkIntervalMs } = opt;
|
|
4215
4243
|
(0, import_node_assert3.default)(assertion, "No assertion for waitFor");
|
|
4216
4244
|
(0, import_node_assert3.default)(timeoutMs, "No timeoutMs for waitFor");
|
|
@@ -4295,7 +4323,7 @@ var WebElementInfo = class {
|
|
|
4295
4323
|
var import_node_assert4 = __toESM(require("assert"));
|
|
4296
4324
|
var import_node_fs3 = require("fs");
|
|
4297
4325
|
var import_node_path3 = __toESM(require("path"));
|
|
4298
|
-
var
|
|
4326
|
+
var import_env4 = require("@midscene/core/env");
|
|
4299
4327
|
var import_utils4 = require("@midscene/core/utils");
|
|
4300
4328
|
var import_constants = require("@midscene/shared/constants");
|
|
4301
4329
|
var import_extractor = require("@midscene/shared/extractor");
|
|
@@ -4342,14 +4370,16 @@ async function parseContextFromWebPage(page, _opt) {
|
|
|
4342
4370
|
}
|
|
4343
4371
|
);
|
|
4344
4372
|
const size = await page.size();
|
|
4373
|
+
if (size.dpr && size.dpr > 1) {
|
|
4374
|
+
screenshotBase64 = await (0, import_img.resizeImgBase64)(screenshotBase64, {
|
|
4375
|
+
width: size.width,
|
|
4376
|
+
height: size.height
|
|
4377
|
+
});
|
|
4378
|
+
}
|
|
4345
4379
|
let screenshotBase64WithElementMarker = screenshotBase64;
|
|
4346
|
-
if (!(0,
|
|
4380
|
+
if (!(0, import_env4.getAIConfig)(import_env4.MIDSCENE_USE_VLM_UI_TARS)) {
|
|
4347
4381
|
if (_opt == null ? void 0 : _opt.ignoreMarker) {
|
|
4348
|
-
screenshotBase64WithElementMarker =
|
|
4349
|
-
inputImgBase64: screenshotBase64,
|
|
4350
|
-
elementsPositionInfo: [],
|
|
4351
|
-
size
|
|
4352
|
-
});
|
|
4382
|
+
screenshotBase64WithElementMarker = screenshotBase64;
|
|
4353
4383
|
} else {
|
|
4354
4384
|
screenshotBase64WithElementMarker = await (0, import_img.compositeElementInfoImg)({
|
|
4355
4385
|
inputImgBase64: screenshotBase64,
|
|
@@ -4368,7 +4398,7 @@ async function parseContextFromWebPage(page, _opt) {
|
|
|
4368
4398
|
};
|
|
4369
4399
|
}
|
|
4370
4400
|
function reportFileName(tag = "web") {
|
|
4371
|
-
const reportTagName = (0,
|
|
4401
|
+
const reportTagName = (0, import_env4.getAIConfig)(import_env4.MIDSCENE_REPORT_TAG_NAME);
|
|
4372
4402
|
const dateTimeInFileName = (0, import_dayjs.default)().format("YYYY-MM-DD_HH-mm-ss-SSS");
|
|
4373
4403
|
return `${reportTagName || tag}-${dateTimeInFileName}`;
|
|
4374
4404
|
}
|
|
@@ -4421,7 +4451,9 @@ var PageAgent = class {
|
|
|
4421
4451
|
ignoreMarker: true
|
|
4422
4452
|
});
|
|
4423
4453
|
}
|
|
4424
|
-
return await parseContextFromWebPage(this.page
|
|
4454
|
+
return await parseContextFromWebPage(this.page, {
|
|
4455
|
+
ignoreMarker: (0, import_env5.getAIConfigInBoolean)(import_env5.MATCH_BY_POSITION)
|
|
4456
|
+
});
|
|
4425
4457
|
}
|
|
4426
4458
|
resetDump() {
|
|
4427
4459
|
this.dump = {
|
|
@@ -4468,7 +4500,7 @@ var PageAgent = class {
|
|
|
4468
4500
|
}
|
|
4469
4501
|
}
|
|
4470
4502
|
async aiAction(taskPrompt) {
|
|
4471
|
-
if ((0,
|
|
4503
|
+
if ((0, import_env5.getAIConfig)(import_env5.MIDSCENE_USE_VLM_UI_TARS)) {
|
|
4472
4504
|
const { executor } = await this.taskExecutor.actionToGoal(taskPrompt, {
|
|
4473
4505
|
onTaskStart: this.callbackOnTaskStartTip.bind(this)
|
|
4474
4506
|
});
|
|
@@ -4526,7 +4558,8 @@ ${reasonMsg}`);
|
|
|
4526
4558
|
const { executor } = await this.taskExecutor.waitFor(assertion, {
|
|
4527
4559
|
timeoutMs: (opt == null ? void 0 : opt.timeoutMs) || 15 * 1e3,
|
|
4528
4560
|
checkIntervalMs: (opt == null ? void 0 : opt.checkIntervalMs) || 3 * 1e3,
|
|
4529
|
-
assertion
|
|
4561
|
+
assertion,
|
|
4562
|
+
onTaskStart: this.callbackOnTaskStartTip.bind(this)
|
|
4530
4563
|
});
|
|
4531
4564
|
this.appendExecutionDump(executor.dump());
|
|
4532
4565
|
this.writeOutActionDumps();
|
|
@@ -291,11 +291,13 @@ function sleep(ms) {
|
|
|
291
291
|
var ChromeExtensionProxyPage = class {
|
|
292
292
|
constructor(forceSameTabNavigation) {
|
|
293
293
|
this.pageType = "chrome-extension-proxy";
|
|
294
|
-
this.version = "0.11.
|
|
294
|
+
this.version = "0.11.4-beta-20250220011346.0";
|
|
295
295
|
this.activeTabId = null;
|
|
296
296
|
this.tabIdOfDebuggerAttached = null;
|
|
297
297
|
this.attachingDebugger = null;
|
|
298
298
|
this.destroyed = false;
|
|
299
|
+
this.latestMouseX = 50;
|
|
300
|
+
this.latestMouseY = 50;
|
|
299
301
|
this.mouse = {
|
|
300
302
|
click: async (x, y) => {
|
|
301
303
|
await this.showMousePointer(x, y);
|
|
@@ -315,8 +317,8 @@ var ChromeExtensionProxyPage = class {
|
|
|
315
317
|
});
|
|
316
318
|
},
|
|
317
319
|
wheel: async (deltaX, deltaY, startX, startY) => {
|
|
318
|
-
const finalX = startX ||
|
|
319
|
-
const finalY = startY ||
|
|
320
|
+
const finalX = startX || this.latestMouseX;
|
|
321
|
+
const finalY = startY || this.latestMouseY;
|
|
320
322
|
await this.showMousePointer(finalX, finalY);
|
|
321
323
|
await this.sendCommandToDebugger("Input.dispatchMouseEvent", {
|
|
322
324
|
type: "mouseWheel",
|
|
@@ -325,6 +327,8 @@ var ChromeExtensionProxyPage = class {
|
|
|
325
327
|
deltaX,
|
|
326
328
|
deltaY
|
|
327
329
|
});
|
|
330
|
+
this.latestMouseX = finalX;
|
|
331
|
+
this.latestMouseY = finalY;
|
|
328
332
|
},
|
|
329
333
|
move: async (x, y) => {
|
|
330
334
|
await this.showMousePointer(x, y);
|
|
@@ -333,6 +337,8 @@ var ChromeExtensionProxyPage = class {
|
|
|
333
337
|
x,
|
|
334
338
|
y
|
|
335
339
|
});
|
|
340
|
+
this.latestMouseX = x;
|
|
341
|
+
this.latestMouseY = y;
|
|
336
342
|
},
|
|
337
343
|
drag: async (from, to) => {
|
|
338
344
|
await this.mouse.move(from.x, from.y);
|
|
@@ -690,7 +696,7 @@ var BridgeClient = class {
|
|
|
690
696
|
this.socket = (0, import_socket.io)(this.endpoint, {
|
|
691
697
|
reconnection: false,
|
|
692
698
|
query: {
|
|
693
|
-
version: "0.11.
|
|
699
|
+
version: "0.11.4-beta-20250220011346.0"
|
|
694
700
|
}
|
|
695
701
|
});
|
|
696
702
|
const timeout = setTimeout(() => {
|
|
@@ -812,7 +818,7 @@ var ChromeExtensionPageBrowserSide = class extends ChromeExtensionProxyPage {
|
|
|
812
818
|
);
|
|
813
819
|
await this.bridgeClient.connect();
|
|
814
820
|
this.onLogMessage(
|
|
815
|
-
`Bridge connected, cli-side version v${this.bridgeClient.serverVersion}, browser-side version v${"0.11.
|
|
821
|
+
`Bridge connected, cli-side version v${this.bridgeClient.serverVersion}, browser-side version v${"0.11.4-beta-20250220011346.0"}`,
|
|
816
822
|
"log"
|
|
817
823
|
);
|
|
818
824
|
}
|