@midscene/core 0.30.10 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +233 -144
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/execution-session.mjs +41 -0
- package/dist/es/agent/execution-session.mjs.map +1 -0
- package/dist/es/agent/index.mjs +3 -3
- package/dist/es/agent/task-builder.mjs +319 -0
- package/dist/es/agent/task-builder.mjs.map +1 -0
- package/dist/es/agent/task-cache.mjs +4 -4
- package/dist/es/agent/task-cache.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +197 -504
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/ui-utils.mjs +54 -35
- package/dist/es/agent/ui-utils.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +16 -58
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/conversation-history.mjs +25 -13
- package/dist/es/ai-model/conversation-history.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +4 -4
- package/dist/es/ai-model/inspect.mjs +45 -54
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +47 -65
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
- package/dist/es/ai-model/prompt/common.mjs.map +1 -1
- package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-locator.mjs +11 -235
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +76 -322
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +2 -2
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/util.mjs +3 -88
- package/dist/es/ai-model/prompt/util.mjs.map +1 -1
- package/dist/es/ai-model/prompt/yaml-generator.mjs +10 -10
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +182 -274
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +69 -8
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/{ai-model/common.mjs → common.mjs} +18 -30
- package/dist/es/common.mjs.map +1 -0
- package/dist/es/device/device-options.mjs +0 -0
- package/dist/es/device/index.mjs +29 -12
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/index.mjs +5 -4
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/report.mjs.map +1 -1
- package/dist/es/{insight → service}/index.mjs +38 -51
- package/dist/es/service/index.mjs.map +1 -0
- package/dist/es/{insight → service}/utils.mjs +3 -3
- package/dist/es/service/utils.mjs.map +1 -0
- package/dist/es/task-runner.mjs +264 -0
- package/dist/es/task-runner.mjs.map +1 -0
- package/dist/es/tree.mjs +13 -2
- package/dist/es/tree.mjs.map +1 -0
- package/dist/es/types.mjs +18 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +6 -7
- package/dist/es/utils.mjs.map +1 -1
- package/dist/es/yaml/builder.mjs.map +1 -1
- package/dist/es/yaml/player.mjs +121 -98
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/es/yaml/utils.mjs +1 -1
- package/dist/es/yaml/utils.mjs.map +1 -1
- package/dist/lib/agent/agent.js +231 -142
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/common.js +1 -1
- package/dist/lib/agent/execution-session.js +75 -0
- package/dist/lib/agent/execution-session.js.map +1 -0
- package/dist/lib/agent/index.js +14 -14
- package/dist/lib/agent/index.js.map +1 -1
- package/dist/lib/agent/task-builder.js +356 -0
- package/dist/lib/agent/task-builder.js.map +1 -0
- package/dist/lib/agent/task-cache.js +8 -8
- package/dist/lib/agent/task-cache.js.map +1 -1
- package/dist/lib/agent/tasks.js +202 -506
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/ui-utils.js +58 -36
- package/dist/lib/agent/ui-utils.js.map +1 -1
- package/dist/lib/agent/utils.js +26 -68
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/conversation-history.js +27 -15
- package/dist/lib/ai-model/conversation-history.js.map +1 -1
- package/dist/lib/ai-model/index.js +27 -27
- package/dist/lib/ai-model/index.js.map +1 -1
- package/dist/lib/ai-model/inspect.js +51 -57
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +49 -67
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/assertion.js +2 -2
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
- package/dist/lib/ai-model/prompt/common.js +2 -2
- package/dist/lib/ai-model/prompt/common.js.map +1 -1
- package/dist/lib/ai-model/prompt/describe.js +2 -2
- package/dist/lib/ai-model/prompt/describe.js.map +1 -1
- package/dist/lib/ai-model/prompt/extraction.js +2 -2
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-locator.js +14 -241
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +79 -328
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-section-locator.js +17 -16
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +11 -11
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
- package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
- package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
- package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/util.js +7 -95
- package/dist/lib/ai-model/prompt/util.js.map +1 -1
- package/dist/lib/ai-model/prompt/yaml-generator.js +18 -18
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +288 -401
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +71 -10
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/{ai-model/common.js → common.js} +40 -55
- package/dist/lib/common.js.map +1 -0
- package/dist/lib/device/device-options.js +20 -0
- package/dist/lib/device/device-options.js.map +1 -0
- package/dist/lib/device/index.js +63 -40
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/image/index.js +5 -5
- package/dist/lib/image/index.js.map +1 -1
- package/dist/lib/index.js +24 -20
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/report.js +2 -2
- package/dist/lib/report.js.map +1 -1
- package/dist/lib/{insight → service}/index.js +41 -54
- package/dist/lib/service/index.js.map +1 -0
- package/dist/lib/{insight → service}/utils.js +7 -7
- package/dist/lib/service/utils.js.map +1 -0
- package/dist/lib/task-runner.js +301 -0
- package/dist/lib/task-runner.js.map +1 -0
- package/dist/lib/tree.js +13 -4
- package/dist/lib/tree.js.map +1 -1
- package/dist/lib/types.js +31 -12
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +16 -17
- package/dist/lib/utils.js.map +1 -1
- package/dist/lib/yaml/builder.js +2 -2
- package/dist/lib/yaml/builder.js.map +1 -1
- package/dist/lib/yaml/index.js +16 -22
- package/dist/lib/yaml/index.js.map +1 -1
- package/dist/lib/yaml/player.js +123 -100
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/lib/yaml/utils.js +6 -6
- package/dist/lib/yaml/utils.js.map +1 -1
- package/dist/lib/yaml.js +1 -1
- package/dist/lib/yaml.js.map +1 -1
- package/dist/types/agent/agent.d.ts +62 -17
- package/dist/types/agent/execution-session.d.ts +36 -0
- package/dist/types/agent/index.d.ts +3 -2
- package/dist/types/agent/task-builder.d.ts +35 -0
- package/dist/types/agent/tasks.d.ts +32 -23
- package/dist/types/agent/ui-utils.d.ts +9 -2
- package/dist/types/agent/utils.d.ts +9 -35
- package/dist/types/ai-model/conversation-history.d.ts +8 -4
- package/dist/types/ai-model/index.d.ts +5 -5
- package/dist/types/ai-model/inspect.d.ts +20 -12
- package/dist/types/ai-model/llm-planning.d.ts +3 -1
- package/dist/types/ai-model/prompt/llm-locator.d.ts +1 -6
- package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -3
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +1 -3
- package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +2 -34
- package/dist/types/ai-model/service-caller/index.d.ts +2 -3
- package/dist/types/ai-model/ui-tars-planning.d.ts +15 -2
- package/dist/types/{ai-model/common.d.ts → common.d.ts} +6 -6
- package/dist/types/device/device-options.d.ts +57 -0
- package/dist/types/device/index.d.ts +55 -39
- package/dist/types/index.d.ts +7 -6
- package/dist/types/service/index.d.ts +26 -0
- package/dist/types/service/utils.d.ts +2 -0
- package/dist/types/task-runner.d.ts +49 -0
- package/dist/types/tree.d.ts +4 -1
- package/dist/types/types.d.ts +103 -66
- package/dist/types/yaml/utils.d.ts +1 -1
- package/dist/types/yaml.d.ts +68 -43
- package/package.json +9 -12
- package/dist/es/ai-model/action-executor.mjs +0 -129
- package/dist/es/ai-model/action-executor.mjs.map +0 -1
- package/dist/es/ai-model/common.mjs.map +0 -1
- package/dist/es/insight/index.mjs.map +0 -1
- package/dist/es/insight/utils.mjs.map +0 -1
- package/dist/lib/ai-model/action-executor.js +0 -163
- package/dist/lib/ai-model/action-executor.js.map +0 -1
- package/dist/lib/ai-model/common.js.map +0 -1
- package/dist/lib/insight/index.js.map +0 -1
- package/dist/lib/insight/utils.js.map +0 -1
- package/dist/types/ai-model/action-executor.d.ts +0 -19
- package/dist/types/insight/index.d.ts +0 -31
- package/dist/types/insight/utils.d.ts +0 -2
package/dist/es/agent/tasks.mjs
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
import { ConversationHistory,
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import { MIDSCENE_REPLANNING_CYCLE_LIMIT, globalConfigManager } from "@midscene/shared/env";
|
|
1
|
+
import { ConversationHistory, plan, uiTarsPlanning } from "../ai-model/index.mjs";
|
|
2
|
+
import { TaskExecutionError } from "../task-runner.mjs";
|
|
3
|
+
import { ServiceError } from "../types.mjs";
|
|
5
4
|
import { getDebug } from "@midscene/shared/logger";
|
|
6
5
|
import { assert } from "@midscene/shared/utils";
|
|
6
|
+
import { ExecutionSession } from "./execution-session.mjs";
|
|
7
|
+
import { TaskBuilder, locatePlanForLocate } from "./task-builder.mjs";
|
|
8
|
+
import { descriptionOfTree } from "@midscene/shared/extractor";
|
|
7
9
|
import { taskTitleStr } from "./ui-utils.mjs";
|
|
8
|
-
import {
|
|
10
|
+
import { parsePrompt } from "./utils.mjs";
|
|
9
11
|
function _define_property(obj, key, value) {
|
|
10
12
|
if (key in obj) Object.defineProperty(obj, key, {
|
|
11
13
|
value: value,
|
|
@@ -17,337 +19,35 @@ function _define_property(obj, key, value) {
|
|
|
17
19
|
return obj;
|
|
18
20
|
}
|
|
19
21
|
const debug = getDebug('device-task-executor');
|
|
20
|
-
const
|
|
21
|
-
const defaultVlmUiTarsReplanningCycleLimit = 40;
|
|
22
|
-
function locatePlanForLocate(param) {
|
|
23
|
-
const locate = 'string' == typeof param ? {
|
|
24
|
-
prompt: param
|
|
25
|
-
} : param;
|
|
26
|
-
const locatePlan = {
|
|
27
|
-
type: 'Locate',
|
|
28
|
-
locate,
|
|
29
|
-
param: locate,
|
|
30
|
-
thought: ''
|
|
31
|
-
};
|
|
32
|
-
return locatePlan;
|
|
33
|
-
}
|
|
22
|
+
const maxErrorCountAllowedInOnePlanningLoop = 5;
|
|
34
23
|
class TaskExecutor {
|
|
35
24
|
get page() {
|
|
36
25
|
return this.interface;
|
|
37
26
|
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
screenshot: base64,
|
|
44
|
-
timing
|
|
45
|
-
};
|
|
46
|
-
return item;
|
|
47
|
-
}
|
|
48
|
-
prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
|
|
49
|
-
const taskWithScreenshot = {
|
|
50
|
-
...taskApply,
|
|
51
|
-
executor: async (param, context, ...args)=>{
|
|
52
|
-
const recorder = [];
|
|
53
|
-
const { task } = context;
|
|
54
|
-
task.recorder = recorder;
|
|
55
|
-
const shot = await this.recordScreenshot(`before ${task.type}`);
|
|
56
|
-
recorder.push(shot);
|
|
57
|
-
const result = await taskApply.executor(param, context, ...args);
|
|
58
|
-
if (appendAfterExecution) {
|
|
59
|
-
const shot2 = await this.recordScreenshot('after Action');
|
|
60
|
-
recorder.push(shot2);
|
|
61
|
-
}
|
|
62
|
-
return result;
|
|
63
|
-
}
|
|
64
|
-
};
|
|
65
|
-
return taskWithScreenshot;
|
|
66
|
-
}
|
|
67
|
-
async convertPlanToExecutable(plans, modelConfig, cacheable) {
|
|
68
|
-
const tasks = [];
|
|
69
|
-
const taskForLocatePlan = (plan, detailedLocateParam, onResult)=>{
|
|
70
|
-
if ('string' == typeof detailedLocateParam) detailedLocateParam = {
|
|
71
|
-
prompt: detailedLocateParam
|
|
72
|
-
};
|
|
73
|
-
if (void 0 !== cacheable) detailedLocateParam = {
|
|
74
|
-
...detailedLocateParam,
|
|
75
|
-
cacheable
|
|
76
|
-
};
|
|
77
|
-
const taskFind = {
|
|
78
|
-
type: 'Insight',
|
|
79
|
-
subType: 'Locate',
|
|
80
|
-
param: detailedLocateParam,
|
|
81
|
-
thought: plan.thought,
|
|
82
|
-
executor: async (param, taskContext)=>{
|
|
83
|
-
var _this_taskCache, _locateCacheRecord_cacheContent;
|
|
84
|
-
const { task } = taskContext;
|
|
85
|
-
assert((null == param ? void 0 : param.prompt) || (null == param ? void 0 : param.id) || (null == param ? void 0 : param.bbox), `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
|
|
86
|
-
let insightDump;
|
|
87
|
-
let usage;
|
|
88
|
-
const dumpCollector = (dump)=>{
|
|
89
|
-
var _dump_taskInfo, _dump_taskInfo1;
|
|
90
|
-
insightDump = dump;
|
|
91
|
-
usage = null == dump ? void 0 : null == (_dump_taskInfo = dump.taskInfo) ? void 0 : _dump_taskInfo.usage;
|
|
92
|
-
task.log = {
|
|
93
|
-
dump: insightDump
|
|
94
|
-
};
|
|
95
|
-
task.usage = usage;
|
|
96
|
-
if (null == dump ? void 0 : null == (_dump_taskInfo1 = dump.taskInfo) ? void 0 : _dump_taskInfo1.searchAreaUsage) task.searchAreaUsage = dump.taskInfo.searchAreaUsage;
|
|
97
|
-
};
|
|
98
|
-
this.insight.onceDumpUpdatedFn = dumpCollector;
|
|
99
|
-
const shotTime = Date.now();
|
|
100
|
-
const uiContext = await this.insight.contextRetrieverFn('locate');
|
|
101
|
-
task.uiContext = uiContext;
|
|
102
|
-
const recordItem = {
|
|
103
|
-
type: 'screenshot',
|
|
104
|
-
ts: shotTime,
|
|
105
|
-
screenshot: uiContext.screenshotBase64,
|
|
106
|
-
timing: 'before Insight'
|
|
107
|
-
};
|
|
108
|
-
task.recorder = [
|
|
109
|
-
recordItem
|
|
110
|
-
];
|
|
111
|
-
const elementFromXpath = param.xpath && this.interface.getElementInfoByXpath ? await this.interface.getElementInfoByXpath(param.xpath) : void 0;
|
|
112
|
-
const userExpectedPathHitFlag = !!elementFromXpath;
|
|
113
|
-
const cachePrompt = param.prompt;
|
|
114
|
-
const locateCacheRecord = null == (_this_taskCache = this.taskCache) ? void 0 : _this_taskCache.matchLocateCache(cachePrompt);
|
|
115
|
-
const cacheEntry = null == locateCacheRecord ? void 0 : null == (_locateCacheRecord_cacheContent = locateCacheRecord.cacheContent) ? void 0 : _locateCacheRecord_cacheContent.cache;
|
|
116
|
-
const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(this, cacheEntry, cachePrompt, param.cacheable);
|
|
117
|
-
const cacheHitFlag = !!elementFromCache;
|
|
118
|
-
const elementFromPlan = userExpectedPathHitFlag || cacheHitFlag ? void 0 : matchElementFromPlan(param, uiContext.tree);
|
|
119
|
-
const planHitFlag = !!elementFromPlan;
|
|
120
|
-
const elementFromAiLocate = userExpectedPathHitFlag || cacheHitFlag || planHitFlag ? void 0 : (await this.insight.locate(param, {
|
|
121
|
-
context: uiContext
|
|
122
|
-
}, modelConfig)).element;
|
|
123
|
-
const aiLocateHitFlag = !!elementFromAiLocate;
|
|
124
|
-
const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
|
|
125
|
-
let currentCacheEntry;
|
|
126
|
-
if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
|
|
127
|
-
const feature = await this.interface.cacheFeatureForRect(element.rect, void 0 !== element.isOrderSensitive ? {
|
|
128
|
-
_orderSensitive: element.isOrderSensitive
|
|
129
|
-
} : void 0);
|
|
130
|
-
if (feature && Object.keys(feature).length > 0) {
|
|
131
|
-
debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
|
|
132
|
-
currentCacheEntry = feature;
|
|
133
|
-
this.taskCache.updateOrAppendCacheRecord({
|
|
134
|
-
type: 'locate',
|
|
135
|
-
prompt: cachePrompt,
|
|
136
|
-
cache: feature
|
|
137
|
-
}, locateCacheRecord);
|
|
138
|
-
} else debug('no cache data returned, skip cache update, prompt: %s', cachePrompt);
|
|
139
|
-
} catch (error) {
|
|
140
|
-
debug('cacheFeatureForRect failed: %s', error);
|
|
141
|
-
}
|
|
142
|
-
else debug('cacheFeatureForRect is not supported, skip cache update');
|
|
143
|
-
if (!element) throw new Error(`Element not found: ${param.prompt}`);
|
|
144
|
-
let hitBy;
|
|
145
|
-
if (userExpectedPathHitFlag) hitBy = {
|
|
146
|
-
from: 'User expected path',
|
|
147
|
-
context: {
|
|
148
|
-
xpath: param.xpath
|
|
149
|
-
}
|
|
150
|
-
};
|
|
151
|
-
else if (cacheHitFlag) hitBy = {
|
|
152
|
-
from: 'Cache',
|
|
153
|
-
context: {
|
|
154
|
-
cacheEntry,
|
|
155
|
-
cacheToSave: currentCacheEntry
|
|
156
|
-
}
|
|
157
|
-
};
|
|
158
|
-
else if (planHitFlag) hitBy = {
|
|
159
|
-
from: 'Planning',
|
|
160
|
-
context: {
|
|
161
|
-
id: null == elementFromPlan ? void 0 : elementFromPlan.id,
|
|
162
|
-
bbox: null == elementFromPlan ? void 0 : elementFromPlan.bbox
|
|
163
|
-
}
|
|
164
|
-
};
|
|
165
|
-
else if (aiLocateHitFlag) hitBy = {
|
|
166
|
-
from: 'AI model',
|
|
167
|
-
context: {
|
|
168
|
-
prompt: param.prompt
|
|
169
|
-
}
|
|
170
|
-
};
|
|
171
|
-
null == onResult || onResult(element);
|
|
172
|
-
return {
|
|
173
|
-
output: {
|
|
174
|
-
element
|
|
175
|
-
},
|
|
176
|
-
uiContext,
|
|
177
|
-
hitBy
|
|
178
|
-
};
|
|
179
|
-
}
|
|
180
|
-
};
|
|
181
|
-
return taskFind;
|
|
182
|
-
};
|
|
183
|
-
for (const plan of plans)if ('Locate' === plan.type) {
|
|
184
|
-
var _plan_locate, _plan_locate1;
|
|
185
|
-
if (!plan.locate || null === plan.locate || (null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.id) === null || (null == (_plan_locate1 = plan.locate) ? void 0 : _plan_locate1.id) === 'null') {
|
|
186
|
-
debug('Locate action with id is null, will be ignored', plan);
|
|
187
|
-
continue;
|
|
188
|
-
}
|
|
189
|
-
const taskLocate = taskForLocatePlan(plan, plan.locate);
|
|
190
|
-
tasks.push(taskLocate);
|
|
191
|
-
} else if ('Error' === plan.type) {
|
|
192
|
-
var _plan_param;
|
|
193
|
-
const taskActionError = {
|
|
194
|
-
type: 'Action',
|
|
195
|
-
subType: 'Error',
|
|
196
|
-
param: plan.param,
|
|
197
|
-
thought: plan.thought || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought),
|
|
198
|
-
locate: plan.locate,
|
|
199
|
-
executor: async ()=>{
|
|
200
|
-
var _plan_param;
|
|
201
|
-
throw new Error((null == plan ? void 0 : plan.thought) || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought) || 'error without thought');
|
|
202
|
-
}
|
|
203
|
-
};
|
|
204
|
-
tasks.push(taskActionError);
|
|
205
|
-
} else if ('Finished' === plan.type) {
|
|
206
|
-
const taskActionFinished = {
|
|
207
|
-
type: 'Action',
|
|
208
|
-
subType: 'Finished',
|
|
209
|
-
param: null,
|
|
210
|
-
thought: plan.thought,
|
|
211
|
-
locate: plan.locate,
|
|
212
|
-
executor: async (param)=>{}
|
|
213
|
-
};
|
|
214
|
-
tasks.push(taskActionFinished);
|
|
215
|
-
} else if ('Sleep' === plan.type) {
|
|
216
|
-
const taskActionSleep = {
|
|
217
|
-
type: 'Action',
|
|
218
|
-
subType: 'Sleep',
|
|
219
|
-
param: plan.param,
|
|
220
|
-
thought: plan.thought,
|
|
221
|
-
locate: plan.locate,
|
|
222
|
-
executor: async (taskParam)=>{
|
|
223
|
-
await external_utils_mjs_sleep((null == taskParam ? void 0 : taskParam.timeMs) || 3000);
|
|
224
|
-
}
|
|
225
|
-
};
|
|
226
|
-
tasks.push(taskActionSleep);
|
|
227
|
-
} else {
|
|
228
|
-
const planType = plan.type;
|
|
229
|
-
const actionSpace = await this.interface.actionSpace();
|
|
230
|
-
const action = actionSpace.find((action)=>action.name === planType);
|
|
231
|
-
const param = plan.param;
|
|
232
|
-
if (!action) throw new Error(`Action type '${planType}' not found`);
|
|
233
|
-
const locateFields = action ? findAllMidsceneLocatorField(action.paramSchema) : [];
|
|
234
|
-
const requiredLocateFields = action ? findAllMidsceneLocatorField(action.paramSchema, true) : [];
|
|
235
|
-
locateFields.forEach((field)=>{
|
|
236
|
-
if (param[field]) {
|
|
237
|
-
const locatePlan = locatePlanForLocate(param[field]);
|
|
238
|
-
debug('will prepend locate param for field', `action.type=${planType}`, `param=${JSON.stringify(param[field])}`, `locatePlan=${JSON.stringify(locatePlan)}`);
|
|
239
|
-
const locateTask = taskForLocatePlan(locatePlan, param[field], (result)=>{
|
|
240
|
-
param[field] = result;
|
|
241
|
-
});
|
|
242
|
-
tasks.push(locateTask);
|
|
243
|
-
} else {
|
|
244
|
-
assert(!requiredLocateFields.includes(field), `Required locate field '${field}' is not provided for action ${planType}`);
|
|
245
|
-
debug(`field '${field}' is not provided for action ${planType}`);
|
|
246
|
-
}
|
|
247
|
-
});
|
|
248
|
-
const task = {
|
|
249
|
-
type: 'Action',
|
|
250
|
-
subType: planType,
|
|
251
|
-
thought: plan.thought,
|
|
252
|
-
param: plan.param,
|
|
253
|
-
executor: async (param, context)=>{
|
|
254
|
-
var _context_element;
|
|
255
|
-
debug('executing action', planType, param, `context.element.center: ${null == (_context_element = context.element) ? void 0 : _context_element.center}`);
|
|
256
|
-
const uiContext = await this.insight.contextRetrieverFn('locate');
|
|
257
|
-
context.task.uiContext = uiContext;
|
|
258
|
-
requiredLocateFields.forEach((field)=>{
|
|
259
|
-
assert(param[field], `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`);
|
|
260
|
-
});
|
|
261
|
-
try {
|
|
262
|
-
await Promise.all([
|
|
263
|
-
(async ()=>{
|
|
264
|
-
if (this.interface.beforeInvokeAction) {
|
|
265
|
-
debug('will call "beforeInvokeAction" for interface');
|
|
266
|
-
await this.interface.beforeInvokeAction(action.name, param);
|
|
267
|
-
debug('called "beforeInvokeAction" for interface');
|
|
268
|
-
}
|
|
269
|
-
})(),
|
|
270
|
-
external_utils_mjs_sleep(200)
|
|
271
|
-
]);
|
|
272
|
-
} catch (originalError) {
|
|
273
|
-
const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
|
|
274
|
-
throw new Error(`error in running beforeInvokeAction for ${action.name}: ${originalMessage}`, {
|
|
275
|
-
cause: originalError
|
|
276
|
-
});
|
|
277
|
-
}
|
|
278
|
-
if (action.paramSchema) try {
|
|
279
|
-
param = parseActionParam(param, action.paramSchema);
|
|
280
|
-
} catch (error) {
|
|
281
|
-
throw new Error(`Invalid parameters for action ${action.name}: ${error.message}\nParameters: ${JSON.stringify(param)}`, {
|
|
282
|
-
cause: error
|
|
283
|
-
});
|
|
284
|
-
}
|
|
285
|
-
debug('calling action', action.name);
|
|
286
|
-
const actionFn = action.call.bind(this.interface);
|
|
287
|
-
await actionFn(param, context);
|
|
288
|
-
debug('called action', action.name);
|
|
289
|
-
await external_utils_mjs_sleep(300);
|
|
290
|
-
try {
|
|
291
|
-
if (this.interface.afterInvokeAction) {
|
|
292
|
-
debug('will call "afterInvokeAction" for interface');
|
|
293
|
-
await this.interface.afterInvokeAction(action.name, param);
|
|
294
|
-
debug('called "afterInvokeAction" for interface');
|
|
295
|
-
}
|
|
296
|
-
} catch (originalError) {
|
|
297
|
-
const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
|
|
298
|
-
throw new Error(`error in running afterInvokeAction for ${action.name}: ${originalMessage}`, {
|
|
299
|
-
cause: originalError
|
|
300
|
-
});
|
|
301
|
-
}
|
|
302
|
-
return {
|
|
303
|
-
output: {
|
|
304
|
-
success: true,
|
|
305
|
-
action: planType,
|
|
306
|
-
param: param
|
|
307
|
-
}
|
|
308
|
-
};
|
|
309
|
-
}
|
|
310
|
-
};
|
|
311
|
-
tasks.push(task);
|
|
312
|
-
}
|
|
313
|
-
const wrappedTasks = tasks.map((task, index)=>{
|
|
314
|
-
if ('Action' === task.type) return this.prependExecutorWithScreenshot(task, index === tasks.length - 1);
|
|
315
|
-
return task;
|
|
27
|
+
createExecutionSession(title, options) {
|
|
28
|
+
return new ExecutionSession(title, ()=>Promise.resolve(this.service.contextRetrieverFn()), {
|
|
29
|
+
onTaskStart: this.onTaskStartCallback,
|
|
30
|
+
tasks: options?.tasks,
|
|
31
|
+
onTaskUpdate: this.hooks?.onTaskUpdate
|
|
316
32
|
});
|
|
317
|
-
return {
|
|
318
|
-
tasks: wrappedTasks
|
|
319
|
-
};
|
|
320
33
|
}
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
ts: shotTime,
|
|
327
|
-
screenshot: uiContext.screenshotBase64,
|
|
328
|
-
timing: 'before Planning'
|
|
329
|
-
};
|
|
330
|
-
executorContext.task.recorder = [
|
|
331
|
-
recordItem
|
|
332
|
-
];
|
|
333
|
-
executorContext.task.uiContext = uiContext;
|
|
334
|
-
return {
|
|
335
|
-
uiContext
|
|
336
|
-
};
|
|
34
|
+
getActionSpace() {
|
|
35
|
+
return this.providedActionSpace;
|
|
36
|
+
}
|
|
37
|
+
async convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent, options) {
|
|
38
|
+
return this.taskBuilder.build(plans, modelConfigForPlanning, modelConfigForDefaultIntent, options);
|
|
337
39
|
}
|
|
338
40
|
async loadYamlFlowAsPlanning(userInstruction, yamlString) {
|
|
339
|
-
const
|
|
340
|
-
onTaskStart: this.onTaskStartCallback
|
|
341
|
-
});
|
|
41
|
+
const session = this.createExecutionSession(taskTitleStr('Action', userInstruction));
|
|
342
42
|
const task = {
|
|
343
43
|
type: 'Planning',
|
|
344
44
|
subType: 'LoadYaml',
|
|
345
|
-
locate: null,
|
|
346
45
|
param: {
|
|
347
46
|
userInstruction
|
|
348
47
|
},
|
|
349
48
|
executor: async (param, executorContext)=>{
|
|
350
|
-
|
|
49
|
+
const { uiContext } = executorContext;
|
|
50
|
+
assert(uiContext, 'uiContext is required for Planning task');
|
|
351
51
|
return {
|
|
352
52
|
output: {
|
|
353
53
|
actions: [],
|
|
@@ -367,140 +67,137 @@ class TaskExecutor {
|
|
|
367
67
|
};
|
|
368
68
|
}
|
|
369
69
|
};
|
|
370
|
-
|
|
371
|
-
await
|
|
70
|
+
const runner = session.getRunner();
|
|
71
|
+
await session.appendAndRun(task);
|
|
372
72
|
return {
|
|
373
|
-
|
|
73
|
+
runner
|
|
374
74
|
};
|
|
375
75
|
}
|
|
376
|
-
|
|
377
|
-
const
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
userInstruction
|
|
383
|
-
},
|
|
384
|
-
executor: async (param, executorContext)=>{
|
|
385
|
-
const startTime = Date.now();
|
|
386
|
-
const { uiContext } = await this.setupPlanningContext(executorContext);
|
|
387
|
-
const { vlMode } = modelConfig;
|
|
388
|
-
const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfig.uiTarsModelVersion : void 0;
|
|
389
|
-
assert(this.interface.actionSpace, 'actionSpace for device is not implemented');
|
|
390
|
-
const actionSpace = await this.interface.actionSpace();
|
|
391
|
-
debug('actionSpace for this interface is:', actionSpace.map((action)=>action.name).join(', '));
|
|
392
|
-
assert(Array.isArray(actionSpace), 'actionSpace must be an array');
|
|
393
|
-
if (0 === actionSpace.length) console.warn(`ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`);
|
|
394
|
-
const planResult = await (uiTarsModelVersion ? uiTarsPlanning : index_mjs_plan)(param.userInstruction, {
|
|
395
|
-
context: uiContext,
|
|
396
|
-
actionContext,
|
|
397
|
-
interfaceType: this.interface.interfaceType,
|
|
398
|
-
actionSpace,
|
|
399
|
-
modelConfig,
|
|
400
|
-
conversationHistory: this.conversationHistory
|
|
401
|
-
});
|
|
402
|
-
debug('planResult', JSON.stringify(planResult, null, 2));
|
|
403
|
-
const { actions, log, more_actions_needed_by_instruction, error, usage, rawResponse, sleep } = planResult;
|
|
404
|
-
executorContext.task.log = {
|
|
405
|
-
...executorContext.task.log || {},
|
|
406
|
-
rawResponse
|
|
407
|
-
};
|
|
408
|
-
executorContext.task.usage = usage;
|
|
409
|
-
const finalActions = actions || [];
|
|
410
|
-
if (sleep) {
|
|
411
|
-
const timeNow = Date.now();
|
|
412
|
-
const timeRemaining = sleep - (timeNow - startTime);
|
|
413
|
-
if (timeRemaining > 0) finalActions.push({
|
|
414
|
-
type: 'Sleep',
|
|
415
|
-
param: {
|
|
416
|
-
timeMs: timeRemaining
|
|
417
|
-
},
|
|
418
|
-
locate: null
|
|
419
|
-
});
|
|
420
|
-
}
|
|
421
|
-
if (0 === finalActions.length) assert(!more_actions_needed_by_instruction || sleep, error ? `Failed to plan: ${error}` : 'No plan found');
|
|
422
|
-
return {
|
|
423
|
-
output: {
|
|
424
|
-
actions: finalActions,
|
|
425
|
-
more_actions_needed_by_instruction,
|
|
426
|
-
log,
|
|
427
|
-
yamlFlow: planResult.yamlFlow
|
|
428
|
-
},
|
|
429
|
-
cache: {
|
|
430
|
-
hit: false
|
|
431
|
-
},
|
|
432
|
-
uiContext
|
|
433
|
-
};
|
|
434
|
-
}
|
|
435
|
-
};
|
|
436
|
-
return task;
|
|
437
|
-
}
|
|
438
|
-
async runPlans(title, plans, modelConfig) {
|
|
439
|
-
const taskExecutor = new Executor(title, {
|
|
440
|
-
onTaskStart: this.onTaskStartCallback
|
|
441
|
-
});
|
|
442
|
-
const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);
|
|
443
|
-
await taskExecutor.append(tasks);
|
|
444
|
-
const result = await taskExecutor.flush();
|
|
445
|
-
const { output } = result;
|
|
76
|
+
async runPlans(title, plans, modelConfigForPlanning, modelConfigForDefaultIntent) {
|
|
77
|
+
const session = this.createExecutionSession(title);
|
|
78
|
+
const { tasks } = await this.convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent);
|
|
79
|
+
const runner = session.getRunner();
|
|
80
|
+
const result = await session.appendAndRun(tasks);
|
|
81
|
+
const { output } = result ?? {};
|
|
446
82
|
return {
|
|
447
83
|
output,
|
|
448
|
-
|
|
84
|
+
runner
|
|
449
85
|
};
|
|
450
86
|
}
|
|
451
|
-
|
|
452
|
-
return this.replanningCycleLimit || globalConfigManager.getEnvConfigInNumber(MIDSCENE_REPLANNING_CYCLE_LIMIT) || (isVlmUiTars ? defaultVlmUiTarsReplanningCycleLimit : defaultReplanningCycleLimit);
|
|
453
|
-
}
|
|
454
|
-
async action(userPrompt, modelConfig, actionContext, cacheable) {
|
|
87
|
+
async action(userPrompt, modelConfigForPlanning, modelConfigForDefaultIntent, includeBboxInPlanning, aiActContext, cacheable, replanningCycleLimitOverride, imagesIncludeCount) {
|
|
455
88
|
this.conversationHistory.reset();
|
|
456
|
-
const
|
|
457
|
-
|
|
458
|
-
});
|
|
89
|
+
const session = this.createExecutionSession(taskTitleStr('Action', userPrompt));
|
|
90
|
+
const runner = session.getRunner();
|
|
459
91
|
let replanCount = 0;
|
|
460
92
|
const yamlFlow = [];
|
|
461
|
-
const replanningCycleLimit =
|
|
93
|
+
const replanningCycleLimit = replanningCycleLimitOverride ?? this.replanningCycleLimit;
|
|
94
|
+
assert(void 0 !== replanningCycleLimit, 'replanningCycleLimit is required for TaskExecutor.action');
|
|
95
|
+
let errorCountInOnePlanningLoop = 0;
|
|
462
96
|
while(true){
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
97
|
+
const result = await session.appendAndRun({
|
|
98
|
+
type: 'Planning',
|
|
99
|
+
subType: 'Plan',
|
|
100
|
+
param: {
|
|
101
|
+
userInstruction: userPrompt,
|
|
102
|
+
aiActContext,
|
|
103
|
+
imagesIncludeCount
|
|
104
|
+
},
|
|
105
|
+
executor: async (param, executorContext)=>{
|
|
106
|
+
const startTime = Date.now();
|
|
107
|
+
const { uiContext } = executorContext;
|
|
108
|
+
assert(uiContext, 'uiContext is required for Planning task');
|
|
109
|
+
const { vlMode } = modelConfigForPlanning;
|
|
110
|
+
const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfigForPlanning.uiTarsModelVersion : void 0;
|
|
111
|
+
const actionSpace = this.getActionSpace();
|
|
112
|
+
debug('actionSpace for this interface is:', actionSpace.map((action)=>action.name).join(', '));
|
|
113
|
+
assert(Array.isArray(actionSpace), 'actionSpace must be an array');
|
|
114
|
+
if (0 === actionSpace.length) console.warn(`ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`);
|
|
115
|
+
const planResult = await (uiTarsModelVersion ? uiTarsPlanning : plan)(param.userInstruction, {
|
|
116
|
+
context: uiContext,
|
|
117
|
+
actionContext: param.aiActContext,
|
|
118
|
+
interfaceType: this.interface.interfaceType,
|
|
119
|
+
actionSpace,
|
|
120
|
+
modelConfig: modelConfigForPlanning,
|
|
121
|
+
conversationHistory: this.conversationHistory,
|
|
122
|
+
includeBbox: includeBboxInPlanning,
|
|
123
|
+
imagesIncludeCount
|
|
124
|
+
});
|
|
125
|
+
debug('planResult', JSON.stringify(planResult, null, 2));
|
|
126
|
+
const { actions, log, more_actions_needed_by_instruction, error, usage, rawResponse, sleep } = planResult;
|
|
127
|
+
executorContext.task.log = {
|
|
128
|
+
...executorContext.task.log || {},
|
|
129
|
+
rawResponse
|
|
130
|
+
};
|
|
131
|
+
executorContext.task.usage = usage;
|
|
132
|
+
executorContext.task.output = {
|
|
133
|
+
actions: actions || [],
|
|
134
|
+
more_actions_needed_by_instruction,
|
|
135
|
+
log,
|
|
136
|
+
yamlFlow: planResult.yamlFlow
|
|
137
|
+
};
|
|
138
|
+
executorContext.uiContext = uiContext;
|
|
139
|
+
const finalActions = [
|
|
140
|
+
...actions || []
|
|
141
|
+
];
|
|
142
|
+
if (sleep) {
|
|
143
|
+
const timeNow = Date.now();
|
|
144
|
+
const timeRemaining = sleep - (timeNow - startTime);
|
|
145
|
+
if (timeRemaining > 0) finalActions.push(this.sleepPlan(timeRemaining));
|
|
146
|
+
}
|
|
147
|
+
assert(!error, `Failed to continue: ${error}\n${log || ''}`);
|
|
148
|
+
return {
|
|
149
|
+
cache: {
|
|
150
|
+
hit: false
|
|
151
|
+
}
|
|
152
|
+
};
|
|
153
|
+
}
|
|
154
|
+
}, {
|
|
155
|
+
allowWhenError: true
|
|
156
|
+
});
|
|
157
|
+
const planResult = result?.output;
|
|
158
|
+
const plans = planResult?.actions || [];
|
|
159
|
+
yamlFlow.push(...planResult?.yamlFlow || []);
|
|
477
160
|
let executables;
|
|
478
161
|
try {
|
|
479
|
-
executables = await this.convertPlanToExecutable(plans,
|
|
480
|
-
|
|
162
|
+
executables = await this.convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent, {
|
|
163
|
+
cacheable,
|
|
164
|
+
subTask: true
|
|
165
|
+
});
|
|
481
166
|
} catch (error) {
|
|
482
|
-
return
|
|
167
|
+
return session.appendErrorPlan(`Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`);
|
|
483
168
|
}
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
}
|
|
489
|
-
|
|
490
|
-
|
|
169
|
+
if (this.conversationHistory.pendingFeedbackMessage) console.warn('unconsumed pending feedback message detected, this may lead to unexpected planning result:', this.conversationHistory.pendingFeedbackMessage);
|
|
170
|
+
let errorFlag = false;
|
|
171
|
+
try {
|
|
172
|
+
await session.appendAndRun(executables.tasks);
|
|
173
|
+
} catch (error) {
|
|
174
|
+
errorFlag = true;
|
|
175
|
+
errorCountInOnePlanningLoop++;
|
|
176
|
+
this.conversationHistory.pendingFeedbackMessage = `Error executing running tasks: ${error?.message || String(error)}`;
|
|
177
|
+
debug('error when executing running tasks, but continue to run if it is not too many errors:', error instanceof Error ? error.message : String(error), 'current error count in one planning loop:', errorCountInOnePlanningLoop);
|
|
178
|
+
}
|
|
179
|
+
if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) return session.appendErrorPlan('Too many errors in one planning loop');
|
|
180
|
+
if (!planResult?.more_actions_needed_by_instruction) if (errorFlag) debug('more_actions_needed_by_instruction is false, but there are errors in one planning loop, continue to run');
|
|
181
|
+
else break;
|
|
182
|
+
++replanCount;
|
|
183
|
+
if (replanCount > replanningCycleLimit) {
|
|
184
|
+
const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;
|
|
185
|
+
return session.appendErrorPlan(errorMsg);
|
|
186
|
+
}
|
|
187
|
+
if (!this.conversationHistory.pendingFeedbackMessage) this.conversationHistory.pendingFeedbackMessage = 'I have finished the action previously planned.';
|
|
491
188
|
}
|
|
492
|
-
|
|
189
|
+
const finalResult = {
|
|
493
190
|
output: {
|
|
494
191
|
yamlFlow
|
|
495
192
|
},
|
|
496
|
-
|
|
193
|
+
runner
|
|
497
194
|
};
|
|
195
|
+
return finalResult;
|
|
498
196
|
}
|
|
499
197
|
createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt) {
|
|
500
198
|
const queryTask = {
|
|
501
199
|
type: 'Insight',
|
|
502
200
|
subType: type,
|
|
503
|
-
locate: null,
|
|
504
201
|
param: {
|
|
505
202
|
dataDemand: multimodalPrompt ? {
|
|
506
203
|
demand,
|
|
@@ -509,23 +206,15 @@ class TaskExecutor {
|
|
|
509
206
|
},
|
|
510
207
|
executor: async (param, taskContext)=>{
|
|
511
208
|
const { task } = taskContext;
|
|
512
|
-
let
|
|
513
|
-
const
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
const uiContext = await this.insight.contextRetrieverFn('extract');
|
|
519
|
-
task.uiContext = uiContext;
|
|
520
|
-
const recordItem = {
|
|
521
|
-
type: 'screenshot',
|
|
522
|
-
ts: shotTime,
|
|
523
|
-
screenshot: uiContext.screenshotBase64,
|
|
524
|
-
timing: 'before Extract'
|
|
209
|
+
let queryDump;
|
|
210
|
+
const applyDump = (dump)=>{
|
|
211
|
+
queryDump = dump;
|
|
212
|
+
task.log = {
|
|
213
|
+
dump
|
|
214
|
+
};
|
|
525
215
|
};
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
];
|
|
216
|
+
const uiContext = taskContext.uiContext;
|
|
217
|
+
assert(uiContext, 'uiContext is required for Query task');
|
|
529
218
|
const ifTypeRestricted = 'Query' !== type;
|
|
530
219
|
let demandInput = demand;
|
|
531
220
|
let keyOfResult = 'result';
|
|
@@ -538,13 +227,27 @@ class TaskExecutor {
|
|
|
538
227
|
} else if (ifTypeRestricted) demandInput = {
|
|
539
228
|
[keyOfResult]: `${type}, ${demand}`
|
|
540
229
|
};
|
|
541
|
-
|
|
230
|
+
let extractResult;
|
|
231
|
+
let extraPageDescription = '';
|
|
232
|
+
if (opt?.domIncluded && this.interface.getElementsNodeTree) {
|
|
233
|
+
debug('appending tree info for page');
|
|
234
|
+
const tree = await this.interface.getElementsNodeTree();
|
|
235
|
+
extraPageDescription = await descriptionOfTree(tree, 200, false, opt?.domIncluded === 'visible-only');
|
|
236
|
+
}
|
|
237
|
+
try {
|
|
238
|
+
extractResult = await this.service.extract(demandInput, modelConfig, opt, extraPageDescription, multimodalPrompt);
|
|
239
|
+
} catch (error) {
|
|
240
|
+
if (error instanceof ServiceError) applyDump(error.dump);
|
|
241
|
+
throw error;
|
|
242
|
+
}
|
|
243
|
+
const { data, usage, thought, dump } = extractResult;
|
|
244
|
+
applyDump(dump);
|
|
542
245
|
let outputResult = data;
|
|
543
246
|
if (ifTypeRestricted) if ('string' == typeof data) outputResult = data;
|
|
544
247
|
else if ('WaitFor' === type) outputResult = null == data ? false : data[keyOfResult];
|
|
545
248
|
else if (null == data) outputResult = null;
|
|
546
249
|
else {
|
|
547
|
-
assert(
|
|
250
|
+
assert(data?.[keyOfResult] !== void 0, 'No result in query data');
|
|
548
251
|
outputResult = data[keyOfResult];
|
|
549
252
|
}
|
|
550
253
|
if ('Assert' === type && !outputResult) {
|
|
@@ -554,7 +257,7 @@ class TaskExecutor {
|
|
|
554
257
|
}
|
|
555
258
|
return {
|
|
556
259
|
output: outputResult,
|
|
557
|
-
log:
|
|
260
|
+
log: queryDump,
|
|
558
261
|
usage,
|
|
559
262
|
thought
|
|
560
263
|
};
|
|
@@ -563,101 +266,91 @@ class TaskExecutor {
|
|
|
563
266
|
return queryTask;
|
|
564
267
|
}
|
|
565
268
|
async createTypeQueryExecution(type, demand, modelConfig, opt, multimodalPrompt) {
|
|
566
|
-
const
|
|
567
|
-
onTaskStart: this.onTaskStartCallback
|
|
568
|
-
});
|
|
269
|
+
const session = this.createExecutionSession(taskTitleStr(type, 'string' == typeof demand ? demand : JSON.stringify(demand)));
|
|
569
270
|
const queryTask = await this.createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt);
|
|
570
|
-
|
|
571
|
-
const result = await
|
|
271
|
+
const runner = session.getRunner();
|
|
272
|
+
const result = await session.appendAndRun(queryTask);
|
|
572
273
|
if (!result) throw new Error('result of taskExecutor.flush() is undefined in function createTypeQueryTask');
|
|
573
274
|
const { output, thought } = result;
|
|
574
275
|
return {
|
|
575
276
|
output,
|
|
576
277
|
thought,
|
|
577
|
-
|
|
278
|
+
runner
|
|
578
279
|
};
|
|
579
280
|
}
|
|
580
|
-
|
|
581
|
-
const errorPlan = {
|
|
582
|
-
type: 'Error',
|
|
583
|
-
param: {
|
|
584
|
-
thought: errorMsg
|
|
585
|
-
},
|
|
586
|
-
locate: null
|
|
587
|
-
};
|
|
588
|
-
const { tasks } = await this.convertPlanToExecutable([
|
|
589
|
-
errorPlan
|
|
590
|
-
], modelConfig);
|
|
591
|
-
await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
|
|
592
|
-
await taskExecutor.flush();
|
|
281
|
+
sleepPlan(timeMs) {
|
|
593
282
|
return {
|
|
594
|
-
output: void 0,
|
|
595
|
-
executor: taskExecutor
|
|
596
|
-
};
|
|
597
|
-
}
|
|
598
|
-
async taskForSleep(timeMs, modelConfig) {
|
|
599
|
-
const sleepPlan = {
|
|
600
283
|
type: 'Sleep',
|
|
601
284
|
param: {
|
|
602
285
|
timeMs
|
|
603
|
-
}
|
|
604
|
-
locate: null
|
|
286
|
+
}
|
|
605
287
|
};
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
288
|
+
}
|
|
289
|
+
async taskForSleep(timeMs, _modelConfig) {
|
|
290
|
+
return this.taskBuilder.createSleepTask({
|
|
291
|
+
timeMs
|
|
292
|
+
});
|
|
610
293
|
}
|
|
611
294
|
async waitFor(assertion, opt, modelConfig) {
|
|
612
295
|
const { textPrompt, multimodalPrompt } = parsePrompt(assertion);
|
|
613
296
|
const description = `waitFor: ${textPrompt}`;
|
|
614
|
-
const
|
|
615
|
-
|
|
616
|
-
});
|
|
297
|
+
const session = this.createExecutionSession(taskTitleStr('WaitFor', description));
|
|
298
|
+
const runner = session.getRunner();
|
|
617
299
|
const { timeoutMs, checkIntervalMs } = opt;
|
|
618
300
|
assert(assertion, 'No assertion for waitFor');
|
|
619
301
|
assert(timeoutMs, 'No timeoutMs for waitFor');
|
|
620
302
|
assert(checkIntervalMs, 'No checkIntervalMs for waitFor');
|
|
621
303
|
assert(checkIntervalMs <= timeoutMs, `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`);
|
|
622
304
|
const overallStartTime = Date.now();
|
|
623
|
-
let
|
|
305
|
+
let lastCheckStart = overallStartTime;
|
|
624
306
|
let errorThought = '';
|
|
625
|
-
while(
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
const result = await taskExecutor.flush();
|
|
632
|
-
if (null == result ? void 0 : result.output) return {
|
|
307
|
+
while(lastCheckStart - overallStartTime <= timeoutMs){
|
|
308
|
+
const currentCheckStart = Date.now();
|
|
309
|
+
lastCheckStart = currentCheckStart;
|
|
310
|
+
const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, void 0, multimodalPrompt);
|
|
311
|
+
const result = await session.appendAndRun(queryTask);
|
|
312
|
+
if (result?.output) return {
|
|
633
313
|
output: void 0,
|
|
634
|
-
|
|
314
|
+
runner
|
|
635
315
|
};
|
|
636
|
-
errorThought =
|
|
316
|
+
errorThought = result?.thought || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
|
|
637
317
|
const now = Date.now();
|
|
638
|
-
if (now -
|
|
639
|
-
const timeRemaining = checkIntervalMs - (now -
|
|
640
|
-
const sleepTask =
|
|
641
|
-
|
|
318
|
+
if (now - currentCheckStart < checkIntervalMs) {
|
|
319
|
+
const timeRemaining = checkIntervalMs - (now - currentCheckStart);
|
|
320
|
+
const sleepTask = this.taskBuilder.createSleepTask({
|
|
321
|
+
timeMs: timeRemaining
|
|
322
|
+
});
|
|
323
|
+
await session.append(sleepTask);
|
|
642
324
|
}
|
|
643
325
|
}
|
|
644
|
-
return
|
|
326
|
+
return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);
|
|
645
327
|
}
|
|
646
|
-
constructor(interfaceInstance,
|
|
328
|
+
constructor(interfaceInstance, service, opts){
|
|
647
329
|
_define_property(this, "interface", void 0);
|
|
648
|
-
_define_property(this, "
|
|
330
|
+
_define_property(this, "service", void 0);
|
|
649
331
|
_define_property(this, "taskCache", void 0);
|
|
332
|
+
_define_property(this, "providedActionSpace", void 0);
|
|
333
|
+
_define_property(this, "taskBuilder", void 0);
|
|
650
334
|
_define_property(this, "conversationHistory", void 0);
|
|
651
335
|
_define_property(this, "onTaskStartCallback", void 0);
|
|
336
|
+
_define_property(this, "hooks", void 0);
|
|
652
337
|
_define_property(this, "replanningCycleLimit", void 0);
|
|
653
338
|
this.interface = interfaceInstance;
|
|
654
|
-
this.
|
|
339
|
+
this.service = service;
|
|
655
340
|
this.taskCache = opts.taskCache;
|
|
656
|
-
this.onTaskStartCallback =
|
|
341
|
+
this.onTaskStartCallback = opts?.onTaskStart;
|
|
657
342
|
this.replanningCycleLimit = opts.replanningCycleLimit;
|
|
343
|
+
this.hooks = opts.hooks;
|
|
658
344
|
this.conversationHistory = new ConversationHistory();
|
|
345
|
+
this.providedActionSpace = opts.actionSpace;
|
|
346
|
+
this.taskBuilder = new TaskBuilder({
|
|
347
|
+
interfaceInstance,
|
|
348
|
+
service,
|
|
349
|
+
taskCache: opts.taskCache,
|
|
350
|
+
actionSpace: this.getActionSpace()
|
|
351
|
+
});
|
|
659
352
|
}
|
|
660
353
|
}
|
|
661
|
-
export { TaskExecutor, locatePlanForLocate };
|
|
354
|
+
export { TaskExecutionError, TaskExecutor, locatePlanForLocate };
|
|
662
355
|
|
|
663
356
|
//# sourceMappingURL=tasks.mjs.map
|