@donggui/core 1.6.1 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/cache-confidence.mjs +121 -0
- package/dist/es/agent/cache-confidence.mjs.map +1 -0
- package/dist/es/agent/task-builder.mjs +86 -18
- package/dist/es/agent/task-builder.mjs.map +1 -1
- package/dist/es/ai-model/prompt/semantic-anchor-search.mjs +34 -0
- package/dist/es/ai-model/prompt/semantic-anchor-search.mjs.map +1 -0
- package/dist/es/ai-model/prompt/semantic-anchor.mjs +41 -0
- package/dist/es/ai-model/prompt/semantic-anchor.mjs.map +1 -0
- package/dist/es/service/index.mjs +156 -0
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/lib/agent/cache-confidence.js +173 -0
- package/dist/lib/agent/cache-confidence.js.map +1 -0
- package/dist/lib/agent/task-builder.js +85 -17
- package/dist/lib/agent/task-builder.js.map +1 -1
- package/dist/lib/ai-model/prompt/semantic-anchor-search.js +68 -0
- package/dist/lib/ai-model/prompt/semantic-anchor-search.js.map +1 -0
- package/dist/lib/ai-model/prompt/semantic-anchor.js +75 -0
- package/dist/lib/ai-model/prompt/semantic-anchor.js.map +1 -0
- package/dist/lib/service/index.js +156 -0
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/types/agent/cache-confidence.d.ts +13 -0
- package/dist/types/ai-model/prompt/semantic-anchor-search.d.ts +1 -0
- package/dist/types/ai-model/prompt/semantic-anchor.d.ts +1 -0
- package/dist/types/service/index.d.ts +3 -1
- package/dist/types/types.d.ts +22 -0
- package/package.json +1 -1
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS, MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE } from "@midscene/shared/env/constants";
|
|
2
|
+
import { getDebug } from "@midscene/shared/logger";
|
|
3
|
+
// Debug logger for this module, obtained from getDebug with the 'cache-confidence' label.
const debug = getDebug('cache-confidence');
|
|
4
|
+
// Fallback confidence half-life: 30 minutes, expressed in milliseconds.
const DEFAULT_HALF_LIFE_MS = 30 * 60 * 1000;
|
|
5
|
+
/**
 * Score how much a cached result can still be trusted.
 *
 * The score is `0.7 * ageDecay + experienceBonus`, floored at 0.1:
 * - `ageDecay` starts at 1 and halves for every half-life elapsed since
 *   `state.lastVerifiedAt`;
 * - `experienceBonus` is `state.verificationCount / 10`, capped at 0.3.
 *
 * The half-life is read from the environment variable named by
 * MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS. Missing, non-numeric, zero or
 * negative values fall back to DEFAULT_HALF_LIFE_MS.
 *
 * @param {{ lastVerifiedAt: number, verificationCount: number }} state
 *   Timestamp (ms since epoch) of the last successful verification and the
 *   number of successful verifications so far.
 * @returns {number} Confidence score; within [0.1, 1] when both state fields
 *   are finite numbers.
 */
function calculateConfidence(state) {
    const configuredHalfLifeMs = Number.parseInt(process.env[MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS] || '', 10);
    // Only a strictly positive half-life is meaningful: a negative one would
    // turn the decay into exponential growth. NaN fails the comparison too.
    const halfLifeMs = configuredHalfLifeMs > 0 ? configuredHalfLifeMs : DEFAULT_HALF_LIFE_MS;
    // A timestamp that lies in the future must not produce a negative age,
    // which would push ageDecay above 1 and the score above its nominal maximum.
    const ageMs = Math.max(0, Date.now() - state.lastVerifiedAt);
    const ageDecay = Math.exp(-ageMs * Math.LN2 / halfLifeMs);
    const experienceBonus = Math.min(state.verificationCount / 10, 0.3);
    const score = Math.max(0.1, 0.7 * ageDecay + experienceBonus);
    debug('calculateConfidence', {
        ageMs,
        ageDecay: ageDecay.toFixed(3),
        verificationCount: state.verificationCount,
        experienceBonus: experienceBonus.toFixed(3),
        score: score.toFixed(3)
    });
    return score;
}
|
|
20
|
+
/**
 * Map a confidence score to a verification level.
 *
 * The score is compared against descending thresholds with a strict `>`:
 * above 0.8 -> 'minimal', above 0.5 -> 'standard', above 0.2 -> 'enhanced',
 * anything else (including NaN) -> 'full'.
 *
 * @param {number} confidence
 * @returns {'minimal' | 'standard' | 'enhanced' | 'full'}
 */
function determineVerificationLevel(confidence) {
    // Ordered from the highest threshold down; the first one exceeded wins.
    const tiers = [
        [0.8, 'minimal'],
        [0.5, 'standard'],
        [0.2, 'enhanced']
    ];
    const matchedTier = tiers.find(([threshold]) => confidence > threshold);
    return matchedTier ? matchedTier[1] : 'full';
}
|
|
26
|
+
/**
 * Return the set of checks to run for a verification level.
 *
 * | level      | coordCheck | visualVerify | semanticAnchor | skipCache |
 * |------------|------------|--------------|----------------|-----------|
 * | 'minimal'  | true       | false        | false          | false     |
 * | 'standard' | true       | true         | false          | false     |
 * | 'enhanced' | true       | true         | true           | false     |
 * | 'full'     | false      | false        | false          | true      |
 *
 * @param {'minimal' | 'standard' | 'enhanced' | 'full'} level
 * @returns {{ coordCheck: boolean, visualVerify: boolean, semanticAnchor: boolean, skipCache: boolean } | undefined}
 *   A fresh flags object on every call; `undefined` for any other level.
 */
function getVerificationActions(level) {
    const knownLevels = ['minimal', 'standard', 'enhanced', 'full'];
    if (!knownLevels.includes(level)) {
        return undefined;
    }
    // 'full' bypasses the cache entirely, so none of the cache checks apply.
    const bypassCache = level === 'full';
    const wantsAnchor = level === 'enhanced';
    return {
        coordCheck: !bypassCache,
        visualVerify: level === 'standard' || wantsAnchor,
        semanticAnchor: wantsAnchor,
        skipCache: bypassCache
    };
}
|
|
58
|
+
/**
 * Build the confidence state for a newly created cache entry: verified
 * "now", counted as one verification, with a confidence score of 1.
 *
 * @returns {{ lastVerifiedAt: number, verificationCount: number, confidenceScore: number }}
 */
function createInitialConfidenceState() {
    const createdAt = Date.now();
    const initialState = {
        lastVerifiedAt: createdAt,
        verificationCount: 1,
        confidenceScore: 1
    };
    return initialState;
}
|
|
65
|
+
/**
 * Produce the next confidence state after a verification attempt.
 * The input state is not modified.
 *
 * - Failed: timestamp and count are carried over unchanged and the stored
 *   score is halved, never dropping below 0.1.
 * - Passed: the timestamp becomes the current time, the count is
 *   incremented, and the score is recomputed by calculateConfidence from
 *   the refreshed timestamp and count.
 *
 * @param {{ lastVerifiedAt: number, verificationCount: number, confidenceScore: number }} state
 * @param {boolean} passed Whether the verification succeeded.
 * @returns {{ lastVerifiedAt: number, verificationCount: number, confidenceScore: number }}
 */
function updateConfidenceOnVerify(state, passed) {
    if (!passed) {
        return {
            lastVerifiedAt: state.lastVerifiedAt,
            verificationCount: state.verificationCount,
            confidenceScore: Math.max(0.1, 0.5 * state.confidenceScore)
        };
    }
    const lastVerifiedAt = Date.now();
    const verificationCount = state.verificationCount + 1;
    // Scored against a just-taken timestamp, so the age component is at its maximum.
    const confidenceScore = calculateConfidence({
        ...state,
        lastVerifiedAt: Date.now(),
        verificationCount
    });
    return {
        lastVerifiedAt,
        verificationCount,
        confidenceScore
    };
}
|
|
76
|
+
/**
 * Start a progressive-convergence record from a single located point.
 *
 * The point is copied into the record so that the record never shares an
 * array instance with the caller; later changes to either side cannot leak
 * into the other.
 *
 * @param {[number, number]} center The first observed centre point.
 * @returns {{ convergedCenter: [number, number], convergenceRadius: number, sampleCount: number, lastUpdatedAt: number }}
 *   A record with radius 0, a sample count of 1, and the current time as
 *   `lastUpdatedAt`.
 */
function createInitialProgressiveRecord(center) {
    return {
        // Non-array input is stored as given, matching the previous behaviour.
        convergedCenter: Array.isArray(center) ? [...center] : center,
        convergenceRadius: 0,
        sampleCount: 1,
        lastUpdatedAt: Date.now()
    };
}
|
|
84
|
+
/**
 * Fold a newly observed centre point into a progressive-convergence record.
 * The input record is not modified; a new record is returned.
 *
 * When the environment variable named by
 * MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE is exactly 'false', no
 * averaging happens: the new point (copied) becomes the centre and the
 * radius is reset to 0.
 *
 * Otherwise the new centre is a weighted mean of the previous centre
 * (weight `record.sampleCount`) and the new point (weight = `confidence`,
 * at least 0.1). The radius is the larger of the distances from the
 * previous centre and from the new point to that mean.
 *
 * @param {{ convergedCenter: [number, number], convergenceRadius: number, sampleCount: number, lastUpdatedAt: number }} record
 * @param {[number, number]} newCenter The latest observed centre point.
 * @param {number} confidence Weight of the new observation; a non-finite
 *   value is treated as the minimum weight of 0.1.
 * @returns {{ convergedCenter: [number, number], convergenceRadius: number, sampleCount: number, lastUpdatedAt: number }}
 */
function updateProgressiveConvergence(record, newCenter, confidence) {
    const sampleCount = record.sampleCount + 1;
    const enableProgressive = 'false' !== process.env[MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE];
    if (!enableProgressive) {
        return {
            // Copied so the record does not share an array with the caller.
            convergedCenter: Array.isArray(newCenter) ? [...newCenter] : newCenter,
            convergenceRadius: 0,
            sampleCount,
            lastUpdatedAt: Date.now()
        };
    }
    // Math.max(0.1, NaN) is NaN, which would write NaN into the centre and
    // radius; fall back to the minimum weight instead.
    const weight = Number.isFinite(confidence) ? Math.max(0.1, confidence) : 0.1;
    const totalWeight = record.sampleCount + weight;
    const [previousX, previousY] = record.convergedCenter;
    const convergedCenter = [
        (previousX * record.sampleCount + newCenter[0] * weight) / totalWeight,
        (previousY * record.sampleCount + newCenter[1] * weight) / totalWeight
    ];
    const distanceToConverged = (point) => Math.sqrt((point[0] - convergedCenter[0]) ** 2 + (point[1] - convergedCenter[1]) ** 2);
    const convergenceRadius = Math.max(distanceToConverged([previousX, previousY]), distanceToConverged(newCenter));
    debug('updateProgressiveConvergence', {
        newCenter,
        convergedCenter: convergedCenter.map((v) => v.toFixed(1)),
        convergenceRadius: convergenceRadius.toFixed(1),
        sampleCount
    });
    return {
        convergedCenter,
        convergenceRadius,
        sampleCount,
        lastUpdatedAt: Date.now()
    };
}
|
|
119
|
+
export { calculateConfidence, createInitialConfidenceState, createInitialProgressiveRecord, determineVerificationLevel, getVerificationActions, updateConfidenceOnVerify, updateProgressiveConvergence };
|
|
120
|
+
|
|
121
|
+
//# sourceMappingURL=cache-confidence.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent/cache-confidence.mjs","sources":["../../../src/agent/cache-confidence.ts"],"sourcesContent":["import type {\n CacheConfidenceState,\n ProgressiveLocateRecord,\n VerificationLevel,\n} from '@/types';\nimport {\n MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS,\n MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE,\n} from '@midscene/shared/env/constants';\nimport { getDebug } from '@midscene/shared/logger';\n\nconst debug = getDebug('cache-confidence');\n\nconst DEFAULT_HALF_LIFE_MS = 30 * 60 * 1000;\n\nexport function calculateConfidence(state: CacheConfidenceState): number {\n const halfLifeMs =\n Number.parseInt(\n process.env[MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS] || '',\n 10,\n ) || DEFAULT_HALF_LIFE_MS;\n\n const ageMs = Date.now() - state.lastVerifiedAt;\n const ageDecay = Math.exp((-ageMs * Math.LN2) / halfLifeMs);\n const experienceBonus = Math.min(state.verificationCount / 10, 0.3);\n const score = Math.max(0.1, ageDecay * 0.7 + experienceBonus);\n\n debug('calculateConfidence', {\n ageMs,\n ageDecay: ageDecay.toFixed(3),\n verificationCount: state.verificationCount,\n experienceBonus: experienceBonus.toFixed(3),\n score: score.toFixed(3),\n });\n\n return score;\n}\n\nexport function determineVerificationLevel(\n confidence: number,\n): VerificationLevel {\n if (confidence > 0.8) return 'minimal';\n if (confidence > 0.5) return 'standard';\n if (confidence > 0.2) return 'enhanced';\n return 'full';\n}\n\nexport function getVerificationActions(level: VerificationLevel): {\n coordCheck: boolean;\n visualVerify: boolean;\n semanticAnchor: boolean;\n skipCache: boolean;\n} {\n switch (level) {\n case 'minimal':\n return {\n coordCheck: true,\n visualVerify: false,\n semanticAnchor: false,\n skipCache: false,\n };\n case 'standard':\n return {\n coordCheck: true,\n visualVerify: true,\n semanticAnchor: false,\n skipCache: false,\n };\n case 'enhanced':\n return {\n coordCheck: true,\n visualVerify: true,\n semanticAnchor: true,\n skipCache: 
false,\n };\n case 'full':\n return {\n coordCheck: false,\n visualVerify: false,\n semanticAnchor: false,\n skipCache: true,\n };\n }\n}\n\nexport function createInitialConfidenceState(): CacheConfidenceState {\n return {\n lastVerifiedAt: Date.now(),\n verificationCount: 1,\n confidenceScore: 1.0,\n };\n}\n\nexport function updateConfidenceOnVerify(\n state: CacheConfidenceState,\n passed: boolean,\n): CacheConfidenceState {\n return {\n lastVerifiedAt: passed ? Date.now() : state.lastVerifiedAt,\n verificationCount: passed\n ? state.verificationCount + 1\n : state.verificationCount,\n confidenceScore: passed\n ? calculateConfidence({\n ...state,\n lastVerifiedAt: Date.now(),\n verificationCount: state.verificationCount + 1,\n })\n : Math.max(0.1, state.confidenceScore * 0.5),\n };\n}\n\nexport function createInitialProgressiveRecord(\n center: [number, number],\n): ProgressiveLocateRecord {\n return {\n convergedCenter: center,\n convergenceRadius: 0,\n sampleCount: 1,\n lastUpdatedAt: Date.now(),\n };\n}\n\nexport function updateProgressiveConvergence(\n record: ProgressiveLocateRecord,\n newCenter: [number, number],\n confidence: number,\n): ProgressiveLocateRecord {\n const enableProgressive =\n process.env[MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE] !== 'false';\n if (!enableProgressive) {\n return {\n convergedCenter: newCenter,\n convergenceRadius: 0,\n sampleCount: record.sampleCount + 1,\n lastUpdatedAt: Date.now(),\n };\n }\n\n const weight = Math.max(0.1, confidence);\n const totalWeight = record.sampleCount + weight;\n\n const convergedCenter: [number, number] = [\n (record.convergedCenter[0] * record.sampleCount + newCenter[0] * weight) /\n totalWeight,\n (record.convergedCenter[1] * record.sampleCount + newCenter[1] * weight) /\n totalWeight,\n ];\n\n const allPoints = [\n [record.convergedCenter[0], record.convergedCenter[1]],\n newCenter,\n ];\n const convergenceRadius = Math.max(\n ...allPoints.map((p) =>\n Math.sqrt(\n (p[0] - 
convergedCenter[0]) ** 2 + (p[1] - convergedCenter[1]) ** 2,\n ),\n ),\n );\n\n debug('updateProgressiveConvergence', {\n newCenter,\n convergedCenter: convergedCenter.map((v) => v.toFixed(1)),\n convergenceRadius: convergenceRadius.toFixed(1),\n sampleCount: record.sampleCount + 1,\n });\n\n return {\n convergedCenter,\n convergenceRadius,\n sampleCount: record.sampleCount + 1,\n lastUpdatedAt: Date.now(),\n };\n}\n"],"names":["debug","getDebug","DEFAULT_HALF_LIFE_MS","calculateConfidence","state","halfLifeMs","Number","process","MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS","ageMs","Date","ageDecay","Math","experienceBonus","score","determineVerificationLevel","confidence","getVerificationActions","level","createInitialConfidenceState","updateConfidenceOnVerify","passed","createInitialProgressiveRecord","center","updateProgressiveConvergence","record","newCenter","enableProgressive","MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE","weight","totalWeight","convergedCenter","allPoints","convergenceRadius","p","v"],"mappings":";;AAWA,MAAMA,QAAQC,SAAS;AAEvB,MAAMC,uBAAuB;AAEtB,SAASC,oBAAoBC,KAA2B;IAC7D,MAAMC,aACJC,OAAO,QAAQ,CACbC,QAAQ,GAAG,CAACC,uCAAuC,IAAI,IACvD,OACGN;IAEP,MAAMO,QAAQC,KAAK,GAAG,KAAKN,MAAM,cAAc;IAC/C,MAAMO,WAAWC,KAAK,GAAG,CAAE,CAACH,QAAQG,KAAK,GAAG,GAAIP;IAChD,MAAMQ,kBAAkBD,KAAK,GAAG,CAACR,MAAM,iBAAiB,GAAG,IAAI;IAC/D,MAAMU,QAAQF,KAAK,GAAG,CAAC,KAAKD,AAAW,MAAXA,WAAiBE;IAE7Cb,MAAM,uBAAuB;QAC3BS;QACA,UAAUE,SAAS,OAAO,CAAC;QAC3B,mBAAmBP,MAAM,iBAAiB;QAC1C,iBAAiBS,gBAAgB,OAAO,CAAC;QACzC,OAAOC,MAAM,OAAO,CAAC;IACvB;IAEA,OAAOA;AACT;AAEO,SAASC,2BACdC,UAAkB;IAElB,IAAIA,aAAa,KAAK,OAAO;IAC7B,IAAIA,aAAa,KAAK,OAAO;IAC7B,IAAIA,aAAa,KAAK,OAAO;IAC7B,OAAO;AACT;AAEO,SAASC,uBAAuBC,KAAwB;IAM7D,OAAQA;QACN,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;QACF,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;QACF,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;QACF,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;
IACJ;AACF;AAEO,SAASC;IACd,OAAO;QACL,gBAAgBT,KAAK,GAAG;QACxB,mBAAmB;QACnB,iBAAiB;IACnB;AACF;AAEO,SAASU,yBACdhB,KAA2B,EAC3BiB,MAAe;IAEf,OAAO;QACL,gBAAgBA,SAASX,KAAK,GAAG,KAAKN,MAAM,cAAc;QAC1D,mBAAmBiB,SACfjB,MAAM,iBAAiB,GAAG,IAC1BA,MAAM,iBAAiB;QAC3B,iBAAiBiB,SACblB,oBAAoB;YAClB,GAAGC,KAAK;YACR,gBAAgBM,KAAK,GAAG;YACxB,mBAAmBN,MAAM,iBAAiB,GAAG;QAC/C,KACAQ,KAAK,GAAG,CAAC,KAAKR,AAAwB,MAAxBA,MAAM,eAAe;IACzC;AACF;AAEO,SAASkB,+BACdC,MAAwB;IAExB,OAAO;QACL,iBAAiBA;QACjB,mBAAmB;QACnB,aAAa;QACb,eAAeb,KAAK,GAAG;IACzB;AACF;AAEO,SAASc,6BACdC,MAA+B,EAC/BC,SAA2B,EAC3BV,UAAkB;IAElB,MAAMW,oBACJpB,AAA+D,YAA/DA,QAAQ,GAAG,CAACqB,8CAA8C;IAC5D,IAAI,CAACD,mBACH,OAAO;QACL,iBAAiBD;QACjB,mBAAmB;QACnB,aAAaD,OAAO,WAAW,GAAG;QAClC,eAAef,KAAK,GAAG;IACzB;IAGF,MAAMmB,SAASjB,KAAK,GAAG,CAAC,KAAKI;IAC7B,MAAMc,cAAcL,OAAO,WAAW,GAAGI;IAEzC,MAAME,kBAAoC;QACvCN,CAAAA,OAAO,eAAe,CAAC,EAAE,GAAGA,OAAO,WAAW,GAAGC,SAAS,CAAC,EAAE,GAAGG,MAAK,IACpEC;QACDL,CAAAA,OAAO,eAAe,CAAC,EAAE,GAAGA,OAAO,WAAW,GAAGC,SAAS,CAAC,EAAE,GAAGG,MAAK,IACpEC;KACH;IAED,MAAME,YAAY;QAChB;YAACP,OAAO,eAAe,CAAC,EAAE;YAAEA,OAAO,eAAe,CAAC,EAAE;SAAC;QACtDC;KACD;IACD,MAAMO,oBAAoBrB,KAAK,GAAG,IAC7BoB,UAAU,GAAG,CAAC,CAACE,IAChBtB,KAAK,IAAI,CACNsB,AAAAA,CAAAA,CAAC,CAAC,EAAE,GAAGH,eAAe,CAAC,EAAC,KAAM,IAAKG,AAAAA,CAAAA,CAAC,CAAC,EAAE,GAAGH,eAAe,CAAC,EAAC,KAAM;IAKxE/B,MAAM,gCAAgC;QACpC0B;QACA,iBAAiBK,gBAAgB,GAAG,CAAC,CAACI,IAAMA,EAAE,OAAO,CAAC;QACtD,mBAAmBF,kBAAkB,OAAO,CAAC;QAC7C,aAAaR,OAAO,WAAW,GAAG;IACpC;IAEA,OAAO;QACLM;QACAE;QACA,aAAaR,OAAO,WAAW,GAAG;QAClC,eAAef,KAAK,GAAG;IACzB;AACF"}
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import { calculateConfidence, createInitialConfidenceState, createInitialProgressiveRecord, determineVerificationLevel, getVerificationActions, updateConfidenceOnVerify, updateProgressiveConvergence } from "./cache-confidence.mjs";
|
|
1
2
|
import { findAllMidsceneLocatorField, parseActionParam } from "../ai-model/index.mjs";
|
|
2
3
|
import { setTimingFieldOnce } from "../task-timing.mjs";
|
|
3
4
|
import { ServiceError } from "../types.mjs";
|
|
4
5
|
import { sleep } from "../utils.mjs";
|
|
5
|
-
import { MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD,
|
|
6
|
+
import { MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD, MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR } from "@midscene/shared/env/constants";
|
|
6
7
|
import { generateElementByRect } from "@midscene/shared/extractor";
|
|
7
8
|
import { getDebug } from "@midscene/shared/logger";
|
|
8
9
|
import { assert } from "@midscene/shared/utils";
|
|
@@ -229,7 +230,7 @@ class TaskBuilder {
|
|
|
229
230
|
taskCache: this.taskCache,
|
|
230
231
|
interfaceInstance: this.interface
|
|
231
232
|
}, cacheEntry, cachePrompt, param.cacheable);
|
|
232
|
-
|
|
233
|
+
let elementFromCache = elementFromCacheResult ? transformLogicalElementToScreenshot(elementFromCacheResult, shrunkShotToLogicalRatio) : void 0;
|
|
233
234
|
let isCacheHit = !!elementFromCache;
|
|
234
235
|
const timing = taskContext.task.timing;
|
|
235
236
|
let elementFromAiLocate;
|
|
@@ -237,10 +238,25 @@ class TaskBuilder {
|
|
|
237
238
|
const cacheFeature = cacheEntry;
|
|
238
239
|
const cachedCenter = cacheFeature?.cachedCenter;
|
|
239
240
|
const coordOffsetThreshold = Number.parseInt(process.env[MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD] || '16', 10) || 16;
|
|
240
|
-
const
|
|
241
|
-
const
|
|
241
|
+
const confidenceState = cacheFeature?.confidenceState || createInitialConfidenceState();
|
|
242
|
+
const confidence = calculateConfidence(confidenceState);
|
|
243
|
+
const level = determineVerificationLevel(confidence);
|
|
244
|
+
const actions = getVerificationActions(level);
|
|
245
|
+
debug('cache confidence assessment', {
|
|
246
|
+
confidence: confidence.toFixed(3),
|
|
247
|
+
level,
|
|
248
|
+
actions,
|
|
249
|
+
verificationCount: confidenceState.verificationCount
|
|
250
|
+
});
|
|
251
|
+
if (actions.skipCache) {
|
|
252
|
+
debug('cache confidence too low, skipping cache entirely', {
|
|
253
|
+
confidence,
|
|
254
|
+
level
|
|
255
|
+
});
|
|
256
|
+
isCacheHit = false;
|
|
257
|
+
}
|
|
242
258
|
try {
|
|
243
|
-
if (
|
|
259
|
+
if (isCacheHit && actions.coordCheck && cachedCenter) {
|
|
244
260
|
const offset = Math.sqrt((elementFromCache.center[0] - cachedCenter[0]) ** 2 + (elementFromCache.center[1] - cachedCenter[1]) ** 2);
|
|
245
261
|
debug('cache coord offset check', {
|
|
246
262
|
cachedCenter,
|
|
@@ -256,7 +272,7 @@ class TaskBuilder {
|
|
|
256
272
|
isCacheHit = false;
|
|
257
273
|
}
|
|
258
274
|
}
|
|
259
|
-
if (isCacheHit &&
|
|
275
|
+
if (isCacheHit && actions.visualVerify) {
|
|
260
276
|
const verification = await this.service.verifyCachedElement(elementFromCache.center, cachePrompt, modelConfigForDefaultIntent, uiContext);
|
|
261
277
|
if (verification.pass) debug('cache hit and visual verification passed', {
|
|
262
278
|
description: verification.description
|
|
@@ -274,19 +290,59 @@ class TaskBuilder {
|
|
|
274
290
|
debug('cache verification error, fallback to AI locate', verifyError);
|
|
275
291
|
isCacheHit = false;
|
|
276
292
|
}
|
|
293
|
+
if (isCacheHit) {
|
|
294
|
+
const updatedState = updateConfidenceOnVerify(confidenceState, true);
|
|
295
|
+
cacheFeature.confidenceState = updatedState;
|
|
296
|
+
const progressiveRecord = cacheFeature.progressiveRecord;
|
|
297
|
+
if (progressiveRecord) {
|
|
298
|
+
const updated = updateProgressiveConvergence(progressiveRecord, elementFromCache.center, updatedState.confidenceScore);
|
|
299
|
+
cacheFeature.progressiveRecord = updated;
|
|
300
|
+
if (updated.convergenceRadius < 5 && updated.sampleCount >= 3) {
|
|
301
|
+
debug('using converged center instead of single-result center', {
|
|
302
|
+
convergedCenter: updated.convergedCenter.map((v)=>v.toFixed(1)),
|
|
303
|
+
singleCenter: elementFromCache.center,
|
|
304
|
+
convergenceRadius: updated.convergenceRadius.toFixed(1),
|
|
305
|
+
sampleCount: updated.sampleCount
|
|
306
|
+
});
|
|
307
|
+
elementFromCache = {
|
|
308
|
+
...elementFromCache,
|
|
309
|
+
center: [
|
|
310
|
+
Math.round(updated.convergedCenter[0]),
|
|
311
|
+
Math.round(updated.convergedCenter[1])
|
|
312
|
+
]
|
|
313
|
+
};
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
} else {
|
|
317
|
+
const updatedState = updateConfidenceOnVerify(confidenceState, false);
|
|
318
|
+
cacheFeature.confidenceState = updatedState;
|
|
319
|
+
}
|
|
277
320
|
}
|
|
278
|
-
if (!isXpathHit && !isCacheHit && !isPlanHit)
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
321
|
+
if (!isXpathHit && !isCacheHit && !isPlanHit) {
|
|
322
|
+
const cacheFeature = cacheEntry;
|
|
323
|
+
const semanticAnchor = cacheFeature?.semanticAnchor;
|
|
324
|
+
if (semanticAnchor && 'false' !== process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR]) try {
|
|
325
|
+
const anchorResult = await this.service.locateBySemanticAnchor(semanticAnchor, modelConfigForDefaultIntent, this.interface, uiContext);
|
|
326
|
+
if (anchorResult) {
|
|
327
|
+
elementFromAiLocate = anchorResult;
|
|
328
|
+
debug('semantic anchor locate succeeded, skipping full AI locate');
|
|
329
|
+
}
|
|
330
|
+
} catch (anchorError) {
|
|
331
|
+
debug('semantic anchor locate failed:', anchorError);
|
|
332
|
+
}
|
|
333
|
+
if (!elementFromAiLocate) try {
|
|
334
|
+
setTimingFieldOnce(timing, 'callAiStart');
|
|
335
|
+
locateResult = await this.service.locate(param, {
|
|
336
|
+
context: uiContext
|
|
337
|
+
}, modelConfigForDefaultIntent, abortSignal);
|
|
338
|
+
applyDump(locateResult.dump);
|
|
339
|
+
elementFromAiLocate = locateResult.element;
|
|
340
|
+
} catch (error) {
|
|
341
|
+
if (error instanceof ServiceError) applyDump(error.dump);
|
|
342
|
+
throw error;
|
|
343
|
+
} finally{
|
|
344
|
+
setTimingFieldOnce(timing, 'callAiEnd');
|
|
345
|
+
}
|
|
290
346
|
}
|
|
291
347
|
const element = elementFromBbox || elementFromXpath || elementFromCache || elementFromAiLocate;
|
|
292
348
|
const locateCacheAlreadyExists = hasNonEmptyCache(locateCacheRecord?.cacheContent?.cache);
|
|
@@ -306,7 +362,19 @@ class TaskBuilder {
|
|
|
306
362
|
});
|
|
307
363
|
if (hasNonEmptyCache(feature)) {
|
|
308
364
|
feature.cachedCenter = pointForCache;
|
|
365
|
+
feature.confidenceState = createInitialConfidenceState();
|
|
366
|
+
feature.progressiveRecord = createInitialProgressiveRecord(pointForCache);
|
|
309
367
|
debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
|
|
368
|
+
const enableSemanticAnchor = 'false' !== process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR];
|
|
369
|
+
if (enableSemanticAnchor) try {
|
|
370
|
+
const anchor = await this.service.generateSemanticAnchor(pointForCache, modelConfigForDefaultIntent, uiContext);
|
|
371
|
+
if (anchor) {
|
|
372
|
+
feature.semanticAnchor = anchor;
|
|
373
|
+
debug('semantic anchor generated for prompt: %s', cachePrompt);
|
|
374
|
+
}
|
|
375
|
+
} catch (anchorError) {
|
|
376
|
+
debug('generateSemanticAnchor failed:', anchorError);
|
|
377
|
+
}
|
|
310
378
|
currentCacheEntry = feature;
|
|
311
379
|
await this.taskCache.updateOrAppendCacheRecord({
|
|
312
380
|
type: 'locate',
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent/task-builder.mjs","sources":["../../../src/agent/task-builder.ts"],"sourcesContent":["import { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport { setTimingFieldOnce } from '@/task-timing';\nimport type {\n CacheValidationOptions,\n DetailedLocateParam,\n DeviceAction,\n ElementCacheFeature,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskPlanningLocateApply,\n LocateResultElement,\n LocateResultWithDump,\n PlanningAction,\n PlanningLocateParam,\n Rect,\n ServiceDump,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { sleep } from '@/utils';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD,\n MIDSCENE_CACHE_ENABLE_COORD_CHECK,\n MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY,\n} from '@midscene/shared/env/constants';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport {\n ifPlanLocateParamIsBbox,\n matchElementFromCache,\n matchElementFromPlan,\n transformLogicalElementToScreenshot,\n transformLogicalRectToScreenshotRect,\n} from './utils';\n\nconst debug = getDebug('agent:task-builder');\n\n/**\n * Check if a cache object is non-empty\n */\nfunction hasNonEmptyCache(cache: unknown): boolean {\n return (\n cache !== null &&\n cache !== undefined &&\n typeof cache === 'object' &&\n Object.keys(cache).length > 0\n );\n}\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? 
{ prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\ninterface TaskBuilderDeps {\n interfaceInstance: AbstractInterface;\n service: Service;\n taskCache?: TaskCache;\n actionSpace: DeviceAction[];\n waitAfterAction?: number;\n}\n\ninterface BuildOptions {\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\ninterface PlanBuildContext {\n tasks: ExecutionTaskApply[];\n modelConfigForPlanning: IModelConfig;\n modelConfigForDefaultIntent: IModelConfig;\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\nexport class TaskBuilder {\n private readonly interface: AbstractInterface;\n\n private readonly service: Service;\n\n private readonly taskCache?: TaskCache;\n\n private readonly actionSpace: DeviceAction[];\n\n private readonly waitAfterAction?: number;\n\n constructor({\n interfaceInstance,\n service,\n taskCache,\n actionSpace,\n waitAfterAction,\n }: TaskBuilderDeps) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = taskCache;\n this.actionSpace = actionSpace;\n this.waitAfterAction = waitAfterAction;\n }\n\n public async build(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: BuildOptions,\n ): Promise<{ tasks: ExecutionTaskApply[] }> {\n const tasks: ExecutionTaskApply[] = [];\n const cacheable = options?.cacheable;\n\n const context: PlanBuildContext = {\n tasks,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n cacheable,\n deepLocate: options?.deepLocate,\n abortSignal: options?.abortSignal,\n };\n\n type PlanHandler = (plan: PlanningAction) => Promise<void> | void;\n\n const planHandlers = new Map<string, PlanHandler>([\n [\n 'Locate',\n (plan) =>\n this.handleLocatePlan(\n plan as PlanningAction<PlanningLocateParam>,\n context,\n ),\n ],\n ['Finished', (plan) => 
this.handleFinishedPlan(plan, context)],\n ]);\n\n const defaultHandler: PlanHandler = (plan) =>\n this.handleActionPlan(plan, context);\n\n for (const plan of plans) {\n const handler = planHandlers.get(plan.type) ?? defaultHandler;\n await handler(plan);\n }\n\n return {\n tasks,\n };\n }\n\n private handleFinishedPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): void {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action Space',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n executor: async () => {},\n };\n context.tasks.push(taskActionFinished);\n }\n\n private async handleLocatePlan(\n plan: PlanningAction<PlanningLocateParam>,\n context: PlanBuildContext,\n ): Promise<void> {\n const taskLocate = this.createLocateTask(plan, plan.param, context);\n context.tasks.push(taskLocate);\n }\n\n private async handleActionPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): Promise<void> {\n const planType = plan.type;\n const actionSpace = this.actionSpace;\n const action = actionSpace.find((item) => item.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n const locateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? 
findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n // Always use createLocateTask for all locate params (including bbox)\n // This ensures cache writing happens even when bbox is available\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n `hasBbox=${ifPlanLocateParamIsBbox(param[field])}`,\n );\n const locateTask = this.createLocateTask(\n locatePlan,\n param[field],\n context,\n (result) => {\n param[field] = result;\n },\n );\n context.tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action Space',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action Space',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n executor: async (param, taskContext) => {\n const timing = taskContext.task.timing;\n\n debug(\n 'executing action',\n planType,\n param,\n `taskContext.element.center: ${taskContext.element?.center}`,\n );\n\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Action task');\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. 
Cannot execute action ${planType}.`,\n );\n });\n\n setTimingFieldOnce(timing, 'beforeInvokeActionHookStart');\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug(\n `will call \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.beforeInvokeAction(action.name, param);\n debug(\n `called \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n setTimingFieldOnce(timing, 'beforeInvokeActionHookEnd');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in Action task',\n );\n }\n\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema, {\n shrunkShotToLogicalRatio,\n });\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n setTimingFieldOnce(timing, 'callActionStart');\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n const actionResult = await actionFn(param, taskContext);\n setTimingFieldOnce(timing, 'callActionEnd');\n debug('called action', action.name, 'result:', actionResult);\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookStart');\n\n const delayAfterRunner =\n action.delayAfterRunner ?? this.waitAfterAction ?? 
300;\n if (delayAfterRunner > 0) {\n await sleep(delayAfterRunner);\n }\n\n try {\n if (this.interface.afterInvokeAction) {\n debug(\n `will call \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.afterInvokeAction(action.name, param);\n debug(\n `called \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookEnd');\n\n return {\n output: actionResult,\n };\n },\n };\n\n context.tasks.push(task);\n }\n\n private createLocateTask(\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n context: PlanBuildContext,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskPlanningLocateApply {\n const { cacheable, modelConfigForDefaultIntent, deepLocate, abortSignal } =\n context;\n\n let locateParam = detailedLocateParam;\n\n if (typeof locateParam === 'string') {\n locateParam = {\n prompt: locateParam,\n };\n }\n\n if (cacheable !== undefined) {\n locateParam = {\n ...locateParam,\n cacheable,\n };\n }\n\n if (deepLocate && !locateParam.deepLocate) {\n locateParam = {\n ...locateParam,\n deepLocate: true,\n };\n }\n\n const taskLocator: ExecutionTaskPlanningLocateApply = {\n type: 'Planning',\n subType: 'Locate',\n param: locateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let { uiContext } = taskContext;\n\n assert(\n param?.prompt || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n\n if (!uiContext) {\n uiContext = await this.service.contextRetrieverFn();\n }\n\n assert(uiContext, 'uiContext is required for Service 
task');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in locate task',\n );\n }\n\n let locateDump: ServiceDump | undefined;\n let locateResult: LocateResultWithDump | undefined;\n\n const applyDump = (dump?: ServiceDump) => {\n if (!dump) {\n return;\n }\n locateDump = dump;\n task.log = {\n dump,\n rawResponse: dump.taskInfo?.rawResponse,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n if (dump.taskInfo?.reasoning_content) {\n task.reasoning_content = dump.taskInfo.reasoning_content;\n }\n };\n\n // from bbox (plan hit)\n const elementFromBbox = ifPlanLocateParamIsBbox(param)\n ? matchElementFromPlan(param)\n : undefined;\n const isPlanHit = !!elementFromBbox;\n\n // from xpath\n let rectFromXpath: Rect | undefined;\n if (\n !isPlanHit &&\n param.xpath &&\n this.interface.rectMatchesCacheFeature\n ) {\n try {\n rectFromXpath = await this.interface.rectMatchesCacheFeature({\n xpaths: [param.xpath],\n });\n } catch {\n // xpath locate failed, allow fallback to cache or AI locate\n }\n }\n\n const elementFromXpath = rectFromXpath\n ? generateElementByRect(\n // rectFromXpath is in logical coordinates, which should be transformed to screenshot coordinates;\n transformLogicalRectToScreenshotRect(\n rectFromXpath,\n shrunkShotToLogicalRatio,\n ),\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt || '',\n )\n : undefined;\n\n const isXpathHit = !!elementFromXpath;\n\n const cachePrompt = param.prompt;\n const locateCacheRecord =\n await this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n\n const elementFromCacheResult =\n isPlanHit || isXpathHit\n ? 
null\n : await matchElementFromCache(\n {\n taskCache: this.taskCache,\n interfaceInstance: this.interface,\n },\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n\n // elementFromCacheResult is in logical coordinates, which should be transformed to screenshot coordinates;\n const elementFromCache = elementFromCacheResult\n ? transformLogicalElementToScreenshot(\n elementFromCacheResult,\n shrunkShotToLogicalRatio,\n )\n : undefined;\n\n let isCacheHit = !!elementFromCache;\n const timing = taskContext.task.timing;\n let elementFromAiLocate: LocateResultElement | null | undefined;\n\n if (isCacheHit) {\n const cacheFeature = cacheEntry as ElementCacheFeature;\n const cachedCenter = cacheFeature?.cachedCenter as\n | [number, number]\n | undefined;\n\n const coordOffsetThreshold =\n Number.parseInt(\n process.env[MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD] || '16',\n 10,\n ) || 16;\n const enableCoordCheck =\n process.env[MIDSCENE_CACHE_ENABLE_COORD_CHECK] !== 'false';\n const enableVisualVerify =\n process.env[MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY] !== 'false';\n\n try {\n if (enableCoordCheck && cachedCenter) {\n const offset = Math.sqrt(\n (elementFromCache!.center[0] - cachedCenter[0]) ** 2 +\n (elementFromCache!.center[1] - cachedCenter[1]) ** 2,\n );\n\n debug('cache coord offset check', {\n cachedCenter,\n currentCenter: elementFromCache!.center,\n offset: Math.round(offset),\n threshold: coordOffsetThreshold,\n });\n\n if (offset > coordOffsetThreshold) {\n debug(\n 'cache coord offset exceeded threshold, fallback to AI locate',\n { offset, threshold: coordOffsetThreshold },\n );\n isCacheHit = false;\n }\n }\n\n if (isCacheHit && enableVisualVerify) {\n const verification = await this.service.verifyCachedElement(\n elementFromCache!.center,\n cachePrompt,\n modelConfigForDefaultIntent,\n uiContext,\n );\n if (!verification.pass) {\n debug(\n 'cache hit but visual verification failed, fallback to AI locate',\n {\n reason: verification.reason,\n description: 
verification.description,\n prompt: cachePrompt,\n },\n );\n isCacheHit = false;\n } else {\n debug('cache hit and visual verification passed', {\n description: verification.description,\n });\n }\n }\n } catch (verifyError) {\n debug(\n 'cache verification error, fallback to AI locate',\n verifyError,\n );\n isCacheHit = false;\n }\n }\n\n if (!isXpathHit && !isCacheHit && !isPlanHit) {\n try {\n setTimingFieldOnce(timing, 'callAiStart');\n locateResult = await this.service.locate(\n param,\n {\n context: uiContext,\n },\n modelConfigForDefaultIntent,\n abortSignal,\n );\n applyDump(locateResult.dump);\n elementFromAiLocate = locateResult.element;\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n } finally {\n setTimingFieldOnce(timing, 'callAiEnd');\n }\n }\n\n const element =\n elementFromBbox ||\n elementFromXpath ||\n elementFromCache ||\n elementFromAiLocate;\n\n // Check if locate cache already exists (for planHitFlag case)\n const locateCacheAlreadyExists = hasNonEmptyCache(\n locateCacheRecord?.cacheContent?.cache,\n );\n\n let currentCacheEntry: ElementCacheFeature | undefined;\n // Write cache if:\n // 1. element found\n // 2. taskCache enabled\n // 3. not a cache hit (otherwise we'd be writing what we just read)\n // 4. not already cached for plan hit case (avoid redundant writes), OR allow update if cache validation failed\n // 5. 
cacheable is not explicitly false\n if (\n element &&\n this.taskCache &&\n !isCacheHit &&\n (!isPlanHit || !locateCacheAlreadyExists) &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForPoint) {\n try {\n // Transform coordinates to logical space for cacheFeatureForPoint\n // cacheFeatureForPoint needs logical coordinates to locate elements in DOM\n let pointForCache: [number, number] = element.center;\n if (shrunkShotToLogicalRatio !== 1) {\n pointForCache = [\n Math.round(element.center[0] / shrunkShotToLogicalRatio),\n Math.round(element.center[1] / shrunkShotToLogicalRatio),\n ];\n debug(\n 'Transformed coordinates for cacheFeatureForPoint: %o -> %o',\n element.center,\n pointForCache,\n );\n }\n\n const feature = await this.interface.cacheFeatureForPoint(\n pointForCache,\n {\n targetDescription:\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt,\n modelConfig: modelConfigForDefaultIntent,\n },\n );\n if (hasNonEmptyCache(feature)) {\n feature.cachedCenter = pointForCache;\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n currentCacheEntry = feature;\n await this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: %s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForPoint failed: %s', error);\n }\n } else {\n debug('cacheFeatureForPoint is not supported, skip cache update');\n }\n }\n\n if (!element) {\n if (locateDump) {\n throw new ServiceError(\n `Element not found : ${param.prompt}`,\n locateDump,\n );\n }\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (isPlanHit) {\n hitBy = {\n from: 'Plan',\n context: {\n bbox: param.bbox,\n },\n };\n } else if (isXpathHit) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: 
param.xpath,\n },\n };\n } else if (isCacheHit) {\n hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element: {\n ...element,\n // backward compatibility for aiLocate, which return value needs a dpr field\n dpr: uiContext.deprecatedDpr,\n },\n },\n hitBy,\n };\n },\n };\n\n return taskLocator;\n }\n}\n"],"names":["debug","getDebug","hasNonEmptyCache","cache","Object","locatePlanForLocate","param","locate","locatePlan","TaskBuilder","plans","modelConfigForPlanning","modelConfigForDefaultIntent","options","tasks","cacheable","context","planHandlers","Map","plan","defaultHandler","handler","taskActionFinished","taskLocate","planType","actionSpace","action","item","Error","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","JSON","ifPlanLocateParamIsBbox","locateTask","result","assert","task","taskContext","timing","uiContext","setTimingFieldOnce","Promise","sleep","originalError","originalMessage","String","shrunkShotToLogicalRatio","undefined","parseActionParam","error","actionFn","actionResult","delayAfterRunner","detailedLocateParam","onResult","deepLocate","abortSignal","locateParam","taskLocator","locateDump","locateResult","applyDump","dump","elementFromBbox","matchElementFromPlan","isPlanHit","rectFromXpath","elementFromXpath","generateElementByRect","transformLogicalRectToScreenshotRect","isXpathHit","cachePrompt","locateCacheRecord","cacheEntry","elementFromCacheResult","matchElementFromCache","elementFromCache","transformLogicalElementToScreenshot","isCacheHit","elementFromAiLocate","cacheFeature","cachedCenter","coordOffsetThreshold","Number","process","MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD","enableCoordCheck","MIDSCENE_CACHE_ENABLE_COORD_CHECK","enableVisualVerify","MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY","offset","Math","verification","verifyError","ServiceError","element","locateCacheAlreadyExists","currentCacheEntry","pointForCach
e","feature","hitBy","interfaceInstance","service","taskCache","waitAfterAction"],"mappings":";;;;;;;;;;;;;;;;;;;AAwCA,MAAMA,QAAQC,SAAS;AAKvB,SAASC,iBAAiBC,KAAc;IACtC,OACEA,QAAAA,SAEA,AAAiB,YAAjB,OAAOA,SACPC,OAAO,IAAI,CAACD,OAAO,MAAM,GAAG;AAEhC;AAEO,SAASE,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACN,OAAOD;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAyBO,MAAMC;IAyBX,MAAa,MACXC,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCC,OAAsB,EACoB;QAC1C,MAAMC,QAA8B,EAAE;QACtC,MAAMC,YAAYF,SAAS;QAE3B,MAAMG,UAA4B;YAChCF;YACAH;YACAC;YACAG;YACA,YAAYF,SAAS;YACrB,aAAaA,SAAS;QACxB;QAIA,MAAMI,eAAe,IAAIC,IAAyB;YAChD;gBACE;gBACA,CAACC,OACC,IAAI,CAAC,gBAAgB,CACnBA,MACAH;aAEL;YACD;gBAAC;gBAAY,CAACG,OAAS,IAAI,CAAC,kBAAkB,CAACA,MAAMH;aAAS;SAC/D;QAED,MAAMI,iBAA8B,CAACD,OACnC,IAAI,CAAC,gBAAgB,CAACA,MAAMH;QAE9B,KAAK,MAAMG,QAAQT,MAAO;YACxB,MAAMW,UAAUJ,aAAa,GAAG,CAACE,KAAK,IAAI,KAAKC;YAC/C,MAAMC,QAAQF;QAChB;QAEA,OAAO;YACLL;QACF;IACF;IAEQ,mBACNK,IAAoB,EACpBH,OAAyB,EACnB;QACN,MAAMM,qBAAqD;YACzD,MAAM;YACN,SAAS;YACT,OAAO;YACP,SAASH,KAAK,OAAO;YACrB,UAAU,WAAa;QACzB;QACAH,QAAQ,KAAK,CAAC,IAAI,CAACM;IACrB;IAEA,MAAc,iBACZH,IAAyC,EACzCH,OAAyB,EACV;QACf,MAAMO,aAAa,IAAI,CAAC,gBAAgB,CAACJ,MAAMA,KAAK,KAAK,EAAEH;QAC3DA,QAAQ,KAAK,CAAC,IAAI,CAACO;IACrB;IAEA,MAAc,iBACZJ,IAAoB,EACpBH,OAAyB,EACV;QACf,MAAMQ,WAAWL,KAAK,IAAI;QAC1B,MAAMM,cAAc,IAAI,CAAC,WAAW;QACpC,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACE,OAASA,KAAK,IAAI,KAAKH;QACxD,MAAMlB,QAAQa,KAAK,KAAK;QAExB,IAAI,CAACO,QACH,MAAM,IAAIE,MAAM,CAAC,aAAa,EAAEJ,SAAS,WAAW,CAAC;QAGvD,MAAMK,eAAeH,SACjBI,4BAA4BJ,OAAO,WAAW,IAC9C,EAAE;QAEN,MAAMK,uBAAuBL,SACzBI,4BAA4BJ,OAAO,WAAW,EAAE,QAChD,EAAE;QAENG,aAAa,OAAO,CAAC,CAACG;YACpB,IAAI1B,KAAK,CAAC0B,MAAM,EAAE;gBAGhB,MAAMxB,aAAaH,oBAAoBC,KAAK,CAAC0B,MAAM;gBACnDhC,MACE,uCACA,CAAC,YAAY,EAAEwB,UAAU,EACzB,CAAC,MAAM,EAAES,KAAK,SAAS,CAAC3B,KAAK,CAAC0B,MAAM,GAAG,EACvC,CAAC,WAAW,EAAEC,KAAK,SAAS,CAACzB,aAAa,EAC1C,CAAC,QAAQ,EAAE0B,wBAAwB5B,KAAK,CAAC0B,MAAM,GAAG;gBAEpD,MAAMG,aAAa,IAAI,CAAC,gBAAgB,CACtC3B,YACAF,K
AAK,CAAC0B,MAAM,EACZhB,SACA,CAACoB;oBACC9B,KAAK,CAAC0B,MAAM,GAAGI;gBACjB;gBAEFpB,QAAQ,KAAK,CAAC,IAAI,CAACmB;YACrB,OAAO;gBACLE,OACE,CAACN,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAER,UAAU;gBAE3ExB,MAAM,CAAC,OAAO,EAAEgC,MAAM,6BAA6B,EAAER,UAAU;YACjE;QACF;QAEA,MAAMc,OAKF;YACF,MAAM;YACN,SAASd;YACT,SAASL,KAAK,OAAO;YACrB,OAAOA,KAAK,KAAK;YACjB,UAAU,OAAOb,OAAOiC;gBACtB,MAAMC,SAASD,YAAY,IAAI,CAAC,MAAM;gBAEtCvC,MACE,oBACAwB,UACAlB,OACA,CAAC,4BAA4B,EAAEiC,YAAY,OAAO,EAAE,QAAQ;gBAG9D,MAAME,YAAYF,YAAY,SAAS;gBACvCF,OAAOI,WAAW;gBAElBV,qBAAqB,OAAO,CAAC,CAACC;oBAC5BK,OACE/B,KAAK,CAAC0B,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAER,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;gBAE9G;gBAEAkB,mBAAmBF,QAAQ;gBAC3B,IAAI;oBACF,MAAMG,QAAQ,GAAG,CAAC;wBACf;4BACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;gCACrC3C,MACE,CAAC,8DAA8D,EAAE0B,OAAO,IAAI,EAAE;gCAEhF,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAACA,OAAO,IAAI,EAAEpB;gCACrDN,MACE,CAAC,2DAA2D,EAAE0B,OAAO,IAAI,EAAE;4BAE/E;wBACF;wBACAkB,MAAM;qBACP;gBACH,EAAE,OAAOC,eAAoB;oBAC3B,MAAMC,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,wCAAwC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC5E;wBAAE,OAAOD;oBAAc;gBAE3B;gBACAH,mBAAmBF,QAAQ;gBAE3B,MAAM,EAAEQ,wBAAwB,EAAE,GAAGP;gBACrC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIF,OAAO,WAAW,EACpB,IAAI;oBACFpB,QAAQ4C,iBAAiB5C,OAAOoB,OAAO,WAAW,EAAE;wBAClDsB;oBACF;gBACF,EAAE,OAAOG,OAAY;oBACnB,MAAM,IAAIvB,MACR,CAAC,8BAA8B,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEyB,MAAM,OAAO,CAAC,cAAc,EAAElB,KAAK,SAAS,CAAC3B,QAAQ,EACtG;wBAAE,OAAO6C;oBAAM;gBAEnB;gBAGFT,mBAAmBF,QAAQ;gBAE3BxC,MAAM,kBAAkB0B,OAAO,IAAI;gBACnC,MAAM0B,WAAW1B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;gBAChD,MAAM2B,eAAe,MAAMD,SAAS9C,OAAOiC;gBAC3CG,mBAAmBF,QAAQ;gBAC3BxC,MAAM,iBAAiB0B,OAAO,IAAI,EAAE,WAAW2B;gBAE/CX,mBAAmBF,QAAQ;gBAE3B,MAAMc,mBACJ5B,OAAO,gBAAgB,IAAI,IAAI,CAAC,eAAe,IAAI;gBACrD,IAAI4B,mBAAmB,GACrB,MAAMV,MAAMU;gBAGd,IAAI;oBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;wBACpCtD,MACE,CAAC,6DAA6D,EAAE0B,OAAO,IAAI,EAAE;wBAE/E,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAACA,OAAO,I
AAI,EAAEpB;wBACpDN,MACE,CAAC,0DAA0D,EAAE0B,OAAO,IAAI,EAAE;oBAE9E;gBACF,EAAE,OAAOmB,eAAoB;oBAC3B,MAAMC,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,uCAAuC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC3E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEAH,mBAAmBF,QAAQ;gBAE3B,OAAO;oBACL,QAAQa;gBACV;YACF;QACF;QAEArC,QAAQ,KAAK,CAAC,IAAI,CAACsB;IACrB;IAEQ,iBACNnB,IAAyC,EACzCoC,mBAAiD,EACjDvC,OAAyB,EACzBwC,QAAgD,EACd;QAClC,MAAM,EAAEzC,SAAS,EAAEH,2BAA2B,EAAE6C,UAAU,EAAEC,WAAW,EAAE,GACvE1C;QAEF,IAAI2C,cAAcJ;QAElB,IAAI,AAAuB,YAAvB,OAAOI,aACTA,cAAc;YACZ,QAAQA;QACV;QAGF,IAAI5C,AAAckC,WAAdlC,WACF4C,cAAc;YACZ,GAAGA,WAAW;YACd5C;QACF;QAGF,IAAI0C,cAAc,CAACE,YAAY,UAAU,EACvCA,cAAc;YACZ,GAAGA,WAAW;YACd,YAAY;QACd;QAGF,MAAMC,cAAgD;YACpD,MAAM;YACN,SAAS;YACT,OAAOD;YACP,SAASxC,KAAK,OAAO;YACrB,UAAU,OAAOb,OAAOiC;gBACtB,MAAM,EAAED,IAAI,EAAE,GAAGC;gBACjB,IAAI,EAAEE,SAAS,EAAE,GAAGF;gBAEpBF,OACE/B,OAAO,UAAUA,OAAO,MACxB,CAAC,qDAAqD,EAAE2B,KAAK,SAAS,CACpE3B,QACC;gBAGL,IAAI,CAACmC,WACHA,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB;gBAGnDJ,OAAOI,WAAW;gBAElB,MAAM,EAAEO,wBAAwB,EAAE,GAAGP;gBAErC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIiC;gBACJ,IAAIC;gBAEJ,MAAMC,YAAY,CAACC;oBACjB,IAAI,CAACA,MACH;oBAEFH,aAAaG;oBACb1B,KAAK,GAAG,GAAG;wBACT0B;wBACA,aAAaA,KAAK,QAAQ,EAAE;oBAC9B;oBACA1B,KAAK,KAAK,GAAG0B,KAAK,QAAQ,EAAE;oBAC5B,IAAIA,KAAK,QAAQ,EAAE,iBACjB1B,KAAK,eAAe,GAAG0B,KAAK,QAAQ,CAAC,eAAe;oBAEtD,IAAIA,KAAK,QAAQ,EAAE,mBACjB1B,KAAK,iBAAiB,GAAG0B,KAAK,QAAQ,CAAC,iBAAiB;gBAE5D;gBAGA,MAAMC,kBAAkB/B,wBAAwB5B,SAC5C4D,qBAAqB5D,SACrB2C;gBACJ,MAAMkB,YAAY,CAAC,CAACF;gBAGpB,IAAIG;gBACJ,IACE,CAACD,aACD7D,MAAM,KAAK,IACX,IAAI,CAAC,SAAS,CAAC,uBAAuB,EAEtC,IAAI;oBACF8D,gBAAgB,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC;wBAC3D,QAAQ;4BAAC9D,MAAM,KAAK;yBAAC;oBACvB;gBACF,EAAE,OAAM,CAER;gBAGF,MAAM+D,mBAAmBD,gBACrBE,sBAEEC,qCACEH,eACApB,2BAEF,AAAwB,YAAxB,OAAO1C,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE,UAAU,MAE9B2C;gBAEJ,MAAMuB,aAAa,CAAC,CAACH;gBAErB,MAAMI,cAAcnE,MAAM,MAAM;gBAChC,MAAMoE,oBACJ,MAAM,IAAI,CAAC,SAAS,EAAE,iBAAiBD;gBACzC,MAAME,aAAaD,
mBAAmB,cAAc;gBAEpD,MAAME,yBACJT,aAAaK,aACT,OACA,MAAMK,sBACJ;oBACE,WAAW,IAAI,CAAC,SAAS;oBACzB,mBAAmB,IAAI,CAAC,SAAS;gBACnC,GACAF,YACAF,aACAnE,MAAM,SAAS;gBAIvB,MAAMwE,mBAAmBF,yBACrBG,oCACEH,wBACA5B,4BAEFC;gBAEJ,IAAI+B,aAAa,CAAC,CAACF;gBACnB,MAAMtC,SAASD,YAAY,IAAI,CAAC,MAAM;gBACtC,IAAI0C;gBAEJ,IAAID,YAAY;oBACd,MAAME,eAAeP;oBACrB,MAAMQ,eAAeD,cAAc;oBAInC,MAAME,uBACJC,OAAO,QAAQ,CACbC,QAAQ,GAAG,CAACC,sCAAsC,IAAI,MACtD,OACG;oBACP,MAAMC,mBACJF,AAAmD,YAAnDA,QAAQ,GAAG,CAACG,kCAAkC;oBAChD,MAAMC,qBACJJ,AAAqD,YAArDA,QAAQ,GAAG,CAACK,oCAAoC;oBAElD,IAAI;wBACF,IAAIH,oBAAoBL,cAAc;4BACpC,MAAMS,SAASC,KAAK,IAAI,CACrBf,AAAAA,CAAAA,iBAAkB,MAAM,CAAC,EAAE,GAAGK,YAAY,CAAC,EAAC,KAAM,IAChDL,AAAAA,CAAAA,iBAAkB,MAAM,CAAC,EAAE,GAAGK,YAAY,CAAC,EAAC,KAAM;4BAGvDnF,MAAM,4BAA4B;gCAChCmF;gCACA,eAAeL,iBAAkB,MAAM;gCACvC,QAAQe,KAAK,KAAK,CAACD;gCACnB,WAAWR;4BACb;4BAEA,IAAIQ,SAASR,sBAAsB;gCACjCpF,MACE,gEACA;oCAAE4F;oCAAQ,WAAWR;gCAAqB;gCAE5CJ,aAAa;4BACf;wBACF;wBAEA,IAAIA,cAAcU,oBAAoB;4BACpC,MAAMI,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,mBAAmB,CACzDhB,iBAAkB,MAAM,EACxBL,aACA7D,6BACA6B;4BAEF,IAAKqD,aAAa,IAAI,EAWpB9F,MAAM,4CAA4C;gCAChD,aAAa8F,aAAa,WAAW;4BACvC;iCAbsB;gCACtB9F,MACE,mEACA;oCACE,QAAQ8F,aAAa,MAAM;oCAC3B,aAAaA,aAAa,WAAW;oCACrC,QAAQrB;gCACV;gCAEFO,aAAa;4BACf;wBAKF;oBACF,EAAE,OAAOe,aAAa;wBACpB/F,MACE,mDACA+F;wBAEFf,aAAa;oBACf;gBACF;gBAEA,IAAI,CAACR,cAAc,CAACQ,cAAc,CAACb,WACjC,IAAI;oBACFzB,mBAAmBF,QAAQ;oBAC3BsB,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACtCxD,OACA;wBACE,SAASmC;oBACX,GACA7B,6BACA8C;oBAEFK,UAAUD,aAAa,IAAI;oBAC3BmB,sBAAsBnB,aAAa,OAAO;gBAC5C,EAAE,OAAOX,OAAO;oBACd,IAAIA,iBAAiB6C,cACnBjC,UAAUZ,MAAM,IAAI;oBAEtB,MAAMA;gBACR,SAAU;oBACRT,mBAAmBF,QAAQ;gBAC7B;gBAGF,MAAMyD,UACJhC,mBACAI,oBACAS,oBACAG;gBAGF,MAAMiB,2BAA2BhG,iBAC/BwE,mBAAmB,cAAc;gBAGnC,IAAIyB;gBAOJ,IACEF,WACA,IAAI,CAAC,SAAS,IACd,CAACjB,cACA,EAACb,aAAa,CAAC+B,wBAAuB,KACvC5F,OAAO,cAAc,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,oBAAoB,EACrC,IAAI;oBAGF,IAAI8F,gBAAkCH,QAAQ,MAAM;oBACpD,IAAIjD,AAA6B,MAA7BA,0BAAgC;wBAClCoD,gBAAgB;4BACdP,KAAK,KAAK,CAACI,QAAQ,MAAM,CAAC,EAAE
,GAAGjD;4BAC/B6C,KAAK,KAAK,CAACI,QAAQ,MAAM,CAAC,EAAE,GAAGjD;yBAChC;wBACDhD,MACE,8DACAiG,QAAQ,MAAM,EACdG;oBAEJ;oBAEA,MAAMC,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,oBAAoB,CACvDD,eACA;wBACE,mBACE,AAAwB,YAAxB,OAAO9F,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE;wBACpB,aAAaM;oBACf;oBAEF,IAAIV,iBAAiBmG,UAAU;wBAC7BA,QAAQ,YAAY,GAAGD;wBACvBpG,MACE,uCACAyE,aACA4B;wBAEFF,oBAAoBE;wBACpB,MAAM,IAAI,CAAC,SAAS,CAAC,yBAAyB,CAC5C;4BACE,MAAM;4BACN,QAAQ5B;4BACR,OAAO4B;wBACT,GACA3B;oBAEJ,OACE1E,MACE,yDACAyE;gBAGN,EAAE,OAAOtB,OAAO;oBACdnD,MAAM,mCAAmCmD;gBAC3C;qBAEAnD,MAAM;gBAIV,IAAI,CAACiG,SAAS;oBACZ,IAAIpC,YACF,MAAM,IAAImC,aACR,CAAC,oBAAoB,EAAE1F,MAAM,MAAM,EAAE,EACrCuD;oBAGJ,MAAM,IAAIjC,MAAM,CAAC,mBAAmB,EAAEtB,MAAM,MAAM,EAAE;gBACtD;gBAEA,IAAIgG;gBAEJ,IAAInC,WACFmC,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,MAAMhG,MAAM,IAAI;oBAClB;gBACF;qBACK,IAAIkE,YACT8B,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,OAAOhG,MAAM,KAAK;oBACpB;gBACF;qBACK,IAAI0E,YACTsB,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP3B;wBACA,aAAawB;oBACf;gBACF;gBAGF3C,WAAWyC;gBAEX,OAAO;oBACL,QAAQ;wBACN,SAAS;4BACP,GAAGA,OAAO;4BAEV,KAAKxD,UAAU,aAAa;wBAC9B;oBACF;oBACA6D;gBACF;YACF;QACF;QAEA,OAAO1C;IACT;IAvnBA,YAAY,EACV2C,iBAAiB,EACjBC,OAAO,EACPC,SAAS,EACThF,WAAW,EACXiF,eAAe,EACC,CAAE;QAhBpB,uBAAiB,aAAjB;QAEA,uBAAiB,WAAjB;QAEA,uBAAiB,aAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAiB,mBAAjB;QASE,IAAI,CAAC,SAAS,GAAGH;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC;QACjB,IAAI,CAAC,WAAW,GAAGhF;QACnB,IAAI,CAAC,eAAe,GAAGiF;IACzB;AA4mBF"}
|
|
1
|
+
{"version":3,"file":"agent/task-builder.mjs","sources":["../../../src/agent/task-builder.ts"],"sourcesContent":["import {\n calculateConfidence,\n createInitialConfidenceState,\n createInitialProgressiveRecord,\n determineVerificationLevel,\n getVerificationActions,\n updateConfidenceOnVerify,\n updateProgressiveConvergence,\n} from '@/agent/cache-confidence';\nimport { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport { setTimingFieldOnce } from '@/task-timing';\nimport type {\n CacheConfidenceState,\n CacheValidationOptions,\n DetailedLocateParam,\n DeviceAction,\n ElementCacheFeature,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskPlanningLocateApply,\n LocateResultElement,\n LocateResultWithDump,\n PlanningAction,\n PlanningLocateParam,\n ProgressiveLocateRecord,\n Rect,\n SemanticAnchor,\n ServiceDump,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { sleep } from '@/utils';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD,\n MIDSCENE_CACHE_ENABLE_COORD_CHECK,\n MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR,\n MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY,\n} from '@midscene/shared/env/constants';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport {\n ifPlanLocateParamIsBbox,\n matchElementFromCache,\n matchElementFromPlan,\n transformLogicalElementToScreenshot,\n transformLogicalRectToScreenshotRect,\n} from './utils';\n\nconst debug = getDebug('agent:task-builder');\n\n/**\n * Check if a cache object is non-empty\n */\nfunction hasNonEmptyCache(cache: unknown): boolean {\n return (\n cache !== null &&\n cache !== undefined &&\n typeof cache === 'object' &&\n 
Object.keys(cache).length > 0\n );\n}\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? { prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\ninterface TaskBuilderDeps {\n interfaceInstance: AbstractInterface;\n service: Service;\n taskCache?: TaskCache;\n actionSpace: DeviceAction[];\n waitAfterAction?: number;\n}\n\ninterface BuildOptions {\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\ninterface PlanBuildContext {\n tasks: ExecutionTaskApply[];\n modelConfigForPlanning: IModelConfig;\n modelConfigForDefaultIntent: IModelConfig;\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\nexport class TaskBuilder {\n private readonly interface: AbstractInterface;\n\n private readonly service: Service;\n\n private readonly taskCache?: TaskCache;\n\n private readonly actionSpace: DeviceAction[];\n\n private readonly waitAfterAction?: number;\n\n constructor({\n interfaceInstance,\n service,\n taskCache,\n actionSpace,\n waitAfterAction,\n }: TaskBuilderDeps) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = taskCache;\n this.actionSpace = actionSpace;\n this.waitAfterAction = waitAfterAction;\n }\n\n public async build(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: BuildOptions,\n ): Promise<{ tasks: ExecutionTaskApply[] }> {\n const tasks: ExecutionTaskApply[] = [];\n const cacheable = options?.cacheable;\n\n const context: PlanBuildContext = {\n tasks,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n cacheable,\n deepLocate: options?.deepLocate,\n abortSignal: options?.abortSignal,\n };\n\n type PlanHandler = (plan: PlanningAction) => Promise<void> | void;\n\n const planHandlers = new Map<string, 
PlanHandler>([\n [\n 'Locate',\n (plan) =>\n this.handleLocatePlan(\n plan as PlanningAction<PlanningLocateParam>,\n context,\n ),\n ],\n ['Finished', (plan) => this.handleFinishedPlan(plan, context)],\n ]);\n\n const defaultHandler: PlanHandler = (plan) =>\n this.handleActionPlan(plan, context);\n\n for (const plan of plans) {\n const handler = planHandlers.get(plan.type) ?? defaultHandler;\n await handler(plan);\n }\n\n return {\n tasks,\n };\n }\n\n private handleFinishedPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): void {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action Space',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n executor: async () => {},\n };\n context.tasks.push(taskActionFinished);\n }\n\n private async handleLocatePlan(\n plan: PlanningAction<PlanningLocateParam>,\n context: PlanBuildContext,\n ): Promise<void> {\n const taskLocate = this.createLocateTask(plan, plan.param, context);\n context.tasks.push(taskLocate);\n }\n\n private async handleActionPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): Promise<void> {\n const planType = plan.type;\n const actionSpace = this.actionSpace;\n const action = actionSpace.find((item) => item.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n const locateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? 
findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n // Always use createLocateTask for all locate params (including bbox)\n // This ensures cache writing happens even when bbox is available\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n `hasBbox=${ifPlanLocateParamIsBbox(param[field])}`,\n );\n const locateTask = this.createLocateTask(\n locatePlan,\n param[field],\n context,\n (result) => {\n param[field] = result;\n },\n );\n context.tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action Space',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action Space',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n executor: async (param, taskContext) => {\n const timing = taskContext.task.timing;\n\n debug(\n 'executing action',\n planType,\n param,\n `taskContext.element.center: ${taskContext.element?.center}`,\n );\n\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Action task');\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. 
Cannot execute action ${planType}.`,\n );\n });\n\n setTimingFieldOnce(timing, 'beforeInvokeActionHookStart');\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug(\n `will call \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.beforeInvokeAction(action.name, param);\n debug(\n `called \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n setTimingFieldOnce(timing, 'beforeInvokeActionHookEnd');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in Action task',\n );\n }\n\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema, {\n shrunkShotToLogicalRatio,\n });\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n setTimingFieldOnce(timing, 'callActionStart');\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n const actionResult = await actionFn(param, taskContext);\n setTimingFieldOnce(timing, 'callActionEnd');\n debug('called action', action.name, 'result:', actionResult);\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookStart');\n\n const delayAfterRunner =\n action.delayAfterRunner ?? this.waitAfterAction ?? 
300;\n if (delayAfterRunner > 0) {\n await sleep(delayAfterRunner);\n }\n\n try {\n if (this.interface.afterInvokeAction) {\n debug(\n `will call \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.afterInvokeAction(action.name, param);\n debug(\n `called \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookEnd');\n\n return {\n output: actionResult,\n };\n },\n };\n\n context.tasks.push(task);\n }\n\n private createLocateTask(\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n context: PlanBuildContext,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskPlanningLocateApply {\n const { cacheable, modelConfigForDefaultIntent, deepLocate, abortSignal } =\n context;\n\n let locateParam = detailedLocateParam;\n\n if (typeof locateParam === 'string') {\n locateParam = {\n prompt: locateParam,\n };\n }\n\n if (cacheable !== undefined) {\n locateParam = {\n ...locateParam,\n cacheable,\n };\n }\n\n if (deepLocate && !locateParam.deepLocate) {\n locateParam = {\n ...locateParam,\n deepLocate: true,\n };\n }\n\n const taskLocator: ExecutionTaskPlanningLocateApply = {\n type: 'Planning',\n subType: 'Locate',\n param: locateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let { uiContext } = taskContext;\n\n assert(\n param?.prompt || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n\n if (!uiContext) {\n uiContext = await this.service.contextRetrieverFn();\n }\n\n assert(uiContext, 'uiContext is required for Service 
task');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in locate task',\n );\n }\n\n let locateDump: ServiceDump | undefined;\n let locateResult: LocateResultWithDump | undefined;\n\n const applyDump = (dump?: ServiceDump) => {\n if (!dump) {\n return;\n }\n locateDump = dump;\n task.log = {\n dump,\n rawResponse: dump.taskInfo?.rawResponse,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n if (dump.taskInfo?.reasoning_content) {\n task.reasoning_content = dump.taskInfo.reasoning_content;\n }\n };\n\n // from bbox (plan hit)\n const elementFromBbox = ifPlanLocateParamIsBbox(param)\n ? matchElementFromPlan(param)\n : undefined;\n const isPlanHit = !!elementFromBbox;\n\n // from xpath\n let rectFromXpath: Rect | undefined;\n if (\n !isPlanHit &&\n param.xpath &&\n this.interface.rectMatchesCacheFeature\n ) {\n try {\n rectFromXpath = await this.interface.rectMatchesCacheFeature({\n xpaths: [param.xpath],\n });\n } catch {\n // xpath locate failed, allow fallback to cache or AI locate\n }\n }\n\n const elementFromXpath = rectFromXpath\n ? generateElementByRect(\n // rectFromXpath is in logical coordinates, which should be transformed to screenshot coordinates;\n transformLogicalRectToScreenshotRect(\n rectFromXpath,\n shrunkShotToLogicalRatio,\n ),\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt || '',\n )\n : undefined;\n\n const isXpathHit = !!elementFromXpath;\n\n const cachePrompt = param.prompt;\n const locateCacheRecord =\n await this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n\n const elementFromCacheResult =\n isPlanHit || isXpathHit\n ? 
null\n : await matchElementFromCache(\n {\n taskCache: this.taskCache,\n interfaceInstance: this.interface,\n },\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n\n // elementFromCacheResult is in logical coordinates, which should be transformed to screenshot coordinates;\n let elementFromCache = elementFromCacheResult\n ? transformLogicalElementToScreenshot(\n elementFromCacheResult,\n shrunkShotToLogicalRatio,\n )\n : undefined;\n\n let isCacheHit = !!elementFromCache;\n const timing = taskContext.task.timing;\n let elementFromAiLocate: LocateResultElement | null | undefined;\n\n if (isCacheHit) {\n const cacheFeature = cacheEntry as ElementCacheFeature;\n const cachedCenter = cacheFeature?.cachedCenter as\n | [number, number]\n | undefined;\n\n const coordOffsetThreshold =\n Number.parseInt(\n process.env[MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD] || '16',\n 10,\n ) || 16;\n\n const confidenceState = (cacheFeature?.confidenceState ||\n createInitialConfidenceState()) as CacheConfidenceState;\n const confidence = calculateConfidence(confidenceState);\n const level = determineVerificationLevel(confidence);\n const actions = getVerificationActions(level);\n\n debug('cache confidence assessment', {\n confidence: confidence.toFixed(3),\n level,\n actions,\n verificationCount: confidenceState.verificationCount,\n });\n\n if (actions.skipCache) {\n debug('cache confidence too low, skipping cache entirely', {\n confidence,\n level,\n });\n isCacheHit = false;\n }\n\n try {\n if (isCacheHit && actions.coordCheck && cachedCenter) {\n const offset = Math.sqrt(\n (elementFromCache!.center[0] - cachedCenter[0]) ** 2 +\n (elementFromCache!.center[1] - cachedCenter[1]) ** 2,\n );\n\n debug('cache coord offset check', {\n cachedCenter,\n currentCenter: elementFromCache!.center,\n offset: Math.round(offset),\n threshold: coordOffsetThreshold,\n });\n\n if (offset > coordOffsetThreshold) {\n debug(\n 'cache coord offset exceeded threshold, fallback to AI locate',\n { offset, 
threshold: coordOffsetThreshold },\n );\n isCacheHit = false;\n }\n }\n\n if (isCacheHit && actions.visualVerify) {\n const verification = await this.service.verifyCachedElement(\n elementFromCache!.center,\n cachePrompt,\n modelConfigForDefaultIntent,\n uiContext,\n );\n if (!verification.pass) {\n debug(\n 'cache hit but visual verification failed, fallback to AI locate',\n {\n reason: verification.reason,\n description: verification.description,\n prompt: cachePrompt,\n },\n );\n isCacheHit = false;\n } else {\n debug('cache hit and visual verification passed', {\n description: verification.description,\n });\n }\n }\n } catch (verifyError) {\n debug(\n 'cache verification error, fallback to AI locate',\n verifyError,\n );\n isCacheHit = false;\n }\n\n if (isCacheHit) {\n const updatedState = updateConfidenceOnVerify(\n confidenceState,\n true,\n );\n cacheFeature.confidenceState = updatedState;\n\n const progressiveRecord = cacheFeature.progressiveRecord as\n | ProgressiveLocateRecord\n | undefined;\n if (progressiveRecord) {\n const updated = updateProgressiveConvergence(\n progressiveRecord,\n elementFromCache!.center,\n updatedState.confidenceScore,\n );\n cacheFeature.progressiveRecord = updated;\n\n if (updated.convergenceRadius < 5 && updated.sampleCount >= 3) {\n debug(\n 'using converged center instead of single-result center',\n {\n convergedCenter: updated.convergedCenter.map((v) =>\n v.toFixed(1),\n ),\n singleCenter: elementFromCache!.center,\n convergenceRadius: updated.convergenceRadius.toFixed(1),\n sampleCount: updated.sampleCount,\n },\n );\n elementFromCache = {\n ...elementFromCache!,\n center: [\n Math.round(updated.convergedCenter[0]),\n Math.round(updated.convergedCenter[1]),\n ],\n };\n }\n }\n } else {\n const updatedState = updateConfidenceOnVerify(\n confidenceState,\n false,\n );\n cacheFeature.confidenceState = updatedState;\n }\n }\n\n if (!isXpathHit && !isCacheHit && !isPlanHit) {\n const cacheFeature = cacheEntry as 
ElementCacheFeature;\n const semanticAnchor = cacheFeature?.semanticAnchor as\n | SemanticAnchor\n | undefined;\n\n if (\n semanticAnchor &&\n process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR] !== 'false'\n ) {\n try {\n const anchorResult = await this.service.locateBySemanticAnchor(\n semanticAnchor,\n modelConfigForDefaultIntent,\n this.interface,\n uiContext,\n );\n if (anchorResult) {\n elementFromAiLocate = anchorResult;\n debug(\n 'semantic anchor locate succeeded, skipping full AI locate',\n );\n }\n } catch (anchorError) {\n debug('semantic anchor locate failed:', anchorError);\n }\n }\n\n if (!elementFromAiLocate) {\n try {\n setTimingFieldOnce(timing, 'callAiStart');\n locateResult = await this.service.locate(\n param,\n {\n context: uiContext,\n },\n modelConfigForDefaultIntent,\n abortSignal,\n );\n applyDump(locateResult.dump);\n elementFromAiLocate = locateResult.element;\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n } finally {\n setTimingFieldOnce(timing, 'callAiEnd');\n }\n }\n }\n\n const element =\n elementFromBbox ||\n elementFromXpath ||\n elementFromCache ||\n elementFromAiLocate;\n\n // Check if locate cache already exists (for planHitFlag case)\n const locateCacheAlreadyExists = hasNonEmptyCache(\n locateCacheRecord?.cacheContent?.cache,\n );\n\n let currentCacheEntry: ElementCacheFeature | undefined;\n // Write cache if:\n // 1. element found\n // 2. taskCache enabled\n // 3. not a cache hit (otherwise we'd be writing what we just read)\n // 4. not already cached for plan hit case (avoid redundant writes), OR allow update if cache validation failed\n // 5. 
cacheable is not explicitly false\n if (\n element &&\n this.taskCache &&\n !isCacheHit &&\n (!isPlanHit || !locateCacheAlreadyExists) &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForPoint) {\n try {\n // Transform coordinates to logical space for cacheFeatureForPoint\n // cacheFeatureForPoint needs logical coordinates to locate elements in DOM\n let pointForCache: [number, number] = element.center;\n if (shrunkShotToLogicalRatio !== 1) {\n pointForCache = [\n Math.round(element.center[0] / shrunkShotToLogicalRatio),\n Math.round(element.center[1] / shrunkShotToLogicalRatio),\n ];\n debug(\n 'Transformed coordinates for cacheFeatureForPoint: %o -> %o',\n element.center,\n pointForCache,\n );\n }\n\n const feature = await this.interface.cacheFeatureForPoint(\n pointForCache,\n {\n targetDescription:\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt,\n modelConfig: modelConfigForDefaultIntent,\n },\n );\n if (hasNonEmptyCache(feature)) {\n feature.cachedCenter = pointForCache;\n feature.confidenceState = createInitialConfidenceState();\n feature.progressiveRecord =\n createInitialProgressiveRecord(pointForCache);\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n\n const enableSemanticAnchor =\n process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR] !==\n 'false';\n if (enableSemanticAnchor) {\n try {\n const anchor = await this.service.generateSemanticAnchor(\n pointForCache,\n modelConfigForDefaultIntent,\n uiContext,\n );\n if (anchor) {\n feature.semanticAnchor = anchor;\n debug(\n 'semantic anchor generated for prompt: %s',\n cachePrompt,\n );\n }\n } catch (anchorError) {\n debug('generateSemanticAnchor failed:', anchorError);\n }\n }\n\n currentCacheEntry = feature;\n await this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: 
%s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForPoint failed: %s', error);\n }\n } else {\n debug('cacheFeatureForPoint is not supported, skip cache update');\n }\n }\n\n if (!element) {\n if (locateDump) {\n throw new ServiceError(\n `Element not found : ${param.prompt}`,\n locateDump,\n );\n }\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (isPlanHit) {\n hitBy = {\n from: 'Plan',\n context: {\n bbox: param.bbox,\n },\n };\n } else if (isXpathHit) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (isCacheHit) {\n hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element: {\n ...element,\n // backward compatibility for aiLocate, which return value needs a dpr field\n dpr: uiContext.deprecatedDpr,\n },\n },\n hitBy,\n };\n },\n };\n\n return taskLocator;\n 
}\n}\n"],"names":["debug","getDebug","hasNonEmptyCache","cache","Object","locatePlanForLocate","param","locate","locatePlan","TaskBuilder","plans","modelConfigForPlanning","modelConfigForDefaultIntent","options","tasks","cacheable","context","planHandlers","Map","plan","defaultHandler","handler","taskActionFinished","taskLocate","planType","actionSpace","action","item","Error","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","JSON","ifPlanLocateParamIsBbox","locateTask","result","assert","task","taskContext","timing","uiContext","setTimingFieldOnce","Promise","sleep","originalError","originalMessage","String","shrunkShotToLogicalRatio","undefined","parseActionParam","error","actionFn","actionResult","delayAfterRunner","detailedLocateParam","onResult","deepLocate","abortSignal","locateParam","taskLocator","locateDump","locateResult","applyDump","dump","elementFromBbox","matchElementFromPlan","isPlanHit","rectFromXpath","elementFromXpath","generateElementByRect","transformLogicalRectToScreenshotRect","isXpathHit","cachePrompt","locateCacheRecord","cacheEntry","elementFromCacheResult","matchElementFromCache","elementFromCache","transformLogicalElementToScreenshot","isCacheHit","elementFromAiLocate","cacheFeature","cachedCenter","coordOffsetThreshold","Number","process","MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD","confidenceState","createInitialConfidenceState","confidence","calculateConfidence","level","determineVerificationLevel","actions","getVerificationActions","offset","Math","verification","verifyError","updatedState","updateConfidenceOnVerify","progressiveRecord","updated","updateProgressiveConvergence","v","semanticAnchor","MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR","anchorResult","anchorError","ServiceError","element","locateCacheAlreadyExists","currentCacheEntry","pointForCache","feature","createInitialProgressiveRecord","enableSemanticAnchor","anchor","hitBy","interfaceInstance","service","taskCache","waitAfterAction"],"mappings":";;;;;;;;
;;;;;;;;;;;;AAqDA,MAAMA,QAAQC,SAAS;AAKvB,SAASC,iBAAiBC,KAAc;IACtC,OACEA,QAAAA,SAEA,AAAiB,YAAjB,OAAOA,SACPC,OAAO,IAAI,CAACD,OAAO,MAAM,GAAG;AAEhC;AAEO,SAASE,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACN,OAAOD;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAyBO,MAAMC;IAyBX,MAAa,MACXC,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCC,OAAsB,EACoB;QAC1C,MAAMC,QAA8B,EAAE;QACtC,MAAMC,YAAYF,SAAS;QAE3B,MAAMG,UAA4B;YAChCF;YACAH;YACAC;YACAG;YACA,YAAYF,SAAS;YACrB,aAAaA,SAAS;QACxB;QAIA,MAAMI,eAAe,IAAIC,IAAyB;YAChD;gBACE;gBACA,CAACC,OACC,IAAI,CAAC,gBAAgB,CACnBA,MACAH;aAEL;YACD;gBAAC;gBAAY,CAACG,OAAS,IAAI,CAAC,kBAAkB,CAACA,MAAMH;aAAS;SAC/D;QAED,MAAMI,iBAA8B,CAACD,OACnC,IAAI,CAAC,gBAAgB,CAACA,MAAMH;QAE9B,KAAK,MAAMG,QAAQT,MAAO;YACxB,MAAMW,UAAUJ,aAAa,GAAG,CAACE,KAAK,IAAI,KAAKC;YAC/C,MAAMC,QAAQF;QAChB;QAEA,OAAO;YACLL;QACF;IACF;IAEQ,mBACNK,IAAoB,EACpBH,OAAyB,EACnB;QACN,MAAMM,qBAAqD;YACzD,MAAM;YACN,SAAS;YACT,OAAO;YACP,SAASH,KAAK,OAAO;YACrB,UAAU,WAAa;QACzB;QACAH,QAAQ,KAAK,CAAC,IAAI,CAACM;IACrB;IAEA,MAAc,iBACZH,IAAyC,EACzCH,OAAyB,EACV;QACf,MAAMO,aAAa,IAAI,CAAC,gBAAgB,CAACJ,MAAMA,KAAK,KAAK,EAAEH;QAC3DA,QAAQ,KAAK,CAAC,IAAI,CAACO;IACrB;IAEA,MAAc,iBACZJ,IAAoB,EACpBH,OAAyB,EACV;QACf,MAAMQ,WAAWL,KAAK,IAAI;QAC1B,MAAMM,cAAc,IAAI,CAAC,WAAW;QACpC,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACE,OAASA,KAAK,IAAI,KAAKH;QACxD,MAAMlB,QAAQa,KAAK,KAAK;QAExB,IAAI,CAACO,QACH,MAAM,IAAIE,MAAM,CAAC,aAAa,EAAEJ,SAAS,WAAW,CAAC;QAGvD,MAAMK,eAAeH,SACjBI,4BAA4BJ,OAAO,WAAW,IAC9C,EAAE;QAEN,MAAMK,uBAAuBL,SACzBI,4BAA4BJ,OAAO,WAAW,EAAE,QAChD,EAAE;QAENG,aAAa,OAAO,CAAC,CAACG;YACpB,IAAI1B,KAAK,CAAC0B,MAAM,EAAE;gBAGhB,MAAMxB,aAAaH,oBAAoBC,KAAK,CAAC0B,MAAM;gBACnDhC,MACE,uCACA,CAAC,YAAY,EAAEwB,UAAU,EACzB,CAAC,MAAM,EAAES,KAAK,SAAS,CAAC3B,KAAK,CAAC0B,MAAM,GAAG,EACvC,CAAC,WAAW,EAAEC,KAAK,SAAS,CAACzB,aAAa,EAC1C,CAAC,QAAQ,EAAE0B,wBAAwB5B,KAAK,CAAC0B,MAAM,GAAG;gBAEpD,MAAMG,aAAa,IAAI,CAAC,gBAAgB,CACtC3B,YACAF,KAAK,CAAC0B,MAAM,EACZhB,SACA,CAACoB;oBACC9B,KAAK,CAAC0B,MAAM,GAAGI;gBACjB;gBAEFpB,QAAQ,KAAK,CAAC,IAAI,
CAACmB;YACrB,OAAO;gBACLE,OACE,CAACN,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAER,UAAU;gBAE3ExB,MAAM,CAAC,OAAO,EAAEgC,MAAM,6BAA6B,EAAER,UAAU;YACjE;QACF;QAEA,MAAMc,OAKF;YACF,MAAM;YACN,SAASd;YACT,SAASL,KAAK,OAAO;YACrB,OAAOA,KAAK,KAAK;YACjB,UAAU,OAAOb,OAAOiC;gBACtB,MAAMC,SAASD,YAAY,IAAI,CAAC,MAAM;gBAEtCvC,MACE,oBACAwB,UACAlB,OACA,CAAC,4BAA4B,EAAEiC,YAAY,OAAO,EAAE,QAAQ;gBAG9D,MAAME,YAAYF,YAAY,SAAS;gBACvCF,OAAOI,WAAW;gBAElBV,qBAAqB,OAAO,CAAC,CAACC;oBAC5BK,OACE/B,KAAK,CAAC0B,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAER,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;gBAE9G;gBAEAkB,mBAAmBF,QAAQ;gBAC3B,IAAI;oBACF,MAAMG,QAAQ,GAAG,CAAC;wBACf;4BACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;gCACrC3C,MACE,CAAC,8DAA8D,EAAE0B,OAAO,IAAI,EAAE;gCAEhF,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAACA,OAAO,IAAI,EAAEpB;gCACrDN,MACE,CAAC,2DAA2D,EAAE0B,OAAO,IAAI,EAAE;4BAE/E;wBACF;wBACAkB,MAAM;qBACP;gBACH,EAAE,OAAOC,eAAoB;oBAC3B,MAAMC,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,wCAAwC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC5E;wBAAE,OAAOD;oBAAc;gBAE3B;gBACAH,mBAAmBF,QAAQ;gBAE3B,MAAM,EAAEQ,wBAAwB,EAAE,GAAGP;gBACrC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIF,OAAO,WAAW,EACpB,IAAI;oBACFpB,QAAQ4C,iBAAiB5C,OAAOoB,OAAO,WAAW,EAAE;wBAClDsB;oBACF;gBACF,EAAE,OAAOG,OAAY;oBACnB,MAAM,IAAIvB,MACR,CAAC,8BAA8B,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEyB,MAAM,OAAO,CAAC,cAAc,EAAElB,KAAK,SAAS,CAAC3B,QAAQ,EACtG;wBAAE,OAAO6C;oBAAM;gBAEnB;gBAGFT,mBAAmBF,QAAQ;gBAE3BxC,MAAM,kBAAkB0B,OAAO,IAAI;gBACnC,MAAM0B,WAAW1B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;gBAChD,MAAM2B,eAAe,MAAMD,SAAS9C,OAAOiC;gBAC3CG,mBAAmBF,QAAQ;gBAC3BxC,MAAM,iBAAiB0B,OAAO,IAAI,EAAE,WAAW2B;gBAE/CX,mBAAmBF,QAAQ;gBAE3B,MAAMc,mBACJ5B,OAAO,gBAAgB,IAAI,IAAI,CAAC,eAAe,IAAI;gBACrD,IAAI4B,mBAAmB,GACrB,MAAMV,MAAMU;gBAGd,IAAI;oBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;wBACpCtD,MACE,CAAC,6DAA6D,EAAE0B,OAAO,IAAI,EAAE;wBAE/E,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAACA,OAAO,IAAI,EAAEpB;wBACpDN,MACE,CAAC,0DAA0D,EAAE0B,OAAO,IAAI,EAAE;oBAE9E;gBACF,EAAE,OAAOmB,eAAoB;oBAC3B,MAAMC
,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,uCAAuC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC3E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEAH,mBAAmBF,QAAQ;gBAE3B,OAAO;oBACL,QAAQa;gBACV;YACF;QACF;QAEArC,QAAQ,KAAK,CAAC,IAAI,CAACsB;IACrB;IAEQ,iBACNnB,IAAyC,EACzCoC,mBAAiD,EACjDvC,OAAyB,EACzBwC,QAAgD,EACd;QAClC,MAAM,EAAEzC,SAAS,EAAEH,2BAA2B,EAAE6C,UAAU,EAAEC,WAAW,EAAE,GACvE1C;QAEF,IAAI2C,cAAcJ;QAElB,IAAI,AAAuB,YAAvB,OAAOI,aACTA,cAAc;YACZ,QAAQA;QACV;QAGF,IAAI5C,AAAckC,WAAdlC,WACF4C,cAAc;YACZ,GAAGA,WAAW;YACd5C;QACF;QAGF,IAAI0C,cAAc,CAACE,YAAY,UAAU,EACvCA,cAAc;YACZ,GAAGA,WAAW;YACd,YAAY;QACd;QAGF,MAAMC,cAAgD;YACpD,MAAM;YACN,SAAS;YACT,OAAOD;YACP,SAASxC,KAAK,OAAO;YACrB,UAAU,OAAOb,OAAOiC;gBACtB,MAAM,EAAED,IAAI,EAAE,GAAGC;gBACjB,IAAI,EAAEE,SAAS,EAAE,GAAGF;gBAEpBF,OACE/B,OAAO,UAAUA,OAAO,MACxB,CAAC,qDAAqD,EAAE2B,KAAK,SAAS,CACpE3B,QACC;gBAGL,IAAI,CAACmC,WACHA,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB;gBAGnDJ,OAAOI,WAAW;gBAElB,MAAM,EAAEO,wBAAwB,EAAE,GAAGP;gBAErC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIiC;gBACJ,IAAIC;gBAEJ,MAAMC,YAAY,CAACC;oBACjB,IAAI,CAACA,MACH;oBAEFH,aAAaG;oBACb1B,KAAK,GAAG,GAAG;wBACT0B;wBACA,aAAaA,KAAK,QAAQ,EAAE;oBAC9B;oBACA1B,KAAK,KAAK,GAAG0B,KAAK,QAAQ,EAAE;oBAC5B,IAAIA,KAAK,QAAQ,EAAE,iBACjB1B,KAAK,eAAe,GAAG0B,KAAK,QAAQ,CAAC,eAAe;oBAEtD,IAAIA,KAAK,QAAQ,EAAE,mBACjB1B,KAAK,iBAAiB,GAAG0B,KAAK,QAAQ,CAAC,iBAAiB;gBAE5D;gBAGA,MAAMC,kBAAkB/B,wBAAwB5B,SAC5C4D,qBAAqB5D,SACrB2C;gBACJ,MAAMkB,YAAY,CAAC,CAACF;gBAGpB,IAAIG;gBACJ,IACE,CAACD,aACD7D,MAAM,KAAK,IACX,IAAI,CAAC,SAAS,CAAC,uBAAuB,EAEtC,IAAI;oBACF8D,gBAAgB,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC;wBAC3D,QAAQ;4BAAC9D,MAAM,KAAK;yBAAC;oBACvB;gBACF,EAAE,OAAM,CAER;gBAGF,MAAM+D,mBAAmBD,gBACrBE,sBAEEC,qCACEH,eACApB,2BAEF,AAAwB,YAAxB,OAAO1C,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE,UAAU,MAE9B2C;gBAEJ,MAAMuB,aAAa,CAAC,CAACH;gBAErB,MAAMI,cAAcnE,MAAM,MAAM;gBAChC,MAAMoE,oBACJ,MAAM,IAAI,CAAC,SAAS,EAAE,iBAAiBD;gBACzC,MAAME,aAAaD,mBAAmB,cAAc;gBAEpD,MAAME,yBACJT,aAAaK,aACT,OACA,MAAMK,sBACJ;oBACE,WAAW,IAAI,CAAC,SAAS;oBACzB,mBAAmB,I
AAI,CAAC,SAAS;gBACnC,GACAF,YACAF,aACAnE,MAAM,SAAS;gBAIvB,IAAIwE,mBAAmBF,yBACnBG,oCACEH,wBACA5B,4BAEFC;gBAEJ,IAAI+B,aAAa,CAAC,CAACF;gBACnB,MAAMtC,SAASD,YAAY,IAAI,CAAC,MAAM;gBACtC,IAAI0C;gBAEJ,IAAID,YAAY;oBACd,MAAME,eAAeP;oBACrB,MAAMQ,eAAeD,cAAc;oBAInC,MAAME,uBACJC,OAAO,QAAQ,CACbC,QAAQ,GAAG,CAACC,sCAAsC,IAAI,MACtD,OACG;oBAEP,MAAMC,kBAAmBN,cAAc,mBACrCO;oBACF,MAAMC,aAAaC,oBAAoBH;oBACvC,MAAMI,QAAQC,2BAA2BH;oBACzC,MAAMI,UAAUC,uBAAuBH;oBAEvC5F,MAAM,+BAA+B;wBACnC,YAAY0F,WAAW,OAAO,CAAC;wBAC/BE;wBACAE;wBACA,mBAAmBN,gBAAgB,iBAAiB;oBACtD;oBAEA,IAAIM,QAAQ,SAAS,EAAE;wBACrB9F,MAAM,qDAAqD;4BACzD0F;4BACAE;wBACF;wBACAZ,aAAa;oBACf;oBAEA,IAAI;wBACF,IAAIA,cAAcc,QAAQ,UAAU,IAAIX,cAAc;4BACpD,MAAMa,SAASC,KAAK,IAAI,CACrBnB,AAAAA,CAAAA,iBAAkB,MAAM,CAAC,EAAE,GAAGK,YAAY,CAAC,EAAC,KAAM,IAChDL,AAAAA,CAAAA,iBAAkB,MAAM,CAAC,EAAE,GAAGK,YAAY,CAAC,EAAC,KAAM;4BAGvDnF,MAAM,4BAA4B;gCAChCmF;gCACA,eAAeL,iBAAkB,MAAM;gCACvC,QAAQmB,KAAK,KAAK,CAACD;gCACnB,WAAWZ;4BACb;4BAEA,IAAIY,SAASZ,sBAAsB;gCACjCpF,MACE,gEACA;oCAAEgG;oCAAQ,WAAWZ;gCAAqB;gCAE5CJ,aAAa;4BACf;wBACF;wBAEA,IAAIA,cAAcc,QAAQ,YAAY,EAAE;4BACtC,MAAMI,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,mBAAmB,CACzDpB,iBAAkB,MAAM,EACxBL,aACA7D,6BACA6B;4BAEF,IAAKyD,aAAa,IAAI,EAWpBlG,MAAM,4CAA4C;gCAChD,aAAakG,aAAa,WAAW;4BACvC;iCAbsB;gCACtBlG,MACE,mEACA;oCACE,QAAQkG,aAAa,MAAM;oCAC3B,aAAaA,aAAa,WAAW;oCACrC,QAAQzB;gCACV;gCAEFO,aAAa;4BACf;wBAKF;oBACF,EAAE,OAAOmB,aAAa;wBACpBnG,MACE,mDACAmG;wBAEFnB,aAAa;oBACf;oBAEA,IAAIA,YAAY;wBACd,MAAMoB,eAAeC,yBACnBb,iBACA;wBAEFN,aAAa,eAAe,GAAGkB;wBAE/B,MAAME,oBAAoBpB,aAAa,iBAAiB;wBAGxD,IAAIoB,mBAAmB;4BACrB,MAAMC,UAAUC,6BACdF,mBACAxB,iBAAkB,MAAM,EACxBsB,aAAa,eAAe;4BAE9BlB,aAAa,iBAAiB,GAAGqB;4BAEjC,IAAIA,QAAQ,iBAAiB,GAAG,KAAKA,QAAQ,WAAW,IAAI,GAAG;gCAC7DvG,MACE,0DACA;oCACE,iBAAiBuG,QAAQ,eAAe,CAAC,GAAG,CAAC,CAACE,IAC5CA,EAAE,OAAO,CAAC;oCAEZ,cAAc3B,iBAAkB,MAAM;oCACtC,mBAAmByB,QAAQ,iBAAiB,CAAC,OAAO,CAAC;oCACrD,aAAaA,QAAQ,WAAW;gCAClC;gCAEFzB,mBAAmB;oCACjB,GAAGA,gBAAgB;oCACnB,QAAQ;wCACNmB,KAAK,KAAK,CAACM,QAAQ,eAAe,CAAC,EAAE;wCACrCN,KAAK,KAAK,CAACM,QAAQ,eAA
e,CAAC,EAAE;qCACtC;gCACH;4BACF;wBACF;oBACF,OAAO;wBACL,MAAMH,eAAeC,yBACnBb,iBACA;wBAEFN,aAAa,eAAe,GAAGkB;oBACjC;gBACF;gBAEA,IAAI,CAAC5B,cAAc,CAACQ,cAAc,CAACb,WAAW;oBAC5C,MAAMe,eAAeP;oBACrB,MAAM+B,iBAAiBxB,cAAc;oBAIrC,IACEwB,kBACApB,AAAuD,YAAvDA,QAAQ,GAAG,CAACqB,sCAAsC,EAElD,IAAI;wBACF,MAAMC,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,sBAAsB,CAC5DF,gBACA9F,6BACA,IAAI,CAAC,SAAS,EACd6B;wBAEF,IAAImE,cAAc;4BAChB3B,sBAAsB2B;4BACtB5G,MACE;wBAEJ;oBACF,EAAE,OAAO6G,aAAa;wBACpB7G,MAAM,kCAAkC6G;oBAC1C;oBAGF,IAAI,CAAC5B,qBACH,IAAI;wBACFvC,mBAAmBF,QAAQ;wBAC3BsB,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACtCxD,OACA;4BACE,SAASmC;wBACX,GACA7B,6BACA8C;wBAEFK,UAAUD,aAAa,IAAI;wBAC3BmB,sBAAsBnB,aAAa,OAAO;oBAC5C,EAAE,OAAOX,OAAO;wBACd,IAAIA,iBAAiB2D,cACnB/C,UAAUZ,MAAM,IAAI;wBAEtB,MAAMA;oBACR,SAAU;wBACRT,mBAAmBF,QAAQ;oBAC7B;gBAEJ;gBAEA,MAAMuE,UACJ9C,mBACAI,oBACAS,oBACAG;gBAGF,MAAM+B,2BAA2B9G,iBAC/BwE,mBAAmB,cAAc;gBAGnC,IAAIuC;gBAOJ,IACEF,WACA,IAAI,CAAC,SAAS,IACd,CAAC/B,cACA,EAACb,aAAa,CAAC6C,wBAAuB,KACvC1G,OAAO,cAAc,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,oBAAoB,EACrC,IAAI;oBAGF,IAAI4G,gBAAkCH,QAAQ,MAAM;oBACpD,IAAI/D,AAA6B,MAA7BA,0BAAgC;wBAClCkE,gBAAgB;4BACdjB,KAAK,KAAK,CAACc,QAAQ,MAAM,CAAC,EAAE,GAAG/D;4BAC/BiD,KAAK,KAAK,CAACc,QAAQ,MAAM,CAAC,EAAE,GAAG/D;yBAChC;wBACDhD,MACE,8DACA+G,QAAQ,MAAM,EACdG;oBAEJ;oBAEA,MAAMC,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,oBAAoB,CACvDD,eACA;wBACE,mBACE,AAAwB,YAAxB,OAAO5G,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE;wBACpB,aAAaM;oBACf;oBAEF,IAAIV,iBAAiBiH,UAAU;wBAC7BA,QAAQ,YAAY,GAAGD;wBACvBC,QAAQ,eAAe,GAAG1B;wBAC1B0B,QAAQ,iBAAiB,GACvBC,+BAA+BF;wBACjClH,MACE,uCACAyE,aACA0C;wBAGF,MAAME,uBACJ/B,AACA,YADAA,QAAQ,GAAG,CAACqB,sCAAsC;wBAEpD,IAAIU,sBACF,IAAI;4BACF,MAAMC,SAAS,MAAM,IAAI,CAAC,OAAO,CAAC,sBAAsB,CACtDJ,eACAtG,6BACA6B;4BAEF,IAAI6E,QAAQ;gCACVH,QAAQ,cAAc,GAAGG;gCACzBtH,MACE,4CACAyE;4BAEJ;wBACF,EAAE,OAAOoC,aAAa;4BACpB7G,MAAM,kCAAkC6G;wBAC1C;wBAGFI,oBAAoBE;wBACpB,MAAM,IAAI,CAAC,SAAS,CAAC,yBAAyB,CAC5C;4BACE,MAAM;4BACN,QAAQ1C;4BACR,OAAO0C;wBACT,GACAzC;oBAEJ,OACE1E,MACE,yDACAyE;gBAGN,EAAE,OAAOtB,OAAO;o
BACdnD,MAAM,mCAAmCmD;gBAC3C;qBAEAnD,MAAM;gBAIV,IAAI,CAAC+G,SAAS;oBACZ,IAAIlD,YACF,MAAM,IAAIiD,aACR,CAAC,oBAAoB,EAAExG,MAAM,MAAM,EAAE,EACrCuD;oBAGJ,MAAM,IAAIjC,MAAM,CAAC,mBAAmB,EAAEtB,MAAM,MAAM,EAAE;gBACtD;gBAEA,IAAIiH;gBAEJ,IAAIpD,WACFoD,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,MAAMjH,MAAM,IAAI;oBAClB;gBACF;qBACK,IAAIkE,YACT+C,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,OAAOjH,MAAM,KAAK;oBACpB;gBACF;qBACK,IAAI0E,YACTuC,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP5C;wBACA,aAAasC;oBACf;gBACF;gBAGFzD,WAAWuD;gBAEX,OAAO;oBACL,QAAQ;wBACN,SAAS;4BACP,GAAGA,OAAO;4BAEV,KAAKtE,UAAU,aAAa;wBAC9B;oBACF;oBACA8E;gBACF;YACF;QACF;QAEA,OAAO3D;IACT;IA9uBA,YAAY,EACV4D,iBAAiB,EACjBC,OAAO,EACPC,SAAS,EACTjG,WAAW,EACXkG,eAAe,EACC,CAAE;QAhBpB,uBAAiB,aAAjB;QAEA,uBAAiB,WAAjB;QAEA,uBAAiB,aAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAiB,mBAAjB;QASE,IAAI,CAAC,SAAS,GAAGH;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC;QACjB,IAAI,CAAC,WAAW,GAAGjG;QACnB,IAAI,CAAC,eAAe,GAAGkG;IACzB;AAmuBF"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { getPreferredLanguage } from "@midscene/shared/env";
|
|
2
|
+
/**
 * Build the prompt text that asks a model to find an element, described by a
 * semantic anchor, inside a cropped area of the page.
 *
 * The `{{visualFingerprint}}` and `{{contextDescription}}` placeholders are
 * returned verbatim; this function does not substitute them.
 *
 * @returns {string} The prompt text. Its final rule names the language
 *   returned by `getPreferredLanguage()` as the language for the `reason` field.
 */
const semanticAnchorSearchInstruction = () => {
    // Resolved on every call, so the prompt follows the current setting.
    const preferredLanguage = getPreferredLanguage();

    return `
You are searching for an element based on its semantic anchor description.
The image shows a CROPPED AREA of the page around a known landmark element.

TARGET ELEMENT DESCRIPTION:
- Visual: {{visualFingerprint}}
- Context: {{contextDescription}}

TASK: Find the element matching this description in the image.

RESPONSE FORMAT (JSON):
{
  "found": true/false,
  "center": [x, y] or null,
  "rect": {"left": 0, "top": 0, "width": 0, "height": 0} or null,
  "confidence": 0.0-1.0,
  "reason": "brief explanation"
}

RULES:
1. Look for an element matching the visualFingerprint description
2. Verify it's in the expected position relative to the contextDescription
3. Return center coordinates and bounding rect if found
4. Set confidence based on how well the match is
5. If not found or confidence < 0.5, set found=false
6. Write reason in ${preferredLanguage}
`;
};
|
|
32
|
+
export { semanticAnchorSearchInstruction };
|
|
33
|
+
|
|
34
|
+
//# sourceMappingURL=semantic-anchor-search.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/semantic-anchor-search.mjs","sources":["../../../../src/ai-model/prompt/semantic-anchor-search.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const semanticAnchorSearchInstruction = () => {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are searching for an element based on its semantic anchor description.\nThe image shows a CROPPED AREA of the page around a known landmark element.\n\nTARGET ELEMENT DESCRIPTION:\n- Visual: {{visualFingerprint}}\n- Context: {{contextDescription}}\n\nTASK: Find the element matching this description in the image.\n\nRESPONSE FORMAT (JSON):\n{\n \"found\": true/false,\n \"center\": [x, y] or null,\n \"rect\": {\"left\": 0, \"top\": 0, \"width\": 0, \"height\": 0} or null,\n \"confidence\": 0.0-1.0,\n \"reason\": \"brief explanation\"\n}\n\nRULES:\n1. Look for an element matching the visualFingerprint description\n2. Verify it's in the expected position relative to the contextDescription\n3. Return center coordinates and bounding rect if found\n4. Set confidence based on how well the match is\n5. If not found or confidence < 0.5, set found=false\n6. Write reason in ${preferredLanguage}\n`;\n};\n"],"names":["semanticAnchorSearchInstruction","preferredLanguage","getPreferredLanguage"],"mappings":";AAEO,MAAMA,kCAAkC;IAC7C,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;mBAyBS,EAAED,kBAAkB;AACvC,CAAC;AACD"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { getPreferredLanguage } from "@midscene/shared/env";
|
|
2
|
+
/**
 * Builds the prompt that asks the model to produce a "semantic anchor" for
 * the element highlighted by a red rectangle.
 *
 * The prompt requests a JSON answer with `visualFingerprint`,
 * `contextDescription`, a `nearbyLandmarks` list (each entry carrying a
 * `description` and an `xpath`) and an optional `error`. The language the
 * descriptions must be written in is whatever `getPreferredLanguage()`
 * returns at call time; it is looked up once per invocation.
 *
 * @returns {string} The full instruction text, starting and ending with a newline.
 */
const semanticAnchorGenerateInstruction = () => `
Analyze the element in the red rectangle and its surrounding context.

TASK: Generate a semantic anchor for this element that can be used to relocate it even when the DOM structure changes.

IMPORTANT: Write all descriptions in ${getPreferredLanguage()}.

RESPONSE FORMAT (JSON):
{
"visualFingerprint": "concise visual description of the element (color, shape, icon, text style)",
"contextDescription": "spatial and semantic context (e.g., 'second button in top navigation bar')",
"nearbyLandmarks": [
{
"description": "description of a nearby stable structural element",
"xpath": "XPath of the landmark element"
}
],
"error"?: "error message if any"
}

RULES:
1. visualFingerprint: Focus on visual characteristics that survive DOM changes
- Colors, icons, text content, shape
- NOT CSS classes or IDs (they change)
2. contextDescription: Describe WHERE the element is relative to page structure
- Use spatial terms: "top-right", "below header", "in sidebar"
- Use structural terms: "in navigation bar", "in modal dialog"
3. nearbyLandmarks: Identify 1-3 STABLE structural elements near the target
- Prefer: <nav>, <header>, <footer>, <aside>, <main>, <form>
- Avoid: <div>, <span>, <li> (too generic, likely to change)
- Each landmark MUST have both description and xpath
4. Keep descriptions under 20 words each
5. Prioritize landmarks that are large and visually distinct
`;
|
|
39
|
+
export { semanticAnchorGenerateInstruction };
|
|
40
|
+
|
|
41
|
+
//# sourceMappingURL=semantic-anchor.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/semantic-anchor.mjs","sources":["../../../../src/ai-model/prompt/semantic-anchor.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const semanticAnchorGenerateInstruction = () => {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nAnalyze the element in the red rectangle and its surrounding context.\n\nTASK: Generate a semantic anchor for this element that can be used to relocate it even when the DOM structure changes.\n\nIMPORTANT: Write all descriptions in ${preferredLanguage}.\n\nRESPONSE FORMAT (JSON):\n{\n \"visualFingerprint\": \"concise visual description of the element (color, shape, icon, text style)\",\n \"contextDescription\": \"spatial and semantic context (e.g., 'second button in top navigation bar')\",\n \"nearbyLandmarks\": [\n {\n \"description\": \"description of a nearby stable structural element\",\n \"xpath\": \"XPath of the landmark element\"\n }\n ],\n \"error\"?: \"error message if any\"\n}\n\nRULES:\n1. visualFingerprint: Focus on visual characteristics that survive DOM changes\n - Colors, icons, text content, shape\n - NOT CSS classes or IDs (they change)\n2. contextDescription: Describe WHERE the element is relative to page structure\n - Use spatial terms: \"top-right\", \"below header\", \"in sidebar\"\n - Use structural terms: \"in navigation bar\", \"in modal dialog\"\n3. nearbyLandmarks: Identify 1-3 STABLE structural elements near the target\n - Prefer: <nav>, <header>, <footer>, <aside>, <main>, <form>\n - Avoid: <div>, <span>, <li> (too generic, likely to change)\n - Each landmark MUST have both description and xpath\n4. Keep descriptions under 20 words each\n5. 
Prioritize landmarks that are large and visually distinct\n`;\n};\n"],"names":["semanticAnchorGenerateInstruction","preferredLanguage","getPreferredLanguage"],"mappings":";AAEO,MAAMA,oCAAoC;IAC/C,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;qCAK2B,EAAED,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA4BzD,CAAC;AACD"}
|