@donggui/core 1.6.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/dist/es/agent/cache-confidence.mjs +121 -0
  2. package/dist/es/agent/cache-confidence.mjs.map +1 -0
  3. package/dist/es/agent/task-builder.mjs +127 -15
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/ai-model/prompt/cache-verify.mjs +34 -0
  6. package/dist/es/ai-model/prompt/cache-verify.mjs.map +1 -0
  7. package/dist/es/ai-model/prompt/semantic-anchor-search.mjs +34 -0
  8. package/dist/es/ai-model/prompt/semantic-anchor-search.mjs.map +1 -0
  9. package/dist/es/ai-model/prompt/semantic-anchor.mjs +41 -0
  10. package/dist/es/ai-model/prompt/semantic-anchor.mjs.map +1 -0
  11. package/dist/es/service/index.mjs +233 -0
  12. package/dist/es/service/index.mjs.map +1 -1
  13. package/dist/es/types.mjs.map +1 -1
  14. package/dist/lib/agent/cache-confidence.js +173 -0
  15. package/dist/lib/agent/cache-confidence.js.map +1 -0
  16. package/dist/lib/agent/task-builder.js +127 -15
  17. package/dist/lib/agent/task-builder.js.map +1 -1
  18. package/dist/lib/ai-model/prompt/cache-verify.js +68 -0
  19. package/dist/lib/ai-model/prompt/cache-verify.js.map +1 -0
  20. package/dist/lib/ai-model/prompt/semantic-anchor-search.js +68 -0
  21. package/dist/lib/ai-model/prompt/semantic-anchor-search.js.map +1 -0
  22. package/dist/lib/ai-model/prompt/semantic-anchor.js +75 -0
  23. package/dist/lib/ai-model/prompt/semantic-anchor.js.map +1 -0
  24. package/dist/lib/service/index.js +233 -0
  25. package/dist/lib/service/index.js.map +1 -1
  26. package/dist/lib/types.js.map +1 -1
  27. package/dist/types/agent/cache-confidence.d.ts +13 -0
  28. package/dist/types/ai-model/prompt/cache-verify.d.ts +1 -0
  29. package/dist/types/ai-model/prompt/semantic-anchor-search.d.ts +1 -0
  30. package/dist/types/ai-model/prompt/semantic-anchor.d.ts +1 -0
  31. package/dist/types/service/index.d.ts +5 -2
  32. package/dist/types/types.d.ts +32 -0
  33. package/package.json +1 -1
@@ -0,0 +1,121 @@
1
+ import { MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS, MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE } from "@midscene/shared/env/constants";
2
+ import { getDebug } from "@midscene/shared/logger";
3
+ const debug = getDebug('cache-confidence');
4
+ const DEFAULT_HALF_LIFE_MS = 1800000;
5
/**
 * Computes a confidence score for a cached locate result.
 *
 * The score is `0.7 * ageDecay + experienceBonus`, floored at 0.1, where
 * `ageDecay` halves once per half-life since `state.lastVerifiedAt` and
 * `experienceBonus` is `state.verificationCount / 10`, capped at 0.3.
 *
 * @param {{ lastVerifiedAt: number, verificationCount: number }} state
 *   Confidence bookkeeping stored alongside the cache entry.
 * @returns {number} A finite score between 0.1 and 1.
 */
function calculateConfidence(state) {
    // Only a positive half-life is meaningful. Zero, negative or unparsable
    // env values fall back to the default instead of inverting the decay.
    const parsedHalfLifeMs = Number.parseInt(process.env[MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS] || '', 10);
    const halfLifeMs = parsedHalfLifeMs > 0 ? parsedHalfLifeMs : DEFAULT_HALF_LIFE_MS;

    // A timestamp in the future must not push the decay factor above 1, and a
    // missing or non-numeric timestamp is treated as fully decayed rather than
    // letting NaN leak through the Math.max floor below.
    const rawAgeMs = Date.now() - state.lastVerifiedAt;
    const ageMs = Number.isFinite(rawAgeMs) ? Math.max(0, rawAgeMs) : Number.POSITIVE_INFINITY;
    const ageDecay = Math.exp(-ageMs * Math.LN2 / halfLifeMs);

    const verificationCount = Number.isFinite(state.verificationCount) ? state.verificationCount : 0;
    const experienceBonus = Math.min(verificationCount / 10, 0.3);
    const score = Math.max(0.1, 0.7 * ageDecay + experienceBonus);

    debug('calculateConfidence', {
        ageMs,
        ageDecay: ageDecay.toFixed(3),
        verificationCount: state.verificationCount,
        experienceBonus: experienceBonus.toFixed(3),
        score: score.toFixed(3)
    });
    return score;
}
20
/**
 * Maps a confidence score onto a verification level.
 *
 * Thresholds are exclusive lower bounds: above 0.8 is 'minimal', above 0.5 is
 * 'standard', above 0.2 is 'enhanced'; everything else (including NaN) is
 * 'full'.
 *
 * @param {number} confidence Confidence score to classify.
 * @returns {'minimal' | 'standard' | 'enhanced' | 'full'} The matching level.
 */
function determineVerificationLevel(confidence) {
    const tiers = [
        [0.8, 'minimal'],
        [0.5, 'standard'],
        [0.2, 'enhanced']
    ];
    // Tiers are ordered from strictest to loosest, so the first match wins.
    const matchedTier = tiers.find(([lowerBound]) => confidence > lowerBound);
    return matchedTier ? matchedTier[1] : 'full';
}
26
/**
 * Returns the set of cache checks to run for a verification level.
 *
 * A fresh object is returned on every call, so callers may mutate the result.
 *
 * @param {'minimal' | 'standard' | 'enhanced' | 'full'} level
 *   Verification level to expand.
 * @returns {{ coordCheck: boolean, visualVerify: boolean, semanticAnchor: boolean, skipCache: boolean }}
 *   Flags for each check. Unrecognized levels get the 'full' flags, which
 *   bypass the cache.
 */
function getVerificationActions(level) {
    switch (level) {
        case 'minimal':
            return {
                coordCheck: true,
                visualVerify: false,
                semanticAnchor: false,
                skipCache: false
            };
        case 'standard':
            return {
                coordCheck: true,
                visualVerify: true,
                semanticAnchor: false,
                skipCache: false
            };
        case 'enhanced':
            return {
                coordCheck: true,
                visualVerify: true,
                semanticAnchor: true,
                skipCache: false
            };
        case 'full':
        default:
            // An unknown level must not return undefined, because callers read
            // flags off the result directly. Bypassing the cache is the
            // conservative choice.
            return {
                coordCheck: false,
                visualVerify: false,
                semanticAnchor: false,
                skipCache: true
            };
    }
}
58
/**
 * Builds the confidence state for a cache entry that has just been written.
 *
 * @returns {{ lastVerifiedAt: number, verificationCount: number, confidenceScore: number }}
 *   State stamped with the current time, one verification, and a score of 1.
 */
function createInitialConfidenceState() {
    const createdAt = Date.now();
    const initialState = {
        lastVerifiedAt: createdAt,
        verificationCount: 1,
        confidenceScore: 1.0
    };
    return initialState;
}
65
/**
 * Produces the next confidence state after a cache verification attempt.
 *
 * On success the timestamp is refreshed, the verification count is
 * incremented, and the score is recomputed with `calculateConfidence`.
 * On failure the timestamp and count are kept and the stored score is halved,
 * floored at 0.1.
 *
 * @param {{ lastVerifiedAt: number, verificationCount: number, confidenceScore: number }} state
 *   Current confidence state. It is not mutated.
 * @param {boolean} passed Whether the verification succeeded.
 * @returns {{ lastVerifiedAt: number, verificationCount: number, confidenceScore: number }}
 *   A new state object.
 */
function updateConfidenceOnVerify(state, passed) {
    if (passed) {
        // Build the verified state once so the stored timestamp and count are
        // exactly the ones the score was computed from.
        const verifiedState = {
            ...state,
            lastVerifiedAt: Date.now(),
            verificationCount: state.verificationCount + 1
        };
        return {
            lastVerifiedAt: verifiedState.lastVerifiedAt,
            verificationCount: verifiedState.verificationCount,
            confidenceScore: calculateConfidence(verifiedState)
        };
    }

    // A missing or non-numeric stored score would turn the penalty into NaN,
    // which Math.max does not floor. Treat it as no confidence.
    const previousScore = Number.isFinite(state.confidenceScore) ? state.confidenceScore : 0;
    return {
        lastVerifiedAt: state.lastVerifiedAt,
        verificationCount: state.verificationCount,
        confidenceScore: Math.max(0.1, 0.5 * previousScore)
    };
}
76
/**
 * Builds the first progressive-convergence record for a located point.
 *
 * @param {[number, number]} center The `[x, y]` point of the first sample.
 * @returns {{ convergedCenter: [number, number], convergenceRadius: number, sampleCount: number, lastUpdatedAt: number }}
 *   A record with one sample, zero radius, and the current timestamp.
 */
function createInitialProgressiveRecord(center) {
    return {
        // Copy the point so the record does not share an array with the
        // caller; later edits to the caller's array must not move the center.
        convergedCenter: Array.isArray(center) ? [...center] : center,
        convergenceRadius: 0,
        sampleCount: 1,
        lastUpdatedAt: Date.now()
    };
}
84
/**
 * Folds a newly observed center into a progressive-convergence record.
 *
 * When the feature is disabled (the env var is exactly 'false'), the new
 * center replaces the old one and the radius is reset to 0. Otherwise the
 * previous center, weighted by `record.sampleCount`, is averaged with the new
 * center weighted by `confidence` (at least 0.1). The radius is the larger of
 * the distances from the previous center and from the new sample to the
 * updated center.
 *
 * @param {{ convergedCenter: [number, number], convergenceRadius: number, sampleCount: number, lastUpdatedAt: number }} record
 *   Existing record. It is not mutated.
 * @param {[number, number]} newCenter Newly observed `[x, y]` point.
 * @param {number} confidence Confidence score used to weight the new sample.
 * @returns {{ convergedCenter: [number, number], convergenceRadius: number, sampleCount: number, lastUpdatedAt: number }}
 *   A new record with `sampleCount` incremented by one.
 */
function updateProgressiveConvergence(record, newCenter, confidence) {
    const nextSampleCount = record.sampleCount + 1;
    const [newX, newY] = newCenter;

    const progressiveEnabled = 'false' !== process.env[MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE];
    if (!progressiveEnabled) {
        return {
            // Copy so the record does not alias the caller's point.
            convergedCenter: [newX, newY],
            convergenceRadius: 0,
            sampleCount: nextSampleCount,
            lastUpdatedAt: Date.now()
        };
    }

    // A non-finite confidence would make every coordinate NaN, so fall back
    // to the minimum weight.
    const weight = Number.isFinite(confidence) ? Math.max(0.1, confidence) : 0.1;
    const totalWeight = record.sampleCount + weight;
    const [prevX, prevY] = record.convergedCenter;

    const convergedCenter = [
        (prevX * record.sampleCount + newX * weight) / totalWeight,
        (prevY * record.sampleCount + newY * weight) / totalWeight
    ];
    const convergenceRadius = Math.max(
        Math.hypot(prevX - convergedCenter[0], prevY - convergedCenter[1]),
        Math.hypot(newX - convergedCenter[0], newY - convergedCenter[1])
    );

    debug('updateProgressiveConvergence', {
        newCenter,
        convergedCenter: convergedCenter.map((v) => v.toFixed(1)),
        convergenceRadius: convergenceRadius.toFixed(1),
        sampleCount: nextSampleCount
    });
    return {
        convergedCenter,
        convergenceRadius,
        sampleCount: nextSampleCount,
        lastUpdatedAt: Date.now()
    };
}
119
+ export { calculateConfidence, createInitialConfidenceState, createInitialProgressiveRecord, determineVerificationLevel, getVerificationActions, updateConfidenceOnVerify, updateProgressiveConvergence };
120
+
121
+ //# sourceMappingURL=cache-confidence.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent/cache-confidence.mjs","sources":["../../../src/agent/cache-confidence.ts"],"sourcesContent":["import type {\n CacheConfidenceState,\n ProgressiveLocateRecord,\n VerificationLevel,\n} from '@/types';\nimport {\n MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS,\n MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE,\n} from '@midscene/shared/env/constants';\nimport { getDebug } from '@midscene/shared/logger';\n\nconst debug = getDebug('cache-confidence');\n\nconst DEFAULT_HALF_LIFE_MS = 30 * 60 * 1000;\n\nexport function calculateConfidence(state: CacheConfidenceState): number {\n const halfLifeMs =\n Number.parseInt(\n process.env[MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS] || '',\n 10,\n ) || DEFAULT_HALF_LIFE_MS;\n\n const ageMs = Date.now() - state.lastVerifiedAt;\n const ageDecay = Math.exp((-ageMs * Math.LN2) / halfLifeMs);\n const experienceBonus = Math.min(state.verificationCount / 10, 0.3);\n const score = Math.max(0.1, ageDecay * 0.7 + experienceBonus);\n\n debug('calculateConfidence', {\n ageMs,\n ageDecay: ageDecay.toFixed(3),\n verificationCount: state.verificationCount,\n experienceBonus: experienceBonus.toFixed(3),\n score: score.toFixed(3),\n });\n\n return score;\n}\n\nexport function determineVerificationLevel(\n confidence: number,\n): VerificationLevel {\n if (confidence > 0.8) return 'minimal';\n if (confidence > 0.5) return 'standard';\n if (confidence > 0.2) return 'enhanced';\n return 'full';\n}\n\nexport function getVerificationActions(level: VerificationLevel): {\n coordCheck: boolean;\n visualVerify: boolean;\n semanticAnchor: boolean;\n skipCache: boolean;\n} {\n switch (level) {\n case 'minimal':\n return {\n coordCheck: true,\n visualVerify: false,\n semanticAnchor: false,\n skipCache: false,\n };\n case 'standard':\n return {\n coordCheck: true,\n visualVerify: true,\n semanticAnchor: false,\n skipCache: false,\n };\n case 'enhanced':\n return {\n coordCheck: true,\n visualVerify: true,\n semanticAnchor: true,\n skipCache: 
false,\n };\n case 'full':\n return {\n coordCheck: false,\n visualVerify: false,\n semanticAnchor: false,\n skipCache: true,\n };\n }\n}\n\nexport function createInitialConfidenceState(): CacheConfidenceState {\n return {\n lastVerifiedAt: Date.now(),\n verificationCount: 1,\n confidenceScore: 1.0,\n };\n}\n\nexport function updateConfidenceOnVerify(\n state: CacheConfidenceState,\n passed: boolean,\n): CacheConfidenceState {\n return {\n lastVerifiedAt: passed ? Date.now() : state.lastVerifiedAt,\n verificationCount: passed\n ? state.verificationCount + 1\n : state.verificationCount,\n confidenceScore: passed\n ? calculateConfidence({\n ...state,\n lastVerifiedAt: Date.now(),\n verificationCount: state.verificationCount + 1,\n })\n : Math.max(0.1, state.confidenceScore * 0.5),\n };\n}\n\nexport function createInitialProgressiveRecord(\n center: [number, number],\n): ProgressiveLocateRecord {\n return {\n convergedCenter: center,\n convergenceRadius: 0,\n sampleCount: 1,\n lastUpdatedAt: Date.now(),\n };\n}\n\nexport function updateProgressiveConvergence(\n record: ProgressiveLocateRecord,\n newCenter: [number, number],\n confidence: number,\n): ProgressiveLocateRecord {\n const enableProgressive =\n process.env[MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE] !== 'false';\n if (!enableProgressive) {\n return {\n convergedCenter: newCenter,\n convergenceRadius: 0,\n sampleCount: record.sampleCount + 1,\n lastUpdatedAt: Date.now(),\n };\n }\n\n const weight = Math.max(0.1, confidence);\n const totalWeight = record.sampleCount + weight;\n\n const convergedCenter: [number, number] = [\n (record.convergedCenter[0] * record.sampleCount + newCenter[0] * weight) /\n totalWeight,\n (record.convergedCenter[1] * record.sampleCount + newCenter[1] * weight) /\n totalWeight,\n ];\n\n const allPoints = [\n [record.convergedCenter[0], record.convergedCenter[1]],\n newCenter,\n ];\n const convergenceRadius = Math.max(\n ...allPoints.map((p) =>\n Math.sqrt(\n (p[0] - 
convergedCenter[0]) ** 2 + (p[1] - convergedCenter[1]) ** 2,\n ),\n ),\n );\n\n debug('updateProgressiveConvergence', {\n newCenter,\n convergedCenter: convergedCenter.map((v) => v.toFixed(1)),\n convergenceRadius: convergenceRadius.toFixed(1),\n sampleCount: record.sampleCount + 1,\n });\n\n return {\n convergedCenter,\n convergenceRadius,\n sampleCount: record.sampleCount + 1,\n lastUpdatedAt: Date.now(),\n };\n}\n"],"names":["debug","getDebug","DEFAULT_HALF_LIFE_MS","calculateConfidence","state","halfLifeMs","Number","process","MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS","ageMs","Date","ageDecay","Math","experienceBonus","score","determineVerificationLevel","confidence","getVerificationActions","level","createInitialConfidenceState","updateConfidenceOnVerify","passed","createInitialProgressiveRecord","center","updateProgressiveConvergence","record","newCenter","enableProgressive","MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE","weight","totalWeight","convergedCenter","allPoints","convergenceRadius","p","v"],"mappings":";;AAWA,MAAMA,QAAQC,SAAS;AAEvB,MAAMC,uBAAuB;AAEtB,SAASC,oBAAoBC,KAA2B;IAC7D,MAAMC,aACJC,OAAO,QAAQ,CACbC,QAAQ,GAAG,CAACC,uCAAuC,IAAI,IACvD,OACGN;IAEP,MAAMO,QAAQC,KAAK,GAAG,KAAKN,MAAM,cAAc;IAC/C,MAAMO,WAAWC,KAAK,GAAG,CAAE,CAACH,QAAQG,KAAK,GAAG,GAAIP;IAChD,MAAMQ,kBAAkBD,KAAK,GAAG,CAACR,MAAM,iBAAiB,GAAG,IAAI;IAC/D,MAAMU,QAAQF,KAAK,GAAG,CAAC,KAAKD,AAAW,MAAXA,WAAiBE;IAE7Cb,MAAM,uBAAuB;QAC3BS;QACA,UAAUE,SAAS,OAAO,CAAC;QAC3B,mBAAmBP,MAAM,iBAAiB;QAC1C,iBAAiBS,gBAAgB,OAAO,CAAC;QACzC,OAAOC,MAAM,OAAO,CAAC;IACvB;IAEA,OAAOA;AACT;AAEO,SAASC,2BACdC,UAAkB;IAElB,IAAIA,aAAa,KAAK,OAAO;IAC7B,IAAIA,aAAa,KAAK,OAAO;IAC7B,IAAIA,aAAa,KAAK,OAAO;IAC7B,OAAO;AACT;AAEO,SAASC,uBAAuBC,KAAwB;IAM7D,OAAQA;QACN,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;QACF,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;QACF,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;QACF,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;
IACJ;AACF;AAEO,SAASC;IACd,OAAO;QACL,gBAAgBT,KAAK,GAAG;QACxB,mBAAmB;QACnB,iBAAiB;IACnB;AACF;AAEO,SAASU,yBACdhB,KAA2B,EAC3BiB,MAAe;IAEf,OAAO;QACL,gBAAgBA,SAASX,KAAK,GAAG,KAAKN,MAAM,cAAc;QAC1D,mBAAmBiB,SACfjB,MAAM,iBAAiB,GAAG,IAC1BA,MAAM,iBAAiB;QAC3B,iBAAiBiB,SACblB,oBAAoB;YAClB,GAAGC,KAAK;YACR,gBAAgBM,KAAK,GAAG;YACxB,mBAAmBN,MAAM,iBAAiB,GAAG;QAC/C,KACAQ,KAAK,GAAG,CAAC,KAAKR,AAAwB,MAAxBA,MAAM,eAAe;IACzC;AACF;AAEO,SAASkB,+BACdC,MAAwB;IAExB,OAAO;QACL,iBAAiBA;QACjB,mBAAmB;QACnB,aAAa;QACb,eAAeb,KAAK,GAAG;IACzB;AACF;AAEO,SAASc,6BACdC,MAA+B,EAC/BC,SAA2B,EAC3BV,UAAkB;IAElB,MAAMW,oBACJpB,AAA+D,YAA/DA,QAAQ,GAAG,CAACqB,8CAA8C;IAC5D,IAAI,CAACD,mBACH,OAAO;QACL,iBAAiBD;QACjB,mBAAmB;QACnB,aAAaD,OAAO,WAAW,GAAG;QAClC,eAAef,KAAK,GAAG;IACzB;IAGF,MAAMmB,SAASjB,KAAK,GAAG,CAAC,KAAKI;IAC7B,MAAMc,cAAcL,OAAO,WAAW,GAAGI;IAEzC,MAAME,kBAAoC;QACvCN,CAAAA,OAAO,eAAe,CAAC,EAAE,GAAGA,OAAO,WAAW,GAAGC,SAAS,CAAC,EAAE,GAAGG,MAAK,IACpEC;QACDL,CAAAA,OAAO,eAAe,CAAC,EAAE,GAAGA,OAAO,WAAW,GAAGC,SAAS,CAAC,EAAE,GAAGG,MAAK,IACpEC;KACH;IAED,MAAME,YAAY;QAChB;YAACP,OAAO,eAAe,CAAC,EAAE;YAAEA,OAAO,eAAe,CAAC,EAAE;SAAC;QACtDC;KACD;IACD,MAAMO,oBAAoBrB,KAAK,GAAG,IAC7BoB,UAAU,GAAG,CAAC,CAACE,IAChBtB,KAAK,IAAI,CACNsB,AAAAA,CAAAA,CAAC,CAAC,EAAE,GAAGH,eAAe,CAAC,EAAC,KAAM,IAAKG,AAAAA,CAAAA,CAAC,CAAC,EAAE,GAAGH,eAAe,CAAC,EAAC,KAAM;IAKxE/B,MAAM,gCAAgC;QACpC0B;QACA,iBAAiBK,gBAAgB,GAAG,CAAC,CAACI,IAAMA,EAAE,OAAO,CAAC;QACtD,mBAAmBF,kBAAkB,OAAO,CAAC;QAC7C,aAAaR,OAAO,WAAW,GAAG;IACpC;IAEA,OAAO;QACLM;QACAE;QACA,aAAaR,OAAO,WAAW,GAAG;QAClC,eAAef,KAAK,GAAG;IACzB;AACF"}
@@ -1,7 +1,9 @@
1
+ import { calculateConfidence, createInitialConfidenceState, createInitialProgressiveRecord, determineVerificationLevel, getVerificationActions, updateConfidenceOnVerify, updateProgressiveConvergence } from "./cache-confidence.mjs";
1
2
  import { findAllMidsceneLocatorField, parseActionParam } from "../ai-model/index.mjs";
2
3
  import { setTimingFieldOnce } from "../task-timing.mjs";
3
4
  import { ServiceError } from "../types.mjs";
4
5
  import { sleep } from "../utils.mjs";
6
+ import { MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD, MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR } from "@midscene/shared/env/constants";
5
7
  import { generateElementByRect } from "@midscene/shared/extractor";
6
8
  import { getDebug } from "@midscene/shared/logger";
7
9
  import { assert } from "@midscene/shared/utils";
@@ -228,22 +230,119 @@ class TaskBuilder {
228
230
  taskCache: this.taskCache,
229
231
  interfaceInstance: this.interface
230
232
  }, cacheEntry, cachePrompt, param.cacheable);
231
- const elementFromCache = elementFromCacheResult ? transformLogicalElementToScreenshot(elementFromCacheResult, shrunkShotToLogicalRatio) : void 0;
232
- const isCacheHit = !!elementFromCache;
233
- let elementFromAiLocate;
233
+ let elementFromCache = elementFromCacheResult ? transformLogicalElementToScreenshot(elementFromCacheResult, shrunkShotToLogicalRatio) : void 0;
234
+ let isCacheHit = !!elementFromCache;
234
235
  const timing = taskContext.task.timing;
235
- if (!isXpathHit && !isCacheHit && !isPlanHit) try {
236
- setTimingFieldOnce(timing, 'callAiStart');
237
- locateResult = await this.service.locate(param, {
238
- context: uiContext
239
- }, modelConfigForDefaultIntent, abortSignal);
240
- applyDump(locateResult.dump);
241
- elementFromAiLocate = locateResult.element;
242
- } catch (error) {
243
- if (error instanceof ServiceError) applyDump(error.dump);
244
- throw error;
245
- } finally{
246
- setTimingFieldOnce(timing, 'callAiEnd');
236
+ let elementFromAiLocate;
237
+ if (isCacheHit) {
238
+ const cacheFeature = cacheEntry;
239
+ const cachedCenter = cacheFeature?.cachedCenter;
240
+ const coordOffsetThreshold = Number.parseInt(process.env[MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD] || '16', 10) || 16;
241
+ const confidenceState = cacheFeature?.confidenceState || createInitialConfidenceState();
242
+ const confidence = calculateConfidence(confidenceState);
243
+ const level = determineVerificationLevel(confidence);
244
+ const actions = getVerificationActions(level);
245
+ debug('cache confidence assessment', {
246
+ confidence: confidence.toFixed(3),
247
+ level,
248
+ actions,
249
+ verificationCount: confidenceState.verificationCount
250
+ });
251
+ if (actions.skipCache) {
252
+ debug('cache confidence too low, skipping cache entirely', {
253
+ confidence,
254
+ level
255
+ });
256
+ isCacheHit = false;
257
+ }
258
+ try {
259
+ if (isCacheHit && actions.coordCheck && cachedCenter) {
260
+ const offset = Math.sqrt((elementFromCache.center[0] - cachedCenter[0]) ** 2 + (elementFromCache.center[1] - cachedCenter[1]) ** 2);
261
+ debug('cache coord offset check', {
262
+ cachedCenter,
263
+ currentCenter: elementFromCache.center,
264
+ offset: Math.round(offset),
265
+ threshold: coordOffsetThreshold
266
+ });
267
+ if (offset > coordOffsetThreshold) {
268
+ debug('cache coord offset exceeded threshold, fallback to AI locate', {
269
+ offset,
270
+ threshold: coordOffsetThreshold
271
+ });
272
+ isCacheHit = false;
273
+ }
274
+ }
275
+ if (isCacheHit && actions.visualVerify) {
276
+ const verification = await this.service.verifyCachedElement(elementFromCache.center, cachePrompt, modelConfigForDefaultIntent, uiContext);
277
+ if (verification.pass) debug('cache hit and visual verification passed', {
278
+ description: verification.description
279
+ });
280
+ else {
281
+ debug('cache hit but visual verification failed, fallback to AI locate', {
282
+ reason: verification.reason,
283
+ description: verification.description,
284
+ prompt: cachePrompt
285
+ });
286
+ isCacheHit = false;
287
+ }
288
+ }
289
+ } catch (verifyError) {
290
+ debug('cache verification error, fallback to AI locate', verifyError);
291
+ isCacheHit = false;
292
+ }
293
+ if (isCacheHit) {
294
+ const updatedState = updateConfidenceOnVerify(confidenceState, true);
295
+ cacheFeature.confidenceState = updatedState;
296
+ const progressiveRecord = cacheFeature.progressiveRecord;
297
+ if (progressiveRecord) {
298
+ const updated = updateProgressiveConvergence(progressiveRecord, elementFromCache.center, updatedState.confidenceScore);
299
+ cacheFeature.progressiveRecord = updated;
300
+ if (updated.convergenceRadius < 5 && updated.sampleCount >= 3) {
301
+ debug('using converged center instead of single-result center', {
302
+ convergedCenter: updated.convergedCenter.map((v)=>v.toFixed(1)),
303
+ singleCenter: elementFromCache.center,
304
+ convergenceRadius: updated.convergenceRadius.toFixed(1),
305
+ sampleCount: updated.sampleCount
306
+ });
307
+ elementFromCache = {
308
+ ...elementFromCache,
309
+ center: [
310
+ Math.round(updated.convergedCenter[0]),
311
+ Math.round(updated.convergedCenter[1])
312
+ ]
313
+ };
314
+ }
315
+ }
316
+ } else {
317
+ const updatedState = updateConfidenceOnVerify(confidenceState, false);
318
+ cacheFeature.confidenceState = updatedState;
319
+ }
320
+ }
321
+ if (!isXpathHit && !isCacheHit && !isPlanHit) {
322
+ const cacheFeature = cacheEntry;
323
+ const semanticAnchor = cacheFeature?.semanticAnchor;
324
+ if (semanticAnchor && 'false' !== process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR]) try {
325
+ const anchorResult = await this.service.locateBySemanticAnchor(semanticAnchor, modelConfigForDefaultIntent, this.interface, uiContext);
326
+ if (anchorResult) {
327
+ elementFromAiLocate = anchorResult;
328
+ debug('semantic anchor locate succeeded, skipping full AI locate');
329
+ }
330
+ } catch (anchorError) {
331
+ debug('semantic anchor locate failed:', anchorError);
332
+ }
333
+ if (!elementFromAiLocate) try {
334
+ setTimingFieldOnce(timing, 'callAiStart');
335
+ locateResult = await this.service.locate(param, {
336
+ context: uiContext
337
+ }, modelConfigForDefaultIntent, abortSignal);
338
+ applyDump(locateResult.dump);
339
+ elementFromAiLocate = locateResult.element;
340
+ } catch (error) {
341
+ if (error instanceof ServiceError) applyDump(error.dump);
342
+ throw error;
343
+ } finally{
344
+ setTimingFieldOnce(timing, 'callAiEnd');
345
+ }
247
346
  }
248
347
  const element = elementFromBbox || elementFromXpath || elementFromCache || elementFromAiLocate;
249
348
  const locateCacheAlreadyExists = hasNonEmptyCache(locateCacheRecord?.cacheContent?.cache);
@@ -262,7 +361,20 @@ class TaskBuilder {
262
361
  modelConfig: modelConfigForDefaultIntent
263
362
  });
264
363
  if (hasNonEmptyCache(feature)) {
364
+ feature.cachedCenter = pointForCache;
365
+ feature.confidenceState = createInitialConfidenceState();
366
+ feature.progressiveRecord = createInitialProgressiveRecord(pointForCache);
265
367
  debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
368
+ const enableSemanticAnchor = 'false' !== process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR];
369
+ if (enableSemanticAnchor) try {
370
+ const anchor = await this.service.generateSemanticAnchor(pointForCache, modelConfigForDefaultIntent, uiContext);
371
+ if (anchor) {
372
+ feature.semanticAnchor = anchor;
373
+ debug('semantic anchor generated for prompt: %s', cachePrompt);
374
+ }
375
+ } catch (anchorError) {
376
+ debug('generateSemanticAnchor failed:', anchorError);
377
+ }
266
378
  currentCacheEntry = feature;
267
379
  await this.taskCache.updateOrAppendCacheRecord({
268
380
  type: 'locate',
@@ -1 +1 @@
1
- {"version":3,"file":"agent/task-builder.mjs","sources":["../../../src/agent/task-builder.ts"],"sourcesContent":["import { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport { setTimingFieldOnce } from '@/task-timing';\nimport type {\n DetailedLocateParam,\n DeviceAction,\n ElementCacheFeature,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskPlanningLocateApply,\n LocateResultElement,\n LocateResultWithDump,\n PlanningAction,\n PlanningLocateParam,\n Rect,\n ServiceDump,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { sleep } from '@/utils';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport {\n ifPlanLocateParamIsBbox,\n matchElementFromCache,\n matchElementFromPlan,\n transformLogicalElementToScreenshot,\n transformLogicalRectToScreenshotRect,\n} from './utils';\n\nconst debug = getDebug('agent:task-builder');\n\n/**\n * Check if a cache object is non-empty\n */\nfunction hasNonEmptyCache(cache: unknown): boolean {\n return (\n cache !== null &&\n cache !== undefined &&\n typeof cache === 'object' &&\n Object.keys(cache).length > 0\n );\n}\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? 
{ prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\ninterface TaskBuilderDeps {\n interfaceInstance: AbstractInterface;\n service: Service;\n taskCache?: TaskCache;\n actionSpace: DeviceAction[];\n waitAfterAction?: number;\n}\n\ninterface BuildOptions {\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\ninterface PlanBuildContext {\n tasks: ExecutionTaskApply[];\n modelConfigForPlanning: IModelConfig;\n modelConfigForDefaultIntent: IModelConfig;\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\nexport class TaskBuilder {\n private readonly interface: AbstractInterface;\n\n private readonly service: Service;\n\n private readonly taskCache?: TaskCache;\n\n private readonly actionSpace: DeviceAction[];\n\n private readonly waitAfterAction?: number;\n\n constructor({\n interfaceInstance,\n service,\n taskCache,\n actionSpace,\n waitAfterAction,\n }: TaskBuilderDeps) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = taskCache;\n this.actionSpace = actionSpace;\n this.waitAfterAction = waitAfterAction;\n }\n\n public async build(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: BuildOptions,\n ): Promise<{ tasks: ExecutionTaskApply[] }> {\n const tasks: ExecutionTaskApply[] = [];\n const cacheable = options?.cacheable;\n\n const context: PlanBuildContext = {\n tasks,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n cacheable,\n deepLocate: options?.deepLocate,\n abortSignal: options?.abortSignal,\n };\n\n type PlanHandler = (plan: PlanningAction) => Promise<void> | void;\n\n const planHandlers = new Map<string, PlanHandler>([\n [\n 'Locate',\n (plan) =>\n this.handleLocatePlan(\n plan as PlanningAction<PlanningLocateParam>,\n context,\n ),\n ],\n ['Finished', (plan) => 
this.handleFinishedPlan(plan, context)],\n ]);\n\n const defaultHandler: PlanHandler = (plan) =>\n this.handleActionPlan(plan, context);\n\n for (const plan of plans) {\n const handler = planHandlers.get(plan.type) ?? defaultHandler;\n await handler(plan);\n }\n\n return {\n tasks,\n };\n }\n\n private handleFinishedPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): void {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action Space',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n executor: async () => {},\n };\n context.tasks.push(taskActionFinished);\n }\n\n private async handleLocatePlan(\n plan: PlanningAction<PlanningLocateParam>,\n context: PlanBuildContext,\n ): Promise<void> {\n const taskLocate = this.createLocateTask(plan, plan.param, context);\n context.tasks.push(taskLocate);\n }\n\n private async handleActionPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): Promise<void> {\n const planType = plan.type;\n const actionSpace = this.actionSpace;\n const action = actionSpace.find((item) => item.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n const locateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? 
findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n // Always use createLocateTask for all locate params (including bbox)\n // This ensures cache writing happens even when bbox is available\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n `hasBbox=${ifPlanLocateParamIsBbox(param[field])}`,\n );\n const locateTask = this.createLocateTask(\n locatePlan,\n param[field],\n context,\n (result) => {\n param[field] = result;\n },\n );\n context.tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action Space',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action Space',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n executor: async (param, taskContext) => {\n const timing = taskContext.task.timing;\n\n debug(\n 'executing action',\n planType,\n param,\n `taskContext.element.center: ${taskContext.element?.center}`,\n );\n\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Action task');\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. 
Cannot execute action ${planType}.`,\n );\n });\n\n setTimingFieldOnce(timing, 'beforeInvokeActionHookStart');\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug(\n `will call \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.beforeInvokeAction(action.name, param);\n debug(\n `called \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n setTimingFieldOnce(timing, 'beforeInvokeActionHookEnd');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in Action task',\n );\n }\n\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema, {\n shrunkShotToLogicalRatio,\n });\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n setTimingFieldOnce(timing, 'callActionStart');\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n const actionResult = await actionFn(param, taskContext);\n setTimingFieldOnce(timing, 'callActionEnd');\n debug('called action', action.name, 'result:', actionResult);\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookStart');\n\n const delayAfterRunner =\n action.delayAfterRunner ?? this.waitAfterAction ?? 
300;\n if (delayAfterRunner > 0) {\n await sleep(delayAfterRunner);\n }\n\n try {\n if (this.interface.afterInvokeAction) {\n debug(\n `will call \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.afterInvokeAction(action.name, param);\n debug(\n `called \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookEnd');\n\n return {\n output: actionResult,\n };\n },\n };\n\n context.tasks.push(task);\n }\n\n private createLocateTask(\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n context: PlanBuildContext,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskPlanningLocateApply {\n const { cacheable, modelConfigForDefaultIntent, deepLocate, abortSignal } =\n context;\n\n let locateParam = detailedLocateParam;\n\n if (typeof locateParam === 'string') {\n locateParam = {\n prompt: locateParam,\n };\n }\n\n if (cacheable !== undefined) {\n locateParam = {\n ...locateParam,\n cacheable,\n };\n }\n\n if (deepLocate && !locateParam.deepLocate) {\n locateParam = {\n ...locateParam,\n deepLocate: true,\n };\n }\n\n const taskLocator: ExecutionTaskPlanningLocateApply = {\n type: 'Planning',\n subType: 'Locate',\n param: locateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let { uiContext } = taskContext;\n\n assert(\n param?.prompt || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n\n if (!uiContext) {\n uiContext = await this.service.contextRetrieverFn();\n }\n\n assert(uiContext, 'uiContext is required for Service 
task');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in locate task',\n );\n }\n\n let locateDump: ServiceDump | undefined;\n let locateResult: LocateResultWithDump | undefined;\n\n const applyDump = (dump?: ServiceDump) => {\n if (!dump) {\n return;\n }\n locateDump = dump;\n task.log = {\n dump,\n rawResponse: dump.taskInfo?.rawResponse,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n if (dump.taskInfo?.reasoning_content) {\n task.reasoning_content = dump.taskInfo.reasoning_content;\n }\n };\n\n // from bbox (plan hit)\n const elementFromBbox = ifPlanLocateParamIsBbox(param)\n ? matchElementFromPlan(param)\n : undefined;\n const isPlanHit = !!elementFromBbox;\n\n // from xpath\n let rectFromXpath: Rect | undefined;\n if (\n !isPlanHit &&\n param.xpath &&\n this.interface.rectMatchesCacheFeature\n ) {\n try {\n rectFromXpath = await this.interface.rectMatchesCacheFeature({\n xpaths: [param.xpath],\n });\n } catch {\n // xpath locate failed, allow fallback to cache or AI locate\n }\n }\n\n const elementFromXpath = rectFromXpath\n ? generateElementByRect(\n // rectFromXpath is in logical coordinates, which should be transformed to screenshot coordinates;\n transformLogicalRectToScreenshotRect(\n rectFromXpath,\n shrunkShotToLogicalRatio,\n ),\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt || '',\n )\n : undefined;\n\n const isXpathHit = !!elementFromXpath;\n\n const cachePrompt = param.prompt;\n const locateCacheRecord =\n await this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n\n const elementFromCacheResult =\n isPlanHit || isXpathHit\n ? 
null\n : await matchElementFromCache(\n {\n taskCache: this.taskCache,\n interfaceInstance: this.interface,\n },\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n\n // elementFromCacheResult is in logical coordinates, which should be transformed to screenshot coordinates;\n const elementFromCache = elementFromCacheResult\n ? transformLogicalElementToScreenshot(\n elementFromCacheResult,\n shrunkShotToLogicalRatio,\n )\n : undefined;\n\n const isCacheHit = !!elementFromCache;\n\n let elementFromAiLocate: LocateResultElement | null | undefined;\n const timing = taskContext.task.timing;\n if (!isXpathHit && !isCacheHit && !isPlanHit) {\n try {\n setTimingFieldOnce(timing, 'callAiStart');\n locateResult = await this.service.locate(\n param,\n {\n context: uiContext,\n },\n modelConfigForDefaultIntent,\n abortSignal,\n );\n applyDump(locateResult.dump);\n elementFromAiLocate = locateResult.element;\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n } finally {\n setTimingFieldOnce(timing, 'callAiEnd');\n }\n }\n\n const element =\n elementFromBbox ||\n elementFromXpath ||\n elementFromCache ||\n elementFromAiLocate;\n\n // Check if locate cache already exists (for planHitFlag case)\n const locateCacheAlreadyExists = hasNonEmptyCache(\n locateCacheRecord?.cacheContent?.cache,\n );\n\n let currentCacheEntry: ElementCacheFeature | undefined;\n // Write cache if:\n // 1. element found\n // 2. taskCache enabled\n // 3. not a cache hit (otherwise we'd be writing what we just read)\n // 4. not already cached for plan hit case (avoid redundant writes), OR allow update if cache validation failed\n // 5. 
cacheable is not explicitly false\n if (\n element &&\n this.taskCache &&\n !isCacheHit &&\n (!isPlanHit || !locateCacheAlreadyExists) &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForPoint) {\n try {\n // Transform coordinates to logical space for cacheFeatureForPoint\n // cacheFeatureForPoint needs logical coordinates to locate elements in DOM\n let pointForCache: [number, number] = element.center;\n if (shrunkShotToLogicalRatio !== 1) {\n pointForCache = [\n Math.round(element.center[0] / shrunkShotToLogicalRatio),\n Math.round(element.center[1] / shrunkShotToLogicalRatio),\n ];\n debug(\n 'Transformed coordinates for cacheFeatureForPoint: %o -> %o',\n element.center,\n pointForCache,\n );\n }\n\n const feature = await this.interface.cacheFeatureForPoint(\n pointForCache,\n {\n targetDescription:\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt,\n modelConfig: modelConfigForDefaultIntent,\n },\n );\n if (hasNonEmptyCache(feature)) {\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n currentCacheEntry = feature;\n await this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: %s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForPoint failed: %s', error);\n }\n } else {\n debug('cacheFeatureForPoint is not supported, skip cache update');\n }\n }\n\n if (!element) {\n if (locateDump) {\n throw new ServiceError(\n `Element not found : ${param.prompt}`,\n locateDump,\n );\n }\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (isPlanHit) {\n hitBy = {\n from: 'Plan',\n context: {\n bbox: param.bbox,\n },\n };\n } else if (isXpathHit) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (isCacheHit) {\n 
hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element: {\n ...element,\n // backward compatibility for aiLocate, which return value needs a dpr field\n dpr: uiContext.deprecatedDpr,\n },\n },\n hitBy,\n };\n },\n };\n\n return taskLocator;\n }\n}\n"],"names":["debug","getDebug","hasNonEmptyCache","cache","Object","locatePlanForLocate","param","locate","locatePlan","TaskBuilder","plans","modelConfigForPlanning","modelConfigForDefaultIntent","options","tasks","cacheable","context","planHandlers","Map","plan","defaultHandler","handler","taskActionFinished","taskLocate","planType","actionSpace","action","item","Error","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","JSON","ifPlanLocateParamIsBbox","locateTask","result","assert","task","taskContext","timing","uiContext","setTimingFieldOnce","Promise","sleep","originalError","originalMessage","String","shrunkShotToLogicalRatio","undefined","parseActionParam","error","actionFn","actionResult","delayAfterRunner","detailedLocateParam","onResult","deepLocate","abortSignal","locateParam","taskLocator","locateDump","locateResult","applyDump","dump","elementFromBbox","matchElementFromPlan","isPlanHit","rectFromXpath","elementFromXpath","generateElementByRect","transformLogicalRectToScreenshotRect","isXpathHit","cachePrompt","locateCacheRecord","cacheEntry","elementFromCacheResult","matchElementFromCache","elementFromCache","transformLogicalElementToScreenshot","isCacheHit","elementFromAiLocate","ServiceError","element","locateCacheAlreadyExists","currentCacheEntry","pointForCache","Math","feature","hitBy","interfaceInstance","service","taskCache","waitAfterAction"],"mappings":";;;;;;;;;;;;;;;;;;AAkCA,MAAMA,QAAQC,SAAS;AAKvB,SAASC,iBAAiBC,KAAc;IACtC,OACEA,QAAAA,SAEA,AAAiB,YAAjB,OAAOA,SACPC,OAAO,IAAI,CAACD,OAAO,MAAM,GAAG;AAEhC;AAEO,SAASE,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,Q
AAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACN,OAAOD;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAyBO,MAAMC;IAyBX,MAAa,MACXC,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCC,OAAsB,EACoB;QAC1C,MAAMC,QAA8B,EAAE;QACtC,MAAMC,YAAYF,SAAS;QAE3B,MAAMG,UAA4B;YAChCF;YACAH;YACAC;YACAG;YACA,YAAYF,SAAS;YACrB,aAAaA,SAAS;QACxB;QAIA,MAAMI,eAAe,IAAIC,IAAyB;YAChD;gBACE;gBACA,CAACC,OACC,IAAI,CAAC,gBAAgB,CACnBA,MACAH;aAEL;YACD;gBAAC;gBAAY,CAACG,OAAS,IAAI,CAAC,kBAAkB,CAACA,MAAMH;aAAS;SAC/D;QAED,MAAMI,iBAA8B,CAACD,OACnC,IAAI,CAAC,gBAAgB,CAACA,MAAMH;QAE9B,KAAK,MAAMG,QAAQT,MAAO;YACxB,MAAMW,UAAUJ,aAAa,GAAG,CAACE,KAAK,IAAI,KAAKC;YAC/C,MAAMC,QAAQF;QAChB;QAEA,OAAO;YACLL;QACF;IACF;IAEQ,mBACNK,IAAoB,EACpBH,OAAyB,EACnB;QACN,MAAMM,qBAAqD;YACzD,MAAM;YACN,SAAS;YACT,OAAO;YACP,SAASH,KAAK,OAAO;YACrB,UAAU,WAAa;QACzB;QACAH,QAAQ,KAAK,CAAC,IAAI,CAACM;IACrB;IAEA,MAAc,iBACZH,IAAyC,EACzCH,OAAyB,EACV;QACf,MAAMO,aAAa,IAAI,CAAC,gBAAgB,CAACJ,MAAMA,KAAK,KAAK,EAAEH;QAC3DA,QAAQ,KAAK,CAAC,IAAI,CAACO;IACrB;IAEA,MAAc,iBACZJ,IAAoB,EACpBH,OAAyB,EACV;QACf,MAAMQ,WAAWL,KAAK,IAAI;QAC1B,MAAMM,cAAc,IAAI,CAAC,WAAW;QACpC,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACE,OAASA,KAAK,IAAI,KAAKH;QACxD,MAAMlB,QAAQa,KAAK,KAAK;QAExB,IAAI,CAACO,QACH,MAAM,IAAIE,MAAM,CAAC,aAAa,EAAEJ,SAAS,WAAW,CAAC;QAGvD,MAAMK,eAAeH,SACjBI,4BAA4BJ,OAAO,WAAW,IAC9C,EAAE;QAEN,MAAMK,uBAAuBL,SACzBI,4BAA4BJ,OAAO,WAAW,EAAE,QAChD,EAAE;QAENG,aAAa,OAAO,CAAC,CAACG;YACpB,IAAI1B,KAAK,CAAC0B,MAAM,EAAE;gBAGhB,MAAMxB,aAAaH,oBAAoBC,KAAK,CAAC0B,MAAM;gBACnDhC,MACE,uCACA,CAAC,YAAY,EAAEwB,UAAU,EACzB,CAAC,MAAM,EAAES,KAAK,SAAS,CAAC3B,KAAK,CAAC0B,MAAM,GAAG,EACvC,CAAC,WAAW,EAAEC,KAAK,SAAS,CAACzB,aAAa,EAC1C,CAAC,QAAQ,EAAE0B,wBAAwB5B,KAAK,CAAC0B,MAAM,GAAG;gBAEpD,MAAMG,aAAa,IAAI,CAAC,gBAAgB,CACtC3B,YACAF,KAAK,CAAC0B,MAAM,EACZhB,SACA,CAACoB;oBACC9B,KAAK,CAAC0B,MAAM,GAAGI;gBACjB;gBAEFpB,QAAQ,KAAK,CAAC,IAAI,CAACmB;YACrB,OAAO;gBACLE,OACE,CAACN,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAER,UAAU;gBAE3ExB,MAAM,CAAC,OAAO,EAAEgC,MAAM,6BAA6B,EAAER,UAAU;YACjE;QACF;QAEA,MAAMc,OAKF;YACF,MAAM;YACN,SAASd;YACT,SAASL,KA
AK,OAAO;YACrB,OAAOA,KAAK,KAAK;YACjB,UAAU,OAAOb,OAAOiC;gBACtB,MAAMC,SAASD,YAAY,IAAI,CAAC,MAAM;gBAEtCvC,MACE,oBACAwB,UACAlB,OACA,CAAC,4BAA4B,EAAEiC,YAAY,OAAO,EAAE,QAAQ;gBAG9D,MAAME,YAAYF,YAAY,SAAS;gBACvCF,OAAOI,WAAW;gBAElBV,qBAAqB,OAAO,CAAC,CAACC;oBAC5BK,OACE/B,KAAK,CAAC0B,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAER,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;gBAE9G;gBAEAkB,mBAAmBF,QAAQ;gBAC3B,IAAI;oBACF,MAAMG,QAAQ,GAAG,CAAC;wBACf;4BACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;gCACrC3C,MACE,CAAC,8DAA8D,EAAE0B,OAAO,IAAI,EAAE;gCAEhF,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAACA,OAAO,IAAI,EAAEpB;gCACrDN,MACE,CAAC,2DAA2D,EAAE0B,OAAO,IAAI,EAAE;4BAE/E;wBACF;wBACAkB,MAAM;qBACP;gBACH,EAAE,OAAOC,eAAoB;oBAC3B,MAAMC,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,wCAAwC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC5E;wBAAE,OAAOD;oBAAc;gBAE3B;gBACAH,mBAAmBF,QAAQ;gBAE3B,MAAM,EAAEQ,wBAAwB,EAAE,GAAGP;gBACrC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIF,OAAO,WAAW,EACpB,IAAI;oBACFpB,QAAQ4C,iBAAiB5C,OAAOoB,OAAO,WAAW,EAAE;wBAClDsB;oBACF;gBACF,EAAE,OAAOG,OAAY;oBACnB,MAAM,IAAIvB,MACR,CAAC,8BAA8B,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEyB,MAAM,OAAO,CAAC,cAAc,EAAElB,KAAK,SAAS,CAAC3B,QAAQ,EACtG;wBAAE,OAAO6C;oBAAM;gBAEnB;gBAGFT,mBAAmBF,QAAQ;gBAE3BxC,MAAM,kBAAkB0B,OAAO,IAAI;gBACnC,MAAM0B,WAAW1B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;gBAChD,MAAM2B,eAAe,MAAMD,SAAS9C,OAAOiC;gBAC3CG,mBAAmBF,QAAQ;gBAC3BxC,MAAM,iBAAiB0B,OAAO,IAAI,EAAE,WAAW2B;gBAE/CX,mBAAmBF,QAAQ;gBAE3B,MAAMc,mBACJ5B,OAAO,gBAAgB,IAAI,IAAI,CAAC,eAAe,IAAI;gBACrD,IAAI4B,mBAAmB,GACrB,MAAMV,MAAMU;gBAGd,IAAI;oBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;wBACpCtD,MACE,CAAC,6DAA6D,EAAE0B,OAAO,IAAI,EAAE;wBAE/E,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAACA,OAAO,IAAI,EAAEpB;wBACpDN,MACE,CAAC,0DAA0D,EAAE0B,OAAO,IAAI,EAAE;oBAE9E;gBACF,EAAE,OAAOmB,eAAoB;oBAC3B,MAAMC,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,uCAAuC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC3E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEAH,mBAAmBF,QAAQ;gBAE3B,OAAO;oBACL,QAAQa;gBACV;YACF;QACF;QAEArC,QAAQ,KAAK,CAAC,IA
AI,CAACsB;IACrB;IAEQ,iBACNnB,IAAyC,EACzCoC,mBAAiD,EACjDvC,OAAyB,EACzBwC,QAAgD,EACd;QAClC,MAAM,EAAEzC,SAAS,EAAEH,2BAA2B,EAAE6C,UAAU,EAAEC,WAAW,EAAE,GACvE1C;QAEF,IAAI2C,cAAcJ;QAElB,IAAI,AAAuB,YAAvB,OAAOI,aACTA,cAAc;YACZ,QAAQA;QACV;QAGF,IAAI5C,AAAckC,WAAdlC,WACF4C,cAAc;YACZ,GAAGA,WAAW;YACd5C;QACF;QAGF,IAAI0C,cAAc,CAACE,YAAY,UAAU,EACvCA,cAAc;YACZ,GAAGA,WAAW;YACd,YAAY;QACd;QAGF,MAAMC,cAAgD;YACpD,MAAM;YACN,SAAS;YACT,OAAOD;YACP,SAASxC,KAAK,OAAO;YACrB,UAAU,OAAOb,OAAOiC;gBACtB,MAAM,EAAED,IAAI,EAAE,GAAGC;gBACjB,IAAI,EAAEE,SAAS,EAAE,GAAGF;gBAEpBF,OACE/B,OAAO,UAAUA,OAAO,MACxB,CAAC,qDAAqD,EAAE2B,KAAK,SAAS,CACpE3B,QACC;gBAGL,IAAI,CAACmC,WACHA,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB;gBAGnDJ,OAAOI,WAAW;gBAElB,MAAM,EAAEO,wBAAwB,EAAE,GAAGP;gBAErC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIiC;gBACJ,IAAIC;gBAEJ,MAAMC,YAAY,CAACC;oBACjB,IAAI,CAACA,MACH;oBAEFH,aAAaG;oBACb1B,KAAK,GAAG,GAAG;wBACT0B;wBACA,aAAaA,KAAK,QAAQ,EAAE;oBAC9B;oBACA1B,KAAK,KAAK,GAAG0B,KAAK,QAAQ,EAAE;oBAC5B,IAAIA,KAAK,QAAQ,EAAE,iBACjB1B,KAAK,eAAe,GAAG0B,KAAK,QAAQ,CAAC,eAAe;oBAEtD,IAAIA,KAAK,QAAQ,EAAE,mBACjB1B,KAAK,iBAAiB,GAAG0B,KAAK,QAAQ,CAAC,iBAAiB;gBAE5D;gBAGA,MAAMC,kBAAkB/B,wBAAwB5B,SAC5C4D,qBAAqB5D,SACrB2C;gBACJ,MAAMkB,YAAY,CAAC,CAACF;gBAGpB,IAAIG;gBACJ,IACE,CAACD,aACD7D,MAAM,KAAK,IACX,IAAI,CAAC,SAAS,CAAC,uBAAuB,EAEtC,IAAI;oBACF8D,gBAAgB,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC;wBAC3D,QAAQ;4BAAC9D,MAAM,KAAK;yBAAC;oBACvB;gBACF,EAAE,OAAM,CAER;gBAGF,MAAM+D,mBAAmBD,gBACrBE,sBAEEC,qCACEH,eACApB,2BAEF,AAAwB,YAAxB,OAAO1C,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE,UAAU,MAE9B2C;gBAEJ,MAAMuB,aAAa,CAAC,CAACH;gBAErB,MAAMI,cAAcnE,MAAM,MAAM;gBAChC,MAAMoE,oBACJ,MAAM,IAAI,CAAC,SAAS,EAAE,iBAAiBD;gBACzC,MAAME,aAAaD,mBAAmB,cAAc;gBAEpD,MAAME,yBACJT,aAAaK,aACT,OACA,MAAMK,sBACJ;oBACE,WAAW,IAAI,CAAC,SAAS;oBACzB,mBAAmB,IAAI,CAAC,SAAS;gBACnC,GACAF,YACAF,aACAnE,MAAM,SAAS;gBAIvB,MAAMwE,mBAAmBF,yBACrBG,oCACEH,wBACA5B,4BAEFC;gBAEJ,MAAM+B,aAAa,CAAC,CAACF;gBAErB,IAAIG;gBACJ,MAAMzC,SAASD,YAAY,IAAI,CAAC,MAAM;gBACtC,IAAI,CAACiC,cAAc,CAACQ,cAA
c,CAACb,WACjC,IAAI;oBACFzB,mBAAmBF,QAAQ;oBAC3BsB,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACtCxD,OACA;wBACE,SAASmC;oBACX,GACA7B,6BACA8C;oBAEFK,UAAUD,aAAa,IAAI;oBAC3BmB,sBAAsBnB,aAAa,OAAO;gBAC5C,EAAE,OAAOX,OAAO;oBACd,IAAIA,iBAAiB+B,cACnBnB,UAAUZ,MAAM,IAAI;oBAEtB,MAAMA;gBACR,SAAU;oBACRT,mBAAmBF,QAAQ;gBAC7B;gBAGF,MAAM2C,UACJlB,mBACAI,oBACAS,oBACAG;gBAGF,MAAMG,2BAA2BlF,iBAC/BwE,mBAAmB,cAAc;gBAGnC,IAAIW;gBAOJ,IACEF,WACA,IAAI,CAAC,SAAS,IACd,CAACH,cACA,EAACb,aAAa,CAACiB,wBAAuB,KACvC9E,OAAO,cAAc,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,oBAAoB,EACrC,IAAI;oBAGF,IAAIgF,gBAAkCH,QAAQ,MAAM;oBACpD,IAAInC,AAA6B,MAA7BA,0BAAgC;wBAClCsC,gBAAgB;4BACdC,KAAK,KAAK,CAACJ,QAAQ,MAAM,CAAC,EAAE,GAAGnC;4BAC/BuC,KAAK,KAAK,CAACJ,QAAQ,MAAM,CAAC,EAAE,GAAGnC;yBAChC;wBACDhD,MACE,8DACAmF,QAAQ,MAAM,EACdG;oBAEJ;oBAEA,MAAME,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,oBAAoB,CACvDF,eACA;wBACE,mBACE,AAAwB,YAAxB,OAAOhF,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE;wBACpB,aAAaM;oBACf;oBAEF,IAAIV,iBAAiBsF,UAAU;wBAC7BxF,MACE,uCACAyE,aACAe;wBAEFH,oBAAoBG;wBACpB,MAAM,IAAI,CAAC,SAAS,CAAC,yBAAyB,CAC5C;4BACE,MAAM;4BACN,QAAQf;4BACR,OAAOe;wBACT,GACAd;oBAEJ,OACE1E,MACE,yDACAyE;gBAGN,EAAE,OAAOtB,OAAO;oBACdnD,MAAM,mCAAmCmD;gBAC3C;qBAEAnD,MAAM;gBAIV,IAAI,CAACmF,SAAS;oBACZ,IAAItB,YACF,MAAM,IAAIqB,aACR,CAAC,oBAAoB,EAAE5E,MAAM,MAAM,EAAE,EACrCuD;oBAGJ,MAAM,IAAIjC,MAAM,CAAC,mBAAmB,EAAEtB,MAAM,MAAM,EAAE;gBACtD;gBAEA,IAAImF;gBAEJ,IAAItB,WACFsB,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,MAAMnF,MAAM,IAAI;oBAClB;gBACF;qBACK,IAAIkE,YACTiB,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,OAAOnF,MAAM,KAAK;oBACpB;gBACF;qBACK,IAAI0E,YACTS,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACPd;wBACA,aAAaU;oBACf;gBACF;gBAGF7B,WAAW2B;gBAEX,OAAO;oBACL,QAAQ;wBACN,SAAS;4BACP,GAAGA,OAAO;4BAEV,KAAK1C,UAAU,aAAa;wBAC9B;oBACF;oBACAgD;gBACF;YACF;QACF;QAEA,OAAO7B;IACT;IA/iBA,YAAY,EACV8B,iBAAiB,EACjBC,OAAO,EACPC,SAAS,EACTnE,WAAW,EACXoE,eAAe,EACC,CAAE;QAhBpB,uBAAiB,aAAjB;QAEA,uBAAiB,WAAjB;QAEA,uBAAiB,aAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAiB,mBAAjB;QASE,IAAI,CAAC,SAAS,GAAGH;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC;QAC
jB,IAAI,CAAC,WAAW,GAAGnE;QACnB,IAAI,CAAC,eAAe,GAAGoE;IACzB;AAoiBF"}
1
+ {"version":3,"file":"agent/task-builder.mjs","sources":["../../../src/agent/task-builder.ts"],"sourcesContent":["import {\n calculateConfidence,\n createInitialConfidenceState,\n createInitialProgressiveRecord,\n determineVerificationLevel,\n getVerificationActions,\n updateConfidenceOnVerify,\n updateProgressiveConvergence,\n} from '@/agent/cache-confidence';\nimport { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport { setTimingFieldOnce } from '@/task-timing';\nimport type {\n CacheConfidenceState,\n CacheValidationOptions,\n DetailedLocateParam,\n DeviceAction,\n ElementCacheFeature,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskPlanningLocateApply,\n LocateResultElement,\n LocateResultWithDump,\n PlanningAction,\n PlanningLocateParam,\n ProgressiveLocateRecord,\n Rect,\n SemanticAnchor,\n ServiceDump,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { sleep } from '@/utils';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD,\n MIDSCENE_CACHE_ENABLE_COORD_CHECK,\n MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR,\n MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY,\n} from '@midscene/shared/env/constants';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport {\n ifPlanLocateParamIsBbox,\n matchElementFromCache,\n matchElementFromPlan,\n transformLogicalElementToScreenshot,\n transformLogicalRectToScreenshotRect,\n} from './utils';\n\nconst debug = getDebug('agent:task-builder');\n\n/**\n * Check if a cache object is non-empty\n */\nfunction hasNonEmptyCache(cache: unknown): boolean {\n return (\n cache !== null &&\n cache !== undefined &&\n typeof cache === 'object' &&\n 
Object.keys(cache).length > 0\n );\n}\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? { prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\ninterface TaskBuilderDeps {\n interfaceInstance: AbstractInterface;\n service: Service;\n taskCache?: TaskCache;\n actionSpace: DeviceAction[];\n waitAfterAction?: number;\n}\n\ninterface BuildOptions {\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\ninterface PlanBuildContext {\n tasks: ExecutionTaskApply[];\n modelConfigForPlanning: IModelConfig;\n modelConfigForDefaultIntent: IModelConfig;\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\nexport class TaskBuilder {\n private readonly interface: AbstractInterface;\n\n private readonly service: Service;\n\n private readonly taskCache?: TaskCache;\n\n private readonly actionSpace: DeviceAction[];\n\n private readonly waitAfterAction?: number;\n\n constructor({\n interfaceInstance,\n service,\n taskCache,\n actionSpace,\n waitAfterAction,\n }: TaskBuilderDeps) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = taskCache;\n this.actionSpace = actionSpace;\n this.waitAfterAction = waitAfterAction;\n }\n\n public async build(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: BuildOptions,\n ): Promise<{ tasks: ExecutionTaskApply[] }> {\n const tasks: ExecutionTaskApply[] = [];\n const cacheable = options?.cacheable;\n\n const context: PlanBuildContext = {\n tasks,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n cacheable,\n deepLocate: options?.deepLocate,\n abortSignal: options?.abortSignal,\n };\n\n type PlanHandler = (plan: PlanningAction) => Promise<void> | void;\n\n const planHandlers = new Map<string, 
PlanHandler>([\n [\n 'Locate',\n (plan) =>\n this.handleLocatePlan(\n plan as PlanningAction<PlanningLocateParam>,\n context,\n ),\n ],\n ['Finished', (plan) => this.handleFinishedPlan(plan, context)],\n ]);\n\n const defaultHandler: PlanHandler = (plan) =>\n this.handleActionPlan(plan, context);\n\n for (const plan of plans) {\n const handler = planHandlers.get(plan.type) ?? defaultHandler;\n await handler(plan);\n }\n\n return {\n tasks,\n };\n }\n\n private handleFinishedPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): void {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action Space',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n executor: async () => {},\n };\n context.tasks.push(taskActionFinished);\n }\n\n private async handleLocatePlan(\n plan: PlanningAction<PlanningLocateParam>,\n context: PlanBuildContext,\n ): Promise<void> {\n const taskLocate = this.createLocateTask(plan, plan.param, context);\n context.tasks.push(taskLocate);\n }\n\n private async handleActionPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): Promise<void> {\n const planType = plan.type;\n const actionSpace = this.actionSpace;\n const action = actionSpace.find((item) => item.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n const locateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? 
findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n // Always use createLocateTask for all locate params (including bbox)\n // This ensures cache writing happens even when bbox is available\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n `hasBbox=${ifPlanLocateParamIsBbox(param[field])}`,\n );\n const locateTask = this.createLocateTask(\n locatePlan,\n param[field],\n context,\n (result) => {\n param[field] = result;\n },\n );\n context.tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action Space',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action Space',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n executor: async (param, taskContext) => {\n const timing = taskContext.task.timing;\n\n debug(\n 'executing action',\n planType,\n param,\n `taskContext.element.center: ${taskContext.element?.center}`,\n );\n\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Action task');\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. 
Cannot execute action ${planType}.`,\n );\n });\n\n setTimingFieldOnce(timing, 'beforeInvokeActionHookStart');\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug(\n `will call \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.beforeInvokeAction(action.name, param);\n debug(\n `called \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n setTimingFieldOnce(timing, 'beforeInvokeActionHookEnd');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in Action task',\n );\n }\n\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema, {\n shrunkShotToLogicalRatio,\n });\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n setTimingFieldOnce(timing, 'callActionStart');\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n const actionResult = await actionFn(param, taskContext);\n setTimingFieldOnce(timing, 'callActionEnd');\n debug('called action', action.name, 'result:', actionResult);\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookStart');\n\n const delayAfterRunner =\n action.delayAfterRunner ?? this.waitAfterAction ?? 
300;\n if (delayAfterRunner > 0) {\n await sleep(delayAfterRunner);\n }\n\n try {\n if (this.interface.afterInvokeAction) {\n debug(\n `will call \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.afterInvokeAction(action.name, param);\n debug(\n `called \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookEnd');\n\n return {\n output: actionResult,\n };\n },\n };\n\n context.tasks.push(task);\n }\n\n private createLocateTask(\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n context: PlanBuildContext,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskPlanningLocateApply {\n const { cacheable, modelConfigForDefaultIntent, deepLocate, abortSignal } =\n context;\n\n let locateParam = detailedLocateParam;\n\n if (typeof locateParam === 'string') {\n locateParam = {\n prompt: locateParam,\n };\n }\n\n if (cacheable !== undefined) {\n locateParam = {\n ...locateParam,\n cacheable,\n };\n }\n\n if (deepLocate && !locateParam.deepLocate) {\n locateParam = {\n ...locateParam,\n deepLocate: true,\n };\n }\n\n const taskLocator: ExecutionTaskPlanningLocateApply = {\n type: 'Planning',\n subType: 'Locate',\n param: locateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let { uiContext } = taskContext;\n\n assert(\n param?.prompt || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n\n if (!uiContext) {\n uiContext = await this.service.contextRetrieverFn();\n }\n\n assert(uiContext, 'uiContext is required for Service 
task');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in locate task',\n );\n }\n\n let locateDump: ServiceDump | undefined;\n let locateResult: LocateResultWithDump | undefined;\n\n const applyDump = (dump?: ServiceDump) => {\n if (!dump) {\n return;\n }\n locateDump = dump;\n task.log = {\n dump,\n rawResponse: dump.taskInfo?.rawResponse,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n if (dump.taskInfo?.reasoning_content) {\n task.reasoning_content = dump.taskInfo.reasoning_content;\n }\n };\n\n // from bbox (plan hit)\n const elementFromBbox = ifPlanLocateParamIsBbox(param)\n ? matchElementFromPlan(param)\n : undefined;\n const isPlanHit = !!elementFromBbox;\n\n // from xpath\n let rectFromXpath: Rect | undefined;\n if (\n !isPlanHit &&\n param.xpath &&\n this.interface.rectMatchesCacheFeature\n ) {\n try {\n rectFromXpath = await this.interface.rectMatchesCacheFeature({\n xpaths: [param.xpath],\n });\n } catch {\n // xpath locate failed, allow fallback to cache or AI locate\n }\n }\n\n const elementFromXpath = rectFromXpath\n ? generateElementByRect(\n // rectFromXpath is in logical coordinates, which should be transformed to screenshot coordinates;\n transformLogicalRectToScreenshotRect(\n rectFromXpath,\n shrunkShotToLogicalRatio,\n ),\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt || '',\n )\n : undefined;\n\n const isXpathHit = !!elementFromXpath;\n\n const cachePrompt = param.prompt;\n const locateCacheRecord =\n await this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n\n const elementFromCacheResult =\n isPlanHit || isXpathHit\n ? 
null\n : await matchElementFromCache(\n {\n taskCache: this.taskCache,\n interfaceInstance: this.interface,\n },\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n\n // elementFromCacheResult is in logical coordinates, which should be transformed to screenshot coordinates;\n let elementFromCache = elementFromCacheResult\n ? transformLogicalElementToScreenshot(\n elementFromCacheResult,\n shrunkShotToLogicalRatio,\n )\n : undefined;\n\n let isCacheHit = !!elementFromCache;\n const timing = taskContext.task.timing;\n let elementFromAiLocate: LocateResultElement | null | undefined;\n\n if (isCacheHit) {\n const cacheFeature = cacheEntry as ElementCacheFeature;\n const cachedCenter = cacheFeature?.cachedCenter as\n | [number, number]\n | undefined;\n\n const coordOffsetThreshold =\n Number.parseInt(\n process.env[MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD] || '16',\n 10,\n ) || 16;\n\n const confidenceState = (cacheFeature?.confidenceState ||\n createInitialConfidenceState()) as CacheConfidenceState;\n const confidence = calculateConfidence(confidenceState);\n const level = determineVerificationLevel(confidence);\n const actions = getVerificationActions(level);\n\n debug('cache confidence assessment', {\n confidence: confidence.toFixed(3),\n level,\n actions,\n verificationCount: confidenceState.verificationCount,\n });\n\n if (actions.skipCache) {\n debug('cache confidence too low, skipping cache entirely', {\n confidence,\n level,\n });\n isCacheHit = false;\n }\n\n try {\n if (isCacheHit && actions.coordCheck && cachedCenter) {\n const offset = Math.sqrt(\n (elementFromCache!.center[0] - cachedCenter[0]) ** 2 +\n (elementFromCache!.center[1] - cachedCenter[1]) ** 2,\n );\n\n debug('cache coord offset check', {\n cachedCenter,\n currentCenter: elementFromCache!.center,\n offset: Math.round(offset),\n threshold: coordOffsetThreshold,\n });\n\n if (offset > coordOffsetThreshold) {\n debug(\n 'cache coord offset exceeded threshold, fallback to AI locate',\n { offset, 
threshold: coordOffsetThreshold },\n );\n isCacheHit = false;\n }\n }\n\n if (isCacheHit && actions.visualVerify) {\n const verification = await this.service.verifyCachedElement(\n elementFromCache!.center,\n cachePrompt,\n modelConfigForDefaultIntent,\n uiContext,\n );\n if (!verification.pass) {\n debug(\n 'cache hit but visual verification failed, fallback to AI locate',\n {\n reason: verification.reason,\n description: verification.description,\n prompt: cachePrompt,\n },\n );\n isCacheHit = false;\n } else {\n debug('cache hit and visual verification passed', {\n description: verification.description,\n });\n }\n }\n } catch (verifyError) {\n debug(\n 'cache verification error, fallback to AI locate',\n verifyError,\n );\n isCacheHit = false;\n }\n\n if (isCacheHit) {\n const updatedState = updateConfidenceOnVerify(\n confidenceState,\n true,\n );\n cacheFeature.confidenceState = updatedState;\n\n const progressiveRecord = cacheFeature.progressiveRecord as\n | ProgressiveLocateRecord\n | undefined;\n if (progressiveRecord) {\n const updated = updateProgressiveConvergence(\n progressiveRecord,\n elementFromCache!.center,\n updatedState.confidenceScore,\n );\n cacheFeature.progressiveRecord = updated;\n\n if (updated.convergenceRadius < 5 && updated.sampleCount >= 3) {\n debug(\n 'using converged center instead of single-result center',\n {\n convergedCenter: updated.convergedCenter.map((v) =>\n v.toFixed(1),\n ),\n singleCenter: elementFromCache!.center,\n convergenceRadius: updated.convergenceRadius.toFixed(1),\n sampleCount: updated.sampleCount,\n },\n );\n elementFromCache = {\n ...elementFromCache!,\n center: [\n Math.round(updated.convergedCenter[0]),\n Math.round(updated.convergedCenter[1]),\n ],\n };\n }\n }\n } else {\n const updatedState = updateConfidenceOnVerify(\n confidenceState,\n false,\n );\n cacheFeature.confidenceState = updatedState;\n }\n }\n\n if (!isXpathHit && !isCacheHit && !isPlanHit) {\n const cacheFeature = cacheEntry as 
ElementCacheFeature;\n const semanticAnchor = cacheFeature?.semanticAnchor as\n | SemanticAnchor\n | undefined;\n\n if (\n semanticAnchor &&\n process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR] !== 'false'\n ) {\n try {\n const anchorResult = await this.service.locateBySemanticAnchor(\n semanticAnchor,\n modelConfigForDefaultIntent,\n this.interface,\n uiContext,\n );\n if (anchorResult) {\n elementFromAiLocate = anchorResult;\n debug(\n 'semantic anchor locate succeeded, skipping full AI locate',\n );\n }\n } catch (anchorError) {\n debug('semantic anchor locate failed:', anchorError);\n }\n }\n\n if (!elementFromAiLocate) {\n try {\n setTimingFieldOnce(timing, 'callAiStart');\n locateResult = await this.service.locate(\n param,\n {\n context: uiContext,\n },\n modelConfigForDefaultIntent,\n abortSignal,\n );\n applyDump(locateResult.dump);\n elementFromAiLocate = locateResult.element;\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n } finally {\n setTimingFieldOnce(timing, 'callAiEnd');\n }\n }\n }\n\n const element =\n elementFromBbox ||\n elementFromXpath ||\n elementFromCache ||\n elementFromAiLocate;\n\n // Check if locate cache already exists (for planHitFlag case)\n const locateCacheAlreadyExists = hasNonEmptyCache(\n locateCacheRecord?.cacheContent?.cache,\n );\n\n let currentCacheEntry: ElementCacheFeature | undefined;\n // Write cache if:\n // 1. element found\n // 2. taskCache enabled\n // 3. not a cache hit (otherwise we'd be writing what we just read)\n // 4. not already cached for plan hit case (avoid redundant writes), OR allow update if cache validation failed\n // 5. 
cacheable is not explicitly false\n if (\n element &&\n this.taskCache &&\n !isCacheHit &&\n (!isPlanHit || !locateCacheAlreadyExists) &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForPoint) {\n try {\n // Transform coordinates to logical space for cacheFeatureForPoint\n // cacheFeatureForPoint needs logical coordinates to locate elements in DOM\n let pointForCache: [number, number] = element.center;\n if (shrunkShotToLogicalRatio !== 1) {\n pointForCache = [\n Math.round(element.center[0] / shrunkShotToLogicalRatio),\n Math.round(element.center[1] / shrunkShotToLogicalRatio),\n ];\n debug(\n 'Transformed coordinates for cacheFeatureForPoint: %o -> %o',\n element.center,\n pointForCache,\n );\n }\n\n const feature = await this.interface.cacheFeatureForPoint(\n pointForCache,\n {\n targetDescription:\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt,\n modelConfig: modelConfigForDefaultIntent,\n },\n );\n if (hasNonEmptyCache(feature)) {\n feature.cachedCenter = pointForCache;\n feature.confidenceState = createInitialConfidenceState();\n feature.progressiveRecord =\n createInitialProgressiveRecord(pointForCache);\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n\n const enableSemanticAnchor =\n process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR] !==\n 'false';\n if (enableSemanticAnchor) {\n try {\n const anchor = await this.service.generateSemanticAnchor(\n pointForCache,\n modelConfigForDefaultIntent,\n uiContext,\n );\n if (anchor) {\n feature.semanticAnchor = anchor;\n debug(\n 'semantic anchor generated for prompt: %s',\n cachePrompt,\n );\n }\n } catch (anchorError) {\n debug('generateSemanticAnchor failed:', anchorError);\n }\n }\n\n currentCacheEntry = feature;\n await this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: 
%s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForPoint failed: %s', error);\n }\n } else {\n debug('cacheFeatureForPoint is not supported, skip cache update');\n }\n }\n\n if (!element) {\n if (locateDump) {\n throw new ServiceError(\n `Element not found : ${param.prompt}`,\n locateDump,\n );\n }\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (isPlanHit) {\n hitBy = {\n from: 'Plan',\n context: {\n bbox: param.bbox,\n },\n };\n } else if (isXpathHit) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (isCacheHit) {\n hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element: {\n ...element,\n // backward compatibility for aiLocate, which return value needs a dpr field\n dpr: uiContext.deprecatedDpr,\n },\n },\n hitBy,\n };\n },\n };\n\n return taskLocator;\n 
}\n}\n"],"names":["debug","getDebug","hasNonEmptyCache","cache","Object","locatePlanForLocate","param","locate","locatePlan","TaskBuilder","plans","modelConfigForPlanning","modelConfigForDefaultIntent","options","tasks","cacheable","context","planHandlers","Map","plan","defaultHandler","handler","taskActionFinished","taskLocate","planType","actionSpace","action","item","Error","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","JSON","ifPlanLocateParamIsBbox","locateTask","result","assert","task","taskContext","timing","uiContext","setTimingFieldOnce","Promise","sleep","originalError","originalMessage","String","shrunkShotToLogicalRatio","undefined","parseActionParam","error","actionFn","actionResult","delayAfterRunner","detailedLocateParam","onResult","deepLocate","abortSignal","locateParam","taskLocator","locateDump","locateResult","applyDump","dump","elementFromBbox","matchElementFromPlan","isPlanHit","rectFromXpath","elementFromXpath","generateElementByRect","transformLogicalRectToScreenshotRect","isXpathHit","cachePrompt","locateCacheRecord","cacheEntry","elementFromCacheResult","matchElementFromCache","elementFromCache","transformLogicalElementToScreenshot","isCacheHit","elementFromAiLocate","cacheFeature","cachedCenter","coordOffsetThreshold","Number","process","MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD","confidenceState","createInitialConfidenceState","confidence","calculateConfidence","level","determineVerificationLevel","actions","getVerificationActions","offset","Math","verification","verifyError","updatedState","updateConfidenceOnVerify","progressiveRecord","updated","updateProgressiveConvergence","v","semanticAnchor","MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR","anchorResult","anchorError","ServiceError","element","locateCacheAlreadyExists","currentCacheEntry","pointForCache","feature","createInitialProgressiveRecord","enableSemanticAnchor","anchor","hitBy","interfaceInstance","service","taskCache","waitAfterAction"],"mappings":";;;;;;;;
;;;;;;;;;;;;AAqDA,MAAMA,QAAQC,SAAS;AAKvB,SAASC,iBAAiBC,KAAc;IACtC,OACEA,QAAAA,SAEA,AAAiB,YAAjB,OAAOA,SACPC,OAAO,IAAI,CAACD,OAAO,MAAM,GAAG;AAEhC;AAEO,SAASE,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACN,OAAOD;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAyBO,MAAMC;IAyBX,MAAa,MACXC,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCC,OAAsB,EACoB;QAC1C,MAAMC,QAA8B,EAAE;QACtC,MAAMC,YAAYF,SAAS;QAE3B,MAAMG,UAA4B;YAChCF;YACAH;YACAC;YACAG;YACA,YAAYF,SAAS;YACrB,aAAaA,SAAS;QACxB;QAIA,MAAMI,eAAe,IAAIC,IAAyB;YAChD;gBACE;gBACA,CAACC,OACC,IAAI,CAAC,gBAAgB,CACnBA,MACAH;aAEL;YACD;gBAAC;gBAAY,CAACG,OAAS,IAAI,CAAC,kBAAkB,CAACA,MAAMH;aAAS;SAC/D;QAED,MAAMI,iBAA8B,CAACD,OACnC,IAAI,CAAC,gBAAgB,CAACA,MAAMH;QAE9B,KAAK,MAAMG,QAAQT,MAAO;YACxB,MAAMW,UAAUJ,aAAa,GAAG,CAACE,KAAK,IAAI,KAAKC;YAC/C,MAAMC,QAAQF;QAChB;QAEA,OAAO;YACLL;QACF;IACF;IAEQ,mBACNK,IAAoB,EACpBH,OAAyB,EACnB;QACN,MAAMM,qBAAqD;YACzD,MAAM;YACN,SAAS;YACT,OAAO;YACP,SAASH,KAAK,OAAO;YACrB,UAAU,WAAa;QACzB;QACAH,QAAQ,KAAK,CAAC,IAAI,CAACM;IACrB;IAEA,MAAc,iBACZH,IAAyC,EACzCH,OAAyB,EACV;QACf,MAAMO,aAAa,IAAI,CAAC,gBAAgB,CAACJ,MAAMA,KAAK,KAAK,EAAEH;QAC3DA,QAAQ,KAAK,CAAC,IAAI,CAACO;IACrB;IAEA,MAAc,iBACZJ,IAAoB,EACpBH,OAAyB,EACV;QACf,MAAMQ,WAAWL,KAAK,IAAI;QAC1B,MAAMM,cAAc,IAAI,CAAC,WAAW;QACpC,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACE,OAASA,KAAK,IAAI,KAAKH;QACxD,MAAMlB,QAAQa,KAAK,KAAK;QAExB,IAAI,CAACO,QACH,MAAM,IAAIE,MAAM,CAAC,aAAa,EAAEJ,SAAS,WAAW,CAAC;QAGvD,MAAMK,eAAeH,SACjBI,4BAA4BJ,OAAO,WAAW,IAC9C,EAAE;QAEN,MAAMK,uBAAuBL,SACzBI,4BAA4BJ,OAAO,WAAW,EAAE,QAChD,EAAE;QAENG,aAAa,OAAO,CAAC,CAACG;YACpB,IAAI1B,KAAK,CAAC0B,MAAM,EAAE;gBAGhB,MAAMxB,aAAaH,oBAAoBC,KAAK,CAAC0B,MAAM;gBACnDhC,MACE,uCACA,CAAC,YAAY,EAAEwB,UAAU,EACzB,CAAC,MAAM,EAAES,KAAK,SAAS,CAAC3B,KAAK,CAAC0B,MAAM,GAAG,EACvC,CAAC,WAAW,EAAEC,KAAK,SAAS,CAACzB,aAAa,EAC1C,CAAC,QAAQ,EAAE0B,wBAAwB5B,KAAK,CAAC0B,MAAM,GAAG;gBAEpD,MAAMG,aAAa,IAAI,CAAC,gBAAgB,CACtC3B,YACAF,KAAK,CAAC0B,MAAM,EACZhB,SACA,CAACoB;oBACC9B,KAAK,CAAC0B,MAAM,GAAGI;gBACjB;gBAEFpB,QAAQ,KAAK,CAAC,IAAI,
CAACmB;YACrB,OAAO;gBACLE,OACE,CAACN,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAER,UAAU;gBAE3ExB,MAAM,CAAC,OAAO,EAAEgC,MAAM,6BAA6B,EAAER,UAAU;YACjE;QACF;QAEA,MAAMc,OAKF;YACF,MAAM;YACN,SAASd;YACT,SAASL,KAAK,OAAO;YACrB,OAAOA,KAAK,KAAK;YACjB,UAAU,OAAOb,OAAOiC;gBACtB,MAAMC,SAASD,YAAY,IAAI,CAAC,MAAM;gBAEtCvC,MACE,oBACAwB,UACAlB,OACA,CAAC,4BAA4B,EAAEiC,YAAY,OAAO,EAAE,QAAQ;gBAG9D,MAAME,YAAYF,YAAY,SAAS;gBACvCF,OAAOI,WAAW;gBAElBV,qBAAqB,OAAO,CAAC,CAACC;oBAC5BK,OACE/B,KAAK,CAAC0B,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAER,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;gBAE9G;gBAEAkB,mBAAmBF,QAAQ;gBAC3B,IAAI;oBACF,MAAMG,QAAQ,GAAG,CAAC;wBACf;4BACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;gCACrC3C,MACE,CAAC,8DAA8D,EAAE0B,OAAO,IAAI,EAAE;gCAEhF,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAACA,OAAO,IAAI,EAAEpB;gCACrDN,MACE,CAAC,2DAA2D,EAAE0B,OAAO,IAAI,EAAE;4BAE/E;wBACF;wBACAkB,MAAM;qBACP;gBACH,EAAE,OAAOC,eAAoB;oBAC3B,MAAMC,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,wCAAwC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC5E;wBAAE,OAAOD;oBAAc;gBAE3B;gBACAH,mBAAmBF,QAAQ;gBAE3B,MAAM,EAAEQ,wBAAwB,EAAE,GAAGP;gBACrC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIF,OAAO,WAAW,EACpB,IAAI;oBACFpB,QAAQ4C,iBAAiB5C,OAAOoB,OAAO,WAAW,EAAE;wBAClDsB;oBACF;gBACF,EAAE,OAAOG,OAAY;oBACnB,MAAM,IAAIvB,MACR,CAAC,8BAA8B,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEyB,MAAM,OAAO,CAAC,cAAc,EAAElB,KAAK,SAAS,CAAC3B,QAAQ,EACtG;wBAAE,OAAO6C;oBAAM;gBAEnB;gBAGFT,mBAAmBF,QAAQ;gBAE3BxC,MAAM,kBAAkB0B,OAAO,IAAI;gBACnC,MAAM0B,WAAW1B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;gBAChD,MAAM2B,eAAe,MAAMD,SAAS9C,OAAOiC;gBAC3CG,mBAAmBF,QAAQ;gBAC3BxC,MAAM,iBAAiB0B,OAAO,IAAI,EAAE,WAAW2B;gBAE/CX,mBAAmBF,QAAQ;gBAE3B,MAAMc,mBACJ5B,OAAO,gBAAgB,IAAI,IAAI,CAAC,eAAe,IAAI;gBACrD,IAAI4B,mBAAmB,GACrB,MAAMV,MAAMU;gBAGd,IAAI;oBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;wBACpCtD,MACE,CAAC,6DAA6D,EAAE0B,OAAO,IAAI,EAAE;wBAE/E,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAACA,OAAO,IAAI,EAAEpB;wBACpDN,MACE,CAAC,0DAA0D,EAAE0B,OAAO,IAAI,EAAE;oBAE9E;gBACF,EAAE,OAAOmB,eAAoB;oBAC3B,MAAMC
,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,uCAAuC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC3E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEAH,mBAAmBF,QAAQ;gBAE3B,OAAO;oBACL,QAAQa;gBACV;YACF;QACF;QAEArC,QAAQ,KAAK,CAAC,IAAI,CAACsB;IACrB;IAEQ,iBACNnB,IAAyC,EACzCoC,mBAAiD,EACjDvC,OAAyB,EACzBwC,QAAgD,EACd;QAClC,MAAM,EAAEzC,SAAS,EAAEH,2BAA2B,EAAE6C,UAAU,EAAEC,WAAW,EAAE,GACvE1C;QAEF,IAAI2C,cAAcJ;QAElB,IAAI,AAAuB,YAAvB,OAAOI,aACTA,cAAc;YACZ,QAAQA;QACV;QAGF,IAAI5C,AAAckC,WAAdlC,WACF4C,cAAc;YACZ,GAAGA,WAAW;YACd5C;QACF;QAGF,IAAI0C,cAAc,CAACE,YAAY,UAAU,EACvCA,cAAc;YACZ,GAAGA,WAAW;YACd,YAAY;QACd;QAGF,MAAMC,cAAgD;YACpD,MAAM;YACN,SAAS;YACT,OAAOD;YACP,SAASxC,KAAK,OAAO;YACrB,UAAU,OAAOb,OAAOiC;gBACtB,MAAM,EAAED,IAAI,EAAE,GAAGC;gBACjB,IAAI,EAAEE,SAAS,EAAE,GAAGF;gBAEpBF,OACE/B,OAAO,UAAUA,OAAO,MACxB,CAAC,qDAAqD,EAAE2B,KAAK,SAAS,CACpE3B,QACC;gBAGL,IAAI,CAACmC,WACHA,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB;gBAGnDJ,OAAOI,WAAW;gBAElB,MAAM,EAAEO,wBAAwB,EAAE,GAAGP;gBAErC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIiC;gBACJ,IAAIC;gBAEJ,MAAMC,YAAY,CAACC;oBACjB,IAAI,CAACA,MACH;oBAEFH,aAAaG;oBACb1B,KAAK,GAAG,GAAG;wBACT0B;wBACA,aAAaA,KAAK,QAAQ,EAAE;oBAC9B;oBACA1B,KAAK,KAAK,GAAG0B,KAAK,QAAQ,EAAE;oBAC5B,IAAIA,KAAK,QAAQ,EAAE,iBACjB1B,KAAK,eAAe,GAAG0B,KAAK,QAAQ,CAAC,eAAe;oBAEtD,IAAIA,KAAK,QAAQ,EAAE,mBACjB1B,KAAK,iBAAiB,GAAG0B,KAAK,QAAQ,CAAC,iBAAiB;gBAE5D;gBAGA,MAAMC,kBAAkB/B,wBAAwB5B,SAC5C4D,qBAAqB5D,SACrB2C;gBACJ,MAAMkB,YAAY,CAAC,CAACF;gBAGpB,IAAIG;gBACJ,IACE,CAACD,aACD7D,MAAM,KAAK,IACX,IAAI,CAAC,SAAS,CAAC,uBAAuB,EAEtC,IAAI;oBACF8D,gBAAgB,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC;wBAC3D,QAAQ;4BAAC9D,MAAM,KAAK;yBAAC;oBACvB;gBACF,EAAE,OAAM,CAER;gBAGF,MAAM+D,mBAAmBD,gBACrBE,sBAEEC,qCACEH,eACApB,2BAEF,AAAwB,YAAxB,OAAO1C,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE,UAAU,MAE9B2C;gBAEJ,MAAMuB,aAAa,CAAC,CAACH;gBAErB,MAAMI,cAAcnE,MAAM,MAAM;gBAChC,MAAMoE,oBACJ,MAAM,IAAI,CAAC,SAAS,EAAE,iBAAiBD;gBACzC,MAAME,aAAaD,mBAAmB,cAAc;gBAEpD,MAAME,yBACJT,aAAaK,aACT,OACA,MAAMK,sBACJ;oBACE,WAAW,IAAI,CAAC,SAAS;oBACzB,mBAAmB,I
AAI,CAAC,SAAS;gBACnC,GACAF,YACAF,aACAnE,MAAM,SAAS;gBAIvB,IAAIwE,mBAAmBF,yBACnBG,oCACEH,wBACA5B,4BAEFC;gBAEJ,IAAI+B,aAAa,CAAC,CAACF;gBACnB,MAAMtC,SAASD,YAAY,IAAI,CAAC,MAAM;gBACtC,IAAI0C;gBAEJ,IAAID,YAAY;oBACd,MAAME,eAAeP;oBACrB,MAAMQ,eAAeD,cAAc;oBAInC,MAAME,uBACJC,OAAO,QAAQ,CACbC,QAAQ,GAAG,CAACC,sCAAsC,IAAI,MACtD,OACG;oBAEP,MAAMC,kBAAmBN,cAAc,mBACrCO;oBACF,MAAMC,aAAaC,oBAAoBH;oBACvC,MAAMI,QAAQC,2BAA2BH;oBACzC,MAAMI,UAAUC,uBAAuBH;oBAEvC5F,MAAM,+BAA+B;wBACnC,YAAY0F,WAAW,OAAO,CAAC;wBAC/BE;wBACAE;wBACA,mBAAmBN,gBAAgB,iBAAiB;oBACtD;oBAEA,IAAIM,QAAQ,SAAS,EAAE;wBACrB9F,MAAM,qDAAqD;4BACzD0F;4BACAE;wBACF;wBACAZ,aAAa;oBACf;oBAEA,IAAI;wBACF,IAAIA,cAAcc,QAAQ,UAAU,IAAIX,cAAc;4BACpD,MAAMa,SAASC,KAAK,IAAI,CACrBnB,AAAAA,CAAAA,iBAAkB,MAAM,CAAC,EAAE,GAAGK,YAAY,CAAC,EAAC,KAAM,IAChDL,AAAAA,CAAAA,iBAAkB,MAAM,CAAC,EAAE,GAAGK,YAAY,CAAC,EAAC,KAAM;4BAGvDnF,MAAM,4BAA4B;gCAChCmF;gCACA,eAAeL,iBAAkB,MAAM;gCACvC,QAAQmB,KAAK,KAAK,CAACD;gCACnB,WAAWZ;4BACb;4BAEA,IAAIY,SAASZ,sBAAsB;gCACjCpF,MACE,gEACA;oCAAEgG;oCAAQ,WAAWZ;gCAAqB;gCAE5CJ,aAAa;4BACf;wBACF;wBAEA,IAAIA,cAAcc,QAAQ,YAAY,EAAE;4BACtC,MAAMI,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,mBAAmB,CACzDpB,iBAAkB,MAAM,EACxBL,aACA7D,6BACA6B;4BAEF,IAAKyD,aAAa,IAAI,EAWpBlG,MAAM,4CAA4C;gCAChD,aAAakG,aAAa,WAAW;4BACvC;iCAbsB;gCACtBlG,MACE,mEACA;oCACE,QAAQkG,aAAa,MAAM;oCAC3B,aAAaA,aAAa,WAAW;oCACrC,QAAQzB;gCACV;gCAEFO,aAAa;4BACf;wBAKF;oBACF,EAAE,OAAOmB,aAAa;wBACpBnG,MACE,mDACAmG;wBAEFnB,aAAa;oBACf;oBAEA,IAAIA,YAAY;wBACd,MAAMoB,eAAeC,yBACnBb,iBACA;wBAEFN,aAAa,eAAe,GAAGkB;wBAE/B,MAAME,oBAAoBpB,aAAa,iBAAiB;wBAGxD,IAAIoB,mBAAmB;4BACrB,MAAMC,UAAUC,6BACdF,mBACAxB,iBAAkB,MAAM,EACxBsB,aAAa,eAAe;4BAE9BlB,aAAa,iBAAiB,GAAGqB;4BAEjC,IAAIA,QAAQ,iBAAiB,GAAG,KAAKA,QAAQ,WAAW,IAAI,GAAG;gCAC7DvG,MACE,0DACA;oCACE,iBAAiBuG,QAAQ,eAAe,CAAC,GAAG,CAAC,CAACE,IAC5CA,EAAE,OAAO,CAAC;oCAEZ,cAAc3B,iBAAkB,MAAM;oCACtC,mBAAmByB,QAAQ,iBAAiB,CAAC,OAAO,CAAC;oCACrD,aAAaA,QAAQ,WAAW;gCAClC;gCAEFzB,mBAAmB;oCACjB,GAAGA,gBAAgB;oCACnB,QAAQ;wCACNmB,KAAK,KAAK,CAACM,QAAQ,eAAe,CAAC,EAAE;wCACrCN,KAAK,KAAK,CAACM,QAAQ,eAA
e,CAAC,EAAE;qCACtC;gCACH;4BACF;wBACF;oBACF,OAAO;wBACL,MAAMH,eAAeC,yBACnBb,iBACA;wBAEFN,aAAa,eAAe,GAAGkB;oBACjC;gBACF;gBAEA,IAAI,CAAC5B,cAAc,CAACQ,cAAc,CAACb,WAAW;oBAC5C,MAAMe,eAAeP;oBACrB,MAAM+B,iBAAiBxB,cAAc;oBAIrC,IACEwB,kBACApB,AAAuD,YAAvDA,QAAQ,GAAG,CAACqB,sCAAsC,EAElD,IAAI;wBACF,MAAMC,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,sBAAsB,CAC5DF,gBACA9F,6BACA,IAAI,CAAC,SAAS,EACd6B;wBAEF,IAAImE,cAAc;4BAChB3B,sBAAsB2B;4BACtB5G,MACE;wBAEJ;oBACF,EAAE,OAAO6G,aAAa;wBACpB7G,MAAM,kCAAkC6G;oBAC1C;oBAGF,IAAI,CAAC5B,qBACH,IAAI;wBACFvC,mBAAmBF,QAAQ;wBAC3BsB,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACtCxD,OACA;4BACE,SAASmC;wBACX,GACA7B,6BACA8C;wBAEFK,UAAUD,aAAa,IAAI;wBAC3BmB,sBAAsBnB,aAAa,OAAO;oBAC5C,EAAE,OAAOX,OAAO;wBACd,IAAIA,iBAAiB2D,cACnB/C,UAAUZ,MAAM,IAAI;wBAEtB,MAAMA;oBACR,SAAU;wBACRT,mBAAmBF,QAAQ;oBAC7B;gBAEJ;gBAEA,MAAMuE,UACJ9C,mBACAI,oBACAS,oBACAG;gBAGF,MAAM+B,2BAA2B9G,iBAC/BwE,mBAAmB,cAAc;gBAGnC,IAAIuC;gBAOJ,IACEF,WACA,IAAI,CAAC,SAAS,IACd,CAAC/B,cACA,EAACb,aAAa,CAAC6C,wBAAuB,KACvC1G,OAAO,cAAc,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,oBAAoB,EACrC,IAAI;oBAGF,IAAI4G,gBAAkCH,QAAQ,MAAM;oBACpD,IAAI/D,AAA6B,MAA7BA,0BAAgC;wBAClCkE,gBAAgB;4BACdjB,KAAK,KAAK,CAACc,QAAQ,MAAM,CAAC,EAAE,GAAG/D;4BAC/BiD,KAAK,KAAK,CAACc,QAAQ,MAAM,CAAC,EAAE,GAAG/D;yBAChC;wBACDhD,MACE,8DACA+G,QAAQ,MAAM,EACdG;oBAEJ;oBAEA,MAAMC,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,oBAAoB,CACvDD,eACA;wBACE,mBACE,AAAwB,YAAxB,OAAO5G,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE;wBACpB,aAAaM;oBACf;oBAEF,IAAIV,iBAAiBiH,UAAU;wBAC7BA,QAAQ,YAAY,GAAGD;wBACvBC,QAAQ,eAAe,GAAG1B;wBAC1B0B,QAAQ,iBAAiB,GACvBC,+BAA+BF;wBACjClH,MACE,uCACAyE,aACA0C;wBAGF,MAAME,uBACJ/B,AACA,YADAA,QAAQ,GAAG,CAACqB,sCAAsC;wBAEpD,IAAIU,sBACF,IAAI;4BACF,MAAMC,SAAS,MAAM,IAAI,CAAC,OAAO,CAAC,sBAAsB,CACtDJ,eACAtG,6BACA6B;4BAEF,IAAI6E,QAAQ;gCACVH,QAAQ,cAAc,GAAGG;gCACzBtH,MACE,4CACAyE;4BAEJ;wBACF,EAAE,OAAOoC,aAAa;4BACpB7G,MAAM,kCAAkC6G;wBAC1C;wBAGFI,oBAAoBE;wBACpB,MAAM,IAAI,CAAC,SAAS,CAAC,yBAAyB,CAC5C;4BACE,MAAM;4BACN,QAAQ1C;4BACR,OAAO0C;wBACT,GACAzC;oBAEJ,OACE1E,MACE,yDACAyE;gBAGN,EAAE,OAAOtB,OAAO;o
BACdnD,MAAM,mCAAmCmD;gBAC3C;qBAEAnD,MAAM;gBAIV,IAAI,CAAC+G,SAAS;oBACZ,IAAIlD,YACF,MAAM,IAAIiD,aACR,CAAC,oBAAoB,EAAExG,MAAM,MAAM,EAAE,EACrCuD;oBAGJ,MAAM,IAAIjC,MAAM,CAAC,mBAAmB,EAAEtB,MAAM,MAAM,EAAE;gBACtD;gBAEA,IAAIiH;gBAEJ,IAAIpD,WACFoD,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,MAAMjH,MAAM,IAAI;oBAClB;gBACF;qBACK,IAAIkE,YACT+C,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,OAAOjH,MAAM,KAAK;oBACpB;gBACF;qBACK,IAAI0E,YACTuC,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP5C;wBACA,aAAasC;oBACf;gBACF;gBAGFzD,WAAWuD;gBAEX,OAAO;oBACL,QAAQ;wBACN,SAAS;4BACP,GAAGA,OAAO;4BAEV,KAAKtE,UAAU,aAAa;wBAC9B;oBACF;oBACA8E;gBACF;YACF;QACF;QAEA,OAAO3D;IACT;IA9uBA,YAAY,EACV4D,iBAAiB,EACjBC,OAAO,EACPC,SAAS,EACTjG,WAAW,EACXkG,eAAe,EACC,CAAE;QAhBpB,uBAAiB,aAAjB;QAEA,uBAAiB,WAAjB;QAEA,uBAAiB,aAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAiB,mBAAjB;QASE,IAAI,CAAC,SAAS,GAAGH;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC;QACjB,IAAI,CAAC,WAAW,GAAGjG;QACnB,IAAI,CAAC,eAAe,GAAGkG;IACzB;AAmuBF"}
@@ -0,0 +1,34 @@
1
+ import { getPreferredLanguage } from "@midscene/shared/env";
2
+ const cacheVerifyInstruction = ()=>{
3
+ const preferredLanguage = getPreferredLanguage();
4
+ return `
5
+ You are verifying whether a cached element location is still correct.
6
+ The image shows a SMALL CROPPED AREA around the cached coordinates, with the target element marked by a red rectangle.
7
+
8
+ TASK: Determine if the element in the red rectangle matches the user's original description.
9
+
10
+ USER'S ORIGINAL DESCRIPTION: "{{targetPrompt}}"
11
+
12
+ RULES:
13
+ 1. Look at the element inside the red rectangle carefully
14
+ 2. Compare it with the user's description: "{{targetPrompt}}"
15
+ 3. Consider:
16
+ - Is it the same TYPE of element? (button, input, link, text, etc.)
17
+ - Does it have matching TEXT or LABEL?
18
+ - Is it in a reasonable CONTEXT for that description?
19
+ 4. If the description is vague (e.g., "click the button"), be more lenient
20
+ 5. If the description is specific (e.g., "Login button", "Submit button"), be strict
21
+
22
+ RESPONSE FORMAT (JSON):
23
+ {
24
+ "match": true/false,
25
+ "description": "brief description of what you see in the red rectangle",
26
+ "reason": "short explanation of why it matches or not"
27
+ }
28
+
29
+ IMPORTANT: Write descriptions and reasons in ${preferredLanguage}.
30
+ `;
31
+ };
32
+ export { cacheVerifyInstruction };
33
+
34
+ //# sourceMappingURL=cache-verify.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/prompt/cache-verify.mjs","sources":["../../../../src/ai-model/prompt/cache-verify.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const cacheVerifyInstruction = () => {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are verifying whether a cached element location is still correct.\nThe image shows a SMALL CROPPED AREA around the cached coordinates, with the target element marked by a red rectangle.\n\nTASK: Determine if the element in the red rectangle matches the user's original description.\n\nUSER'S ORIGINAL DESCRIPTION: \"{{targetPrompt}}\"\n\nRULES:\n1. Look at the element inside the red rectangle carefully\n2. Compare it with the user's description: \"{{targetPrompt}}\"\n3. Consider:\n - Is it the same TYPE of element? (button, input, link, text, etc.)\n - Does it have matching TEXT or LABEL?\n - Is it in a reasonable CONTEXT for that description?\n4. If the description is vague (e.g., \"click the button\"), be more lenient\n5. If the description is specific (e.g., \"Login button\", \"Submit button\"), be strict\n\nRESPONSE FORMAT (JSON):\n{\n \"match\": true/false,\n \"description\": \"brief description of what you see in the red rectangle\",\n \"reason\": \"short explanation of why it matches or not\"\n}\n\nIMPORTANT: Write descriptions and reasons in ${preferredLanguage}.\n`;\n};\n"],"names":["cacheVerifyInstruction","preferredLanguage","getPreferredLanguage"],"mappings":";AAEO,MAAMA,yBAAyB;IACpC,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;6CAyBmC,EAAED,kBAAkB;AACjE,CAAC;AACD"}
@@ -0,0 +1,34 @@
1
+ import { getPreferredLanguage } from "@midscene/shared/env";
2
+ const semanticAnchorSearchInstruction = ()=>{
3
+ const preferredLanguage = getPreferredLanguage();
4
+ return `
5
+ You are searching for an element based on its semantic anchor description.
6
+ The image shows a CROPPED AREA of the page around a known landmark element.
7
+
8
+ TARGET ELEMENT DESCRIPTION:
9
+ - Visual: {{visualFingerprint}}
10
+ - Context: {{contextDescription}}
11
+
12
+ TASK: Find the element matching this description in the image.
13
+
14
+ RESPONSE FORMAT (JSON):
15
+ {
16
+ "found": true/false,
17
+ "center": [x, y] or null,
18
+ "rect": {"left": 0, "top": 0, "width": 0, "height": 0} or null,
19
+ "confidence": 0.0-1.0,
20
+ "reason": "brief explanation"
21
+ }
22
+
23
+ RULES:
24
+ 1. Look for an element matching the visualFingerprint description
25
+ 2. Verify it's in the expected position relative to the contextDescription
26
+ 3. Return center coordinates and bounding rect if found
27
+ 4. Set confidence based on how well the match is
28
+ 5. If not found or confidence < 0.5, set found=false
29
+ 6. Write reason in ${preferredLanguage}
30
+ `;
31
+ };
32
+ export { semanticAnchorSearchInstruction };
33
+
34
+ //# sourceMappingURL=semantic-anchor-search.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/prompt/semantic-anchor-search.mjs","sources":["../../../../src/ai-model/prompt/semantic-anchor-search.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const semanticAnchorSearchInstruction = () => {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are searching for an element based on its semantic anchor description.\nThe image shows a CROPPED AREA of the page around a known landmark element.\n\nTARGET ELEMENT DESCRIPTION:\n- Visual: {{visualFingerprint}}\n- Context: {{contextDescription}}\n\nTASK: Find the element matching this description in the image.\n\nRESPONSE FORMAT (JSON):\n{\n \"found\": true/false,\n \"center\": [x, y] or null,\n \"rect\": {\"left\": 0, \"top\": 0, \"width\": 0, \"height\": 0} or null,\n \"confidence\": 0.0-1.0,\n \"reason\": \"brief explanation\"\n}\n\nRULES:\n1. Look for an element matching the visualFingerprint description\n2. Verify it's in the expected position relative to the contextDescription\n3. Return center coordinates and bounding rect if found\n4. Set confidence based on how well the match is\n5. If not found or confidence < 0.5, set found=false\n6. Write reason in ${preferredLanguage}\n`;\n};\n"],"names":["semanticAnchorSearchInstruction","preferredLanguage","getPreferredLanguage"],"mappings":";AAEO,MAAMA,kCAAkC;IAC7C,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;mBAyBS,EAAED,kBAAkB;AACvC,CAAC;AACD"}
@@ -0,0 +1,41 @@
1
+ import { getPreferredLanguage } from "@midscene/shared/env";
2
+ const semanticAnchorGenerateInstruction = ()=>{
3
+ const preferredLanguage = getPreferredLanguage();
4
+ return `
5
+ Analyze the element in the red rectangle and its surrounding context.
6
+
7
+ TASK: Generate a semantic anchor for this element that can be used to relocate it even when the DOM structure changes.
8
+
9
+ IMPORTANT: Write all descriptions in ${preferredLanguage}.
10
+
11
+ RESPONSE FORMAT (JSON):
12
+ {
13
+ "visualFingerprint": "concise visual description of the element (color, shape, icon, text style)",
14
+ "contextDescription": "spatial and semantic context (e.g., 'second button in top navigation bar')",
15
+ "nearbyLandmarks": [
16
+ {
17
+ "description": "description of a nearby stable structural element",
18
+ "xpath": "XPath of the landmark element"
19
+ }
20
+ ],
21
+ "error"?: "error message if any"
22
+ }
23
+
24
+ RULES:
25
+ 1. visualFingerprint: Focus on visual characteristics that survive DOM changes
26
+ - Colors, icons, text content, shape
27
+ - NOT CSS classes or IDs (they change)
28
+ 2. contextDescription: Describe WHERE the element is relative to page structure
29
+ - Use spatial terms: "top-right", "below header", "in sidebar"
30
+ - Use structural terms: "in navigation bar", "in modal dialog"
31
+ 3. nearbyLandmarks: Identify 1-3 STABLE structural elements near the target
32
+ - Prefer: <nav>, <header>, <footer>, <aside>, <main>, <form>
33
+ - Avoid: <div>, <span>, <li> (too generic, likely to change)
34
+ - Each landmark MUST have both description and xpath
35
+ 4. Keep descriptions under 20 words each
36
+ 5. Prioritize landmarks that are large and visually distinct
37
+ `;
38
+ };
39
+ export { semanticAnchorGenerateInstruction };
40
+
41
+ //# sourceMappingURL=semantic-anchor.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/prompt/semantic-anchor.mjs","sources":["../../../../src/ai-model/prompt/semantic-anchor.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const semanticAnchorGenerateInstruction = () => {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nAnalyze the element in the red rectangle and its surrounding context.\n\nTASK: Generate a semantic anchor for this element that can be used to relocate it even when the DOM structure changes.\n\nIMPORTANT: Write all descriptions in ${preferredLanguage}.\n\nRESPONSE FORMAT (JSON):\n{\n \"visualFingerprint\": \"concise visual description of the element (color, shape, icon, text style)\",\n \"contextDescription\": \"spatial and semantic context (e.g., 'second button in top navigation bar')\",\n \"nearbyLandmarks\": [\n {\n \"description\": \"description of a nearby stable structural element\",\n \"xpath\": \"XPath of the landmark element\"\n }\n ],\n \"error\"?: \"error message if any\"\n}\n\nRULES:\n1. visualFingerprint: Focus on visual characteristics that survive DOM changes\n - Colors, icons, text content, shape\n - NOT CSS classes or IDs (they change)\n2. contextDescription: Describe WHERE the element is relative to page structure\n - Use spatial terms: \"top-right\", \"below header\", \"in sidebar\"\n - Use structural terms: \"in navigation bar\", \"in modal dialog\"\n3. nearbyLandmarks: Identify 1-3 STABLE structural elements near the target\n - Prefer: <nav>, <header>, <footer>, <aside>, <main>, <form>\n - Avoid: <div>, <span>, <li> (too generic, likely to change)\n - Each landmark MUST have both description and xpath\n4. Keep descriptions under 20 words each\n5. 
Prioritize landmarks that are large and visually distinct\n`;\n};\n"],"names":["semanticAnchorGenerateInstruction","preferredLanguage","getPreferredLanguage"],"mappings":";AAEO,MAAMA,oCAAoC;IAC/C,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;qCAK2B,EAAED,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA4BzD,CAAC;AACD"}