@donggui/core 1.6.1 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,121 @@
1
+ import { MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS, MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE } from "@midscene/shared/env/constants";
2
+ import { getDebug } from "@midscene/shared/logger";
3
+ const debug = getDebug('cache-confidence');
4
+ const DEFAULT_HALF_LIFE_MS = 1800000;
5
+ function calculateConfidence(state) {
6
+ const halfLifeMs = Number.parseInt(process.env[MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS] || '', 10) || DEFAULT_HALF_LIFE_MS;
7
+ const ageMs = Date.now() - state.lastVerifiedAt;
8
+ const ageDecay = Math.exp(-ageMs * Math.LN2 / halfLifeMs);
9
+ const experienceBonus = Math.min(state.verificationCount / 10, 0.3);
10
+ const score = Math.max(0.1, 0.7 * ageDecay + experienceBonus);
11
+ debug('calculateConfidence', {
12
+ ageMs,
13
+ ageDecay: ageDecay.toFixed(3),
14
+ verificationCount: state.verificationCount,
15
+ experienceBonus: experienceBonus.toFixed(3),
16
+ score: score.toFixed(3)
17
+ });
18
+ return score;
19
+ }
20
+ function determineVerificationLevel(confidence) {
21
+ if (confidence > 0.8) return 'minimal';
22
+ if (confidence > 0.5) return 'standard';
23
+ if (confidence > 0.2) return 'enhanced';
24
+ return 'full';
25
+ }
26
+ function getVerificationActions(level) {
27
+ switch(level){
28
+ case 'minimal':
29
+ return {
30
+ coordCheck: true,
31
+ visualVerify: false,
32
+ semanticAnchor: false,
33
+ skipCache: false
34
+ };
35
+ case 'standard':
36
+ return {
37
+ coordCheck: true,
38
+ visualVerify: true,
39
+ semanticAnchor: false,
40
+ skipCache: false
41
+ };
42
+ case 'enhanced':
43
+ return {
44
+ coordCheck: true,
45
+ visualVerify: true,
46
+ semanticAnchor: true,
47
+ skipCache: false
48
+ };
49
+ case 'full':
50
+ return {
51
+ coordCheck: false,
52
+ visualVerify: false,
53
+ semanticAnchor: false,
54
+ skipCache: true
55
+ };
56
+ }
57
+ }
58
+ function createInitialConfidenceState() {
59
+ return {
60
+ lastVerifiedAt: Date.now(),
61
+ verificationCount: 1,
62
+ confidenceScore: 1.0
63
+ };
64
+ }
65
+ function updateConfidenceOnVerify(state, passed) {
66
+ return {
67
+ lastVerifiedAt: passed ? Date.now() : state.lastVerifiedAt,
68
+ verificationCount: passed ? state.verificationCount + 1 : state.verificationCount,
69
+ confidenceScore: passed ? calculateConfidence({
70
+ ...state,
71
+ lastVerifiedAt: Date.now(),
72
+ verificationCount: state.verificationCount + 1
73
+ }) : Math.max(0.1, 0.5 * state.confidenceScore)
74
+ };
75
+ }
76
+ function createInitialProgressiveRecord(center) {
77
+ return {
78
+ convergedCenter: center,
79
+ convergenceRadius: 0,
80
+ sampleCount: 1,
81
+ lastUpdatedAt: Date.now()
82
+ };
83
+ }
84
+ function updateProgressiveConvergence(record, newCenter, confidence) {
85
+ const enableProgressive = 'false' !== process.env[MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE];
86
+ if (!enableProgressive) return {
87
+ convergedCenter: newCenter,
88
+ convergenceRadius: 0,
89
+ sampleCount: record.sampleCount + 1,
90
+ lastUpdatedAt: Date.now()
91
+ };
92
+ const weight = Math.max(0.1, confidence);
93
+ const totalWeight = record.sampleCount + weight;
94
+ const convergedCenter = [
95
+ (record.convergedCenter[0] * record.sampleCount + newCenter[0] * weight) / totalWeight,
96
+ (record.convergedCenter[1] * record.sampleCount + newCenter[1] * weight) / totalWeight
97
+ ];
98
+ const allPoints = [
99
+ [
100
+ record.convergedCenter[0],
101
+ record.convergedCenter[1]
102
+ ],
103
+ newCenter
104
+ ];
105
+ const convergenceRadius = Math.max(...allPoints.map((p)=>Math.sqrt((p[0] - convergedCenter[0]) ** 2 + (p[1] - convergedCenter[1]) ** 2)));
106
+ debug('updateProgressiveConvergence', {
107
+ newCenter,
108
+ convergedCenter: convergedCenter.map((v)=>v.toFixed(1)),
109
+ convergenceRadius: convergenceRadius.toFixed(1),
110
+ sampleCount: record.sampleCount + 1
111
+ });
112
+ return {
113
+ convergedCenter,
114
+ convergenceRadius,
115
+ sampleCount: record.sampleCount + 1,
116
+ lastUpdatedAt: Date.now()
117
+ };
118
+ }
119
+ export { calculateConfidence, createInitialConfidenceState, createInitialProgressiveRecord, determineVerificationLevel, getVerificationActions, updateConfidenceOnVerify, updateProgressiveConvergence };
120
+
121
+ //# sourceMappingURL=cache-confidence.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent/cache-confidence.mjs","sources":["../../../src/agent/cache-confidence.ts"],"sourcesContent":["import type {\n CacheConfidenceState,\n ProgressiveLocateRecord,\n VerificationLevel,\n} from '@/types';\nimport {\n MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS,\n MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE,\n} from '@midscene/shared/env/constants';\nimport { getDebug } from '@midscene/shared/logger';\n\nconst debug = getDebug('cache-confidence');\n\nconst DEFAULT_HALF_LIFE_MS = 30 * 60 * 1000;\n\nexport function calculateConfidence(state: CacheConfidenceState): number {\n const halfLifeMs =\n Number.parseInt(\n process.env[MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS] || '',\n 10,\n ) || DEFAULT_HALF_LIFE_MS;\n\n const ageMs = Date.now() - state.lastVerifiedAt;\n const ageDecay = Math.exp((-ageMs * Math.LN2) / halfLifeMs);\n const experienceBonus = Math.min(state.verificationCount / 10, 0.3);\n const score = Math.max(0.1, ageDecay * 0.7 + experienceBonus);\n\n debug('calculateConfidence', {\n ageMs,\n ageDecay: ageDecay.toFixed(3),\n verificationCount: state.verificationCount,\n experienceBonus: experienceBonus.toFixed(3),\n score: score.toFixed(3),\n });\n\n return score;\n}\n\nexport function determineVerificationLevel(\n confidence: number,\n): VerificationLevel {\n if (confidence > 0.8) return 'minimal';\n if (confidence > 0.5) return 'standard';\n if (confidence > 0.2) return 'enhanced';\n return 'full';\n}\n\nexport function getVerificationActions(level: VerificationLevel): {\n coordCheck: boolean;\n visualVerify: boolean;\n semanticAnchor: boolean;\n skipCache: boolean;\n} {\n switch (level) {\n case 'minimal':\n return {\n coordCheck: true,\n visualVerify: false,\n semanticAnchor: false,\n skipCache: false,\n };\n case 'standard':\n return {\n coordCheck: true,\n visualVerify: true,\n semanticAnchor: false,\n skipCache: false,\n };\n case 'enhanced':\n return {\n coordCheck: true,\n visualVerify: true,\n semanticAnchor: true,\n skipCache: false,\n };\n case 'full':\n return {\n coordCheck: false,\n visualVerify: false,\n semanticAnchor: false,\n skipCache: true,\n };\n }\n}\n\nexport function createInitialConfidenceState(): CacheConfidenceState {\n return {\n lastVerifiedAt: Date.now(),\n verificationCount: 1,\n confidenceScore: 1.0,\n };\n}\n\nexport function updateConfidenceOnVerify(\n state: CacheConfidenceState,\n passed: boolean,\n): CacheConfidenceState {\n return {\n lastVerifiedAt: passed ? Date.now() : state.lastVerifiedAt,\n verificationCount: passed\n ? state.verificationCount + 1\n : state.verificationCount,\n confidenceScore: passed\n ? calculateConfidence({\n ...state,\n lastVerifiedAt: Date.now(),\n verificationCount: state.verificationCount + 1,\n })\n : Math.max(0.1, state.confidenceScore * 0.5),\n };\n}\n\nexport function createInitialProgressiveRecord(\n center: [number, number],\n): ProgressiveLocateRecord {\n return {\n convergedCenter: center,\n convergenceRadius: 0,\n sampleCount: 1,\n lastUpdatedAt: Date.now(),\n };\n}\n\nexport function updateProgressiveConvergence(\n record: ProgressiveLocateRecord,\n newCenter: [number, number],\n confidence: number,\n): ProgressiveLocateRecord {\n const enableProgressive =\n process.env[MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE] !== 'false';\n if (!enableProgressive) {\n return {\n convergedCenter: newCenter,\n convergenceRadius: 0,\n sampleCount: record.sampleCount + 1,\n lastUpdatedAt: Date.now(),\n };\n }\n\n const weight = Math.max(0.1, confidence);\n const totalWeight = record.sampleCount + weight;\n\n const convergedCenter: [number, number] = [\n (record.convergedCenter[0] * record.sampleCount + newCenter[0] * weight) /\n totalWeight,\n (record.convergedCenter[1] * record.sampleCount + newCenter[1] * weight) /\n totalWeight,\n ];\n\n const allPoints = [\n [record.convergedCenter[0], record.convergedCenter[1]],\n newCenter,\n ];\n const convergenceRadius = Math.max(\n ...allPoints.map((p) =>\n Math.sqrt(\n (p[0] - convergedCenter[0]) ** 2 + (p[1] - convergedCenter[1]) ** 2,\n ),\n ),\n );\n\n debug('updateProgressiveConvergence', {\n newCenter,\n convergedCenter: convergedCenter.map((v) => v.toFixed(1)),\n convergenceRadius: convergenceRadius.toFixed(1),\n sampleCount: record.sampleCount + 1,\n });\n\n return {\n convergedCenter,\n convergenceRadius,\n sampleCount: record.sampleCount + 1,\n lastUpdatedAt: Date.now(),\n };\n}\n"],"names":["debug","getDebug","DEFAULT_HALF_LIFE_MS","calculateConfidence","state","halfLifeMs","Number","process","MIDSCENE_CACHE_CONFIDENCE_HALF_LIFE_MS","ageMs","Date","ageDecay","Math","experienceBonus","score","determineVerificationLevel","confidence","getVerificationActions","level","createInitialConfidenceState","updateConfidenceOnVerify","passed","createInitialProgressiveRecord","center","updateProgressiveConvergence","record","newCenter","enableProgressive","MIDSCENE_CACHE_ENABLE_PROGRESSIVE_CONVERGENCE","weight","totalWeight","convergedCenter","allPoints","convergenceRadius","p","v"],"mappings":";;AAWA,MAAMA,QAAQC,SAAS;AAEvB,MAAMC,uBAAuB;AAEtB,SAASC,oBAAoBC,KAA2B;IAC7D,MAAMC,aACJC,OAAO,QAAQ,CACbC,QAAQ,GAAG,CAACC,uCAAuC,IAAI,IACvD,OACGN;IAEP,MAAMO,QAAQC,KAAK,GAAG,KAAKN,MAAM,cAAc;IAC/C,MAAMO,WAAWC,KAAK,GAAG,CAAE,CAACH,QAAQG,KAAK,GAAG,GAAIP;IAChD,MAAMQ,kBAAkBD,KAAK,GAAG,CAACR,MAAM,iBAAiB,GAAG,IAAI;IAC/D,MAAMU,QAAQF,KAAK,GAAG,CAAC,KAAKD,AAAW,MAAXA,WAAiBE;IAE7Cb,MAAM,uBAAuB;QAC3BS;QACA,UAAUE,SAAS,OAAO,CAAC;QAC3B,mBAAmBP,MAAM,iBAAiB;QAC1C,iBAAiBS,gBAAgB,OAAO,CAAC;QACzC,OAAOC,MAAM,OAAO,CAAC;IACvB;IAEA,OAAOA;AACT;AAEO,SAASC,2BACdC,UAAkB;IAElB,IAAIA,aAAa,KAAK,OAAO;IAC7B,IAAIA,aAAa,KAAK,OAAO;IAC7B,IAAIA,aAAa,KAAK,OAAO;IAC7B,OAAO;AACT;AAEO,SAASC,uBAAuBC,KAAwB;IAM7D,OAAQA;QACN,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;QACF,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;QACF,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;QACF,KAAK;YACH,OAAO;gBACL,YAAY;gBACZ,cAAc;gBACd,gBAAgB;gBAChB,WAAW;YACb;IACJ;AACF;AAEO,SAASC;IACd,OAAO;QACL,gBAAgBT,KAAK,GAAG;QACxB,mBAAmB;QACnB,iBAAiB;IACnB;AACF;AAEO,SAASU,yBACdhB,KAA2B,EAC3BiB,MAAe;IAEf,OAAO;QACL,gBAAgBA,SAASX,KAAK,GAAG,KAAKN,MAAM,cAAc;QAC1D,mBAAmBiB,SACfjB,MAAM,iBAAiB,GAAG,IAC1BA,MAAM,iBAAiB;QAC3B,iBAAiBiB,SACblB,oBAAoB;YAClB,GAAGC,KAAK;YACR,gBAAgBM,KAAK,GAAG;YACxB,mBAAmBN,MAAM,iBAAiB,GAAG;QAC/C,KACAQ,KAAK,GAAG,CAAC,KAAKR,AAAwB,MAAxBA,MAAM,eAAe;IACzC;AACF;AAEO,SAASkB,+BACdC,MAAwB;IAExB,OAAO;QACL,iBAAiBA;QACjB,mBAAmB;QACnB,aAAa;QACb,eAAeb,KAAK,GAAG;IACzB;AACF;AAEO,SAASc,6BACdC,MAA+B,EAC/BC,SAA2B,EAC3BV,UAAkB;IAElB,MAAMW,oBACJpB,AAA+D,YAA/DA,QAAQ,GAAG,CAACqB,8CAA8C;IAC5D,IAAI,CAACD,mBACH,OAAO;QACL,iBAAiBD;QACjB,mBAAmB;QACnB,aAAaD,OAAO,WAAW,GAAG;QAClC,eAAef,KAAK,GAAG;IACzB;IAGF,MAAMmB,SAASjB,KAAK,GAAG,CAAC,KAAKI;IAC7B,MAAMc,cAAcL,OAAO,WAAW,GAAGI;IAEzC,MAAME,kBAAoC;QACvCN,CAAAA,OAAO,eAAe,CAAC,EAAE,GAAGA,OAAO,WAAW,GAAGC,SAAS,CAAC,EAAE,GAAGG,MAAK,IACpEC;QACDL,CAAAA,OAAO,eAAe,CAAC,EAAE,GAAGA,OAAO,WAAW,GAAGC,SAAS,CAAC,EAAE,GAAGG,MAAK,IACpEC;KACH;IAED,MAAME,YAAY;QAChB;YAACP,OAAO,eAAe,CAAC,EAAE;YAAEA,OAAO,eAAe,CAAC,EAAE;SAAC;QACtDC;KACD;IACD,MAAMO,oBAAoBrB,KAAK,GAAG,IAC7BoB,UAAU,GAAG,CAAC,CAACE,IAChBtB,KAAK,IAAI,CACNsB,AAAAA,CAAAA,CAAC,CAAC,EAAE,GAAGH,eAAe,CAAC,EAAC,KAAM,IAAKG,AAAAA,CAAAA,CAAC,CAAC,EAAE,GAAGH,eAAe,CAAC,EAAC,KAAM;IAKxE/B,MAAM,gCAAgC;QACpC0B;QACA,iBAAiBK,gBAAgB,GAAG,CAAC,CAACI,IAAMA,EAAE,OAAO,CAAC;QACtD,mBAAmBF,kBAAkB,OAAO,CAAC;QAC7C,aAAaR,OAAO,WAAW,GAAG;IACpC;IAEA,OAAO;QACLM;QACAE;QACA,aAAaR,OAAO,WAAW,GAAG;QAClC,eAAef,KAAK,GAAG;IACzB;AACF"}
@@ -1,8 +1,9 @@
1
+ import { calculateConfidence, createInitialConfidenceState, createInitialProgressiveRecord, determineVerificationLevel, getVerificationActions, updateConfidenceOnVerify, updateProgressiveConvergence } from "./cache-confidence.mjs";
1
2
  import { findAllMidsceneLocatorField, parseActionParam } from "../ai-model/index.mjs";
2
3
  import { setTimingFieldOnce } from "../task-timing.mjs";
3
4
  import { ServiceError } from "../types.mjs";
4
5
  import { sleep } from "../utils.mjs";
5
- import { MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD, MIDSCENE_CACHE_ENABLE_COORD_CHECK, MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY } from "@midscene/shared/env/constants";
6
+ import { MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD, MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR } from "@midscene/shared/env/constants";
6
7
  import { generateElementByRect } from "@midscene/shared/extractor";
7
8
  import { getDebug } from "@midscene/shared/logger";
8
9
  import { assert } from "@midscene/shared/utils";
@@ -229,7 +230,7 @@ class TaskBuilder {
229
230
  taskCache: this.taskCache,
230
231
  interfaceInstance: this.interface
231
232
  }, cacheEntry, cachePrompt, param.cacheable);
232
- const elementFromCache = elementFromCacheResult ? transformLogicalElementToScreenshot(elementFromCacheResult, shrunkShotToLogicalRatio) : void 0;
233
+ let elementFromCache = elementFromCacheResult ? transformLogicalElementToScreenshot(elementFromCacheResult, shrunkShotToLogicalRatio) : void 0;
233
234
  let isCacheHit = !!elementFromCache;
234
235
  const timing = taskContext.task.timing;
235
236
  let elementFromAiLocate;
@@ -237,10 +238,25 @@ class TaskBuilder {
237
238
  const cacheFeature = cacheEntry;
238
239
  const cachedCenter = cacheFeature?.cachedCenter;
239
240
  const coordOffsetThreshold = Number.parseInt(process.env[MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD] || '16', 10) || 16;
240
- const enableCoordCheck = 'false' !== process.env[MIDSCENE_CACHE_ENABLE_COORD_CHECK];
241
- const enableVisualVerify = 'false' !== process.env[MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY];
241
+ const confidenceState = cacheFeature?.confidenceState || createInitialConfidenceState();
242
+ const confidence = calculateConfidence(confidenceState);
243
+ const level = determineVerificationLevel(confidence);
244
+ const actions = getVerificationActions(level);
245
+ debug('cache confidence assessment', {
246
+ confidence: confidence.toFixed(3),
247
+ level,
248
+ actions,
249
+ verificationCount: confidenceState.verificationCount
250
+ });
251
+ if (actions.skipCache) {
252
+ debug('cache confidence too low, skipping cache entirely', {
253
+ confidence,
254
+ level
255
+ });
256
+ isCacheHit = false;
257
+ }
242
258
  try {
243
- if (enableCoordCheck && cachedCenter) {
259
+ if (isCacheHit && actions.coordCheck && cachedCenter) {
244
260
  const offset = Math.sqrt((elementFromCache.center[0] - cachedCenter[0]) ** 2 + (elementFromCache.center[1] - cachedCenter[1]) ** 2);
245
261
  debug('cache coord offset check', {
246
262
  cachedCenter,
@@ -256,7 +272,7 @@ class TaskBuilder {
256
272
  isCacheHit = false;
257
273
  }
258
274
  }
259
- if (isCacheHit && enableVisualVerify) {
275
+ if (isCacheHit && actions.visualVerify) {
260
276
  const verification = await this.service.verifyCachedElement(elementFromCache.center, cachePrompt, modelConfigForDefaultIntent, uiContext);
261
277
  if (verification.pass) debug('cache hit and visual verification passed', {
262
278
  description: verification.description
@@ -274,19 +290,59 @@ class TaskBuilder {
274
290
  debug('cache verification error, fallback to AI locate', verifyError);
275
291
  isCacheHit = false;
276
292
  }
293
+ if (isCacheHit) {
294
+ const updatedState = updateConfidenceOnVerify(confidenceState, true);
295
+ cacheFeature.confidenceState = updatedState;
296
+ const progressiveRecord = cacheFeature.progressiveRecord;
297
+ if (progressiveRecord) {
298
+ const updated = updateProgressiveConvergence(progressiveRecord, elementFromCache.center, updatedState.confidenceScore);
299
+ cacheFeature.progressiveRecord = updated;
300
+ if (updated.convergenceRadius < 5 && updated.sampleCount >= 3) {
301
+ debug('using converged center instead of single-result center', {
302
+ convergedCenter: updated.convergedCenter.map((v)=>v.toFixed(1)),
303
+ singleCenter: elementFromCache.center,
304
+ convergenceRadius: updated.convergenceRadius.toFixed(1),
305
+ sampleCount: updated.sampleCount
306
+ });
307
+ elementFromCache = {
308
+ ...elementFromCache,
309
+ center: [
310
+ Math.round(updated.convergedCenter[0]),
311
+ Math.round(updated.convergedCenter[1])
312
+ ]
313
+ };
314
+ }
315
+ }
316
+ } else {
317
+ const updatedState = updateConfidenceOnVerify(confidenceState, false);
318
+ cacheFeature.confidenceState = updatedState;
319
+ }
277
320
  }
278
- if (!isXpathHit && !isCacheHit && !isPlanHit) try {
279
- setTimingFieldOnce(timing, 'callAiStart');
280
- locateResult = await this.service.locate(param, {
281
- context: uiContext
282
- }, modelConfigForDefaultIntent, abortSignal);
283
- applyDump(locateResult.dump);
284
- elementFromAiLocate = locateResult.element;
285
- } catch (error) {
286
- if (error instanceof ServiceError) applyDump(error.dump);
287
- throw error;
288
- } finally{
289
- setTimingFieldOnce(timing, 'callAiEnd');
321
+ if (!isXpathHit && !isCacheHit && !isPlanHit) {
322
+ const cacheFeature = cacheEntry;
323
+ const semanticAnchor = cacheFeature?.semanticAnchor;
324
+ if (semanticAnchor && 'false' !== process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR]) try {
325
+ const anchorResult = await this.service.locateBySemanticAnchor(semanticAnchor, modelConfigForDefaultIntent, this.interface, uiContext);
326
+ if (anchorResult) {
327
+ elementFromAiLocate = anchorResult;
328
+ debug('semantic anchor locate succeeded, skipping full AI locate');
329
+ }
330
+ } catch (anchorError) {
331
+ debug('semantic anchor locate failed:', anchorError);
332
+ }
333
+ if (!elementFromAiLocate) try {
334
+ setTimingFieldOnce(timing, 'callAiStart');
335
+ locateResult = await this.service.locate(param, {
336
+ context: uiContext
337
+ }, modelConfigForDefaultIntent, abortSignal);
338
+ applyDump(locateResult.dump);
339
+ elementFromAiLocate = locateResult.element;
340
+ } catch (error) {
341
+ if (error instanceof ServiceError) applyDump(error.dump);
342
+ throw error;
343
+ } finally{
344
+ setTimingFieldOnce(timing, 'callAiEnd');
345
+ }
290
346
  }
291
347
  const element = elementFromBbox || elementFromXpath || elementFromCache || elementFromAiLocate;
292
348
  const locateCacheAlreadyExists = hasNonEmptyCache(locateCacheRecord?.cacheContent?.cache);
@@ -306,7 +362,19 @@ class TaskBuilder {
306
362
  });
307
363
  if (hasNonEmptyCache(feature)) {
308
364
  feature.cachedCenter = pointForCache;
365
+ feature.confidenceState = createInitialConfidenceState();
366
+ feature.progressiveRecord = createInitialProgressiveRecord(pointForCache);
309
367
  debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
368
+ const enableSemanticAnchor = 'false' !== process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR];
369
+ if (enableSemanticAnchor) try {
370
+ const anchor = await this.service.generateSemanticAnchor(pointForCache, modelConfigForDefaultIntent, uiContext);
371
+ if (anchor) {
372
+ feature.semanticAnchor = anchor;
373
+ debug('semantic anchor generated for prompt: %s', cachePrompt);
374
+ }
375
+ } catch (anchorError) {
376
+ debug('generateSemanticAnchor failed:', anchorError);
377
+ }
310
378
  currentCacheEntry = feature;
311
379
  await this.taskCache.updateOrAppendCacheRecord({
312
380
  type: 'locate',
@@ -1 +1 @@
1
- {"version":3,"file":"agent/task-builder.mjs","sources":["../../../src/agent/task-builder.ts"],"sourcesContent":["import { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport { setTimingFieldOnce } from '@/task-timing';\nimport type {\n CacheValidationOptions,\n DetailedLocateParam,\n DeviceAction,\n ElementCacheFeature,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskPlanningLocateApply,\n LocateResultElement,\n LocateResultWithDump,\n PlanningAction,\n PlanningLocateParam,\n Rect,\n ServiceDump,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { sleep } from '@/utils';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD,\n MIDSCENE_CACHE_ENABLE_COORD_CHECK,\n MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY,\n} from '@midscene/shared/env/constants';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport {\n ifPlanLocateParamIsBbox,\n matchElementFromCache,\n matchElementFromPlan,\n transformLogicalElementToScreenshot,\n transformLogicalRectToScreenshotRect,\n} from './utils';\n\nconst debug = getDebug('agent:task-builder');\n\n/**\n * Check if a cache object is non-empty\n */\nfunction hasNonEmptyCache(cache: unknown): boolean {\n return (\n cache !== null &&\n cache !== undefined &&\n typeof cache === 'object' &&\n Object.keys(cache).length > 0\n );\n}\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? { prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\ninterface TaskBuilderDeps {\n interfaceInstance: AbstractInterface;\n service: Service;\n taskCache?: TaskCache;\n actionSpace: DeviceAction[];\n waitAfterAction?: number;\n}\n\ninterface BuildOptions {\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\ninterface PlanBuildContext {\n tasks: ExecutionTaskApply[];\n modelConfigForPlanning: IModelConfig;\n modelConfigForDefaultIntent: IModelConfig;\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\nexport class TaskBuilder {\n private readonly interface: AbstractInterface;\n\n private readonly service: Service;\n\n private readonly taskCache?: TaskCache;\n\n private readonly actionSpace: DeviceAction[];\n\n private readonly waitAfterAction?: number;\n\n constructor({\n interfaceInstance,\n service,\n taskCache,\n actionSpace,\n waitAfterAction,\n }: TaskBuilderDeps) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = taskCache;\n this.actionSpace = actionSpace;\n this.waitAfterAction = waitAfterAction;\n }\n\n public async build(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: BuildOptions,\n ): Promise<{ tasks: ExecutionTaskApply[] }> {\n const tasks: ExecutionTaskApply[] = [];\n const cacheable = options?.cacheable;\n\n const context: PlanBuildContext = {\n tasks,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n cacheable,\n deepLocate: options?.deepLocate,\n abortSignal: options?.abortSignal,\n };\n\n type PlanHandler = (plan: PlanningAction) => Promise<void> | void;\n\n const planHandlers = new Map<string, PlanHandler>([\n [\n 'Locate',\n (plan) =>\n this.handleLocatePlan(\n plan as PlanningAction<PlanningLocateParam>,\n context,\n ),\n ],\n ['Finished', (plan) => this.handleFinishedPlan(plan, context)],\n ]);\n\n const defaultHandler: PlanHandler = (plan) =>\n this.handleActionPlan(plan, context);\n\n for (const plan of plans) {\n const handler = planHandlers.get(plan.type) ?? defaultHandler;\n await handler(plan);\n }\n\n return {\n tasks,\n };\n }\n\n private handleFinishedPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): void {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action Space',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n executor: async () => {},\n };\n context.tasks.push(taskActionFinished);\n }\n\n private async handleLocatePlan(\n plan: PlanningAction<PlanningLocateParam>,\n context: PlanBuildContext,\n ): Promise<void> {\n const taskLocate = this.createLocateTask(plan, plan.param, context);\n context.tasks.push(taskLocate);\n }\n\n private async handleActionPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): Promise<void> {\n const planType = plan.type;\n const actionSpace = this.actionSpace;\n const action = actionSpace.find((item) => item.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n const locateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n // Always use createLocateTask for all locate params (including bbox)\n // This ensures cache writing happens even when bbox is available\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n `hasBbox=${ifPlanLocateParamIsBbox(param[field])}`,\n );\n const locateTask = this.createLocateTask(\n locatePlan,\n param[field],\n context,\n (result) => {\n param[field] = result;\n },\n );\n context.tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action Space',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action Space',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n executor: async (param, taskContext) => {\n const timing = taskContext.task.timing;\n\n debug(\n 'executing action',\n planType,\n param,\n `taskContext.element.center: ${taskContext.element?.center}`,\n );\n\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Action task');\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`,\n );\n });\n\n setTimingFieldOnce(timing, 'beforeInvokeActionHookStart');\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug(\n `will call \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.beforeInvokeAction(action.name, param);\n debug(\n `called \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n setTimingFieldOnce(timing, 'beforeInvokeActionHookEnd');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in Action task',\n );\n }\n\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema, {\n shrunkShotToLogicalRatio,\n });\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n setTimingFieldOnce(timing, 'callActionStart');\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n const actionResult = await actionFn(param, taskContext);\n setTimingFieldOnce(timing, 'callActionEnd');\n debug('called action', action.name, 'result:', actionResult);\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookStart');\n\n const delayAfterRunner =\n action.delayAfterRunner ?? this.waitAfterAction ?? 300;\n if (delayAfterRunner > 0) {\n await sleep(delayAfterRunner);\n }\n\n try {\n if (this.interface.afterInvokeAction) {\n debug(\n `will call \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.afterInvokeAction(action.name, param);\n debug(\n `called \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookEnd');\n\n return {\n output: actionResult,\n };\n },\n };\n\n context.tasks.push(task);\n }\n\n private createLocateTask(\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n context: PlanBuildContext,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskPlanningLocateApply {\n const { cacheable, modelConfigForDefaultIntent, deepLocate, abortSignal } =\n context;\n\n let locateParam = detailedLocateParam;\n\n if (typeof locateParam === 'string') {\n locateParam = {\n prompt: locateParam,\n };\n }\n\n if (cacheable !== undefined) {\n locateParam = {\n ...locateParam,\n cacheable,\n };\n }\n\n if (deepLocate && !locateParam.deepLocate) {\n locateParam = {\n ...locateParam,\n deepLocate: true,\n };\n }\n\n const taskLocator: ExecutionTaskPlanningLocateApply = {\n type: 'Planning',\n subType: 'Locate',\n param: locateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let { uiContext } = taskContext;\n\n assert(\n param?.prompt || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n\n if (!uiContext) {\n uiContext = await this.service.contextRetrieverFn();\n }\n\n assert(uiContext, 'uiContext is required for Service task');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in locate task',\n );\n }\n\n let locateDump: ServiceDump | undefined;\n let locateResult: LocateResultWithDump | undefined;\n\n const applyDump = (dump?: ServiceDump) => {\n if (!dump) {\n return;\n }\n locateDump = dump;\n task.log = {\n dump,\n rawResponse: dump.taskInfo?.rawResponse,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n if (dump.taskInfo?.reasoning_content) {\n task.reasoning_content = dump.taskInfo.reasoning_content;\n }\n };\n\n // from bbox (plan hit)\n const elementFromBbox = ifPlanLocateParamIsBbox(param)\n ? matchElementFromPlan(param)\n : undefined;\n const isPlanHit = !!elementFromBbox;\n\n // from xpath\n let rectFromXpath: Rect | undefined;\n if (\n !isPlanHit &&\n param.xpath &&\n this.interface.rectMatchesCacheFeature\n ) {\n try {\n rectFromXpath = await this.interface.rectMatchesCacheFeature({\n xpaths: [param.xpath],\n });\n } catch {\n // xpath locate failed, allow fallback to cache or AI locate\n }\n }\n\n const elementFromXpath = rectFromXpath\n ? generateElementByRect(\n // rectFromXpath is in logical coordinates, which should be transformed to screenshot coordinates;\n transformLogicalRectToScreenshotRect(\n rectFromXpath,\n shrunkShotToLogicalRatio,\n ),\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt || '',\n )\n : undefined;\n\n const isXpathHit = !!elementFromXpath;\n\n const cachePrompt = param.prompt;\n const locateCacheRecord =\n await this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n\n const elementFromCacheResult =\n isPlanHit || isXpathHit\n ? null\n : await matchElementFromCache(\n {\n taskCache: this.taskCache,\n interfaceInstance: this.interface,\n },\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n\n // elementFromCacheResult is in logical coordinates, which should be transformed to screenshot coordinates;\n const elementFromCache = elementFromCacheResult\n ? transformLogicalElementToScreenshot(\n elementFromCacheResult,\n shrunkShotToLogicalRatio,\n )\n : undefined;\n\n let isCacheHit = !!elementFromCache;\n const timing = taskContext.task.timing;\n let elementFromAiLocate: LocateResultElement | null | undefined;\n\n if (isCacheHit) {\n const cacheFeature = cacheEntry as ElementCacheFeature;\n const cachedCenter = cacheFeature?.cachedCenter as\n | [number, number]\n | undefined;\n\n const coordOffsetThreshold =\n Number.parseInt(\n process.env[MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD] || '16',\n 10,\n ) || 16;\n const enableCoordCheck =\n process.env[MIDSCENE_CACHE_ENABLE_COORD_CHECK] !== 'false';\n const enableVisualVerify =\n process.env[MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY] !== 'false';\n\n try {\n if (enableCoordCheck && cachedCenter) {\n const offset = Math.sqrt(\n (elementFromCache!.center[0] - cachedCenter[0]) ** 2 +\n (elementFromCache!.center[1] - cachedCenter[1]) ** 2,\n );\n\n debug('cache coord offset check', {\n cachedCenter,\n currentCenter: elementFromCache!.center,\n offset: Math.round(offset),\n threshold: coordOffsetThreshold,\n });\n\n if (offset > coordOffsetThreshold) {\n debug(\n 'cache coord offset exceeded threshold, fallback to AI locate',\n { offset, threshold: coordOffsetThreshold },\n );\n isCacheHit = false;\n }\n }\n\n if (isCacheHit && enableVisualVerify) {\n const verification = await this.service.verifyCachedElement(\n elementFromCache!.center,\n cachePrompt,\n modelConfigForDefaultIntent,\n uiContext,\n );\n if (!verification.pass) {\n debug(\n 'cache hit but visual verification failed, fallback to AI locate',\n {\n reason: verification.reason,\n description: verification.description,\n prompt: cachePrompt,\n },\n );\n isCacheHit = false;\n } else {\n debug('cache hit and visual verification passed', {\n description: verification.description,\n });\n }\n }\n } catch (verifyError) {\n debug(\n 'cache verification error, fallback to AI locate',\n verifyError,\n );\n isCacheHit = false;\n }\n }\n\n if (!isXpathHit && !isCacheHit && !isPlanHit) {\n try {\n setTimingFieldOnce(timing, 'callAiStart');\n locateResult = await this.service.locate(\n param,\n {\n context: uiContext,\n },\n modelConfigForDefaultIntent,\n abortSignal,\n );\n applyDump(locateResult.dump);\n elementFromAiLocate = locateResult.element;\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n } finally {\n setTimingFieldOnce(timing, 'callAiEnd');\n }\n }\n\n const element =\n elementFromBbox ||\n elementFromXpath ||\n elementFromCache ||\n elementFromAiLocate;\n\n // Check if locate cache already exists (for planHitFlag case)\n const locateCacheAlreadyExists = hasNonEmptyCache(\n locateCacheRecord?.cacheContent?.cache,\n );\n\n let currentCacheEntry: ElementCacheFeature | undefined;\n // Write cache if:\n // 1. element found\n // 2. taskCache enabled\n // 3. not a cache hit (otherwise we'd be writing what we just read)\n // 4. not already cached for plan hit case (avoid redundant writes), OR allow update if cache validation failed\n // 5. cacheable is not explicitly false\n if (\n element &&\n this.taskCache &&\n !isCacheHit &&\n (!isPlanHit || !locateCacheAlreadyExists) &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForPoint) {\n try {\n // Transform coordinates to logical space for cacheFeatureForPoint\n // cacheFeatureForPoint needs logical coordinates to locate elements in DOM\n let pointForCache: [number, number] = element.center;\n if (shrunkShotToLogicalRatio !== 1) {\n pointForCache = [\n Math.round(element.center[0] / shrunkShotToLogicalRatio),\n Math.round(element.center[1] / shrunkShotToLogicalRatio),\n ];\n debug(\n 'Transformed coordinates for cacheFeatureForPoint: %o -> %o',\n element.center,\n pointForCache,\n );\n }\n\n const feature = await this.interface.cacheFeatureForPoint(\n pointForCache,\n {\n targetDescription:\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt,\n modelConfig: modelConfigForDefaultIntent,\n },\n );\n if (hasNonEmptyCache(feature)) {\n feature.cachedCenter = pointForCache;\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n currentCacheEntry = feature;\n await this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: %s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForPoint failed: %s', error);\n }\n } else {\n debug('cacheFeatureForPoint is not supported, skip cache update');\n }\n }\n\n if (!element) {\n if (locateDump) {\n throw new ServiceError(\n `Element not found : ${param.prompt}`,\n locateDump,\n );\n }\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (isPlanHit) {\n hitBy = {\n from: 'Plan',\n context: {\n bbox: param.bbox,\n },\n };\n } else if (isXpathHit) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (isCacheHit) {\n hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element: {\n ...element,\n // backward compatibility for aiLocate, which return value needs a dpr field\n dpr: uiContext.deprecatedDpr,\n },\n },\n hitBy,\n };\n },\n };\n\n return taskLocator;\n }\n}\n"],"names":["debug","getDebug","hasNonEmptyCache","cache","Object","locatePlanForLocate","param","locate","locatePlan","TaskBuilder","plans","modelConfigForPlanning","modelConfigForDefaultIntent","options","tasks","cacheable","context","planHandlers","Map","plan","defaultHandler","handler","taskActionFinished","taskLocate","planType","actionSpace","action","item","Error","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","JSON","ifPlanLocateParamIsBbox","locateTask","result","assert","task","taskContext","timing","uiContext","setTimingFieldOnce","Promise","sleep","originalError","originalMessage","String","shrunkShotToLogicalRatio","undefined","parseActionParam","error","actionFn","actionResult","delayAfterRunner","detailedLocateParam","onResult","deepLocate","abortSignal","locateParam","taskLocator","locateDump","locateResult","applyDump","dump","elementFromBbox","matchElementFromPlan","isPlanHit","rectFromXpath","elementFromXpath","generateElementByRect","transformLogicalRectToScreenshotRect","isXpathHit","cachePrompt","locateCacheRecord","cacheEntry","elementFromCacheResult","matchElementFromCache","elementFromCache","transformLogicalElementToScreenshot","isCacheHit","elementFromAiLocate","cacheFeature","cachedCenter","coordOffsetThreshold","Number","process","MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD","enableCoordCheck","MIDSCENE_CACHE_ENABLE_COORD_CHECK","enableVisualVerify","MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY","offset","Math","verification","verifyError","ServiceError","element","locateCacheAlreadyExists","currentCacheEntry","pointForCache","feature","hitBy","interfaceInstance","service","taskCache","waitAfterAction"],"mappings":";;;;;;;;;;;;;;;;;;;AAwCA,MAAMA,QAAQC,SAAS;AAKvB,SAASC,iBAAiBC,KAAc;IACtC,OACEA,QAAAA,SAEA,AAAiB,YAAjB,OAAOA,SACPC,OAAO,IAAI,CAACD,OAAO,MAAM,GAAG;AAEhC;AAEO,SAASE,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACN,OAAOD;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAyBO,MAAMC;IAyBX,MAAa,MACXC,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCC,OAAsB,EACoB;QAC1C,MAAMC,QAA8B,EAAE;QACtC,MAAMC,YAAYF,SAAS;QAE3B,MAAMG,UAA4B;YAChCF;YACAH;YACAC;YACAG;YACA,YAAYF,SAAS;YACrB,aAAaA,SAAS;QACxB;QAIA,MAAMI,eAAe,IAAIC,IAAyB;YAChD;gBACE;gBACA,CAACC,OACC,IAAI,CAAC,gBAAgB,CACnBA,MACAH;aAEL;YACD;gBAAC;gBAAY,CAACG,OAAS,IAAI,CAAC,kBAAkB,CAACA,MAAMH;aAAS;SAC/D;QAED,MAAMI,iBAA8B,CAACD,OACnC,IAAI,CAAC,gBAAgB,CAACA,MAAMH;QAE9B,KAAK,MAAMG,QAAQT,MAAO;YACxB,MAAMW,UAAUJ,aAAa,GAAG,CAACE,KAAK,IAAI,KAAKC;YAC/C,MAAMC,QAAQF;QAChB;QAEA,OAAO;YACLL;QACF;IACF;IAEQ,mBACNK,IAAoB,EACpBH,OAAyB,EACnB;QACN,MAAMM,qBAAqD;YACzD,MAAM;YACN,SAAS;YACT,OAAO;YACP,SAASH,KAAK,OAAO;YACrB,UAAU,WAAa;QACzB;QACAH,QAAQ,KAAK,CAAC,IAAI,CAACM;IACrB;IAEA,MAAc,iBACZH,IAAyC,EACzCH,OAAyB,EACV;QACf,MAAMO,aAAa,IAAI,CAAC,gBAAgB,CAACJ,MAAMA,KAAK,KAAK,EAAEH;QAC3DA,QAAQ,KAAK,CAAC,IAAI,CAACO;IACrB;IAEA,MAAc,iBACZJ,IAAoB,EACpBH,OAAyB,EACV;QACf,MAAMQ,WAAWL,KAAK,IAAI;QAC1B,MAAMM,cAAc,IAAI,CAAC,WAAW;QACpC,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACE,OAASA,KAAK,IAAI,KAAKH;QACxD,MAAMlB,QAAQa,KAAK,KAAK;QAExB,IAAI,CAACO,QACH,MAAM,IAAIE,MAAM,CAAC,aAAa,EAAEJ,SAAS,WAAW,CAAC;QAGvD,MAAMK,eAAeH,SACjBI,4BAA4BJ,OAAO,WAAW,IAC9C,EAAE;QAEN,MAAMK,uBAAuBL,SACzBI,4BAA4BJ,OAAO,WAAW,EAAE,QAChD,EAAE;QAENG,aAAa,OAAO,CAAC,CAACG;YACpB,IAAI1B,KAAK,CAAC0B,MAAM,EAAE;gBAGhB,MAAMxB,aAAaH,oBAAoBC,KAAK,CAAC0B,MAAM;gBACnDhC,MACE,uCACA,CAAC,YAAY,EAAEwB,UAAU,EACzB,CAAC,MAAM,EAAES,KAAK,SAAS,CAAC3B,KAAK,CAAC0B,MAAM,GAAG,EACvC,CAAC,WAAW,EAAEC,KAAK,SAAS,CAACzB,aAAa,EAC1C,CAAC,QAAQ,EAAE0B,wBAAwB5B,KAAK,CAAC0B,MAAM,GAAG;gBAEpD,MAAMG,aAAa,IAAI,CAAC,gBAAgB,CACtC3B,YACAF,KAAK,CAAC0B,MAAM,EACZhB,SACA,CAACoB;oBACC9B,KAAK,CAAC0B,MAAM,GAAGI;gBACjB;gBAEFpB,QAAQ,KAAK,CAAC,IAAI,CAACmB;YACrB,OAAO;gBACLE,OACE,CAACN,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAER,UAAU;gBAE3ExB,MAAM,CAAC,OAAO,EAAEgC,MAAM,6BAA6B,EAAER,UAAU;YACjE;QACF;QAEA,MAAMc,OAKF;YACF,MAAM;YACN,SAASd;YACT,SAASL,KAAK,OAAO;YACrB,OAAOA,KAAK,KAAK;YACjB,UAAU,OAAOb,OAAOiC;gBACtB,MAAMC,SAASD,YAAY,IAAI,CAAC,MAAM;gBAEtCvC,MACE,oBACAwB,UACAlB,OACA,CAAC,4BAA4B,EAAEiC,YAAY,OAAO,EAAE,QAAQ;gBAG9D,MAAME,YAAYF,YAAY,SAAS;gBACvCF,OAAOI,WAAW;gBAElBV,qBAAqB,OAAO,CAAC,CAACC;oBAC5BK,OACE/B,KAAK,CAAC0B,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAER,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;gBAE9G;gBAEAkB,mBAAmBF,QAAQ;gBAC3B,IAAI;oBACF,MAAMG,QAAQ,GAAG,CAAC;wBACf;4BACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;gCACrC3C,MACE,CAAC,8DAA8D,EAAE0B,OAAO,IAAI,EAAE;gCAEhF,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAACA,OAAO,IAAI,EAAEpB;gCACrDN,MACE,CAAC,2DAA2D,EAAE0B,OAAO,IAAI,EAAE;4BAE/E;wBACF;wBACAkB,MAAM;qBACP;gBACH,EAAE,OAAOC,eAAoB;oBAC3B,MAAMC,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,wCAAwC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC5E;wBAAE,OAAOD;oBAAc;gBAE3B;gBACAH,mBAAmBF,QAAQ;gBAE3B,MAAM,EAAEQ,wBAAwB,EAAE,GAAGP;gBACrC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIF,OAAO,WAAW,EACpB,IAAI;oBACFpB,QAAQ4C,iBAAiB5C,OAAOoB,OAAO,WAAW,EAAE;wBAClDsB;oBACF;gBACF,EAAE,OAAOG,OAAY;oBACnB,MAAM,IAAIvB,MACR,CAAC,8BAA8B,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEyB,MAAM,OAAO,CAAC,cAAc,EAAElB,KAAK,SAAS,CAAC3B,QAAQ,EACtG;wBAAE,OAAO6C;oBAAM;gBAEnB;gBAGFT,mBAAmBF,QAAQ;gBAE3BxC,MAAM,kBAAkB0B,OAAO,IAAI;gBACnC,MAAM0B,WAAW1B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;gBAChD,MAAM2B,eAAe,MAAMD,SAAS9C,OAAOiC;gBAC3CG,mBAAmBF,QAAQ;gBAC3BxC,MAAM,iBAAiB0B,OAAO,IAAI,EAAE,WAAW2B;gBAE/CX,mBAAmBF,QAAQ;gBAE3B,MAAMc,mBACJ5B,OAAO,gBAAgB,IAAI,IAAI,CAAC,eAAe,IAAI;gBACrD,IAAI4B,mBAAmB,GACrB,MAAMV,MAAMU;gBAGd,IAAI;oBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;wBACpCtD,MACE,CAAC,6DAA6D,EAAE0B,OAAO,IAAI,EAAE;wBAE/E,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAACA,OAAO,IAAI,EAAEpB;wBACpDN,MACE,CAAC,0DAA0D,EAAE0B,OAAO,IAAI,EAAE;oBAE9E;gBACF,EAAE,OAAOmB,eAAoB;oBAC3B,MAAMC,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,uCAAuC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC3E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEAH,mBAAmBF,QAAQ;gBAE3B,OAAO;oBACL,QAAQa;gBACV;YACF;QACF;QAEArC,QAAQ,KAAK,CAAC,IAAI,CAACsB;IACrB;IAEQ,iBACNnB,IAAyC,EACzCoC,mBAAiD,EACjDvC,OAAyB,EACzBwC,QAAgD,EACd;QAClC,MAAM,EAAEzC,SAAS,EAAEH,2BAA2B,EAAE6C,UAAU,EAAEC,WAAW,EAAE,GACvE1C;QAEF,IAAI2C,cAAcJ;QAElB,IAAI,AAAuB,YAAvB,OAAOI,aACTA,cAAc;YACZ,QAAQA;QACV;QAGF,IAAI5C,AAAckC,WAAdlC,WACF4C,cAAc;YACZ,GAAGA,WAAW;YACd5C;QACF;QAGF,IAAI0C,cAAc,CAACE,YAAY,UAAU,EACvCA,cAAc;YACZ,GAAGA,WAAW;YACd,YAAY;QACd;QAGF,MAAMC,cAAgD;YACpD,MAAM;YACN,SAAS;YACT,OAAOD;YACP,SAASxC,KAAK,OAAO;YACrB,UAAU,OAAOb,OAAOiC;gBACtB,MAAM,EAAED,IAAI,EAAE,GAAGC;gBACjB,IAAI,EAAEE,SAAS,EAAE,GAAGF;gBAEpBF,OACE/B,OAAO,UAAUA,OAAO,MACxB,CAAC,qDAAqD,EAAE2B,KAAK,SAAS,CACpE3B,QACC;gBAGL,IAAI,CAACmC,WACHA,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB;gBAGnDJ,OAAOI,WAAW;gBAElB,MAAM,EAAEO,wBAAwB,EAAE,GAAGP;gBAErC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIiC;gBACJ,IAAIC;gBAEJ,MAAMC,YAAY,CAACC;oBACjB,IAAI,CAACA,MACH;oBAEFH,aAAaG;oBACb1B,KAAK,GAAG,GAAG;wBACT0B;wBACA,aAAaA,KAAK,QAAQ,EAAE;oBAC9B;oBACA1B,KAAK,KAAK,GAAG0B,KAAK,QAAQ,EAAE;oBAC5B,IAAIA,KAAK,QAAQ,EAAE,iBACjB1B,KAAK,eAAe,GAAG0B,KAAK,QAAQ,CAAC,eAAe;oBAEtD,IAAIA,KAAK,QAAQ,EAAE,mBACjB1B,KAAK,iBAAiB,GAAG0B,KAAK,QAAQ,CAAC,iBAAiB;gBAE5D;gBAGA,MAAMC,kBAAkB/B,wBAAwB5B,SAC5C4D,qBAAqB5D,SACrB2C;gBACJ,MAAMkB,YAAY,CAAC,CAACF;gBAGpB,IAAIG;gBACJ,IACE,CAACD,aACD7D,MAAM,KAAK,IACX,IAAI,CAAC,SAAS,CAAC,uBAAuB,EAEtC,IAAI;oBACF8D,gBAAgB,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC;wBAC3D,QAAQ;4BAAC9D,MAAM,KAAK;yBAAC;oBACvB;gBACF,EAAE,OAAM,CAER;gBAGF,MAAM+D,mBAAmBD,gBACrBE,sBAEEC,qCACEH,eACApB,2BAEF,AAAwB,YAAxB,OAAO1C,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE,UAAU,MAE9B2C;gBAEJ,MAAMuB,aAAa,CAAC,CAACH;gBAErB,MAAMI,cAAcnE,MAAM,MAAM;gBAChC,MAAMoE,oBACJ,MAAM,IAAI,CAAC,SAAS,EAAE,iBAAiBD;gBACzC,MAAME,aAAaD,mBAAmB,cAAc;gBAEpD,MAAME,yBACJT,aAAaK,aACT,OACA,MAAMK,sBACJ;oBACE,WAAW,IAAI,CAAC,SAAS;oBACzB,mBAAmB,IAAI,CAAC,SAAS;gBACnC,GACAF,YACAF,aACAnE,MAAM,SAAS;gBAIvB,MAAMwE,mBAAmBF,yBACrBG,oCACEH,wBACA5B,4BAEFC;gBAEJ,IAAI+B,aAAa,CAAC,CAACF;gBACnB,MAAMtC,SAASD,YAAY,IAAI,CAAC,MAAM;gBACtC,IAAI0C;gBAEJ,IAAID,YAAY;oBACd,MAAME,eAAeP;oBACrB,MAAMQ,eAAeD,cAAc;oBAInC,MAAME,uBACJC,OAAO,QAAQ,CACbC,QAAQ,GAAG,CAACC,sCAAsC,IAAI,MACtD,OACG;oBACP,MAAMC,mBACJF,AAAmD,YAAnDA,QAAQ,GAAG,CAACG,kCAAkC;oBAChD,MAAMC,qBACJJ,AAAqD,YAArDA,QAAQ,GAAG,CAACK,oCAAoC;oBAElD,IAAI;wBACF,IAAIH,oBAAoBL,cAAc;4BACpC,MAAMS,SAASC,KAAK,IAAI,CACrBf,AAAAA,CAAAA,iBAAkB,MAAM,CAAC,EAAE,GAAGK,YAAY,CAAC,EAAC,KAAM,IAChDL,AAAAA,CAAAA,iBAAkB,MAAM,CAAC,EAAE,GAAGK,YAAY,CAAC,EAAC,KAAM;4BAGvDnF,MAAM,4BAA4B;gCAChCmF;gCACA,eAAeL,iBAAkB,MAAM;gCACvC,QAAQe,KAAK,KAAK,CAACD;gCACnB,WAAWR;4BACb;4BAEA,IAAIQ,SAASR,sBAAsB;gCACjCpF,MACE,gEACA;oCAAE4F;oCAAQ,WAAWR;gCAAqB;gCAE5CJ,aAAa;4BACf;wBACF;wBAEA,IAAIA,cAAcU,oBAAoB;4BACpC,MAAMI,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,mBAAmB,CACzDhB,iBAAkB,MAAM,EACxBL,aACA7D,6BACA6B;4BAEF,IAAKqD,aAAa,IAAI,EAWpB9F,MAAM,4CAA4C;gCAChD,aAAa8F,aAAa,WAAW;4BACvC;iCAbsB;gCACtB9F,MACE,mEACA;oCACE,QAAQ8F,aAAa,MAAM;oCAC3B,aAAaA,aAAa,WAAW;oCACrC,QAAQrB;gCACV;gCAEFO,aAAa;4BACf;wBAKF;oBACF,EAAE,OAAOe,aAAa;wBACpB/F,MACE,mDACA+F;wBAEFf,aAAa;oBACf;gBACF;gBAEA,IAAI,CAACR,cAAc,CAACQ,cAAc,CAACb,WACjC,IAAI;oBACFzB,mBAAmBF,QAAQ;oBAC3BsB,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACtCxD,OACA;wBACE,SAASmC;oBACX,GACA7B,6BACA8C;oBAEFK,UAAUD,aAAa,IAAI;oBAC3BmB,sBAAsBnB,aAAa,OAAO;gBAC5C,EAAE,OAAOX,OAAO;oBACd,IAAIA,iBAAiB6C,cACnBjC,UAAUZ,MAAM,IAAI;oBAEtB,MAAMA;gBACR,SAAU;oBACRT,mBAAmBF,QAAQ;gBAC7B;gBAGF,MAAMyD,UACJhC,mBACAI,oBACAS,oBACAG;gBAGF,MAAMiB,2BAA2BhG,iBAC/BwE,mBAAmB,cAAc;gBAGnC,IAAIyB;gBAOJ,IACEF,WACA,IAAI,CAAC,SAAS,IACd,CAACjB,cACA,EAACb,aAAa,CAAC+B,wBAAuB,KACvC5F,OAAO,cAAc,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,oBAAoB,EACrC,IAAI;oBAGF,IAAI8F,gBAAkCH,QAAQ,MAAM;oBACpD,IAAIjD,AAA6B,MAA7BA,0BAAgC;wBAClCoD,gBAAgB;4BACdP,KAAK,KAAK,CAACI,QAAQ,MAAM,CAAC,EAAE,GAAGjD;4BAC/B6C,KAAK,KAAK,CAACI,QAAQ,MAAM,CAAC,EAAE,GAAGjD;yBAChC;wBACDhD,MACE,8DACAiG,QAAQ,MAAM,EACdG;oBAEJ;oBAEA,MAAMC,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,oBAAoB,CACvDD,eACA;wBACE,mBACE,AAAwB,YAAxB,OAAO9F,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE;wBACpB,aAAaM;oBACf;oBAEF,IAAIV,iBAAiBmG,UAAU;wBAC7BA,QAAQ,YAAY,GAAGD;wBACvBpG,MACE,uCACAyE,aACA4B;wBAEFF,oBAAoBE;wBACpB,MAAM,IAAI,CAAC,SAAS,CAAC,yBAAyB,CAC5C;4BACE,MAAM;4BACN,QAAQ5B;4BACR,OAAO4B;wBACT,GACA3B;oBAEJ,OACE1E,MACE,yDACAyE;gBAGN,EAAE,OAAOtB,OAAO;oBACdnD,MAAM,mCAAmCmD;gBAC3C;qBAEAnD,MAAM;gBAIV,IAAI,CAACiG,SAAS;oBACZ,IAAIpC,YACF,MAAM,IAAImC,aACR,CAAC,oBAAoB,EAAE1F,MAAM,MAAM,EAAE,EACrCuD;oBAGJ,MAAM,IAAIjC,MAAM,CAAC,mBAAmB,EAAEtB,MAAM,MAAM,EAAE;gBACtD;gBAEA,IAAIgG;gBAEJ,IAAInC,WACFmC,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,MAAMhG,MAAM,IAAI;oBAClB;gBACF;qBACK,IAAIkE,YACT8B,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,OAAOhG,MAAM,KAAK;oBACpB;gBACF;qBACK,IAAI0E,YACTsB,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP3B;wBACA,aAAawB;oBACf;gBACF;gBAGF3C,WAAWyC;gBAEX,OAAO;oBACL,QAAQ;wBACN,SAAS;4BACP,GAAGA,OAAO;4BAEV,KAAKxD,UAAU,aAAa;wBAC9B;oBACF;oBACA6D;gBACF;YACF;QACF;QAEA,OAAO1C;IACT;IAvnBA,YAAY,EACV2C,iBAAiB,EACjBC,OAAO,EACPC,SAAS,EACThF,WAAW,EACXiF,eAAe,EACC,CAAE;QAhBpB,uBAAiB,aAAjB;QAEA,uBAAiB,WAAjB;QAEA,uBAAiB,aAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAiB,mBAAjB;QASE,IAAI,CAAC,SAAS,GAAGH;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC;QACjB,IAAI,CAAC,WAAW,GAAGhF;QACnB,IAAI,CAAC,eAAe,GAAGiF;IACzB;AA4mBF"}
1
+ {"version":3,"file":"agent/task-builder.mjs","sources":["../../../src/agent/task-builder.ts"],"sourcesContent":["import {\n calculateConfidence,\n createInitialConfidenceState,\n createInitialProgressiveRecord,\n determineVerificationLevel,\n getVerificationActions,\n updateConfidenceOnVerify,\n updateProgressiveConvergence,\n} from '@/agent/cache-confidence';\nimport { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport { setTimingFieldOnce } from '@/task-timing';\nimport type {\n CacheConfidenceState,\n CacheValidationOptions,\n DetailedLocateParam,\n DeviceAction,\n ElementCacheFeature,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskPlanningLocateApply,\n LocateResultElement,\n LocateResultWithDump,\n PlanningAction,\n PlanningLocateParam,\n ProgressiveLocateRecord,\n Rect,\n SemanticAnchor,\n ServiceDump,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { sleep } from '@/utils';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD,\n MIDSCENE_CACHE_ENABLE_COORD_CHECK,\n MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR,\n MIDSCENE_CACHE_ENABLE_VISUAL_VERIFY,\n} from '@midscene/shared/env/constants';\nimport { generateElementByRect } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport {\n ifPlanLocateParamIsBbox,\n matchElementFromCache,\n matchElementFromPlan,\n transformLogicalElementToScreenshot,\n transformLogicalRectToScreenshotRect,\n} from './utils';\n\nconst debug = getDebug('agent:task-builder');\n\n/**\n * Check if a cache object is non-empty\n */\nfunction hasNonEmptyCache(cache: unknown): boolean {\n return (\n cache !== null &&\n cache !== undefined &&\n typeof cache === 'object' &&\n Object.keys(cache).length > 0\n );\n}\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? { prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\ninterface TaskBuilderDeps {\n interfaceInstance: AbstractInterface;\n service: Service;\n taskCache?: TaskCache;\n actionSpace: DeviceAction[];\n waitAfterAction?: number;\n}\n\ninterface BuildOptions {\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\ninterface PlanBuildContext {\n tasks: ExecutionTaskApply[];\n modelConfigForPlanning: IModelConfig;\n modelConfigForDefaultIntent: IModelConfig;\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n}\n\nexport class TaskBuilder {\n private readonly interface: AbstractInterface;\n\n private readonly service: Service;\n\n private readonly taskCache?: TaskCache;\n\n private readonly actionSpace: DeviceAction[];\n\n private readonly waitAfterAction?: number;\n\n constructor({\n interfaceInstance,\n service,\n taskCache,\n actionSpace,\n waitAfterAction,\n }: TaskBuilderDeps) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = taskCache;\n this.actionSpace = actionSpace;\n this.waitAfterAction = waitAfterAction;\n }\n\n public async build(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: BuildOptions,\n ): Promise<{ tasks: ExecutionTaskApply[] }> {\n const tasks: ExecutionTaskApply[] = [];\n const cacheable = options?.cacheable;\n\n const context: PlanBuildContext = {\n tasks,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n cacheable,\n deepLocate: options?.deepLocate,\n abortSignal: options?.abortSignal,\n };\n\n type PlanHandler = (plan: PlanningAction) => Promise<void> | void;\n\n const planHandlers = new Map<string, PlanHandler>([\n [\n 'Locate',\n (plan) =>\n this.handleLocatePlan(\n plan as PlanningAction<PlanningLocateParam>,\n context,\n ),\n ],\n ['Finished', (plan) => this.handleFinishedPlan(plan, context)],\n ]);\n\n const defaultHandler: PlanHandler = (plan) =>\n this.handleActionPlan(plan, context);\n\n for (const plan of plans) {\n const handler = planHandlers.get(plan.type) ?? defaultHandler;\n await handler(plan);\n }\n\n return {\n tasks,\n };\n }\n\n private handleFinishedPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): void {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action Space',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n executor: async () => {},\n };\n context.tasks.push(taskActionFinished);\n }\n\n private async handleLocatePlan(\n plan: PlanningAction<PlanningLocateParam>,\n context: PlanBuildContext,\n ): Promise<void> {\n const taskLocate = this.createLocateTask(plan, plan.param, context);\n context.tasks.push(taskLocate);\n }\n\n private async handleActionPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): Promise<void> {\n const planType = plan.type;\n const actionSpace = this.actionSpace;\n const action = actionSpace.find((item) => item.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n const locateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n // Always use createLocateTask for all locate params (including bbox)\n // This ensures cache writing happens even when bbox is available\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n `hasBbox=${ifPlanLocateParamIsBbox(param[field])}`,\n );\n const locateTask = this.createLocateTask(\n locatePlan,\n param[field],\n context,\n (result) => {\n param[field] = result;\n },\n );\n context.tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action Space',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action Space',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n executor: async (param, taskContext) => {\n const timing = taskContext.task.timing;\n\n debug(\n 'executing action',\n planType,\n param,\n `taskContext.element.center: ${taskContext.element?.center}`,\n );\n\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Action task');\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`,\n );\n });\n\n setTimingFieldOnce(timing, 'beforeInvokeActionHookStart');\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug(\n `will call \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.beforeInvokeAction(action.name, param);\n debug(\n `called \"beforeInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n setTimingFieldOnce(timing, 'beforeInvokeActionHookEnd');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in Action task',\n );\n }\n\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema, {\n shrunkShotToLogicalRatio,\n });\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n setTimingFieldOnce(timing, 'callActionStart');\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n const actionResult = await actionFn(param, taskContext);\n setTimingFieldOnce(timing, 'callActionEnd');\n debug('called action', action.name, 'result:', actionResult);\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookStart');\n\n const delayAfterRunner =\n action.delayAfterRunner ?? this.waitAfterAction ?? 300;\n if (delayAfterRunner > 0) {\n await sleep(delayAfterRunner);\n }\n\n try {\n if (this.interface.afterInvokeAction) {\n debug(\n `will call \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n await this.interface.afterInvokeAction(action.name, param);\n debug(\n `called \"afterInvokeAction\" for interface with action name ${action.name}`,\n );\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n setTimingFieldOnce(timing, 'afterInvokeActionHookEnd');\n\n return {\n output: actionResult,\n };\n },\n };\n\n context.tasks.push(task);\n }\n\n private createLocateTask(\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n context: PlanBuildContext,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskPlanningLocateApply {\n const { cacheable, modelConfigForDefaultIntent, deepLocate, abortSignal } =\n context;\n\n let locateParam = detailedLocateParam;\n\n if (typeof locateParam === 'string') {\n locateParam = {\n prompt: locateParam,\n };\n }\n\n if (cacheable !== undefined) {\n locateParam = {\n ...locateParam,\n cacheable,\n };\n }\n\n if (deepLocate && !locateParam.deepLocate) {\n locateParam = {\n ...locateParam,\n deepLocate: true,\n };\n }\n\n const taskLocator: ExecutionTaskPlanningLocateApply = {\n type: 'Planning',\n subType: 'Locate',\n param: locateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let { uiContext } = taskContext;\n\n assert(\n param?.prompt || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n\n if (!uiContext) {\n uiContext = await this.service.contextRetrieverFn();\n }\n\n assert(uiContext, 'uiContext is required for Service task');\n\n const { shrunkShotToLogicalRatio } = uiContext;\n\n if (shrunkShotToLogicalRatio === undefined) {\n throw new Error(\n 'shrunkShotToLogicalRatio is not defined in locate task',\n );\n }\n\n let locateDump: ServiceDump | undefined;\n let locateResult: LocateResultWithDump | undefined;\n\n const applyDump = (dump?: ServiceDump) => {\n if (!dump) {\n return;\n }\n locateDump = dump;\n task.log = {\n dump,\n rawResponse: dump.taskInfo?.rawResponse,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n if (dump.taskInfo?.reasoning_content) {\n task.reasoning_content = dump.taskInfo.reasoning_content;\n }\n };\n\n // from bbox (plan hit)\n const elementFromBbox = ifPlanLocateParamIsBbox(param)\n ? matchElementFromPlan(param)\n : undefined;\n const isPlanHit = !!elementFromBbox;\n\n // from xpath\n let rectFromXpath: Rect | undefined;\n if (\n !isPlanHit &&\n param.xpath &&\n this.interface.rectMatchesCacheFeature\n ) {\n try {\n rectFromXpath = await this.interface.rectMatchesCacheFeature({\n xpaths: [param.xpath],\n });\n } catch {\n // xpath locate failed, allow fallback to cache or AI locate\n }\n }\n\n const elementFromXpath = rectFromXpath\n ? generateElementByRect(\n // rectFromXpath is in logical coordinates, which should be transformed to screenshot coordinates;\n transformLogicalRectToScreenshotRect(\n rectFromXpath,\n shrunkShotToLogicalRatio,\n ),\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt || '',\n )\n : undefined;\n\n const isXpathHit = !!elementFromXpath;\n\n const cachePrompt = param.prompt;\n const locateCacheRecord =\n await this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n\n const elementFromCacheResult =\n isPlanHit || isXpathHit\n ? null\n : await matchElementFromCache(\n {\n taskCache: this.taskCache,\n interfaceInstance: this.interface,\n },\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n\n // elementFromCacheResult is in logical coordinates, which should be transformed to screenshot coordinates;\n let elementFromCache = elementFromCacheResult\n ? transformLogicalElementToScreenshot(\n elementFromCacheResult,\n shrunkShotToLogicalRatio,\n )\n : undefined;\n\n let isCacheHit = !!elementFromCache;\n const timing = taskContext.task.timing;\n let elementFromAiLocate: LocateResultElement | null | undefined;\n\n if (isCacheHit) {\n const cacheFeature = cacheEntry as ElementCacheFeature;\n const cachedCenter = cacheFeature?.cachedCenter as\n | [number, number]\n | undefined;\n\n const coordOffsetThreshold =\n Number.parseInt(\n process.env[MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD] || '16',\n 10,\n ) || 16;\n\n const confidenceState = (cacheFeature?.confidenceState ||\n createInitialConfidenceState()) as CacheConfidenceState;\n const confidence = calculateConfidence(confidenceState);\n const level = determineVerificationLevel(confidence);\n const actions = getVerificationActions(level);\n\n debug('cache confidence assessment', {\n confidence: confidence.toFixed(3),\n level,\n actions,\n verificationCount: confidenceState.verificationCount,\n });\n\n if (actions.skipCache) {\n debug('cache confidence too low, skipping cache entirely', {\n confidence,\n level,\n });\n isCacheHit = false;\n }\n\n try {\n if (isCacheHit && actions.coordCheck && cachedCenter) {\n const offset = Math.sqrt(\n (elementFromCache!.center[0] - cachedCenter[0]) ** 2 +\n (elementFromCache!.center[1] - cachedCenter[1]) ** 2,\n );\n\n debug('cache coord offset check', {\n cachedCenter,\n currentCenter: elementFromCache!.center,\n offset: Math.round(offset),\n threshold: coordOffsetThreshold,\n });\n\n if (offset > coordOffsetThreshold) {\n debug(\n 'cache coord offset exceeded threshold, fallback to AI locate',\n { offset, threshold: coordOffsetThreshold },\n );\n isCacheHit = false;\n }\n }\n\n if (isCacheHit && actions.visualVerify) {\n const verification = await this.service.verifyCachedElement(\n elementFromCache!.center,\n cachePrompt,\n modelConfigForDefaultIntent,\n uiContext,\n );\n if (!verification.pass) {\n debug(\n 'cache hit but visual verification failed, fallback to AI locate',\n {\n reason: verification.reason,\n description: verification.description,\n prompt: cachePrompt,\n },\n );\n isCacheHit = false;\n } else {\n debug('cache hit and visual verification passed', {\n description: verification.description,\n });\n }\n }\n } catch (verifyError) {\n debug(\n 'cache verification error, fallback to AI locate',\n verifyError,\n );\n isCacheHit = false;\n }\n\n if (isCacheHit) {\n const updatedState = updateConfidenceOnVerify(\n confidenceState,\n true,\n );\n cacheFeature.confidenceState = updatedState;\n\n const progressiveRecord = cacheFeature.progressiveRecord as\n | ProgressiveLocateRecord\n | undefined;\n if (progressiveRecord) {\n const updated = updateProgressiveConvergence(\n progressiveRecord,\n elementFromCache!.center,\n updatedState.confidenceScore,\n );\n cacheFeature.progressiveRecord = updated;\n\n if (updated.convergenceRadius < 5 && updated.sampleCount >= 3) {\n debug(\n 'using converged center instead of single-result center',\n {\n convergedCenter: updated.convergedCenter.map((v) =>\n v.toFixed(1),\n ),\n singleCenter: elementFromCache!.center,\n convergenceRadius: updated.convergenceRadius.toFixed(1),\n sampleCount: updated.sampleCount,\n },\n );\n elementFromCache = {\n ...elementFromCache!,\n center: [\n Math.round(updated.convergedCenter[0]),\n Math.round(updated.convergedCenter[1]),\n ],\n };\n }\n }\n } else {\n const updatedState = updateConfidenceOnVerify(\n confidenceState,\n false,\n );\n cacheFeature.confidenceState = updatedState;\n }\n }\n\n if (!isXpathHit && !isCacheHit && !isPlanHit) {\n const cacheFeature = cacheEntry as ElementCacheFeature;\n const semanticAnchor = cacheFeature?.semanticAnchor as\n | SemanticAnchor\n | undefined;\n\n if (\n semanticAnchor &&\n process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR] !== 'false'\n ) {\n try {\n const anchorResult = await this.service.locateBySemanticAnchor(\n semanticAnchor,\n modelConfigForDefaultIntent,\n this.interface,\n uiContext,\n );\n if (anchorResult) {\n elementFromAiLocate = anchorResult;\n debug(\n 'semantic anchor locate succeeded, skipping full AI locate',\n );\n }\n } catch (anchorError) {\n debug('semantic anchor locate failed:', anchorError);\n }\n }\n\n if (!elementFromAiLocate) {\n try {\n setTimingFieldOnce(timing, 'callAiStart');\n locateResult = await this.service.locate(\n param,\n {\n context: uiContext,\n },\n modelConfigForDefaultIntent,\n abortSignal,\n );\n applyDump(locateResult.dump);\n elementFromAiLocate = locateResult.element;\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n } finally {\n setTimingFieldOnce(timing, 'callAiEnd');\n }\n }\n }\n\n const element =\n elementFromBbox ||\n elementFromXpath ||\n elementFromCache ||\n elementFromAiLocate;\n\n // Check if locate cache already exists (for planHitFlag case)\n const locateCacheAlreadyExists = hasNonEmptyCache(\n locateCacheRecord?.cacheContent?.cache,\n );\n\n let currentCacheEntry: ElementCacheFeature | undefined;\n // Write cache if:\n // 1. element found\n // 2. taskCache enabled\n // 3. not a cache hit (otherwise we'd be writing what we just read)\n // 4. not already cached for plan hit case (avoid redundant writes), OR allow update if cache validation failed\n // 5. cacheable is not explicitly false\n if (\n element &&\n this.taskCache &&\n !isCacheHit &&\n (!isPlanHit || !locateCacheAlreadyExists) &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForPoint) {\n try {\n // Transform coordinates to logical space for cacheFeatureForPoint\n // cacheFeatureForPoint needs logical coordinates to locate elements in DOM\n let pointForCache: [number, number] = element.center;\n if (shrunkShotToLogicalRatio !== 1) {\n pointForCache = [\n Math.round(element.center[0] / shrunkShotToLogicalRatio),\n Math.round(element.center[1] / shrunkShotToLogicalRatio),\n ];\n debug(\n 'Transformed coordinates for cacheFeatureForPoint: %o -> %o',\n element.center,\n pointForCache,\n );\n }\n\n const feature = await this.interface.cacheFeatureForPoint(\n pointForCache,\n {\n targetDescription:\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt,\n modelConfig: modelConfigForDefaultIntent,\n },\n );\n if (hasNonEmptyCache(feature)) {\n feature.cachedCenter = pointForCache;\n feature.confidenceState = createInitialConfidenceState();\n feature.progressiveRecord =\n createInitialProgressiveRecord(pointForCache);\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n\n const enableSemanticAnchor =\n process.env[MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR] !==\n 'false';\n if (enableSemanticAnchor) {\n try {\n const anchor = await this.service.generateSemanticAnchor(\n pointForCache,\n modelConfigForDefaultIntent,\n uiContext,\n );\n if (anchor) {\n feature.semanticAnchor = anchor;\n debug(\n 'semantic anchor generated for prompt: %s',\n cachePrompt,\n );\n }\n } catch (anchorError) {\n debug('generateSemanticAnchor failed:', anchorError);\n }\n }\n\n currentCacheEntry = feature;\n await this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: %s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForPoint failed: %s', error);\n }\n } else {\n debug('cacheFeatureForPoint is not supported, skip cache update');\n }\n }\n\n if (!element) {\n if (locateDump) {\n throw new ServiceError(\n `Element not found : ${param.prompt}`,\n locateDump,\n );\n }\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (isPlanHit) {\n hitBy = {\n from: 'Plan',\n context: {\n bbox: param.bbox,\n },\n };\n } else if (isXpathHit) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (isCacheHit) {\n hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element: {\n ...element,\n // backward compatibility for aiLocate, which return value needs a dpr field\n dpr: uiContext.deprecatedDpr,\n },\n },\n hitBy,\n };\n },\n };\n\n return taskLocator;\n }\n}\n"],"names":["debug","getDebug","hasNonEmptyCache","cache","Object","locatePlanForLocate","param","locate","locatePlan","TaskBuilder","plans","modelConfigForPlanning","modelConfigForDefaultIntent","options","tasks","cacheable","context","planHandlers","Map","plan","defaultHandler","handler","taskActionFinished","taskLocate","planType","actionSpace","action","item","Error","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","JSON","ifPlanLocateParamIsBbox","locateTask","result","assert","task","taskContext","timing","uiContext","setTimingFieldOnce","Promise","sleep","originalError","originalMessage","String","shrunkShotToLogicalRatio","undefined","parseActionParam","error","actionFn","actionResult","delayAfterRunner","detailedLocateParam","onResult","deepLocate","abortSignal","locateParam","taskLocator","locateDump","locateResult","applyDump","dump","elementFromBbox","matchElementFromPlan","isPlanHit","rectFromXpath","elementFromXpath","generateElementByRect","transformLogicalRectToScreenshotRect","isXpathHit","cachePrompt","locateCacheRecord","cacheEntry","elementFromCacheResult","matchElementFromCache","elementFromCache","transformLogicalElementToScreenshot","isCacheHit","elementFromAiLocate","cacheFeature","cachedCenter","coordOffsetThreshold","Number","process","MIDSCENE_CACHE_COORD_OFFSET_THRESHOLD","confidenceState","createInitialConfidenceState","confidence","calculateConfidence","level","determineVerificationLevel","actions","getVerificationActions","offset","Math","verification","verifyError","updatedState","updateConfidenceOnVerify","progressiveRecord","updated","updateProgressiveConvergence","v","semanticAnchor","MIDSCENE_CACHE_ENABLE_SEMANTIC_ANCHOR","anchorResult","anchorError","ServiceError","element","locateCacheAlreadyExists","currentCacheEntry","pointForCache","feature","createInitialProgressiveRecord","enableSemanticAnchor","anchor","hitBy","interfaceInstance","service","taskCache","waitAfterAction"],"mappings":";;;;;;;;;;;;;;;;;;;;AAqDA,MAAMA,QAAQC,SAAS;AAKvB,SAASC,iBAAiBC,KAAc;IACtC,OACEA,QAAAA,SAEA,AAAiB,YAAjB,OAAOA,SACPC,OAAO,IAAI,CAACD,OAAO,MAAM,GAAG;AAEhC;AAEO,SAASE,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACN,OAAOD;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAyBO,MAAMC;IAyBX,MAAa,MACXC,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCC,OAAsB,EACoB;QAC1C,MAAMC,QAA8B,EAAE;QACtC,MAAMC,YAAYF,SAAS;QAE3B,MAAMG,UAA4B;YAChCF;YACAH;YACAC;YACAG;YACA,YAAYF,SAAS;YACrB,aAAaA,SAAS;QACxB;QAIA,MAAMI,eAAe,IAAIC,IAAyB;YAChD;gBACE;gBACA,CAACC,OACC,IAAI,CAAC,gBAAgB,CACnBA,MACAH;aAEL;YACD;gBAAC;gBAAY,CAACG,OAAS,IAAI,CAAC,kBAAkB,CAACA,MAAMH;aAAS;SAC/D;QAED,MAAMI,iBAA8B,CAACD,OACnC,IAAI,CAAC,gBAAgB,CAACA,MAAMH;QAE9B,KAAK,MAAMG,QAAQT,MAAO;YACxB,MAAMW,UAAUJ,aAAa,GAAG,CAACE,KAAK,IAAI,KAAKC;YAC/C,MAAMC,QAAQF;QAChB;QAEA,OAAO;YACLL;QACF;IACF;IAEQ,mBACNK,IAAoB,EACpBH,OAAyB,EACnB;QACN,MAAMM,qBAAqD;YACzD,MAAM;YACN,SAAS;YACT,OAAO;YACP,SAASH,KAAK,OAAO;YACrB,UAAU,WAAa;QACzB;QACAH,QAAQ,KAAK,CAAC,IAAI,CAACM;IACrB;IAEA,MAAc,iBACZH,IAAyC,EACzCH,OAAyB,EACV;QACf,MAAMO,aAAa,IAAI,CAAC,gBAAgB,CAACJ,MAAMA,KAAK,KAAK,EAAEH;QAC3DA,QAAQ,KAAK,CAAC,IAAI,CAACO;IACrB;IAEA,MAAc,iBACZJ,IAAoB,EACpBH,OAAyB,EACV;QACf,MAAMQ,WAAWL,KAAK,IAAI;QAC1B,MAAMM,cAAc,IAAI,CAAC,WAAW;QACpC,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACE,OAASA,KAAK,IAAI,KAAKH;QACxD,MAAMlB,QAAQa,KAAK,KAAK;QAExB,IAAI,CAACO,QACH,MAAM,IAAIE,MAAM,CAAC,aAAa,EAAEJ,SAAS,WAAW,CAAC;QAGvD,MAAMK,eAAeH,SACjBI,4BAA4BJ,OAAO,WAAW,IAC9C,EAAE;QAEN,MAAMK,uBAAuBL,SACzBI,4BAA4BJ,OAAO,WAAW,EAAE,QAChD,EAAE;QAENG,aAAa,OAAO,CAAC,CAACG;YACpB,IAAI1B,KAAK,CAAC0B,MAAM,EAAE;gBAGhB,MAAMxB,aAAaH,oBAAoBC,KAAK,CAAC0B,MAAM;gBACnDhC,MACE,uCACA,CAAC,YAAY,EAAEwB,UAAU,EACzB,CAAC,MAAM,EAAES,KAAK,SAAS,CAAC3B,KAAK,CAAC0B,MAAM,GAAG,EACvC,CAAC,WAAW,EAAEC,KAAK,SAAS,CAACzB,aAAa,EAC1C,CAAC,QAAQ,EAAE0B,wBAAwB5B,KAAK,CAAC0B,MAAM,GAAG;gBAEpD,MAAMG,aAAa,IAAI,CAAC,gBAAgB,CACtC3B,YACAF,KAAK,CAAC0B,MAAM,EACZhB,SACA,CAACoB;oBACC9B,KAAK,CAAC0B,MAAM,GAAGI;gBACjB;gBAEFpB,QAAQ,KAAK,CAAC,IAAI,CAACmB;YACrB,OAAO;gBACLE,OACE,CAACN,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAER,UAAU;gBAE3ExB,MAAM,CAAC,OAAO,EAAEgC,MAAM,6BAA6B,EAAER,UAAU;YACjE;QACF;QAEA,MAAMc,OAKF;YACF,MAAM;YACN,SAASd;YACT,SAASL,KAAK,OAAO;YACrB,OAAOA,KAAK,KAAK;YACjB,UAAU,OAAOb,OAAOiC;gBACtB,MAAMC,SAASD,YAAY,IAAI,CAAC,MAAM;gBAEtCvC,MACE,oBACAwB,UACAlB,OACA,CAAC,4BAA4B,EAAEiC,YAAY,OAAO,EAAE,QAAQ;gBAG9D,MAAME,YAAYF,YAAY,SAAS;gBACvCF,OAAOI,WAAW;gBAElBV,qBAAqB,OAAO,CAAC,CAACC;oBAC5BK,OACE/B,KAAK,CAAC0B,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAER,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;gBAE9G;gBAEAkB,mBAAmBF,QAAQ;gBAC3B,IAAI;oBACF,MAAMG,QAAQ,GAAG,CAAC;wBACf;4BACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;gCACrC3C,MACE,CAAC,8DAA8D,EAAE0B,OAAO,IAAI,EAAE;gCAEhF,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAACA,OAAO,IAAI,EAAEpB;gCACrDN,MACE,CAAC,2DAA2D,EAAE0B,OAAO,IAAI,EAAE;4BAE/E;wBACF;wBACAkB,MAAM;qBACP;gBACH,EAAE,OAAOC,eAAoB;oBAC3B,MAAMC,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,wCAAwC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC5E;wBAAE,OAAOD;oBAAc;gBAE3B;gBACAH,mBAAmBF,QAAQ;gBAE3B,MAAM,EAAEQ,wBAAwB,EAAE,GAAGP;gBACrC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIF,OAAO,WAAW,EACpB,IAAI;oBACFpB,QAAQ4C,iBAAiB5C,OAAOoB,OAAO,WAAW,EAAE;wBAClDsB;oBACF;gBACF,EAAE,OAAOG,OAAY;oBACnB,MAAM,IAAIvB,MACR,CAAC,8BAA8B,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEyB,MAAM,OAAO,CAAC,cAAc,EAAElB,KAAK,SAAS,CAAC3B,QAAQ,EACtG;wBAAE,OAAO6C;oBAAM;gBAEnB;gBAGFT,mBAAmBF,QAAQ;gBAE3BxC,MAAM,kBAAkB0B,OAAO,IAAI;gBACnC,MAAM0B,WAAW1B,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;gBAChD,MAAM2B,eAAe,MAAMD,SAAS9C,OAAOiC;gBAC3CG,mBAAmBF,QAAQ;gBAC3BxC,MAAM,iBAAiB0B,OAAO,IAAI,EAAE,WAAW2B;gBAE/CX,mBAAmBF,QAAQ;gBAE3B,MAAMc,mBACJ5B,OAAO,gBAAgB,IAAI,IAAI,CAAC,eAAe,IAAI;gBACrD,IAAI4B,mBAAmB,GACrB,MAAMV,MAAMU;gBAGd,IAAI;oBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;wBACpCtD,MACE,CAAC,6DAA6D,EAAE0B,OAAO,IAAI,EAAE;wBAE/E,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAACA,OAAO,IAAI,EAAEpB;wBACpDN,MACE,CAAC,0DAA0D,EAAE0B,OAAO,IAAI,EAAE;oBAE9E;gBACF,EAAE,OAAOmB,eAAoB;oBAC3B,MAAMC,kBACJD,eAAe,WAAWE,OAAOF;oBACnC,MAAM,IAAIjB,MACR,CAAC,uCAAuC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,iBAAiB,EAC3E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEAH,mBAAmBF,QAAQ;gBAE3B,OAAO;oBACL,QAAQa;gBACV;YACF;QACF;QAEArC,QAAQ,KAAK,CAAC,IAAI,CAACsB;IACrB;IAEQ,iBACNnB,IAAyC,EACzCoC,mBAAiD,EACjDvC,OAAyB,EACzBwC,QAAgD,EACd;QAClC,MAAM,EAAEzC,SAAS,EAAEH,2BAA2B,EAAE6C,UAAU,EAAEC,WAAW,EAAE,GACvE1C;QAEF,IAAI2C,cAAcJ;QAElB,IAAI,AAAuB,YAAvB,OAAOI,aACTA,cAAc;YACZ,QAAQA;QACV;QAGF,IAAI5C,AAAckC,WAAdlC,WACF4C,cAAc;YACZ,GAAGA,WAAW;YACd5C;QACF;QAGF,IAAI0C,cAAc,CAACE,YAAY,UAAU,EACvCA,cAAc;YACZ,GAAGA,WAAW;YACd,YAAY;QACd;QAGF,MAAMC,cAAgD;YACpD,MAAM;YACN,SAAS;YACT,OAAOD;YACP,SAASxC,KAAK,OAAO;YACrB,UAAU,OAAOb,OAAOiC;gBACtB,MAAM,EAAED,IAAI,EAAE,GAAGC;gBACjB,IAAI,EAAEE,SAAS,EAAE,GAAGF;gBAEpBF,OACE/B,OAAO,UAAUA,OAAO,MACxB,CAAC,qDAAqD,EAAE2B,KAAK,SAAS,CACpE3B,QACC;gBAGL,IAAI,CAACmC,WACHA,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB;gBAGnDJ,OAAOI,WAAW;gBAElB,MAAM,EAAEO,wBAAwB,EAAE,GAAGP;gBAErC,IAAIO,AAA6BC,WAA7BD,0BACF,MAAM,IAAIpB,MACR;gBAIJ,IAAIiC;gBACJ,IAAIC;gBAEJ,MAAMC,YAAY,CAACC;oBACjB,IAAI,CAACA,MACH;oBAEFH,aAAaG;oBACb1B,KAAK,GAAG,GAAG;wBACT0B;wBACA,aAAaA,KAAK,QAAQ,EAAE;oBAC9B;oBACA1B,KAAK,KAAK,GAAG0B,KAAK,QAAQ,EAAE;oBAC5B,IAAIA,KAAK,QAAQ,EAAE,iBACjB1B,KAAK,eAAe,GAAG0B,KAAK,QAAQ,CAAC,eAAe;oBAEtD,IAAIA,KAAK,QAAQ,EAAE,mBACjB1B,KAAK,iBAAiB,GAAG0B,KAAK,QAAQ,CAAC,iBAAiB;gBAE5D;gBAGA,MAAMC,kBAAkB/B,wBAAwB5B,SAC5C4D,qBAAqB5D,SACrB2C;gBACJ,MAAMkB,YAAY,CAAC,CAACF;gBAGpB,IAAIG;gBACJ,IACE,CAACD,aACD7D,MAAM,KAAK,IACX,IAAI,CAAC,SAAS,CAAC,uBAAuB,EAEtC,IAAI;oBACF8D,gBAAgB,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC;wBAC3D,QAAQ;4BAAC9D,MAAM,KAAK;yBAAC;oBACvB;gBACF,EAAE,OAAM,CAER;gBAGF,MAAM+D,mBAAmBD,gBACrBE,sBAEEC,qCACEH,eACApB,2BAEF,AAAwB,YAAxB,OAAO1C,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE,UAAU,MAE9B2C;gBAEJ,MAAMuB,aAAa,CAAC,CAACH;gBAErB,MAAMI,cAAcnE,MAAM,MAAM;gBAChC,MAAMoE,oBACJ,MAAM,IAAI,CAAC,SAAS,EAAE,iBAAiBD;gBACzC,MAAME,aAAaD,mBAAmB,cAAc;gBAEpD,MAAME,yBACJT,aAAaK,aACT,OACA,MAAMK,sBACJ;oBACE,WAAW,IAAI,CAAC,SAAS;oBACzB,mBAAmB,IAAI,CAAC,SAAS;gBACnC,GACAF,YACAF,aACAnE,MAAM,SAAS;gBAIvB,IAAIwE,mBAAmBF,yBACnBG,oCACEH,wBACA5B,4BAEFC;gBAEJ,IAAI+B,aAAa,CAAC,CAACF;gBACnB,MAAMtC,SAASD,YAAY,IAAI,CAAC,MAAM;gBACtC,IAAI0C;gBAEJ,IAAID,YAAY;oBACd,MAAME,eAAeP;oBACrB,MAAMQ,eAAeD,cAAc;oBAInC,MAAME,uBACJC,OAAO,QAAQ,CACbC,QAAQ,GAAG,CAACC,sCAAsC,IAAI,MACtD,OACG;oBAEP,MAAMC,kBAAmBN,cAAc,mBACrCO;oBACF,MAAMC,aAAaC,oBAAoBH;oBACvC,MAAMI,QAAQC,2BAA2BH;oBACzC,MAAMI,UAAUC,uBAAuBH;oBAEvC5F,MAAM,+BAA+B;wBACnC,YAAY0F,WAAW,OAAO,CAAC;wBAC/BE;wBACAE;wBACA,mBAAmBN,gBAAgB,iBAAiB;oBACtD;oBAEA,IAAIM,QAAQ,SAAS,EAAE;wBACrB9F,MAAM,qDAAqD;4BACzD0F;4BACAE;wBACF;wBACAZ,aAAa;oBACf;oBAEA,IAAI;wBACF,IAAIA,cAAcc,QAAQ,UAAU,IAAIX,cAAc;4BACpD,MAAMa,SAASC,KAAK,IAAI,CACrBnB,AAAAA,CAAAA,iBAAkB,MAAM,CAAC,EAAE,GAAGK,YAAY,CAAC,EAAC,KAAM,IAChDL,AAAAA,CAAAA,iBAAkB,MAAM,CAAC,EAAE,GAAGK,YAAY,CAAC,EAAC,KAAM;4BAGvDnF,MAAM,4BAA4B;gCAChCmF;gCACA,eAAeL,iBAAkB,MAAM;gCACvC,QAAQmB,KAAK,KAAK,CAACD;gCACnB,WAAWZ;4BACb;4BAEA,IAAIY,SAASZ,sBAAsB;gCACjCpF,MACE,gEACA;oCAAEgG;oCAAQ,WAAWZ;gCAAqB;gCAE5CJ,aAAa;4BACf;wBACF;wBAEA,IAAIA,cAAcc,QAAQ,YAAY,EAAE;4BACtC,MAAMI,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,mBAAmB,CACzDpB,iBAAkB,MAAM,EACxBL,aACA7D,6BACA6B;4BAEF,IAAKyD,aAAa,IAAI,EAWpBlG,MAAM,4CAA4C;gCAChD,aAAakG,aAAa,WAAW;4BACvC;iCAbsB;gCACtBlG,MACE,mEACA;oCACE,QAAQkG,aAAa,MAAM;oCAC3B,aAAaA,aAAa,WAAW;oCACrC,QAAQzB;gCACV;gCAEFO,aAAa;4BACf;wBAKF;oBACF,EAAE,OAAOmB,aAAa;wBACpBnG,MACE,mDACAmG;wBAEFnB,aAAa;oBACf;oBAEA,IAAIA,YAAY;wBACd,MAAMoB,eAAeC,yBACnBb,iBACA;wBAEFN,aAAa,eAAe,GAAGkB;wBAE/B,MAAME,oBAAoBpB,aAAa,iBAAiB;wBAGxD,IAAIoB,mBAAmB;4BACrB,MAAMC,UAAUC,6BACdF,mBACAxB,iBAAkB,MAAM,EACxBsB,aAAa,eAAe;4BAE9BlB,aAAa,iBAAiB,GAAGqB;4BAEjC,IAAIA,QAAQ,iBAAiB,GAAG,KAAKA,QAAQ,WAAW,IAAI,GAAG;gCAC7DvG,MACE,0DACA;oCACE,iBAAiBuG,QAAQ,eAAe,CAAC,GAAG,CAAC,CAACE,IAC5CA,EAAE,OAAO,CAAC;oCAEZ,cAAc3B,iBAAkB,MAAM;oCACtC,mBAAmByB,QAAQ,iBAAiB,CAAC,OAAO,CAAC;oCACrD,aAAaA,QAAQ,WAAW;gCAClC;gCAEFzB,mBAAmB;oCACjB,GAAGA,gBAAgB;oCACnB,QAAQ;wCACNmB,KAAK,KAAK,CAACM,QAAQ,eAAe,CAAC,EAAE;wCACrCN,KAAK,KAAK,CAACM,QAAQ,eAAe,CAAC,EAAE;qCACtC;gCACH;4BACF;wBACF;oBACF,OAAO;wBACL,MAAMH,eAAeC,yBACnBb,iBACA;wBAEFN,aAAa,eAAe,GAAGkB;oBACjC;gBACF;gBAEA,IAAI,CAAC5B,cAAc,CAACQ,cAAc,CAACb,WAAW;oBAC5C,MAAMe,eAAeP;oBACrB,MAAM+B,iBAAiBxB,cAAc;oBAIrC,IACEwB,kBACApB,AAAuD,YAAvDA,QAAQ,GAAG,CAACqB,sCAAsC,EAElD,IAAI;wBACF,MAAMC,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,sBAAsB,CAC5DF,gBACA9F,6BACA,IAAI,CAAC,SAAS,EACd6B;wBAEF,IAAImE,cAAc;4BAChB3B,sBAAsB2B;4BACtB5G,MACE;wBAEJ;oBACF,EAAE,OAAO6G,aAAa;wBACpB7G,MAAM,kCAAkC6G;oBAC1C;oBAGF,IAAI,CAAC5B,qBACH,IAAI;wBACFvC,mBAAmBF,QAAQ;wBAC3BsB,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACtCxD,OACA;4BACE,SAASmC;wBACX,GACA7B,6BACA8C;wBAEFK,UAAUD,aAAa,IAAI;wBAC3BmB,sBAAsBnB,aAAa,OAAO;oBAC5C,EAAE,OAAOX,OAAO;wBACd,IAAIA,iBAAiB2D,cACnB/C,UAAUZ,MAAM,IAAI;wBAEtB,MAAMA;oBACR,SAAU;wBACRT,mBAAmBF,QAAQ;oBAC7B;gBAEJ;gBAEA,MAAMuE,UACJ9C,mBACAI,oBACAS,oBACAG;gBAGF,MAAM+B,2BAA2B9G,iBAC/BwE,mBAAmB,cAAc;gBAGnC,IAAIuC;gBAOJ,IACEF,WACA,IAAI,CAAC,SAAS,IACd,CAAC/B,cACA,EAACb,aAAa,CAAC6C,wBAAuB,KACvC1G,OAAO,cAAc,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,oBAAoB,EACrC,IAAI;oBAGF,IAAI4G,gBAAkCH,QAAQ,MAAM;oBACpD,IAAI/D,AAA6B,MAA7BA,0BAAgC;wBAClCkE,gBAAgB;4BACdjB,KAAK,KAAK,CAACc,QAAQ,MAAM,CAAC,EAAE,GAAG/D;4BAC/BiD,KAAK,KAAK,CAACc,QAAQ,MAAM,CAAC,EAAE,GAAG/D;yBAChC;wBACDhD,MACE,8DACA+G,QAAQ,MAAM,EACdG;oBAEJ;oBAEA,MAAMC,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,oBAAoB,CACvDD,eACA;wBACE,mBACE,AAAwB,YAAxB,OAAO5G,MAAM,MAAM,GACfA,MAAM,MAAM,GACZA,MAAM,MAAM,EAAE;wBACpB,aAAaM;oBACf;oBAEF,IAAIV,iBAAiBiH,UAAU;wBAC7BA,QAAQ,YAAY,GAAGD;wBACvBC,QAAQ,eAAe,GAAG1B;wBAC1B0B,QAAQ,iBAAiB,GACvBC,+BAA+BF;wBACjClH,MACE,uCACAyE,aACA0C;wBAGF,MAAME,uBACJ/B,AACA,YADAA,QAAQ,GAAG,CAACqB,sCAAsC;wBAEpD,IAAIU,sBACF,IAAI;4BACF,MAAMC,SAAS,MAAM,IAAI,CAAC,OAAO,CAAC,sBAAsB,CACtDJ,eACAtG,6BACA6B;4BAEF,IAAI6E,QAAQ;gCACVH,QAAQ,cAAc,GAAGG;gCACzBtH,MACE,4CACAyE;4BAEJ;wBACF,EAAE,OAAOoC,aAAa;4BACpB7G,MAAM,kCAAkC6G;wBAC1C;wBAGFI,oBAAoBE;wBACpB,MAAM,IAAI,CAAC,SAAS,CAAC,yBAAyB,CAC5C;4BACE,MAAM;4BACN,QAAQ1C;4BACR,OAAO0C;wBACT,GACAzC;oBAEJ,OACE1E,MACE,yDACAyE;gBAGN,EAAE,OAAOtB,OAAO;oBACdnD,MAAM,mCAAmCmD;gBAC3C;qBAEAnD,MAAM;gBAIV,IAAI,CAAC+G,SAAS;oBACZ,IAAIlD,YACF,MAAM,IAAIiD,aACR,CAAC,oBAAoB,EAAExG,MAAM,MAAM,EAAE,EACrCuD;oBAGJ,MAAM,IAAIjC,MAAM,CAAC,mBAAmB,EAAEtB,MAAM,MAAM,EAAE;gBACtD;gBAEA,IAAIiH;gBAEJ,IAAIpD,WACFoD,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,MAAMjH,MAAM,IAAI;oBAClB;gBACF;qBACK,IAAIkE,YACT+C,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,OAAOjH,MAAM,KAAK;oBACpB;gBACF;qBACK,IAAI0E,YACTuC,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP5C;wBACA,aAAasC;oBACf;gBACF;gBAGFzD,WAAWuD;gBAEX,OAAO;oBACL,QAAQ;wBACN,SAAS;4BACP,GAAGA,OAAO;4BAEV,KAAKtE,UAAU,aAAa;wBAC9B;oBACF;oBACA8E;gBACF;YACF;QACF;QAEA,OAAO3D;IACT;IA9uBA,YAAY,EACV4D,iBAAiB,EACjBC,OAAO,EACPC,SAAS,EACTjG,WAAW,EACXkG,eAAe,EACC,CAAE;QAhBpB,uBAAiB,aAAjB;QAEA,uBAAiB,WAAjB;QAEA,uBAAiB,aAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAiB,mBAAjB;QASE,IAAI,CAAC,SAAS,GAAGH;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC;QACjB,IAAI,CAAC,WAAW,GAAGjG;QACnB,IAAI,CAAC,eAAe,GAAGkG;IACzB;AAmuBF"}
@@ -0,0 +1,34 @@
1
+ import { getPreferredLanguage } from "@midscene/shared/env";
2
+ const semanticAnchorSearchInstruction = ()=>{
3
+ const preferredLanguage = getPreferredLanguage();
4
+ return `
5
+ You are searching for an element based on its semantic anchor description.
6
+ The image shows a CROPPED AREA of the page around a known landmark element.
7
+
8
+ TARGET ELEMENT DESCRIPTION:
9
+ - Visual: {{visualFingerprint}}
10
+ - Context: {{contextDescription}}
11
+
12
+ TASK: Find the element matching this description in the image.
13
+
14
+ RESPONSE FORMAT (JSON):
15
+ {
16
+ "found": true/false,
17
+ "center": [x, y] or null,
18
+ "rect": {"left": 0, "top": 0, "width": 0, "height": 0} or null,
19
+ "confidence": 0.0-1.0,
20
+ "reason": "brief explanation"
21
+ }
22
+
23
+ RULES:
24
+ 1. Look for an element matching the visualFingerprint description
25
+ 2. Verify it's in the expected position relative to the contextDescription
26
+ 3. Return center coordinates and bounding rect if found
27
+ 4. Set confidence based on how well the match is
28
+ 5. If not found or confidence < 0.5, set found=false
29
+ 6. Write reason in ${preferredLanguage}
30
+ `;
31
+ };
32
+ export { semanticAnchorSearchInstruction };
33
+
34
+ //# sourceMappingURL=semantic-anchor-search.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/prompt/semantic-anchor-search.mjs","sources":["../../../../src/ai-model/prompt/semantic-anchor-search.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const semanticAnchorSearchInstruction = () => {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are searching for an element based on its semantic anchor description.\nThe image shows a CROPPED AREA of the page around a known landmark element.\n\nTARGET ELEMENT DESCRIPTION:\n- Visual: {{visualFingerprint}}\n- Context: {{contextDescription}}\n\nTASK: Find the element matching this description in the image.\n\nRESPONSE FORMAT (JSON):\n{\n \"found\": true/false,\n \"center\": [x, y] or null,\n \"rect\": {\"left\": 0, \"top\": 0, \"width\": 0, \"height\": 0} or null,\n \"confidence\": 0.0-1.0,\n \"reason\": \"brief explanation\"\n}\n\nRULES:\n1. Look for an element matching the visualFingerprint description\n2. Verify it's in the expected position relative to the contextDescription\n3. Return center coordinates and bounding rect if found\n4. Set confidence based on how well the match is\n5. If not found or confidence < 0.5, set found=false\n6. Write reason in ${preferredLanguage}\n`;\n};\n"],"names":["semanticAnchorSearchInstruction","preferredLanguage","getPreferredLanguage"],"mappings":";AAEO,MAAMA,kCAAkC;IAC7C,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;mBAyBS,EAAED,kBAAkB;AACvC,CAAC;AACD"}
@@ -0,0 +1,41 @@
1
+ import { getPreferredLanguage } from "@midscene/shared/env";
2
+ const semanticAnchorGenerateInstruction = ()=>{
3
+ const preferredLanguage = getPreferredLanguage();
4
+ return `
5
+ Analyze the element in the red rectangle and its surrounding context.
6
+
7
+ TASK: Generate a semantic anchor for this element that can be used to relocate it even when the DOM structure changes.
8
+
9
+ IMPORTANT: Write all descriptions in ${preferredLanguage}.
10
+
11
+ RESPONSE FORMAT (JSON):
12
+ {
13
+ "visualFingerprint": "concise visual description of the element (color, shape, icon, text style)",
14
+ "contextDescription": "spatial and semantic context (e.g., 'second button in top navigation bar')",
15
+ "nearbyLandmarks": [
16
+ {
17
+ "description": "description of a nearby stable structural element",
18
+ "xpath": "XPath of the landmark element"
19
+ }
20
+ ],
21
+ "error"?: "error message if any"
22
+ }
23
+
24
+ RULES:
25
+ 1. visualFingerprint: Focus on visual characteristics that survive DOM changes
26
+ - Colors, icons, text content, shape
27
+ - NOT CSS classes or IDs (they change)
28
+ 2. contextDescription: Describe WHERE the element is relative to page structure
29
+ - Use spatial terms: "top-right", "below header", "in sidebar"
30
+ - Use structural terms: "in navigation bar", "in modal dialog"
31
+ 3. nearbyLandmarks: Identify 1-3 STABLE structural elements near the target
32
+ - Prefer: <nav>, <header>, <footer>, <aside>, <main>, <form>
33
+ - Avoid: <div>, <span>, <li> (too generic, likely to change)
34
+ - Each landmark MUST have both description and xpath
35
+ 4. Keep descriptions under 20 words each
36
+ 5. Prioritize landmarks that are large and visually distinct
37
+ `;
38
+ };
39
+ export { semanticAnchorGenerateInstruction };
40
+
41
+ //# sourceMappingURL=semantic-anchor.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ai-model/prompt/semantic-anchor.mjs","sources":["../../../../src/ai-model/prompt/semantic-anchor.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const semanticAnchorGenerateInstruction = () => {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nAnalyze the element in the red rectangle and its surrounding context.\n\nTASK: Generate a semantic anchor for this element that can be used to relocate it even when the DOM structure changes.\n\nIMPORTANT: Write all descriptions in ${preferredLanguage}.\n\nRESPONSE FORMAT (JSON):\n{\n \"visualFingerprint\": \"concise visual description of the element (color, shape, icon, text style)\",\n \"contextDescription\": \"spatial and semantic context (e.g., 'second button in top navigation bar')\",\n \"nearbyLandmarks\": [\n {\n \"description\": \"description of a nearby stable structural element\",\n \"xpath\": \"XPath of the landmark element\"\n }\n ],\n \"error\"?: \"error message if any\"\n}\n\nRULES:\n1. visualFingerprint: Focus on visual characteristics that survive DOM changes\n - Colors, icons, text content, shape\n - NOT CSS classes or IDs (they change)\n2. contextDescription: Describe WHERE the element is relative to page structure\n - Use spatial terms: \"top-right\", \"below header\", \"in sidebar\"\n - Use structural terms: \"in navigation bar\", \"in modal dialog\"\n3. nearbyLandmarks: Identify 1-3 STABLE structural elements near the target\n - Prefer: <nav>, <header>, <footer>, <aside>, <main>, <form>\n - Avoid: <div>, <span>, <li> (too generic, likely to change)\n - Each landmark MUST have both description and xpath\n4. Keep descriptions under 20 words each\n5. Prioritize landmarks that are large and visually distinct\n`;\n};\n"],"names":["semanticAnchorGenerateInstruction","preferredLanguage","getPreferredLanguage"],"mappings":";AAEO,MAAMA,oCAAoC;IAC/C,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;qCAK2B,EAAED,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA4BzD,CAAC;AACD"}