@midscene/core 0.16.9-beta-20250506093037.0 → 0.16.9-beta-20250507095704.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/dist/es/ai-model.d.ts +3 -3
  2. package/dist/es/ai-model.js +1 -1
  3. package/dist/es/{chunk-KHYCNMYW.js → chunk-MVRTMYEM.js} +2 -2
  4. package/dist/es/{chunk-U3HZWESX.js → chunk-NWSBT5YP.js} +48 -55
  5. package/dist/es/chunk-NWSBT5YP.js.map +1 -0
  6. package/dist/es/index.d.ts +8 -5
  7. package/dist/es/index.js +85 -2
  8. package/dist/es/index.js.map +1 -1
  9. package/dist/es/{llm-planning-e16d1814.d.ts → llm-planning-28e999f3.d.ts} +3 -2
  10. package/dist/es/tree.d.ts +1 -1
  11. package/dist/es/{types-8c83481c.d.ts → types-88f46e9e.d.ts} +20 -2
  12. package/dist/es/utils.d.ts +1 -1
  13. package/dist/es/utils.js +1 -1
  14. package/dist/lib/ai-model.d.ts +3 -3
  15. package/dist/lib/ai-model.js +2 -2
  16. package/dist/lib/{chunk-KHYCNMYW.js → chunk-MVRTMYEM.js} +2 -2
  17. package/dist/lib/{chunk-U3HZWESX.js → chunk-NWSBT5YP.js} +49 -56
  18. package/dist/lib/chunk-NWSBT5YP.js.map +1 -0
  19. package/dist/lib/index.d.ts +8 -5
  20. package/dist/lib/index.js +93 -10
  21. package/dist/lib/index.js.map +1 -1
  22. package/dist/lib/{llm-planning-e16d1814.d.ts → llm-planning-28e999f3.d.ts} +3 -2
  23. package/dist/lib/tree.d.ts +1 -1
  24. package/dist/{types/types-8c83481c.d.ts → lib/types-88f46e9e.d.ts} +20 -2
  25. package/dist/lib/utils.d.ts +1 -1
  26. package/dist/lib/utils.js +2 -2
  27. package/dist/types/ai-model.d.ts +3 -3
  28. package/dist/types/index.d.ts +8 -5
  29. package/dist/types/{llm-planning-e16d1814.d.ts → llm-planning-28e999f3.d.ts} +3 -2
  30. package/dist/types/tree.d.ts +1 -1
  31. package/dist/{lib/types-8c83481c.d.ts → types/types-88f46e9e.d.ts} +20 -2
  32. package/dist/types/utils.d.ts +1 -1
  33. package/package.json +2 -2
  34. package/report/index.html +127 -108
  35. package/dist/es/chunk-U3HZWESX.js.map +0 -1
  36. package/dist/lib/chunk-U3HZWESX.js.map +0 -1
  37. /package/dist/es/{chunk-KHYCNMYW.js.map → chunk-MVRTMYEM.js.map} +0 -0
  38. /package/dist/lib/{chunk-KHYCNMYW.js.map → chunk-MVRTMYEM.js.map} +0 -0
package/dist/es/index.js CHANGED
@@ -1,15 +1,17 @@
1
1
  import {
2
2
  getVersion
3
- } from "./chunk-KHYCNMYW.js";
3
+ } from "./chunk-MVRTMYEM.js";
4
4
  import {
5
5
  AiAssert,
6
6
  AiExtractElementInfo,
7
7
  AiLocateElement,
8
8
  AiLocateSection,
9
9
  callAiFn,
10
+ callToGetJSONObject,
10
11
  describeUserPage,
12
+ expandSearchArea,
11
13
  plan
12
- } from "./chunk-U3HZWESX.js";
14
+ } from "./chunk-NWSBT5YP.js";
13
15
 
14
16
  // src/ai-model/action-executor.ts
15
17
  import { MIDSCENE_MODEL_NAME, getAIConfig } from "@midscene/shared/env";
@@ -166,12 +168,40 @@ ${this.latestErrorTask()?.errorStack}`
166
168
  }
167
169
  };
168
170
 
171
+ // src/ai-model/prompt/describe.ts
172
+ import { getPreferredLanguage } from "@midscene/shared/env";
173
+ var preferredLanguage = getPreferredLanguage();
174
+ var elementDescriberInstruction = () => {
175
+ return `Tell what is the content of the element wrapped by the read rectangle in the screenshot. Your description is expected to be used to precisely locate the element from other similar elements on screenshot. Use ${preferredLanguage} in the description.
176
+
177
+ Please follow the following rules:
178
+ 1. The description should be start with a brief description, like "a button for confirming the action".
179
+
180
+ 2. Include these information in the description to distinguish the element from its siblings and other similar elements, as much as possible:
181
+ - The text of the element, like "with text 'Confirm'"
182
+ - What the element looks like if it's an image, like "with image '...'"
183
+ - The relative position of the element, like "on the left of ..., around ..."
184
+ - How to distinguish the element from its siblings elements, like "it is the icon instead of the text"
185
+
186
+ 3. Do NOT mention the red rectangle in the description.
187
+
188
+ 4. Use the error field to describe the unexpected situations, if any. If not, put null.
189
+
190
+ Return in JSON:
191
+ {
192
+ "description": "[{brief description}]: {text of the element} {image of the element} {relative position of the element} ... ",
193
+ "error"?: "..."
194
+ }`;
195
+ };
196
+
169
197
  // src/insight/index.ts
170
198
  import {
171
199
  MIDSCENE_FORCE_DEEP_THINK,
200
+ MIDSCENE_USE_QWEN_VL,
172
201
  getAIConfigInBoolean,
173
202
  vlLocateMode as vlLocateMode2
174
203
  } from "@midscene/shared/env";
204
+ import { compositeElementInfoImg, cropByRect } from "@midscene/shared/img";
175
205
  import { getDebug } from "@midscene/shared/logger";
176
206
  import { assert as assert2 } from "@midscene/shared/utils";
177
207
 
@@ -439,6 +469,59 @@ ${parseResult.errors.join("\n")}`;
439
469
  usage: assertResult.usage
440
470
  };
441
471
  }
472
+ async describe(target, opt) {
473
+ assert2(target, "target is required for insight.describe");
474
+ const context = await this.contextRetrieverFn("describe");
475
+ const { screenshotBase64 } = context;
476
+ assert2(screenshotBase64, "screenshot is required for insight.describe");
477
+ const systemPrompt = elementDescriberInstruction();
478
+ const defaultRectSize = 30;
479
+ const targetRect = Array.isArray(target) ? {
480
+ left: Math.floor(target[0] - defaultRectSize / 2),
481
+ top: Math.floor(target[1] - defaultRectSize / 2),
482
+ width: defaultRectSize,
483
+ height: defaultRectSize
484
+ } : target;
485
+ let imagePayload = await compositeElementInfoImg({
486
+ inputImgBase64: screenshotBase64,
487
+ elementsPositionInfo: [
488
+ {
489
+ rect: targetRect
490
+ }
491
+ ],
492
+ borderThickness: 3
493
+ });
494
+ if (opt?.deepThink) {
495
+ const searchArea = expandSearchArea(targetRect, context.size);
496
+ debug("describe: set searchArea", searchArea);
497
+ imagePayload = await cropByRect(
498
+ imagePayload,
499
+ searchArea,
500
+ getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL)
501
+ );
502
+ }
503
+ const msgs = [
504
+ { role: "system", content: systemPrompt },
505
+ {
506
+ role: "user",
507
+ content: [
508
+ {
509
+ type: "image_url",
510
+ image_url: {
511
+ url: imagePayload,
512
+ detail: "high"
513
+ }
514
+ }
515
+ ]
516
+ }
517
+ ];
518
+ const callAIFn = this.aiVendorFn || callToGetJSONObject;
519
+ const res = await callAIFn(msgs, 4 /* DESCRIBE_ELEMENT */);
520
+ const { content } = res;
521
+ assert2(!content.error, `describe failed: ${content.error}`);
522
+ assert2(content.description, "failed to describe the element");
523
+ return content;
524
+ }
442
525
  };
443
526
 
444
527
  // src/index.ts
@@ -1 +1 @@
1
- {"version":3,"mappings":";;;;;;;;;;;;;;AAUA,SAAS,qBAAqB,mBAAmB;AACjD,SAAS,cAAc;AAEhB,IAAM,WAAN,MAAe;AAAA,EAUpB,YACE,MACA,SAGA;AACA,SAAK,SACH,SAAS,SAAS,QAAQ,MAAM,SAAS,IAAI,YAAY;AAC3D,SAAK,OAAO;AACZ,SAAK,SAAS,SAAS,SAAS,CAAC,GAAG;AAAA,MAAI,CAAC,SACvC,KAAK,kBAAkB,IAAI;AAAA,IAC7B;AACA,SAAK,cAAc,SAAS;AAAA,EAC9B;AAAA,EAEQ,kBAAkB,MAAyC;AACjE,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,MAAgE;AAC3E;AAAA,MACE,KAAK,WAAW;AAAA,MAChB;AAAA,QAAyD,KAAK,gBAAgB,GAAG,KAAK;AAAA,EAAK,KAAK,gBAAgB,GAAG,UAAU;AAAA,IAC/H;AACA,QAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,WAAK,MAAM,KAAK,GAAG,KAAK,IAAI,CAAC,SAAS,KAAK,kBAAkB,IAAI,CAAC,CAAC;AAAA,IACrE,OAAO;AACL,WAAK,MAAM,KAAK,KAAK,kBAAkB,IAAI,CAAC;AAAA,IAC9C;AACA,QAAI,KAAK,WAAW,WAAW;AAC7B,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAM,QAAsB;AAC1B,QAAI,KAAK,WAAW,UAAU,KAAK,MAAM,SAAS,GAAG;AACnD,cAAQ;AAAA,QACN;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,WAAW,WAAW,6BAA6B;AAC/D,WAAO,KAAK,WAAW,aAAa,+BAA+B;AACnE,WAAO,KAAK,WAAW,SAAS,4BAA4B;AAE5D,UAAM,mBAAmB,KAAK,MAAM;AAAA,MAClC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,mBAAmB,GAAG;AAExB;AAAA,IACF;AAEA,SAAK,SAAS;AACd,QAAI,YAAY;AAChB,QAAI,wBAAwB;AAE5B,QAAI;AAEJ,WAAO,YAAY,KAAK,MAAM,QAAQ;AACpC,YAAM,OAAO,KAAK,MAAM,SAAS;AACjC;AAAA,QACE,KAAK,WAAW;AAAA,QAChB,2CAA2C,KAAK,MAAM;AAAA,MACxD;AACA,WAAK,SAAS;AAAA,QACZ,OAAO,KAAK,IAAI;AAAA,MAClB;AACA,UAAI;AACF,aAAK,SAAS;AACd,YAAI;AACF,cAAI,KAAK,aAAa;AACpB,kBAAM,KAAK,YAAY,IAAI;AAAA,UAC7B;AAAA,QACF,SAAS,GAAG;AACV,kBAAQ,MAAM,wBAAwB,CAAC;AAAA,QACzC;AACA;AAAA,UACE,CAAC,WAAW,UAAU,UAAU,EAAE,QAAQ,KAAK,IAAI,KAAK;AAAA,UACxD,0BAA0B,KAAK,IAAI;AAAA,QACrC;AAEA,cAAM,EAAE,UAAU,MAAM,IAAI;AAC5B,eAAO,UAAU,uCAAuC,KAAK,IAAI,EAAE;AAEnE,YAAI;AACJ,cAAM,kBAAmC;AAAA,UACvC;AAAA,UACA,SAAS,oBAAoB;AAAA,QAC/B;AAEA,YAAI,KAAK,SAAS,WAAW;AAC3B;AAAA,YACE,KAAK,YAAY,YACf,KAAK,YAAY,WACjB,KAAK,YAAY,YACjB,KAAK,YAAY,aACjB,KAAK,YAAY,YACjB,KAAK,YAAY;AAAA,YACnB,gCAAgC,KAAK,OAAO;AAAA,UAC9C;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AACxD,cAAI,KAAK,YAAY,UAAU;AAC7B,iCACE,aACC;AAAA,UACL;AAAA,QACF,WAAW,KAAK,SAAS,YAAY,KAAK,SAAS,YAAY;AAC7D,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D,OAAO;AACL,kBAAQ;AAAA,YACN,0BAA0B,KAAK,IAAI;AAAA,UACrC;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D;AAEA,eAAO,OAAO,MAAM,WAAW;AAC/B,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD,aAAK,OAAO,SAAU,aAAqB,UAAU;AACrD;AAAA,MACF,SAAS,GAAQ;AACf,gCAAwB;AACxB,aAAK,QACH,GAAG,YAAY,OAAO,MAAM,WAAW,IAAI;AAC7C,aAAK,aAAa,EAAE;AAEpB,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD;AAAA,MACF;AAAA,IACF;AAGA,aAAS,IAAI,YAAY,GAAG,IAAI,KAAK,MAAM,QAAQ,KAAK;AACtD,WAAK,MAAM,CAAC,EAAE,SAAS;AAAA,IACzB;AAEA,QAAI,uBAAuB;AACzB,WAAK,SAAS;AAAA,IAChB,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAEA,QAAI,KAAK,MAAM,QAAQ;AAErB,YAAM,cAAc,KAAK,IAAI,WAAW,KAAK,MAAM,SAAS,CAAC;AAC7D,aAAO,KAAK,MAAM,WAAW,EAAE;AAAA,IACjC;AAAA,EACF;AAAA,EAEA,iBAA0B;AACxB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,kBAAwC;AACtC,QAAI,KAAK,WAAW,SAAS;AAC3B,aAAO;AAAA,IACT;AACA,UAAM,iBAAiB,KAAK,MAAM;AAAA,MAChC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,kBAAkB,GAAG;AACvB,aAAO,KAAK,MAAM,cAAc;AAAA,IAClC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,OAAsB;AACpB,UAAM,WAA0B;AAAA,MAC9B,YAAY,WAAW;AAAA,MACvB,YAAY,YAAY,mBAAmB,KAAK;AAAA,MAChD,SAAS,KAAK,IAAI;AAAA,MAClB,MAAM,KAAK;AAAA,MACX,OAAO,KAAK;AAAA,IACd;AACA,WAAO;AAAA,EACT;AACF;;;AC1LA;AAAA,EACE;AAAA,EACA;AAAA,EACA,gBAAAA;AAAA,OACK;AACP,SAAS,gBAAgB;AACzB,SAAS,UAAAC,eAAc;;;ACnBvB;AAAA,EACE,uBAAAC;AAAA,EACA,eAAAC;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,YAAY;AAEd,SAAS,gBACd,MACA,gBACA;AACA,MAAI,mBAAmB;AAEvB,MAAI,aAAa,GAAG;AAClB,UAAM,iBAAiB,mBAAmB;AAC1C,QAAI,gBAAgB;AAClB,yBAAmB,WAAW,cAAc;AAAA,IAC9C,OAAO;AACL,yBAAmB,GAAG,aAAa,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,QAAM,WAAqB;AAAA,IACzB,YAAY,WAAW;AAAA,IACvB,SAAS,KAAK,IAAI;AAAA,IAClB,YAAYA,aAAYD,oBAAmB,KAAK;AAAA,IAChD,mBAAmB;AAAA,EACrB;AACA,QAAM,YAAyB;AAAA,IAC7B,OAAO,KAAK;AAAA,IACZ,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAEA,mBAAiB,SAAS;AAC5B;;;ADLA,IAAM,QAAQ,SAAS,YAAY;AACnC,IAAqB,UAArB,MAGE;AAAA,EAWA,YACE,SAGA,KACA;AAXF,sBAAoD;AAYlD,IAAAD,QAAO,SAAS,iCAAiC;AACjD,QAAI,OAAO,YAAY,YAAY;AACjC,WAAK,qBAAqB;AAAA,IAC5B,OAAO;AACL,WAAK,qBAAqB,MAAM,QAAQ,QAAQ,OAAO;AAAA,IACzD;AAEA,QAAI,OAAO,KAAK,eAAe,aAAa;AAC1C,WAAK,aAAa,IAAI;AAAA,IACxB;AACA,QAAI,OAAO,KAAK,aAAa,aAAa;AACxC,WAAK,WAAW,IAAI;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,MAAM,OACJ,OACA,KACuB;AACvB,UAAM,EAAE,OAAO,IAAI,OAAO,CAAC;AAC3B,UAAM,cAAc,OAAO,UAAU,WAAW,QAAQ,MAAM;AAC9D,IAAAA;AAAA,MACE,eAAe,KAAK;AAAA,MACpB;AAAA,IACF;AACA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,IAAAA,QAAO,OAAO,UAAU,UAAU,sCAAsC;AAExE,UAAM,wBAAwB;AAAA,MAC5B;AAAA,IACF;AACA,QAAI,uBAAuB;AACzB,YAAM,yBAAyB,qBAAqB;AAAA,IACtD;AACA,QAAI;AACJ,QAAI,MAAM,aAAa,uBAAuB;AAC5C,yBAAmB,MAAM;AAAA,IAC3B;AAEA,QAAI,oBAAoB,CAACD,cAAa,GAAG;AACvC,cAAQ;AAAA,QACN;AAAA,MACF;AACA,yBAAmB;AAAA,IACrB;AAEA,UAAM,UAAU,MAAM,KAAK,mBAAmB,QAAQ;AAEtD,QAAI,aAA+B;AACnC,QAAI,wBAA4C;AAChD,QAAI,kBAA2C;AAC/C,QAAI,qBAEY;AAChB,QAAI,kBAAkB;AACpB,2BAAqB,MAAM,gBAAgB;AAAA,QACzC;AAAA,QACA,oBAAoB;AAAA,MACtB,CAAC;AACD,MAAAC;AAAA,QACE,mBAAmB;AAAA,QACnB,gCAAgC,gBAAgB,IAC9C,mBAAmB,QAAQ,KAAK,mBAAmB,KAAK,KAAK,EAC/D;AAAA,MACF;AACA,8BAAwB,mBAAmB;AAC3C,wBAAkB,mBAAmB;AACrC,mBAAa,mBAAmB;AAAA,IAClC;AAEA,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,aAAa,aAAa,MAAM,IACzD,MAAM,gBAAgB;AAAA,MACpB,QAAQ,UAAU,KAAK;AAAA,MACvB;AAAA,MACA,0BAA0B;AAAA,MAC1B,aAAa,KAAK;AAAA,MAClB,cAAc;AAAA,IAChB,CAAC;AAEH,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,MACvC,gBAAgB,KAAK,UAAU,WAAW;AAAA,MAC1C;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAgC,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAC1E;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT,SAAS;AAAA,MACX;AAAA,MACA,aAAa,KAAK;AAAA,MAClB,gBAAgB,CAAC;AAAA,MACjB,aAAa;AAAA,MACb,MAAM;AAAA,MACN;AAAA,MACA,WAAW,CAAC,CAAC;AAAA,MACb,OAAO;AAAA,IACT;AAEA,UAAM,WAA0B,CAAC;AACjC,KAAC,YAAY,YAAY,CAAC,GAAG,QAAQ,CAAC,SAAS;AAC7C,UAAI,QAAQ,MAAM;AAChB,cAAM,UAAU,YAAY,KAAK,EAAE;AAEnC,YAAI,CAAC,SAAS;AACZ,kBAAQ;AAAA,YACN,kCAAkC,KAAK,EAAE;AAAA,UAC3C;AACA;AAAA,QACF;AACA,iBAAS,KAAK,OAAO;AAAA,MACvB;AAAA,IACF,CAAC;AAED;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH,gBAAgB;AAAA,MAClB;AAAA,MACA;AAAA,IACF;AAEA,QAAI,UAAU;AACZ,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,IAAAA;AAAA,MACE,SAAS,UAAU;AAAA,MACnB,6CAA6C,SAAS,MAAM;AAAA,IAC9D;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO;AAAA,QACL,SAAS;AAAA,UACP,IAAI,SAAS,CAAC,EAAG;AAAA,UACjB,SAAS,SAAS,CAAC,EAAG;AAAA,UACtB,QAAQ,SAAS,CAAC,EAAG;AAAA,UACrB,MAAM,SAAS,CAAC,EAAG;AAAA,QACrB;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAAA,EAQA,MAAM,QAAW,YAA+C;AAC9D,IAAAA;AAAA,MACE,OAAO,eAAe,YAAY,OAAO,eAAe;AAAA,MACxD,kDAAkD,OAAO,UAAU;AAAA,IACrE;AACA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,SAAS;AAEvD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,IAAI,MAAM,qBAAwB;AAAA,MAC3D;AAAA,MACA,WAAW;AAAA,IACb,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,IACzC;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAwB,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAClE;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,OAAO;AAAA,IACT;AAEA,UAAM,EAAE,KAAK,IAAI,eAAe,CAAC;AAGjC;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAEA,QAAI,YAAY,CAAC,MAAM;AACrB,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,WAAO;AAAA,MACL;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,WAAsD;AACjE,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,QAAQ;AACtD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,eAAe,MAAM,SAAS;AAAA,MAClC;AAAA,MACA;AAAA,IACF,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,aAAa,OAAO;AAAA,IAClD;AAEA,UAAM,EAAE,SAAS,KAAK,IAAI,aAAa;AACvC,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,eAAe;AAAA,MACf,kBAAkB;AAAA,MAClB,OAAO,OAAO,SAAY;AAAA,IAC5B;AACA,oBAAgB,UAAU,cAAc;AAExC,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,OAAO,aAAa;AAAA,IACtB;AAAA,EACF;AACF;;;AEjUA,SAAS,eAAAE,cAAa,uBAAAD,4BAA2B;AAGjD,IAAO,cAAQ","names":["vlLocateMode","assert","MIDSCENE_MODEL_NAME","getAIConfig"],"ignoreList":[],"sources":["../../src/ai-model/action-executor.ts","../../src/insight/index.ts","../../src/insight/utils.ts","../../src/index.ts"],"sourcesContent":["import type {\n ExecutionDump,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskProgressOptions,\n ExecutionTaskReturn,\n ExecutorContext,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport { MIDSCENE_MODEL_NAME, getAIConfig } from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\n\nexport class Executor {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of executor\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n constructor(\n name: string,\n options?: ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n },\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n async append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void> {\n assert(\n this.status !== 'error',\n `executor is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n }\n\n async flush(): Promise<any> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for executor, status is init but tasks are not empty',\n );\n }\n\n assert(this.status !== 'running', 'executor is already running');\n assert(this.status !== 'completed', 'executor is already completed');\n assert(this.status !== 'error', 'executor is in error state');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n return;\n }\n\n this.status = 'running';\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskInsightLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Locate' ||\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported insight subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskInsightLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action' || task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n task.timing.aiCost = (returnValue as any)?.aiCost || 0;\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = false;\n task.error =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n\n if (successfullyCompleted) {\n this.status = 'completed';\n } else {\n this.status = 'error';\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n return this.tasks[outputIndex].output;\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n const errorTaskIndex = this.tasks.findIndex(\n (task) => task.status === 'failed',\n );\n if (errorTaskIndex >= 0) {\n return this.tasks[errorTaskIndex];\n }\n return null;\n }\n\n dump(): ExecutionDump {\n const dumpData: ExecutionDump = {\n sdkVersion: getVersion(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n };\n return dumpData;\n }\n}\n","import { callAiFn } from '@/ai-model/common';\nimport { AiExtractElementInfo, AiLocateElement } from '@/ai-model/index';\nimport { AiAssert, AiLocateSection } from '@/ai-model/inspect';\nimport type {\n AIElementResponse,\n AISingleElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightAssertionResponse,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_FORCE_DEEP_THINK,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n callAI?: typeof callAiFn<AIElementResponse>;\n quickAnswer?: Partial<AISingleElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(\n queryPrompt || opt?.quickAnswer,\n 'query or quickAnswer is required for locate',\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n if (searchAreaPrompt && !vlLocateMode()) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = await this.contextRetrieverFn('locate');\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, elementById, rawResponse, usage } =\n await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n quickAnswer: opt?.quickAnswer,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n quickAnswer: opt?.quickAnswer,\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T = any>(input: string): Promise<T>;\n async extract<T extends Record<string, string>>(\n input: T,\n ): Promise<Record<keyof T, any>>;\n async extract<T extends object>(input: Record<keyof T, string>): Promise<T>;\n\n async extract<T>(dataDemand: InsightExtractParam): Promise<any> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n usage,\n };\n }\n\n async assert(assertion: string): Promise<InsightAssertionResponse> {\n if (typeof assertion !== 'string') {\n throw new Error(\n 'This is the assert method for Midscene, the first argument should be a string. If you want to use the assert method from Node.js, please import it from the Node.js assert module.',\n );\n }\n\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('assert');\n const startTime = Date.now();\n const assertResult = await AiAssert({\n assertion,\n context,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(assertResult.content),\n };\n\n const { thought, pass } = assertResult.content;\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'assert',\n userQuery: {\n assertion,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n assertionPass: pass,\n assertionThought: thought,\n error: pass ? undefined : thought,\n };\n emitInsightDump(dumpData, dumpSubscriber);\n\n return {\n pass,\n thought,\n usage: assertResult.usage,\n };\n }\n}\n","import type {\n DumpMeta,\n DumpSubscriber,\n InsightDump,\n PartialInsightDumpFromSDK,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { uuid } from '@midscene/shared/utils';\n\nexport function emitInsightDump(\n data: PartialInsightDumpFromSDK,\n dumpSubscriber?: DumpSubscriber,\n) {\n let modelDescription = '';\n\n if (vlLocateMode()) {\n const uiTarsModelVer = uiTarsModelVersion();\n if (uiTarsModelVer) {\n modelDescription = `UI-TARS=${uiTarsModelVer}`;\n } else {\n modelDescription = `${vlLocateMode()} mode`;\n }\n }\n\n const baseData: DumpMeta = {\n sdkVersion: getVersion(),\n logTime: Date.now(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n model_description: modelDescription,\n };\n const finalData: InsightDump = {\n logId: uuid(),\n ...baseData,\n ...data,\n };\n\n dumpSubscriber?.(finalData);\n}\n","import { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport {\n plan,\n describeUserPage,\n AiLocateElement,\n AiAssert,\n} from './ai-model/index';\n\nexport { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';\n\nexport type * from './types';\nexport default Insight;\nexport { Executor, Insight, getVersion };\n\nexport type {\n MidsceneYamlScript,\n MidsceneYamlTask,\n MidsceneYamlFlowItem,\n} from './yaml';\n"]}
1
+ {"version":3,"mappings":";;;;;;;;;;;;;;;;AAUA,SAAS,qBAAqB,mBAAmB;AACjD,SAAS,cAAc;AAEhB,IAAM,WAAN,MAAe;AAAA,EAUpB,YACE,MACA,SAGA;AACA,SAAK,SACH,SAAS,SAAS,QAAQ,MAAM,SAAS,IAAI,YAAY;AAC3D,SAAK,OAAO;AACZ,SAAK,SAAS,SAAS,SAAS,CAAC,GAAG;AAAA,MAAI,CAAC,SACvC,KAAK,kBAAkB,IAAI;AAAA,IAC7B;AACA,SAAK,cAAc,SAAS;AAAA,EAC9B;AAAA,EAEQ,kBAAkB,MAAyC;AACjE,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,MAAgE;AAC3E;AAAA,MACE,KAAK,WAAW;AAAA,MAChB;AAAA,QAAyD,KAAK,gBAAgB,GAAG,KAAK;AAAA,EAAK,KAAK,gBAAgB,GAAG,UAAU;AAAA,IAC/H;AACA,QAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,WAAK,MAAM,KAAK,GAAG,KAAK,IAAI,CAAC,SAAS,KAAK,kBAAkB,IAAI,CAAC,CAAC;AAAA,IACrE,OAAO;AACL,WAAK,MAAM,KAAK,KAAK,kBAAkB,IAAI,CAAC;AAAA,IAC9C;AACA,QAAI,KAAK,WAAW,WAAW;AAC7B,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAM,QAAsB;AAC1B,QAAI,KAAK,WAAW,UAAU,KAAK,MAAM,SAAS,GAAG;AACnD,cAAQ;AAAA,QACN;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,WAAW,WAAW,6BAA6B;AAC/D,WAAO,KAAK,WAAW,aAAa,+BAA+B;AACnE,WAAO,KAAK,WAAW,SAAS,4BAA4B;AAE5D,UAAM,mBAAmB,KAAK,MAAM;AAAA,MAClC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,mBAAmB,GAAG;AAExB;AAAA,IACF;AAEA,SAAK,SAAS;AACd,QAAI,YAAY;AAChB,QAAI,wBAAwB;AAE5B,QAAI;AAEJ,WAAO,YAAY,KAAK,MAAM,QAAQ;AACpC,YAAM,OAAO,KAAK,MAAM,SAAS;AACjC;AAAA,QACE,KAAK,WAAW;AAAA,QAChB,2CAA2C,KAAK,MAAM;AAAA,MACxD;AACA,WAAK,SAAS;AAAA,QACZ,OAAO,KAAK,IAAI;AAAA,MAClB;AACA,UAAI;AACF,aAAK,SAAS;AACd,YAAI;AACF,cAAI,KAAK,aAAa;AACpB,kBAAM,KAAK,YAAY,IAAI;AAAA,UAC7B;AAAA,QACF,SAAS,GAAG;AACV,kBAAQ,MAAM,wBAAwB,CAAC;AAAA,QACzC;AACA;AAAA,UACE,CAAC,WAAW,UAAU,UAAU,EAAE,QAAQ,KAAK,IAAI,KAAK;AAAA,UACxD,0BAA0B,KAAK,IAAI;AAAA,QACrC;AAEA,cAAM,EAAE,UAAU,MAAM,IAAI;AAC5B,eAAO,UAAU,uCAAuC,KAAK,IAAI,EAAE;AAEnE,YAAI;AACJ,cAAM,kBAAmC;AAAA,UACvC;AAAA,UACA,SAAS,oBAAoB;AAAA,QAC/B;AAEA,YAAI,KAAK,SAAS,WAAW;AAC3B;AAAA,YACE,KAAK,YAAY,YACf,KAAK,YAAY,WACjB,KAAK,YAAY,YACjB,KAAK,YAAY,aACjB,KAAK,YAAY,YACjB,KAAK,YAAY;AAAA,YACnB,gCAAgC,KAAK,OAAO;AAAA,UAC9C;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AACxD,cAAI,KAAK,YAAY,UAAU;AAC7B,iCACE,aACC;AAAA,UACL;AAAA,QACF,WAAW,KAAK,SAAS,YAAY,KAAK,SAAS,YAAY;AAC7D,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D,OAAO;AACL,kBAAQ;AAAA,YACN,0BAA0B,KAAK,IAAI;AAAA,UACrC;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D;AAEA,eAAO,OAAO,MAAM,WAAW;AAC/B,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD,aAAK,OAAO,SAAU,aAAqB,UAAU;AACrD;AAAA,MACF,SAAS,GAAQ;AACf,gCAAwB;AACxB,aAAK,QACH,GAAG,YAAY,OAAO,MAAM,WAAW,IAAI;AAC7C,aAAK,aAAa,EAAE;AAEpB,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD;AAAA,MACF;AAAA,IACF;AAGA,aAAS,IAAI,YAAY,GAAG,IAAI,KAAK,MAAM,QAAQ,KAAK;AACtD,WAAK,MAAM,CAAC,EAAE,SAAS;AAAA,IACzB;AAEA,QAAI,uBAAuB;AACzB,WAAK,SAAS;AAAA,IAChB,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAEA,QAAI,KAAK,MAAM,QAAQ;AAErB,YAAM,cAAc,KAAK,IAAI,WAAW,KAAK,MAAM,SAAS,CAAC;AAC7D,aAAO,KAAK,MAAM,WAAW,EAAE;AAAA,IACjC;AAAA,EACF;AAAA,EAEA,iBAA0B;AACxB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,kBAAwC;AACtC,QAAI,KAAK,WAAW,SAAS;AAC3B,aAAO;AAAA,IACT;AACA,UAAM,iBAAiB,KAAK,MAAM;AAAA,MAChC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,kBAAkB,GAAG;AACvB,aAAO,KAAK,MAAM,cAAc;AAAA,IAClC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,OAAsB;AACpB,UAAM,WAA0B;AAAA,MAC9B,YAAY,WAAW;AAAA,MACvB,YAAY,YAAY,mBAAmB,KAAK;AAAA,MAChD,SAAS,KAAK,IAAI;AAAA,MAClB,MAAM,KAAK;AAAA,MACX,OAAO,KAAK;AAAA,IACd;AACA,WAAO;AAAA,EACT;AACF;;;AC9MA,SAAS,4BAA4B;AAErC,IAAM,oBAAoB,qBAAqB;AAExC,IAAM,8BAA8B,MAAM;AAC/C,SAAO,mNAAmN,iBAAiB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAoB7O;;;ACMA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA,gBAAAA;AAAA,OACK;AACP,SAAS,yBAAyB,kBAAkB;AACpD,SAAS,gBAAgB;AACzB,SAAS,UAAAC,eAAc;;;AChCvB;AAAA,EACE,uBAAAC;AAAA,EACA,eAAAC;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,YAAY;AAEd,SAAS,gBACd,MACA,gBACA;AACA,MAAI,mBAAmB;AAEvB,MAAI,aAAa,GAAG;AAClB,UAAM,iBAAiB,mBAAmB;AAC1C,QAAI,gBAAgB;AAClB,yBAAmB,WAAW,cAAc;AAAA,IAC9C,OAAO;AACL,yBAAmB,GAAG,aAAa,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,QAAM,WAAqB;AAAA,IACzB,YAAY,WAAW;AAAA,IACvB,SAAS,KAAK,IAAI;AAAA,IAClB,YAAYA,aAAYD,oBAAmB,KAAK;AAAA,IAChD,mBAAmB;AAAA,EACrB;AACA,QAAM,YAAyB;AAAA,IAC7B,OAAO,KAAK;AAAA,IACZ,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAEA,mBAAiB,SAAS;AAC5B;;;ADQA,IAAM,QAAQ,SAAS,YAAY;AACnC,IAAqB,UAArB,MAGE;AAAA,EAWA,YACE,SAGA,KACA;AAXF,sBAAoD;AAYlD,IAAAD,QAAO,SAAS,iCAAiC;AACjD,QAAI,OAAO,YAAY,YAAY;AACjC,WAAK,qBAAqB;AAAA,IAC5B,OAAO;AACL,WAAK,qBAAqB,MAAM,QAAQ,QAAQ,OAAO;AAAA,IACzD;AAEA,QAAI,OAAO,KAAK,eAAe,aAAa;AAC1C,WAAK,aAAa,IAAI;AAAA,IACxB;AACA,QAAI,OAAO,KAAK,aAAa,aAAa;AACxC,WAAK,WAAW,IAAI;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,MAAM,OACJ,OACA,KACuB;AACvB,UAAM,EAAE,OAAO,IAAI,OAAO,CAAC;AAC3B,UAAM,cAAc,OAAO,UAAU,WAAW,QAAQ,MAAM;AAC9D,IAAAA;AAAA,MACE,eAAe,KAAK;AAAA,MACpB;AAAA,IACF;AACA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,IAAAA,QAAO,OAAO,UAAU,UAAU,sCAAsC;AAExE,UAAM,wBAAwB;AAAA,MAC5B;AAAA,IACF;AACA,QAAI,uBAAuB;AACzB,YAAM,yBAAyB,qBAAqB;AAAA,IACtD;AACA,QAAI;AACJ,QAAI,MAAM,aAAa,uBAAuB;AAC5C,yBAAmB,MAAM;AAAA,IAC3B;AAEA,QAAI,oBAAoB,CAACD,cAAa,GAAG;AACvC,cAAQ;AAAA,QACN;AAAA,MACF;AACA,yBAAmB;AAAA,IACrB;AAEA,UAAM,UAAU,MAAM,KAAK,mBAAmB,QAAQ;AAEtD,QAAI,aAA+B;AACnC,QAAI,wBAA4C;AAChD,QAAI,kBAA2C;AAC/C,QAAI,qBAEY;AAChB,QAAI,kBAAkB;AACpB,2BAAqB,MAAM,gBAAgB;AAAA,QACzC;AAAA,QACA,oBAAoB;AAAA,MACtB,CAAC;AACD,MAAAC;AAAA,QACE,mBAAmB;AAAA,QACnB,gCAAgC,gBAAgB,IAC9C,mBAAmB,QAAQ,KAAK,mBAAmB,KAAK,KAAK,EAC/D;AAAA,MACF;AACA,8BAAwB,mBAAmB;AAC3C,wBAAkB,mBAAmB;AACrC,mBAAa,mBAAmB;AAAA,IAClC;AAEA,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,aAAa,aAAa,MAAM,IACzD,MAAM,gBAAgB;AAAA,MACpB,QAAQ,UAAU,KAAK;AAAA,MACvB;AAAA,MACA,0BAA0B;AAAA,MAC1B,aAAa,KAAK;AAAA,MAClB,cAAc;AAAA,IAChB,CAAC;AAEH,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,MACvC,gBAAgB,KAAK,UAAU,WAAW;AAAA,MAC1C;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAgC,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAC1E;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT,SAAS;AAAA,MACX;AAAA,MACA,aAAa,KAAK;AAAA,MAClB,gBAAgB,CAAC;AAAA,MACjB,aAAa;AAAA,MACb,MAAM;AAAA,MACN;AAAA,MACA,WAAW,CAAC,CAAC;AAAA,MACb,OAAO;AAAA,IACT;AAEA,UAAM,WAA0B,CAAC;AACjC,KAAC,YAAY,YAAY,CAAC,GAAG,QAAQ,CAAC,SAAS;AAC7C,UAAI,QAAQ,MAAM;AAChB,cAAM,UAAU,YAAY,KAAK,EAAE;AAEnC,YAAI,CAAC,SAAS;AACZ,kBAAQ;AAAA,YACN,kCAAkC,KAAK,EAAE;AAAA,UAC3C;AACA;AAAA,QACF;AACA,iBAAS,KAAK,OAAO;AAAA,MACvB;AAAA,IACF,CAAC;AAED;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH,gBAAgB;AAAA,MAClB;AAAA,MACA;AAAA,IACF;AAEA,QAAI,UAAU;AACZ,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,IAAAA;AAAA,MACE,SAAS,UAAU;AAAA,MACnB,6CAA6C,SAAS,MAAM;AAAA,IAC9D;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO;AAAA,QACL,SAAS;AAAA,UACP,IAAI,SAAS,CAAC,EAAG;AAAA,UACjB,SAAS,SAAS,CAAC,EAAG;AAAA,UACtB,QAAQ,SAAS,CAAC,EAAG;AAAA,UACrB,MAAM,SAAS,CAAC,EAAG;AAAA,QACrB;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAAA,EAQA,MAAM,QAAW,YAA+C;AAC9D,IAAAA;AAAA,MACE,OAAO,eAAe,YAAY,OAAO,eAAe;AAAA,MACxD,kDAAkD,OAAO,UAAU;AAAA,IACrE;AACA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,SAAS;AAEvD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,IAAI,MAAM,qBAAwB;AAAA,MAC3D;AAAA,MACA,WAAW;AAAA,IACb,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,IACzC;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAwB,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAClE;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,OAAO;AAAA,IACT;AAEA,UAAM,EAAE,KAAK,IAAI,eAAe,CAAC;AAGjC;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAEA,QAAI,YAAY,CAAC,MAAM;AACrB,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,WAAO;AAAA,MACL;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,WAAsD;AACjE,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,QAAQ;AACtD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,eAAe,MAAM,SAAS;AAAA,MAClC;AAAA,MACA;AAAA,IACF,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,aAAa,OAAO;AAAA,IAClD;AAEA,UAAM,EAAE,SAAS,KAAK,IAAI,aAAa;AACvC,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,eAAe;AAAA,MACf,kBAAkB;AAAA,MAClB,OAAO,OAAO,SAAY;AAAA,IAC5B;AACA,oBAAgB,UAAU,cAAc;AAExC,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,OAAO,aAAa;AAAA,IACtB;AAAA,EACF;AAAA,EACA,MAAM,SACJ,QACA,KAGyD;AACzD,IAAAA,QAAO,QAAQ,yCAAyC;AACxD,UAAM,UAAU,MAAM,KAAK,mBAAmB,UAAU;AACxD,UAAM,EAAE,iBAAiB,IAAI;AAC7B,IAAAA,QAAO,kBAAkB,6CAA6C;AAEtE,UAAM,eAAe,4BAA4B;AAGjD,UAAM,kBAAkB;AACxB,UAAM,aAAmB,MAAM,QAAQ,MAAM,IACzC;AAAA,MACE,MAAM,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAChD,KAAK,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAC/C,OAAO;AAAA,MACP,QAAQ;AAAA,IACV,IACA;AAEJ,QAAI,eAAe,MAAM,wBAAwB;AAAA,MAC/C,gBAAgB;AAAA,MAChB,sBAAsB;AAAA,QACpB;AAAA,UACE,MAAM;AAAA,QACR;AAAA,MACF;AAAA,MACA,iBAAiB;AAAA,IACnB,CAAC;AAED,QAAI,KAAK,WAAW;AAClB,YAAM,aAAa,iBAAiB,YAAY,QAAQ,IAAI;AAC5D,YAAM,4BAA4B,UAAU;AAC5C,qBAAe,MAAM;AAAA,QACnB;AAAA,QACA;AAAA,QACA,qBAAqB,oBAAoB;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,OAAe;AAAA,MACnB,EAAE,MAAM,UAAU,SAAS,aAAa;AAAA,MACxC;AAAA,QACE,MAAM;AAAA,QACN,SAAS;AAAA,UACP;AAAA,YACE,MAAM;AAAA,YACN,WAAW;AAAA,cACT,KAAK;AAAA,cACL,QAAQ;AAAA,YACV;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,WACJ,KAAK,cAAc;AAErB,UAAM,MAAM,MAAM,SAAS,8BAAmC;AAE9D,UAAM,EAAE,QAAQ,IAAI;AACpB,IAAAA,QAAO,CAAC,QAAQ,OAAO,oBAAoB,QAAQ,KAAK,EAAE;AAC1D,IAAAA,QAAO,QAAQ,aAAa,gCAAgC;AAC5D,WAAO;AAAA,EACT;AACF;;;AEpZA,SAAS,eAAAE,cAAa,uBAAAD,4BAA2B;AAGjD,IAAO,cAAQ","names":["vlLocateMode","assert","MIDSCENE_MODEL_NAME","getAIConfig"],"ignoreList":[],"sources":["../../src/ai-model/action-executor.ts","../../src/ai-model/prompt/describe.ts","../../src/insight/index.ts","../../src/insight/utils.ts","../../src/index.ts"],"sourcesContent":["import type {\n ExecutionDump,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskProgressOptions,\n ExecutionTaskReturn,\n ExecutorContext,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport { MIDSCENE_MODEL_NAME, getAIConfig } from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\n\nexport class Executor {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of executor\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n constructor(\n name: string,\n options?: ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n },\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n async append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void> {\n assert(\n this.status !== 'error',\n `executor is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n }\n\n async flush(): Promise<any> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for executor, status is init but tasks are not empty',\n );\n }\n\n assert(this.status !== 'running', 'executor is already running');\n assert(this.status !== 'completed', 'executor is already completed');\n assert(this.status !== 'error', 'executor is in error state');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n return;\n }\n\n this.status = 'running';\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskInsightLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Locate' ||\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported insight subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskInsightLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action' || task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n task.timing.aiCost = (returnValue as any)?.aiCost || 0;\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = false;\n task.error =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n\n if (successfullyCompleted) {\n this.status = 'completed';\n } else {\n this.status = 'error';\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n return this.tasks[outputIndex].output;\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n const errorTaskIndex = this.tasks.findIndex(\n (task) => task.status === 'failed',\n );\n if (errorTaskIndex >= 0) {\n return this.tasks[errorTaskIndex];\n }\n return null;\n }\n\n dump(): ExecutionDump {\n const dumpData: ExecutionDump = {\n sdkVersion: getVersion(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n };\n return dumpData;\n }\n}\n","import { getPreferredLanguage } from '@midscene/shared/env';\n\nconst preferredLanguage = getPreferredLanguage();\n\nexport const elementDescriberInstruction = () => {\n return `Tell what is the content of the element wrapped by the read rectangle in the screenshot. Your description is expected to be used to precisely locate the element from other similar elements on screenshot. Use ${preferredLanguage} in the description.\n\nPlease follow the following rules:\n1. The description should be start with a brief description, like \"a button for confirming the action\".\n\n2. Include these information in the description to distinguish the element from its siblings and other similar elements, as much as possible:\n- The text of the element, like \"with text 'Confirm'\"\n- What the element looks like if it's an image, like \"with image '...'\"\n- The relative position of the element, like \"on the left of ..., around ...\"\n- How to distinguish the element from its siblings elements, like \"it is the icon instead of the text\"\n\n3. Do NOT mention the red rectangle in the description.\n\n4. Use the error field to describe the unexpected situations, if any. If not, put null.\n\nReturn in JSON:\n{\n \"description\": \"[{brief description}]: {text of the element} {image of the element} {relative position of the element} ... \",\n \"error\"?: \"...\"\n}`;\n};\n","import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiAssert, AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AISingleElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightAssertionResponse,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_FORCE_DEEP_THINK,\n MIDSCENE_USE_QWEN_VL,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n callAI?: typeof callAiFn<AIElementResponse>;\n quickAnswer?: Partial<AISingleElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(\n queryPrompt || opt?.quickAnswer,\n 'query or quickAnswer is required for locate',\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n if (searchAreaPrompt && !vlLocateMode()) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = await this.contextRetrieverFn('locate');\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, elementById, rawResponse, usage } =\n await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n quickAnswer: opt?.quickAnswer,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n quickAnswer: opt?.quickAnswer,\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T = any>(input: string): Promise<T>;\n async extract<T extends Record<string, string>>(\n input: T,\n ): Promise<Record<keyof T, any>>;\n async extract<T extends object>(input: Record<keyof T, string>): Promise<T>;\n\n async extract<T>(dataDemand: InsightExtractParam): Promise<any> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n usage,\n };\n }\n\n async assert(assertion: string): Promise<InsightAssertionResponse> {\n if (typeof assertion !== 'string') {\n throw new Error(\n 'This is the assert method for Midscene, the first argument should be a string. If you want to use the assert method from Node.js, please import it from the Node.js assert module.',\n );\n }\n\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('assert');\n const startTime = Date.now();\n const assertResult = await AiAssert({\n assertion,\n context,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(assertResult.content),\n };\n\n const { thought, pass } = assertResult.content;\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'assert',\n userQuery: {\n assertion,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n assertionPass: pass,\n assertionThought: thought,\n error: pass ? undefined : thought,\n };\n emitInsightDump(dumpData, dumpSubscriber);\n\n return {\n pass,\n thought,\n usage: assertResult.usage,\n };\n }\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64 } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(targetRect, context.size);\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n","import type {\n DumpMeta,\n DumpSubscriber,\n InsightDump,\n PartialInsightDumpFromSDK,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { uuid } from '@midscene/shared/utils';\n\nexport function emitInsightDump(\n data: PartialInsightDumpFromSDK,\n dumpSubscriber?: DumpSubscriber,\n) {\n let modelDescription = '';\n\n if (vlLocateMode()) {\n const uiTarsModelVer = uiTarsModelVersion();\n if (uiTarsModelVer) {\n modelDescription = `UI-TARS=${uiTarsModelVer}`;\n } else {\n modelDescription = `${vlLocateMode()} mode`;\n }\n }\n\n const baseData: DumpMeta = {\n sdkVersion: getVersion(),\n logTime: Date.now(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n model_description: modelDescription,\n };\n const finalData: InsightDump = {\n logId: uuid(),\n ...baseData,\n ...data,\n };\n\n dumpSubscriber?.(finalData);\n}\n","import { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport {\n plan,\n describeUserPage,\n AiLocateElement,\n AiAssert,\n} from './ai-model/index';\n\nexport { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';\n\nexport type * from './types';\nexport default Insight;\nexport { Executor, Insight, getVersion };\n\nexport type {\n MidsceneYamlScript,\n MidsceneYamlTask,\n MidsceneYamlFlowItem,\n} from './yaml';\n"]}
@@ -1,4 +1,4 @@
1
- import { k as AIUsageInfo, R as Rect, B as BaseElement, U as UIContext, A as AISingleElementResponse, n as AISingleElementResponseByPosition, o as AIElementLocatorResponse, H as ElementById, r as AIDataExtractionResponse, t as AIAssertionResponse, ar as PageType, T as PlanningAIResponse } from './types-8c83481c.js';
1
+ import { l as AIUsageInfo, R as Rect, B as BaseElement, U as UIContext, h as AISingleElementResponse, o as AISingleElementResponseByPosition, p as AIElementLocatorResponse, O as ElementById, s as AIDataExtractionResponse, u as AIAssertionResponse, av as PageType, Y as PlanningAIResponse } from './types-88f46e9e.js';
2
2
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
3
3
 
4
4
  type AIArgs = [
@@ -9,7 +9,8 @@ declare enum AIActionType {
9
9
  ASSERT = 0,
10
10
  INSPECT_ELEMENT = 1,
11
11
  EXTRACT_DATA = 2,
12
- PLAN = 3
12
+ PLAN = 3,
13
+ DESCRIBE_ELEMENT = 4
13
14
  }
14
15
  declare function callAiFn<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
15
16
  content: T;
package/dist/es/tree.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import * as _midscene_shared_constants from '@midscene/shared/constants';
2
- import { B as BaseElement, j as ElementTreeNode } from './types-8c83481c.js';
2
+ import { B as BaseElement, k as ElementTreeNode } from './types-88f46e9e.js';
3
3
  import 'openai/resources';
4
4
 
5
5
  declare function truncateText(text: string | number | object | undefined, maxLength?: number): string;
@@ -203,6 +203,24 @@ interface AIAssertionResponse {
203
203
  pass: boolean;
204
204
  thought: string;
205
205
  }
206
+ interface AIDescribeElementResponse {
207
+ description: string;
208
+ error?: string;
209
+ }
210
+ interface LocatorValidatorOption {
211
+ centerDistanceThreshold?: number;
212
+ }
213
+ interface LocateValidatorResult {
214
+ pass: boolean;
215
+ rect: Rect;
216
+ center: [number, number];
217
+ centerDistance?: number;
218
+ }
219
+ interface AgentDescribeElementAtPointResult {
220
+ prompt: string;
221
+ deepThink: boolean;
222
+ verifyResult?: LocateValidatorResult;
223
+ }
206
224
  /**
207
225
  * context
208
226
  */
@@ -223,7 +241,7 @@ interface InsightOptions {
223
241
  type EnsureObject<T> = {
224
242
  [K in keyof T]: any;
225
243
  };
226
- type InsightAction = 'locate' | 'extract' | 'assert';
244
+ type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';
227
245
  type InsightExtractParam = string | Record<string, string>;
228
246
  type LocateResultElement = {
229
247
  id: string;
@@ -439,4 +457,4 @@ interface GroupedActionDump {
439
457
  }
440
458
  type PageType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android';
441
459
 
442
- export { type PlanningActionParamError as $, type AISingleElementResponse as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PartialInsightDumpFromSDK as F, type LiteUISection as G, type ElementById as H, type InsightAction as I, type AgentWaitForOpt as J, type AgentAssertOpt as K, type LocateResult as L, type MidsceneYamlScript as M, type PlanningLocateParam as N, type OnTaskStartTip as O, type Point as P, type PlanningAction as Q, type Rect as R, type Size as S, type PlanningAIResponse as T, UIContext as U, type PlanningActionParamTap as V, type PlanningActionParamHover as W, type PlanningActionParamInputOrKeyPress as X, type PlanningActionParamScroll as Y, type PlanningActionParamAssert as Z, type PlanningActionParamSleep as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamWaitFor as a0, type Color as a1, type BaseAgentParserOpt as a2, type PuppeteerParserOpt as a3, type PlaywrightParserOpt as a4, type ExecutionRecorderItem as a5, type ExecutionTaskType as a6, type ExecutorContext as a7, type TaskCacheInfo as a8, type ExecutionTaskReturn as a9, type MidsceneYamlFlowItemAIQuery as aA, type MidsceneYamlFlowItemAINumber as aB, type MidsceneYamlFlowItemAINString as aC, type MidsceneYamlFlowItemAIBoolean as aD, type MidsceneYamlFlowItemAILocate as aE, type MidsceneYamlFlowItemAIWaitFor as aF, type MidsceneYamlFlowItemAITap as aG, type MidsceneYamlFlowItemAIHover as aH, type MidsceneYamlFlowItemAIInput as aI, type MidsceneYamlFlowItemAIKeyboardPress as aJ, type MidsceneYamlFlowItemAIScroll as aK, type MidsceneYamlFlowItemEvaluateJavaScript as aL, type MidsceneYamlFlowItemSleep as aM, type FreeFn as aN, type ScriptPlayerTaskStatus as aO, type ScriptPlayerStatusValue as aP, type ExecutionTaskInsightLocateParam as aa, type ExecutionTaskInsightLocateOutput as ab, type ExecutionTaskInsightDumpLog as ac, type ExecutionTaskInsightLocateApply as ad, type ExecutionTaskInsightLocate as ae, type ExecutionTaskInsightQueryParam as af, type ExecutionTaskInsightQueryOutput as ag, type ExecutionTaskInsightQueryApply as ah, type ExecutionTaskInsightQuery as ai, type ExecutionTaskInsightAssertionParam as aj, type ExecutionTaskInsightAssertionApply as ak, type ExecutionTaskInsightAssertion as al, type ExecutionTaskActionApply as am, type ExecutionTaskAction as an, type ExecutionTaskPlanningApply as ao, type ExecutionTaskPlanning as ap, type GroupedActionDump as aq, type PageType as ar, type LocateOption as as, type scrollParam as at, type MidsceneYamlScriptEnvBase as au, type MidsceneYamlScriptWebEnv as av, type MidsceneYamlScriptAndroidEnv as aw, type MidsceneYamlScriptEnv as ax, type MidsceneYamlFlowItemAIAction as ay, type MidsceneYamlFlowItemAIAssert as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type MidsceneYamlTask as h, type MidsceneYamlFlowItem as i, type ElementTreeNode as j, type AIUsageInfo as k, AIResponseFormat as l, type AISingleElementResponseById as m, type AISingleElementResponseByPosition as n, type AIElementLocatorResponse as o, type AIElementCoordinatesResponse as p, type AIElementResponse as q, type AIDataExtractionResponse as r, type AISectionLocatorResponse as s, type AIAssertionResponse as t, type EnsureObject as u, type InsightExtractParam as v, type LocateResultElement as w, type DumpMeta as x, type ReportDumpWithAttributes as y, type InsightDump as z };
460
+ export { type PlanningActionParamInputOrKeyPress as $, type AIDescribeElementResponse as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type LocateResultElement as F, type DumpMeta as G, type ReportDumpWithAttributes as H, type InsightAction as I, type InsightDump as J, type PartialInsightDumpFromSDK as K, type LocateResult as L, type MidsceneYamlScript as M, type LiteUISection as N, type ElementById as O, type Point as P, type OnTaskStartTip as Q, type Rect as R, type Size as S, type AgentWaitForOpt as T, UIContext as U, type AgentAssertOpt as V, type PlanningLocateParam as W, type PlanningAction as X, type PlanningAIResponse as Y, type PlanningActionParamTap as Z, type PlanningActionParamHover as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamScroll as a0, type PlanningActionParamAssert as a1, type PlanningActionParamSleep as a2, type PlanningActionParamError as a3, type PlanningActionParamWaitFor as a4, type Color as a5, type BaseAgentParserOpt as a6, type PuppeteerParserOpt as a7, type PlaywrightParserOpt as a8, type ExecutionRecorderItem as a9, type MidsceneYamlScriptAndroidEnv as aA, type MidsceneYamlScriptEnv as aB, type MidsceneYamlFlowItemAIAction as aC, type MidsceneYamlFlowItemAIAssert as aD, type MidsceneYamlFlowItemAIQuery as aE, type MidsceneYamlFlowItemAINumber as aF, type MidsceneYamlFlowItemAINString as aG, type MidsceneYamlFlowItemAIBoolean as aH, type MidsceneYamlFlowItemAILocate as aI, type MidsceneYamlFlowItemAIWaitFor as aJ, type MidsceneYamlFlowItemAITap as aK, type MidsceneYamlFlowItemAIHover as aL, type MidsceneYamlFlowItemAIInput as aM, type MidsceneYamlFlowItemAIKeyboardPress as aN, type MidsceneYamlFlowItemAIScroll as aO, type MidsceneYamlFlowItemEvaluateJavaScript as aP, type MidsceneYamlFlowItemSleep as aQ, type FreeFn as aR, type ScriptPlayerTaskStatus as aS, type ScriptPlayerStatusValue as aT, type ExecutionTaskType as aa, type ExecutorContext as ab, type TaskCacheInfo as ac, type ExecutionTaskReturn as ad, type ExecutionTaskInsightLocateParam as ae, type ExecutionTaskInsightLocateOutput as af, type ExecutionTaskInsightDumpLog as ag, type ExecutionTaskInsightLocateApply as ah, type ExecutionTaskInsightLocate as ai, type ExecutionTaskInsightQueryParam as aj, type ExecutionTaskInsightQueryOutput as ak, type ExecutionTaskInsightQueryApply as al, type ExecutionTaskInsightQuery as am, type ExecutionTaskInsightAssertionParam as an, type ExecutionTaskInsightAssertionApply as ao, type ExecutionTaskInsightAssertion as ap, type ExecutionTaskActionApply as aq, type ExecutionTaskAction as ar, type ExecutionTaskPlanningApply as as, type ExecutionTaskPlanning as at, type GroupedActionDump as au, type PageType as av, type LocateOption as aw, type scrollParam as ax, type MidsceneYamlScriptEnvBase as ay, type MidsceneYamlScriptWebEnv as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type AISingleElementResponse as h, type MidsceneYamlTask as i, type MidsceneYamlFlowItem as j, type ElementTreeNode as k, type AIUsageInfo as l, AIResponseFormat as m, type AISingleElementResponseById as n, type AISingleElementResponseByPosition as o, type AIElementLocatorResponse as p, type AIElementCoordinatesResponse as q, type AIElementResponse as r, type AIDataExtractionResponse as s, type AISectionLocatorResponse as t, type AIAssertionResponse as u, type LocatorValidatorOption as v, type LocateValidatorResult as w, type AgentDescribeElementAtPointResult as x, type EnsureObject as y, type InsightExtractParam as z };
@@ -1,4 +1,4 @@
1
- import { y as ReportDumpWithAttributes, R as Rect } from './types-8c83481c.js';
1
+ import { H as ReportDumpWithAttributes, R as Rect } from './types-88f46e9e.js';
2
2
  import '@midscene/shared/constants';
3
3
  import 'openai/resources';
4
4
 
package/dist/es/utils.js CHANGED
@@ -14,7 +14,7 @@ import {
14
14
  uploadTestInfoToServer,
15
15
  writeDumpReport,
16
16
  writeLogFile
17
- } from "./chunk-KHYCNMYW.js";
17
+ } from "./chunk-MVRTMYEM.js";
18
18
  export {
19
19
  getLogDir,
20
20
  getTmpDir,
@@ -1,8 +1,8 @@
1
- import { k as AIUsageInfo, Q as PlanningAction } from './types-8c83481c.js';
1
+ import { l as AIUsageInfo, X as PlanningAction } from './types-88f46e9e.js';
2
2
  import { ChatCompletionMessageParam } from 'openai/resources';
3
3
  export { ChatCompletionMessageParam } from 'openai/resources';
4
- import { b as AIActionType } from './llm-planning-e16d1814.js';
5
- export { a as AiAssert, e as AiExtractElementInfo, A as AiLocateElement, f as AiLocateSection, g as adaptBboxToRect, c as callAiFn, d as describeUserPage, p as plan } from './llm-planning-e16d1814.js';
4
+ import { b as AIActionType } from './llm-planning-28e999f3.js';
5
+ export { a as AiAssert, e as AiExtractElementInfo, A as AiLocateElement, f as AiLocateSection, g as adaptBboxToRect, c as callAiFn, d as describeUserPage, p as plan } from './llm-planning-28e999f3.js';
6
6
  import { vlLocateMode } from '@midscene/shared/env';
7
7
  import { actionParser } from '@ui-tars/action-parser';
8
8
  import '@midscene/shared/constants';
@@ -10,7 +10,7 @@
10
10
 
11
11
 
12
12
 
13
- var _chunkU3HZWESXjs = require('./chunk-U3HZWESX.js');
13
+ var _chunkNWSBT5YPjs = require('./chunk-NWSBT5YP.js');
14
14
 
15
15
 
16
16
 
@@ -23,4 +23,4 @@ var _chunkU3HZWESXjs = require('./chunk-U3HZWESX.js');
23
23
 
24
24
 
25
25
 
26
- exports.AiAssert = _chunkU3HZWESXjs.AiAssert; exports.AiExtractElementInfo = _chunkU3HZWESXjs.AiExtractElementInfo; exports.AiLocateElement = _chunkU3HZWESXjs.AiLocateElement; exports.AiLocateSection = _chunkU3HZWESXjs.AiLocateSection; exports.adaptBboxToRect = _chunkU3HZWESXjs.adaptBboxToRect; exports.callAiFn = _chunkU3HZWESXjs.callAiFn; exports.callToGetJSONObject = _chunkU3HZWESXjs.callToGetJSONObject; exports.describeUserPage = _chunkU3HZWESXjs.describeUserPage; exports.plan = _chunkU3HZWESXjs.plan; exports.systemPromptToLocateElement = _chunkU3HZWESXjs.systemPromptToLocateElement; exports.vlmPlanning = _chunkU3HZWESXjs.vlmPlanning;
26
+ exports.AiAssert = _chunkNWSBT5YPjs.AiAssert; exports.AiExtractElementInfo = _chunkNWSBT5YPjs.AiExtractElementInfo; exports.AiLocateElement = _chunkNWSBT5YPjs.AiLocateElement; exports.AiLocateSection = _chunkNWSBT5YPjs.AiLocateSection; exports.adaptBboxToRect = _chunkNWSBT5YPjs.adaptBboxToRect; exports.callAiFn = _chunkNWSBT5YPjs.callAiFn; exports.callToGetJSONObject = _chunkNWSBT5YPjs.callToGetJSONObject; exports.describeUserPage = _chunkNWSBT5YPjs.describeUserPage; exports.plan = _chunkNWSBT5YPjs.plan; exports.systemPromptToLocateElement = _chunkNWSBT5YPjs.systemPromptToLocateElement; exports.vlmPlanning = _chunkNWSBT5YPjs.vlmPlanning;
@@ -218,7 +218,7 @@ function stringifyDumpData(data, indents) {
218
218
  return JSON.stringify(data, replacerForPageObject, indents);
219
219
  }
220
220
  function getVersion() {
221
- return "0.16.9-beta-20250506093037.0";
221
+ return "0.16.9-beta-20250507095704.0";
222
222
  }
223
223
  function debugLog(...message) {
224
224
  const debugMode = _env.getAIConfig.call(void 0, _env.MIDSCENE_DEBUG_MODE);
@@ -282,4 +282,4 @@ function uploadTestInfoToServer({ testUrl }) {
282
282
 
283
283
  exports.groupedActionDumpFileExt = groupedActionDumpFileExt; exports.getLogDir = getLogDir; exports.setReportTpl = setReportTpl; exports.replaceStringWithFirstAppearance = replaceStringWithFirstAppearance; exports.reportHTMLContent = reportHTMLContent; exports.writeDumpReport = writeDumpReport; exports.writeLogFile = writeLogFile; exports.getTmpDir = getTmpDir; exports.getTmpFile = getTmpFile; exports.overlapped = overlapped; exports.sleep = sleep; exports.replacerForPageObject = replacerForPageObject; exports.stringifyDumpData = stringifyDumpData; exports.getVersion = getVersion; exports.uploadTestInfoToServer = uploadTestInfoToServer;
284
284
 
285
- //# sourceMappingURL=chunk-KHYCNMYW.js.map
285
+ //# sourceMappingURL=chunk-MVRTMYEM.js.map
@@ -26,8 +26,6 @@ var _identity = require('@azure/identity');
26
26
 
27
27
 
28
28
 
29
-
30
-
31
29
 
32
30
 
33
31
 
@@ -231,53 +229,9 @@ async function markupImageForLLM(screenshotBase64, tree, size) {
231
229
  return imagePayload;
232
230
  }
233
231
 
234
- // src/ai-model/prompt/ui-tars-planning.ts
235
- function getTimeZoneInfo() {
236
- const timeZone = Intl.DateTimeFormat().resolvedOptions().timeZone;
237
- const offset = -(/* @__PURE__ */ new Date()).getTimezoneOffset() / 60;
238
- return {
239
- timezone: `UTC${offset >= 0 ? "+" : ""}${offset}`,
240
- isChina: timeZone === "Asia/Shanghai"
241
- };
242
- }
243
- function getLanguage() {
244
- return getTimeZoneInfo().isChina ? "Chinese" : "English";
245
- }
246
- function getUiTarsPlanningPrompt() {
247
- const language2 = getLanguage();
248
- return `
249
- You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
250
-
251
- ## Output Format
252
- \`\`\`
253
- Thought: ...
254
- Action: ...
255
- \`\`\`
256
-
257
- ## Action Space
258
-
259
- click(start_box='[x1, y1, x2, y2]')
260
- left_double(start_box='[x1, y1, x2, y2]')
261
- right_single(start_box='[x1, y1, x2, y2]')
262
- drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
263
- hotkey(key='')
264
- type(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \\n at the end of content.
265
- scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
266
- wait() #Sleep for 5s and take a screenshot to check for any changes.
267
- finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format.
268
-
269
-
270
- ## Note
271
- - Use ${language2} in \`Thought\` part.
272
- - Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.
273
-
274
- ## User Instruction
275
- `;
276
- }
277
- var getSummary = (prediction) => prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, "").trim();
278
-
279
232
  // src/ai-model/prompt/assertion.ts
280
- var language = getTimeZoneInfo().isChina ? "Chinese" : "English";
233
+
234
+ var preferredLanguage = _env.getPreferredLanguage.call(void 0, );
281
235
  var defaultAssertionPrompt = "You are a senior testing engineer. User will give an assertion and a screenshot of a page. By carefully viewing the screenshot, please tell whether the assertion is truthy.";
282
236
  var defaultAssertionResponseJsonFormat = `Return in the following JSON format:
283
237
  {
@@ -294,7 +248,7 @@ var uiTarsAssertionResponseJsonFormat = `## Output Json String Format
294
248
 
295
249
  ## Rules **MUST** follow
296
250
  - Make sure to return **only** the JSON, with **no additional** text or explanations.
297
- - Use ${language} in \`thought\` part.
251
+ - Use ${preferredLanguage} in \`thought\` part.
298
252
  - You **MUST** strictly follow up the **Output Json String Format**.`;
299
253
  function systemPromptToAssert(model) {
300
254
  return `${defaultAssertionPrompt}
@@ -1247,10 +1201,10 @@ async function call(messages, AIActionTypeValue, responseFormat) {
1247
1201
  let content;
1248
1202
  let usage;
1249
1203
  const commonConfig = {
1250
- temperature: _env.getAIConfigInBoolean.call(void 0, _env.MIDSCENE_USE_VLM_UI_TARS) ? 0 : 0.1,
1204
+ temperature: _env.vlLocateMode.call(void 0, ) === "vlm-ui-tars" ? 0 : 0.1,
1251
1205
  stream: false,
1252
1206
  max_tokens: typeof maxTokens === "number" ? maxTokens : Number.parseInt(maxTokens || "2048", 10),
1253
- ..._env.getAIConfigInBoolean.call(void 0, _env.MIDSCENE_USE_QWEN_VL) ? {
1207
+ ..._env.vlLocateMode.call(void 0, ) === "qwen-vl" ? {
1254
1208
  vl_high_resolution_images: true
1255
1209
  } : {}
1256
1210
  };
@@ -1328,12 +1282,13 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
1328
1282
  case 1 /* INSPECT_ELEMENT */:
1329
1283
  responseFormat = locatorSchema;
1330
1284
  break;
1331
- case 2 /* EXTRACT_DATA */:
1332
- responseFormat = { type: "json_object" /* JSON */ };
1333
- break;
1334
1285
  case 3 /* PLAN */:
1335
1286
  responseFormat = planSchema;
1336
1287
  break;
1288
+ case 2 /* EXTRACT_DATA */:
1289
+ case 4 /* DESCRIBE_ELEMENT */:
1290
+ responseFormat = { type: "json_object" /* JSON */ };
1291
+ break;
1337
1292
  }
1338
1293
  }
1339
1294
  if (model === "gpt-4o-2024-05-13") {
@@ -1892,6 +1847,43 @@ var _keyboardlayout = require('@midscene/shared/keyboard-layout');
1892
1847
 
1893
1848
 
1894
1849
  var _actionparser = require('@ui-tars/action-parser');
1850
+
1851
+ // src/ai-model/prompt/ui-tars-planning.ts
1852
+
1853
+ function getUiTarsPlanningPrompt() {
1854
+ const preferredLanguage2 = _env.getPreferredLanguage.call(void 0, );
1855
+ return `
1856
+ You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
1857
+
1858
+ ## Output Format
1859
+ \`\`\`
1860
+ Thought: ...
1861
+ Action: ...
1862
+ \`\`\`
1863
+
1864
+ ## Action Space
1865
+
1866
+ click(start_box='[x1, y1, x2, y2]')
1867
+ left_double(start_box='[x1, y1, x2, y2]')
1868
+ right_single(start_box='[x1, y1, x2, y2]')
1869
+ drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')
1870
+ hotkey(key='')
1871
+ type(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \\n at the end of content.
1872
+ scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')
1873
+ wait() #Sleep for 5s and take a screenshot to check for any changes.
1874
+ finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format.
1875
+
1876
+
1877
+ ## Note
1878
+ - Use ${preferredLanguage2} in \`Thought\` part.
1879
+ - Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.
1880
+
1881
+ ## User Instruction
1882
+ `;
1883
+ }
1884
+ var getSummary = (prediction) => prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, "").trim();
1885
+
1886
+ // src/ai-model/ui-tars-planning.ts
1895
1887
  var debug = _logger.getDebug.call(void 0, "ui-tars-planning");
1896
1888
  var bboxSize = 10;
1897
1889
  var pointToBbox = (point, width, height) => {
@@ -2086,6 +2078,7 @@ function getPoint(startBox, size) {
2086
2078
 
2087
2079
 
2088
2080
 
2089
- exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.describeUserPage = describeUserPage; exports.callToGetJSONObject = callToGetJSONObject; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning;
2090
2081
 
2091
- //# sourceMappingURL=chunk-U3HZWESX.js.map
2082
+ exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.describeUserPage = describeUserPage; exports.callToGetJSONObject = callToGetJSONObject; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.expandSearchArea = expandSearchArea; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning;
2083
+
2084
+ //# sourceMappingURL=chunk-NWSBT5YP.js.map