@midscene/core 0.17.2-beta-20250521115451.0 → 0.17.2-beta-20250521131112.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/dist/es/ai-model.d.ts +3 -3
  2. package/dist/es/ai-model.js +1 -1
  3. package/dist/es/{chunk-PI7QH3ZW.js → chunk-GHP3FR4O.js} +3 -3
  4. package/dist/es/{chunk-EUJHCFP2.js → chunk-K2IXQ5O2.js} +56 -55
  5. package/dist/es/chunk-K2IXQ5O2.js.map +1 -0
  6. package/dist/es/index.d.ts +8 -5
  7. package/dist/es/index.js +85 -2
  8. package/dist/es/index.js.map +1 -1
  9. package/dist/es/{llm-planning-dc99c344.d.ts → llm-planning-3bdabecb.d.ts} +4 -2
  10. package/dist/es/tree.d.ts +1 -1
  11. package/dist/es/{types-e6507d2e.d.ts → types-01381369.d.ts} +25 -2
  12. package/dist/es/utils.d.ts +1 -1
  13. package/dist/es/utils.js +1 -1
  14. package/dist/lib/ai-model.d.ts +3 -3
  15. package/dist/lib/ai-model.js +2 -2
  16. package/dist/lib/{chunk-PI7QH3ZW.js → chunk-GHP3FR4O.js} +3 -3
  17. package/dist/lib/{chunk-EUJHCFP2.js → chunk-K2IXQ5O2.js} +64 -63
  18. package/dist/lib/chunk-K2IXQ5O2.js.map +1 -0
  19. package/dist/lib/index.d.ts +8 -5
  20. package/dist/lib/index.js +93 -10
  21. package/dist/lib/index.js.map +1 -1
  22. package/dist/lib/{llm-planning-dc99c344.d.ts → llm-planning-3bdabecb.d.ts} +4 -2
  23. package/dist/lib/tree.d.ts +1 -1
  24. package/dist/{types/types-e6507d2e.d.ts → lib/types-01381369.d.ts} +25 -2
  25. package/dist/lib/utils.d.ts +1 -1
  26. package/dist/lib/utils.js +2 -2
  27. package/dist/types/ai-model.d.ts +3 -3
  28. package/dist/types/index.d.ts +8 -5
  29. package/dist/types/{llm-planning-dc99c344.d.ts → llm-planning-3bdabecb.d.ts} +4 -2
  30. package/dist/types/tree.d.ts +1 -1
  31. package/dist/{lib/types-e6507d2e.d.ts → types/types-01381369.d.ts} +25 -2
  32. package/dist/types/utils.d.ts +1 -1
  33. package/package.json +2 -2
  34. package/dist/es/chunk-EUJHCFP2.js.map +0 -1
  35. package/dist/lib/chunk-EUJHCFP2.js.map +0 -1
  36. /package/dist/es/{chunk-PI7QH3ZW.js.map → chunk-GHP3FR4O.js.map} +0 -0
  37. /package/dist/lib/{chunk-PI7QH3ZW.js.map → chunk-GHP3FR4O.js.map} +0 -0
package/dist/lib/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  "use strict";Object.defineProperty(exports, "__esModule", {value: true}); function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
2
2
 
3
- var _chunkPI7QH3ZWjs = require('./chunk-PI7QH3ZW.js');
3
+ var _chunkGHP3FR4Ojs = require('./chunk-GHP3FR4O.js');
4
4
 
5
5
 
6
6
 
@@ -9,7 +9,9 @@ var _chunkPI7QH3ZWjs = require('./chunk-PI7QH3ZW.js');
9
9
 
10
10
 
11
11
 
12
- var _chunkEUJHCFP2js = require('./chunk-EUJHCFP2.js');
12
+
13
+
14
+ var _chunkK2IXQ5O2js = require('./chunk-K2IXQ5O2.js');
13
15
 
14
16
  // src/ai-model/action-executor.ts
15
17
  var _env = require('@midscene/shared/env');
@@ -156,7 +158,7 @@ ${_optionalChain([this, 'access', _7 => _7.latestErrorTask, 'call', _8 => _8(),
156
158
  }
157
159
  dump() {
158
160
  const dumpData = {
159
- sdkVersion: _chunkPI7QH3ZWjs.getVersion.call(void 0, ),
161
+ sdkVersion: _chunkGHP3FR4Ojs.getVersion.call(void 0, ),
160
162
  model_name: _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME) || "",
161
163
  logTime: Date.now(),
162
164
  name: this.name,
@@ -166,12 +168,40 @@ ${_optionalChain([this, 'access', _7 => _7.latestErrorTask, 'call', _8 => _8(),
166
168
  }
167
169
  };
168
170
 
171
+ // src/ai-model/prompt/describe.ts
172
+
173
+ var preferredLanguage = _env.getPreferredLanguage.call(void 0, );
174
+ var elementDescriberInstruction = () => {
175
+ return `Tell what is the content of the element wrapped by the read rectangle in the screenshot. Your description is expected to be used to precisely locate the element from other similar elements on screenshot. Use ${preferredLanguage} in the description.
176
+
177
+ Please follow the following rules:
178
+ 1. The description should be start with a brief description, like "a button for confirming the action".
179
+
180
+ 2. Include these information in the description to distinguish the element from its siblings and other similar elements, as much as possible:
181
+ - The text of the element, like "with text 'Confirm'"
182
+ - What the element looks like if it's an image, like "with image '...'"
183
+ - The relative position of the element, like "on the left of ..., around ..."
184
+ - How to distinguish the element from its siblings elements, like "it is the icon instead of the text"
185
+
186
+ 3. Do NOT mention the red rectangle in the description.
187
+
188
+ 4. Use the error field to describe the unexpected situations, if any. If not, put null.
189
+
190
+ Return in JSON:
191
+ {
192
+ "description": "[{brief description}]: {text of the element} {image of the element} {relative position of the element} ... ",
193
+ "error"?: "..."
194
+ }`;
195
+ };
196
+
169
197
  // src/insight/index.ts
170
198
 
171
199
 
172
200
 
173
201
 
174
202
 
203
+
204
+ var _img = require('@midscene/shared/img');
175
205
  var _logger = require('@midscene/shared/logger');
176
206
 
177
207
 
@@ -194,7 +224,7 @@ function emitInsightDump(data, dumpSubscriber) {
194
224
  }
195
225
  }
196
226
  const baseData = {
197
- sdkVersion: _chunkPI7QH3ZWjs.getVersion.call(void 0, ),
227
+ sdkVersion: _chunkGHP3FR4Ojs.getVersion.call(void 0, ),
198
228
  logTime: Date.now(),
199
229
  model_name: _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME) || "",
200
230
  model_description: modelDescription
@@ -211,7 +241,7 @@ function emitInsightDump(data, dumpSubscriber) {
211
241
  var debug = _logger.getDebug.call(void 0, "ai:insight");
212
242
  var Insight = class {
213
243
  constructor(context, opt) {
214
- this.aiVendorFn = _chunkEUJHCFP2js.callAiFn;
244
+ this.aiVendorFn = _chunkK2IXQ5O2js.callAiFn;
215
245
  _utils.assert.call(void 0, context, "context is required for Insight");
216
246
  if (typeof context === "function") {
217
247
  this.contextRetrieverFn = context;
@@ -254,7 +284,7 @@ var Insight = class {
254
284
  let searchAreaUsage = void 0;
255
285
  let searchAreaResponse = void 0;
256
286
  if (searchAreaPrompt) {
257
- searchAreaResponse = await _chunkEUJHCFP2js.AiLocateSection.call(void 0, {
287
+ searchAreaResponse = await _chunkK2IXQ5O2js.AiLocateSection.call(void 0, {
258
288
  context,
259
289
  sectionDescription: searchAreaPrompt
260
290
  });
@@ -267,7 +297,7 @@ var Insight = class {
267
297
  searchArea = searchAreaResponse.rect;
268
298
  }
269
299
  const startTime = Date.now();
270
- const { parseResult, rect, elementById, rawResponse, usage } = await _chunkEUJHCFP2js.AiLocateElement.call(void 0, {
300
+ const { parseResult, rect, elementById, rawResponse, usage } = await _chunkK2IXQ5O2js.AiLocateElement.call(void 0, {
271
301
  callAI: callAI || this.aiVendorFn,
272
302
  context,
273
303
  targetElementDescription: queryPrompt,
@@ -355,7 +385,7 @@ ${parseResult.errors.join("\n")}`;
355
385
  this.onceDumpUpdatedFn = void 0;
356
386
  const context = await this.contextRetrieverFn("extract");
357
387
  const startTime = Date.now();
358
- const { parseResult, usage } = await _chunkEUJHCFP2js.AiExtractElementInfo.call(void 0, {
388
+ const { parseResult, usage } = await _chunkK2IXQ5O2js.AiExtractElementInfo.call(void 0, {
359
389
  context,
360
390
  dataQuery: dataDemand
361
391
  });
@@ -406,7 +436,7 @@ ${parseResult.errors.join("\n")}`;
406
436
  this.onceDumpUpdatedFn = void 0;
407
437
  const context = await this.contextRetrieverFn("assert");
408
438
  const startTime = Date.now();
409
- const assertResult = await _chunkEUJHCFP2js.AiAssert.call(void 0, {
439
+ const assertResult = await _chunkK2IXQ5O2js.AiAssert.call(void 0, {
410
440
  assertion,
411
441
  context
412
442
  });
@@ -436,6 +466,59 @@ ${parseResult.errors.join("\n")}`;
436
466
  usage: assertResult.usage
437
467
  };
438
468
  }
469
+ async describe(target, opt) {
470
+ _utils.assert.call(void 0, target, "target is required for insight.describe");
471
+ const context = await this.contextRetrieverFn("describe");
472
+ const { screenshotBase64 } = context;
473
+ _utils.assert.call(void 0, screenshotBase64, "screenshot is required for insight.describe");
474
+ const systemPrompt = elementDescriberInstruction();
475
+ const defaultRectSize = 30;
476
+ const targetRect = Array.isArray(target) ? {
477
+ left: Math.floor(target[0] - defaultRectSize / 2),
478
+ top: Math.floor(target[1] - defaultRectSize / 2),
479
+ width: defaultRectSize,
480
+ height: defaultRectSize
481
+ } : target;
482
+ let imagePayload = await _img.compositeElementInfoImg.call(void 0, {
483
+ inputImgBase64: screenshotBase64,
484
+ elementsPositionInfo: [
485
+ {
486
+ rect: targetRect
487
+ }
488
+ ],
489
+ borderThickness: 3
490
+ });
491
+ if (_optionalChain([opt, 'optionalAccess', _23 => _23.deepThink])) {
492
+ const searchArea = _chunkK2IXQ5O2js.expandSearchArea.call(void 0, targetRect, context.size);
493
+ debug("describe: set searchArea", searchArea);
494
+ imagePayload = await _img.cropByRect.call(void 0,
495
+ imagePayload,
496
+ searchArea,
497
+ _env.getAIConfigInBoolean.call(void 0, _env.MIDSCENE_USE_QWEN_VL)
498
+ );
499
+ }
500
+ const msgs = [
501
+ { role: "system", content: systemPrompt },
502
+ {
503
+ role: "user",
504
+ content: [
505
+ {
506
+ type: "image_url",
507
+ image_url: {
508
+ url: imagePayload,
509
+ detail: "high"
510
+ }
511
+ }
512
+ ]
513
+ }
514
+ ];
515
+ const callAIFn = this.aiVendorFn || _chunkK2IXQ5O2js.callToGetJSONObject;
516
+ const res = await callAIFn(msgs, 4 /* DESCRIBE_ELEMENT */);
517
+ const { content } = res;
518
+ _utils.assert.call(void 0, !content.error, `describe failed: ${content.error}`);
519
+ _utils.assert.call(void 0, content.description, "failed to describe the element");
520
+ return content;
521
+ }
439
522
  };
440
523
 
441
524
  // src/index.ts
@@ -452,6 +535,6 @@ var src_default = Insight;
452
535
 
453
536
 
454
537
 
455
- exports.AiAssert = _chunkEUJHCFP2js.AiAssert; exports.AiLocateElement = _chunkEUJHCFP2js.AiLocateElement; exports.Executor = Executor; exports.Insight = Insight; exports.MIDSCENE_MODEL_NAME = _env.MIDSCENE_MODEL_NAME; exports.default = src_default; exports.describeUserPage = _chunkEUJHCFP2js.describeUserPage; exports.getAIConfig = _env.getAIConfig; exports.getVersion = _chunkPI7QH3ZWjs.getVersion; exports.plan = _chunkEUJHCFP2js.plan;
538
+ exports.AiAssert = _chunkK2IXQ5O2js.AiAssert; exports.AiLocateElement = _chunkK2IXQ5O2js.AiLocateElement; exports.Executor = Executor; exports.Insight = Insight; exports.MIDSCENE_MODEL_NAME = _env.MIDSCENE_MODEL_NAME; exports.default = src_default; exports.describeUserPage = _chunkK2IXQ5O2js.describeUserPage; exports.getAIConfig = _env.getAIConfig; exports.getVersion = _chunkGHP3FR4Ojs.getVersion; exports.plan = _chunkK2IXQ5O2js.plan;
456
539
 
457
540
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"mappings":";;;;;;;;;;;;;;AAUA,SAAS,qBAAqB,mBAAmB;AACjD,SAAS,cAAc;AAEhB,IAAM,WAAN,MAAe;AAAA,EAUpB,YACE,MACA,SAGA;AACA,SAAK,SACH,SAAS,SAAS,QAAQ,MAAM,SAAS,IAAI,YAAY;AAC3D,SAAK,OAAO;AACZ,SAAK,SAAS,SAAS,SAAS,CAAC,GAAG;AAAA,MAAI,CAAC,SACvC,KAAK,kBAAkB,IAAI;AAAA,IAC7B;AACA,SAAK,cAAc,SAAS;AAAA,EAC9B;AAAA,EAEQ,kBAAkB,MAAyC;AACjE,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,MAAgE;AAC3E;AAAA,MACE,KAAK,WAAW;AAAA,MAChB;AAAA,QAAyD,KAAK,gBAAgB,GAAG,KAAK;AAAA,EAAK,KAAK,gBAAgB,GAAG,UAAU;AAAA,IAC/H;AACA,QAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,WAAK,MAAM,KAAK,GAAG,KAAK,IAAI,CAAC,SAAS,KAAK,kBAAkB,IAAI,CAAC,CAAC;AAAA,IACrE,OAAO;AACL,WAAK,MAAM,KAAK,KAAK,kBAAkB,IAAI,CAAC;AAAA,IAC9C;AACA,QAAI,KAAK,WAAW,WAAW;AAC7B,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAM,QAAsB;AAC1B,QAAI,KAAK,WAAW,UAAU,KAAK,MAAM,SAAS,GAAG;AACnD,cAAQ;AAAA,QACN;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,WAAW,WAAW,6BAA6B;AAC/D,WAAO,KAAK,WAAW,aAAa,+BAA+B;AACnE,WAAO,KAAK,WAAW,SAAS,4BAA4B;AAE5D,UAAM,mBAAmB,KAAK,MAAM;AAAA,MAClC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,mBAAmB,GAAG;AAExB;AAAA,IACF;AAEA,SAAK,SAAS;AACd,QAAI,YAAY;AAChB,QAAI,wBAAwB;AAE5B,QAAI;AAEJ,WAAO,YAAY,KAAK,MAAM,QAAQ;AACpC,YAAM,OAAO,KAAK,MAAM,SAAS;AACjC;AAAA,QACE,KAAK,WAAW;AAAA,QAChB,2CAA2C,KAAK,MAAM;AAAA,MACxD;AACA,WAAK,SAAS;AAAA,QACZ,OAAO,KAAK,IAAI;AAAA,MAClB;AACA,UAAI;AACF,aAAK,SAAS;AACd,YAAI;AACF,cAAI,KAAK,aAAa;AACpB,kBAAM,KAAK,YAAY,IAAI;AAAA,UAC7B;AAAA,QACF,SAAS,GAAG;AACV,kBAAQ,MAAM,wBAAwB,CAAC;AAAA,QACzC;AACA;AAAA,UACE,CAAC,WAAW,UAAU,UAAU,EAAE,QAAQ,KAAK,IAAI,KAAK;AAAA,UACxD,0BAA0B,KAAK,IAAI;AAAA,QACrC;AAEA,cAAM,EAAE,UAAU,MAAM,IAAI;AAC5B,eAAO,UAAU,uCAAuC,KAAK,IAAI,EAAE;AAEnE,YAAI;AACJ,cAAM,kBAAmC;AAAA,UACvC;AAAA,UACA,SAAS,oBAAoB;AAAA,QAC/B;AAEA,YAAI,KAAK,SAAS,WAAW;AAC3B;AAAA,YACE,KAAK,YAAY,YACf,KAAK,YAAY,WACjB,KAAK,YAAY,YACjB,KAAK,YAAY,aACjB,KAAK,YAAY,YACjB,KAAK,YAAY;AAAA,YACnB,gCAAgC,KAAK,OAAO;AAAA,UAC9C;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AACxD,cAAI,KAAK,YAAY,UAAU;AAC7B,iCACE,aACC;AAAA,UACL;AAAA,QACF,WAAW,KAAK,SAAS,YAAY,KAAK,SAAS,YAAY;AAC7D,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D,OAAO;AACL,kBAAQ;AAAA,YACN,0BAA0B,KAAK,IAAI;AAAA,UACrC;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D;AAEA,eAAO,OAAO,MAAM,WAAW;AAC/B,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD,aAAK,OAAO,SAAU,aAAqB,UAAU;AACrD;AAAA,MACF,SAAS,GAAQ;AACf,gCAAwB;AACxB,aAAK,QACH,GAAG,YAAY,OAAO,MAAM,WAAW,IAAI;AAC7C,aAAK,aAAa,EAAE;AAEpB,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD;AAAA,MACF;AAAA,IACF;AAGA,aAAS,IAAI,YAAY,GAAG,IAAI,KAAK,MAAM,QAAQ,KAAK;AACtD,WAAK,MAAM,CAAC,EAAE,SAAS;AAAA,IACzB;AAEA,QAAI,uBAAuB;AACzB,WAAK,SAAS;AAAA,IAChB,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAEA,QAAI,KAAK,MAAM,QAAQ;AAErB,YAAM,cAAc,KAAK,IAAI,WAAW,KAAK,MAAM,SAAS,CAAC;AAC7D,aAAO,KAAK,MAAM,WAAW,EAAE;AAAA,IACjC;AAAA,EACF;AAAA,EAEA,iBAA0B;AACxB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,kBAAwC;AACtC,QAAI,KAAK,WAAW,SAAS;AAC3B,aAAO;AAAA,IACT;AACA,UAAM,iBAAiB,KAAK,MAAM;AAAA,MAChC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,kBAAkB,GAAG;AACvB,aAAO,KAAK,MAAM,cAAc;AAAA,IAClC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,OAAsB;AACpB,UAAM,WAA0B;AAAA,MAC9B,YAAY,WAAW;AAAA,MACvB,YAAY,YAAY,mBAAmB,KAAK;AAAA,MAChD,SAAS,KAAK,IAAI;AAAA,MAClB,MAAM,KAAK;AAAA,MACX,OAAO,KAAK;AAAA,IACd;AACA,WAAO;AAAA,EACT;AACF;;;AC1LA;AAAA,EACE;AAAA,EACA;AAAA,EACA,gBAAAA;AAAA,OACK;AACP,SAAS,gBAAgB;AACzB,SAAS,UAAAC,eAAc;;;ACnBvB;AAAA,EACE,uBAAAC;AAAA,EACA,eAAAC;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,YAAY;AAEd,SAAS,gBACd,MACA,gBACA;AACA,MAAI,mBAAmB;AAEvB,MAAI,aAAa,GAAG;AAClB,UAAM,iBAAiB,mBAAmB;AAC1C,QAAI,gBAAgB;AAClB,yBAAmB,WAAW,cAAc;AAAA,IAC9C,OAAO;AACL,yBAAmB,GAAG,aAAa,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,QAAM,WAAqB;AAAA,IACzB,YAAY,WAAW;AAAA,IACvB,SAAS,KAAK,IAAI;AAAA,IAClB,YAAYA,aAAYD,oBAAmB,KAAK;AAAA,IAChD,mBAAmB;AAAA,EACrB;AACA,QAAM,YAAyB;AAAA,IAC7B,OAAO,KAAK;AAAA,IACZ,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAEA,mBAAiB,SAAS;AAC5B;;;ADLA,IAAM,QAAQ,SAAS,YAAY;AACnC,IAAqB,UAArB,MAGE;AAAA,EAWA,YACE,SAGA,KACA;AAXF,sBAAoD;AAYlD,IAAAD,QAAO,SAAS,iCAAiC;AACjD,QAAI,OAAO,YAAY,YAAY;AACjC,WAAK,qBAAqB;AAAA,IAC5B,OAAO;AACL,WAAK,qBAAqB,MAAM,QAAQ,QAAQ,OAAO;AAAA,IACzD;AAEA,QAAI,OAAO,KAAK,eAAe,aAAa;AAC1C,WAAK,aAAa,IAAI;AAAA,IACxB;AACA,QAAI,OAAO,KAAK,aAAa,aAAa;AACxC,WAAK,WAAW,IAAI;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,MAAM,OACJ,OACA,KACuB;AACvB,UAAM,EAAE,OAAO,IAAI,OAAO,CAAC;AAC3B,UAAM,cAAc,OAAO,UAAU,WAAW,QAAQ,MAAM;AAC9D,IAAAA,QAAO,aAAa,8BAA8B;AAClD,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,IAAAA,QAAO,OAAO,UAAU,UAAU,sCAAsC;AAExE,UAAM,wBAAwB;AAAA,MAC5B;AAAA,IACF;AACA,QAAI,uBAAuB;AACzB,YAAM,yBAAyB,qBAAqB;AAAA,IACtD;AACA,QAAI;AACJ,QAAI,MAAM,aAAa,uBAAuB;AAC5C,yBAAmB,MAAM;AAAA,IAC3B;AAEA,QAAI,oBAAoB,CAACD,cAAa,GAAG;AACvC,cAAQ;AAAA,QACN;AAAA,MACF;AACA,yBAAmB;AAAA,IACrB;AAEA,UAAM,UAAU,KAAK,WAAY,MAAM,KAAK,mBAAmB,QAAQ;AAEvE,QAAI,aAA+B;AACnC,QAAI,wBAA4C;AAChD,QAAI,kBAA2C;AAC/C,QAAI,qBAEY;AAChB,QAAI,kBAAkB;AACpB,2BAAqB,MAAM,gBAAgB;AAAA,QACzC;AAAA,QACA,oBAAoB;AAAA,MACtB,CAAC;AACD,MAAAC;AAAA,QACE,mBAAmB;AAAA,QACnB,gCAAgC,gBAAgB,IAC9C,mBAAmB,QAAQ,KAAK,mBAAmB,KAAK,KAAK,EAC/D;AAAA,MACF;AACA,8BAAwB,mBAAmB;AAC3C,wBAAkB,mBAAmB;AACrC,mBAAa,mBAAmB;AAAA,IAClC;AAEA,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,aAAa,aAAa,MAAM,IACzD,MAAM,gBAAgB;AAAA,MACpB,QAAQ,UAAU,KAAK;AAAA,MACvB;AAAA,MACA,0BAA0B;AAAA,MAC1B,cAAc;AAAA,IAChB,CAAC;AAEH,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,MACvC,gBAAgB,KAAK,UAAU,WAAW;AAAA,MAC1C;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAgC,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAC1E;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT,SAAS;AAAA,MACX;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,aAAa;AAAA,MACb,MAAM;AAAA,MACN;AAAA,MACA,WAAW,CAAC,CAAC;AAAA,MACb,OAAO;AAAA,IACT;AAEA,UAAM,WAA0B,CAAC;AACjC,KAAC,YAAY,YAAY,CAAC,GAAG,QAAQ,CAAC,SAAS;AAC7C,UAAI,QAAQ,MAAM;AAChB,cAAM,UAAU,YAAY,MAAM,EAAE;AAEpC,YAAI,CAAC,SAAS;AACZ,kBAAQ;AAAA,YACN,kCAAkC,KAAK,EAAE;AAAA,UAC3C;AACA;AAAA,QACF;AACA,iBAAS,KAAK,OAAO;AAAA,MACvB;AAAA,IACF,CAAC;AAED;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH,gBAAgB;AAAA,MAClB;AAAA,MACA;AAAA,IACF;AAEA,QAAI,UAAU;AACZ,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,IAAAA;AAAA,MACE,SAAS,UAAU;AAAA,MACnB,6CAA6C,SAAS,MAAM;AAAA,IAC9D;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO;AAAA,QACL,SAAS;AAAA,UACP,IAAI,SAAS,CAAC,EAAG;AAAA,UACjB,SAAS,SAAS,CAAC,EAAG;AAAA,UACtB,QAAQ,SAAS,CAAC,EAAG;AAAA,UACrB,MAAM,SAAS,CAAC,EAAG;AAAA,UACnB,QAAQ,SAAS,CAAC,EAAG,UAAU,CAAC;AAAA,UAChC,YAAY,SAAS,CAAC,EAAG;AAAA,QAC3B;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAAA,EAQA,MAAM,QAAW,YAA+C;AAC9D,IAAAA;AAAA,MACE,OAAO,eAAe,YAAY,OAAO,eAAe;AAAA,MACxD,kDAAkD,OAAO,UAAU;AAAA,IACrE;AACA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,SAAS;AAEvD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,IAAI,MAAM,qBAAwB;AAAA,MAC3D;AAAA,MACA,WAAW;AAAA,IACb,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,IACzC;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAwB,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAClE;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,OAAO;AAAA,IACT;AAEA,UAAM,EAAE,KAAK,IAAI,eAAe,CAAC;AAGjC;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAEA,QAAI,YAAY,CAAC,MAAM;AACrB,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,WAAO;AAAA,MACL;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,WAAsD;AACjE,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,QAAQ;AACtD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,eAAe,MAAM,SAAS;AAAA,MAClC;AAAA,MACA;AAAA,IACF,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,aAAa,OAAO;AAAA,IAClD;AAEA,UAAM,EAAE,SAAS,KAAK,IAAI,aAAa;AACvC,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,eAAe;AAAA,MACf,kBAAkB;AAAA,MAClB,OAAO,OAAO,SAAY;AAAA,IAC5B;AACA,oBAAgB,UAAU,cAAc;AAExC,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,OAAO,aAAa;AAAA,IACtB;AAAA,EACF;AACF;;;AE9TA,SAAS,eAAAE,cAAa,uBAAAD,4BAA2B;AAGjD,IAAO,cAAQ","names":["vlLocateMode","assert","MIDSCENE_MODEL_NAME","getAIConfig"],"ignoreList":[],"sources":["../../src/ai-model/action-executor.ts","../../src/insight/index.ts","../../src/insight/utils.ts","../../src/index.ts"],"sourcesContent":["import type {\n ExecutionDump,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskProgressOptions,\n ExecutionTaskReturn,\n ExecutorContext,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport { MIDSCENE_MODEL_NAME, getAIConfig } from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\n\nexport class Executor {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of executor\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n constructor(\n name: string,\n options?: ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n },\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n async append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void> {\n assert(\n this.status !== 'error',\n `executor is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n }\n\n async flush(): Promise<any> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for executor, status is init but tasks are not empty',\n );\n }\n\n assert(this.status !== 'running', 'executor is already running');\n assert(this.status !== 'completed', 'executor is already completed');\n assert(this.status !== 'error', 'executor is in error state');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n return;\n }\n\n this.status = 'running';\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskInsightLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Locate' ||\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported insight subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskInsightLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action' || task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n task.timing.aiCost = (returnValue as any)?.aiCost || 0;\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = false;\n task.error =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n\n if (successfullyCompleted) {\n this.status = 'completed';\n } else {\n this.status = 'error';\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n return this.tasks[outputIndex].output;\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n const errorTaskIndex = this.tasks.findIndex(\n (task) => task.status === 'failed',\n );\n if (errorTaskIndex >= 0) {\n return this.tasks[errorTaskIndex];\n }\n return null;\n }\n\n dump(): ExecutionDump {\n const dumpData: ExecutionDump = {\n sdkVersion: getVersion(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n };\n return dumpData;\n }\n}\n","import { callAiFn } from '@/ai-model/common';\nimport { AiExtractElementInfo, AiLocateElement } from '@/ai-model/index';\nimport { AiAssert, AiLocateSection } from '@/ai-model/inspect';\nimport type {\n AIElementResponse,\n AISingleElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightAssertionResponse,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_FORCE_DEEP_THINK,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n if (searchAreaPrompt && !vlLocateMode()) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, elementById, rawResponse, usage } =\n await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T = any>(input: string): Promise<T>;\n async extract<T extends Record<string, string>>(\n input: T,\n ): Promise<Record<keyof T, any>>;\n async extract<T extends object>(input: Record<keyof T, string>): Promise<T>;\n\n async extract<T>(dataDemand: InsightExtractParam): Promise<any> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n usage,\n };\n }\n\n async assert(assertion: string): Promise<InsightAssertionResponse> {\n if (typeof assertion !== 'string') {\n throw new Error(\n 'This is the assert method for Midscene, the first argument should be a string. If you want to use the assert method from Node.js, please import it from the Node.js assert module.',\n );\n }\n\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('assert');\n const startTime = Date.now();\n const assertResult = await AiAssert({\n assertion,\n context,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(assertResult.content),\n };\n\n const { thought, pass } = assertResult.content;\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'assert',\n userQuery: {\n assertion,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n assertionPass: pass,\n assertionThought: thought,\n error: pass ? undefined : thought,\n };\n emitInsightDump(dumpData, dumpSubscriber);\n\n return {\n pass,\n thought,\n usage: assertResult.usage,\n };\n }\n}\n","import type {\n DumpMeta,\n DumpSubscriber,\n InsightDump,\n PartialInsightDumpFromSDK,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { uuid } from '@midscene/shared/utils';\n\nexport function emitInsightDump(\n data: PartialInsightDumpFromSDK,\n dumpSubscriber?: DumpSubscriber,\n) {\n let modelDescription = '';\n\n if (vlLocateMode()) {\n const uiTarsModelVer = uiTarsModelVersion();\n if (uiTarsModelVer) {\n modelDescription = `UI-TARS=${uiTarsModelVer}`;\n } else {\n modelDescription = `${vlLocateMode()} mode`;\n }\n }\n\n const baseData: DumpMeta = {\n sdkVersion: getVersion(),\n logTime: Date.now(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n model_description: modelDescription,\n };\n const finalData: InsightDump = {\n logId: uuid(),\n ...baseData,\n ...data,\n };\n\n dumpSubscriber?.(finalData);\n}\n","import { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport {\n plan,\n describeUserPage,\n AiLocateElement,\n AiAssert,\n} from './ai-model/index';\n\nexport { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';\n\nexport type * from './types';\nexport default Insight;\nexport { Executor, Insight, getVersion };\n\nexport type {\n MidsceneYamlScript,\n MidsceneYamlTask,\n MidsceneYamlFlowItem,\n} from './yaml';\n"]}
1
+ {"version":3,"mappings":";;;;;;;;;;;;;;;;AAUA,SAAS,qBAAqB,mBAAmB;AACjD,SAAS,cAAc;AAEhB,IAAM,WAAN,MAAe;AAAA,EAUpB,YACE,MACA,SAGA;AACA,SAAK,SACH,SAAS,SAAS,QAAQ,MAAM,SAAS,IAAI,YAAY;AAC3D,SAAK,OAAO;AACZ,SAAK,SAAS,SAAS,SAAS,CAAC,GAAG;AAAA,MAAI,CAAC,SACvC,KAAK,kBAAkB,IAAI;AAAA,IAC7B;AACA,SAAK,cAAc,SAAS;AAAA,EAC9B;AAAA,EAEQ,kBAAkB,MAAyC;AACjE,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,MAAgE;AAC3E;AAAA,MACE,KAAK,WAAW;AAAA,MAChB;AAAA,QAAyD,KAAK,gBAAgB,GAAG,KAAK;AAAA,EAAK,KAAK,gBAAgB,GAAG,UAAU;AAAA,IAC/H;AACA,QAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,WAAK,MAAM,KAAK,GAAG,KAAK,IAAI,CAAC,SAAS,KAAK,kBAAkB,IAAI,CAAC,CAAC;AAAA,IACrE,OAAO;AACL,WAAK,MAAM,KAAK,KAAK,kBAAkB,IAAI,CAAC;AAAA,IAC9C;AACA,QAAI,KAAK,WAAW,WAAW;AAC7B,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAM,QAAsB;AAC1B,QAAI,KAAK,WAAW,UAAU,KAAK,MAAM,SAAS,GAAG;AACnD,cAAQ;AAAA,QACN;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,WAAW,WAAW,6BAA6B;AAC/D,WAAO,KAAK,WAAW,aAAa,+BAA+B;AACnE,WAAO,KAAK,WAAW,SAAS,4BAA4B;AAE5D,UAAM,mBAAmB,KAAK,MAAM;AAAA,MAClC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,mBAAmB,GAAG;AAExB;AAAA,IACF;AAEA,SAAK,SAAS;AACd,QAAI,YAAY;AAChB,QAAI,wBAAwB;AAE5B,QAAI;AAEJ,WAAO,YAAY,KAAK,MAAM,QAAQ;AACpC,YAAM,OAAO,KAAK,MAAM,SAAS;AACjC;AAAA,QACE,KAAK,WAAW;AAAA,QAChB,2CAA2C,KAAK,MAAM;AAAA,MACxD;AACA,WAAK,SAAS;AAAA,QACZ,OAAO,KAAK,IAAI;AAAA,MAClB;AACA,UAAI;AACF,aAAK,SAAS;AACd,YAAI;AACF,cAAI,KAAK,aAAa;AACpB,kBAAM,KAAK,YAAY,IAAI;AAAA,UAC7B;AAAA,QACF,SAAS,GAAG;AACV,kBAAQ,MAAM,wBAAwB,CAAC;AAAA,QACzC;AACA;AAAA,UACE,CAAC,WAAW,UAAU,UAAU,EAAE,QAAQ,KAAK,IAAI,KAAK;AAAA,UACxD,0BAA0B,KAAK,IAAI;AAAA,QACrC;AAEA,cAAM,EAAE,UAAU,MAAM,IAAI;AAC5B,eAAO,UAAU,uCAAuC,KAAK,IAAI,EAAE;AAEnE,YAAI;AACJ,cAAM,kBAAmC;AAAA,UACvC;AAAA,UACA,SAAS,oBAAoB;AAAA,QAC/B;AAEA,YAAI,KAAK,SAAS,WAAW;AAC3B;AAAA,YACE,KAAK,YAAY,YACf,KAAK,YAAY,WACjB,KAAK,YAAY,YACjB,KAAK,YAAY,aACjB,KAAK,YAAY,YACjB,KAAK,YAAY;AAAA,YACnB,gCAAgC,KAAK,OAAO;AAAA,UAC9C;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AACxD,cAAI,KAAK,YAAY,UAAU;AAC7B,iCACE,aACC;AAAA,UACL;AAAA,QACF,WAAW,KAAK,SAAS,YAAY,KAAK,SAAS,YAAY;AAC7D,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D,OAAO;AACL,kBAAQ;AAAA,YACN,0BAA0B,KAAK,IAAI;AAAA,UACrC;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D;AAEA,eAAO,OAAO,MAAM,WAAW;AAC/B,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD,aAAK,OAAO,SAAU,aAAqB,UAAU;AACrD;AAAA,MACF,SAAS,GAAQ;AACf,gCAAwB;AACxB,aAAK,QACH,GAAG,YAAY,OAAO,MAAM,WAAW,IAAI;AAC7C,aAAK,aAAa,EAAE;AAEpB,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD;AAAA,MACF;AAAA,IACF;AAGA,aAAS,IAAI,YAAY,GAAG,IAAI,KAAK,MAAM,QAAQ,KAAK;AACtD,WAAK,MAAM,CAAC,EAAE,SAAS;AAAA,IACzB;AAEA,QAAI,uBAAuB;AACzB,WAAK,SAAS;AAAA,IAChB,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAEA,QAAI,KAAK,MAAM,QAAQ;AAErB,YAAM,cAAc,KAAK,IAAI,WAAW,KAAK,MAAM,SAAS,CAAC;AAC7D,aAAO,KAAK,MAAM,WAAW,EAAE;AAAA,IACjC;AAAA,EACF;AAAA,EAEA,iBAA0B;AACxB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,kBAAwC;AACtC,QAAI,KAAK,WAAW,SAAS;AAC3B,aAAO;AAAA,IACT;AACA,UAAM,iBAAiB,KAAK,MAAM;AAAA,MAChC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,kBAAkB,GAAG;AACvB,aAAO,KAAK,MAAM,cAAc;AAAA,IAClC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,OAAsB;AACpB,UAAM,WAA0B;AAAA,MAC9B,YAAY,WAAW;AAAA,MACvB,YAAY,YAAY,mBAAmB,KAAK;AAAA,MAChD,SAAS,KAAK,IAAI;AAAA,MAClB,MAAM,KAAK;AAAA,MACX,OAAO,KAAK;AAAA,IACd;AACA,WAAO;AAAA,EACT;AACF;;;AC9MA,SAAS,4BAA4B;AAErC,IAAM,oBAAoB,qBAAqB;AAExC,IAAM,8BAA8B,MAAM;AAC/C,SAAO,mNAAmN,iBAAiB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAoB7O;;;ACMA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA,gBAAAA;AAAA,OACK;AACP,SAAS,yBAAyB,kBAAkB;AACpD,SAAS,gBAAgB;AACzB,SAAS,UAAAC,eAAc;;;AChCvB;AAAA,EACE,uBAAAC;AAAA,EACA,eAAAC;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,YAAY;AAEd,SAAS,gBACd,MACA,gBACA;AACA,MAAI,mBAAmB;AAEvB,MAAI,aAAa,GAAG;AAClB,UAAM,iBAAiB,mBAAmB;AAC1C,QAAI,gBAAgB;AAClB,yBAAmB,WAAW,cAAc;AAAA,IAC9C,OAAO;AACL,yBAAmB,GAAG,aAAa,CAAC;AAAA,IACtC;AAAA,EACF;AAEA,QAAM,WAAqB;AAAA,IACzB,YAAY,WAAW;AAAA,IACvB,SAAS,KAAK,IAAI;AAAA,IAClB,YAAYA,aAAYD,oBAAmB,KAAK;AAAA,IAChD,mBAAmB;AAAA,EACrB;AACA,QAAM,YAAyB;AAAA,IAC7B,OAAO,KAAK;AAAA,IACZ,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAEA,mBAAiB,SAAS;AAC5B;;;ADQA,IAAM,QAAQ,SAAS,YAAY;AACnC,IAAqB,UAArB,MAGE;AAAA,EAWA,YACE,SAGA,KACA;AAXF,sBAAoD;AAYlD,IAAAD,QAAO,SAAS,iCAAiC;AACjD,QAAI,OAAO,YAAY,YAAY;AACjC,WAAK,qBAAqB;AAAA,IAC5B,OAAO;AACL,WAAK,qBAAqB,MAAM,QAAQ,QAAQ,OAAO;AAAA,IACzD;AAEA,QAAI,OAAO,KAAK,eAAe,aAAa;AAC1C,WAAK,aAAa,IAAI;AAAA,IACxB;AACA,QAAI,OAAO,KAAK,aAAa,aAAa;AACxC,WAAK,WAAW,IAAI;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,MAAM,OACJ,OACA,KACuB;AACvB,UAAM,EAAE,OAAO,IAAI,OAAO,CAAC;AAC3B,UAAM,cAAc,OAAO,UAAU,WAAW,QAAQ,MAAM;AAC9D,IAAAA,QAAO,aAAa,8BAA8B;AAClD,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,IAAAA,QAAO,OAAO,UAAU,UAAU,sCAAsC;AAExE,UAAM,wBAAwB;AAAA,MAC5B;AAAA,IACF;AACA,QAAI,uBAAuB;AACzB,YAAM,yBAAyB,qBAAqB;AAAA,IACtD;AACA,QAAI;AACJ,QAAI,MAAM,aAAa,uBAAuB;AAC5C,yBAAmB,MAAM;AAAA,IAC3B;AAEA,QAAI,oBAAoB,CAACD,cAAa,GAAG;AACvC,cAAQ;AAAA,QACN;AAAA,MACF;AACA,yBAAmB;AAAA,IACrB;AAEA,UAAM,UAAU,KAAK,WAAY,MAAM,KAAK,mBAAmB,QAAQ;AAEvE,QAAI,aAA+B;AACnC,QAAI,wBAA4C;AAChD,QAAI,kBAA2C;AAC/C,QAAI,qBAEY;AAChB,QAAI,kBAAkB;AACpB,2BAAqB,MAAM,gBAAgB;AAAA,QACzC;AAAA,QACA,oBAAoB;AAAA,MACtB,CAAC;AACD,MAAAC;AAAA,QACE,mBAAmB;AAAA,QACnB,gCAAgC,gBAAgB,IAC9C,mBAAmB,QAAQ,KAAK,mBAAmB,KAAK,KAAK,EAC/D;AAAA,MACF;AACA,8BAAwB,mBAAmB;AAC3C,wBAAkB,mBAAmB;AACrC,mBAAa,mBAAmB;AAAA,IAClC;AAEA,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,aAAa,aAAa,MAAM,IACzD,MAAM,gBAAgB;AAAA,MACpB,QAAQ,UAAU,KAAK;AAAA,MACvB;AAAA,MACA,0BAA0B;AAAA,MAC1B,cAAc;AAAA,IAChB,CAAC;AAEH,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,MACvC,gBAAgB,KAAK,UAAU,WAAW;AAAA,MAC1C;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAgC,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAC1E;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT,SAAS;AAAA,MACX;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,aAAa;AAAA,MACb,MAAM;AAAA,MACN;AAAA,MACA,WAAW,CAAC,CAAC;AAAA,MACb,OAAO;AAAA,IACT;AAEA,UAAM,WAA0B,CAAC;AACjC,KAAC,YAAY,YAAY,CAAC,GAAG,QAAQ,CAAC,SAAS;AAC7C,UAAI,QAAQ,MAAM;AAChB,cAAM,UAAU,YAAY,MAAM,EAAE;AAEpC,YAAI,CAAC,SAAS;AACZ,kBAAQ;AAAA,YACN,kCAAkC,KAAK,EAAE;AAAA,UAC3C;AACA;AAAA,QACF;AACA,iBAAS,KAAK,OAAO;AAAA,MACvB;AAAA,IACF,CAAC;AAED;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH,gBAAgB;AAAA,MAClB;AAAA,MACA;AAAA,IACF;AAEA,QAAI,UAAU;AACZ,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,IAAAA;AAAA,MACE,SAAS,UAAU;AAAA,MACnB,6CAA6C,SAAS,MAAM;AAAA,IAC9D;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO;AAAA,QACL,SAAS;AAAA,UACP,IAAI,SAAS,CAAC,EAAG;AAAA,UACjB,SAAS,SAAS,CAAC,EAAG;AAAA,UACtB,QAAQ,SAAS,CAAC,EAAG;AAAA,UACrB,MAAM,SAAS,CAAC,EAAG;AAAA,UACnB,QAAQ,SAAS,CAAC,EAAG,UAAU,CAAC;AAAA,UAChC,YAAY,SAAS,CAAC,EAAG;AAAA,QAC3B;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAAA,EAQA,MAAM,QAAW,YAA+C;AAC9D,IAAAA;AAAA,MACE,OAAO,eAAe,YAAY,OAAO,eAAe;AAAA,MACxD,kDAAkD,OAAO,UAAU;AAAA,IACrE;AACA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,SAAS;AAEvD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,IAAI,MAAM,qBAAwB;AAAA,MAC3D;AAAA,MACA,WAAW;AAAA,IACb,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,IACzC;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAwB,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAClE;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,OAAO;AAAA,IACT;AAEA,UAAM,EAAE,KAAK,IAAI,eAAe,CAAC;AAGjC;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAEA,QAAI,YAAY,CAAC,MAAM;AACrB,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,WAAO;AAAA,MACL;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,WAAsD;AACjE,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,QAAQ;AACtD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,eAAe,MAAM,SAAS;AAAA,MAClC;AAAA,MACA;AAAA,IACF,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,aAAa,OAAO;AAAA,IAClD;AAEA,UAAM,EAAE,SAAS,KAAK,IAAI,aAAa;AACvC,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,eAAe;AAAA,MACf,kBAAkB;AAAA,MAClB,OAAO,OAAO,SAAY;AAAA,IAC5B;AACA,oBAAgB,UAAU,cAAc;AAExC,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,OAAO,aAAa;AAAA,IACtB;AAAA,EACF;AAAA,EACA,MAAM,SACJ,QACA,KAGyD;AACzD,IAAAA,QAAO,QAAQ,yCAAyC;AACxD,UAAM,UAAU,MAAM,KAAK,mBAAmB,UAAU;AACxD,UAAM,EAAE,iBAAiB,IAAI;AAC7B,IAAAA,QAAO,kBAAkB,6CAA6C;AAEtE,UAAM,eAAe,4BAA4B;AAGjD,UAAM,kBAAkB;AACxB,UAAM,aAAmB,MAAM,QAAQ,MAAM,IACzC;AAAA,MACE,MAAM,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAChD,KAAK,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAC/C,OAAO;AAAA,MACP,QAAQ;AAAA,IACV,IACA;AAEJ,QAAI,eAAe,MAAM,wBAAwB;AAAA,MAC/C,gBAAgB;AAAA,MAChB,sBAAsB;AAAA,QACpB;AAAA,UACE,MAAM;AAAA,QACR;AAAA,MACF;AAAA,MACA,iBAAiB;AAAA,IACnB,CAAC;AAED,QAAI,KAAK,WAAW;AAClB,YAAM,aAAa,iBAAiB,YAAY,QAAQ,IAAI;AAC5D,YAAM,4BAA4B,UAAU;AAC5C,qBAAe,MAAM;AAAA,QACnB;AAAA,QACA;AAAA,QACA,qBAAqB,oBAAoB;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,OAAe;AAAA,MACnB,EAAE,MAAM,UAAU,SAAS,aAAa;AAAA,MACxC;AAAA,QACE,MAAM;AAAA,QACN,SAAS;AAAA,UACP;AAAA,YACE,MAAM;AAAA,YACN,WAAW;AAAA,cACT,KAAK;AAAA,cACL,QAAQ;AAAA,YACV;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,WACJ,KAAK,cAAc;AAErB,UAAM,MAAM,MAAM,SAAS,8BAAmC;AAE9D,UAAM,EAAE,QAAQ,IAAI;AACpB,IAAAA,QAAO,CAAC,QAAQ,OAAO,oBAAoB,QAAQ,KAAK,EAAE;AAC1D,IAAAA,QAAO,QAAQ,aAAa,gCAAgC;AAC5D,WAAO;AAAA,EACT;AACF;;;AEjZA,SAAS,eAAAE,cAAa,uBAAAD,4BAA2B;AAGjD,IAAO,cAAQ","names":["vlLocateMode","assert","MIDSCENE_MODEL_NAME","getAIConfig"],"ignoreList":[],"sources":["../../src/ai-model/action-executor.ts","../../src/ai-model/prompt/describe.ts","../../src/insight/index.ts","../../src/insight/utils.ts","../../src/index.ts"],"sourcesContent":["import type {\n ExecutionDump,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskProgressOptions,\n ExecutionTaskReturn,\n ExecutorContext,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport { MIDSCENE_MODEL_NAME, getAIConfig } from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\n\nexport class Executor {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of executor\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n constructor(\n name: string,\n options?: ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n },\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n async append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void> {\n assert(\n this.status !== 'error',\n `executor is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n }\n\n async flush(): Promise<any> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for executor, status is init but tasks are not empty',\n );\n }\n\n assert(this.status !== 'running', 'executor is already running');\n assert(this.status !== 'completed', 'executor is already completed');\n assert(this.status !== 'error', 'executor is in error state');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n return;\n }\n\n this.status = 'running';\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskInsightLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Locate' ||\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported insight subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskInsightLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action' || task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n task.timing.aiCost = (returnValue as any)?.aiCost || 0;\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = false;\n task.error =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n\n if (successfullyCompleted) {\n this.status = 'completed';\n } else {\n this.status = 'error';\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n return this.tasks[outputIndex].output;\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n const errorTaskIndex = this.tasks.findIndex(\n (task) => task.status === 'failed',\n );\n if (errorTaskIndex >= 0) {\n return this.tasks[errorTaskIndex];\n }\n return null;\n }\n\n dump(): ExecutionDump {\n const dumpData: ExecutionDump = {\n sdkVersion: getVersion(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n };\n return dumpData;\n }\n}\n","import { getPreferredLanguage } from '@midscene/shared/env';\n\nconst preferredLanguage = getPreferredLanguage();\n\nexport const elementDescriberInstruction = () => {\n return `Tell what is the content of the element wrapped by the read rectangle in the screenshot. Your description is expected to be used to precisely locate the element from other similar elements on screenshot. Use ${preferredLanguage} in the description.\n\nPlease follow the following rules:\n1. The description should be start with a brief description, like \"a button for confirming the action\".\n\n2. Include these information in the description to distinguish the element from its siblings and other similar elements, as much as possible:\n- The text of the element, like \"with text 'Confirm'\"\n- What the element looks like if it's an image, like \"with image '...'\"\n- The relative position of the element, like \"on the left of ..., around ...\"\n- How to distinguish the element from its siblings elements, like \"it is the icon instead of the text\"\n\n3. Do NOT mention the red rectangle in the description.\n\n4. Use the error field to describe the unexpected situations, if any. If not, put null.\n\nReturn in JSON:\n{\n \"description\": \"[{brief description}]: {text of the element} {image of the element} {relative position of the element} ... \",\n \"error\"?: \"...\"\n}`;\n};\n","import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiAssert, AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AISingleElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightAssertionResponse,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_FORCE_DEEP_THINK,\n MIDSCENE_USE_QWEN_VL,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n if (searchAreaPrompt && !vlLocateMode()) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, elementById, rawResponse, usage } =\n await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T = any>(input: string): Promise<T>;\n async extract<T extends Record<string, string>>(\n input: T,\n ): Promise<Record<keyof T, any>>;\n async extract<T extends object>(input: Record<keyof T, string>): Promise<T>;\n\n async extract<T>(dataDemand: InsightExtractParam): Promise<any> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n usage,\n };\n }\n\n async assert(assertion: string): Promise<InsightAssertionResponse> {\n if (typeof assertion !== 'string') {\n throw new Error(\n 'This is the assert method for Midscene, the first argument should be a string. If you want to use the assert method from Node.js, please import it from the Node.js assert module.',\n );\n }\n\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('assert');\n const startTime = Date.now();\n const assertResult = await AiAssert({\n assertion,\n context,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(assertResult.content),\n };\n\n const { thought, pass } = assertResult.content;\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'assert',\n userQuery: {\n assertion,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n assertionPass: pass,\n assertionThought: thought,\n error: pass ? undefined : thought,\n };\n emitInsightDump(dumpData, dumpSubscriber);\n\n return {\n pass,\n thought,\n usage: assertResult.usage,\n };\n }\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64 } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(targetRect, context.size);\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n","import type {\n DumpMeta,\n DumpSubscriber,\n InsightDump,\n PartialInsightDumpFromSDK,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { uuid } from '@midscene/shared/utils';\n\nexport function emitInsightDump(\n data: PartialInsightDumpFromSDK,\n dumpSubscriber?: DumpSubscriber,\n) {\n let modelDescription = '';\n\n if (vlLocateMode()) {\n const uiTarsModelVer = uiTarsModelVersion();\n if (uiTarsModelVer) {\n modelDescription = `UI-TARS=${uiTarsModelVer}`;\n } else {\n modelDescription = `${vlLocateMode()} mode`;\n }\n }\n\n const baseData: DumpMeta = {\n sdkVersion: getVersion(),\n logTime: Date.now(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n model_description: modelDescription,\n };\n const finalData: InsightDump = {\n logId: uuid(),\n ...baseData,\n ...data,\n };\n\n dumpSubscriber?.(finalData);\n}\n","import { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport {\n plan,\n describeUserPage,\n AiLocateElement,\n AiAssert,\n} from './ai-model/index';\n\nexport { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';\n\nexport type * from './types';\nexport default Insight;\nexport { Executor, Insight, getVersion };\n\nexport type {\n MidsceneYamlScript,\n MidsceneYamlTask,\n MidsceneYamlFlowItem,\n} from './yaml';\n"]}
@@ -1,4 +1,4 @@
1
- import { A as AIUsageInfo, R as Rect, j as ElementTreeNode, B as BaseElement, U as UIContext, o as AIElementLocatorResponse, H as ElementById, r as AIDataExtractionResponse, t as AIAssertionResponse, ar as PageType, T as PlanningAIResponse } from './types-e6507d2e.js';
1
+ import { k as AIUsageInfo, R as Rect, j as ElementTreeNode, B as BaseElement, U as UIContext, ax as ReferenceImage, p as AIElementLocatorResponse, O as ElementById, s as AIDataExtractionResponse, u as AIAssertionResponse, av as PageType, Y as PlanningAIResponse } from './types-01381369.js';
2
2
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
3
3
 
4
4
  type AIArgs = [
@@ -9,7 +9,8 @@ declare enum AIActionType {
9
9
  ASSERT = 0,
10
10
  INSPECT_ELEMENT = 1,
11
11
  EXTRACT_DATA = 2,
12
- PLAN = 3
12
+ PLAN = 3,
13
+ DESCRIBE_ELEMENT = 4
13
14
  }
14
15
  declare function callAiFn<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
15
16
  content: T;
@@ -50,6 +51,7 @@ declare function describeUserPage<ElementType extends BaseElement = BaseElement>
50
51
  declare function AiLocateElement<ElementType extends BaseElement = BaseElement>(options: {
51
52
  context: UIContext<ElementType>;
52
53
  targetElementDescription: string;
54
+ referenceImage?: ReferenceImage;
53
55
  callAI?: typeof callAiFn<AIElementResponse | [number, number]>;
54
56
  searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
55
57
  }): Promise<{
@@ -1,5 +1,5 @@
1
1
  import * as _midscene_shared_constants from '@midscene/shared/constants';
2
- import { B as BaseElement, j as ElementTreeNode } from './types-e6507d2e.js';
2
+ import { B as BaseElement, j as ElementTreeNode } from './types-01381369.js';
3
3
  import 'openai/resources';
4
4
 
5
5
  declare function truncateText(text: string | number | object | undefined, maxLength?: number): string;
@@ -6,8 +6,13 @@ interface LocateOption {
6
6
  deepThink?: boolean;
7
7
  cacheable?: boolean;
8
8
  }
9
+ interface ReferenceImage {
10
+ base64: string;
11
+ rect?: Rect;
12
+ }
9
13
  interface DetailedLocateParam extends LocateOption {
10
14
  prompt: string;
15
+ referenceImage?: ReferenceImage;
11
16
  }
12
17
  interface scrollParam {
13
18
  direction: 'down' | 'up' | 'right' | 'left';
@@ -207,6 +212,24 @@ interface AIAssertionResponse {
207
212
  pass: boolean;
208
213
  thought: string;
209
214
  }
215
+ interface AIDescribeElementResponse {
216
+ description: string;
217
+ error?: string;
218
+ }
219
+ interface LocatorValidatorOption {
220
+ centerDistanceThreshold?: number;
221
+ }
222
+ interface LocateValidatorResult {
223
+ pass: boolean;
224
+ rect: Rect;
225
+ center: [number, number];
226
+ centerDistance?: number;
227
+ }
228
+ interface AgentDescribeElementAtPointResult {
229
+ prompt: string;
230
+ deepThink: boolean;
231
+ verifyResult?: LocateValidatorResult;
232
+ }
210
233
  /**
211
234
  * context
212
235
  */
@@ -227,7 +250,7 @@ interface InsightOptions {
227
250
  type EnsureObject<T> = {
228
251
  [K in keyof T]: any;
229
252
  };
230
- type InsightAction = 'locate' | 'extract' | 'assert';
253
+ type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';
231
254
  type InsightExtractParam = string | Record<string, string>;
232
255
  type LocateResultElement = {
233
256
  id: string;
@@ -448,4 +471,4 @@ interface GroupedActionDump {
448
471
  }
449
472
  type PageType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android';
450
473
 
451
- export { type PlanningActionParamError as $, type AIUsageInfo as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PartialInsightDumpFromSDK as F, type LiteUISection as G, type ElementById as H, type InsightAction as I, type AgentWaitForOpt as J, type AgentAssertOpt as K, type LocateResult as L, type MidsceneYamlScript as M, type PlanningLocateParam as N, type OnTaskStartTip as O, type Point as P, type PlanningAction as Q, type Rect as R, type Size as S, type PlanningAIResponse as T, UIContext as U, type PlanningActionParamTap as V, type PlanningActionParamHover as W, type PlanningActionParamInputOrKeyPress as X, type PlanningActionParamScroll as Y, type PlanningActionParamAssert as Z, type PlanningActionParamSleep as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamWaitFor as a0, type Color as a1, type BaseAgentParserOpt as a2, type PuppeteerParserOpt as a3, type PlaywrightParserOpt as a4, type ExecutionRecorderItem as a5, type ExecutionTaskType as a6, type ExecutorContext as a7, type TaskCacheInfo as a8, type ExecutionTaskReturn as a9, type MidsceneYamlFlowItemAIQuery as aA, type MidsceneYamlFlowItemAINumber as aB, type MidsceneYamlFlowItemAINString as aC, type MidsceneYamlFlowItemAIBoolean as aD, type MidsceneYamlFlowItemAILocate as aE, type MidsceneYamlFlowItemAIWaitFor as aF, type MidsceneYamlFlowItemAITap as aG, type MidsceneYamlFlowItemAIHover as aH, type MidsceneYamlFlowItemAIInput as aI, type MidsceneYamlFlowItemAIKeyboardPress as aJ, type MidsceneYamlFlowItemAIScroll as aK, type MidsceneYamlFlowItemEvaluateJavaScript as aL, type MidsceneYamlFlowItemSleep as aM, type FreeFn as aN, type ScriptPlayerTaskStatus as aO, type ScriptPlayerStatusValue as aP, type ExecutionTaskInsightLocateParam as aa, type ExecutionTaskInsightLocateOutput as ab, type ExecutionTaskInsightDumpLog as ac, type ExecutionTaskInsightLocateApply as ad, type ExecutionTaskInsightLocate as ae, type ExecutionTaskInsightQueryParam as af, type ExecutionTaskInsightQueryOutput as ag, type ExecutionTaskInsightQueryApply as ah, type ExecutionTaskInsightQuery as ai, type ExecutionTaskInsightAssertionParam as aj, type ExecutionTaskInsightAssertionApply as ak, type ExecutionTaskInsightAssertion as al, type ExecutionTaskActionApply as am, type ExecutionTaskAction as an, type ExecutionTaskPlanningApply as ao, type ExecutionTaskPlanning as ap, type GroupedActionDump as aq, type PageType as ar, type LocateOption as as, type scrollParam as at, type MidsceneYamlScriptEnvBase as au, type MidsceneYamlScriptWebEnv as av, type MidsceneYamlScriptAndroidEnv as aw, type MidsceneYamlScriptEnv as ax, type MidsceneYamlFlowItemAIAction as ay, type MidsceneYamlFlowItemAIAssert as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type MidsceneYamlTask as h, type MidsceneYamlFlowItem as i, type ElementTreeNode as j, AIResponseFormat as k, type AISingleElementResponseById as l, type AISingleElementResponseByPosition as m, type AISingleElementResponse as n, type AIElementLocatorResponse as o, type AIElementCoordinatesResponse as p, type AIElementResponse as q, type AIDataExtractionResponse as r, type AISectionLocatorResponse as s, type AIAssertionResponse as t, type EnsureObject as u, type InsightExtractParam as v, type LocateResultElement as w, type DumpMeta as x, type ReportDumpWithAttributes as y, type InsightDump as z };
474
+ export { type PlanningActionParamInputOrKeyPress as $, type AIDescribeElementResponse as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type LocateResultElement as F, type DumpMeta as G, type ReportDumpWithAttributes as H, type InsightAction as I, type InsightDump as J, type PartialInsightDumpFromSDK as K, type LocateResult as L, type MidsceneYamlScript as M, type LiteUISection as N, type ElementById as O, type Point as P, type OnTaskStartTip as Q, type Rect as R, type Size as S, type AgentWaitForOpt as T, UIContext as U, type AgentAssertOpt as V, type PlanningLocateParam as W, type PlanningAction as X, type PlanningAIResponse as Y, type PlanningActionParamTap as Z, type PlanningActionParamHover as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamScroll as a0, type PlanningActionParamAssert as a1, type PlanningActionParamSleep as a2, type PlanningActionParamError as a3, type PlanningActionParamWaitFor as a4, type Color as a5, type BaseAgentParserOpt as a6, type PuppeteerParserOpt as a7, type PlaywrightParserOpt as a8, type ExecutionRecorderItem as a9, type MidsceneYamlScriptWebEnv as aA, type MidsceneYamlScriptAndroidEnv as aB, type MidsceneYamlScriptEnv as aC, type MidsceneYamlFlowItemAIAction as aD, type MidsceneYamlFlowItemAIAssert as aE, type MidsceneYamlFlowItemAIQuery as aF, type MidsceneYamlFlowItemAINumber as aG, type MidsceneYamlFlowItemAINString as aH, type MidsceneYamlFlowItemAIBoolean as aI, type MidsceneYamlFlowItemAILocate as aJ, type MidsceneYamlFlowItemAIWaitFor as aK, type MidsceneYamlFlowItemAITap as aL, type MidsceneYamlFlowItemAIHover as aM, type MidsceneYamlFlowItemAIInput as aN, type MidsceneYamlFlowItemAIKeyboardPress as aO, type MidsceneYamlFlowItemAIScroll as aP, type MidsceneYamlFlowItemEvaluateJavaScript as aQ, type MidsceneYamlFlowItemSleep as aR, type FreeFn as aS, type ScriptPlayerTaskStatus as aT, type ScriptPlayerStatusValue as aU, type ExecutionTaskType as aa, type ExecutorContext as ab, type TaskCacheInfo as ac, type ExecutionTaskReturn as ad, type ExecutionTaskInsightLocateParam as ae, type ExecutionTaskInsightLocateOutput as af, type ExecutionTaskInsightDumpLog as ag, type ExecutionTaskInsightLocateApply as ah, type ExecutionTaskInsightLocate as ai, type ExecutionTaskInsightQueryParam as aj, type ExecutionTaskInsightQueryOutput as ak, type ExecutionTaskInsightQueryApply as al, type ExecutionTaskInsightQuery as am, type ExecutionTaskInsightAssertionParam as an, type ExecutionTaskInsightAssertionApply as ao, type ExecutionTaskInsightAssertion as ap, type ExecutionTaskActionApply as aq, type ExecutionTaskAction as ar, type ExecutionTaskPlanningApply as as, type ExecutionTaskPlanning as at, type GroupedActionDump as au, type PageType as av, type LocateOption as aw, type ReferenceImage as ax, type scrollParam as ay, type MidsceneYamlScriptEnvBase as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type MidsceneYamlTask as h, type MidsceneYamlFlowItem as i, type ElementTreeNode as j, type AIUsageInfo as k, AIResponseFormat as l, type AISingleElementResponseById as m, type AISingleElementResponseByPosition as n, type AISingleElementResponse as o, type AIElementLocatorResponse as p, type AIElementCoordinatesResponse as q, type AIElementResponse as r, type AIDataExtractionResponse as s, type AISectionLocatorResponse as t, type AIAssertionResponse as u, type LocatorValidatorOption as v, type LocateValidatorResult as w, type AgentDescribeElementAtPointResult as x, type EnsureObject as y, type InsightExtractParam as z };
@@ -1,4 +1,4 @@
1
- import { y as ReportDumpWithAttributes, R as Rect } from './types-e6507d2e.js';
1
+ import { H as ReportDumpWithAttributes, R as Rect } from './types-01381369.js';
2
2
  import '@midscene/shared/constants';
3
3
  import 'openai/resources';
4
4
 
package/dist/lib/utils.js CHANGED
@@ -13,7 +13,7 @@
13
13
 
14
14
 
15
15
 
16
- var _chunkPI7QH3ZWjs = require('./chunk-PI7QH3ZW.js');
16
+ var _chunkGHP3FR4Ojs = require('./chunk-GHP3FR4O.js');
17
17
 
18
18
 
19
19
 
@@ -29,4 +29,4 @@ var _chunkPI7QH3ZWjs = require('./chunk-PI7QH3ZW.js');
29
29
 
30
30
 
31
31
 
32
- exports.getLogDir = _chunkPI7QH3ZWjs.getLogDir; exports.getTmpDir = _chunkPI7QH3ZWjs.getTmpDir; exports.getTmpFile = _chunkPI7QH3ZWjs.getTmpFile; exports.getVersion = _chunkPI7QH3ZWjs.getVersion; exports.groupedActionDumpFileExt = _chunkPI7QH3ZWjs.groupedActionDumpFileExt; exports.overlapped = _chunkPI7QH3ZWjs.overlapped; exports.replaceStringWithFirstAppearance = _chunkPI7QH3ZWjs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunkPI7QH3ZWjs.replacerForPageObject; exports.reportHTMLContent = _chunkPI7QH3ZWjs.reportHTMLContent; exports.sleep = _chunkPI7QH3ZWjs.sleep; exports.stringifyDumpData = _chunkPI7QH3ZWjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunkPI7QH3ZWjs.uploadTestInfoToServer; exports.writeDumpReport = _chunkPI7QH3ZWjs.writeDumpReport; exports.writeLogFile = _chunkPI7QH3ZWjs.writeLogFile;
32
+ exports.getLogDir = _chunkGHP3FR4Ojs.getLogDir; exports.getTmpDir = _chunkGHP3FR4Ojs.getTmpDir; exports.getTmpFile = _chunkGHP3FR4Ojs.getTmpFile; exports.getVersion = _chunkGHP3FR4Ojs.getVersion; exports.groupedActionDumpFileExt = _chunkGHP3FR4Ojs.groupedActionDumpFileExt; exports.overlapped = _chunkGHP3FR4Ojs.overlapped; exports.replaceStringWithFirstAppearance = _chunkGHP3FR4Ojs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunkGHP3FR4Ojs.replacerForPageObject; exports.reportHTMLContent = _chunkGHP3FR4Ojs.reportHTMLContent; exports.sleep = _chunkGHP3FR4Ojs.sleep; exports.stringifyDumpData = _chunkGHP3FR4Ojs.stringifyDumpData; exports.uploadTestInfoToServer = _chunkGHP3FR4Ojs.uploadTestInfoToServer; exports.writeDumpReport = _chunkGHP3FR4Ojs.writeDumpReport; exports.writeLogFile = _chunkGHP3FR4Ojs.writeLogFile;
@@ -1,8 +1,8 @@
1
- import { A as AIUsageInfo, Q as PlanningAction, i as MidsceneYamlFlowItem, S as Size } from './types-e6507d2e.js';
1
+ import { k as AIUsageInfo, X as PlanningAction, i as MidsceneYamlFlowItem, S as Size } from './types-01381369.js';
2
2
  import { ChatCompletionMessageParam } from 'openai/resources';
3
3
  export { ChatCompletionMessageParam } from 'openai/resources';
4
- import { b as AIActionType } from './llm-planning-dc99c344.js';
5
- export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-dc99c344.js';
4
+ import { b as AIActionType } from './llm-planning-3bdabecb.js';
5
+ export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-3bdabecb.js';
6
6
  import { vlLocateMode } from '@midscene/shared/env';
7
7
  import { actionParser } from '@ui-tars/action-parser';
8
8
  import '@midscene/shared/constants';
@@ -1,7 +1,7 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse } from './types-e6507d2e.js';
2
- export { t as AIAssertionResponse, r as AIDataExtractionResponse, p as AIElementCoordinatesResponse, o as AIElementLocatorResponse, q as AIElementResponse, k as AIResponseFormat, s as AISectionLocatorResponse, n as AISingleElementResponse, l as AISingleElementResponseById, m as AISingleElementResponseByPosition, A as AIUsageInfo, K as AgentAssertOpt, J as AgentWaitForOpt, a2 as BaseAgentParserOpt, C as CallAIFn, a1 as Color, x as DumpMeta, H as ElementById, j as ElementTreeNode, u as EnsureObject, a5 as ExecutionRecorderItem, an as ExecutionTaskAction, am as ExecutionTaskActionApply, al as ExecutionTaskInsightAssertion, ak as ExecutionTaskInsightAssertionApply, aj as ExecutionTaskInsightAssertionParam, ac as ExecutionTaskInsightDumpLog, ae as ExecutionTaskInsightLocate, ad as ExecutionTaskInsightLocateApply, ab as ExecutionTaskInsightLocateOutput, aa as ExecutionTaskInsightLocateParam, ai as ExecutionTaskInsightQuery, ah as ExecutionTaskInsightQueryApply, ag as ExecutionTaskInsightQueryOutput, af as ExecutionTaskInsightQueryParam, ap as ExecutionTaskPlanning, ao as ExecutionTaskPlanningApply, a9 as ExecutionTaskReturn, a6 as ExecutionTaskType, a7 as ExecutorContext, aN as FreeFn, aq as GroupedActionDump, z as InsightDump, v as InsightExtractParam, G as LiteUISection, as as LocateOption, w as LocateResultElement, i as MidsceneYamlFlowItem, ay as MidsceneYamlFlowItemAIAction, az as MidsceneYamlFlowItemAIAssert, aD as MidsceneYamlFlowItemAIBoolean, aH as MidsceneYamlFlowItemAIHover, aI as MidsceneYamlFlowItemAIInput, aJ as MidsceneYamlFlowItemAIKeyboardPress, aE as MidsceneYamlFlowItemAILocate, aC as MidsceneYamlFlowItemAINString, aB as MidsceneYamlFlowItemAINumber, aA as MidsceneYamlFlowItemAIQuery, aK as MidsceneYamlFlowItemAIScroll, aG as MidsceneYamlFlowItemAITap, aF as MidsceneYamlFlowItemAIWaitFor, aL as MidsceneYamlFlowItemEvaluateJavaScript, aM as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aw as MidsceneYamlScriptAndroidEnv, ax as MidsceneYamlScriptEnv, au as MidsceneYamlScriptEnvBase, av as MidsceneYamlScriptWebEnv, h as MidsceneYamlTask, O as OnTaskStartTip, ar as PageType, F as PartialInsightDumpFromSDK, T as PlanningAIResponse, Q as PlanningAction, Z as PlanningActionParamAssert, $ as PlanningActionParamError, W as PlanningActionParamHover, X as PlanningActionParamInputOrKeyPress, Y as PlanningActionParamScroll, _ as PlanningActionParamSleep, V as PlanningActionParamTap, a0 as PlanningActionParamWaitFor, N as PlanningLocateParam, a4 as PlaywrightParserOpt, P as Point, a3 as PuppeteerParserOpt, R as Rect, y as ReportDumpWithAttributes, aP as ScriptPlayerStatusValue, aO as ScriptPlayerTaskStatus, S as Size, a8 as TaskCacheInfo, at as scrollParam } from './types-e6507d2e.js';
3
- import { c as callAiFn } from './llm-planning-dc99c344.js';
4
- export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-dc99c344.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, R as Rect, A as AIDescribeElementResponse } from './types-01381369.js';
2
+ export { u as AIAssertionResponse, s as AIDataExtractionResponse, q as AIElementCoordinatesResponse, p as AIElementLocatorResponse, r as AIElementResponse, l as AIResponseFormat, t as AISectionLocatorResponse, o as AISingleElementResponse, m as AISingleElementResponseById, n as AISingleElementResponseByPosition, k as AIUsageInfo, V as AgentAssertOpt, x as AgentDescribeElementAtPointResult, T as AgentWaitForOpt, a6 as BaseAgentParserOpt, C as CallAIFn, a5 as Color, G as DumpMeta, O as ElementById, j as ElementTreeNode, y as EnsureObject, a9 as ExecutionRecorderItem, ar as ExecutionTaskAction, aq as ExecutionTaskActionApply, ap as ExecutionTaskInsightAssertion, ao as ExecutionTaskInsightAssertionApply, an as ExecutionTaskInsightAssertionParam, ag as ExecutionTaskInsightDumpLog, ai as ExecutionTaskInsightLocate, ah as ExecutionTaskInsightLocateApply, af as ExecutionTaskInsightLocateOutput, ae as ExecutionTaskInsightLocateParam, am as ExecutionTaskInsightQuery, al as ExecutionTaskInsightQueryApply, ak as ExecutionTaskInsightQueryOutput, aj as ExecutionTaskInsightQueryParam, at as ExecutionTaskPlanning, as as ExecutionTaskPlanningApply, ad as ExecutionTaskReturn, aa as ExecutionTaskType, ab as ExecutorContext, aS as FreeFn, au as GroupedActionDump, J as InsightDump, z as InsightExtractParam, N as LiteUISection, aw as LocateOption, F as LocateResultElement, w as LocateValidatorResult, v as LocatorValidatorOption, i as MidsceneYamlFlowItem, aD as MidsceneYamlFlowItemAIAction, aE as MidsceneYamlFlowItemAIAssert, aI as MidsceneYamlFlowItemAIBoolean, aM as MidsceneYamlFlowItemAIHover, aN as MidsceneYamlFlowItemAIInput, aO as MidsceneYamlFlowItemAIKeyboardPress, aJ as MidsceneYamlFlowItemAILocate, aH as MidsceneYamlFlowItemAINString, aG as MidsceneYamlFlowItemAINumber, aF as MidsceneYamlFlowItemAIQuery, aP as MidsceneYamlFlowItemAIScroll, aL as MidsceneYamlFlowItemAITap, aK as MidsceneYamlFlowItemAIWaitFor, aQ as MidsceneYamlFlowItemEvaluateJavaScript, aR as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aB as MidsceneYamlScriptAndroidEnv, aC as MidsceneYamlScriptEnv, az as MidsceneYamlScriptEnvBase, aA as MidsceneYamlScriptWebEnv, h as MidsceneYamlTask, Q as OnTaskStartTip, av as PageType, K as PartialInsightDumpFromSDK, Y as PlanningAIResponse, X as PlanningAction, a1 as PlanningActionParamAssert, a3 as PlanningActionParamError, _ as PlanningActionParamHover, $ as PlanningActionParamInputOrKeyPress, a0 as PlanningActionParamScroll, a2 as PlanningActionParamSleep, Z as PlanningActionParamTap, a4 as PlanningActionParamWaitFor, W as PlanningLocateParam, a8 as PlaywrightParserOpt, P as Point, a7 as PuppeteerParserOpt, ax as ReferenceImage, H as ReportDumpWithAttributes, aU as ScriptPlayerStatusValue, aT as ScriptPlayerTaskStatus, S as Size, ac as TaskCacheInfo, ay as scrollParam } from './types-01381369.js';
3
+ import { c as callAiFn } from './llm-planning-3bdabecb.js';
4
+ export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-3bdabecb.js';
5
5
  export { getVersion } from './utils.js';
6
6
  export { MIDSCENE_MODEL_NAME, getAIConfig } from '@midscene/shared/env';
7
7
  import '@midscene/shared/constants';
@@ -38,6 +38,9 @@ declare class Insight<ElementType extends BaseElement = BaseElement, ContextType
38
38
  extract<T extends Record<string, string>>(input: T): Promise<Record<keyof T, any>>;
39
39
  extract<T extends object>(input: Record<keyof T, string>): Promise<T>;
40
40
  assert(assertion: string): Promise<InsightAssertionResponse>;
41
+ describe(target: Rect | [number, number], opt?: {
42
+ deepThink?: boolean;
43
+ }): Promise<Pick<AIDescribeElementResponse, 'description'>>;
41
44
  }
42
45
 
43
- export { BaseElement, DetailedLocateParam, DumpSubscriber, ExecutionDump, ExecutionTask, ExecutionTaskApply, ExecutionTaskProgressOptions, Executor, Insight, InsightAction, InsightAssertionResponse, InsightOptions, InsightTaskInfo, LocateResult, UIContext, Insight as default };
46
+ export { AIDescribeElementResponse, BaseElement, DetailedLocateParam, DumpSubscriber, ExecutionDump, ExecutionTask, ExecutionTaskApply, ExecutionTaskProgressOptions, Executor, Insight, InsightAction, InsightAssertionResponse, InsightOptions, InsightTaskInfo, LocateResult, Rect, UIContext, Insight as default };
@@ -1,4 +1,4 @@
1
- import { A as AIUsageInfo, R as Rect, j as ElementTreeNode, B as BaseElement, U as UIContext, o as AIElementLocatorResponse, H as ElementById, r as AIDataExtractionResponse, t as AIAssertionResponse, ar as PageType, T as PlanningAIResponse } from './types-e6507d2e.js';
1
+ import { k as AIUsageInfo, R as Rect, j as ElementTreeNode, B as BaseElement, U as UIContext, ax as ReferenceImage, p as AIElementLocatorResponse, O as ElementById, s as AIDataExtractionResponse, u as AIAssertionResponse, av as PageType, Y as PlanningAIResponse } from './types-01381369.js';
2
2
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
3
3
 
4
4
  type AIArgs = [
@@ -9,7 +9,8 @@ declare enum AIActionType {
9
9
  ASSERT = 0,
10
10
  INSPECT_ELEMENT = 1,
11
11
  EXTRACT_DATA = 2,
12
- PLAN = 3
12
+ PLAN = 3,
13
+ DESCRIBE_ELEMENT = 4
13
14
  }
14
15
  declare function callAiFn<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
15
16
  content: T;
@@ -50,6 +51,7 @@ declare function describeUserPage<ElementType extends BaseElement = BaseElement>
50
51
  declare function AiLocateElement<ElementType extends BaseElement = BaseElement>(options: {
51
52
  context: UIContext<ElementType>;
52
53
  targetElementDescription: string;
54
+ referenceImage?: ReferenceImage;
53
55
  callAI?: typeof callAiFn<AIElementResponse | [number, number]>;
54
56
  searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
55
57
  }): Promise<{
@@ -1,5 +1,5 @@
1
1
  import * as _midscene_shared_constants from '@midscene/shared/constants';
2
- import { B as BaseElement, j as ElementTreeNode } from './types-e6507d2e.js';
2
+ import { B as BaseElement, j as ElementTreeNode } from './types-01381369.js';
3
3
  import 'openai/resources';
4
4
 
5
5
  declare function truncateText(text: string | number | object | undefined, maxLength?: number): string;
@@ -6,8 +6,13 @@ interface LocateOption {
6
6
  deepThink?: boolean;
7
7
  cacheable?: boolean;
8
8
  }
9
+ interface ReferenceImage {
10
+ base64: string;
11
+ rect?: Rect;
12
+ }
9
13
  interface DetailedLocateParam extends LocateOption {
10
14
  prompt: string;
15
+ referenceImage?: ReferenceImage;
11
16
  }
12
17
  interface scrollParam {
13
18
  direction: 'down' | 'up' | 'right' | 'left';
@@ -207,6 +212,24 @@ interface AIAssertionResponse {
207
212
  pass: boolean;
208
213
  thought: string;
209
214
  }
215
+ interface AIDescribeElementResponse {
216
+ description: string;
217
+ error?: string;
218
+ }
219
+ interface LocatorValidatorOption {
220
+ centerDistanceThreshold?: number;
221
+ }
222
+ interface LocateValidatorResult {
223
+ pass: boolean;
224
+ rect: Rect;
225
+ center: [number, number];
226
+ centerDistance?: number;
227
+ }
228
+ interface AgentDescribeElementAtPointResult {
229
+ prompt: string;
230
+ deepThink: boolean;
231
+ verifyResult?: LocateValidatorResult;
232
+ }
210
233
  /**
211
234
  * context
212
235
  */
@@ -227,7 +250,7 @@ interface InsightOptions {
227
250
  type EnsureObject<T> = {
228
251
  [K in keyof T]: any;
229
252
  };
230
- type InsightAction = 'locate' | 'extract' | 'assert';
253
+ type InsightAction = 'locate' | 'extract' | 'assert' | 'describe';
231
254
  type InsightExtractParam = string | Record<string, string>;
232
255
  type LocateResultElement = {
233
256
  id: string;
@@ -448,4 +471,4 @@ interface GroupedActionDump {
448
471
  }
449
472
  type PageType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android';
450
473
 
451
- export { type PlanningActionParamError as $, type AIUsageInfo as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type PartialInsightDumpFromSDK as F, type LiteUISection as G, type ElementById as H, type InsightAction as I, type AgentWaitForOpt as J, type AgentAssertOpt as K, type LocateResult as L, type MidsceneYamlScript as M, type PlanningLocateParam as N, type OnTaskStartTip as O, type Point as P, type PlanningAction as Q, type Rect as R, type Size as S, type PlanningAIResponse as T, UIContext as U, type PlanningActionParamTap as V, type PlanningActionParamHover as W, type PlanningActionParamInputOrKeyPress as X, type PlanningActionParamScroll as Y, type PlanningActionParamAssert as Z, type PlanningActionParamSleep as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamWaitFor as a0, type Color as a1, type BaseAgentParserOpt as a2, type PuppeteerParserOpt as a3, type PlaywrightParserOpt as a4, type ExecutionRecorderItem as a5, type ExecutionTaskType as a6, type ExecutorContext as a7, type TaskCacheInfo as a8, type ExecutionTaskReturn as a9, type MidsceneYamlFlowItemAIQuery as aA, type MidsceneYamlFlowItemAINumber as aB, type MidsceneYamlFlowItemAINString as aC, type MidsceneYamlFlowItemAIBoolean as aD, type MidsceneYamlFlowItemAILocate as aE, type MidsceneYamlFlowItemAIWaitFor as aF, type MidsceneYamlFlowItemAITap as aG, type MidsceneYamlFlowItemAIHover as aH, type MidsceneYamlFlowItemAIInput as aI, type MidsceneYamlFlowItemAIKeyboardPress as aJ, type MidsceneYamlFlowItemAIScroll as aK, type MidsceneYamlFlowItemEvaluateJavaScript as aL, type MidsceneYamlFlowItemSleep as aM, type FreeFn as aN, type ScriptPlayerTaskStatus as aO, type ScriptPlayerStatusValue as aP, type ExecutionTaskInsightLocateParam as aa, type ExecutionTaskInsightLocateOutput as ab, type ExecutionTaskInsightDumpLog as ac, type ExecutionTaskInsightLocateApply as ad, type ExecutionTaskInsightLocate as ae, type ExecutionTaskInsightQueryParam as af, type ExecutionTaskInsightQueryOutput as ag, type ExecutionTaskInsightQueryApply as ah, type ExecutionTaskInsightQuery as ai, type ExecutionTaskInsightAssertionParam as aj, type ExecutionTaskInsightAssertionApply as ak, type ExecutionTaskInsightAssertion as al, type ExecutionTaskActionApply as am, type ExecutionTaskAction as an, type ExecutionTaskPlanningApply as ao, type ExecutionTaskPlanning as ap, type GroupedActionDump as aq, type PageType as ar, type LocateOption as as, type scrollParam as at, type MidsceneYamlScriptEnvBase as au, type MidsceneYamlScriptWebEnv as av, type MidsceneYamlScriptAndroidEnv as aw, type MidsceneYamlScriptEnv as ax, type MidsceneYamlFlowItemAIAction as ay, type MidsceneYamlFlowItemAIAssert as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type MidsceneYamlTask as h, type MidsceneYamlFlowItem as i, type ElementTreeNode as j, AIResponseFormat as k, type AISingleElementResponseById as l, type AISingleElementResponseByPosition as m, type AISingleElementResponse as n, type AIElementLocatorResponse as o, type AIElementCoordinatesResponse as p, type AIElementResponse as q, type AIDataExtractionResponse as r, type AISectionLocatorResponse as s, type AIAssertionResponse as t, type EnsureObject as u, type InsightExtractParam as v, type LocateResultElement as w, type DumpMeta as x, type ReportDumpWithAttributes as y, type InsightDump as z };
474
+ export { type PlanningActionParamInputOrKeyPress as $, type AIDescribeElementResponse as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type LocateResultElement as F, type DumpMeta as G, type ReportDumpWithAttributes as H, type InsightAction as I, type InsightDump as J, type PartialInsightDumpFromSDK as K, type LocateResult as L, type MidsceneYamlScript as M, type LiteUISection as N, type ElementById as O, type Point as P, type OnTaskStartTip as Q, type Rect as R, type Size as S, type AgentWaitForOpt as T, UIContext as U, type AgentAssertOpt as V, type PlanningLocateParam as W, type PlanningAction as X, type PlanningAIResponse as Y, type PlanningActionParamTap as Z, type PlanningActionParamHover as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamScroll as a0, type PlanningActionParamAssert as a1, type PlanningActionParamSleep as a2, type PlanningActionParamError as a3, type PlanningActionParamWaitFor as a4, type Color as a5, type BaseAgentParserOpt as a6, type PuppeteerParserOpt as a7, type PlaywrightParserOpt as a8, type ExecutionRecorderItem as a9, type MidsceneYamlScriptWebEnv as aA, type MidsceneYamlScriptAndroidEnv as aB, type MidsceneYamlScriptEnv as aC, type MidsceneYamlFlowItemAIAction as aD, type MidsceneYamlFlowItemAIAssert as aE, type MidsceneYamlFlowItemAIQuery as aF, type MidsceneYamlFlowItemAINumber as aG, type MidsceneYamlFlowItemAINString as aH, type MidsceneYamlFlowItemAIBoolean as aI, type MidsceneYamlFlowItemAILocate as aJ, type MidsceneYamlFlowItemAIWaitFor as aK, type MidsceneYamlFlowItemAITap as aL, type MidsceneYamlFlowItemAIHover as aM, type MidsceneYamlFlowItemAIInput as aN, type MidsceneYamlFlowItemAIKeyboardPress as aO, type MidsceneYamlFlowItemAIScroll as aP, type MidsceneYamlFlowItemEvaluateJavaScript as aQ, type MidsceneYamlFlowItemSleep as aR, type FreeFn as aS, type ScriptPlayerTaskStatus as aT, type ScriptPlayerStatusValue as aU, type ExecutionTaskType as aa, type ExecutorContext as ab, type TaskCacheInfo as ac, type ExecutionTaskReturn as ad, type ExecutionTaskInsightLocateParam as ae, type ExecutionTaskInsightLocateOutput as af, type ExecutionTaskInsightDumpLog as ag, type ExecutionTaskInsightLocateApply as ah, type ExecutionTaskInsightLocate as ai, type ExecutionTaskInsightQueryParam as aj, type ExecutionTaskInsightQueryOutput as ak, type ExecutionTaskInsightQueryApply as al, type ExecutionTaskInsightQuery as am, type ExecutionTaskInsightAssertionParam as an, type ExecutionTaskInsightAssertionApply as ao, type ExecutionTaskInsightAssertion as ap, type ExecutionTaskActionApply as aq, type ExecutionTaskAction as ar, type ExecutionTaskPlanningApply as as, type ExecutionTaskPlanning as at, type GroupedActionDump as au, type PageType as av, type LocateOption as aw, type ReferenceImage as ax, type scrollParam as ay, type MidsceneYamlScriptEnvBase as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type MidsceneYamlTask as h, type MidsceneYamlFlowItem as i, type ElementTreeNode as j, type AIUsageInfo as k, AIResponseFormat as l, type AISingleElementResponseById as m, type AISingleElementResponseByPosition as n, type AISingleElementResponse as o, type AIElementLocatorResponse as p, type AIElementCoordinatesResponse as q, type AIElementResponse as r, type AIDataExtractionResponse as s, type AISectionLocatorResponse as t, type AIAssertionResponse as u, type LocatorValidatorOption as v, type LocateValidatorResult as w, type AgentDescribeElementAtPointResult as x, type EnsureObject as y, type InsightExtractParam as z };
@@ -1,4 +1,4 @@
1
- import { y as ReportDumpWithAttributes, R as Rect } from './types-e6507d2e.js';
1
+ import { H as ReportDumpWithAttributes, R as Rect } from './types-01381369.js';
2
2
  import '@midscene/shared/constants';
3
3
  import 'openai/resources';
4
4