@midscene/core 0.24.2-beta-20250801111909.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/es/ai-model.d.ts +8 -4
  2. package/dist/es/ai-model.js +3 -1
  3. package/dist/es/{chunk-2RCMQS5O.js → chunk-FEGAGUMN.js} +3 -3
  4. package/dist/es/{chunk-KFA65L55.js → chunk-JS4CT3XV.js} +22 -17
  5. package/dist/es/chunk-JS4CT3XV.js.map +1 -0
  6. package/dist/es/index.d.ts +4 -4
  7. package/dist/es/index.js +36 -12
  8. package/dist/es/index.js.map +1 -1
  9. package/dist/es/{llm-planning-4c782a8d.d.ts → llm-planning-877248da.d.ts} +2 -2
  10. package/dist/es/{types-7b64b80b.d.ts → types-512d3687.d.ts} +2 -3
  11. package/dist/es/utils.d.ts +1 -1
  12. package/dist/es/utils.js +1 -1
  13. package/dist/lib/ai-model.d.ts +8 -4
  14. package/dist/lib/ai-model.js +4 -2
  15. package/dist/lib/{chunk-2RCMQS5O.js → chunk-FEGAGUMN.js} +3 -3
  16. package/dist/lib/{chunk-KFA65L55.js → chunk-JS4CT3XV.js} +22 -17
  17. package/dist/lib/chunk-JS4CT3XV.js.map +1 -0
  18. package/dist/lib/index.d.ts +4 -4
  19. package/dist/lib/index.js +47 -23
  20. package/dist/lib/index.js.map +1 -1
  21. package/dist/lib/{llm-planning-4c782a8d.d.ts → llm-planning-877248da.d.ts} +2 -2
  22. package/dist/{types/types-7b64b80b.d.ts → lib/types-512d3687.d.ts} +2 -3
  23. package/dist/lib/utils.d.ts +1 -1
  24. package/dist/lib/utils.js +2 -2
  25. package/dist/types/ai-model.d.ts +8 -4
  26. package/dist/types/index.d.ts +4 -4
  27. package/dist/types/{llm-planning-4c782a8d.d.ts → llm-planning-877248da.d.ts} +2 -2
  28. package/dist/{lib/types-7b64b80b.d.ts → types/types-512d3687.d.ts} +2 -3
  29. package/dist/types/utils.d.ts +1 -1
  30. package/package.json +3 -3
  31. package/dist/es/chunk-KFA65L55.js.map +0 -1
  32. package/dist/lib/chunk-KFA65L55.js.map +0 -1
  33. /package/dist/es/{chunk-2RCMQS5O.js.map → chunk-FEGAGUMN.js.map} +0 -0
  34. /package/dist/lib/{chunk-2RCMQS5O.js.map → chunk-FEGAGUMN.js.map} +0 -0
package/dist/lib/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  "use strict";Object.defineProperty(exports, "__esModule", {value: true}); function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
2
2
 
3
- var _chunk2RCMQS5Ojs = require('./chunk-2RCMQS5O.js');
3
+ var _chunkFEGAGUMNjs = require('./chunk-FEGAGUMN.js');
4
4
 
5
5
 
6
6
 
@@ -11,7 +11,7 @@ var _chunk2RCMQS5Ojs = require('./chunk-2RCMQS5O.js');
11
11
 
12
12
 
13
13
 
14
- var _chunkKFA65L55js = require('./chunk-KFA65L55.js');
14
+ var _chunkJS4CT3XVjs = require('./chunk-JS4CT3XV.js');
15
15
 
16
16
  // src/ai-model/action-executor.ts
17
17
 
@@ -172,7 +172,7 @@ ${_optionalChain([this, 'access', _7 => _7.latestErrorTask, 'call', _8 => _8(),
172
172
  }
173
173
  }
174
174
  const dumpData = {
175
- sdkVersion: _chunk2RCMQS5Ojs.getVersion.call(void 0, ),
175
+ sdkVersion: _chunkFEGAGUMNjs.getVersion.call(void 0, ),
176
176
  model_name: _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME) || "",
177
177
  model_description: modelDescription,
178
178
  logTime: Date.now(),
@@ -186,20 +186,44 @@ ${_optionalChain([this, 'access', _7 => _7.latestErrorTask, 'call', _8 => _8(),
186
186
  // src/ai-model/prompt/describe.ts
187
187
 
188
188
  var elementDescriberInstruction = () => {
189
- return `Describe the element in the red rectangle for precise identification. Use ${_env.getPreferredLanguage.call(void 0, )}.
190
-
191
- Rules:
192
- 1. Start with element type (button, input, link, etc.)
193
- 2. Include key identifiers:
194
- - Text content: "with text 'Submit'"
195
- - Visual features: "blue background", "icon only"
196
- - Position: "top-right", "below search bar"
197
- 3. Keep description under 20 words
198
- 4. Don't mention the red rectangle
189
+ return `
190
+ Describe the element in the red rectangle for precise identification. Use ${_env.getPreferredLanguage.call(void 0, )}.
191
+
192
+ CRITICAL REQUIREMENTS:
193
+ 1. UNIQUENESS: The description must uniquely identify this element on the current page
194
+ 2. UNIVERSALITY: Use generic, reusable selectors that work across different contexts
195
+ 3. PRECISION: Be specific enough to distinguish from similar elements
196
+
197
+ DESCRIPTION STRUCTURE:
198
+ 1. Element type (button, input, link, div, etc.)
199
+ 2. Primary identifier (in order of preference):
200
+ - Unique text content: "with text 'Login'"
201
+ - Unique attribute: "with aria-label 'Search'"
202
+ - Unique class/ID: "with class 'primary-button'"
203
+ - Unique position: "in header navigation"
204
+ 3. Secondary identifiers (if needed for uniqueness):
205
+ - Visual features: "blue background", "with icon"
206
+ - Relative position: "below search bar", "in sidebar"
207
+ - Parent context: "in login form", "in main menu"
208
+
209
+ GUIDELINES:
210
+ - Keep description under 25 words
211
+ - Prioritize semantic identifiers over visual ones
212
+ - Use consistent terminology across similar elements
213
+ - Avoid page-specific or temporary content
214
+ - Don't mention the red rectangle or selection box
215
+ - Focus on stable, reusable characteristics
216
+
217
+ EXAMPLES:
218
+ - "Login button with text 'Sign In'"
219
+ - "Search input with placeholder 'Enter keywords'"
220
+ - "Navigation link with text 'Home' in header"
221
+ - "Submit button in contact form"
222
+ - "Menu icon with aria-label 'Open menu'"
199
223
 
200
224
  Return JSON:
201
225
  {
202
- "description": "brief element type with key identifiers",
226
+ "description": "unique element identifier",
203
227
  "error"?: "error message if any"
204
228
  }`;
205
229
  };
@@ -220,7 +244,7 @@ var _logger = require('@midscene/shared/logger');
220
244
 
221
245
  function emitInsightDump(data, dumpSubscriber) {
222
246
  const baseData = {
223
- sdkVersion: _chunk2RCMQS5Ojs.getVersion.call(void 0, ),
247
+ sdkVersion: _chunkFEGAGUMNjs.getVersion.call(void 0, ),
224
248
  logTime: Date.now(),
225
249
  model_name: _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME) || ""
226
250
  };
@@ -236,7 +260,7 @@ function emitInsightDump(data, dumpSubscriber) {
236
260
  var debug = _logger.getDebug.call(void 0, "ai:insight");
237
261
  var Insight = class {
238
262
  constructor(context, opt) {
239
- this.aiVendorFn = _chunkKFA65L55js.callAiFn;
263
+ this.aiVendorFn = _chunkJS4CT3XVjs.callAiFn;
240
264
  _utils.assert.call(void 0, context, "context is required for Insight");
241
265
  if (typeof context === "function") {
242
266
  this.contextRetrieverFn = context;
@@ -279,7 +303,7 @@ var Insight = class {
279
303
  let searchAreaUsage = void 0;
280
304
  let searchAreaResponse = void 0;
281
305
  if (searchAreaPrompt) {
282
- searchAreaResponse = await _chunkKFA65L55js.AiLocateSection.call(void 0, {
306
+ searchAreaResponse = await _chunkJS4CT3XVjs.AiLocateSection.call(void 0, {
283
307
  context,
284
308
  sectionDescription: searchAreaPrompt
285
309
  });
@@ -299,7 +323,7 @@ var Insight = class {
299
323
  rawResponse,
300
324
  usage,
301
325
  isOrderSensitive
302
- } = await _chunkKFA65L55js.AiLocateElement.call(void 0, {
326
+ } = await _chunkJS4CT3XVjs.AiLocateElement.call(void 0, {
303
327
  callAI: callAI || this.aiVendorFn,
304
328
  context,
305
329
  targetElementDescription: queryPrompt,
@@ -388,7 +412,7 @@ ${parseResult.errors.join("\n")}`;
388
412
  this.onceDumpUpdatedFn = void 0;
389
413
  const context = await this.contextRetrieverFn("extract");
390
414
  const startTime = Date.now();
391
- const { parseResult, usage } = await _chunkKFA65L55js.AiExtractElementInfo.call(void 0, {
415
+ const { parseResult, usage } = await _chunkJS4CT3XVjs.AiExtractElementInfo.call(void 0, {
392
416
  context,
393
417
  dataQuery: dataDemand,
394
418
  multimodalPrompt,
@@ -436,7 +460,7 @@ ${parseResult.errors.join("\n")}`;
436
460
  this.onceDumpUpdatedFn = void 0;
437
461
  const context = await this.contextRetrieverFn("assert");
438
462
  const startTime = Date.now();
439
- const assertResult = await _chunkKFA65L55js.AiAssert.call(void 0, {
463
+ const assertResult = await _chunkJS4CT3XVjs.AiAssert.call(void 0, {
440
464
  assertion,
441
465
  context
442
466
  });
@@ -490,7 +514,7 @@ ${parseResult.errors.join("\n")}`;
490
514
  borderThickness: 3
491
515
  });
492
516
  if (_optionalChain([opt, 'optionalAccess', _22 => _22.deepThink])) {
493
- const searchArea = _chunkKFA65L55js.expandSearchArea.call(void 0, targetRect, context.size);
517
+ const searchArea = _chunkJS4CT3XVjs.expandSearchArea.call(void 0, targetRect, context.size);
494
518
  debug("describe: set searchArea", searchArea);
495
519
  imagePayload = await _img.cropByRect.call(void 0,
496
520
  imagePayload,
@@ -513,7 +537,7 @@ ${parseResult.errors.join("\n")}`;
513
537
  ]
514
538
  }
515
539
  ];
516
- const callAIFn = this.aiVendorFn || _chunkKFA65L55js.callToGetJSONObject;
540
+ const callAIFn = this.aiVendorFn || _chunkJS4CT3XVjs.callToGetJSONObject;
517
541
  const res = await callAIFn(msgs, 4 /* DESCRIBE_ELEMENT */);
518
542
  const { content } = res;
519
543
  _utils.assert.call(void 0, !content.error, `describe failed: ${content.error}`);
@@ -536,6 +560,6 @@ var src_default = Insight;
536
560
 
537
561
 
538
562
 
539
- exports.AiAssert = _chunkKFA65L55js.AiAssert; exports.AiLocateElement = _chunkKFA65L55js.AiLocateElement; exports.Executor = Executor; exports.Insight = Insight; exports.MIDSCENE_MODEL_NAME = _env.MIDSCENE_MODEL_NAME; exports.default = src_default; exports.describeUserPage = _chunkKFA65L55js.describeUserPage; exports.getAIConfig = _env.getAIConfig; exports.getVersion = _chunk2RCMQS5Ojs.getVersion; exports.plan = _chunkKFA65L55js.plan;
563
+ exports.AiAssert = _chunkJS4CT3XVjs.AiAssert; exports.AiLocateElement = _chunkJS4CT3XVjs.AiLocateElement; exports.Executor = Executor; exports.Insight = Insight; exports.MIDSCENE_MODEL_NAME = _env.MIDSCENE_MODEL_NAME; exports.default = src_default; exports.describeUserPage = _chunkJS4CT3XVjs.describeUserPage; exports.getAIConfig = _env.getAIConfig; exports.getVersion = _chunkFEGAGUMNjs.getVersion; exports.plan = _chunkJS4CT3XVjs.plan;
540
564
 
541
565
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"mappings":";;;;;;;;;;;;;;;;AAUA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,cAAc;AAEhB,IAAM,WAAN,MAAe;AAAA,EAUpB,YACE,MACA,SAGA;AACA,SAAK,SACH,SAAS,SAAS,QAAQ,MAAM,SAAS,IAAI,YAAY;AAC3D,SAAK,OAAO;AACZ,SAAK,SAAS,SAAS,SAAS,CAAC,GAAG;AAAA,MAAI,CAAC,SACvC,KAAK,kBAAkB,IAAI;AAAA,IAC7B;AACA,SAAK,cAAc,SAAS;AAAA,EAC9B;AAAA,EAEQ,kBAAkB,MAAyC;AACjE,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,MAAgE;AAC3E;AAAA,MACE,KAAK,WAAW;AAAA,MAChB;AAAA,QAAyD,KAAK,gBAAgB,GAAG,KAAK;AAAA,EAAK,KAAK,gBAAgB,GAAG,UAAU;AAAA,IAC/H;AACA,QAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,WAAK,MAAM,KAAK,GAAG,KAAK,IAAI,CAAC,SAAS,KAAK,kBAAkB,IAAI,CAAC,CAAC;AAAA,IACrE,OAAO;AACL,WAAK,MAAM,KAAK,KAAK,kBAAkB,IAAI,CAAC;AAAA,IAC9C;AACA,QAAI,KAAK,WAAW,WAAW;AAC7B,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAM,QAAsB;AAC1B,QAAI,KAAK,WAAW,UAAU,KAAK,MAAM,SAAS,GAAG;AACnD,cAAQ;AAAA,QACN;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,WAAW,WAAW,6BAA6B;AAC/D,WAAO,KAAK,WAAW,aAAa,+BAA+B;AACnE,WAAO,KAAK,WAAW,SAAS,4BAA4B;AAE5D,UAAM,mBAAmB,KAAK,MAAM;AAAA,MAClC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,mBAAmB,GAAG;AAExB;AAAA,IACF;AAEA,SAAK,SAAS;AACd,QAAI,YAAY;AAChB,QAAI,wBAAwB;AAE5B,QAAI;AAEJ,WAAO,YAAY,KAAK,MAAM,QAAQ;AACpC,YAAM,OAAO,KAAK,MAAM,SAAS;AACjC;AAAA,QACE,KAAK,WAAW;AAAA,QAChB,2CAA2C,KAAK,MAAM;AAAA,MACxD;AACA,WAAK,SAAS;AAAA,QACZ,OAAO,KAAK,IAAI;AAAA,MAClB;AACA,UAAI;AACF,aAAK,SAAS;AACd,YAAI;AACF,cAAI,KAAK,aAAa;AACpB,kBAAM,KAAK,YAAY,IAAI;AAAA,UAC7B;AAAA,QACF,SAAS,GAAG;AACV,kBAAQ,MAAM,wBAAwB,CAAC;AAAA,QACzC;AACA;AAAA,UACE,CAAC,WAAW,UAAU,UAAU,EAAE,QAAQ,KAAK,IAAI,KAAK;AAAA,UACxD,0BAA0B,KAAK,IAAI;AAAA,QACrC;AAEA,cAAM,EAAE,UAAU,MAAM,IAAI;AAC5B,eAAO,UAAU,uCAAuC,KAAK,IAAI,EAAE;AAEnE,YAAI;AACJ,cAAM,kBAAmC;AAAA,UACvC;AAAA,UACA,SAAS,oBAAoB;AAAA,QAC/B;AAEA,YAAI,KAAK,SAAS,WAAW;AAC3B;AAAA,YACE,KAAK,YAAY,YACf,KAAK,YAAY,WACjB,KAAK,YAAY,YACjB,KAAK,YAAY,aACjB,KAAK,YAAY,YACjB,KAAK,YAAY;AAAA,YACnB,gCAAgC,KAAK,OAAO;AAAA,UAC9C;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AACxD,cAAI,KAAK,YAAY,UAAU;AAC7B,iCACE,aACC;AAAA,UACL;AAAA,QACF,WAAW,KAAK,SAAS,YAAY,KAAK,SAAS,YAAY;AAC7D,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D,OAAO;AACL,kBAAQ;AAAA,YACN,0BAA0B,KAAK,IAAI;AAAA,UACrC;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D;AAEA,eAAO,OAAO,MAAM,WAAW;AAC/B,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD;AAAA,MACF,SAAS,GAAQ;AACf,gCAAwB;AACxB,aAAK,QAAQ;AACb,aAAK,eACH,GAAG,YAAY,OAAO,MAAM,WAAW,IAAI;AAC7C,aAAK,aAAa,EAAE;AAEpB,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD;AAAA,MACF;AAAA,IACF;AAGA,aAAS,IAAI,YAAY,GAAG,IAAI,KAAK,MAAM,QAAQ,KAAK;AACtD,WAAK,MAAM,CAAC,EAAE,SAAS;AAAA,IACzB;AAEA,QAAI,uBAAuB;AACzB,WAAK,SAAS;AAAA,IAChB,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAEA,QAAI,KAAK,MAAM,QAAQ;AAErB,YAAM,cAAc,KAAK,IAAI,WAAW,KAAK,MAAM,SAAS,CAAC;AAC7D,aAAO,KAAK,MAAM,WAAW,EAAE;AAAA,IACjC;AAAA,EACF;AAAA,EAEA,iBAA0B;AACxB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,kBAAwC;AACtC,QAAI,KAAK,WAAW,SAAS;AAC3B,aAAO;AAAA,IACT;AACA,UAAM,iBAAiB,KAAK,MAAM;AAAA,MAChC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,kBAAkB,GAAG;AACvB,aAAO,KAAK,MAAM,cAAc;AAAA,IAClC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,OAAsB;AACpB,QAAI,mBAAmB;AAEvB,QAAI,aAAa,GAAG;AAClB,YAAM,iBAAiB,mBAAmB;AAC1C,UAAI,gBAAgB;AAClB,2BAAmB,WAAW,cAAc;AAAA,MAC9C,OAAO;AACL,2BAAmB,GAAG,aAAa,CAAC;AAAA,MACtC;AAAA,IACF;AACA,UAAM,WAA0B;AAAA,MAC9B,YAAY,WAAW;AAAA,MACvB,YAAY,YAAY,mBAAmB,KAAK;AAAA,MAChD,mBAAmB;AAAA,MACnB,SAAS,KAAK,IAAI;AAAA,MAClB,MAAM,KAAK;AAAA,MACX,OAAO,KAAK;AAAA,IACd;AACA,WAAO;AAAA,EACT;AACF;;;AC9NA,SAAS,4BAA4B;AAE9B,IAAM,8BAA8B,MAAM;AAC/C,SAAO,6EAA6E,qBAAqB,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAgB5G;;;ACcA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA,gBAAAA;AAAA,OACK;AACP,SAAS,yBAAyB,kBAAkB;AACpD,SAAS,gBAAgB;AACzB,SAAS,UAAAC,eAAc;;;AClCvB,SAAS,uBAAAC,sBAAqB,eAAAC,oBAAmB;AACjD,SAAS,YAAY;AAEd,SAAS,gBACd,MACA,gBACA;AACA,QAAM,WAAqB;AAAA,IACzB,YAAY,WAAW;AAAA,IACvB,SAAS,KAAK,IAAI;AAAA,IAClB,YAAYA,aAAYD,oBAAmB,KAAK;AAAA,EAClD;AACA,QAAM,YAAyB;AAAA,IAC7B,OAAO,KAAK;AAAA,IACZ,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAEA,mBAAiB,SAAS;AAC5B;;;AD2BA,IAAM,QAAQ,SAAS,YAAY;AACnC,IAAqB,UAArB,MAGE;AAAA,EAWA,YACE,SAGA,KACA;AAXF,sBAAoD;AAYlD,IAAAD,QAAO,SAAS,iCAAiC;AACjD,QAAI,OAAO,YAAY,YAAY;AACjC,WAAK,qBAAqB;AAAA,IAC5B,OAAO;AACL,WAAK,qBAAqB,MAAM,QAAQ,QAAQ,OAAO;AAAA,IACzD;AAEA,QAAI,OAAO,KAAK,eAAe,aAAa;AAC1C,WAAK,aAAa,IAAI;AAAA,IACxB;AACA,QAAI,OAAO,KAAK,aAAa,aAAa;AACxC,WAAK,WAAW,IAAI;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,MAAM,OACJ,OACA,KACuB;AACvB,UAAM,EAAE,OAAO,IAAI,OAAO,CAAC;AAC3B,UAAM,cAAc,OAAO,UAAU,WAAW,QAAQ,MAAM;AAC9D,IAAAA,QAAO,aAAa,8BAA8B;AAClD,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,IAAAA,QAAO,OAAO,UAAU,UAAU,sCAAsC;AAExE,UAAM,wBAAwB;AAAA,MAC5B;AAAA,IACF;AACA,QAAI,uBAAuB;AACzB,YAAM,yBAAyB,qBAAqB;AAAA,IACtD;AACA,QAAI;AACJ,QAAI,MAAM,aAAa,uBAAuB;AAC5C,yBAAmB,MAAM;AAAA,IAC3B;AAEA,QAAI,oBAAoB,CAACD,cAAa,GAAG;AACvC,cAAQ;AAAA,QACN;AAAA,MACF;AACA,yBAAmB;AAAA,IACrB;AAEA,UAAM,UAAU,KAAK,WAAY,MAAM,KAAK,mBAAmB,QAAQ;AAEvE,QAAI,aAA+B;AACnC,QAAI,wBAA4C;AAChD,QAAI,kBAA2C;AAC/C,QAAI,qBAEY;AAChB,QAAI,kBAAkB;AACpB,2BAAqB,MAAM,gBAAgB;AAAA,QACzC;AAAA,QACA,oBAAoB;AAAA,MACtB,CAAC;AACD,MAAAC;AAAA,QACE,mBAAmB;AAAA,QACnB,gCAAgC,gBAAgB,IAC9C,mBAAmB,QAAQ,KAAK,mBAAmB,KAAK,KAAK,EAC/D;AAAA,MACF;AACA,8BAAwB,mBAAmB;AAC3C,wBAAkB,mBAAmB;AACrC,mBAAa,mBAAmB;AAAA,IAClC;AAEA,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,IAAI,MAAM,gBAAgB;AAAA,MACxB,QAAQ,UAAU,KAAK;AAAA,MACvB;AAAA,MACA,0BAA0B;AAAA,MAC1B,cAAc;AAAA,IAChB,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,MACvC,gBAAgB,KAAK,UAAU,WAAW;AAAA,MAC1C;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAgC,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAC1E;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT,SAAS;AAAA,MACX;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,aAAa;AAAA,MACb,MAAM;AAAA,MACN;AAAA,MACA,WAAW,CAAC,CAAC;AAAA,MACb,OAAO;AAAA,IACT;AAEA,UAAM,WAA0B,CAAC;AACjC,KAAC,YAAY,YAAY,CAAC,GAAG,QAAQ,CAAC,SAAS;AAC7C,UAAI,QAAQ,MAAM;AAChB,cAAM,UAAU,YAAY,MAAM,EAAE;AAEpC,YAAI,CAAC,SAAS;AACZ,kBAAQ;AAAA,YACN,kCAAkC,KAAK,EAAE;AAAA,UAC3C;AACA;AAAA,QACF;AACA,iBAAS,KAAK,OAAO;AAAA,MACvB;AAAA,IACF,CAAC;AAED;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH,gBAAgB;AAAA,MAClB;AAAA,MACA;AAAA,IACF;AAEA,QAAI,UAAU;AACZ,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,IAAAA;AAAA,MACE,SAAS,UAAU;AAAA,MACnB,6CAA6C,SAAS,MAAM;AAAA,IAC9D;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO;AAAA,QACL,SAAS;AAAA,UACP,IAAI,SAAS,CAAC,EAAG;AAAA,UACjB,SAAS,SAAS,CAAC,EAAG;AAAA,UACtB,QAAQ,SAAS,CAAC,EAAG;AAAA,UACrB,MAAM,SAAS,CAAC,EAAG;AAAA,UACnB,QAAQ,SAAS,CAAC,EAAG,UAAU,CAAC;AAAA,UAChC,YAAY,SAAS,CAAC,EAAG;AAAA,UACzB;AAAA,QACF;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,QACJ,YACA,KACA,kBACc;AACd,IAAAA;AAAA,MACE,OAAO,eAAe,YAAY,OAAO,eAAe;AAAA,MACxD,kDAAkD,OAAO,UAAU;AAAA,IACrE;AACA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,SAAS;AAEvD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,IAAI,MAAM,qBAAwB;AAAA,MAC3D;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,eAAe;AAAA,IACjB,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,IACzC;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAwB,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAClE;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,OAAO;AAAA,IACT;AAEA,UAAM,EAAE,KAAK,IAAI,eAAe,CAAC;AAGjC;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAEA,QAAI,YAAY,CAAC,MAAM;AACrB,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,WAAO;AAAA,MACL;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,WAA2D;AACtE,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,QAAQ;AACtD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,eAAe,MAAM,SAAS;AAAA,MAClC;AAAA,MACA;AAAA,IACF,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,aAAa,OAAO;AAAA,IAClD;AAEA,UAAM,EAAE,SAAS,KAAK,IAAI,aAAa;AACvC,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,eAAe;AAAA,MACf,kBAAkB;AAAA,MAClB,OAAO,OAAO,SAAY;AAAA,IAC5B;AACA,oBAAgB,UAAU,cAAc;AAExC,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,OAAO,aAAa;AAAA,IACtB;AAAA,EACF;AAAA,EACA,MAAM,SACJ,QACA,KAGyD;AACzD,IAAAA,QAAO,QAAQ,yCAAyC;AACxD,UAAM,UAAU,MAAM,KAAK,mBAAmB,UAAU;AACxD,UAAM,EAAE,kBAAkB,KAAK,IAAI;AACnC,IAAAA,QAAO,kBAAkB,6CAA6C;AAEtE,UAAM,eAAe,4BAA4B;AAGjD,UAAM,kBAAkB;AACxB,UAAM,aAAmB,MAAM,QAAQ,MAAM,IACzC;AAAA,MACE,MAAM,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAChD,KAAK,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAC/C,OAAO;AAAA,MACP,QAAQ;AAAA,IACV,IACA;AAEJ,QAAI,eAAe,MAAM,wBAAwB;AAAA,MAC/C,gBAAgB;AAAA,MAChB;AAAA,MACA,sBAAsB;AAAA,QACpB;AAAA,UACE,MAAM;AAAA,QACR;AAAA,MACF;AAAA,MACA,iBAAiB;AAAA,IACnB,CAAC;AAED,QAAI,KAAK,WAAW;AAClB,YAAM,aAAa,iBAAiB,YAAY,QAAQ,IAAI;AAC5D,YAAM,4BAA4B,UAAU;AAC5C,qBAAe,MAAM;AAAA,QACnB;AAAA,QACA;AAAA,QACA,qBAAqB,oBAAoB;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,OAAe;AAAA,MACnB,EAAE,MAAM,UAAU,SAAS,aAAa;AAAA,MACxC;AAAA,QACE,MAAM;AAAA,QACN,SAAS;AAAA,UACP;AAAA,YACE,MAAM;AAAA,YACN,WAAW;AAAA,cACT,KAAK;AAAA,cACL,QAAQ;AAAA,YACV;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,WACJ,KAAK,cAAc;AAErB,UAAM,MAAM,MAAM,SAAS,8BAAmC;AAE9D,UAAM,EAAE,QAAQ,IAAI;AACpB,IAAAA,QAAO,CAAC,QAAQ,OAAO,oBAAoB,QAAQ,KAAK,EAAE;AAC1D,IAAAA,QAAO,QAAQ,aAAa,gCAAgC;AAC5D,WAAO;AAAA,EACT;AACF;;;AErZA,SAAS,eAAAE,cAAa,uBAAAD,4BAA2B;AAGjD,IAAO,cAAQ","names":["vlLocateMode","assert","MIDSCENE_MODEL_NAME","getAIConfig"],"ignoreList":[],"sources":["../../src/ai-model/action-executor.ts","../../src/ai-model/prompt/describe.ts","../../src/insight/index.ts","../../src/insight/utils.ts","../../src/index.ts"],"sourcesContent":["import type {\n ExecutionDump,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskProgressOptions,\n ExecutionTaskReturn,\n ExecutorContext,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\n\nexport class Executor {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of executor\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n constructor(\n name: string,\n options?: ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n },\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n async append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void> {\n assert(\n this.status !== 'error',\n `executor is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n }\n\n async flush(): Promise<any> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for executor, status is init but tasks are not empty',\n );\n }\n\n assert(this.status !== 'running', 'executor is already running');\n assert(this.status !== 'completed', 'executor is already completed');\n assert(this.status !== 'error', 'executor is in error state');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n return;\n }\n\n this.status = 'running';\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskInsightLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Locate' ||\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported insight subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskInsightLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action' || task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = false;\n task.error = e;\n task.errorMessage =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n\n if (successfullyCompleted) {\n this.status = 'completed';\n } else {\n this.status = 'error';\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n return this.tasks[outputIndex].output;\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n const errorTaskIndex = this.tasks.findIndex(\n (task) => task.status === 'failed',\n );\n if (errorTaskIndex >= 0) {\n return this.tasks[errorTaskIndex];\n }\n return null;\n }\n\n dump(): ExecutionDump {\n let modelDescription = '';\n\n if (vlLocateMode()) {\n const uiTarsModelVer = uiTarsModelVersion();\n if (uiTarsModelVer) {\n modelDescription = `UI-TARS=${uiTarsModelVer}`;\n } else {\n modelDescription = `${vlLocateMode()} mode`;\n }\n }\n const dumpData: ExecutionDump = {\n sdkVersion: getVersion(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n model_description: modelDescription,\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n };\n return dumpData;\n }\n}\n","import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const elementDescriberInstruction = () => {\n return `Describe the element in the red rectangle for precise identification. Use ${getPreferredLanguage()}.\n\nRules:\n1. Start with element type (button, input, link, etc.) \n2. Include key identifiers:\n - Text content: \"with text 'Submit'\"\n - Visual features: \"blue background\", \"icon only\"\n - Position: \"top-right\", \"below search bar\"\n3. Keep description under 20 words\n4. Don't mention the red rectangle\n\nReturn JSON:\n{\n \"description\": \"brief element type with key identifiers\",\n \"error\"?: \"error message if any\"\n}`;\n};\n","import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiAssert, AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightAssertionResponse,\n InsightExtractOption,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n TMultimodalPrompt,\n TUserPrompt,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_FORCE_DEEP_THINK,\n MIDSCENE_USE_QWEN_VL,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n if (searchAreaPrompt && !vlLocateMode()) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const {\n parseResult,\n rect,\n elementById,\n rawResponse,\n usage,\n isOrderSensitive,\n } = await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n isOrderSensitive,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T>(\n dataDemand: InsightExtractParam,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<any> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n usage,\n };\n }\n\n async assert(assertion: TUserPrompt): Promise<InsightAssertionResponse> {\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('assert');\n const startTime = Date.now();\n const assertResult = await AiAssert({\n assertion,\n context,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(assertResult.content),\n };\n\n const { thought, pass } = assertResult.content;\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'assert',\n userQuery: {\n assertion,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n assertionPass: pass,\n assertionThought: thought,\n error: pass ? undefined : thought,\n };\n emitInsightDump(dumpData, dumpSubscriber);\n\n return {\n pass,\n thought,\n usage: assertResult.usage,\n };\n }\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64, size } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(targetRect, context.size);\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n","import type {\n DumpMeta,\n DumpSubscriber,\n InsightDump,\n PartialInsightDumpFromSDK,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport { MIDSCENE_MODEL_NAME, getAIConfig } from '@midscene/shared/env';\nimport { uuid } from '@midscene/shared/utils';\n\nexport function emitInsightDump(\n data: PartialInsightDumpFromSDK,\n dumpSubscriber?: DumpSubscriber,\n) {\n const baseData: DumpMeta = {\n sdkVersion: getVersion(),\n logTime: Date.now(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n };\n const finalData: InsightDump = {\n logId: uuid(),\n ...baseData,\n ...data,\n };\n\n dumpSubscriber?.(finalData);\n}\n","import { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport {\n plan,\n describeUserPage,\n AiLocateElement,\n AiAssert,\n} from './ai-model/index';\n\nexport { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';\n\nexport type * from './types';\nexport default Insight;\nexport { Executor, Insight, getVersion };\n\nexport type {\n MidsceneYamlScript,\n MidsceneYamlTask,\n MidsceneYamlFlowItem,\n MidsceneYamlFlowItemAIRightClick,\n MidsceneYamlConfigResult,\n LocateOption,\n DetailedLocateParam,\n} from './yaml';\n"]}
1
+ {"version":3,"mappings":";;;;;;;;;;;;;;;;AAUA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,cAAc;AAEhB,IAAM,WAAN,MAAe;AAAA,EAUpB,YACE,MACA,SAGA;AACA,SAAK,SACH,SAAS,SAAS,QAAQ,MAAM,SAAS,IAAI,YAAY;AAC3D,SAAK,OAAO;AACZ,SAAK,SAAS,SAAS,SAAS,CAAC,GAAG;AAAA,MAAI,CAAC,SACvC,KAAK,kBAAkB,IAAI;AAAA,IAC7B;AACA,SAAK,cAAc,SAAS;AAAA,EAC9B;AAAA,EAEQ,kBAAkB,MAAyC;AACjE,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,MAAgE;AAC3E;AAAA,MACE,KAAK,WAAW;AAAA,MAChB;AAAA,QAAyD,KAAK,gBAAgB,GAAG,KAAK;AAAA,EAAK,KAAK,gBAAgB,GAAG,UAAU;AAAA,IAC/H;AACA,QAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,WAAK,MAAM,KAAK,GAAG,KAAK,IAAI,CAAC,SAAS,KAAK,kBAAkB,IAAI,CAAC,CAAC;AAAA,IACrE,OAAO;AACL,WAAK,MAAM,KAAK,KAAK,kBAAkB,IAAI,CAAC;AAAA,IAC9C;AACA,QAAI,KAAK,WAAW,WAAW;AAC7B,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAM,QAAsB;AAC1B,QAAI,KAAK,WAAW,UAAU,KAAK,MAAM,SAAS,GAAG;AACnD,cAAQ;AAAA,QACN;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,WAAW,WAAW,6BAA6B;AAC/D,WAAO,KAAK,WAAW,aAAa,+BAA+B;AACnE,WAAO,KAAK,WAAW,SAAS,4BAA4B;AAE5D,UAAM,mBAAmB,KAAK,MAAM;AAAA,MAClC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,mBAAmB,GAAG;AAExB;AAAA,IACF;AAEA,SAAK,SAAS;AACd,QAAI,YAAY;AAChB,QAAI,wBAAwB;AAE5B,QAAI;AAEJ,WAAO,YAAY,KAAK,MAAM,QAAQ;AACpC,YAAM,OAAO,KAAK,MAAM,SAAS;AACjC;AAAA,QACE,KAAK,WAAW;AAAA,QAChB,2CAA2C,KAAK,MAAM;AAAA,MACxD;AACA,WAAK,SAAS;AAAA,QACZ,OAAO,KAAK,IAAI;AAAA,MAClB;AACA,UAAI;AACF,aAAK,SAAS;AACd,YAAI;AACF,cAAI,KAAK,aAAa;AACpB,kBAAM,KAAK,YAAY,IAAI;AAAA,UAC7B;AAAA,QACF,SAAS,GAAG;AACV,kBAAQ,MAAM,wBAAwB,CAAC;AAAA,QACzC;AACA;AAAA,UACE,CAAC,WAAW,UAAU,UAAU,EAAE,QAAQ,KAAK,IAAI,KAAK;AAAA,UACxD,0BAA0B,KAAK,IAAI;AAAA,QACrC;AAEA,cAAM,EAAE,UAAU,MAAM,IAAI;AAC5B,eAAO,UAAU,uCAAuC,KAAK,IAAI,EAAE;AAEnE,YAAI;AACJ,cAAM,kBAAmC;AAAA,UACvC;AAAA,UACA,SAAS,oBAAoB;AAAA,QAC/B;AAEA,YAAI,KAAK,SAAS,WAAW;AAC3B;AAAA,YACE,KAAK,YAAY,YACf,KAAK,YAAY,WACjB,KAAK,YAAY,YACjB,KAAK,YAAY,aACjB,KAAK,YAAY,YACjB,KAAK,YAAY;AAAA,YACnB,gCAAgC,KAAK,OAAO;AAAA,UAC9C;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AACxD,cAAI,KAAK,YAAY,UAAU;AAC7B,iCACE,aACC;AAAA,UACL;AAAA,QACF,WAAW,KAAK,SAAS,YAAY,KAAK,SAAS,YAAY;AAC7D,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D,OAAO;AACL,kBAAQ;AAAA,YACN,0BAA0B,KAAK,IAAI;AAAA,UACrC;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D;AAEA,eAAO,OAAO,MAAM,WAAW;AAC/B,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD;AAAA,MACF,SAAS,GAAQ;AACf,gCAAwB;AACxB,aAAK,QAAQ;AACb,aAAK,eACH,GAAG,YAAY,OAAO,MAAM,WAAW,IAAI;AAC7C,aAAK,aAAa,EAAE;AAEpB,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD;AAAA,MACF;AAAA,IACF;AAGA,aAAS,IAAI,YAAY,GAAG,IAAI,KAAK,MAAM,QAAQ,KAAK;AACtD,WAAK,MAAM,CAAC,EAAE,SAAS;AAAA,IACzB;AAEA,QAAI,uBAAuB;AACzB,WAAK,SAAS;AAAA,IAChB,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAEA,QAAI,KAAK,MAAM,QAAQ;AAErB,YAAM,cAAc,KAAK,IAAI,WAAW,KAAK,MAAM,SAAS,CAAC;AAC7D,aAAO,KAAK,MAAM,WAAW,EAAE;AAAA,IACjC;AAAA,EACF;AAAA,EAEA,iBAA0B;AACxB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,kBAAwC;AACtC,QAAI,KAAK,WAAW,SAAS;AAC3B,aAAO;AAAA,IACT;AACA,UAAM,iBAAiB,KAAK,MAAM;AAAA,MAChC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,kBAAkB,GAAG;AACvB,aAAO,KAAK,MAAM,cAAc;AAAA,IAClC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,OAAsB;AACpB,QAAI,mBAAmB;AAEvB,QAAI,aAAa,GAAG;AAClB,YAAM,iBAAiB,mBAAmB;AAC1C,UAAI,gBAAgB;AAClB,2BAAmB,WAAW,cAAc;AAAA,MAC9C,OAAO;AACL,2BAAmB,GAAG,aAAa,CAAC;AAAA,MACtC;AAAA,IACF;AACA,UAAM,WAA0B;AAAA,MAC9B,YAAY,WAAW;AAAA,MACvB,YAAY,YAAY,mBAAmB,KAAK;AAAA,MAChD,mBAAmB;AAAA,MACnB,SAAS,KAAK,IAAI;AAAA,MAClB,MAAM,KAAK;AAAA,MACX,OAAO,KAAK;AAAA,IACd;AACA,WAAO;AAAA,EACT;AACF;;;AC9NA,SAAS,4BAA4B;AAE9B,IAAM,8BAA8B,MAAM;AAC/C,SAAO;AAAA,4EACmE,qBAAqB,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAuClG;;;ACVA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA,gBAAAA;AAAA,OACK;AACP,SAAS,yBAAyB,kBAAkB;AACpD,SAAS,gBAAgB;AACzB,SAAS,UAAAC,eAAc;;;AClCvB,SAAS,uBAAAC,sBAAqB,eAAAC,oBAAmB;AACjD,SAAS,YAAY;AAEd,SAAS,gBACd,MACA,gBACA;AACA,QAAM,WAAqB;AAAA,IACzB,YAAY,WAAW;AAAA,IACvB,SAAS,KAAK,IAAI;AAAA,IAClB,YAAYA,aAAYD,oBAAmB,KAAK;AAAA,EAClD;AACA,QAAM,YAAyB;AAAA,IAC7B,OAAO,KAAK;AAAA,IACZ,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAEA,mBAAiB,SAAS;AAC5B;;;AD2BA,IAAM,QAAQ,SAAS,YAAY;AACnC,IAAqB,UAArB,MAGE;AAAA,EAWA,YACE,SAGA,KACA;AAXF,sBAAoD;AAYlD,IAAAD,QAAO,SAAS,iCAAiC;AACjD,QAAI,OAAO,YAAY,YAAY;AACjC,WAAK,qBAAqB;AAAA,IAC5B,OAAO;AACL,WAAK,qBAAqB,MAAM,QAAQ,QAAQ,OAAO;AAAA,IACzD;AAEA,QAAI,OAAO,KAAK,eAAe,aAAa;AAC1C,WAAK,aAAa,IAAI;AAAA,IACxB;AACA,QAAI,OAAO,KAAK,aAAa,aAAa;AACxC,WAAK,WAAW,IAAI;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,MAAM,OACJ,OACA,KACuB;AACvB,UAAM,EAAE,OAAO,IAAI,OAAO,CAAC;AAC3B,UAAM,cAAc,OAAO,UAAU,WAAW,QAAQ,MAAM;AAC9D,IAAAA,QAAO,aAAa,8BAA8B;AAClD,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,IAAAA,QAAO,OAAO,UAAU,UAAU,sCAAsC;AAExE,UAAM,wBAAwB;AAAA,MAC5B;AAAA,IACF;AACA,QAAI,uBAAuB;AACzB,YAAM,yBAAyB,qBAAqB;AAAA,IACtD;AACA,QAAI;AACJ,QAAI,MAAM,aAAa,uBAAuB;AAC5C,yBAAmB,MAAM;AAAA,IAC3B;AAEA,QAAI,oBAAoB,CAACD,cAAa,GAAG;AACvC,cAAQ;AAAA,QACN;AAAA,MACF;AACA,yBAAmB;AAAA,IACrB;AAEA,UAAM,UAAU,KAAK,WAAY,MAAM,KAAK,mBAAmB,QAAQ;AAEvE,QAAI,aAA+B;AACnC,QAAI,wBAA4C;AAChD,QAAI,kBAA2C;AAC/C,QAAI,qBAEY;AAChB,QAAI,kBAAkB;AACpB,2BAAqB,MAAM,gBAAgB;AAAA,QACzC;AAAA,QACA,oBAAoB;AAAA,MACtB,CAAC;AACD,MAAAC;AAAA,QACE,mBAAmB;AAAA,QACnB,gCAAgC,gBAAgB,IAC9C,mBAAmB,QAAQ,KAAK,mBAAmB,KAAK,KAAK,EAC/D;AAAA,MACF;AACA,8BAAwB,mBAAmB;AAC3C,wBAAkB,mBAAmB;AACrC,mBAAa,mBAAmB;AAAA,IAClC;AAEA,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,IAAI,MAAM,gBAAgB;AAAA,MACxB,QAAQ,UAAU,KAAK;AAAA,MACvB;AAAA,MACA,0BAA0B;AAAA,MAC1B,cAAc;AAAA,IAChB,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,MACvC,gBAAgB,KAAK,UAAU,WAAW;AAAA,MAC1C;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAgC,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAC1E;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT,SAAS;AAAA,MACX;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,aAAa;AAAA,MACb,MAAM;AAAA,MACN;AAAA,MACA,WAAW,CAAC,CAAC;AAAA,MACb,OAAO;AAAA,IACT;AAEA,UAAM,WAA0B,CAAC;AACjC,KAAC,YAAY,YAAY,CAAC,GAAG,QAAQ,CAAC,SAAS;AAC7C,UAAI,QAAQ,MAAM;AAChB,cAAM,UAAU,YAAY,MAAM,EAAE;AAEpC,YAAI,CAAC,SAAS;AACZ,kBAAQ;AAAA,YACN,kCAAkC,KAAK,EAAE;AAAA,UAC3C;AACA;AAAA,QACF;AACA,iBAAS,KAAK,OAAO;AAAA,MACvB;AAAA,IACF,CAAC;AAED;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH,gBAAgB;AAAA,MAClB;AAAA,MACA;AAAA,IACF;AAEA,QAAI,UAAU;AACZ,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,IAAAA;AAAA,MACE,SAAS,UAAU;AAAA,MACnB,6CAA6C,SAAS,MAAM;AAAA,IAC9D;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO;AAAA,QACL,SAAS;AAAA,UACP,IAAI,SAAS,CAAC,EAAG;AAAA,UACjB,SAAS,SAAS,CAAC,EAAG;AAAA,UACtB,QAAQ,SAAS,CAAC,EAAG;AAAA,UACrB,MAAM,SAAS,CAAC,EAAG;AAAA,UACnB,QAAQ,SAAS,CAAC,EAAG,UAAU,CAAC;AAAA,UAChC,YAAY,SAAS,CAAC,EAAG;AAAA,UACzB;AAAA,QACF;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,QACJ,YACA,KACA,kBACc;AACd,IAAAA;AAAA,MACE,OAAO,eAAe,YAAY,OAAO,eAAe;AAAA,MACxD,kDAAkD,OAAO,UAAU;AAAA,IACrE;AACA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,SAAS;AAEvD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,IAAI,MAAM,qBAAwB;AAAA,MAC3D;AAAA,MACA,WAAW;AAAA,MACX;AAAA,MACA,eAAe;AAAA,IACjB,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,IACzC;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAwB,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAClE;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,OAAO;AAAA,IACT;AAEA,UAAM,EAAE,KAAK,IAAI,eAAe,CAAC;AAGjC;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAEA,QAAI,YAAY,CAAC,MAAM;AACrB,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,WAAO;AAAA,MACL;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,WAA2D;AACtE,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,QAAQ;AACtD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,eAAe,MAAM,SAAS;AAAA,MAClC;AAAA,MACA;AAAA,IACF,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,aAAa,OAAO;AAAA,IAClD;AAEA,UAAM,EAAE,SAAS,KAAK,IAAI,aAAa;AACvC,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,eAAe;AAAA,MACf,kBAAkB;AAAA,MAClB,OAAO,OAAO,SAAY;AAAA,IAC5B;AACA,oBAAgB,UAAU,cAAc;AAExC,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,OAAO,aAAa;AAAA,IACtB;AAAA,EACF;AAAA,EACA,MAAM,SACJ,QACA,KAGyD;AACzD,IAAAA,QAAO,QAAQ,yCAAyC;AACxD,UAAM,UAAU,MAAM,KAAK,mBAAmB,UAAU;AACxD,UAAM,EAAE,kBAAkB,KAAK,IAAI;AACnC,IAAAA,QAAO,kBAAkB,6CAA6C;AAEtE,UAAM,eAAe,4BAA4B;AAGjD,UAAM,kBAAkB;AACxB,UAAM,aAAmB,MAAM,QAAQ,MAAM,IACzC;AAAA,MACE,MAAM,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAChD,KAAK,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAC/C,OAAO;AAAA,MACP,QAAQ;AAAA,IACV,IACA;AAEJ,QAAI,eAAe,MAAM,wBAAwB;AAAA,MAC/C,gBAAgB;AAAA,MAChB;AAAA,MACA,sBAAsB;AAAA,QACpB;AAAA,UACE,MAAM;AAAA,QACR;AAAA,MACF;AAAA,MACA,iBAAiB;AAAA,IACnB,CAAC;AAED,QAAI,KAAK,WAAW;AAClB,YAAM,aAAa,iBAAiB,YAAY,QAAQ,IAAI;AAC5D,YAAM,4BAA4B,UAAU;AAC5C,qBAAe,MAAM;AAAA,QACnB;AAAA,QACA;AAAA,QACA,qBAAqB,oBAAoB;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,OAAe;AAAA,MACnB,EAAE,MAAM,UAAU,SAAS,aAAa;AAAA,MACxC;AAAA,QACE,MAAM;AAAA,QACN,SAAS;AAAA,UACP;AAAA,YACE,MAAM;AAAA,YACN,WAAW;AAAA,cACT,KAAK;AAAA,cACL,QAAQ;AAAA,YACV;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,WACJ,KAAK,cAAc;AAErB,UAAM,MAAM,MAAM,SAAS,8BAAmC;AAE9D,UAAM,EAAE,QAAQ,IAAI;AACpB,IAAAA,QAAO,CAAC,QAAQ,OAAO,oBAAoB,QAAQ,KAAK,EAAE;AAC1D,IAAAA,QAAO,QAAQ,aAAa,gCAAgC;AAC5D,WAAO;AAAA,EACT;AACF;;;AErZA,SAAS,eAAAE,cAAa,uBAAAD,4BAA2B;AAGjD,IAAO,cAAQ","names":["vlLocateMode","assert","MIDSCENE_MODEL_NAME","getAIConfig"],"ignoreList":[],"sources":["../../src/ai-model/action-executor.ts","../../src/ai-model/prompt/describe.ts","../../src/insight/index.ts","../../src/insight/utils.ts","../../src/index.ts"],"sourcesContent":["import type {\n ExecutionDump,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskProgressOptions,\n ExecutionTaskReturn,\n ExecutorContext,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\n\nexport class Executor {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of executor\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n constructor(\n name: string,\n options?: ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n },\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n async append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void> {\n assert(\n this.status !== 'error',\n `executor is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n }\n\n async flush(): Promise<any> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for executor, status is init but tasks are not empty',\n );\n }\n\n assert(this.status !== 'running', 'executor is already running');\n assert(this.status !== 'completed', 'executor is already completed');\n assert(this.status !== 'error', 'executor is in error state');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n return;\n }\n\n this.status = 'running';\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskInsightLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Locate' ||\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported insight subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskInsightLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action' || task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = false;\n task.error = e;\n task.errorMessage =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n\n if (successfullyCompleted) {\n this.status = 'completed';\n } else {\n this.status = 'error';\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n return this.tasks[outputIndex].output;\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n const errorTaskIndex = this.tasks.findIndex(\n (task) => task.status === 'failed',\n );\n if (errorTaskIndex >= 0) {\n return this.tasks[errorTaskIndex];\n }\n return null;\n }\n\n dump(): ExecutionDump {\n let modelDescription = '';\n\n if (vlLocateMode()) {\n const uiTarsModelVer = uiTarsModelVersion();\n if (uiTarsModelVer) {\n modelDescription = `UI-TARS=${uiTarsModelVer}`;\n } else {\n modelDescription = `${vlLocateMode()} mode`;\n }\n }\n const dumpData: ExecutionDump = {\n sdkVersion: getVersion(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n model_description: modelDescription,\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n };\n return dumpData;\n }\n}\n","import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const elementDescriberInstruction = () => {\n return `\nDescribe the element in the red rectangle for precise identification. Use ${getPreferredLanguage()}.\n\nCRITICAL REQUIREMENTS:\n1. UNIQUENESS: The description must uniquely identify this element on the current page\n2. UNIVERSALITY: Use generic, reusable selectors that work across different contexts\n3. PRECISION: Be specific enough to distinguish from similar elements\n\nDESCRIPTION STRUCTURE:\n1. Element type (button, input, link, div, etc.)\n2. Primary identifier (in order of preference):\n - Unique text content: \"with text 'Login'\"\n - Unique attribute: \"with aria-label 'Search'\"\n - Unique class/ID: \"with class 'primary-button'\"\n - Unique position: \"in header navigation\"\n3. Secondary identifiers (if needed for uniqueness):\n - Visual features: \"blue background\", \"with icon\"\n - Relative position: \"below search bar\", \"in sidebar\"\n - Parent context: \"in login form\", \"in main menu\"\n\nGUIDELINES:\n- Keep description under 25 words\n- Prioritize semantic identifiers over visual ones\n- Use consistent terminology across similar elements\n- Avoid page-specific or temporary content\n- Don't mention the red rectangle or selection box\n- Focus on stable, reusable characteristics\n\nEXAMPLES:\n- \"Login button with text 'Sign In'\"\n- \"Search input with placeholder 'Enter keywords'\"\n- \"Navigation link with text 'Home' in header\"\n- \"Submit button in contact form\"\n- \"Menu icon with aria-label 'Open menu'\"\n\nReturn JSON:\n{\n \"description\": \"unique element identifier\",\n \"error\"?: \"error message if any\"\n}`;\n};\n","import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiAssert, AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightAssertionResponse,\n InsightExtractOption,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n TMultimodalPrompt,\n TUserPrompt,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_FORCE_DEEP_THINK,\n MIDSCENE_USE_QWEN_VL,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n if (searchAreaPrompt && !vlLocateMode()) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const {\n parseResult,\n rect,\n elementById,\n rawResponse,\n usage,\n isOrderSensitive,\n } = await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n isOrderSensitive,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T>(\n dataDemand: InsightExtractParam,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<any> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n usage,\n };\n }\n\n async assert(assertion: TUserPrompt): Promise<InsightAssertionResponse> {\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('assert');\n const startTime = Date.now();\n const assertResult = await AiAssert({\n assertion,\n context,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(assertResult.content),\n };\n\n const { thought, pass } = assertResult.content;\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'assert',\n userQuery: {\n assertion,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n assertionPass: pass,\n assertionThought: thought,\n error: pass ? undefined : thought,\n };\n emitInsightDump(dumpData, dumpSubscriber);\n\n return {\n pass,\n thought,\n usage: assertResult.usage,\n };\n }\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64, size } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(targetRect, context.size);\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n","import type {\n DumpMeta,\n DumpSubscriber,\n InsightDump,\n PartialInsightDumpFromSDK,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport { MIDSCENE_MODEL_NAME, getAIConfig } from '@midscene/shared/env';\nimport { uuid } from '@midscene/shared/utils';\n\nexport function emitInsightDump(\n data: PartialInsightDumpFromSDK,\n dumpSubscriber?: DumpSubscriber,\n) {\n const baseData: DumpMeta = {\n sdkVersion: getVersion(),\n logTime: Date.now(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n };\n const finalData: InsightDump = {\n logId: uuid(),\n ...baseData,\n ...data,\n };\n\n dumpSubscriber?.(finalData);\n}\n","import { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport {\n plan,\n describeUserPage,\n AiLocateElement,\n AiAssert,\n} from './ai-model/index';\n\nexport { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';\n\nexport type * from './types';\nexport default Insight;\nexport { Executor, Insight, getVersion };\n\nexport type {\n MidsceneYamlScript,\n MidsceneYamlTask,\n MidsceneYamlFlowItem,\n MidsceneYamlFlowItemAIRightClick,\n MidsceneYamlConfigResult,\n} from './yaml';\n"]}
@@ -1,4 +1,4 @@
1
- import { p as AIUsageInfo, U as UIContext, i as TUserPrompt, aG as ReferenceImage, u as AIElementLocatorResponse, Q as ElementById, T as TMultimodalPrompt, h as InsightExtractOption, x as AIDataExtractionResponse, z as AIAssertionResponse, aB as PageType, Z as PlanningAIResponse } from './types-7b64b80b.js';
1
+ import { o as AIUsageInfo, U as UIContext, i as TUserPrompt, aG as ReferenceImage, t as AIElementLocatorResponse, O as ElementById, T as TMultimodalPrompt, h as InsightExtractOption, w as AIDataExtractionResponse, y as AIAssertionResponse, aA as PageType, Y as PlanningAIResponse } from './types-512d3687.js';
2
2
  import { Rect, ElementTreeNode, BaseElement } from '@midscene/shared/types';
3
3
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
4
4
 
@@ -102,4 +102,4 @@ declare function plan(userInstruction: string, opts: {
102
102
  actionContext?: string;
103
103
  }): Promise<PlanningAIResponse>;
104
104
 
105
- export { AiLocateElement as A, AiAssert as a, AIActionType as b, callAiFn as c, describeUserPage as d, elementByPositionWithElementInfo as e, AiExtractElementInfo as f, AiLocateSection as g, adaptBboxToRect as h, plan as p };
105
+ export { AiLocateElement as A, AiAssert as a, AIActionType as b, callAiFn as c, describeUserPage as d, type AIArgs as e, elementByPositionWithElementInfo as f, AiExtractElementInfo as g, AiLocateSection as h, adaptBboxToRect as i, plan as p };
@@ -1,12 +1,11 @@
1
1
  import { NodeType } from '@midscene/shared/constants';
2
- import { BaseElement, Rect, ElementTreeNode, Size } from '@midscene/shared/types';
2
+ import { Rect, BaseElement, ElementTreeNode, Size } from '@midscene/shared/types';
3
3
  import { ChatCompletionMessageParam } from 'openai/resources';
4
4
 
5
5
  interface LocateOption {
6
6
  deepThink?: boolean;
7
7
  cacheable?: boolean;
8
8
  xpath?: string;
9
- _forceContext?: UIContext<BaseElement>;
10
9
  }
11
10
  interface InsightExtractOption {
12
11
  domIncluded?: boolean | 'visible-only';
@@ -563,4 +562,4 @@ type TUserPrompt = string | ({
563
562
  prompt: string;
564
563
  } & Partial<TMultimodalPrompt>);
565
564
 
566
- export { type PlanningActionParamHover as $, type AIDescribeElementResponse as A, type LocatorValidatorOption as B, type LocateValidatorResult as C, type DumpSubscriber as D, type ExecutionTask as E, type AgentDescribeElementAtPointResult as F, type CallAIFn as G, type EnsureObject as H, type InsightAction as I, type LocateResultElement as J, type DumpMeta as K, type LocateResult as L, type MidsceneYamlScript as M, type InsightDump as N, type LiteUISection as O, type PartialInsightDumpFromSDK as P, type ElementById as Q, type ReportDumpWithAttributes as R, type OnTaskStartTip as S, type TMultimodalPrompt as T, UIContext as U, type AgentWaitForOpt as V, type AgentAssertOpt as W, type PlanningLocateParam as X, type PlanningAction as Y, type PlanningAIResponse as Z, type PlanningActionParamTap as _, type ExecutionTaskProgressOptions as a, type MidsceneYamlFlowItemSleep as a$, type PlanningActionParamRightClick as a0, type PlanningActionParamInputOrKeyPress as a1, type PlanningActionParamScroll as a2, type PlanningActionParamAssert as a3, type PlanningActionParamSleep as a4, type PlanningActionParamError as a5, type PlanningActionParamWaitFor as a6, type PlanningActionParamAndroidLongPress as a7, type PlanningActionParamAndroidPull as a8, type Color as a9, type GroupedActionDump as aA, type PageType as aB, type StreamingCodeGenerationOptions as aC, type StreamingCallback as aD, type CodeGenerationChunk as aE, type StreamingAIResponse as aF, type ReferenceImage as aG, type scrollParam as aH, type MidsceneYamlScriptEnvBase as aI, type MidsceneYamlScriptWebEnv as aJ, type MidsceneYamlScriptAndroidEnv as aK, type MidsceneYamlScriptEnv as aL, type MidsceneYamlFlowItemAIAction as aM, type MidsceneYamlFlowItemAIAssert as aN, type MidsceneYamlFlowItemAIQuery as aO, type MidsceneYamlFlowItemAINumber as aP, type MidsceneYamlFlowItemAIString as aQ, type MidsceneYamlFlowItemAIAsk as aR, type MidsceneYamlFlowItemAIBoolean as aS, type MidsceneYamlFlowItemAILocate as aT, type MidsceneYamlFlowItemAIWaitFor as aU, type MidsceneYamlFlowItemAITap as aV, type MidsceneYamlFlowItemAIHover as aW, type MidsceneYamlFlowItemAIInput as aX, type MidsceneYamlFlowItemAIKeyboardPress as aY, type MidsceneYamlFlowItemAIScroll as aZ, type MidsceneYamlFlowItemEvaluateJavaScript as a_, type BaseAgentParserOpt as aa, type PuppeteerParserOpt as ab, type PlaywrightParserOpt as ac, type ExecutionRecorderItem as ad, type ExecutionTaskType as ae, type ExecutorContext as af, type ExecutionTaskHitBy as ag, type ExecutionTaskReturn as ah, type ExecutionTaskInsightLocateParam as ai, type ExecutionTaskInsightLocateOutput as aj, type ExecutionTaskInsightDumpLog as ak, type ExecutionTaskInsightLocateApply as al, type ExecutionTaskInsightLocate as am, type ExecutionTaskInsightQueryParam as an, type ExecutionTaskInsightQueryOutput as ao, type ExecutionTaskInsightQueryApply as ap, type ExecutionTaskInsightQuery as aq, type ExecutionTaskInsightAssertionParam as ar, type ExecutionTaskInsightAssertionApply as as, type ExecutionTaskInsightAssertion as at, type ExecutionTaskActionApply as au, type ExecutionTaskAction as av, type ExecutionTaskLogApply as aw, type ExecutionTaskLog as ax, type ExecutionTaskPlanningApply as ay, type ExecutionTaskPlanning as az, type ExecutionTaskApply as b, type MidsceneYamlFlowItemLogScreenshot as b0, type FreeFn as b1, type ScriptPlayerTaskStatus as b2, type ScriptPlayerStatusValue as b3, type MidsceneYamlConfig as b4, type MidsceneYamlConfigOutput as b5, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractParam as g, type InsightExtractOption as h, type TUserPrompt as i, type InsightAssertionResponse as j, type MidsceneYamlTask as k, type MidsceneYamlFlowItem as l, type MidsceneYamlFlowItemAIRightClick as m, type MidsceneYamlConfigResult as n, type LocateOption as o, type AIUsageInfo as p, AIResponseFormat as q, type AISingleElementResponseById as r, type AISingleElementResponseByPosition as s, type AISingleElementResponse as t, type AIElementLocatorResponse as u, type AIElementCoordinatesResponse as v, type AIElementResponse as w, type AIDataExtractionResponse as x, type AISectionLocatorResponse as y, type AIAssertionResponse as z };
565
+ export { type PlanningActionParamRightClick as $, type AIDescribeElementResponse as A, type LocateValidatorResult as B, type AgentDescribeElementAtPointResult as C, type DumpSubscriber as D, type ExecutionTask as E, type CallAIFn as F, type EnsureObject as G, type LocateResultElement as H, type InsightAction as I, type DumpMeta as J, type InsightDump as K, type LocateResult as L, type MidsceneYamlScript as M, type LiteUISection as N, type ElementById as O, type PartialInsightDumpFromSDK as P, type OnTaskStartTip as Q, type ReportDumpWithAttributes as R, type AgentWaitForOpt as S, type TMultimodalPrompt as T, UIContext as U, type AgentAssertOpt as V, type PlanningLocateParam as W, type PlanningAction as X, type PlanningAIResponse as Y, type PlanningActionParamTap as Z, type PlanningActionParamHover as _, type ExecutionTaskProgressOptions as a, type MidsceneYamlFlowItemSleep as a$, type PlanningActionParamInputOrKeyPress as a0, type PlanningActionParamScroll as a1, type PlanningActionParamAssert as a2, type PlanningActionParamSleep as a3, type PlanningActionParamError as a4, type PlanningActionParamWaitFor as a5, type PlanningActionParamAndroidLongPress as a6, type PlanningActionParamAndroidPull as a7, type Color as a8, type BaseAgentParserOpt as a9, type PageType as aA, type StreamingCodeGenerationOptions as aB, type StreamingCallback as aC, type CodeGenerationChunk as aD, type StreamingAIResponse as aE, type LocateOption as aF, type ReferenceImage as aG, type scrollParam as aH, type MidsceneYamlScriptEnvBase as aI, type MidsceneYamlScriptWebEnv as aJ, type MidsceneYamlScriptAndroidEnv as aK, type MidsceneYamlScriptEnv as aL, type MidsceneYamlFlowItemAIAction as aM, type MidsceneYamlFlowItemAIAssert as aN, type MidsceneYamlFlowItemAIQuery as aO, type MidsceneYamlFlowItemAINumber as aP, type MidsceneYamlFlowItemAIString as aQ, type MidsceneYamlFlowItemAIAsk as aR, type MidsceneYamlFlowItemAIBoolean as aS, type MidsceneYamlFlowItemAILocate as aT, type MidsceneYamlFlowItemAIWaitFor as aU, type MidsceneYamlFlowItemAITap as aV, type MidsceneYamlFlowItemAIHover as aW, type MidsceneYamlFlowItemAIInput as aX, type MidsceneYamlFlowItemAIKeyboardPress as aY, type MidsceneYamlFlowItemAIScroll as aZ, type MidsceneYamlFlowItemEvaluateJavaScript as a_, type PuppeteerParserOpt as aa, type PlaywrightParserOpt as ab, type ExecutionRecorderItem as ac, type ExecutionTaskType as ad, type ExecutorContext as ae, type ExecutionTaskHitBy as af, type ExecutionTaskReturn as ag, type ExecutionTaskInsightLocateParam as ah, type ExecutionTaskInsightLocateOutput as ai, type ExecutionTaskInsightDumpLog as aj, type ExecutionTaskInsightLocateApply as ak, type ExecutionTaskInsightLocate as al, type ExecutionTaskInsightQueryParam as am, type ExecutionTaskInsightQueryOutput as an, type ExecutionTaskInsightQueryApply as ao, type ExecutionTaskInsightQuery as ap, type ExecutionTaskInsightAssertionParam as aq, type ExecutionTaskInsightAssertionApply as ar, type ExecutionTaskInsightAssertion as as, type ExecutionTaskActionApply as at, type ExecutionTaskAction as au, type ExecutionTaskLogApply as av, type ExecutionTaskLog as aw, type ExecutionTaskPlanningApply as ax, type ExecutionTaskPlanning as ay, type GroupedActionDump as az, type ExecutionTaskApply as b, type MidsceneYamlFlowItemLogScreenshot as b0, type FreeFn as b1, type ScriptPlayerTaskStatus as b2, type ScriptPlayerStatusValue as b3, type MidsceneYamlConfig as b4, type MidsceneYamlConfigOutput as b5, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractParam as g, type InsightExtractOption as h, type TUserPrompt as i, type InsightAssertionResponse as j, type MidsceneYamlTask as k, type MidsceneYamlFlowItem as l, type MidsceneYamlFlowItemAIRightClick as m, type MidsceneYamlConfigResult as n, type AIUsageInfo as o, AIResponseFormat as p, type AISingleElementResponseById as q, type AISingleElementResponseByPosition as r, type AISingleElementResponse as s, type AIElementLocatorResponse as t, type AIElementCoordinatesResponse as u, type AIElementResponse as v, type AIDataExtractionResponse as w, type AISectionLocatorResponse as x, type AIAssertionResponse as y, type LocatorValidatorOption as z };
@@ -1,4 +1,4 @@
1
- import { R as ReportDumpWithAttributes } from './types-7b64b80b.js';
1
+ import { R as ReportDumpWithAttributes } from './types-512d3687.js';
2
2
  import { Rect } from '@midscene/shared/types';
3
3
  import '@midscene/shared/constants';
4
4
  import 'openai/resources';
package/dist/lib/utils.js CHANGED
@@ -12,7 +12,7 @@
12
12
 
13
13
 
14
14
 
15
- var _chunk2RCMQS5Ojs = require('./chunk-2RCMQS5O.js');
15
+ var _chunkFEGAGUMNjs = require('./chunk-FEGAGUMN.js');
16
16
 
17
17
 
18
18
 
@@ -27,4 +27,4 @@ var _chunk2RCMQS5Ojs = require('./chunk-2RCMQS5O.js');
27
27
 
28
28
 
29
29
 
30
- exports.getTmpDir = _chunk2RCMQS5Ojs.getTmpDir; exports.getTmpFile = _chunk2RCMQS5Ojs.getTmpFile; exports.getVersion = _chunk2RCMQS5Ojs.getVersion; exports.groupedActionDumpFileExt = _chunk2RCMQS5Ojs.groupedActionDumpFileExt; exports.insertScriptBeforeClosingHtml = _chunk2RCMQS5Ojs.insertScriptBeforeClosingHtml; exports.overlapped = _chunk2RCMQS5Ojs.overlapped; exports.replacerForPageObject = _chunk2RCMQS5Ojs.replacerForPageObject; exports.reportHTMLContent = _chunk2RCMQS5Ojs.reportHTMLContent; exports.sleep = _chunk2RCMQS5Ojs.sleep; exports.stringifyDumpData = _chunk2RCMQS5Ojs.stringifyDumpData; exports.uploadTestInfoToServer = _chunk2RCMQS5Ojs.uploadTestInfoToServer; exports.writeDumpReport = _chunk2RCMQS5Ojs.writeDumpReport; exports.writeLogFile = _chunk2RCMQS5Ojs.writeLogFile;
30
+ exports.getTmpDir = _chunkFEGAGUMNjs.getTmpDir; exports.getTmpFile = _chunkFEGAGUMNjs.getTmpFile; exports.getVersion = _chunkFEGAGUMNjs.getVersion; exports.groupedActionDumpFileExt = _chunkFEGAGUMNjs.groupedActionDumpFileExt; exports.insertScriptBeforeClosingHtml = _chunkFEGAGUMNjs.insertScriptBeforeClosingHtml; exports.overlapped = _chunkFEGAGUMNjs.overlapped; exports.replacerForPageObject = _chunkFEGAGUMNjs.replacerForPageObject; exports.reportHTMLContent = _chunkFEGAGUMNjs.reportHTMLContent; exports.sleep = _chunkFEGAGUMNjs.sleep; exports.stringifyDumpData = _chunkFEGAGUMNjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunkFEGAGUMNjs.uploadTestInfoToServer; exports.writeDumpReport = _chunkFEGAGUMNjs.writeDumpReport; exports.writeLogFile = _chunkFEGAGUMNjs.writeLogFile;
@@ -1,9 +1,9 @@
1
- import { aD as StreamingCallback, p as AIUsageInfo, aC as StreamingCodeGenerationOptions, aF as StreamingAIResponse, Y as PlanningAction, l as MidsceneYamlFlowItem } from './types-7b64b80b.js';
1
+ import { aC as StreamingCallback, o as AIUsageInfo, aB as StreamingCodeGenerationOptions, aE as StreamingAIResponse, X as PlanningAction, l as MidsceneYamlFlowItem } from './types-512d3687.js';
2
2
  import OpenAI from 'openai';
3
3
  import { ChatCompletionMessageParam } from 'openai/resources';
4
4
  export { ChatCompletionMessageParam } from 'openai/resources';
5
- import { b as AIActionType } from './llm-planning-4c782a8d.js';
6
- export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-4c782a8d.js';
5
+ import { b as AIActionType, e as AIArgs } from './llm-planning-877248da.js';
6
+ export { a as AiAssert, g as AiExtractElementInfo, A as AiLocateElement, h as AiLocateSection, i as adaptBboxToRect, c as callAiFn, d as describeUserPage, f as elementByPositionWithElementInfo, p as plan } from './llm-planning-877248da.js';
7
7
  import { vlLocateMode } from '@midscene/shared/env';
8
8
  import { actionParser } from '@ui-tars/action-parser';
9
9
  import { Size } from '@midscene/shared/types';
@@ -21,6 +21,10 @@ declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[],
21
21
  content: T;
22
22
  usage?: AIUsageInfo;
23
23
  }>;
24
+ declare function callAiFnWithStringResponse<T>(msgs: AIArgs, AIActionTypeValue: AIActionType): Promise<{
25
+ content: string;
26
+ usage?: AIUsageInfo;
27
+ }>;
24
28
 
25
29
  declare function systemPromptToLocateElement(vlMode: ReturnType<typeof vlLocateMode>): string;
26
30
 
@@ -92,4 +96,4 @@ declare function vlmPlanning(options: {
92
96
  }>;
93
97
  declare function resizeImageForUiTars(imageBase64: string, size: Size): Promise<string>;
94
98
 
95
- export { AIActionType, call as callAi, callToGetJSONObject, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, resizeImageForUiTars, systemPromptToLocateElement, vlmPlanning };
99
+ export { AIActionType, AIArgs, call as callAi, callAiFnWithStringResponse, callToGetJSONObject, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, resizeImageForUiTars, systemPromptToLocateElement, vlmPlanning };
@@ -1,7 +1,7 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractParam, h as InsightExtractOption, T as TMultimodalPrompt, i as TUserPrompt, j as InsightAssertionResponse, A as AIDescribeElementResponse } from './types-7b64b80b.js';
2
- export { z as AIAssertionResponse, x as AIDataExtractionResponse, v as AIElementCoordinatesResponse, u as AIElementLocatorResponse, w as AIElementResponse, q as AIResponseFormat, y as AISectionLocatorResponse, t as AISingleElementResponse, r as AISingleElementResponseById, s as AISingleElementResponseByPosition, p as AIUsageInfo, W as AgentAssertOpt, F as AgentDescribeElementAtPointResult, V as AgentWaitForOpt, aa as BaseAgentParserOpt, G as CallAIFn, aE as CodeGenerationChunk, a9 as Color, K as DumpMeta, Q as ElementById, H as EnsureObject, ad as ExecutionRecorderItem, av as ExecutionTaskAction, au as ExecutionTaskActionApply, ag as ExecutionTaskHitBy, at as ExecutionTaskInsightAssertion, as as ExecutionTaskInsightAssertionApply, ar as ExecutionTaskInsightAssertionParam, ak as ExecutionTaskInsightDumpLog, am as ExecutionTaskInsightLocate, al as ExecutionTaskInsightLocateApply, aj as ExecutionTaskInsightLocateOutput, ai as ExecutionTaskInsightLocateParam, aq as ExecutionTaskInsightQuery, ap as ExecutionTaskInsightQueryApply, ao as ExecutionTaskInsightQueryOutput, an as ExecutionTaskInsightQueryParam, ax as ExecutionTaskLog, aw as ExecutionTaskLogApply, az as ExecutionTaskPlanning, ay as ExecutionTaskPlanningApply, ah as ExecutionTaskReturn, ae as ExecutionTaskType, af as ExecutorContext, b1 as FreeFn, aA as GroupedActionDump, N as InsightDump, O as LiteUISection, o as LocateOption, J as LocateResultElement, C as LocateValidatorResult, B as LocatorValidatorOption, b4 as MidsceneYamlConfig, b5 as MidsceneYamlConfigOutput, n as MidsceneYamlConfigResult, l as MidsceneYamlFlowItem, aM as MidsceneYamlFlowItemAIAction, aR as MidsceneYamlFlowItemAIAsk, aN as MidsceneYamlFlowItemAIAssert, aS as MidsceneYamlFlowItemAIBoolean, aW as MidsceneYamlFlowItemAIHover, aX as MidsceneYamlFlowItemAIInput, aY as MidsceneYamlFlowItemAIKeyboardPress, aT as MidsceneYamlFlowItemAILocate, aP as MidsceneYamlFlowItemAINumber, aO as MidsceneYamlFlowItemAIQuery, m as MidsceneYamlFlowItemAIRightClick, aZ as MidsceneYamlFlowItemAIScroll, aQ as MidsceneYamlFlowItemAIString, aV as MidsceneYamlFlowItemAITap, aU as MidsceneYamlFlowItemAIWaitFor, a_ as MidsceneYamlFlowItemEvaluateJavaScript, b0 as MidsceneYamlFlowItemLogScreenshot, a$ as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aK as MidsceneYamlScriptAndroidEnv, aL as MidsceneYamlScriptEnv, aI as MidsceneYamlScriptEnvBase, aJ as MidsceneYamlScriptWebEnv, k as MidsceneYamlTask, S as OnTaskStartTip, aB as PageType, P as PartialInsightDumpFromSDK, Z as PlanningAIResponse, Y as PlanningAction, a7 as PlanningActionParamAndroidLongPress, a8 as PlanningActionParamAndroidPull, a3 as PlanningActionParamAssert, a5 as PlanningActionParamError, $ as PlanningActionParamHover, a1 as PlanningActionParamInputOrKeyPress, a0 as PlanningActionParamRightClick, a2 as PlanningActionParamScroll, a4 as PlanningActionParamSleep, _ as PlanningActionParamTap, a6 as PlanningActionParamWaitFor, X as PlanningLocateParam, ac as PlaywrightParserOpt, ab as PuppeteerParserOpt, aG as ReferenceImage, R as ReportDumpWithAttributes, b3 as ScriptPlayerStatusValue, b2 as ScriptPlayerTaskStatus, aF as StreamingAIResponse, aD as StreamingCallback, aC as StreamingCodeGenerationOptions, aH as scrollParam } from './types-7b64b80b.js';
3
- import { c as callAiFn } from './llm-planning-4c782a8d.js';
4
- export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-4c782a8d.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractParam, h as InsightExtractOption, T as TMultimodalPrompt, i as TUserPrompt, j as InsightAssertionResponse, A as AIDescribeElementResponse } from './types-512d3687.js';
2
+ export { y as AIAssertionResponse, w as AIDataExtractionResponse, u as AIElementCoordinatesResponse, t as AIElementLocatorResponse, v as AIElementResponse, p as AIResponseFormat, x as AISectionLocatorResponse, s as AISingleElementResponse, q as AISingleElementResponseById, r as AISingleElementResponseByPosition, o as AIUsageInfo, V as AgentAssertOpt, C as AgentDescribeElementAtPointResult, S as AgentWaitForOpt, a9 as BaseAgentParserOpt, F as CallAIFn, aD as CodeGenerationChunk, a8 as Color, J as DumpMeta, O as ElementById, G as EnsureObject, ac as ExecutionRecorderItem, au as ExecutionTaskAction, at as ExecutionTaskActionApply, af as ExecutionTaskHitBy, as as ExecutionTaskInsightAssertion, ar as ExecutionTaskInsightAssertionApply, aq as ExecutionTaskInsightAssertionParam, aj as ExecutionTaskInsightDumpLog, al as ExecutionTaskInsightLocate, ak as ExecutionTaskInsightLocateApply, ai as ExecutionTaskInsightLocateOutput, ah as ExecutionTaskInsightLocateParam, ap as ExecutionTaskInsightQuery, ao as ExecutionTaskInsightQueryApply, an as ExecutionTaskInsightQueryOutput, am as ExecutionTaskInsightQueryParam, aw as ExecutionTaskLog, av as ExecutionTaskLogApply, ay as ExecutionTaskPlanning, ax as ExecutionTaskPlanningApply, ag as ExecutionTaskReturn, ad as ExecutionTaskType, ae as ExecutorContext, b1 as FreeFn, az as GroupedActionDump, K as InsightDump, N as LiteUISection, aF as LocateOption, H as LocateResultElement, B as LocateValidatorResult, z as LocatorValidatorOption, b4 as MidsceneYamlConfig, b5 as MidsceneYamlConfigOutput, n as MidsceneYamlConfigResult, l as MidsceneYamlFlowItem, aM as MidsceneYamlFlowItemAIAction, aR as MidsceneYamlFlowItemAIAsk, aN as MidsceneYamlFlowItemAIAssert, aS as MidsceneYamlFlowItemAIBoolean, aW as MidsceneYamlFlowItemAIHover, aX as MidsceneYamlFlowItemAIInput, aY as MidsceneYamlFlowItemAIKeyboardPress, aT as MidsceneYamlFlowItemAILocate, aP as MidsceneYamlFlowItemAINumber, aO as MidsceneYamlFlowItemAIQuery, m as MidsceneYamlFlowItemAIRightClick, aZ as MidsceneYamlFlowItemAIScroll, aQ as MidsceneYamlFlowItemAIString, aV as MidsceneYamlFlowItemAITap, aU as MidsceneYamlFlowItemAIWaitFor, a_ as MidsceneYamlFlowItemEvaluateJavaScript, b0 as MidsceneYamlFlowItemLogScreenshot, a$ as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aK as MidsceneYamlScriptAndroidEnv, aL as MidsceneYamlScriptEnv, aI as MidsceneYamlScriptEnvBase, aJ as MidsceneYamlScriptWebEnv, k as MidsceneYamlTask, Q as OnTaskStartTip, aA as PageType, P as PartialInsightDumpFromSDK, Y as PlanningAIResponse, X as PlanningAction, a6 as PlanningActionParamAndroidLongPress, a7 as PlanningActionParamAndroidPull, a2 as PlanningActionParamAssert, a4 as PlanningActionParamError, _ as PlanningActionParamHover, a0 as PlanningActionParamInputOrKeyPress, $ as PlanningActionParamRightClick, a1 as PlanningActionParamScroll, a3 as PlanningActionParamSleep, Z as PlanningActionParamTap, a5 as PlanningActionParamWaitFor, W as PlanningLocateParam, ab as PlaywrightParserOpt, aa as PuppeteerParserOpt, aG as ReferenceImage, R as ReportDumpWithAttributes, b3 as ScriptPlayerStatusValue, b2 as ScriptPlayerTaskStatus, aE as StreamingAIResponse, aC as StreamingCallback, aB as StreamingCodeGenerationOptions, aH as scrollParam } from './types-512d3687.js';
3
+ import { c as callAiFn } from './llm-planning-877248da.js';
4
+ export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-877248da.js';
5
5
  import { BaseElement, Rect } from '@midscene/shared/types';
6
6
  export { BaseElement, ElementTreeNode, Point, Rect, Size } from '@midscene/shared/types';
7
7
  export { getVersion } from './utils.js';
@@ -1,4 +1,4 @@
1
- import { p as AIUsageInfo, U as UIContext, i as TUserPrompt, aG as ReferenceImage, u as AIElementLocatorResponse, Q as ElementById, T as TMultimodalPrompt, h as InsightExtractOption, x as AIDataExtractionResponse, z as AIAssertionResponse, aB as PageType, Z as PlanningAIResponse } from './types-7b64b80b.js';
1
+ import { o as AIUsageInfo, U as UIContext, i as TUserPrompt, aG as ReferenceImage, t as AIElementLocatorResponse, O as ElementById, T as TMultimodalPrompt, h as InsightExtractOption, w as AIDataExtractionResponse, y as AIAssertionResponse, aA as PageType, Y as PlanningAIResponse } from './types-512d3687.js';
2
2
  import { Rect, ElementTreeNode, BaseElement } from '@midscene/shared/types';
3
3
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
4
4
 
@@ -102,4 +102,4 @@ declare function plan(userInstruction: string, opts: {
102
102
  actionContext?: string;
103
103
  }): Promise<PlanningAIResponse>;
104
104
 
105
- export { AiLocateElement as A, AiAssert as a, AIActionType as b, callAiFn as c, describeUserPage as d, elementByPositionWithElementInfo as e, AiExtractElementInfo as f, AiLocateSection as g, adaptBboxToRect as h, plan as p };
105
+ export { AiLocateElement as A, AiAssert as a, AIActionType as b, callAiFn as c, describeUserPage as d, type AIArgs as e, elementByPositionWithElementInfo as f, AiExtractElementInfo as g, AiLocateSection as h, adaptBboxToRect as i, plan as p };
@@ -1,12 +1,11 @@
1
1
  import { NodeType } from '@midscene/shared/constants';
2
- import { BaseElement, Rect, ElementTreeNode, Size } from '@midscene/shared/types';
2
+ import { Rect, BaseElement, ElementTreeNode, Size } from '@midscene/shared/types';
3
3
  import { ChatCompletionMessageParam } from 'openai/resources';
4
4
 
5
5
  interface LocateOption {
6
6
  deepThink?: boolean;
7
7
  cacheable?: boolean;
8
8
  xpath?: string;
9
- _forceContext?: UIContext<BaseElement>;
10
9
  }
11
10
  interface InsightExtractOption {
12
11
  domIncluded?: boolean | 'visible-only';
@@ -563,4 +562,4 @@ type TUserPrompt = string | ({
563
562
  prompt: string;
564
563
  } & Partial<TMultimodalPrompt>);
565
564
 
566
- export { type PlanningActionParamHover as $, type AIDescribeElementResponse as A, type LocatorValidatorOption as B, type LocateValidatorResult as C, type DumpSubscriber as D, type ExecutionTask as E, type AgentDescribeElementAtPointResult as F, type CallAIFn as G, type EnsureObject as H, type InsightAction as I, type LocateResultElement as J, type DumpMeta as K, type LocateResult as L, type MidsceneYamlScript as M, type InsightDump as N, type LiteUISection as O, type PartialInsightDumpFromSDK as P, type ElementById as Q, type ReportDumpWithAttributes as R, type OnTaskStartTip as S, type TMultimodalPrompt as T, UIContext as U, type AgentWaitForOpt as V, type AgentAssertOpt as W, type PlanningLocateParam as X, type PlanningAction as Y, type PlanningAIResponse as Z, type PlanningActionParamTap as _, type ExecutionTaskProgressOptions as a, type MidsceneYamlFlowItemSleep as a$, type PlanningActionParamRightClick as a0, type PlanningActionParamInputOrKeyPress as a1, type PlanningActionParamScroll as a2, type PlanningActionParamAssert as a3, type PlanningActionParamSleep as a4, type PlanningActionParamError as a5, type PlanningActionParamWaitFor as a6, type PlanningActionParamAndroidLongPress as a7, type PlanningActionParamAndroidPull as a8, type Color as a9, type GroupedActionDump as aA, type PageType as aB, type StreamingCodeGenerationOptions as aC, type StreamingCallback as aD, type CodeGenerationChunk as aE, type StreamingAIResponse as aF, type ReferenceImage as aG, type scrollParam as aH, type MidsceneYamlScriptEnvBase as aI, type MidsceneYamlScriptWebEnv as aJ, type MidsceneYamlScriptAndroidEnv as aK, type MidsceneYamlScriptEnv as aL, type MidsceneYamlFlowItemAIAction as aM, type MidsceneYamlFlowItemAIAssert as aN, type MidsceneYamlFlowItemAIQuery as aO, type MidsceneYamlFlowItemAINumber as aP, type MidsceneYamlFlowItemAIString as aQ, type MidsceneYamlFlowItemAIAsk as aR, type MidsceneYamlFlowItemAIBoolean as aS, type MidsceneYamlFlowItemAILocate as aT, type MidsceneYamlFlowItemAIWaitFor as aU, type MidsceneYamlFlowItemAITap as aV, type MidsceneYamlFlowItemAIHover as aW, type MidsceneYamlFlowItemAIInput as aX, type MidsceneYamlFlowItemAIKeyboardPress as aY, type MidsceneYamlFlowItemAIScroll as aZ, type MidsceneYamlFlowItemEvaluateJavaScript as a_, type BaseAgentParserOpt as aa, type PuppeteerParserOpt as ab, type PlaywrightParserOpt as ac, type ExecutionRecorderItem as ad, type ExecutionTaskType as ae, type ExecutorContext as af, type ExecutionTaskHitBy as ag, type ExecutionTaskReturn as ah, type ExecutionTaskInsightLocateParam as ai, type ExecutionTaskInsightLocateOutput as aj, type ExecutionTaskInsightDumpLog as ak, type ExecutionTaskInsightLocateApply as al, type ExecutionTaskInsightLocate as am, type ExecutionTaskInsightQueryParam as an, type ExecutionTaskInsightQueryOutput as ao, type ExecutionTaskInsightQueryApply as ap, type ExecutionTaskInsightQuery as aq, type ExecutionTaskInsightAssertionParam as ar, type ExecutionTaskInsightAssertionApply as as, type ExecutionTaskInsightAssertion as at, type ExecutionTaskActionApply as au, type ExecutionTaskAction as av, type ExecutionTaskLogApply as aw, type ExecutionTaskLog as ax, type ExecutionTaskPlanningApply as ay, type ExecutionTaskPlanning as az, type ExecutionTaskApply as b, type MidsceneYamlFlowItemLogScreenshot as b0, type FreeFn as b1, type ScriptPlayerTaskStatus as b2, type ScriptPlayerStatusValue as b3, type MidsceneYamlConfig as b4, type MidsceneYamlConfigOutput as b5, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractParam as g, type InsightExtractOption as h, type TUserPrompt as i, type InsightAssertionResponse as j, type MidsceneYamlTask as k, type MidsceneYamlFlowItem as l, type MidsceneYamlFlowItemAIRightClick as m, type MidsceneYamlConfigResult as n, type LocateOption as o, type AIUsageInfo as p, AIResponseFormat as q, type AISingleElementResponseById as r, type AISingleElementResponseByPosition as s, type AISingleElementResponse as t, type AIElementLocatorResponse as u, type AIElementCoordinatesResponse as v, type AIElementResponse as w, type AIDataExtractionResponse as x, type AISectionLocatorResponse as y, type AIAssertionResponse as z };
565
+ export { type PlanningActionParamRightClick as $, type AIDescribeElementResponse as A, type LocateValidatorResult as B, type AgentDescribeElementAtPointResult as C, type DumpSubscriber as D, type ExecutionTask as E, type CallAIFn as F, type EnsureObject as G, type LocateResultElement as H, type InsightAction as I, type DumpMeta as J, type InsightDump as K, type LocateResult as L, type MidsceneYamlScript as M, type LiteUISection as N, type ElementById as O, type PartialInsightDumpFromSDK as P, type OnTaskStartTip as Q, type ReportDumpWithAttributes as R, type AgentWaitForOpt as S, type TMultimodalPrompt as T, UIContext as U, type AgentAssertOpt as V, type PlanningLocateParam as W, type PlanningAction as X, type PlanningAIResponse as Y, type PlanningActionParamTap as Z, type PlanningActionParamHover as _, type ExecutionTaskProgressOptions as a, type MidsceneYamlFlowItemSleep as a$, type PlanningActionParamInputOrKeyPress as a0, type PlanningActionParamScroll as a1, type PlanningActionParamAssert as a2, type PlanningActionParamSleep as a3, type PlanningActionParamError as a4, type PlanningActionParamWaitFor as a5, type PlanningActionParamAndroidLongPress as a6, type PlanningActionParamAndroidPull as a7, type Color as a8, type BaseAgentParserOpt as a9, type PageType as aA, type StreamingCodeGenerationOptions as aB, type StreamingCallback as aC, type CodeGenerationChunk as aD, type StreamingAIResponse as aE, type LocateOption as aF, type ReferenceImage as aG, type scrollParam as aH, type MidsceneYamlScriptEnvBase as aI, type MidsceneYamlScriptWebEnv as aJ, type MidsceneYamlScriptAndroidEnv as aK, type MidsceneYamlScriptEnv as aL, type MidsceneYamlFlowItemAIAction as aM, type MidsceneYamlFlowItemAIAssert as aN, type MidsceneYamlFlowItemAIQuery as aO, type MidsceneYamlFlowItemAINumber as aP, type MidsceneYamlFlowItemAIString as aQ, type MidsceneYamlFlowItemAIAsk as aR, type MidsceneYamlFlowItemAIBoolean as aS, type MidsceneYamlFlowItemAILocate as aT, type MidsceneYamlFlowItemAIWaitFor as aU, type MidsceneYamlFlowItemAITap as aV, type MidsceneYamlFlowItemAIHover as aW, type MidsceneYamlFlowItemAIInput as aX, type MidsceneYamlFlowItemAIKeyboardPress as aY, type MidsceneYamlFlowItemAIScroll as aZ, type MidsceneYamlFlowItemEvaluateJavaScript as a_, type PuppeteerParserOpt as aa, type PlaywrightParserOpt as ab, type ExecutionRecorderItem as ac, type ExecutionTaskType as ad, type ExecutorContext as ae, type ExecutionTaskHitBy as af, type ExecutionTaskReturn as ag, type ExecutionTaskInsightLocateParam as ah, type ExecutionTaskInsightLocateOutput as ai, type ExecutionTaskInsightDumpLog as aj, type ExecutionTaskInsightLocateApply as ak, type ExecutionTaskInsightLocate as al, type ExecutionTaskInsightQueryParam as am, type ExecutionTaskInsightQueryOutput as an, type ExecutionTaskInsightQueryApply as ao, type ExecutionTaskInsightQuery as ap, type ExecutionTaskInsightAssertionParam as aq, type ExecutionTaskInsightAssertionApply as ar, type ExecutionTaskInsightAssertion as as, type ExecutionTaskActionApply as at, type ExecutionTaskAction as au, type ExecutionTaskLogApply as av, type ExecutionTaskLog as aw, type ExecutionTaskPlanningApply as ax, type ExecutionTaskPlanning as ay, type GroupedActionDump as az, type ExecutionTaskApply as b, type MidsceneYamlFlowItemLogScreenshot as b0, type FreeFn as b1, type ScriptPlayerTaskStatus as b2, type ScriptPlayerStatusValue as b3, type MidsceneYamlConfig as b4, type MidsceneYamlConfigOutput as b5, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractParam as g, type InsightExtractOption as h, type TUserPrompt as i, type InsightAssertionResponse as j, type MidsceneYamlTask as k, type MidsceneYamlFlowItem as l, type MidsceneYamlFlowItemAIRightClick as m, type MidsceneYamlConfigResult as n, type AIUsageInfo as o, AIResponseFormat as p, type AISingleElementResponseById as q, type AISingleElementResponseByPosition as r, type AISingleElementResponse as s, type AIElementLocatorResponse as t, type AIElementCoordinatesResponse as u, type AIElementResponse as v, type AIDataExtractionResponse as w, type AISectionLocatorResponse as x, type AIAssertionResponse as y, type LocatorValidatorOption as z };
@@ -1,4 +1,4 @@
1
- import { R as ReportDumpWithAttributes } from './types-7b64b80b.js';
1
+ import { R as ReportDumpWithAttributes } from './types-512d3687.js';
2
2
  import { Rect } from '@midscene/shared/types';
3
3
  import '@midscene/shared/constants';
4
4
  import 'openai/resources';
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "0.24.2-beta-20250801111909.0",
4
+ "version": "0.25.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "jsnext:source": "./src/index.ts",
@@ -44,8 +44,8 @@
44
44
  "langsmith": "0.3.7",
45
45
  "openai": "4.81.0",
46
46
  "socks-proxy-agent": "8.0.4",
47
- "@midscene/recorder": "0.24.2-beta-20250801111909.0",
48
- "@midscene/shared": "0.24.2-beta-20250801111909.0"
47
+ "@midscene/recorder": "0.25.0",
48
+ "@midscene/shared": "0.25.0"
49
49
  },
50
50
  "devDependencies": {
51
51
  "@modern-js/module-tools": "2.60.6",