@midscene/core 0.17.4-beta-20250528162713.0 → 0.17.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/es/ai-model.d.ts +4 -3
  2. package/dist/es/ai-model.js +1 -1
  3. package/dist/es/{chunk-5HHR4GGS.js → chunk-7MNTWX2A.js} +52 -50
  4. package/dist/es/chunk-7MNTWX2A.js.map +1 -0
  5. package/dist/es/{chunk-CDRBBE7D.js → chunk-A22YWG37.js} +3 -3
  6. package/dist/es/index.d.ts +10 -8
  7. package/dist/es/index.js +5 -4
  8. package/dist/es/index.js.map +1 -1
  9. package/dist/es/{llm-planning-9cfa38ad.d.ts → llm-planning-573b9b34.d.ts} +5 -1
  10. package/dist/es/tree.d.ts +1 -12
  11. package/dist/es/tree.js +5 -94
  12. package/dist/es/tree.js.map +1 -1
  13. package/dist/es/{types-c892e193.d.ts → types-8c197f92.d.ts} +10 -34
  14. package/dist/es/utils.d.ts +2 -1
  15. package/dist/es/utils.js +1 -1
  16. package/dist/lib/ai-model.d.ts +4 -3
  17. package/dist/lib/ai-model.js +2 -2
  18. package/dist/lib/{chunk-5HHR4GGS.js → chunk-7MNTWX2A.js} +46 -44
  19. package/dist/lib/chunk-7MNTWX2A.js.map +1 -0
  20. package/dist/lib/{chunk-CDRBBE7D.js → chunk-A22YWG37.js} +3 -3
  21. package/dist/lib/index.d.ts +10 -8
  22. package/dist/lib/index.js +15 -14
  23. package/dist/lib/index.js.map +1 -1
  24. package/dist/lib/{llm-planning-9cfa38ad.d.ts → llm-planning-573b9b34.d.ts} +5 -1
  25. package/dist/lib/tree.d.ts +1 -12
  26. package/dist/lib/tree.js +7 -96
  27. package/dist/lib/tree.js.map +1 -1
  28. package/dist/{types/types-c892e193.d.ts → lib/types-8c197f92.d.ts} +10 -34
  29. package/dist/lib/utils.d.ts +2 -1
  30. package/dist/lib/utils.js +2 -2
  31. package/dist/types/ai-model.d.ts +4 -3
  32. package/dist/types/index.d.ts +10 -8
  33. package/dist/types/{llm-planning-9cfa38ad.d.ts → llm-planning-573b9b34.d.ts} +5 -1
  34. package/dist/types/tree.d.ts +1 -12
  35. package/dist/{lib/types-c892e193.d.ts → types/types-8c197f92.d.ts} +10 -34
  36. package/dist/types/utils.d.ts +2 -1
  37. package/package.json +2 -2
  38. package/dist/es/chunk-5HHR4GGS.js.map +0 -1
  39. package/dist/lib/chunk-5HHR4GGS.js.map +0 -1
  40. package/dist/es/{chunk-CDRBBE7D.js.map → chunk-A22YWG37.js.map} +0 -0
  41. package/dist/lib/{chunk-CDRBBE7D.js.map → chunk-A22YWG37.js.map} +0 -0
@@ -1,7 +1,9 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, R as Rect, A as AIDescribeElementResponse } from './types-c892e193.js';
2
- export { v as AIAssertionResponse, t as AIDataExtractionResponse, r as AIElementCoordinatesResponse, q as AIElementLocatorResponse, s as AIElementResponse, m as AIResponseFormat, u as AISectionLocatorResponse, p as AISingleElementResponse, n as AISingleElementResponseById, o as AISingleElementResponseByPosition, l as AIUsageInfo, W as AgentAssertOpt, y as AgentDescribeElementAtPointResult, V as AgentWaitForOpt, a8 as BaseAgentParserOpt, C as CallAIFn, a7 as Color, H as DumpMeta, Q as ElementById, k as ElementTreeNode, z as EnsureObject, ab as ExecutionRecorderItem, at as ExecutionTaskAction, as as ExecutionTaskActionApply, ar as ExecutionTaskInsightAssertion, aq as ExecutionTaskInsightAssertionApply, ap as ExecutionTaskInsightAssertionParam, ai as ExecutionTaskInsightDumpLog, ak as ExecutionTaskInsightLocate, aj as ExecutionTaskInsightLocateApply, ah as ExecutionTaskInsightLocateOutput, ag as ExecutionTaskInsightLocateParam, ao as ExecutionTaskInsightQuery, an as ExecutionTaskInsightQueryApply, am as ExecutionTaskInsightQueryOutput, al as ExecutionTaskInsightQueryParam, av as ExecutionTaskPlanning, au as ExecutionTaskPlanningApply, af as ExecutionTaskReturn, ac as ExecutionTaskType, ad as ExecutorContext, aU as FreeFn, aw as GroupedActionDump, K as InsightDump, F as InsightExtractParam, O as LiteUISection, ay as LocateOption, G as LocateResultElement, x as LocateValidatorResult, w as LocatorValidatorOption, i as MidsceneYamlFlowItem, aF as MidsceneYamlFlowItemAIAction, aG as MidsceneYamlFlowItemAIAssert, aK as MidsceneYamlFlowItemAIBoolean, aO as MidsceneYamlFlowItemAIHover, aP as MidsceneYamlFlowItemAIInput, aQ as MidsceneYamlFlowItemAIKeyboardPress, aL as MidsceneYamlFlowItemAILocate, aJ as MidsceneYamlFlowItemAINString, aI as MidsceneYamlFlowItemAINumber, aH as MidsceneYamlFlowItemAIQuery, j as MidsceneYamlFlowItemAIRightClick, aR as MidsceneYamlFlowItemAIScroll, aN as MidsceneYamlFlowItemAITap, aM as MidsceneYamlFlowItemAIWaitFor, aS as 
MidsceneYamlFlowItemEvaluateJavaScript, aT as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aD as MidsceneYamlScriptAndroidEnv, aE as MidsceneYamlScriptEnv, aB as MidsceneYamlScriptEnvBase, aC as MidsceneYamlScriptWebEnv, h as MidsceneYamlTask, T as OnTaskStartTip, ax as PageType, N as PartialInsightDumpFromSDK, Z as PlanningAIResponse, Y as PlanningAction, a3 as PlanningActionParamAssert, a5 as PlanningActionParamError, $ as PlanningActionParamHover, a1 as PlanningActionParamInputOrKeyPress, a0 as PlanningActionParamRightClick, a2 as PlanningActionParamScroll, a4 as PlanningActionParamSleep, _ as PlanningActionParamTap, a6 as PlanningActionParamWaitFor, X as PlanningLocateParam, aa as PlaywrightParserOpt, P as Point, a9 as PuppeteerParserOpt, az as ReferenceImage, J as ReportDumpWithAttributes, aW as ScriptPlayerStatusValue, aV as ScriptPlayerTaskStatus, S as Size, ae as TaskCacheInfo, aA as scrollParam } from './types-c892e193.js';
3
- import { c as callAiFn } from './llm-planning-9cfa38ad.js';
4
- export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-9cfa38ad.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractOption, h as InsightAssertionResponse, A as AIDescribeElementResponse } from './types-8c197f92.js';
2
+ export { v as AIAssertionResponse, t as AIDataExtractionResponse, r as AIElementCoordinatesResponse, q as AIElementLocatorResponse, s as AIElementResponse, m as AIResponseFormat, u as AISectionLocatorResponse, p as AISingleElementResponse, n as AISingleElementResponseById, o as AISingleElementResponseByPosition, l as AIUsageInfo, Q as AgentAssertOpt, y as AgentDescribeElementAtPointResult, N as AgentWaitForOpt, a4 as BaseAgentParserOpt, C as CallAIFn, a3 as Color, G as DumpMeta, K as ElementById, z as EnsureObject, a7 as ExecutionRecorderItem, ap as ExecutionTaskAction, ao as ExecutionTaskActionApply, an as ExecutionTaskInsightAssertion, am as ExecutionTaskInsightAssertionApply, al as ExecutionTaskInsightAssertionParam, ae as ExecutionTaskInsightDumpLog, ag as ExecutionTaskInsightLocate, af as ExecutionTaskInsightLocateApply, ad as ExecutionTaskInsightLocateOutput, ac as ExecutionTaskInsightLocateParam, ak as ExecutionTaskInsightQuery, aj as ExecutionTaskInsightQueryApply, ai as ExecutionTaskInsightQueryOutput, ah as ExecutionTaskInsightQueryParam, ar as ExecutionTaskPlanning, aq as ExecutionTaskPlanningApply, ab as ExecutionTaskReturn, a8 as ExecutionTaskType, a9 as ExecutorContext, aQ as FreeFn, as as GroupedActionDump, H as InsightDump, B as InsightExtractParam, J as LiteUISection, au as LocateOption, F as LocateResultElement, x as LocateValidatorResult, w as LocatorValidatorOption, j as MidsceneYamlFlowItem, aB as MidsceneYamlFlowItemAIAction, aC as MidsceneYamlFlowItemAIAssert, aG as MidsceneYamlFlowItemAIBoolean, aK as MidsceneYamlFlowItemAIHover, aL as MidsceneYamlFlowItemAIInput, aM as MidsceneYamlFlowItemAIKeyboardPress, aH as MidsceneYamlFlowItemAILocate, aF as MidsceneYamlFlowItemAINString, aE as MidsceneYamlFlowItemAINumber, aD as MidsceneYamlFlowItemAIQuery, k as MidsceneYamlFlowItemAIRightClick, aN as MidsceneYamlFlowItemAIScroll, aJ as MidsceneYamlFlowItemAITap, aI as MidsceneYamlFlowItemAIWaitFor, aO as MidsceneYamlFlowItemEvaluateJavaScript, aP 
as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, az as MidsceneYamlScriptAndroidEnv, aA as MidsceneYamlScriptEnv, ax as MidsceneYamlScriptEnvBase, ay as MidsceneYamlScriptWebEnv, i as MidsceneYamlTask, O as OnTaskStartTip, at as PageType, P as PartialInsightDumpFromSDK, V as PlanningAIResponse, T as PlanningAction, $ as PlanningActionParamAssert, a1 as PlanningActionParamError, X as PlanningActionParamHover, Z as PlanningActionParamInputOrKeyPress, Y as PlanningActionParamRightClick, _ as PlanningActionParamScroll, a0 as PlanningActionParamSleep, W as PlanningActionParamTap, a2 as PlanningActionParamWaitFor, S as PlanningLocateParam, a6 as PlaywrightParserOpt, a5 as PuppeteerParserOpt, av as ReferenceImage, R as ReportDumpWithAttributes, aS as ScriptPlayerStatusValue, aR as ScriptPlayerTaskStatus, aa as TaskCacheInfo, aw as scrollParam } from './types-8c197f92.js';
3
+ import { c as callAiFn } from './llm-planning-573b9b34.js';
4
+ export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-573b9b34.js';
5
+ import { BaseElement, Rect } from '@midscene/shared/types';
6
+ export { BaseElement, ElementTreeNode, Point, Rect, Size } from '@midscene/shared/types';
5
7
  export { getVersion } from './utils.js';
6
8
  export { MIDSCENE_MODEL_NAME, getAIConfig } from '@midscene/shared/env';
7
9
  import '@midscene/shared/constants';
@@ -34,13 +36,13 @@ declare class Insight<ElementType extends BaseElement = BaseElement, ContextType
34
36
  taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
35
37
  constructor(context: ContextType | ((action: InsightAction) => Promise<ContextType> | ContextType), opt?: InsightOptions);
36
38
  locate(query: DetailedLocateParam, opt?: LocateOpts): Promise<LocateResult>;
37
- extract<T = any>(input: string): Promise<T>;
38
- extract<T extends Record<string, string>>(input: T): Promise<Record<keyof T, any>>;
39
- extract<T extends object>(input: Record<keyof T, string>): Promise<T>;
39
+ extract<T = any>(input: string, opt?: InsightExtractOption): Promise<T>;
40
+ extract<T extends Record<string, string>>(input: T, opt?: InsightExtractOption): Promise<Record<keyof T, any>>;
41
+ extract<T extends object>(input: Record<keyof T, string>, opt?: InsightExtractOption): Promise<T>;
40
42
  assert(assertion: string): Promise<InsightAssertionResponse>;
41
43
  describe(target: Rect | [number, number], opt?: {
42
44
  deepThink?: boolean;
43
45
  }): Promise<Pick<AIDescribeElementResponse, 'description'>>;
44
46
  }
45
47
 
46
- export { AIDescribeElementResponse, BaseElement, DetailedLocateParam, DumpSubscriber, ExecutionDump, ExecutionTask, ExecutionTaskApply, ExecutionTaskProgressOptions, Executor, Insight, InsightAction, InsightAssertionResponse, InsightOptions, InsightTaskInfo, LocateResult, Rect, UIContext, Insight as default };
48
+ export { AIDescribeElementResponse, DetailedLocateParam, DumpSubscriber, ExecutionDump, ExecutionTask, ExecutionTaskApply, ExecutionTaskProgressOptions, Executor, Insight, InsightAction, InsightAssertionResponse, InsightExtractOption, InsightOptions, InsightTaskInfo, LocateResult, UIContext, Insight as default };
package/dist/es/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  getVersion
3
- } from "./chunk-CDRBBE7D.js";
3
+ } from "./chunk-A22YWG37.js";
4
4
  import {
5
5
  AiAssert,
6
6
  AiExtractElementInfo,
@@ -11,7 +11,7 @@ import {
11
11
  describeUserPage,
12
12
  expandSearchArea,
13
13
  plan
14
- } from "./chunk-5HHR4GGS.js";
14
+ } from "./chunk-7MNTWX2A.js";
15
15
 
16
16
  // src/ai-model/action-executor.ts
17
17
  import {
@@ -378,7 +378,7 @@ ${parseResult.errors.join("\n")}`;
378
378
  rect
379
379
  };
380
380
  }
381
- async extract(dataDemand) {
381
+ async extract(dataDemand, opt) {
382
382
  assert2(
383
383
  typeof dataDemand === "object" || typeof dataDemand === "string",
384
384
  `dataDemand should be object or string, but get ${typeof dataDemand}`
@@ -389,7 +389,8 @@ ${parseResult.errors.join("\n")}`;
389
389
  const startTime = Date.now();
390
390
  const { parseResult, usage } = await AiExtractElementInfo({
391
391
  context,
392
- dataQuery: dataDemand
392
+ dataQuery: dataDemand,
393
+ extractOption: opt
393
394
  });
394
395
  const timeCost = Date.now() - startTime;
395
396
  const taskInfo = {
@@ -1 +1 @@
1
- {"version":3,"mappings":";;;;;;;;;;;;;;;;AAUA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,cAAc;AAEhB,IAAM,WAAN,MAAe;AAAA,EAUpB,YACE,MACA,SAGA;AACA,SAAK,SACH,SAAS,SAAS,QAAQ,MAAM,SAAS,IAAI,YAAY;AAC3D,SAAK,OAAO;AACZ,SAAK,SAAS,SAAS,SAAS,CAAC,GAAG;AAAA,MAAI,CAAC,SACvC,KAAK,kBAAkB,IAAI;AAAA,IAC7B;AACA,SAAK,cAAc,SAAS;AAAA,EAC9B;AAAA,EAEQ,kBAAkB,MAAyC;AACjE,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,MAAgE;AAC3E;AAAA,MACE,KAAK,WAAW;AAAA,MAChB;AAAA,QAAyD,KAAK,gBAAgB,GAAG,KAAK;AAAA,EAAK,KAAK,gBAAgB,GAAG,UAAU;AAAA,IAC/H;AACA,QAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,WAAK,MAAM,KAAK,GAAG,KAAK,IAAI,CAAC,SAAS,KAAK,kBAAkB,IAAI,CAAC,CAAC;AAAA,IACrE,OAAO;AACL,WAAK,MAAM,KAAK,KAAK,kBAAkB,IAAI,CAAC;AAAA,IAC9C;AACA,QAAI,KAAK,WAAW,WAAW;AAC7B,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAM,QAAsB;AAC1B,QAAI,KAAK,WAAW,UAAU,KAAK,MAAM,SAAS,GAAG;AACnD,cAAQ;AAAA,QACN;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,WAAW,WAAW,6BAA6B;AAC/D,WAAO,KAAK,WAAW,aAAa,+BAA+B;AACnE,WAAO,KAAK,WAAW,SAAS,4BAA4B;AAE5D,UAAM,mBAAmB,KAAK,MAAM;AAAA,MAClC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,mBAAmB,GAAG;AAExB;AAAA,IACF;AAEA,SAAK,SAAS;AACd,QAAI,YAAY;AAChB,QAAI,wBAAwB;AAE5B,QAAI;AAEJ,WAAO,YAAY,KAAK,MAAM,QAAQ;AACpC,YAAM,OAAO,KAAK,MAAM,SAAS;AACjC;AAAA,QACE,KAAK,WAAW;AAAA,QAChB,2CAA2C,KAAK,MAAM;AAAA,MACxD;AACA,WAAK,SAAS;AAAA,QACZ,OAAO,KAAK,IAAI;AAAA,MAClB;AACA,UAAI;AACF,aAAK,SAAS;AACd,YAAI;AACF,cAAI,KAAK,aAAa;AACpB,kBAAM,KAAK,YAAY,IAAI;AAAA,UAC7B;AAAA,QACF,SAAS,GAAG;AACV,kBAAQ,MAAM,wBAAwB,CAAC;AAAA,QACzC;AACA;AAAA,UACE,CAAC,WAAW,UAAU,UAAU,EAAE,QAAQ,KAAK,IAAI,KAAK;AAAA,UACxD,0BAA0B,KAAK,IAAI;AAAA,QACrC;AAEA,cAAM,EAAE,UAAU,MAAM,IAAI;AAC5B,eAAO,UAAU,uCAAuC,KAAK,IAAI,EAAE;AAEnE,YAAI;AACJ,cAAM,kBAAmC;AAAA,UACvC;AAAA,UACA,SAAS,oBAAoB;AAAA,QAC/B;AAEA,YAAI,KAAK,SAAS,WAAW;AAC3B;AAAA,YACE,KAAK,YAAY,YACf,KAAK,YAAY,WACjB,KAAK,YAAY,YACjB,KAAK,YAAY,aACjB,KAAK,YAAY,YACjB,KAAK,YAAY;AAAA,YACnB,gCAAgC,KAAK,OAAO;AAAA,UAC9C;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AACxD,cAAI,KAAK,YAAY,UAAU;AAC7B,iCACE,aACC;AAAA,UACL;AAA
A,QACF,WAAW,KAAK,SAAS,YAAY,KAAK,SAAS,YAAY;AAC7D,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D,OAAO;AACL,kBAAQ;AAAA,YACN,0BAA0B,KAAK,IAAI;AAAA,UACrC;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D;AAEA,eAAO,OAAO,MAAM,WAAW;AAC/B,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD,aAAK,OAAO,SAAU,aAAqB,UAAU;AACrD;AAAA,MACF,SAAS,GAAQ;AACf,gCAAwB;AACxB,aAAK,QACH,GAAG,YAAY,OAAO,MAAM,WAAW,IAAI;AAC7C,aAAK,aAAa,EAAE;AAEpB,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD;AAAA,MACF;AAAA,IACF;AAGA,aAAS,IAAI,YAAY,GAAG,IAAI,KAAK,MAAM,QAAQ,KAAK;AACtD,WAAK,MAAM,CAAC,EAAE,SAAS;AAAA,IACzB;AAEA,QAAI,uBAAuB;AACzB,WAAK,SAAS;AAAA,IAChB,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAEA,QAAI,KAAK,MAAM,QAAQ;AAErB,YAAM,cAAc,KAAK,IAAI,WAAW,KAAK,MAAM,SAAS,CAAC;AAC7D,aAAO,KAAK,MAAM,WAAW,EAAE;AAAA,IACjC;AAAA,EACF;AAAA,EAEA,iBAA0B;AACxB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,kBAAwC;AACtC,QAAI,KAAK,WAAW,SAAS;AAC3B,aAAO;AAAA,IACT;AACA,UAAM,iBAAiB,KAAK,MAAM;AAAA,MAChC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,kBAAkB,GAAG;AACvB,aAAO,KAAK,MAAM,cAAc;AAAA,IAClC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,OAAsB;AACpB,QAAI,mBAAmB;AAEvB,QAAI,aAAa,GAAG;AAClB,YAAM,iBAAiB,mBAAmB;AAC1C,UAAI,gBAAgB;AAClB,2BAAmB,WAAW,cAAc;AAAA,MAC9C,OAAO;AACL,2BAAmB,GAAG,aAAa,CAAC;AAAA,MACtC;AAAA,IACF;AACA,UAAM,WAA0B;AAAA,MAC9B,YAAY,WAAW;AAAA,MACvB,YAAY,YAAY,mBAAmB,KAAK;AAAA,MAChD,mBAAmB;AAAA,MACnB,SAAS,KAAK,IAAI;AAAA,MAClB,MAAM,KAAK;AAAA,MACX,OAAO,KAAK;AAAA,IACd;AACA,WAAO;AAAA,EACT;AACF;;;AC9NA,SAAS,4BAA4B;AAE9B,IAAM,8BAA8B,MAAM;AAC/C,SAAO,mNAAmN,qBAAqB,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAoBlP;;;ACOA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA,gBAAAA;AAAA,OACK;AACP,SAAS,yBAAyB,kBAAkB;AACpD,SAAS,gBAAgB;AACzB,SAAS,UAAAC,eAAc;;;AC/BvB;AAAA,EACE,uBAAAC;AAAA,EACA,eAAAC;AAAA,OAGK;AACP,SAAS,YAAY;AAEd,SAAS,gBACd,MACA,gBACA;AACA,QAAM,WAAqB;AAAA,IACzB,YAAY,WAAW;AAAA,IACvB,SAAS,KAAK,IAAI;AAAA,IAClB,YAAYA,aAAYD,oBAAmB,KAAK;AAAA,EAClD;
AACA,QAAM,YAAyB;AAAA,IAC7B,OAAO,KAAK;AAAA,IACZ,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAEA,mBAAiB,SAAS;AAC5B;;;ADmBA,IAAM,QAAQ,SAAS,YAAY;AACnC,IAAqB,UAArB,MAGE;AAAA,EAWA,YACE,SAGA,KACA;AAXF,sBAAoD;AAYlD,IAAAD,QAAO,SAAS,iCAAiC;AACjD,QAAI,OAAO,YAAY,YAAY;AACjC,WAAK,qBAAqB;AAAA,IAC5B,OAAO;AACL,WAAK,qBAAqB,MAAM,QAAQ,QAAQ,OAAO;AAAA,IACzD;AAEA,QAAI,OAAO,KAAK,eAAe,aAAa;AAC1C,WAAK,aAAa,IAAI;AAAA,IACxB;AACA,QAAI,OAAO,KAAK,aAAa,aAAa;AACxC,WAAK,WAAW,IAAI;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,MAAM,OACJ,OACA,KACuB;AACvB,UAAM,EAAE,OAAO,IAAI,OAAO,CAAC;AAC3B,UAAM,cAAc,OAAO,UAAU,WAAW,QAAQ,MAAM;AAC9D,IAAAA,QAAO,aAAa,8BAA8B;AAClD,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,IAAAA,QAAO,OAAO,UAAU,UAAU,sCAAsC;AAExE,UAAM,wBAAwB;AAAA,MAC5B;AAAA,IACF;AACA,QAAI,uBAAuB;AACzB,YAAM,yBAAyB,qBAAqB;AAAA,IACtD;AACA,QAAI;AACJ,QAAI,MAAM,aAAa,uBAAuB;AAC5C,yBAAmB,MAAM;AAAA,IAC3B;AAEA,QAAI,oBAAoB,CAACD,cAAa,GAAG;AACvC,cAAQ;AAAA,QACN;AAAA,MACF;AACA,yBAAmB;AAAA,IACrB;AAEA,UAAM,UAAU,KAAK,WAAY,MAAM,KAAK,mBAAmB,QAAQ;AAEvE,QAAI,aAA+B;AACnC,QAAI,wBAA4C;AAChD,QAAI,kBAA2C;AAC/C,QAAI,qBAEY;AAChB,QAAI,kBAAkB;AACpB,2BAAqB,MAAM,gBAAgB;AAAA,QACzC;AAAA,QACA,oBAAoB;AAAA,MACtB,CAAC;AACD,MAAAC;AAAA,QACE,mBAAmB;AAAA,QACnB,gCAAgC,gBAAgB,IAC9C,mBAAmB,QAAQ,KAAK,mBAAmB,KAAK,KAAK,EAC/D;AAAA,MACF;AACA,8BAAwB,mBAAmB;AAC3C,wBAAkB,mBAAmB;AACrC,mBAAa,mBAAmB;AAAA,IAClC;AAEA,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,aAAa,aAAa,MAAM,IACzD,MAAM,gBAAgB;AAAA,MACpB,QAAQ,UAAU,KAAK;AAAA,MACvB;AAAA,MACA,0BAA0B;AAAA,MAC1B,cAAc;AAAA,IAChB,CAAC;AAEH,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,MACvC,gBAAgB,KAAK,UAAU,WAAW;AAAA,MAC1C;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAgC,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAC1E;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT,SAAS;AAAA,MACX;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,aAAa;AAAA,MACb,MAAM;AAAA,MACN;AAAA,MACA,WAAW,CAAC,CAAC;AAAA,MACb,OAAO;AAAA,IACT;AAEA,UAAM,WAA0B,CAAC;AACjC,KAA
C,YAAY,YAAY,CAAC,GAAG,QAAQ,CAAC,SAAS;AAC7C,UAAI,QAAQ,MAAM;AAChB,cAAM,UAAU,YAAY,MAAM,EAAE;AAEpC,YAAI,CAAC,SAAS;AACZ,kBAAQ;AAAA,YACN,kCAAkC,KAAK,EAAE;AAAA,UAC3C;AACA;AAAA,QACF;AACA,iBAAS,KAAK,OAAO;AAAA,MACvB;AAAA,IACF,CAAC;AAED;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH,gBAAgB;AAAA,MAClB;AAAA,MACA;AAAA,IACF;AAEA,QAAI,UAAU;AACZ,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,IAAAA;AAAA,MACE,SAAS,UAAU;AAAA,MACnB,6CAA6C,SAAS,MAAM;AAAA,IAC9D;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO;AAAA,QACL,SAAS;AAAA,UACP,IAAI,SAAS,CAAC,EAAG;AAAA,UACjB,SAAS,SAAS,CAAC,EAAG;AAAA,UACtB,QAAQ,SAAS,CAAC,EAAG;AAAA,UACrB,MAAM,SAAS,CAAC,EAAG;AAAA,UACnB,QAAQ,SAAS,CAAC,EAAG,UAAU,CAAC;AAAA,UAChC,YAAY,SAAS,CAAC,EAAG;AAAA,QAC3B;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAAA,EAQA,MAAM,QAAW,YAA+C;AAC9D,IAAAA;AAAA,MACE,OAAO,eAAe,YAAY,OAAO,eAAe;AAAA,MACxD,kDAAkD,OAAO,UAAU;AAAA,IACrE;AACA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,SAAS;AAEvD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,IAAI,MAAM,qBAAwB;AAAA,MAC3D;AAAA,MACA,WAAW;AAAA,IACb,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,IACzC;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAwB,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAClE;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,OAAO;AAAA,IACT;AAEA,UAAM,EAAE,KAAK,IAAI,eAAe,CAAC;AAGjC;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAEA,QAAI,YAAY,CAAC,MAAM;AACrB,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,WAAO;AAAA,MACL;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,WAAsD;AACjE,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,QAAQ;AACtD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,eAAe,MAAM,SAAS;AAAA,MAClC;AAAA,MACA;AAAA,IACF,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK
,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,aAAa,OAAO;AAAA,IAClD;AAEA,UAAM,EAAE,SAAS,KAAK,IAAI,aAAa;AACvC,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,eAAe;AAAA,MACf,kBAAkB;AAAA,MAClB,OAAO,OAAO,SAAY;AAAA,IAC5B;AACA,oBAAgB,UAAU,cAAc;AAExC,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,OAAO,aAAa;AAAA,IACtB;AAAA,EACF;AAAA,EACA,MAAM,SACJ,QACA,KAGyD;AACzD,IAAAA,QAAO,QAAQ,yCAAyC;AACxD,UAAM,UAAU,MAAM,KAAK,mBAAmB,UAAU;AACxD,UAAM,EAAE,iBAAiB,IAAI;AAC7B,IAAAA,QAAO,kBAAkB,6CAA6C;AAEtE,UAAM,eAAe,4BAA4B;AAGjD,UAAM,kBAAkB;AACxB,UAAM,aAAmB,MAAM,QAAQ,MAAM,IACzC;AAAA,MACE,MAAM,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAChD,KAAK,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAC/C,OAAO;AAAA,MACP,QAAQ;AAAA,IACV,IACA;AAEJ,QAAI,eAAe,MAAM,wBAAwB;AAAA,MAC/C,gBAAgB;AAAA,MAChB,sBAAsB;AAAA,QACpB;AAAA,UACE,MAAM;AAAA,QACR;AAAA,MACF;AAAA,MACA,iBAAiB;AAAA,IACnB,CAAC;AAED,QAAI,KAAK,WAAW;AAClB,YAAM,aAAa,iBAAiB,YAAY,QAAQ,IAAI;AAC5D,YAAM,4BAA4B,UAAU;AAC5C,qBAAe,MAAM;AAAA,QACnB;AAAA,QACA;AAAA,QACA,qBAAqB,oBAAoB;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,OAAe;AAAA,MACnB,EAAE,MAAM,UAAU,SAAS,aAAa;AAAA,MACxC;AAAA,QACE,MAAM;AAAA,QACN,SAAS;AAAA,UACP;AAAA,YACE,MAAM;AAAA,YACN,WAAW;AAAA,cACT,KAAK;AAAA,cACL,QAAQ;AAAA,YACV;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,WACJ,KAAK,cAAc;AAErB,UAAM,MAAM,MAAM,SAAS,8BAAmC;AAE9D,UAAM,EAAE,QAAQ,IAAI;AACpB,IAAAA,QAAO,CAAC,QAAQ,OAAO,oBAAoB,QAAQ,KAAK,EAAE;AAC1D,IAAAA,QAAO,QAAQ,aAAa,gCAAgC;AAC5D,WAAO;AAAA,EACT;AACF;;;AEhZA,SAAS,eAAAE,cAAa,uBAAAD,4BAA2B;AAGjD,IAAO,cAAQ","names":["vlLocateMode","assert","MIDSCENE_MODEL_NAME","getAIConfig"],"ignoreList":[],"sources":["../../src/ai-model/action-executor.ts","../../src/ai-model/prompt/describe.ts","../../src/insight/index.ts","../../src/insight/utils.ts","../../src/index.ts"],"sourcesContent":["import type {\n ExecutionDump,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskProgressOptions,\n ExecutionTaskReturn,\n 
ExecutorContext,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\n\nexport class Executor {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of executor\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n constructor(\n name: string,\n options?: ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n },\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n async append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void> {\n assert(\n this.status !== 'error',\n `executor is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n }\n\n async flush(): Promise<any> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for executor, status is init but tasks are not empty',\n );\n }\n\n assert(this.status !== 'running', 'executor is already running');\n assert(this.status !== 'completed', 'executor is already completed');\n assert(this.status !== 'error', 'executor is in error state');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n 
return;\n }\n\n this.status = 'running';\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskInsightLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Locate' ||\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported insight subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskInsightLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action' || task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n task.timing.aiCost = (returnValue as any)?.aiCost || 0;\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = 
false;\n task.error =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n\n if (successfullyCompleted) {\n this.status = 'completed';\n } else {\n this.status = 'error';\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n return this.tasks[outputIndex].output;\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n const errorTaskIndex = this.tasks.findIndex(\n (task) => task.status === 'failed',\n );\n if (errorTaskIndex >= 0) {\n return this.tasks[errorTaskIndex];\n }\n return null;\n }\n\n dump(): ExecutionDump {\n let modelDescription = '';\n\n if (vlLocateMode()) {\n const uiTarsModelVer = uiTarsModelVersion();\n if (uiTarsModelVer) {\n modelDescription = `UI-TARS=${uiTarsModelVer}`;\n } else {\n modelDescription = `${vlLocateMode()} mode`;\n }\n }\n const dumpData: ExecutionDump = {\n sdkVersion: getVersion(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n model_description: modelDescription,\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n };\n return dumpData;\n }\n}\n","import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const elementDescriberInstruction = () => {\n return `Tell what is the content of the element wrapped by the read rectangle in the screenshot. Your description is expected to be used to precisely locate the element from other similar elements on screenshot. Use ${getPreferredLanguage()} in the description.\n\nPlease follow the following rules:\n1. 
The description should be start with a brief description, like \"a button for confirming the action\".\n\n2. Include these information in the description to distinguish the element from its siblings and other similar elements, as much as possible:\n- The text of the element, like \"with text 'Confirm'\"\n- What the element looks like if it's an image, like \"with image '...'\"\n- The relative position of the element, like \"on the left of ..., around ...\"\n- How to distinguish the element from its siblings elements, like \"it is the icon instead of the text\"\n\n3. Do NOT mention the red rectangle in the description.\n\n4. Use the error field to describe the unexpected situations, if any. If not, put null.\n\nReturn in JSON:\n{\n \"description\": \"[{brief description}]: {text of the element} {image of the element} {relative position of the element} ... \",\n \"error\"?: \"...\"\n}`;\n};\n","import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiAssert, AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightAssertionResponse,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_FORCE_DEEP_THINK,\n MIDSCENE_USE_QWEN_VL,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: 
UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n if (searchAreaPrompt && !vlLocateMode()) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. 
Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, elementById, rawResponse, usage } =\n await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? 
this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T = any>(input: string): Promise<T>;\n async extract<T extends Record<string, string>>(\n input: T,\n ): Promise<Record<keyof T, any>>;\n async extract<T extends object>(input: Record<keyof T, string>): Promise<T>;\n\n async extract<T>(dataDemand: InsightExtractParam): Promise<any> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = 
this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n usage,\n };\n }\n\n async assert(assertion: string): Promise<InsightAssertionResponse> {\n if (typeof assertion !== 'string') {\n throw new Error(\n 'This is the assert method for Midscene, the first argument should be a string. If you want to use the assert method from Node.js, please import it from the Node.js assert module.',\n );\n }\n\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('assert');\n const startTime = Date.now();\n const assertResult = await AiAssert({\n assertion,\n context,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? 
this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(assertResult.content),\n };\n\n const { thought, pass } = assertResult.content;\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'assert',\n userQuery: {\n assertion,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n assertionPass: pass,\n assertionThought: thought,\n error: pass ? undefined : thought,\n };\n emitInsightDump(dumpData, dumpSubscriber);\n\n return {\n pass,\n thought,\n usage: assertResult.usage,\n };\n }\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64 } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? 
{\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(targetRect, context.size);\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n","import type {\n DumpMeta,\n DumpSubscriber,\n InsightDump,\n PartialInsightDumpFromSDK,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { uuid } from '@midscene/shared/utils';\n\nexport function emitInsightDump(\n data: PartialInsightDumpFromSDK,\n dumpSubscriber?: DumpSubscriber,\n) {\n const baseData: DumpMeta = {\n sdkVersion: getVersion(),\n logTime: Date.now(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n };\n const finalData: InsightDump = {\n logId: uuid(),\n ...baseData,\n ...data,\n };\n\n dumpSubscriber?.(finalData);\n}\n","import { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport 
{\n plan,\n describeUserPage,\n AiLocateElement,\n AiAssert,\n} from './ai-model/index';\n\nexport { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';\n\nexport type * from './types';\nexport default Insight;\nexport { Executor, Insight, getVersion };\n\nexport type {\n MidsceneYamlScript,\n MidsceneYamlTask,\n MidsceneYamlFlowItem,\n MidsceneYamlFlowItemAIRightClick,\n} from './yaml';\n"]}
1
+ {"version":3,"mappings":";;;;;;;;;;;;;;;;AAUA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,cAAc;AAEhB,IAAM,WAAN,MAAe;AAAA,EAUpB,YACE,MACA,SAGA;AACA,SAAK,SACH,SAAS,SAAS,QAAQ,MAAM,SAAS,IAAI,YAAY;AAC3D,SAAK,OAAO;AACZ,SAAK,SAAS,SAAS,SAAS,CAAC,GAAG;AAAA,MAAI,CAAC,SACvC,KAAK,kBAAkB,IAAI;AAAA,IAC7B;AACA,SAAK,cAAc,SAAS;AAAA,EAC9B;AAAA,EAEQ,kBAAkB,MAAyC;AACjE,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,GAAG;AAAA,IACL;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,MAAgE;AAC3E;AAAA,MACE,KAAK,WAAW;AAAA,MAChB;AAAA,QAAyD,KAAK,gBAAgB,GAAG,KAAK;AAAA,EAAK,KAAK,gBAAgB,GAAG,UAAU;AAAA,IAC/H;AACA,QAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,WAAK,MAAM,KAAK,GAAG,KAAK,IAAI,CAAC,SAAS,KAAK,kBAAkB,IAAI,CAAC,CAAC;AAAA,IACrE,OAAO;AACL,WAAK,MAAM,KAAK,KAAK,kBAAkB,IAAI,CAAC;AAAA,IAC9C;AACA,QAAI,KAAK,WAAW,WAAW;AAC7B,WAAK,SAAS;AAAA,IAChB;AAAA,EACF;AAAA,EAEA,MAAM,QAAsB;AAC1B,QAAI,KAAK,WAAW,UAAU,KAAK,MAAM,SAAS,GAAG;AACnD,cAAQ;AAAA,QACN;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,WAAW,WAAW,6BAA6B;AAC/D,WAAO,KAAK,WAAW,aAAa,+BAA+B;AACnE,WAAO,KAAK,WAAW,SAAS,4BAA4B;AAE5D,UAAM,mBAAmB,KAAK,MAAM;AAAA,MAClC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,mBAAmB,GAAG;AAExB;AAAA,IACF;AAEA,SAAK,SAAS;AACd,QAAI,YAAY;AAChB,QAAI,wBAAwB;AAE5B,QAAI;AAEJ,WAAO,YAAY,KAAK,MAAM,QAAQ;AACpC,YAAM,OAAO,KAAK,MAAM,SAAS;AACjC;AAAA,QACE,KAAK,WAAW;AAAA,QAChB,2CAA2C,KAAK,MAAM;AAAA,MACxD;AACA,WAAK,SAAS;AAAA,QACZ,OAAO,KAAK,IAAI;AAAA,MAClB;AACA,UAAI;AACF,aAAK,SAAS;AACd,YAAI;AACF,cAAI,KAAK,aAAa;AACpB,kBAAM,KAAK,YAAY,IAAI;AAAA,UAC7B;AAAA,QACF,SAAS,GAAG;AACV,kBAAQ,MAAM,wBAAwB,CAAC;AAAA,QACzC;AACA;AAAA,UACE,CAAC,WAAW,UAAU,UAAU,EAAE,QAAQ,KAAK,IAAI,KAAK;AAAA,UACxD,0BAA0B,KAAK,IAAI;AAAA,QACrC;AAEA,cAAM,EAAE,UAAU,MAAM,IAAI;AAC5B,eAAO,UAAU,uCAAuC,KAAK,IAAI,EAAE;AAEnE,YAAI;AACJ,cAAM,kBAAmC;AAAA,UACvC;AAAA,UACA,SAAS,oBAAoB;AAAA,QAC/B;AAEA,YAAI,KAAK,SAAS,WAAW;AAC3B;AAAA,YACE,KAAK,YAAY,YACf,KAAK,YAAY,WACjB,KAAK,YAAY,YACjB,KAAK,YAAY,aACjB,KAAK,YAAY,YACjB,KAAK,YAAY;AAAA,YACnB,gCAAgC,KAAK,OAAO;AAAA,UAC9C;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AACxD,cAAI,KAAK,YAAY,UAAU;AAC7B,iCACE,aACC;AAAA,UACL;AAA
A,QACF,WAAW,KAAK,SAAS,YAAY,KAAK,SAAS,YAAY;AAC7D,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D,OAAO;AACL,kBAAQ;AAAA,YACN,0BAA0B,KAAK,IAAI;AAAA,UACrC;AACA,wBAAc,MAAM,KAAK,SAAS,OAAO,eAAe;AAAA,QAC1D;AAEA,eAAO,OAAO,MAAM,WAAW;AAC/B,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD,aAAK,OAAO,SAAU,aAAqB,UAAU;AACrD;AAAA,MACF,SAAS,GAAQ;AACf,gCAAwB;AACxB,aAAK,QACH,GAAG,YAAY,OAAO,MAAM,WAAW,IAAI;AAC7C,aAAK,aAAa,EAAE;AAEpB,aAAK,SAAS;AACd,aAAK,OAAO,MAAM,KAAK,IAAI;AAC3B,aAAK,OAAO,OAAO,KAAK,OAAO,MAAM,KAAK,OAAO;AACjD;AAAA,MACF;AAAA,IACF;AAGA,aAAS,IAAI,YAAY,GAAG,IAAI,KAAK,MAAM,QAAQ,KAAK;AACtD,WAAK,MAAM,CAAC,EAAE,SAAS;AAAA,IACzB;AAEA,QAAI,uBAAuB;AACzB,WAAK,SAAS;AAAA,IAChB,OAAO;AACL,WAAK,SAAS;AAAA,IAChB;AAEA,QAAI,KAAK,MAAM,QAAQ;AAErB,YAAM,cAAc,KAAK,IAAI,WAAW,KAAK,MAAM,SAAS,CAAC;AAC7D,aAAO,KAAK,MAAM,WAAW,EAAE;AAAA,IACjC;AAAA,EACF;AAAA,EAEA,iBAA0B;AACxB,WAAO,KAAK,WAAW;AAAA,EACzB;AAAA,EAEA,kBAAwC;AACtC,QAAI,KAAK,WAAW,SAAS;AAC3B,aAAO;AAAA,IACT;AACA,UAAM,iBAAiB,KAAK,MAAM;AAAA,MAChC,CAAC,SAAS,KAAK,WAAW;AAAA,IAC5B;AACA,QAAI,kBAAkB,GAAG;AACvB,aAAO,KAAK,MAAM,cAAc;AAAA,IAClC;AACA,WAAO;AAAA,EACT;AAAA,EAEA,OAAsB;AACpB,QAAI,mBAAmB;AAEvB,QAAI,aAAa,GAAG;AAClB,YAAM,iBAAiB,mBAAmB;AAC1C,UAAI,gBAAgB;AAClB,2BAAmB,WAAW,cAAc;AAAA,MAC9C,OAAO;AACL,2BAAmB,GAAG,aAAa,CAAC;AAAA,MACtC;AAAA,IACF;AACA,UAAM,WAA0B;AAAA,MAC9B,YAAY,WAAW;AAAA,MACvB,YAAY,YAAY,mBAAmB,KAAK;AAAA,MAChD,mBAAmB;AAAA,MACnB,SAAS,KAAK,IAAI;AAAA,MAClB,MAAM,KAAK;AAAA,MACX,OAAO,KAAK;AAAA,IACd;AACA,WAAO;AAAA,EACT;AACF;;;AC9NA,SAAS,4BAA4B;AAE9B,IAAM,8BAA8B,MAAM;AAC/C,SAAO,mNAAmN,qBAAqB,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAoBlP;;;ACQA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA,gBAAAA;AAAA,OACK;AACP,SAAS,yBAAyB,kBAAkB;AACpD,SAAS,gBAAgB;AACzB,SAAS,UAAAC,eAAc;;;AChCvB;AAAA,EACE,uBAAAC;AAAA,EACA,eAAAC;AAAA,OAGK;AACP,SAAS,YAAY;AAEd,SAAS,gBACd,MACA,gBACA;AACA,QAAM,WAAqB;AAAA,IACzB,YAAY,WAAW;AAAA,IACvB,SAAS,KAAK,IAAI;AAAA,IAClB,YAAYA,aAAYD,oBAAmB,KAAK;AAAA,EAClD;
AACA,QAAM,YAAyB;AAAA,IAC7B,OAAO,KAAK;AAAA,IACZ,GAAG;AAAA,IACH,GAAG;AAAA,EACL;AAEA,mBAAiB,SAAS;AAC5B;;;ADoBA,IAAM,QAAQ,SAAS,YAAY;AACnC,IAAqB,UAArB,MAGE;AAAA,EAWA,YACE,SAGA,KACA;AAXF,sBAAoD;AAYlD,IAAAD,QAAO,SAAS,iCAAiC;AACjD,QAAI,OAAO,YAAY,YAAY;AACjC,WAAK,qBAAqB;AAAA,IAC5B,OAAO;AACL,WAAK,qBAAqB,MAAM,QAAQ,QAAQ,OAAO;AAAA,IACzD;AAEA,QAAI,OAAO,KAAK,eAAe,aAAa;AAC1C,WAAK,aAAa,IAAI;AAAA,IACxB;AACA,QAAI,OAAO,KAAK,aAAa,aAAa;AACxC,WAAK,WAAW,IAAI;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,MAAM,OACJ,OACA,KACuB;AACvB,UAAM,EAAE,OAAO,IAAI,OAAO,CAAC;AAC3B,UAAM,cAAc,OAAO,UAAU,WAAW,QAAQ,MAAM;AAC9D,IAAAA,QAAO,aAAa,8BAA8B;AAClD,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,IAAAA,QAAO,OAAO,UAAU,UAAU,sCAAsC;AAExE,UAAM,wBAAwB;AAAA,MAC5B;AAAA,IACF;AACA,QAAI,uBAAuB;AACzB,YAAM,yBAAyB,qBAAqB;AAAA,IACtD;AACA,QAAI;AACJ,QAAI,MAAM,aAAa,uBAAuB;AAC5C,yBAAmB,MAAM;AAAA,IAC3B;AAEA,QAAI,oBAAoB,CAACD,cAAa,GAAG;AACvC,cAAQ;AAAA,QACN;AAAA,MACF;AACA,yBAAmB;AAAA,IACrB;AAEA,UAAM,UAAU,KAAK,WAAY,MAAM,KAAK,mBAAmB,QAAQ;AAEvE,QAAI,aAA+B;AACnC,QAAI,wBAA4C;AAChD,QAAI,kBAA2C;AAC/C,QAAI,qBAEY;AAChB,QAAI,kBAAkB;AACpB,2BAAqB,MAAM,gBAAgB;AAAA,QACzC;AAAA,QACA,oBAAoB;AAAA,MACtB,CAAC;AACD,MAAAC;AAAA,QACE,mBAAmB;AAAA,QACnB,gCAAgC,gBAAgB,IAC9C,mBAAmB,QAAQ,KAAK,mBAAmB,KAAK,KAAK,EAC/D;AAAA,MACF;AACA,8BAAwB,mBAAmB;AAC3C,wBAAkB,mBAAmB;AACrC,mBAAa,mBAAmB;AAAA,IAClC;AAEA,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,aAAa,aAAa,MAAM,IACzD,MAAM,gBAAgB;AAAA,MACpB,QAAQ,UAAU,KAAK;AAAA,MACvB;AAAA,MACA,0BAA0B;AAAA,MAC1B,cAAc;AAAA,IAChB,CAAC;AAEH,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,MACvC,gBAAgB,KAAK,UAAU,WAAW;AAAA,MAC1C;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAgC,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAC1E;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT,SAAS;AAAA,MACX;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,aAAa;AAAA,MACb,MAAM;AAAA,MACN;AAAA,MACA,WAAW,CAAC,CAAC;AAAA,MACb,OAAO;AAAA,IACT;AAEA,UAAM,WAA0B,CAAC;AACjC,KAA
C,YAAY,YAAY,CAAC,GAAG,QAAQ,CAAC,SAAS;AAC7C,UAAI,QAAQ,MAAM;AAChB,cAAM,UAAU,YAAY,MAAM,EAAE;AAEpC,YAAI,CAAC,SAAS;AACZ,kBAAQ;AAAA,YACN,kCAAkC,KAAK,EAAE;AAAA,UAC3C;AACA;AAAA,QACF;AACA,iBAAS,KAAK,OAAO;AAAA,MACvB;AAAA,IACF,CAAC;AAED;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH,gBAAgB;AAAA,MAClB;AAAA,MACA;AAAA,IACF;AAEA,QAAI,UAAU;AACZ,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,IAAAA;AAAA,MACE,SAAS,UAAU;AAAA,MACnB,6CAA6C,SAAS,MAAM;AAAA,IAC9D;AAEA,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO;AAAA,QACL,SAAS;AAAA,UACP,IAAI,SAAS,CAAC,EAAG;AAAA,UACjB,SAAS,SAAS,CAAC,EAAG;AAAA,UACtB,QAAQ,SAAS,CAAC,EAAG;AAAA,UACrB,MAAM,SAAS,CAAC,EAAG;AAAA,UACnB,QAAQ,SAAS,CAAC,EAAG,UAAU,CAAC;AAAA,UAChC,YAAY,SAAS,CAAC,EAAG;AAAA,QAC3B;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,WAAO;AAAA,MACL,SAAS;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAAA,EAYA,MAAM,QACJ,YACA,KACc;AACd,IAAAA;AAAA,MACE,OAAO,eAAe,YAAY,OAAO,eAAe;AAAA,MACxD,kDAAkD,OAAO,UAAU;AAAA,IACrE;AACA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,SAAS;AAEvD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,EAAE,aAAa,MAAM,IAAI,MAAM,qBAAwB;AAAA,MAC3D;AAAA,MACA,WAAW;AAAA,MACX,eAAe;AAAA,IACjB,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,WAAW;AAAA,IACzC;AAEA,QAAI;AACJ,QAAI,YAAY,QAAQ,QAAQ;AAC9B,iBAAW;AAAA,EAAwB,YAAY,OAAO,KAAK,IAAI,CAAC;AAAA,IAClE;AAEA,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,OAAO;AAAA,IACT;AAEA,UAAM,EAAE,KAAK,IAAI,eAAe,CAAC;AAGjC;AAAA,MACE;AAAA,QACE,GAAG;AAAA,QACH;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAEA,QAAI,YAAY,CAAC,MAAM;AACrB,YAAM,IAAI,MAAM,QAAQ;AAAA,IAC1B;AAEA,WAAO;AAAA,MACL;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,WAAsD;AACjE,QAAI,OAAO,cAAc,UAAU;AACjC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,iBAAiB,KAAK;AAC5B,SAAK,oBAAoB;AAEzB,UAAM,UAAU,MAAM,KAAK,mBAAmB,QAAQ;AACtD,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,eAAe,MAAM,SAAS;AAAA,MAClC;AAAA,MACA;AAAA,IACF,CAAC;AAED,UAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,UAAM,WAA4B;AAAA,MAChC,
GAAI,KAAK,WAAW,KAAK,WAAW,CAAC;AAAA,MACrC,YAAY;AAAA,MACZ,aAAa,KAAK,UAAU,aAAa,OAAO;AAAA,IAClD;AAEA,UAAM,EAAE,SAAS,KAAK,IAAI,aAAa;AACvC,UAAM,WAAsC;AAAA,MAC1C,MAAM;AAAA,MACN,WAAW;AAAA,QACT;AAAA,MACF;AAAA,MACA,gBAAgB,CAAC;AAAA,MACjB,MAAM;AAAA,MACN;AAAA,MACA,eAAe;AAAA,MACf,kBAAkB;AAAA,MAClB,OAAO,OAAO,SAAY;AAAA,IAC5B;AACA,oBAAgB,UAAU,cAAc;AAExC,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,OAAO,aAAa;AAAA,IACtB;AAAA,EACF;AAAA,EACA,MAAM,SACJ,QACA,KAGyD;AACzD,IAAAA,QAAO,QAAQ,yCAAyC;AACxD,UAAM,UAAU,MAAM,KAAK,mBAAmB,UAAU;AACxD,UAAM,EAAE,iBAAiB,IAAI;AAC7B,IAAAA,QAAO,kBAAkB,6CAA6C;AAEtE,UAAM,eAAe,4BAA4B;AAGjD,UAAM,kBAAkB;AACxB,UAAM,aAAmB,MAAM,QAAQ,MAAM,IACzC;AAAA,MACE,MAAM,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAChD,KAAK,KAAK,MAAM,OAAO,CAAC,IAAI,kBAAkB,CAAC;AAAA,MAC/C,OAAO;AAAA,MACP,QAAQ;AAAA,IACV,IACA;AAEJ,QAAI,eAAe,MAAM,wBAAwB;AAAA,MAC/C,gBAAgB;AAAA,MAChB,sBAAsB;AAAA,QACpB;AAAA,UACE,MAAM;AAAA,QACR;AAAA,MACF;AAAA,MACA,iBAAiB;AAAA,IACnB,CAAC;AAED,QAAI,KAAK,WAAW;AAClB,YAAM,aAAa,iBAAiB,YAAY,QAAQ,IAAI;AAC5D,YAAM,4BAA4B,UAAU;AAC5C,qBAAe,MAAM;AAAA,QACnB;AAAA,QACA;AAAA,QACA,qBAAqB,oBAAoB;AAAA,MAC3C;AAAA,IACF;AAEA,UAAM,OAAe;AAAA,MACnB,EAAE,MAAM,UAAU,SAAS,aAAa;AAAA,MACxC;AAAA,QACE,MAAM;AAAA,QACN,SAAS;AAAA,UACP;AAAA,YACE,MAAM;AAAA,YACN,WAAW;AAAA,cACT,KAAK;AAAA,cACL,QAAQ;AAAA,YACV;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,WACJ,KAAK,cAAc;AAErB,UAAM,MAAM,MAAM,SAAS,8BAAmC;AAE9D,UAAM,EAAE,QAAQ,IAAI;AACpB,IAAAA,QAAO,CAAC,QAAQ,OAAO,oBAAoB,QAAQ,KAAK,EAAE;AAC1D,IAAAA,QAAO,QAAQ,aAAa,gCAAgC;AAC5D,WAAO;AAAA,EACT;AACF;;;AEzZA,SAAS,eAAAE,cAAa,uBAAAD,4BAA2B;AAGjD,IAAO,cAAQ","names":["vlLocateMode","assert","MIDSCENE_MODEL_NAME","getAIConfig"],"ignoreList":[],"sources":["../../src/ai-model/action-executor.ts","../../src/ai-model/prompt/describe.ts","../../src/insight/index.ts","../../src/insight/utils.ts","../../src/index.ts"],"sourcesContent":["import type {\n ExecutionDump,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskProgressOptions,\n 
ExecutionTaskReturn,\n ExecutorContext,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\n\nexport class Executor {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of executor\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n constructor(\n name: string,\n options?: ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n },\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n async append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void> {\n assert(\n this.status !== 'error',\n `executor is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n }\n\n async flush(): Promise<any> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for executor, status is init but tasks are not empty',\n );\n }\n\n assert(this.status !== 'running', 'executor is already running');\n assert(this.status !== 'completed', 'executor is already completed');\n assert(this.status !== 'error', 'executor is in error state');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all 
tasks are completed\n return;\n }\n\n this.status = 'running';\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskInsightLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Locate' ||\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported insight subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskInsightLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action' || task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n task.timing.aiCost = (returnValue as any)?.aiCost || 0;\n taskIndex++;\n } catch (e: any) {\n 
successfullyCompleted = false;\n task.error =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n\n if (successfullyCompleted) {\n this.status = 'completed';\n } else {\n this.status = 'error';\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n return this.tasks[outputIndex].output;\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n const errorTaskIndex = this.tasks.findIndex(\n (task) => task.status === 'failed',\n );\n if (errorTaskIndex >= 0) {\n return this.tasks[errorTaskIndex];\n }\n return null;\n }\n\n dump(): ExecutionDump {\n let modelDescription = '';\n\n if (vlLocateMode()) {\n const uiTarsModelVer = uiTarsModelVersion();\n if (uiTarsModelVer) {\n modelDescription = `UI-TARS=${uiTarsModelVer}`;\n } else {\n modelDescription = `${vlLocateMode()} mode`;\n }\n }\n const dumpData: ExecutionDump = {\n sdkVersion: getVersion(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n model_description: modelDescription,\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n };\n return dumpData;\n }\n}\n","import { getPreferredLanguage } from '@midscene/shared/env';\n\nexport const elementDescriberInstruction = () => {\n return `Tell what is the content of the element wrapped by the read rectangle in the screenshot. Your description is expected to be used to precisely locate the element from other similar elements on screenshot. Use ${getPreferredLanguage()} in the description.\n\nPlease follow the following rules:\n1. 
The description should be start with a brief description, like \"a button for confirming the action\".\n\n2. Include these information in the description to distinguish the element from its siblings and other similar elements, as much as possible:\n- The text of the element, like \"with text 'Confirm'\"\n- What the element looks like if it's an image, like \"with image '...'\"\n- The relative position of the element, like \"on the left of ..., around ...\"\n- How to distinguish the element from its siblings elements, like \"it is the icon instead of the text\"\n\n3. Do NOT mention the red rectangle in the description.\n\n4. Use the error field to describe the unexpected situations, if any. If not, put null.\n\nReturn in JSON:\n{\n \"description\": \"[{brief description}]: {text of the element} {image of the element} {relative position of the element} ... \",\n \"error\"?: \"...\"\n}`;\n};\n","import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiAssert, AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightAssertionResponse,\n InsightExtractOption,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_FORCE_DEEP_THINK,\n MIDSCENE_USE_QWEN_VL,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n 
context?: UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n if (searchAreaPrompt && !vlLocateMode()) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. 
Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, elementById, rawResponse, usage } =\n await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? 
this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T = any>(input: string, opt?: InsightExtractOption): Promise<T>;\n async extract<T extends Record<string, string>>(\n input: T,\n opt?: InsightExtractOption,\n ): Promise<Record<keyof T, any>>;\n async extract<T extends object>(\n input: Record<keyof T, string>,\n opt?: InsightExtractOption,\n ): Promise<T>;\n\n async extract<T>(\n dataDemand: InsightExtractParam,\n opt?: InsightExtractOption,\n ): Promise<any> {\n assert(\n typeof dataDemand === 'object' || typeof 
dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n extractOption: opt,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n usage,\n };\n }\n\n async assert(assertion: string): Promise<InsightAssertionResponse> {\n if (typeof assertion !== 'string') {\n throw new Error(\n 'This is the assert method for Midscene, the first argument should be a string. If you want to use the assert method from Node.js, please import it from the Node.js assert module.',\n );\n }\n\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const context = await this.contextRetrieverFn('assert');\n const startTime = Date.now();\n const assertResult = await AiAssert({\n assertion,\n context,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? 
this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(assertResult.content),\n };\n\n const { thought, pass } = assertResult.content;\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'assert',\n userQuery: {\n assertion,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n assertionPass: pass,\n assertionThought: thought,\n error: pass ? undefined : thought,\n };\n emitInsightDump(dumpData, dumpSubscriber);\n\n return {\n pass,\n thought,\n usage: assertResult.usage,\n };\n }\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64 } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? 
{\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(targetRect, context.size);\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n","import type {\n DumpMeta,\n DumpSubscriber,\n InsightDump,\n PartialInsightDumpFromSDK,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { uuid } from '@midscene/shared/utils';\n\nexport function emitInsightDump(\n data: PartialInsightDumpFromSDK,\n dumpSubscriber?: DumpSubscriber,\n) {\n const baseData: DumpMeta = {\n sdkVersion: getVersion(),\n logTime: Date.now(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n };\n const finalData: InsightDump = {\n logId: uuid(),\n ...baseData,\n ...data,\n };\n\n dumpSubscriber?.(finalData);\n}\n","import { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport 
{\n plan,\n describeUserPage,\n AiLocateElement,\n AiAssert,\n} from './ai-model/index';\n\nexport { getAIConfig, MIDSCENE_MODEL_NAME } from '@midscene/shared/env';\n\nexport type * from './types';\nexport default Insight;\nexport { Executor, Insight, getVersion };\n\nexport type {\n MidsceneYamlScript,\n MidsceneYamlTask,\n MidsceneYamlFlowItem,\n MidsceneYamlFlowItemAIRightClick,\n} from './yaml';\n"]}
@@ -1,4 +1,5 @@
1
- import { l as AIUsageInfo, R as Rect, k as ElementTreeNode, B as BaseElement, U as UIContext, az as ReferenceImage, q as AIElementLocatorResponse, Q as ElementById, t as AIDataExtractionResponse, v as AIAssertionResponse, ax as PageType, Z as PlanningAIResponse } from './types-c892e193.js';
1
+ import { l as AIUsageInfo, U as UIContext, av as ReferenceImage, q as AIElementLocatorResponse, K as ElementById, g as InsightExtractOption, t as AIDataExtractionResponse, v as AIAssertionResponse, at as PageType, V as PlanningAIResponse } from './types-8c197f92.js';
2
+ import { Rect, ElementTreeNode, BaseElement } from '@midscene/shared/types';
2
3
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
3
4
 
4
5
  type AIArgs = [
@@ -28,6 +29,8 @@ declare function elementByPositionWithElementInfo(treeRoot: ElementTreeNode<Base
28
29
  declare function describeUserPage<ElementType extends BaseElement = BaseElement>(context: Omit<UIContext<ElementType>, 'describer'>, opt?: {
29
30
  truncateTextLength?: number;
30
31
  filterNonTextContent?: boolean;
32
+ domIncluded?: boolean;
33
+ visibleOnly?: boolean;
31
34
  }): Promise<{
32
35
  description: string;
33
36
  elementById(idOrIndexId: string): ElementType;
@@ -75,6 +78,7 @@ declare function AiLocateSection(options: {
75
78
  declare function AiExtractElementInfo<T, ElementType extends BaseElement = BaseElement>(options: {
76
79
  dataQuery: string | Record<string, string>;
77
80
  context: UIContext<ElementType>;
81
+ extractOption?: InsightExtractOption;
78
82
  }): Promise<{
79
83
  parseResult: AIDataExtractionResponse<T>;
80
84
  elementById: (idOrIndexId: string) => ElementType;
package/dist/es/tree.d.ts CHANGED
@@ -1,12 +1 @@
1
- import * as _midscene_shared_constants from '@midscene/shared/constants';
2
- import { B as BaseElement, k as ElementTreeNode } from './types-c892e193.js';
3
- import 'openai/resources';
4
-
5
- declare function truncateText(text: string | number | object | undefined, maxLength?: number): string;
6
- declare function trimAttributes(attributes: Record<string, any>, truncateTextLength?: number): {
7
- [key: string]: string;
8
- nodeType: _midscene_shared_constants.NodeType;
9
- };
10
- declare function descriptionOfTree<ElementType extends BaseElement = BaseElement>(tree: ElementTreeNode<ElementType>, truncateTextLength?: number, filterNonTextContent?: boolean): string;
11
-
12
- export { descriptionOfTree, trimAttributes, truncateText };
1
+ export { descriptionOfTree, trimAttributes, truncateText } from '@midscene/shared/extractor';
package/dist/es/tree.js CHANGED
@@ -1,98 +1,9 @@
1
1
  // src/tree.ts
2
- function truncateText(text, maxLength = 150) {
3
- if (typeof text === "undefined") {
4
- return "";
5
- }
6
- if (typeof text === "object") {
7
- text = JSON.stringify(text);
8
- }
9
- if (typeof text === "number") {
10
- return text.toString();
11
- }
12
- if (typeof text === "string" && text.length > maxLength) {
13
- return `${text.slice(0, maxLength)}...`;
14
- }
15
- if (typeof text === "string") {
16
- return text.trim();
17
- }
18
- return "";
19
- }
20
- function trimAttributes(attributes, truncateTextLength) {
21
- const tailorAttributes = Object.keys(attributes).reduce(
22
- (res, currentKey) => {
23
- const attributeVal = attributes[currentKey];
24
- if (currentKey === "style" || currentKey === "src" || currentKey === "htmlTagName" || currentKey === "nodeType") {
25
- return res;
26
- }
27
- res[currentKey] = truncateText(attributeVal, truncateTextLength);
28
- return res;
29
- },
30
- {}
31
- );
32
- return tailorAttributes;
33
- }
34
- var nodeSizeThreshold = 4;
35
- function descriptionOfTree(tree, truncateTextLength, filterNonTextContent = false) {
36
- const attributesString = (kv) => {
37
- return Object.entries(kv).map(
38
- ([key, value]) => `${key}="${truncateText(value, truncateTextLength)}"`
39
- ).join(" ");
40
- };
41
- function buildContentTree(node, indent = 0) {
42
- let before = "";
43
- let contentWithIndent = "";
44
- let after = "";
45
- let emptyNode = true;
46
- const indentStr = " ".repeat(indent);
47
- let children = "";
48
- for (let i = 0; i < (node.children || []).length; i++) {
49
- const childContent = buildContentTree(node.children[i], indent + 1);
50
- if (childContent) {
51
- children += `
52
- ${childContent}`;
53
- }
54
- }
55
- if (node.node && node.node.rect.width > nodeSizeThreshold && node.node.rect.height > nodeSizeThreshold && (!filterNonTextContent || filterNonTextContent && node.node.content)) {
56
- emptyNode = false;
57
- let nodeTypeString;
58
- if (node.node.attributes?.htmlTagName) {
59
- nodeTypeString = node.node.attributes.htmlTagName.replace(/[<>]/g, "");
60
- } else {
61
- nodeTypeString = node.node.attributes.nodeType.replace(/\sNode$/, "").toLowerCase();
62
- }
63
- const markerId = node.node.indexId;
64
- const markerIdString = typeof markerId !== "undefined" ? `markerId="${markerId}"` : "";
65
- const rectAttribute = node.node.rect ? {
66
- left: node.node.rect.left,
67
- top: node.node.rect.top,
68
- width: node.node.rect.width,
69
- height: node.node.rect.height
70
- } : {};
71
- before = `<${nodeTypeString} id="${node.node.id}" ${markerIdString} ${attributesString(trimAttributes(node.node.attributes || {}, truncateTextLength))} ${attributesString(rectAttribute)}>`;
72
- const content = truncateText(node.node.content, truncateTextLength);
73
- contentWithIndent = content ? `
74
- ${indentStr} ${content}` : "";
75
- after = `</${nodeTypeString}>`;
76
- } else if (!filterNonTextContent) {
77
- if (!children.trim().startsWith("<>")) {
78
- before = "<>";
79
- contentWithIndent = "";
80
- after = "</>";
81
- }
82
- }
83
- if (emptyNode && !children.trim()) {
84
- return "";
85
- }
86
- const result2 = `${indentStr}${before}${contentWithIndent}${children}
87
- ${indentStr}${after}`;
88
- if (result2.trim()) {
89
- return result2;
90
- }
91
- return "";
92
- }
93
- const result = buildContentTree(tree);
94
- return result.replace(/^\s*\n/gm, "");
95
- }
2
+ import {
3
+ truncateText,
4
+ trimAttributes,
5
+ descriptionOfTree
6
+ } from "@midscene/shared/extractor";
96
7
  export {
97
8
  descriptionOfTree,
98
9
  trimAttributes,
@@ -1 +1 @@
1
- {"version":3,"mappings":";AAEO,SAAS,aACd,MACA,YAAY,KACZ;AACA,MAAI,OAAO,SAAS,aAAa;AAC/B,WAAO;AAAA,EACT;AAEA,MAAI,OAAO,SAAS,UAAU;AAC5B,WAAO,KAAK,UAAU,IAAI;AAAA,EAC5B;AAEA,MAAI,OAAO,SAAS,UAAU;AAC5B,WAAO,KAAK,SAAS;AAAA,EACvB;AAEA,MAAI,OAAO,SAAS,YAAY,KAAK,SAAS,WAAW;AACvD,WAAO,GAAG,KAAK,MAAM,GAAG,SAAS,CAAC;AAAA,EACpC;AAEA,MAAI,OAAO,SAAS,UAAU;AAC5B,WAAO,KAAK,KAAK;AAAA,EACnB;AAEA,SAAO;AACT;AAEO,SAAS,eACd,YACA,oBACA;AACA,QAAM,mBAAmB,OAAO,KAAK,UAAU,EAAE;AAAA,IAC/C,CAAC,KAAK,eAAuB;AAC3B,YAAM,eAAgB,WAAmB,UAAU;AACnD,UACE,eAAe,WACf,eAAe,SACf,eAAe,iBACf,eAAe,YACf;AACA,eAAO;AAAA,MACT;AAEA,UAAI,UAAU,IAAI,aAAa,cAAc,kBAAkB;AAC/D,aAAO;AAAA,IACT;AAAA,IACA,CAAC;AAAA,EACH;AACA,SAAO;AACT;AAEA,IAAM,oBAAoB;AACnB,SAAS,kBAGd,MACA,oBACA,uBAAuB,OACvB;AACA,QAAM,mBAAmB,CAAC,OAA4B;AACpD,WAAO,OAAO,QAAQ,EAAE,EACrB;AAAA,MACC,CAAC,CAAC,KAAK,KAAK,MAAM,GAAG,GAAG,KAAK,aAAa,OAAO,kBAAkB,CAAC;AAAA,IACtE,EACC,KAAK,GAAG;AAAA,EACb;AAEA,WAAS,iBACP,MACA,SAAS,GACD;AACR,QAAI,SAAS;AACb,QAAI,oBAAoB;AACxB,QAAI,QAAQ;AACZ,QAAI,YAAY;AAChB,UAAM,YAAY,KAAK,OAAO,MAAM;AAEpC,QAAI,WAAW;AACf,aAAS,IAAI,GAAG,KAAK,KAAK,YAAY,CAAC,GAAG,QAAQ,KAAK;AACrD,YAAM,eAAe,iBAAiB,KAAK,SAAS,CAAC,GAAG,SAAS,CAAC;AAClE,UAAI,cAAc;AAChB,oBAAY;AAAA,EAAK,YAAY;AAAA,MAC/B;AAAA,IACF;AAEA,QACE,KAAK,QACL,KAAK,KAAK,KAAK,QAAQ,qBACvB,KAAK,KAAK,KAAK,SAAS,sBACvB,CAAC,wBAAyB,wBAAwB,KAAK,KAAK,UAC7D;AACA,kBAAY;AACZ,UAAI;AACJ,UAAI,KAAK,KAAK,YAAY,aAAa;AACrC,yBAAiB,KAAK,KAAK,WAAW,YAAY,QAAQ,SAAS,EAAE;AAAA,MACvE,OAAO;AACL,yBAAiB,KAAK,KAAK,WAAW,SACnC,QAAQ,WAAW,EAAE,EACrB,YAAY;AAAA,MACjB;AACA,YAAM,WAAW,KAAK,KAAK;AAC3B,YAAM,iBACJ,OAAO,aAAa,cAAc,aAAa,QAAQ,MAAM;AAC/D,YAAM,gBAAgB,KAAK,KAAK,OAC5B;AAAA,QACE,MAAM,KAAK,KAAK,KAAK;AAAA,QACrB,KAAK,KAAK,KAAK,KAAK;AAAA,QACpB,OAAO,KAAK,KAAK,KAAK;AAAA,QACtB,QAAQ,KAAK,KAAK,KAAK;AAAA,MACzB,IACA,CAAC;AACL,eAAS,IAAI,cAAc,QAAQ,KAAK,KAAK,EAAE,KAAK,cAAc,IAAI,iBAAiB,eAAe,KAAK,KAAK,cAAc,CAAC,GAAG,kBAAkB,CAAC,CAAC,IAAI,iBAAiB,aAAa,CAAC;AACzL,YAAM,UAAU,aAAa,KAAK,KAAK,SAAS,kBAAkB;AAClE,0BAAoB,UAAU;AAAA,EAAK,SAAS,KAAK,OAAO,KAAK;AAC7D,cAAQ,KAAK,cAAc
;AAAA,IAC7B,WAAW,CAAC,sBAAsB;AAChC,UAAI,CAAC,SAAS,KAAK,EAAE,WAAW,IAAI,GAAG;AACrC,iBAAS;AACT,4BAAoB;AACpB,gBAAQ;AAAA,MACV;AAAA,IACF;AAEA,QAAI,aAAa,CAAC,SAAS,KAAK,GAAG;AACjC,aAAO;AAAA,IACT;AAEA,UAAMA,UAAS,GAAG,SAAS,GAAG,MAAM,GAAG,iBAAiB,GAAG,QAAQ;AAAA,EAAK,SAAS,GAAG,KAAK;AACzF,QAAIA,QAAO,KAAK,GAAG;AACjB,aAAOA;AAAA,IACT;AACA,WAAO;AAAA,EACT;AAEA,QAAM,SAAS,iBAAiB,IAAI;AACpC,SAAO,OAAO,QAAQ,YAAY,EAAE;AACtC","names":["result"],"ignoreList":[],"sources":["../../src/tree.ts"],"sourcesContent":["import type { BaseElement, ElementTreeNode, Size, UIContext } from '@/types';\n\nexport function truncateText(\n text: string | number | object | undefined,\n maxLength = 150,\n) {\n if (typeof text === 'undefined') {\n return '';\n }\n\n if (typeof text === 'object') {\n text = JSON.stringify(text);\n }\n\n if (typeof text === 'number') {\n return text.toString();\n }\n\n if (typeof text === 'string' && text.length > maxLength) {\n return `${text.slice(0, maxLength)}...`;\n }\n\n if (typeof text === 'string') {\n return text.trim();\n }\n\n return '';\n}\n\nexport function trimAttributes(\n attributes: Record<string, any>,\n truncateTextLength?: number,\n) {\n const tailorAttributes = Object.keys(attributes).reduce(\n (res, currentKey: string) => {\n const attributeVal = (attributes as any)[currentKey];\n if (\n currentKey === 'style' ||\n currentKey === 'src' ||\n currentKey === 'htmlTagName' ||\n currentKey === 'nodeType'\n ) {\n return res;\n }\n\n res[currentKey] = truncateText(attributeVal, truncateTextLength);\n return res;\n },\n {} as BaseElement['attributes'],\n );\n return tailorAttributes;\n}\n\nconst nodeSizeThreshold = 4;\nexport function descriptionOfTree<\n ElementType extends BaseElement = BaseElement,\n>(\n tree: ElementTreeNode<ElementType>,\n truncateTextLength?: number,\n filterNonTextContent = false,\n) {\n const attributesString = (kv: Record<string, any>) => {\n return Object.entries(kv)\n .map(\n ([key, value]) => `${key}=\"${truncateText(value, 
truncateTextLength)}\"`,\n )\n .join(' ');\n };\n\n function buildContentTree(\n node: ElementTreeNode<ElementType>,\n indent = 0,\n ): string {\n let before = '';\n let contentWithIndent = '';\n let after = '';\n let emptyNode = true;\n const indentStr = ' '.repeat(indent);\n\n let children = '';\n for (let i = 0; i < (node.children || []).length; i++) {\n const childContent = buildContentTree(node.children[i], indent + 1);\n if (childContent) {\n children += `\\n${childContent}`;\n }\n }\n\n if (\n node.node &&\n node.node.rect.width > nodeSizeThreshold &&\n node.node.rect.height > nodeSizeThreshold &&\n (!filterNonTextContent || (filterNonTextContent && node.node.content))\n ) {\n emptyNode = false;\n let nodeTypeString: string;\n if (node.node.attributes?.htmlTagName) {\n nodeTypeString = node.node.attributes.htmlTagName.replace(/[<>]/g, '');\n } else {\n nodeTypeString = node.node.attributes.nodeType\n .replace(/\\sNode$/, '')\n .toLowerCase();\n }\n const markerId = node.node.indexId;\n const markerIdString =\n typeof markerId !== 'undefined' ? `markerId=\"${markerId}\"` : '';\n const rectAttribute = node.node.rect\n ? {\n left: node.node.rect.left,\n top: node.node.rect.top,\n width: node.node.rect.width,\n height: node.node.rect.height,\n }\n : {};\n before = `<${nodeTypeString} id=\"${node.node.id}\" ${markerIdString} ${attributesString(trimAttributes(node.node.attributes || {}, truncateTextLength))} ${attributesString(rectAttribute)}>`;\n const content = truncateText(node.node.content, truncateTextLength);\n contentWithIndent = content ? 
`\\n${indentStr} ${content}` : '';\n after = `</${nodeTypeString}>`;\n } else if (!filterNonTextContent) {\n if (!children.trim().startsWith('<>')) {\n before = '<>';\n contentWithIndent = '';\n after = '</>';\n }\n }\n\n if (emptyNode && !children.trim()) {\n return '';\n }\n\n const result = `${indentStr}${before}${contentWithIndent}${children}\\n${indentStr}${after}`;\n if (result.trim()) {\n return result;\n }\n return '';\n }\n\n const result = buildContentTree(tree);\n return result.replace(/^\\s*\\n/gm, '');\n}\n"]}
1
+ {"version":3,"mappings":";AAAA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK","names":[],"ignoreList":[],"sources":["../../src/tree.ts"],"sourcesContent":["export {\n truncateText,\n trimAttributes,\n descriptionOfTree,\n} from '@midscene/shared/extractor';\n"]}
@@ -1,4 +1,5 @@
1
1
  import { NodeType } from '@midscene/shared/constants';
2
+ import { Rect, BaseElement, ElementTreeNode, Size } from '@midscene/shared/types';
2
3
  import { ChatCompletionMessageParam } from 'openai/resources';
3
4
 
4
5
  interface LocateOption {
@@ -6,6 +7,10 @@ interface LocateOption {
6
7
  deepThink?: boolean;
7
8
  cacheable?: boolean;
8
9
  }
10
+ interface InsightExtractOption {
11
+ domIncluded?: boolean;
12
+ screenshotIncluded?: boolean;
13
+ }
9
14
  interface ReferenceImage {
10
15
  base64: string;
11
16
  rect?: Rect;
@@ -66,19 +71,19 @@ interface MidsceneYamlFlowItemAIAssert {
66
71
  aiAssert: string;
67
72
  errorMessage?: string;
68
73
  }
69
- interface MidsceneYamlFlowItemAIQuery {
74
+ interface MidsceneYamlFlowItemAIQuery extends InsightExtractOption {
70
75
  aiQuery: string;
71
76
  name?: string;
72
77
  }
73
- interface MidsceneYamlFlowItemAINumber {
78
+ interface MidsceneYamlFlowItemAINumber extends InsightExtractOption {
74
79
  aiNumber: string;
75
80
  name?: string;
76
81
  }
77
- interface MidsceneYamlFlowItemAINString {
82
+ interface MidsceneYamlFlowItemAINString extends InsightExtractOption {
78
83
  aiString: string;
79
84
  name?: string;
80
85
  }
81
- interface MidsceneYamlFlowItemAIBoolean {
86
+ interface MidsceneYamlFlowItemAIBoolean extends InsightExtractOption {
82
87
  aiBoolean: string;
83
88
  name?: string;
84
89
  }
@@ -131,35 +136,6 @@ interface ScriptPlayerTaskStatus extends MidsceneYamlTask {
131
136
  }
132
137
  type ScriptPlayerStatusValue = 'init' | 'running' | 'done' | 'error';
133
138
 
134
- interface Point {
135
- left: number;
136
- top: number;
137
- }
138
- interface Size {
139
- width: number;
140
- height: number;
141
- dpr?: number;
142
- }
143
- type Rect = Point & Size & {
144
- zoom?: number;
145
- };
146
- declare abstract class BaseElement {
147
- abstract id: string;
148
- abstract indexId?: number;
149
- abstract attributes: {
150
- nodeType: NodeType;
151
- [key: string]: string;
152
- };
153
- abstract content: string;
154
- abstract rect: Rect;
155
- abstract center: [number, number];
156
- abstract locator?: string;
157
- abstract xpaths?: string[];
158
- }
159
- interface ElementTreeNode<ElementType extends BaseElement = BaseElement> {
160
- node: ElementType | null;
161
- children: ElementTreeNode<ElementType>[];
162
- }
163
139
  type AIUsageInfo = Record<string, any> & {
164
140
  prompt_tokens: number;
165
141
  completion_tokens: number;
@@ -477,4 +453,4 @@ interface GroupedActionDump {
477
453
  }
478
454
  type PageType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android';
479
455
 
480
- export { type PlanningActionParamHover as $, type AIDescribeElementResponse as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type InsightExtractParam as F, type LocateResultElement as G, type DumpMeta as H, type InsightAction as I, type ReportDumpWithAttributes as J, type InsightDump as K, type LocateResult as L, type MidsceneYamlScript as M, type PartialInsightDumpFromSDK as N, type LiteUISection as O, type Point as P, type ElementById as Q, type Rect as R, type Size as S, type OnTaskStartTip as T, UIContext as U, type AgentWaitForOpt as V, type AgentAssertOpt as W, type PlanningLocateParam as X, type PlanningAction as Y, type PlanningAIResponse as Z, type PlanningActionParamTap as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamRightClick as a0, type PlanningActionParamInputOrKeyPress as a1, type PlanningActionParamScroll as a2, type PlanningActionParamAssert as a3, type PlanningActionParamSleep as a4, type PlanningActionParamError as a5, type PlanningActionParamWaitFor as a6, type Color as a7, type BaseAgentParserOpt as a8, type PuppeteerParserOpt as a9, type scrollParam as aA, type MidsceneYamlScriptEnvBase as aB, type MidsceneYamlScriptWebEnv as aC, type MidsceneYamlScriptAndroidEnv as aD, type MidsceneYamlScriptEnv as aE, type MidsceneYamlFlowItemAIAction as aF, type MidsceneYamlFlowItemAIAssert as aG, type MidsceneYamlFlowItemAIQuery as aH, type MidsceneYamlFlowItemAINumber as aI, type MidsceneYamlFlowItemAINString as aJ, type MidsceneYamlFlowItemAIBoolean as aK, type MidsceneYamlFlowItemAILocate as aL, type MidsceneYamlFlowItemAIWaitFor as aM, type MidsceneYamlFlowItemAITap as aN, type MidsceneYamlFlowItemAIHover as aO, type MidsceneYamlFlowItemAIInput as aP, type MidsceneYamlFlowItemAIKeyboardPress as aQ, type MidsceneYamlFlowItemAIScroll as aR, type MidsceneYamlFlowItemEvaluateJavaScript as aS, type MidsceneYamlFlowItemSleep as aT, type FreeFn as aU, type ScriptPlayerTaskStatus as aV, type 
ScriptPlayerStatusValue as aW, type PlaywrightParserOpt as aa, type ExecutionRecorderItem as ab, type ExecutionTaskType as ac, type ExecutorContext as ad, type TaskCacheInfo as ae, type ExecutionTaskReturn as af, type ExecutionTaskInsightLocateParam as ag, type ExecutionTaskInsightLocateOutput as ah, type ExecutionTaskInsightDumpLog as ai, type ExecutionTaskInsightLocateApply as aj, type ExecutionTaskInsightLocate as ak, type ExecutionTaskInsightQueryParam as al, type ExecutionTaskInsightQueryOutput as am, type ExecutionTaskInsightQueryApply as an, type ExecutionTaskInsightQuery as ao, type ExecutionTaskInsightAssertionParam as ap, type ExecutionTaskInsightAssertionApply as aq, type ExecutionTaskInsightAssertion as ar, type ExecutionTaskActionApply as as, type ExecutionTaskAction as at, type ExecutionTaskPlanningApply as au, type ExecutionTaskPlanning as av, type GroupedActionDump as aw, type PageType as ax, type LocateOption as ay, type ReferenceImage as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type MidsceneYamlTask as h, type MidsceneYamlFlowItem as i, type MidsceneYamlFlowItemAIRightClick as j, type ElementTreeNode as k, type AIUsageInfo as l, AIResponseFormat as m, type AISingleElementResponseById as n, type AISingleElementResponseByPosition as o, type AISingleElementResponse as p, type AIElementLocatorResponse as q, type AIElementCoordinatesResponse as r, type AIElementResponse as s, type AIDataExtractionResponse as t, type AISectionLocatorResponse as u, type AIAssertionResponse as v, type LocatorValidatorOption as w, type LocateValidatorResult as x, type AgentDescribeElementAtPointResult as y, type EnsureObject as z };
456
+ export { type PlanningActionParamAssert as $, type AIDescribeElementResponse as A, type InsightExtractParam as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type LocateResultElement as F, type DumpMeta as G, type InsightDump as H, type InsightAction as I, type LiteUISection as J, type ElementById as K, type LocateResult as L, type MidsceneYamlScript as M, type AgentWaitForOpt as N, type OnTaskStartTip as O, type PartialInsightDumpFromSDK as P, type AgentAssertOpt as Q, type ReportDumpWithAttributes as R, type PlanningLocateParam as S, type PlanningAction as T, UIContext as U, type PlanningAIResponse as V, type PlanningActionParamTap as W, type PlanningActionParamHover as X, type PlanningActionParamRightClick as Y, type PlanningActionParamInputOrKeyPress as Z, type PlanningActionParamScroll as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamSleep as a0, type PlanningActionParamError as a1, type PlanningActionParamWaitFor as a2, type Color as a3, type BaseAgentParserOpt as a4, type PuppeteerParserOpt as a5, type PlaywrightParserOpt as a6, type ExecutionRecorderItem as a7, type ExecutionTaskType as a8, type ExecutorContext as a9, type MidsceneYamlScriptEnv as aA, type MidsceneYamlFlowItemAIAction as aB, type MidsceneYamlFlowItemAIAssert as aC, type MidsceneYamlFlowItemAIQuery as aD, type MidsceneYamlFlowItemAINumber as aE, type MidsceneYamlFlowItemAINString as aF, type MidsceneYamlFlowItemAIBoolean as aG, type MidsceneYamlFlowItemAILocate as aH, type MidsceneYamlFlowItemAIWaitFor as aI, type MidsceneYamlFlowItemAITap as aJ, type MidsceneYamlFlowItemAIHover as aK, type MidsceneYamlFlowItemAIInput as aL, type MidsceneYamlFlowItemAIKeyboardPress as aM, type MidsceneYamlFlowItemAIScroll as aN, type MidsceneYamlFlowItemEvaluateJavaScript as aO, type MidsceneYamlFlowItemSleep as aP, type FreeFn as aQ, type ScriptPlayerTaskStatus as aR, type ScriptPlayerStatusValue as aS, type TaskCacheInfo as aa, type ExecutionTaskReturn as 
ab, type ExecutionTaskInsightLocateParam as ac, type ExecutionTaskInsightLocateOutput as ad, type ExecutionTaskInsightDumpLog as ae, type ExecutionTaskInsightLocateApply as af, type ExecutionTaskInsightLocate as ag, type ExecutionTaskInsightQueryParam as ah, type ExecutionTaskInsightQueryOutput as ai, type ExecutionTaskInsightQueryApply as aj, type ExecutionTaskInsightQuery as ak, type ExecutionTaskInsightAssertionParam as al, type ExecutionTaskInsightAssertionApply as am, type ExecutionTaskInsightAssertion as an, type ExecutionTaskActionApply as ao, type ExecutionTaskAction as ap, type ExecutionTaskPlanningApply as aq, type ExecutionTaskPlanning as ar, type GroupedActionDump as as, type PageType as at, type LocateOption as au, type ReferenceImage as av, type scrollParam as aw, type MidsceneYamlScriptEnvBase as ax, type MidsceneYamlScriptWebEnv as ay, type MidsceneYamlScriptAndroidEnv as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractOption as g, type InsightAssertionResponse as h, type MidsceneYamlTask as i, type MidsceneYamlFlowItem as j, type MidsceneYamlFlowItemAIRightClick as k, type AIUsageInfo as l, AIResponseFormat as m, type AISingleElementResponseById as n, type AISingleElementResponseByPosition as o, type AISingleElementResponse as p, type AIElementLocatorResponse as q, type AIElementCoordinatesResponse as r, type AIElementResponse as s, type AIDataExtractionResponse as t, type AISectionLocatorResponse as u, type AIAssertionResponse as v, type LocatorValidatorOption as w, type LocateValidatorResult as x, type AgentDescribeElementAtPointResult as y, type EnsureObject as z };
@@ -1,4 +1,5 @@
1
- import { J as ReportDumpWithAttributes, R as Rect } from './types-c892e193.js';
1
+ import { R as ReportDumpWithAttributes } from './types-8c197f92.js';
2
+ import { Rect } from '@midscene/shared/types';
2
3
  import '@midscene/shared/constants';
3
4
  import 'openai/resources';
4
5
 
package/dist/es/utils.js CHANGED
@@ -12,7 +12,7 @@ import {
12
12
  uploadTestInfoToServer,
13
13
  writeDumpReport,
14
14
  writeLogFile
15
- } from "./chunk-CDRBBE7D.js";
15
+ } from "./chunk-A22YWG37.js";
16
16
  export {
17
17
  getTmpDir,
18
18
  getTmpFile,
@@ -1,10 +1,11 @@
1
- import { l as AIUsageInfo, Y as PlanningAction, i as MidsceneYamlFlowItem, S as Size } from './types-c892e193.js';
1
+ import { l as AIUsageInfo, T as PlanningAction, j as MidsceneYamlFlowItem } from './types-8c197f92.js';
2
2
  import { ChatCompletionMessageParam } from 'openai/resources';
3
3
  export { ChatCompletionMessageParam } from 'openai/resources';
4
- import { b as AIActionType } from './llm-planning-9cfa38ad.js';
5
- export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-9cfa38ad.js';
4
+ import { b as AIActionType } from './llm-planning-573b9b34.js';
5
+ export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-573b9b34.js';
6
6
  import { vlLocateMode } from '@midscene/shared/env';
7
7
  import { actionParser } from '@ui-tars/action-parser';
8
+ import { Size } from '@midscene/shared/types';
8
9
  import '@midscene/shared/constants';
9
10
 
10
11
  declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType): Promise<{
@@ -12,7 +12,7 @@
12
12
 
13
13
 
14
14
 
15
- var _chunk5HHR4GGSjs = require('./chunk-5HHR4GGS.js');
15
+ var _chunk7MNTWX2Ajs = require('./chunk-7MNTWX2A.js');
16
16
 
17
17
 
18
18
 
@@ -27,4 +27,4 @@ var _chunk5HHR4GGSjs = require('./chunk-5HHR4GGS.js');
27
27
 
28
28
 
29
29
 
30
- exports.AiAssert = _chunk5HHR4GGSjs.AiAssert; exports.AiExtractElementInfo = _chunk5HHR4GGSjs.AiExtractElementInfo; exports.AiLocateElement = _chunk5HHR4GGSjs.AiLocateElement; exports.AiLocateSection = _chunk5HHR4GGSjs.AiLocateSection; exports.adaptBboxToRect = _chunk5HHR4GGSjs.adaptBboxToRect; exports.callAiFn = _chunk5HHR4GGSjs.callAiFn; exports.callToGetJSONObject = _chunk5HHR4GGSjs.callToGetJSONObject; exports.describeUserPage = _chunk5HHR4GGSjs.describeUserPage; exports.elementByPositionWithElementInfo = _chunk5HHR4GGSjs.elementByPositionWithElementInfo; exports.plan = _chunk5HHR4GGSjs.plan; exports.resizeImageForUiTars = _chunk5HHR4GGSjs.resizeImageForUiTars; exports.systemPromptToLocateElement = _chunk5HHR4GGSjs.systemPromptToLocateElement; exports.vlmPlanning = _chunk5HHR4GGSjs.vlmPlanning;
30
+ exports.AiAssert = _chunk7MNTWX2Ajs.AiAssert; exports.AiExtractElementInfo = _chunk7MNTWX2Ajs.AiExtractElementInfo; exports.AiLocateElement = _chunk7MNTWX2Ajs.AiLocateElement; exports.AiLocateSection = _chunk7MNTWX2Ajs.AiLocateSection; exports.adaptBboxToRect = _chunk7MNTWX2Ajs.adaptBboxToRect; exports.callAiFn = _chunk7MNTWX2Ajs.callAiFn; exports.callToGetJSONObject = _chunk7MNTWX2Ajs.callToGetJSONObject; exports.describeUserPage = _chunk7MNTWX2Ajs.describeUserPage; exports.elementByPositionWithElementInfo = _chunk7MNTWX2Ajs.elementByPositionWithElementInfo; exports.plan = _chunk7MNTWX2Ajs.plan; exports.resizeImageForUiTars = _chunk7MNTWX2Ajs.resizeImageForUiTars; exports.systemPromptToLocateElement = _chunk7MNTWX2Ajs.systemPromptToLocateElement; exports.vlmPlanning = _chunk7MNTWX2Ajs.vlmPlanning;