@midscene/core 0.17.4-beta-20250526154612.0 → 0.17.4-beta-20250530041415.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/es/ai-model.d.ts +4 -3
  2. package/dist/es/ai-model.js +1 -1
  3. package/dist/es/{chunk-2AS6R2AR.js → chunk-7MNTWX2A.js} +57 -52
  4. package/dist/es/chunk-7MNTWX2A.js.map +1 -0
  5. package/dist/es/{chunk-3YRZWYIB.js → chunk-DR6LJT3K.js} +3 -3
  6. package/dist/es/index.d.ts +10 -8
  7. package/dist/es/index.js +5 -4
  8. package/dist/es/index.js.map +1 -1
  9. package/dist/es/{llm-planning-9c97f5c2.d.ts → llm-planning-573b9b34.d.ts} +5 -1
  10. package/dist/es/tree.d.ts +1 -12
  11. package/dist/es/tree.js +5 -94
  12. package/dist/es/tree.js.map +1 -1
  13. package/dist/es/{types-81bb2c02.d.ts → types-8c197f92.d.ts} +17 -36
  14. package/dist/es/utils.d.ts +2 -1
  15. package/dist/es/utils.js +1 -1
  16. package/dist/lib/ai-model.d.ts +4 -3
  17. package/dist/lib/ai-model.js +2 -2
  18. package/dist/lib/{chunk-2AS6R2AR.js → chunk-7MNTWX2A.js} +51 -46
  19. package/dist/lib/chunk-7MNTWX2A.js.map +1 -0
  20. package/dist/lib/{chunk-3YRZWYIB.js → chunk-DR6LJT3K.js} +3 -3
  21. package/dist/lib/index.d.ts +10 -8
  22. package/dist/lib/index.js +15 -14
  23. package/dist/lib/index.js.map +1 -1
  24. package/dist/lib/{llm-planning-9c97f5c2.d.ts → llm-planning-573b9b34.d.ts} +5 -1
  25. package/dist/lib/tree.d.ts +1 -12
  26. package/dist/lib/tree.js +7 -96
  27. package/dist/lib/tree.js.map +1 -1
  28. package/dist/{types/types-81bb2c02.d.ts → lib/types-8c197f92.d.ts} +17 -36
  29. package/dist/lib/utils.d.ts +2 -1
  30. package/dist/lib/utils.js +2 -2
  31. package/dist/types/ai-model.d.ts +4 -3
  32. package/dist/types/index.d.ts +10 -8
  33. package/dist/types/{llm-planning-9c97f5c2.d.ts → llm-planning-573b9b34.d.ts} +5 -1
  34. package/dist/types/tree.d.ts +1 -12
  35. package/dist/{lib/types-81bb2c02.d.ts → types/types-8c197f92.d.ts} +17 -36
  36. package/dist/types/utils.d.ts +2 -1
  37. package/package.json +2 -2
  38. package/dist/es/chunk-2AS6R2AR.js.map +0 -1
  39. package/dist/lib/chunk-2AS6R2AR.js.map +0 -1
  40. /package/dist/es/{chunk-3YRZWYIB.js.map → chunk-DR6LJT3K.js.map} +0 -0
  41. /package/dist/lib/{chunk-3YRZWYIB.js.map → chunk-DR6LJT3K.js.map} +0 -0
@@ -1,4 +1,5 @@
1
- import { H as ReportDumpWithAttributes, R as Rect } from './types-81bb2c02.js';
1
+ import { R as ReportDumpWithAttributes } from './types-8c197f92.js';
2
+ import { Rect } from '@midscene/shared/types';
2
3
  import '@midscene/shared/constants';
3
4
  import 'openai/resources';
4
5
 
package/dist/lib/utils.js CHANGED
@@ -12,7 +12,7 @@
12
12
 
13
13
 
14
14
 
15
- var _chunk3YRZWYIBjs = require('./chunk-3YRZWYIB.js');
15
+ var _chunkDR6LJT3Kjs = require('./chunk-DR6LJT3K.js');
16
16
 
17
17
 
18
18
 
@@ -27,4 +27,4 @@ var _chunk3YRZWYIBjs = require('./chunk-3YRZWYIB.js');
27
27
 
28
28
 
29
29
 
30
- exports.getTmpDir = _chunk3YRZWYIBjs.getTmpDir; exports.getTmpFile = _chunk3YRZWYIBjs.getTmpFile; exports.getVersion = _chunk3YRZWYIBjs.getVersion; exports.groupedActionDumpFileExt = _chunk3YRZWYIBjs.groupedActionDumpFileExt; exports.overlapped = _chunk3YRZWYIBjs.overlapped; exports.replaceStringWithFirstAppearance = _chunk3YRZWYIBjs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunk3YRZWYIBjs.replacerForPageObject; exports.reportHTMLContent = _chunk3YRZWYIBjs.reportHTMLContent; exports.sleep = _chunk3YRZWYIBjs.sleep; exports.stringifyDumpData = _chunk3YRZWYIBjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunk3YRZWYIBjs.uploadTestInfoToServer; exports.writeDumpReport = _chunk3YRZWYIBjs.writeDumpReport; exports.writeLogFile = _chunk3YRZWYIBjs.writeLogFile;
30
+ exports.getTmpDir = _chunkDR6LJT3Kjs.getTmpDir; exports.getTmpFile = _chunkDR6LJT3Kjs.getTmpFile; exports.getVersion = _chunkDR6LJT3Kjs.getVersion; exports.groupedActionDumpFileExt = _chunkDR6LJT3Kjs.groupedActionDumpFileExt; exports.overlapped = _chunkDR6LJT3Kjs.overlapped; exports.replaceStringWithFirstAppearance = _chunkDR6LJT3Kjs.replaceStringWithFirstAppearance; exports.replacerForPageObject = _chunkDR6LJT3Kjs.replacerForPageObject; exports.reportHTMLContent = _chunkDR6LJT3Kjs.reportHTMLContent; exports.sleep = _chunkDR6LJT3Kjs.sleep; exports.stringifyDumpData = _chunkDR6LJT3Kjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunkDR6LJT3Kjs.uploadTestInfoToServer; exports.writeDumpReport = _chunkDR6LJT3Kjs.writeDumpReport; exports.writeLogFile = _chunkDR6LJT3Kjs.writeLogFile;
@@ -1,10 +1,11 @@
1
- import { k as AIUsageInfo, X as PlanningAction, i as MidsceneYamlFlowItem, S as Size } from './types-81bb2c02.js';
1
+ import { l as AIUsageInfo, T as PlanningAction, j as MidsceneYamlFlowItem } from './types-8c197f92.js';
2
2
  import { ChatCompletionMessageParam } from 'openai/resources';
3
3
  export { ChatCompletionMessageParam } from 'openai/resources';
4
- import { b as AIActionType } from './llm-planning-9c97f5c2.js';
5
- export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-9c97f5c2.js';
4
+ import { b as AIActionType } from './llm-planning-573b9b34.js';
5
+ export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-573b9b34.js';
6
6
  import { vlLocateMode } from '@midscene/shared/env';
7
7
  import { actionParser } from '@ui-tars/action-parser';
8
+ import { Size } from '@midscene/shared/types';
8
9
  import '@midscene/shared/constants';
9
10
 
10
11
  declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType): Promise<{
@@ -1,7 +1,9 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightAssertionResponse, R as Rect, A as AIDescribeElementResponse } from './types-81bb2c02.js';
2
- export { u as AIAssertionResponse, s as AIDataExtractionResponse, q as AIElementCoordinatesResponse, p as AIElementLocatorResponse, r as AIElementResponse, l as AIResponseFormat, t as AISectionLocatorResponse, o as AISingleElementResponse, m as AISingleElementResponseById, n as AISingleElementResponseByPosition, k as AIUsageInfo, V as AgentAssertOpt, x as AgentDescribeElementAtPointResult, T as AgentWaitForOpt, a6 as BaseAgentParserOpt, C as CallAIFn, a5 as Color, G as DumpMeta, O as ElementById, j as ElementTreeNode, y as EnsureObject, a9 as ExecutionRecorderItem, ar as ExecutionTaskAction, aq as ExecutionTaskActionApply, ap as ExecutionTaskInsightAssertion, ao as ExecutionTaskInsightAssertionApply, an as ExecutionTaskInsightAssertionParam, ag as ExecutionTaskInsightDumpLog, ai as ExecutionTaskInsightLocate, ah as ExecutionTaskInsightLocateApply, af as ExecutionTaskInsightLocateOutput, ae as ExecutionTaskInsightLocateParam, am as ExecutionTaskInsightQuery, al as ExecutionTaskInsightQueryApply, ak as ExecutionTaskInsightQueryOutput, aj as ExecutionTaskInsightQueryParam, at as ExecutionTaskPlanning, as as ExecutionTaskPlanningApply, ad as ExecutionTaskReturn, aa as ExecutionTaskType, ab as ExecutorContext, aS as FreeFn, au as GroupedActionDump, J as InsightDump, z as InsightExtractParam, N as LiteUISection, aw as LocateOption, F as LocateResultElement, w as LocateValidatorResult, v as LocatorValidatorOption, i as MidsceneYamlFlowItem, aD as MidsceneYamlFlowItemAIAction, aE as MidsceneYamlFlowItemAIAssert, aI as MidsceneYamlFlowItemAIBoolean, aM as MidsceneYamlFlowItemAIHover, aN as MidsceneYamlFlowItemAIInput, aO as MidsceneYamlFlowItemAIKeyboardPress, aJ as MidsceneYamlFlowItemAILocate, aH as MidsceneYamlFlowItemAINString, aG as MidsceneYamlFlowItemAINumber, aF as MidsceneYamlFlowItemAIQuery, aP as MidsceneYamlFlowItemAIScroll, aL as MidsceneYamlFlowItemAITap, aK as MidsceneYamlFlowItemAIWaitFor, aQ as MidsceneYamlFlowItemEvaluateJavaScript, aR as 
MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aB as MidsceneYamlScriptAndroidEnv, aC as MidsceneYamlScriptEnv, az as MidsceneYamlScriptEnvBase, aA as MidsceneYamlScriptWebEnv, h as MidsceneYamlTask, Q as OnTaskStartTip, av as PageType, K as PartialInsightDumpFromSDK, Y as PlanningAIResponse, X as PlanningAction, a1 as PlanningActionParamAssert, a3 as PlanningActionParamError, _ as PlanningActionParamHover, $ as PlanningActionParamInputOrKeyPress, a0 as PlanningActionParamScroll, a2 as PlanningActionParamSleep, Z as PlanningActionParamTap, a4 as PlanningActionParamWaitFor, W as PlanningLocateParam, a8 as PlaywrightParserOpt, P as Point, a7 as PuppeteerParserOpt, ax as ReferenceImage, H as ReportDumpWithAttributes, aU as ScriptPlayerStatusValue, aT as ScriptPlayerTaskStatus, S as Size, ac as TaskCacheInfo, ay as scrollParam } from './types-81bb2c02.js';
3
- import { c as callAiFn } from './llm-planning-9c97f5c2.js';
4
- export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-9c97f5c2.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractOption, h as InsightAssertionResponse, A as AIDescribeElementResponse } from './types-8c197f92.js';
2
+ export { v as AIAssertionResponse, t as AIDataExtractionResponse, r as AIElementCoordinatesResponse, q as AIElementLocatorResponse, s as AIElementResponse, m as AIResponseFormat, u as AISectionLocatorResponse, p as AISingleElementResponse, n as AISingleElementResponseById, o as AISingleElementResponseByPosition, l as AIUsageInfo, Q as AgentAssertOpt, y as AgentDescribeElementAtPointResult, N as AgentWaitForOpt, a4 as BaseAgentParserOpt, C as CallAIFn, a3 as Color, G as DumpMeta, K as ElementById, z as EnsureObject, a7 as ExecutionRecorderItem, ap as ExecutionTaskAction, ao as ExecutionTaskActionApply, an as ExecutionTaskInsightAssertion, am as ExecutionTaskInsightAssertionApply, al as ExecutionTaskInsightAssertionParam, ae as ExecutionTaskInsightDumpLog, ag as ExecutionTaskInsightLocate, af as ExecutionTaskInsightLocateApply, ad as ExecutionTaskInsightLocateOutput, ac as ExecutionTaskInsightLocateParam, ak as ExecutionTaskInsightQuery, aj as ExecutionTaskInsightQueryApply, ai as ExecutionTaskInsightQueryOutput, ah as ExecutionTaskInsightQueryParam, ar as ExecutionTaskPlanning, aq as ExecutionTaskPlanningApply, ab as ExecutionTaskReturn, a8 as ExecutionTaskType, a9 as ExecutorContext, aQ as FreeFn, as as GroupedActionDump, H as InsightDump, B as InsightExtractParam, J as LiteUISection, au as LocateOption, F as LocateResultElement, x as LocateValidatorResult, w as LocatorValidatorOption, j as MidsceneYamlFlowItem, aB as MidsceneYamlFlowItemAIAction, aC as MidsceneYamlFlowItemAIAssert, aG as MidsceneYamlFlowItemAIBoolean, aK as MidsceneYamlFlowItemAIHover, aL as MidsceneYamlFlowItemAIInput, aM as MidsceneYamlFlowItemAIKeyboardPress, aH as MidsceneYamlFlowItemAILocate, aF as MidsceneYamlFlowItemAINString, aE as MidsceneYamlFlowItemAINumber, aD as MidsceneYamlFlowItemAIQuery, k as MidsceneYamlFlowItemAIRightClick, aN as MidsceneYamlFlowItemAIScroll, aJ as MidsceneYamlFlowItemAITap, aI as MidsceneYamlFlowItemAIWaitFor, aO as MidsceneYamlFlowItemEvaluateJavaScript, aP 
as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, az as MidsceneYamlScriptAndroidEnv, aA as MidsceneYamlScriptEnv, ax as MidsceneYamlScriptEnvBase, ay as MidsceneYamlScriptWebEnv, i as MidsceneYamlTask, O as OnTaskStartTip, at as PageType, P as PartialInsightDumpFromSDK, V as PlanningAIResponse, T as PlanningAction, $ as PlanningActionParamAssert, a1 as PlanningActionParamError, X as PlanningActionParamHover, Z as PlanningActionParamInputOrKeyPress, Y as PlanningActionParamRightClick, _ as PlanningActionParamScroll, a0 as PlanningActionParamSleep, W as PlanningActionParamTap, a2 as PlanningActionParamWaitFor, S as PlanningLocateParam, a6 as PlaywrightParserOpt, a5 as PuppeteerParserOpt, av as ReferenceImage, R as ReportDumpWithAttributes, aS as ScriptPlayerStatusValue, aR as ScriptPlayerTaskStatus, aa as TaskCacheInfo, aw as scrollParam } from './types-8c197f92.js';
3
+ import { c as callAiFn } from './llm-planning-573b9b34.js';
4
+ export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-573b9b34.js';
5
+ import { BaseElement, Rect } from '@midscene/shared/types';
6
+ export { BaseElement, ElementTreeNode, Point, Rect, Size } from '@midscene/shared/types';
5
7
  export { getVersion } from './utils.js';
6
8
  export { MIDSCENE_MODEL_NAME, getAIConfig } from '@midscene/shared/env';
7
9
  import '@midscene/shared/constants';
@@ -34,13 +36,13 @@ declare class Insight<ElementType extends BaseElement = BaseElement, ContextType
34
36
  taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;
35
37
  constructor(context: ContextType | ((action: InsightAction) => Promise<ContextType> | ContextType), opt?: InsightOptions);
36
38
  locate(query: DetailedLocateParam, opt?: LocateOpts): Promise<LocateResult>;
37
- extract<T = any>(input: string): Promise<T>;
38
- extract<T extends Record<string, string>>(input: T): Promise<Record<keyof T, any>>;
39
- extract<T extends object>(input: Record<keyof T, string>): Promise<T>;
39
+ extract<T = any>(input: string, opt?: InsightExtractOption): Promise<T>;
40
+ extract<T extends Record<string, string>>(input: T, opt?: InsightExtractOption): Promise<Record<keyof T, any>>;
41
+ extract<T extends object>(input: Record<keyof T, string>, opt?: InsightExtractOption): Promise<T>;
40
42
  assert(assertion: string): Promise<InsightAssertionResponse>;
41
43
  describe(target: Rect | [number, number], opt?: {
42
44
  deepThink?: boolean;
43
45
  }): Promise<Pick<AIDescribeElementResponse, 'description'>>;
44
46
  }
45
47
 
46
- export { AIDescribeElementResponse, BaseElement, DetailedLocateParam, DumpSubscriber, ExecutionDump, ExecutionTask, ExecutionTaskApply, ExecutionTaskProgressOptions, Executor, Insight, InsightAction, InsightAssertionResponse, InsightOptions, InsightTaskInfo, LocateResult, Rect, UIContext, Insight as default };
48
+ export { AIDescribeElementResponse, DetailedLocateParam, DumpSubscriber, ExecutionDump, ExecutionTask, ExecutionTaskApply, ExecutionTaskProgressOptions, Executor, Insight, InsightAction, InsightAssertionResponse, InsightExtractOption, InsightOptions, InsightTaskInfo, LocateResult, UIContext, Insight as default };
@@ -1,4 +1,5 @@
1
- import { k as AIUsageInfo, R as Rect, j as ElementTreeNode, B as BaseElement, U as UIContext, ax as ReferenceImage, p as AIElementLocatorResponse, O as ElementById, s as AIDataExtractionResponse, u as AIAssertionResponse, av as PageType, Y as PlanningAIResponse } from './types-81bb2c02.js';
1
+ import { l as AIUsageInfo, U as UIContext, av as ReferenceImage, q as AIElementLocatorResponse, K as ElementById, g as InsightExtractOption, t as AIDataExtractionResponse, v as AIAssertionResponse, at as PageType, V as PlanningAIResponse } from './types-8c197f92.js';
2
+ import { Rect, ElementTreeNode, BaseElement } from '@midscene/shared/types';
2
3
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
3
4
 
4
5
  type AIArgs = [
@@ -28,6 +29,8 @@ declare function elementByPositionWithElementInfo(treeRoot: ElementTreeNode<Base
28
29
  declare function describeUserPage<ElementType extends BaseElement = BaseElement>(context: Omit<UIContext<ElementType>, 'describer'>, opt?: {
29
30
  truncateTextLength?: number;
30
31
  filterNonTextContent?: boolean;
32
+ domIncluded?: boolean;
33
+ visibleOnly?: boolean;
31
34
  }): Promise<{
32
35
  description: string;
33
36
  elementById(idOrIndexId: string): ElementType;
@@ -75,6 +78,7 @@ declare function AiLocateSection(options: {
75
78
  declare function AiExtractElementInfo<T, ElementType extends BaseElement = BaseElement>(options: {
76
79
  dataQuery: string | Record<string, string>;
77
80
  context: UIContext<ElementType>;
81
+ extractOption?: InsightExtractOption;
78
82
  }): Promise<{
79
83
  parseResult: AIDataExtractionResponse<T>;
80
84
  elementById: (idOrIndexId: string) => ElementType;
@@ -1,12 +1 @@
1
- import * as _midscene_shared_constants from '@midscene/shared/constants';
2
- import { B as BaseElement, j as ElementTreeNode } from './types-81bb2c02.js';
3
- import 'openai/resources';
4
-
5
- declare function truncateText(text: string | number | object | undefined, maxLength?: number): string;
6
- declare function trimAttributes(attributes: Record<string, any>, truncateTextLength?: number): {
7
- [key: string]: string;
8
- nodeType: _midscene_shared_constants.NodeType;
9
- };
10
- declare function descriptionOfTree<ElementType extends BaseElement = BaseElement>(tree: ElementTreeNode<ElementType>, truncateTextLength?: number, filterNonTextContent?: boolean): string;
11
-
12
- export { descriptionOfTree, trimAttributes, truncateText };
1
+ export { descriptionOfTree, trimAttributes, truncateText } from '@midscene/shared/extractor';
@@ -1,4 +1,5 @@
1
1
  import { NodeType } from '@midscene/shared/constants';
2
+ import { Rect, BaseElement, ElementTreeNode, Size } from '@midscene/shared/types';
2
3
  import { ChatCompletionMessageParam } from 'openai/resources';
3
4
 
4
5
  interface LocateOption {
@@ -6,6 +7,10 @@ interface LocateOption {
6
7
  deepThink?: boolean;
7
8
  cacheable?: boolean;
8
9
  }
10
+ interface InsightExtractOption {
11
+ domIncluded?: boolean;
12
+ screenshotIncluded?: boolean;
13
+ }
9
14
  interface ReferenceImage {
10
15
  base64: string;
11
16
  rect?: Rect;
@@ -64,20 +69,21 @@ interface MidsceneYamlFlowItemAIAction {
64
69
  }
65
70
  interface MidsceneYamlFlowItemAIAssert {
66
71
  aiAssert: string;
72
+ errorMessage?: string;
67
73
  }
68
- interface MidsceneYamlFlowItemAIQuery {
74
+ interface MidsceneYamlFlowItemAIQuery extends InsightExtractOption {
69
75
  aiQuery: string;
70
76
  name?: string;
71
77
  }
72
- interface MidsceneYamlFlowItemAINumber {
78
+ interface MidsceneYamlFlowItemAINumber extends InsightExtractOption {
73
79
  aiNumber: string;
74
80
  name?: string;
75
81
  }
76
- interface MidsceneYamlFlowItemAINString {
82
+ interface MidsceneYamlFlowItemAINString extends InsightExtractOption {
77
83
  aiString: string;
78
84
  name?: string;
79
85
  }
80
- interface MidsceneYamlFlowItemAIBoolean {
86
+ interface MidsceneYamlFlowItemAIBoolean extends InsightExtractOption {
81
87
  aiBoolean: string;
82
88
  name?: string;
83
89
  }
@@ -92,6 +98,9 @@ interface MidsceneYamlFlowItemAIWaitFor {
92
98
  interface MidsceneYamlFlowItemAITap extends LocateOption {
93
99
  aiTap: string;
94
100
  }
101
+ interface MidsceneYamlFlowItemAIRightClick extends LocateOption {
102
+ aiRightClick: string;
103
+ }
95
104
  interface MidsceneYamlFlowItemAIHover extends LocateOption {
96
105
  aiHover: string;
97
106
  }
@@ -114,7 +123,7 @@ interface MidsceneYamlFlowItemEvaluateJavaScript {
114
123
  interface MidsceneYamlFlowItemSleep {
115
124
  sleep: number;
116
125
  }
117
- type MidsceneYamlFlowItem = MidsceneYamlFlowItemAIAction | MidsceneYamlFlowItemAIAssert | MidsceneYamlFlowItemAIQuery | MidsceneYamlFlowItemAIWaitFor | MidsceneYamlFlowItemAITap | MidsceneYamlFlowItemAIHover | MidsceneYamlFlowItemAIInput | MidsceneYamlFlowItemAIKeyboardPress | MidsceneYamlFlowItemAIScroll | MidsceneYamlFlowItemSleep;
126
+ type MidsceneYamlFlowItem = MidsceneYamlFlowItemAIAction | MidsceneYamlFlowItemAIAssert | MidsceneYamlFlowItemAIQuery | MidsceneYamlFlowItemAIWaitFor | MidsceneYamlFlowItemAITap | MidsceneYamlFlowItemAIRightClick | MidsceneYamlFlowItemAIHover | MidsceneYamlFlowItemAIInput | MidsceneYamlFlowItemAIKeyboardPress | MidsceneYamlFlowItemAIScroll | MidsceneYamlFlowItemSleep;
118
127
  interface FreeFn {
119
128
  name: string;
120
129
  fn: () => void;
@@ -127,35 +136,6 @@ interface ScriptPlayerTaskStatus extends MidsceneYamlTask {
127
136
  }
128
137
  type ScriptPlayerStatusValue = 'init' | 'running' | 'done' | 'error';
129
138
 
130
- interface Point {
131
- left: number;
132
- top: number;
133
- }
134
- interface Size {
135
- width: number;
136
- height: number;
137
- dpr?: number;
138
- }
139
- type Rect = Point & Size & {
140
- zoom?: number;
141
- };
142
- declare abstract class BaseElement {
143
- abstract id: string;
144
- abstract indexId?: number;
145
- abstract attributes: {
146
- nodeType: NodeType;
147
- [key: string]: string;
148
- };
149
- abstract content: string;
150
- abstract rect: Rect;
151
- abstract center: [number, number];
152
- abstract locator?: string;
153
- abstract xpaths?: string[];
154
- }
155
- interface ElementTreeNode<ElementType extends BaseElement = BaseElement> {
156
- node: ElementType | null;
157
- children: ElementTreeNode<ElementType>[];
158
- }
159
139
  type AIUsageInfo = Record<string, any> & {
160
140
  prompt_tokens: number;
161
141
  completion_tokens: number;
@@ -338,7 +318,7 @@ interface PlanningLocateParam extends DetailedLocateParam {
338
318
  }
339
319
  interface PlanningAction<ParamType = any> {
340
320
  thought?: string;
341
- type: 'Locate' | 'Tap' | 'Drag' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'ExpectedFalsyCondition' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished' | 'AndroidBackButton' | 'AndroidHomeButton' | 'AndroidRecentAppsButton';
321
+ type: 'Locate' | 'Tap' | 'RightClick' | 'Hover' | 'Drag' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'ExpectedFalsyCondition' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished' | 'AndroidBackButton' | 'AndroidHomeButton' | 'AndroidRecentAppsButton';
342
322
  param: ParamType;
343
323
  locate?: PlanningLocateParam | null;
344
324
  }
@@ -356,6 +336,7 @@ interface PlanningAIResponse {
356
336
  }
357
337
  type PlanningActionParamTap = null;
358
338
  type PlanningActionParamHover = null;
339
+ type PlanningActionParamRightClick = null;
359
340
  interface PlanningActionParamInputOrKeyPress {
360
341
  value: string;
361
342
  }
@@ -472,4 +453,4 @@ interface GroupedActionDump {
472
453
  }
473
454
  type PageType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android';
474
455
 
475
- export { type PlanningActionParamInputOrKeyPress as $, type AIDescribeElementResponse as A, BaseElement as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type LocateResultElement as F, type DumpMeta as G, type ReportDumpWithAttributes as H, type InsightAction as I, type InsightDump as J, type PartialInsightDumpFromSDK as K, type LocateResult as L, type MidsceneYamlScript as M, type LiteUISection as N, type ElementById as O, type Point as P, type OnTaskStartTip as Q, type Rect as R, type Size as S, type AgentWaitForOpt as T, UIContext as U, type AgentAssertOpt as V, type PlanningLocateParam as W, type PlanningAction as X, type PlanningAIResponse as Y, type PlanningActionParamTap as Z, type PlanningActionParamHover as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamScroll as a0, type PlanningActionParamAssert as a1, type PlanningActionParamSleep as a2, type PlanningActionParamError as a3, type PlanningActionParamWaitFor as a4, type Color as a5, type BaseAgentParserOpt as a6, type PuppeteerParserOpt as a7, type PlaywrightParserOpt as a8, type ExecutionRecorderItem as a9, type MidsceneYamlScriptWebEnv as aA, type MidsceneYamlScriptAndroidEnv as aB, type MidsceneYamlScriptEnv as aC, type MidsceneYamlFlowItemAIAction as aD, type MidsceneYamlFlowItemAIAssert as aE, type MidsceneYamlFlowItemAIQuery as aF, type MidsceneYamlFlowItemAINumber as aG, type MidsceneYamlFlowItemAINString as aH, type MidsceneYamlFlowItemAIBoolean as aI, type MidsceneYamlFlowItemAILocate as aJ, type MidsceneYamlFlowItemAIWaitFor as aK, type MidsceneYamlFlowItemAITap as aL, type MidsceneYamlFlowItemAIHover as aM, type MidsceneYamlFlowItemAIInput as aN, type MidsceneYamlFlowItemAIKeyboardPress as aO, type MidsceneYamlFlowItemAIScroll as aP, type MidsceneYamlFlowItemEvaluateJavaScript as aQ, type MidsceneYamlFlowItemSleep as aR, type FreeFn as aS, type ScriptPlayerTaskStatus as aT, type ScriptPlayerStatusValue as aU, type ExecutionTaskType as aa, type 
ExecutorContext as ab, type TaskCacheInfo as ac, type ExecutionTaskReturn as ad, type ExecutionTaskInsightLocateParam as ae, type ExecutionTaskInsightLocateOutput as af, type ExecutionTaskInsightDumpLog as ag, type ExecutionTaskInsightLocateApply as ah, type ExecutionTaskInsightLocate as ai, type ExecutionTaskInsightQueryParam as aj, type ExecutionTaskInsightQueryOutput as ak, type ExecutionTaskInsightQueryApply as al, type ExecutionTaskInsightQuery as am, type ExecutionTaskInsightAssertionParam as an, type ExecutionTaskInsightAssertionApply as ao, type ExecutionTaskInsightAssertion as ap, type ExecutionTaskActionApply as aq, type ExecutionTaskAction as ar, type ExecutionTaskPlanningApply as as, type ExecutionTaskPlanning as at, type GroupedActionDump as au, type PageType as av, type LocateOption as aw, type ReferenceImage as ax, type scrollParam as ay, type MidsceneYamlScriptEnvBase as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightAssertionResponse as g, type MidsceneYamlTask as h, type MidsceneYamlFlowItem as i, type ElementTreeNode as j, type AIUsageInfo as k, AIResponseFormat as l, type AISingleElementResponseById as m, type AISingleElementResponseByPosition as n, type AISingleElementResponse as o, type AIElementLocatorResponse as p, type AIElementCoordinatesResponse as q, type AIElementResponse as r, type AIDataExtractionResponse as s, type AISectionLocatorResponse as t, type AIAssertionResponse as u, type LocatorValidatorOption as v, type LocateValidatorResult as w, type AgentDescribeElementAtPointResult as x, type EnsureObject as y, type InsightExtractParam as z };
456
+ export { type PlanningActionParamAssert as $, type AIDescribeElementResponse as A, type InsightExtractParam as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type LocateResultElement as F, type DumpMeta as G, type InsightDump as H, type InsightAction as I, type LiteUISection as J, type ElementById as K, type LocateResult as L, type MidsceneYamlScript as M, type AgentWaitForOpt as N, type OnTaskStartTip as O, type PartialInsightDumpFromSDK as P, type AgentAssertOpt as Q, type ReportDumpWithAttributes as R, type PlanningLocateParam as S, type PlanningAction as T, UIContext as U, type PlanningAIResponse as V, type PlanningActionParamTap as W, type PlanningActionParamHover as X, type PlanningActionParamRightClick as Y, type PlanningActionParamInputOrKeyPress as Z, type PlanningActionParamScroll as _, type ExecutionTaskProgressOptions as a, type PlanningActionParamSleep as a0, type PlanningActionParamError as a1, type PlanningActionParamWaitFor as a2, type Color as a3, type BaseAgentParserOpt as a4, type PuppeteerParserOpt as a5, type PlaywrightParserOpt as a6, type ExecutionRecorderItem as a7, type ExecutionTaskType as a8, type ExecutorContext as a9, type MidsceneYamlScriptEnv as aA, type MidsceneYamlFlowItemAIAction as aB, type MidsceneYamlFlowItemAIAssert as aC, type MidsceneYamlFlowItemAIQuery as aD, type MidsceneYamlFlowItemAINumber as aE, type MidsceneYamlFlowItemAINString as aF, type MidsceneYamlFlowItemAIBoolean as aG, type MidsceneYamlFlowItemAILocate as aH, type MidsceneYamlFlowItemAIWaitFor as aI, type MidsceneYamlFlowItemAITap as aJ, type MidsceneYamlFlowItemAIHover as aK, type MidsceneYamlFlowItemAIInput as aL, type MidsceneYamlFlowItemAIKeyboardPress as aM, type MidsceneYamlFlowItemAIScroll as aN, type MidsceneYamlFlowItemEvaluateJavaScript as aO, type MidsceneYamlFlowItemSleep as aP, type FreeFn as aQ, type ScriptPlayerTaskStatus as aR, type ScriptPlayerStatusValue as aS, type TaskCacheInfo as aa, type ExecutionTaskReturn as 
ab, type ExecutionTaskInsightLocateParam as ac, type ExecutionTaskInsightLocateOutput as ad, type ExecutionTaskInsightDumpLog as ae, type ExecutionTaskInsightLocateApply as af, type ExecutionTaskInsightLocate as ag, type ExecutionTaskInsightQueryParam as ah, type ExecutionTaskInsightQueryOutput as ai, type ExecutionTaskInsightQueryApply as aj, type ExecutionTaskInsightQuery as ak, type ExecutionTaskInsightAssertionParam as al, type ExecutionTaskInsightAssertionApply as am, type ExecutionTaskInsightAssertion as an, type ExecutionTaskActionApply as ao, type ExecutionTaskAction as ap, type ExecutionTaskPlanningApply as aq, type ExecutionTaskPlanning as ar, type GroupedActionDump as as, type PageType as at, type LocateOption as au, type ReferenceImage as av, type scrollParam as aw, type MidsceneYamlScriptEnvBase as ax, type MidsceneYamlScriptWebEnv as ay, type MidsceneYamlScriptAndroidEnv as az, type ExecutionTaskApply as b, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractOption as g, type InsightAssertionResponse as h, type MidsceneYamlTask as i, type MidsceneYamlFlowItem as j, type MidsceneYamlFlowItemAIRightClick as k, type AIUsageInfo as l, AIResponseFormat as m, type AISingleElementResponseById as n, type AISingleElementResponseByPosition as o, type AISingleElementResponse as p, type AIElementLocatorResponse as q, type AIElementCoordinatesResponse as r, type AIElementResponse as s, type AIDataExtractionResponse as t, type AISectionLocatorResponse as u, type AIAssertionResponse as v, type LocatorValidatorOption as w, type LocateValidatorResult as x, type AgentDescribeElementAtPointResult as y, type EnsureObject as z };
@@ -1,4 +1,5 @@
1
- import { H as ReportDumpWithAttributes, R as Rect } from './types-81bb2c02.js';
1
+ import { R as ReportDumpWithAttributes } from './types-8c197f92.js';
2
+ import { Rect } from '@midscene/shared/types';
2
3
  import '@midscene/shared/constants';
3
4
  import 'openai/resources';
4
5
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "0.17.4-beta-20250526154612.0",
4
+ "version": "0.17.4-beta-20250530041415.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "jsnext:source": "./src/index.ts",
@@ -45,7 +45,7 @@
45
45
  "openai": "4.81.0",
46
46
  "socks-proxy-agent": "8.0.4",
47
47
  "xss": "1.0.15",
48
- "@midscene/shared": "0.17.4-beta-20250526154612.0"
48
+ "@midscene/shared": "0.17.4-beta-20250530041415.0"
49
49
  },
50
50
  "devDependencies": {
51
51
  "@modern-js/module-tools": "2.60.6",