@midscene/core 0.24.2-beta-20250731102409.0 → 0.24.2-beta-20250731132300.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/dist/es/ai-model.d.ts +3 -3
  2. package/dist/es/ai-model.js +1 -1
  3. package/dist/es/{chunk-YCHAOUOW.js → chunk-FKQMUAXP.js} +46 -15
  4. package/dist/es/chunk-FKQMUAXP.js.map +1 -0
  5. package/dist/es/{chunk-TYKJHDNK.js → chunk-IMZJSEAX.js} +3 -3
  6. package/dist/es/index.d.ts +4 -4
  7. package/dist/es/index.js +2 -2
  8. package/dist/es/{llm-planning-4bc82162.d.ts → llm-planning-d7096b0d.d.ts} +1 -1
  9. package/dist/es/{types-93fa8419.d.ts → types-d836fa73.d.ts} +16 -2
  10. package/dist/es/utils.d.ts +1 -1
  11. package/dist/es/utils.js +1 -1
  12. package/dist/lib/ai-model.d.ts +3 -3
  13. package/dist/lib/ai-model.js +2 -2
  14. package/dist/lib/{chunk-YCHAOUOW.js → chunk-FKQMUAXP.js} +55 -24
  15. package/dist/lib/chunk-FKQMUAXP.js.map +1 -0
  16. package/dist/lib/{chunk-TYKJHDNK.js → chunk-IMZJSEAX.js} +3 -3
  17. package/dist/lib/index.d.ts +4 -4
  18. package/dist/lib/index.js +12 -12
  19. package/dist/lib/{llm-planning-4bc82162.d.ts → llm-planning-d7096b0d.d.ts} +1 -1
  20. package/dist/{types/types-93fa8419.d.ts → lib/types-d836fa73.d.ts} +16 -2
  21. package/dist/lib/utils.d.ts +1 -1
  22. package/dist/lib/utils.js +2 -2
  23. package/dist/types/ai-model.d.ts +3 -3
  24. package/dist/types/index.d.ts +4 -4
  25. package/dist/types/{llm-planning-4bc82162.d.ts → llm-planning-d7096b0d.d.ts} +1 -1
  26. package/dist/{lib/types-93fa8419.d.ts → types/types-d836fa73.d.ts} +16 -2
  27. package/dist/types/utils.d.ts +1 -1
  28. package/package.json +3 -3
  29. package/dist/es/chunk-YCHAOUOW.js.map +0 -1
  30. package/dist/lib/chunk-YCHAOUOW.js.map +0 -1
  31. package/dist/es/{chunk-TYKJHDNK.js.map → chunk-IMZJSEAX.js.map} +0 -0
  32. package/dist/lib/{chunk-TYKJHDNK.js.map → chunk-IMZJSEAX.js.map} +0 -0
@@ -1,7 +1,7 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractOption, h as InsightAssertionResponse, A as AIDescribeElementResponse } from './types-93fa8419.js';
2
- export { w as AIAssertionResponse, u as AIDataExtractionResponse, s as AIElementCoordinatesResponse, r as AIElementLocatorResponse, t as AIElementResponse, n as AIResponseFormat, v as AISectionLocatorResponse, q as AISingleElementResponse, o as AISingleElementResponseById, p as AISingleElementResponseByPosition, m as AIUsageInfo, S as AgentAssertOpt, z as AgentDescribeElementAtPointResult, Q as AgentWaitForOpt, a5 as BaseAgentParserOpt, C as CallAIFn, az as CodeGenerationChunk, a4 as Color, H as DumpMeta, N as ElementById, B as EnsureObject, a8 as ExecutionRecorderItem, aq as ExecutionTaskAction, ap as ExecutionTaskActionApply, ab as ExecutionTaskHitBy, ao as ExecutionTaskInsightAssertion, an as ExecutionTaskInsightAssertionApply, am as ExecutionTaskInsightAssertionParam, af as ExecutionTaskInsightDumpLog, ah as ExecutionTaskInsightLocate, ag as ExecutionTaskInsightLocateApply, ae as ExecutionTaskInsightLocateOutput, ad as ExecutionTaskInsightLocateParam, al as ExecutionTaskInsightQuery, ak as ExecutionTaskInsightQueryApply, aj as ExecutionTaskInsightQueryOutput, ai as ExecutionTaskInsightQueryParam, as as ExecutionTaskLog, ar as ExecutionTaskLogApply, au as ExecutionTaskPlanning, at as ExecutionTaskPlanningApply, ac as ExecutionTaskReturn, a9 as ExecutionTaskType, aa as ExecutorContext, aZ as FreeFn, av as GroupedActionDump, J as InsightDump, F as InsightExtractParam, K as LiteUISection, aB as LocateOption, G as LocateResultElement, y as LocateValidatorResult, x as LocatorValidatorOption, b0 as MidsceneYamlConfig, b1 as MidsceneYamlConfigOutput, l as MidsceneYamlConfigResult, j as MidsceneYamlFlowItem, aI as MidsceneYamlFlowItemAIAction, aN as MidsceneYamlFlowItemAIAsk, aJ as MidsceneYamlFlowItemAIAssert, aO as MidsceneYamlFlowItemAIBoolean, aS as MidsceneYamlFlowItemAIHover, aT as MidsceneYamlFlowItemAIInput, aU as MidsceneYamlFlowItemAIKeyboardPress, aP as MidsceneYamlFlowItemAILocate, aL as MidsceneYamlFlowItemAINumber, aK as MidsceneYamlFlowItemAIQuery, k as 
MidsceneYamlFlowItemAIRightClick, aV as MidsceneYamlFlowItemAIScroll, aM as MidsceneYamlFlowItemAIString, aR as MidsceneYamlFlowItemAITap, aQ as MidsceneYamlFlowItemAIWaitFor, aW as MidsceneYamlFlowItemEvaluateJavaScript, aY as MidsceneYamlFlowItemLogScreenshot, aX as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aG as MidsceneYamlScriptAndroidEnv, aH as MidsceneYamlScriptEnv, aE as MidsceneYamlScriptEnvBase, aF as MidsceneYamlScriptWebEnv, i as MidsceneYamlTask, O as OnTaskStartTip, aw as PageType, P as PartialInsightDumpFromSDK, W as PlanningAIResponse, V as PlanningAction, a0 as PlanningActionParamAssert, a2 as PlanningActionParamError, Y as PlanningActionParamHover, _ as PlanningActionParamInputOrKeyPress, Z as PlanningActionParamRightClick, $ as PlanningActionParamScroll, a1 as PlanningActionParamSleep, X as PlanningActionParamTap, a3 as PlanningActionParamWaitFor, T as PlanningLocateParam, a7 as PlaywrightParserOpt, a6 as PuppeteerParserOpt, aC as ReferenceImage, R as ReportDumpWithAttributes, a$ as ScriptPlayerStatusValue, a_ as ScriptPlayerTaskStatus, aA as StreamingAIResponse, ay as StreamingCallback, ax as StreamingCodeGenerationOptions, aD as scrollParam } from './types-93fa8419.js';
3
- import { c as callAiFn } from './llm-planning-4bc82162.js';
4
- export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-4bc82162.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractOption, h as InsightAssertionResponse, A as AIDescribeElementResponse } from './types-d836fa73.js';
2
+ export { w as AIAssertionResponse, u as AIDataExtractionResponse, s as AIElementCoordinatesResponse, r as AIElementLocatorResponse, t as AIElementResponse, n as AIResponseFormat, v as AISectionLocatorResponse, q as AISingleElementResponse, o as AISingleElementResponseById, p as AISingleElementResponseByPosition, m as AIUsageInfo, S as AgentAssertOpt, z as AgentDescribeElementAtPointResult, Q as AgentWaitForOpt, a7 as BaseAgentParserOpt, C as CallAIFn, aB as CodeGenerationChunk, a6 as Color, H as DumpMeta, N as ElementById, B as EnsureObject, aa as ExecutionRecorderItem, as as ExecutionTaskAction, ar as ExecutionTaskActionApply, ad as ExecutionTaskHitBy, aq as ExecutionTaskInsightAssertion, ap as ExecutionTaskInsightAssertionApply, ao as ExecutionTaskInsightAssertionParam, ah as ExecutionTaskInsightDumpLog, aj as ExecutionTaskInsightLocate, ai as ExecutionTaskInsightLocateApply, ag as ExecutionTaskInsightLocateOutput, af as ExecutionTaskInsightLocateParam, an as ExecutionTaskInsightQuery, am as ExecutionTaskInsightQueryApply, al as ExecutionTaskInsightQueryOutput, ak as ExecutionTaskInsightQueryParam, au as ExecutionTaskLog, at as ExecutionTaskLogApply, aw as ExecutionTaskPlanning, av as ExecutionTaskPlanningApply, ae as ExecutionTaskReturn, ab as ExecutionTaskType, ac as ExecutorContext, a$ as FreeFn, ax as GroupedActionDump, J as InsightDump, F as InsightExtractParam, K as LiteUISection, aD as LocateOption, G as LocateResultElement, y as LocateValidatorResult, x as LocatorValidatorOption, b2 as MidsceneYamlConfig, b3 as MidsceneYamlConfigOutput, l as MidsceneYamlConfigResult, j as MidsceneYamlFlowItem, aK as MidsceneYamlFlowItemAIAction, aP as MidsceneYamlFlowItemAIAsk, aL as MidsceneYamlFlowItemAIAssert, aQ as MidsceneYamlFlowItemAIBoolean, aU as MidsceneYamlFlowItemAIHover, aV as MidsceneYamlFlowItemAIInput, aW as MidsceneYamlFlowItemAIKeyboardPress, aR as MidsceneYamlFlowItemAILocate, aN as MidsceneYamlFlowItemAINumber, aM as MidsceneYamlFlowItemAIQuery, k as 
MidsceneYamlFlowItemAIRightClick, aX as MidsceneYamlFlowItemAIScroll, aO as MidsceneYamlFlowItemAIString, aT as MidsceneYamlFlowItemAITap, aS as MidsceneYamlFlowItemAIWaitFor, aY as MidsceneYamlFlowItemEvaluateJavaScript, a_ as MidsceneYamlFlowItemLogScreenshot, aZ as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aI as MidsceneYamlScriptAndroidEnv, aJ as MidsceneYamlScriptEnv, aG as MidsceneYamlScriptEnvBase, aH as MidsceneYamlScriptWebEnv, i as MidsceneYamlTask, O as OnTaskStartTip, ay as PageType, P as PartialInsightDumpFromSDK, W as PlanningAIResponse, V as PlanningAction, a4 as PlanningActionParamAndroidLongPress, a5 as PlanningActionParamAndroidPull, a0 as PlanningActionParamAssert, a2 as PlanningActionParamError, Y as PlanningActionParamHover, _ as PlanningActionParamInputOrKeyPress, Z as PlanningActionParamRightClick, $ as PlanningActionParamScroll, a1 as PlanningActionParamSleep, X as PlanningActionParamTap, a3 as PlanningActionParamWaitFor, T as PlanningLocateParam, a9 as PlaywrightParserOpt, a8 as PuppeteerParserOpt, aE as ReferenceImage, R as ReportDumpWithAttributes, b1 as ScriptPlayerStatusValue, b0 as ScriptPlayerTaskStatus, aC as StreamingAIResponse, aA as StreamingCallback, az as StreamingCodeGenerationOptions, aF as scrollParam } from './types-d836fa73.js';
3
+ import { c as callAiFn } from './llm-planning-d7096b0d.js';
4
+ export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-d7096b0d.js';
5
5
  import { BaseElement, Rect } from '@midscene/shared/types';
6
6
  export { BaseElement, ElementTreeNode, Point, Rect, Size } from '@midscene/shared/types';
7
7
  export { getVersion } from './utils.js';
package/dist/es/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  getVersion
3
- } from "./chunk-TYKJHDNK.js";
3
+ } from "./chunk-IMZJSEAX.js";
4
4
  import {
5
5
  AiAssert,
6
6
  AiExtractElementInfo,
@@ -11,7 +11,7 @@ import {
11
11
  describeUserPage,
12
12
  expandSearchArea,
13
13
  plan
14
- } from "./chunk-YCHAOUOW.js";
14
+ } from "./chunk-FKQMUAXP.js";
15
15
 
16
16
  // src/ai-model/action-executor.ts
17
17
  import {
@@ -1,4 +1,4 @@
1
- import { m as AIUsageInfo, U as UIContext, aC as ReferenceImage, r as AIElementLocatorResponse, N as ElementById, g as InsightExtractOption, u as AIDataExtractionResponse, w as AIAssertionResponse, aw as PageType, W as PlanningAIResponse } from './types-93fa8419.js';
1
+ import { m as AIUsageInfo, U as UIContext, aE as ReferenceImage, r as AIElementLocatorResponse, N as ElementById, g as InsightExtractOption, u as AIDataExtractionResponse, w as AIAssertionResponse, ay as PageType, W as PlanningAIResponse } from './types-d836fa73.js';
2
2
  import { Rect, ElementTreeNode, BaseElement } from '@midscene/shared/types';
3
3
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
4
4
 
@@ -357,7 +357,7 @@ interface PlanningLocateParam extends DetailedLocateParam {
357
357
  }
358
358
  interface PlanningAction<ParamType = any> {
359
359
  thought?: string;
360
- type: 'Locate' | 'Tap' | 'RightClick' | 'Hover' | 'Drag' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'ExpectedFalsyCondition' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished' | 'AndroidBackButton' | 'AndroidHomeButton' | 'AndroidRecentAppsButton';
360
+ type: 'Locate' | 'Tap' | 'RightClick' | 'Hover' | 'Drag' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'ExpectedFalsyCondition' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished' | 'AndroidBackButton' | 'AndroidHomeButton' | 'AndroidRecentAppsButton' | 'AndroidLongPress' | 'AndroidPull';
361
361
  param: ParamType;
362
362
  locate?: PlanningLocateParam | null;
363
363
  }
@@ -393,6 +393,20 @@ interface PlanningActionParamError {
393
393
  type PlanningActionParamWaitFor = AgentWaitForOpt & {
394
394
  assertion: string;
395
395
  };
396
+ interface PlanningActionParamAndroidLongPress {
397
+ x: number;
398
+ y: number;
399
+ duration?: number;
400
+ }
401
+ interface PlanningActionParamAndroidPull {
402
+ direction: 'up' | 'down';
403
+ startPoint?: {
404
+ x: number;
405
+ y: number;
406
+ };
407
+ distance?: number;
408
+ duration?: number;
409
+ }
396
410
  /**
397
411
  * misc
398
412
  */
@@ -529,4 +543,4 @@ interface StreamingAIResponse {
529
543
  isStreamed: boolean;
530
544
  }
531
545
 
532
- export { type PlanningActionParamScroll as $, type AIDescribeElementResponse as A, type EnsureObject as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type InsightExtractParam as F, type LocateResultElement as G, type DumpMeta as H, type InsightAction as I, type InsightDump as J, type LiteUISection as K, type LocateResult as L, type MidsceneYamlScript as M, type ElementById as N, type OnTaskStartTip as O, type PartialInsightDumpFromSDK as P, type AgentWaitForOpt as Q, type ReportDumpWithAttributes as R, type AgentAssertOpt as S, type PlanningLocateParam as T, UIContext as U, type PlanningAction as V, type PlanningAIResponse as W, type PlanningActionParamTap as X, type PlanningActionParamHover as Y, type PlanningActionParamRightClick as Z, type PlanningActionParamInputOrKeyPress as _, type ExecutionTaskProgressOptions as a, type ScriptPlayerStatusValue as a$, type PlanningActionParamAssert as a0, type PlanningActionParamSleep as a1, type PlanningActionParamError as a2, type PlanningActionParamWaitFor as a3, type Color as a4, type BaseAgentParserOpt as a5, type PuppeteerParserOpt as a6, type PlaywrightParserOpt as a7, type ExecutionRecorderItem as a8, type ExecutionTaskType as a9, type StreamingAIResponse as aA, type LocateOption as aB, type ReferenceImage as aC, type scrollParam as aD, type MidsceneYamlScriptEnvBase as aE, type MidsceneYamlScriptWebEnv as aF, type MidsceneYamlScriptAndroidEnv as aG, type MidsceneYamlScriptEnv as aH, type MidsceneYamlFlowItemAIAction as aI, type MidsceneYamlFlowItemAIAssert as aJ, type MidsceneYamlFlowItemAIQuery as aK, type MidsceneYamlFlowItemAINumber as aL, type MidsceneYamlFlowItemAIString as aM, type MidsceneYamlFlowItemAIAsk as aN, type MidsceneYamlFlowItemAIBoolean as aO, type MidsceneYamlFlowItemAILocate as aP, type MidsceneYamlFlowItemAIWaitFor as aQ, type MidsceneYamlFlowItemAITap as aR, type MidsceneYamlFlowItemAIHover as aS, type MidsceneYamlFlowItemAIInput as aT, type 
MidsceneYamlFlowItemAIKeyboardPress as aU, type MidsceneYamlFlowItemAIScroll as aV, type MidsceneYamlFlowItemEvaluateJavaScript as aW, type MidsceneYamlFlowItemSleep as aX, type MidsceneYamlFlowItemLogScreenshot as aY, type FreeFn as aZ, type ScriptPlayerTaskStatus as a_, type ExecutorContext as aa, type ExecutionTaskHitBy as ab, type ExecutionTaskReturn as ac, type ExecutionTaskInsightLocateParam as ad, type ExecutionTaskInsightLocateOutput as ae, type ExecutionTaskInsightDumpLog as af, type ExecutionTaskInsightLocateApply as ag, type ExecutionTaskInsightLocate as ah, type ExecutionTaskInsightQueryParam as ai, type ExecutionTaskInsightQueryOutput as aj, type ExecutionTaskInsightQueryApply as ak, type ExecutionTaskInsightQuery as al, type ExecutionTaskInsightAssertionParam as am, type ExecutionTaskInsightAssertionApply as an, type ExecutionTaskInsightAssertion as ao, type ExecutionTaskActionApply as ap, type ExecutionTaskAction as aq, type ExecutionTaskLogApply as ar, type ExecutionTaskLog as as, type ExecutionTaskPlanningApply as at, type ExecutionTaskPlanning as au, type GroupedActionDump as av, type PageType as aw, type StreamingCodeGenerationOptions as ax, type StreamingCallback as ay, type CodeGenerationChunk as az, type ExecutionTaskApply as b, type MidsceneYamlConfig as b0, type MidsceneYamlConfigOutput as b1, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractOption as g, type InsightAssertionResponse as h, type MidsceneYamlTask as i, type MidsceneYamlFlowItem as j, type MidsceneYamlFlowItemAIRightClick as k, type MidsceneYamlConfigResult as l, type AIUsageInfo as m, AIResponseFormat as n, type AISingleElementResponseById as o, type AISingleElementResponseByPosition as p, type AISingleElementResponse as q, type AIElementLocatorResponse as r, type AIElementCoordinatesResponse as s, type AIElementResponse as t, type AIDataExtractionResponse as u, type AISectionLocatorResponse as v, 
type AIAssertionResponse as w, type LocatorValidatorOption as x, type LocateValidatorResult as y, type AgentDescribeElementAtPointResult as z };
546
+ export { type PlanningActionParamScroll as $, type AIDescribeElementResponse as A, type EnsureObject as B, type CallAIFn as C, type DumpSubscriber as D, type ExecutionTask as E, type InsightExtractParam as F, type LocateResultElement as G, type DumpMeta as H, type InsightAction as I, type InsightDump as J, type LiteUISection as K, type LocateResult as L, type MidsceneYamlScript as M, type ElementById as N, type OnTaskStartTip as O, type PartialInsightDumpFromSDK as P, type AgentWaitForOpt as Q, type ReportDumpWithAttributes as R, type AgentAssertOpt as S, type PlanningLocateParam as T, UIContext as U, type PlanningAction as V, type PlanningAIResponse as W, type PlanningActionParamTap as X, type PlanningActionParamHover as Y, type PlanningActionParamRightClick as Z, type PlanningActionParamInputOrKeyPress as _, type ExecutionTaskProgressOptions as a, type FreeFn as a$, type PlanningActionParamAssert as a0, type PlanningActionParamSleep as a1, type PlanningActionParamError as a2, type PlanningActionParamWaitFor as a3, type PlanningActionParamAndroidLongPress as a4, type PlanningActionParamAndroidPull as a5, type Color as a6, type BaseAgentParserOpt as a7, type PuppeteerParserOpt as a8, type PlaywrightParserOpt as a9, type StreamingCallback as aA, type CodeGenerationChunk as aB, type StreamingAIResponse as aC, type LocateOption as aD, type ReferenceImage as aE, type scrollParam as aF, type MidsceneYamlScriptEnvBase as aG, type MidsceneYamlScriptWebEnv as aH, type MidsceneYamlScriptAndroidEnv as aI, type MidsceneYamlScriptEnv as aJ, type MidsceneYamlFlowItemAIAction as aK, type MidsceneYamlFlowItemAIAssert as aL, type MidsceneYamlFlowItemAIQuery as aM, type MidsceneYamlFlowItemAINumber as aN, type MidsceneYamlFlowItemAIString as aO, type MidsceneYamlFlowItemAIAsk as aP, type MidsceneYamlFlowItemAIBoolean as aQ, type MidsceneYamlFlowItemAILocate as aR, type MidsceneYamlFlowItemAIWaitFor as aS, type MidsceneYamlFlowItemAITap as aT, type MidsceneYamlFlowItemAIHover as 
aU, type MidsceneYamlFlowItemAIInput as aV, type MidsceneYamlFlowItemAIKeyboardPress as aW, type MidsceneYamlFlowItemAIScroll as aX, type MidsceneYamlFlowItemEvaluateJavaScript as aY, type MidsceneYamlFlowItemSleep as aZ, type MidsceneYamlFlowItemLogScreenshot as a_, type ExecutionRecorderItem as aa, type ExecutionTaskType as ab, type ExecutorContext as ac, type ExecutionTaskHitBy as ad, type ExecutionTaskReturn as ae, type ExecutionTaskInsightLocateParam as af, type ExecutionTaskInsightLocateOutput as ag, type ExecutionTaskInsightDumpLog as ah, type ExecutionTaskInsightLocateApply as ai, type ExecutionTaskInsightLocate as aj, type ExecutionTaskInsightQueryParam as ak, type ExecutionTaskInsightQueryOutput as al, type ExecutionTaskInsightQueryApply as am, type ExecutionTaskInsightQuery as an, type ExecutionTaskInsightAssertionParam as ao, type ExecutionTaskInsightAssertionApply as ap, type ExecutionTaskInsightAssertion as aq, type ExecutionTaskActionApply as ar, type ExecutionTaskAction as as, type ExecutionTaskLogApply as at, type ExecutionTaskLog as au, type ExecutionTaskPlanningApply as av, type ExecutionTaskPlanning as aw, type GroupedActionDump as ax, type PageType as ay, type StreamingCodeGenerationOptions as az, type ExecutionTaskApply as b, type ScriptPlayerTaskStatus as b0, type ScriptPlayerStatusValue as b1, type MidsceneYamlConfig as b2, type MidsceneYamlConfigOutput as b3, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractOption as g, type InsightAssertionResponse as h, type MidsceneYamlTask as i, type MidsceneYamlFlowItem as j, type MidsceneYamlFlowItemAIRightClick as k, type MidsceneYamlConfigResult as l, type AIUsageInfo as m, AIResponseFormat as n, type AISingleElementResponseById as o, type AISingleElementResponseByPosition as p, type AISingleElementResponse as q, type AIElementLocatorResponse as r, type AIElementCoordinatesResponse as s, type AIElementResponse as t, type 
AIDataExtractionResponse as u, type AISectionLocatorResponse as v, type AIAssertionResponse as w, type LocatorValidatorOption as x, type LocateValidatorResult as y, type AgentDescribeElementAtPointResult as z };
@@ -1,4 +1,4 @@
1
- import { R as ReportDumpWithAttributes } from './types-93fa8419.js';
1
+ import { R as ReportDumpWithAttributes } from './types-d836fa73.js';
2
2
  import { Rect } from '@midscene/shared/types';
3
3
  import '@midscene/shared/constants';
4
4
  import 'openai/resources';
package/dist/es/utils.js CHANGED
@@ -12,7 +12,7 @@ import {
12
12
  uploadTestInfoToServer,
13
13
  writeDumpReport,
14
14
  writeLogFile
15
- } from "./chunk-TYKJHDNK.js";
15
+ } from "./chunk-IMZJSEAX.js";
16
16
  export {
17
17
  getTmpDir,
18
18
  getTmpFile,
@@ -1,9 +1,9 @@
1
- import { ay as StreamingCallback, m as AIUsageInfo, ax as StreamingCodeGenerationOptions, aA as StreamingAIResponse, V as PlanningAction, j as MidsceneYamlFlowItem } from './types-93fa8419.js';
1
+ import { aA as StreamingCallback, m as AIUsageInfo, az as StreamingCodeGenerationOptions, aC as StreamingAIResponse, V as PlanningAction, j as MidsceneYamlFlowItem } from './types-d836fa73.js';
2
2
  import OpenAI from 'openai';
3
3
  import { ChatCompletionMessageParam } from 'openai/resources';
4
4
  export { ChatCompletionMessageParam } from 'openai/resources';
5
- import { b as AIActionType } from './llm-planning-4bc82162.js';
6
- export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-4bc82162.js';
5
+ import { b as AIActionType } from './llm-planning-d7096b0d.js';
6
+ export { a as AiAssert, f as AiExtractElementInfo, A as AiLocateElement, g as AiLocateSection, h as adaptBboxToRect, c as callAiFn, d as describeUserPage, e as elementByPositionWithElementInfo, p as plan } from './llm-planning-d7096b0d.js';
7
7
  import { vlLocateMode } from '@midscene/shared/env';
8
8
  import { actionParser } from '@ui-tars/action-parser';
9
9
  import { Size } from '@midscene/shared/types';
@@ -18,7 +18,7 @@
18
18
 
19
19
 
20
20
 
21
- var _chunkYCHAOUOWjs = require('./chunk-YCHAOUOW.js');
21
+ var _chunkFKQMUAXPjs = require('./chunk-FKQMUAXP.js');
22
22
 
23
23
 
24
24
 
@@ -39,4 +39,4 @@ var _chunkYCHAOUOWjs = require('./chunk-YCHAOUOW.js');
39
39
 
40
40
 
41
41
 
42
- exports.AIActionType = _chunkYCHAOUOWjs.AIActionType; exports.AiAssert = _chunkYCHAOUOWjs.AiAssert; exports.AiExtractElementInfo = _chunkYCHAOUOWjs.AiExtractElementInfo; exports.AiLocateElement = _chunkYCHAOUOWjs.AiLocateElement; exports.AiLocateSection = _chunkYCHAOUOWjs.AiLocateSection; exports.adaptBboxToRect = _chunkYCHAOUOWjs.adaptBboxToRect; exports.callAi = _chunkYCHAOUOWjs.call; exports.callAiFn = _chunkYCHAOUOWjs.callAiFn; exports.callToGetJSONObject = _chunkYCHAOUOWjs.callToGetJSONObject; exports.describeUserPage = _chunkYCHAOUOWjs.describeUserPage; exports.elementByPositionWithElementInfo = _chunkYCHAOUOWjs.elementByPositionWithElementInfo; exports.generatePlaywrightTest = _chunkYCHAOUOWjs.generatePlaywrightTest; exports.generatePlaywrightTestStream = _chunkYCHAOUOWjs.generatePlaywrightTestStream; exports.generateYamlTest = _chunkYCHAOUOWjs.generateYamlTest; exports.generateYamlTestStream = _chunkYCHAOUOWjs.generateYamlTestStream; exports.plan = _chunkYCHAOUOWjs.plan; exports.resizeImageForUiTars = _chunkYCHAOUOWjs.resizeImageForUiTars; exports.systemPromptToLocateElement = _chunkYCHAOUOWjs.systemPromptToLocateElement; exports.vlmPlanning = _chunkYCHAOUOWjs.vlmPlanning;
42
+ exports.AIActionType = _chunkFKQMUAXPjs.AIActionType; exports.AiAssert = _chunkFKQMUAXPjs.AiAssert; exports.AiExtractElementInfo = _chunkFKQMUAXPjs.AiExtractElementInfo; exports.AiLocateElement = _chunkFKQMUAXPjs.AiLocateElement; exports.AiLocateSection = _chunkFKQMUAXPjs.AiLocateSection; exports.adaptBboxToRect = _chunkFKQMUAXPjs.adaptBboxToRect; exports.callAi = _chunkFKQMUAXPjs.call; exports.callAiFn = _chunkFKQMUAXPjs.callAiFn; exports.callToGetJSONObject = _chunkFKQMUAXPjs.callToGetJSONObject; exports.describeUserPage = _chunkFKQMUAXPjs.describeUserPage; exports.elementByPositionWithElementInfo = _chunkFKQMUAXPjs.elementByPositionWithElementInfo; exports.generatePlaywrightTest = _chunkFKQMUAXPjs.generatePlaywrightTest; exports.generatePlaywrightTestStream = _chunkFKQMUAXPjs.generatePlaywrightTestStream; exports.generateYamlTest = _chunkFKQMUAXPjs.generateYamlTest; exports.generateYamlTestStream = _chunkFKQMUAXPjs.generateYamlTestStream; exports.plan = _chunkFKQMUAXPjs.plan; exports.resizeImageForUiTars = _chunkFKQMUAXPjs.resizeImageForUiTars; exports.systemPromptToLocateElement = _chunkFKQMUAXPjs.systemPromptToLocateElement; exports.vlmPlanning = _chunkFKQMUAXPjs.vlmPlanning;
@@ -300,7 +300,7 @@ function buildYamlFlowFromPlans(plans, sleep) {
300
300
  flow.push({
301
301
  sleep: param.timeMs
302
302
  });
303
- } else if (type === "AndroidBackButton" || type === "AndroidHomeButton" || type === "AndroidRecentAppsButton") {
303
+ } else if (type === "AndroidBackButton" || type === "AndroidHomeButton" || type === "AndroidRecentAppsButton" || type === "AndroidLongPress" || type === "AndroidPull") {
304
304
  } else if (type === "Error" || type === "ExpectedFalsyCondition" || type === "Assert" || type === "AssertWithoutThrow" || type === "Finished") {
305
305
  } else {
306
306
  console.warn(
@@ -794,7 +794,7 @@ Target: User will give you a screenshot, an instruction and some previous logs i
794
794
 
795
795
  Restriction:
796
796
  - Don't give extra actions or plans beyond the instruction. ONLY plan for what the instruction requires. For example, don't try to submit the form if the instruction is only to fill something.
797
- - Always give ONLY ONE action in \`log\` field (or null if no action should be done), instead of multiple actions. Supported actions are Tap, Hover, Input, KeyboardPress, Scroll${pageType === "android" ? ", AndroidBackButton, AndroidHomeButton, AndroidRecentAppsButton." : "."}
797
+ - Always give ONLY ONE action in \`log\` field (or null if no action should be done), instead of multiple actions. Supported actions are Tap, Hover, Input, KeyboardPress, Scroll${pageType === "android" ? ", AndroidBackButton, AndroidHomeButton, AndroidRecentAppsButton, AndroidLongPress, AndroidPull." : "."}
798
798
  - Don't repeat actions in the previous logs.
799
799
  - Bbox is the bounding box of the element to be located. It's an array of 4 numbers, representing ${bboxDescription(vlMode)}.
800
800
 
@@ -807,7 +807,9 @@ Supporting actions:
807
807
  - Scroll: { type: "Scroll", ${vlLocateParam} | null, param: { direction: 'down'(default) | 'up' | 'right' | 'left', scrollType: 'once' (default) | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft', distance: null | number }} // locate is the element to scroll. If it's a page scroll, put \`null\` in the \`locate\` field.
808
808
  ${pageType === "android" ? `- AndroidBackButton: { type: "AndroidBackButton", param: {} }
809
809
  - AndroidHomeButton: { type: "AndroidHomeButton", param: {} }
810
- - AndroidRecentAppsButton: { type: "AndroidRecentAppsButton", param: {} }` : ""}
810
+ - AndroidRecentAppsButton: { type: "AndroidRecentAppsButton", param: {} }
811
+ - AndroidLongPress: { type: "AndroidLongPress", param: { x: number, y: number, duration?: number } }
812
+ - AndroidPull: { type: "AndroidPull", param: { direction: 'up' | 'down', startPoint?: { x: number, y: number }, distance?: number, duration?: number } } // Pull down to refresh (direction: 'down') or pull up to load more (direction: 'up')` : ""}
811
813
 
812
814
  Field description:
813
815
  * The \`prompt\` field inside the \`locate\` field is a short description that could be used to locate the element.
@@ -857,7 +859,7 @@ You are a versatile professional in software UI automation. Your outstanding con
857
859
  ## Workflow
858
860
 
859
861
  1. Receive the screenshot, element description of screenshot(if any), user's instruction and previous logs.
860
- 2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / FalsyConditionStatement / Sleep ${pageType === "android" ? "/ AndroidBackButton / AndroidHomeButton / AndroidRecentAppsButton" : ""}). The "About the action" section below will give you more details.
862
+ 2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / FalsyConditionStatement / Sleep ${pageType === "android" ? "/ AndroidBackButton / AndroidHomeButton / AndroidRecentAppsButton / AndroidLongPress / AndroidPull" : ""}). The "About the action" section below will give you more details.
861
863
  3. Precisely locate the target element if it's already shown in the screenshot, put the location info in the \`locate\` field of the action.
862
864
  4. If some target elements is not shown in the screenshot, consider the user's instruction is not feasible on this page. Follow the next steps.
863
865
  5. Consider whether the user's instruction will be accomplished after all the actions
@@ -916,7 +918,11 @@ ${pageType === "android" ? `- type: 'AndroidBackButton', trigger the system "bac
916
918
  - type: 'AndroidHomeButton', trigger the system "home" operation on Android devices
917
919
  * {{ param: {{}} }}
918
920
  - type: 'AndroidRecentAppsButton', trigger the system "recent apps" operation on Android devices
919
- * {{ param: {{}} }}` : ""}
921
+ * {{ param: {{}} }}
922
+ - type: 'AndroidLongPress', trigger a long press on the screen at specified coordinates on Android devices
923
+ * {{ param: {{ x: number, y: number, duration?: number }} }}
924
+ - type: 'AndroidPull', trigger pull down to refresh or pull up actions on Android devices
925
+ * {{ param: {{ direction: 'up' | 'down', startPoint?: {{ x: number, y: number }}, distance?: number, duration?: number }} }}` : ""}
920
926
  `;
921
927
  var outputTemplate = `
922
928
  ## Output JSON Format:
@@ -1030,7 +1036,7 @@ var planSchema = {
1030
1036
  },
1031
1037
  type: {
1032
1038
  type: "string",
1033
- description: 'Type of action, one of "Tap", "RightClick", "Hover" , "Input", "KeyboardPress", "Scroll", "ExpectedFalsyCondition", "Sleep", "AndroidBackButton", "AndroidHomeButton", "AndroidRecentAppsButton"'
1039
+ description: 'Type of action, one of "Tap", "RightClick", "Hover" , "Input", "KeyboardPress", "Scroll", "ExpectedFalsyCondition", "Sleep", "AndroidBackButton", "AndroidHomeButton", "AndroidRecentAppsButton", "AndroidLongPress"'
1034
1040
  },
1035
1041
  param: {
1036
1042
  anyOf: [
@@ -2185,14 +2191,6 @@ async function AiLocateElement(options) {
2185
2191
  context.size
2186
2192
  );
2187
2193
  }
2188
- let referenceImagePayload;
2189
- if (_optionalChain([options, 'access', _55 => _55.referenceImage, 'optionalAccess', _56 => _56.rect]) && options.referenceImage.base64) {
2190
- referenceImagePayload = await _img.cropByRect.call(void 0,
2191
- options.referenceImage.base64,
2192
- options.referenceImage.rect,
2193
- _env.getAIConfigInBoolean.call(void 0, _env.MIDSCENE_USE_QWEN_VL)
2194
- );
2195
- }
2196
2194
  const msgs = [
2197
2195
  { role: "system", content: systemPrompt },
2198
2196
  {
@@ -2222,10 +2220,10 @@ async function AiLocateElement(options) {
2222
2220
  if ("bbox" in res.content && Array.isArray(res.content.bbox)) {
2223
2221
  resRect = adaptBboxToRect(
2224
2222
  res.content.bbox,
2225
- _optionalChain([options, 'access', _57 => _57.searchConfig, 'optionalAccess', _58 => _58.rect, 'optionalAccess', _59 => _59.width]) || context.size.width,
2226
- _optionalChain([options, 'access', _60 => _60.searchConfig, 'optionalAccess', _61 => _61.rect, 'optionalAccess', _62 => _62.height]) || context.size.height,
2227
- _optionalChain([options, 'access', _63 => _63.searchConfig, 'optionalAccess', _64 => _64.rect, 'optionalAccess', _65 => _65.left]),
2228
- _optionalChain([options, 'access', _66 => _66.searchConfig, 'optionalAccess', _67 => _67.rect, 'optionalAccess', _68 => _68.top])
2223
+ _optionalChain([options, 'access', _55 => _55.searchConfig, 'optionalAccess', _56 => _56.rect, 'optionalAccess', _57 => _57.width]) || context.size.width,
2224
+ _optionalChain([options, 'access', _58 => _58.searchConfig, 'optionalAccess', _59 => _59.rect, 'optionalAccess', _60 => _60.height]) || context.size.height,
2225
+ _optionalChain([options, 'access', _61 => _61.searchConfig, 'optionalAccess', _62 => _62.rect, 'optionalAccess', _63 => _63.left]),
2226
+ _optionalChain([options, 'access', _64 => _64.searchConfig, 'optionalAccess', _65 => _65.rect, 'optionalAccess', _66 => _66.top])
2229
2227
  );
2230
2228
  debugInspect("resRect", resRect);
2231
2229
  const rectCenter = {
@@ -2244,7 +2242,7 @@ async function AiLocateElement(options) {
2244
2242
  }
2245
2243
  } catch (e) {
2246
2244
  const msg = e instanceof Error ? `Failed to parse bbox: ${e.message}` : "unknown error in locate";
2247
- if (!errors || _optionalChain([errors, 'optionalAccess', _69 => _69.length]) === 0) {
2245
+ if (!errors || _optionalChain([errors, 'optionalAccess', _67 => _67.length]) === 0) {
2248
2246
  errors = [msg];
2249
2247
  } else {
2250
2248
  errors.push(`(${msg})`);
@@ -2336,14 +2334,14 @@ async function AiExtractElementInfo(options) {
2336
2334
  truncateTextLength: 200,
2337
2335
  filterNonTextContent: false,
2338
2336
  visibleOnly: false,
2339
- domIncluded: _optionalChain([extractOption, 'optionalAccess', _70 => _70.domIncluded])
2337
+ domIncluded: _optionalChain([extractOption, 'optionalAccess', _68 => _68.domIncluded])
2340
2338
  });
2341
2339
  const extractDataPromptText = await extractDataQueryPrompt(
2342
2340
  description,
2343
2341
  dataQuery
2344
2342
  );
2345
2343
  const userContent = [];
2346
- if (_optionalChain([extractOption, 'optionalAccess', _71 => _71.screenshotIncluded]) !== false) {
2344
+ if (_optionalChain([extractOption, 'optionalAccess', _69 => _69.screenshotIncluded]) !== false) {
2347
2345
  userContent.push({
2348
2346
  type: "image_url",
2349
2347
  image_url: {
@@ -2471,7 +2469,7 @@ async function plan(userInstruction, opts) {
2471
2469
  const { content, usage } = await call2(msgs, 3 /* PLAN */);
2472
2470
  const rawResponse = JSON.stringify(content, void 0, 2);
2473
2471
  const planFromAI = content;
2474
- const actions = (_optionalChain([planFromAI, 'access', _72 => _72.action, 'optionalAccess', _73 => _73.type]) ? [planFromAI.action] : planFromAI.actions) || [];
2472
+ const actions = (_optionalChain([planFromAI, 'access', _70 => _70.action, 'optionalAccess', _71 => _71.type]) ? [planFromAI.action] : planFromAI.actions) || [];
2475
2473
  const returnValue = {
2476
2474
  ...planFromAI,
2477
2475
  actions,
@@ -2498,7 +2496,7 @@ async function plan(userInstruction, opts) {
2498
2496
  _utils.assert.call(void 0, !planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
2499
2497
  } else {
2500
2498
  actions.forEach((action) => {
2501
- if (_optionalChain([action, 'access', _74 => _74.locate, 'optionalAccess', _75 => _75.id])) {
2499
+ if (_optionalChain([action, 'access', _72 => _72.locate, 'optionalAccess', _73 => _73.id])) {
2502
2500
  const element = elementById(action.locate.id);
2503
2501
  if (element) {
2504
2502
  action.locate.id = element.id;
@@ -2710,6 +2708,39 @@ async function vlmPlanning(options) {
2710
2708
  type: "AndroidRecentAppsButton",
2711
2709
  param: {}
2712
2710
  });
2711
+ } else if (action.action_type === "androidLongPress") {
2712
+ _utils.assert.call(void 0,
2713
+ action.action_inputs.start_coords,
2714
+ "start_coords is required for androidLongPress"
2715
+ );
2716
+ const point = action.action_inputs.start_coords;
2717
+ transformActions.push({
2718
+ type: "AndroidLongPress",
2719
+ param: {
2720
+ x: point[0],
2721
+ y: point[1],
2722
+ duration: 1e3
2723
+ },
2724
+ locate: null,
2725
+ thought: action.thought || ""
2726
+ });
2727
+ } else if (action.action_type === "androidPull") {
2728
+ const pullDirection = action.action_inputs.direction || "down";
2729
+ const startPoint = action.action_inputs.start_coords ? {
2730
+ x: action.action_inputs.start_coords[0],
2731
+ y: action.action_inputs.start_coords[1]
2732
+ } : void 0;
2733
+ transformActions.push({
2734
+ type: "AndroidPull",
2735
+ param: {
2736
+ direction: pullDirection,
2737
+ startPoint,
2738
+ distance: action.action_inputs.distance,
2739
+ duration: action.action_inputs.duration || 500
2740
+ },
2741
+ locate: null,
2742
+ thought: action.thought || ""
2743
+ });
2713
2744
  }
2714
2745
  });
2715
2746
  if (transformActions.length === 0) {
@@ -2793,4 +2824,4 @@ async function resizeImageForUiTars(imageBase64, size) {
2793
2824
 
2794
2825
  exports.systemPromptToLocateElement = systemPromptToLocateElement; exports.elementByPositionWithElementInfo = elementByPositionWithElementInfo; exports.describeUserPage = describeUserPage; exports.call = call; exports.callToGetJSONObject = callToGetJSONObject; exports.AIActionType = AIActionType; exports.callAiFn = callAiFn; exports.adaptBboxToRect = adaptBboxToRect; exports.expandSearchArea = expandSearchArea; exports.generateYamlTest = generateYamlTest; exports.generateYamlTestStream = generateYamlTestStream; exports.generatePlaywrightTest = generatePlaywrightTest; exports.generatePlaywrightTestStream = generatePlaywrightTestStream; exports.AiLocateElement = AiLocateElement; exports.AiLocateSection = AiLocateSection; exports.AiExtractElementInfo = AiExtractElementInfo; exports.AiAssert = AiAssert; exports.plan = plan; exports.vlmPlanning = vlmPlanning; exports.resizeImageForUiTars = resizeImageForUiTars;
2795
2826
 
2796
- //# sourceMappingURL=chunk-YCHAOUOW.js.map
2827
+ //# sourceMappingURL=chunk-FKQMUAXP.js.map