@midscene/core 0.26.2-beta-20250812035614.0 → 0.26.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/dist/es/ai-model.d.ts +3 -3
  2. package/dist/es/ai-model.js +1 -1
  3. package/dist/es/{chunk-I5LBWOQA.js → chunk-DDYIQHOA.js} +268 -310
  4. package/dist/es/chunk-DDYIQHOA.js.map +1 -0
  5. package/dist/es/{chunk-KAYSYGXR.js → chunk-O3KUKF2A.js} +3 -3
  6. package/dist/es/index.d.ts +4 -4
  7. package/dist/es/index.js +2 -2
  8. package/dist/es/{llm-planning-92cec090.d.ts → llm-planning-4e0c16fe.d.ts} +2 -1
  9. package/dist/es/{types-b4a208c6.d.ts → types-8a6be57c.d.ts} +11 -2
  10. package/dist/es/utils.d.ts +1 -1
  11. package/dist/es/utils.js +1 -1
  12. package/dist/lib/ai-model.d.ts +3 -3
  13. package/dist/lib/ai-model.js +2 -2
  14. package/dist/lib/{chunk-I5LBWOQA.js → chunk-DDYIQHOA.js} +257 -299
  15. package/dist/lib/chunk-DDYIQHOA.js.map +1 -0
  16. package/dist/lib/{chunk-KAYSYGXR.js → chunk-O3KUKF2A.js} +3 -3
  17. package/dist/lib/index.d.ts +4 -4
  18. package/dist/lib/index.js +12 -12
  19. package/dist/lib/{llm-planning-92cec090.d.ts → llm-planning-4e0c16fe.d.ts} +2 -1
  20. package/dist/{types/types-b4a208c6.d.ts → lib/types-8a6be57c.d.ts} +11 -2
  21. package/dist/lib/utils.d.ts +1 -1
  22. package/dist/lib/utils.js +2 -2
  23. package/dist/types/ai-model.d.ts +3 -3
  24. package/dist/types/index.d.ts +4 -4
  25. package/dist/types/{llm-planning-92cec090.d.ts → llm-planning-4e0c16fe.d.ts} +2 -1
  26. package/dist/{lib/types-b4a208c6.d.ts → types/types-8a6be57c.d.ts} +11 -2
  27. package/dist/types/utils.d.ts +1 -1
  28. package/package.json +3 -3
  29. package/dist/es/chunk-I5LBWOQA.js.map +0 -1
  30. package/dist/lib/chunk-I5LBWOQA.js.map +0 -1
  31. /package/dist/es/{chunk-KAYSYGXR.js.map → chunk-O3KUKF2A.js.map} +0 -0
  32. /package/dist/lib/{chunk-KAYSYGXR.js.map → chunk-O3KUKF2A.js.map} +0 -0
@@ -1,7 +1,7 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractParam, h as InsightExtractOption, T as TMultimodalPrompt, A as AIUsageInfo, i as TUserPrompt, j as InsightAssertionResponse, k as AIDescribeElementResponse } from './types-b4a208c6.js';
2
- export { z as AIAssertionResponse, x as AIDataExtractionResponse, v as AIElementCoordinatesResponse, u as AIElementLocatorResponse, w as AIElementResponse, q as AIResponseFormat, y as AISectionLocatorResponse, t as AISingleElementResponse, r as AISingleElementResponseById, s as AISingleElementResponseByPosition, W as AgentAssertOpt, F as AgentDescribeElementAtPointResult, V as AgentWaitForOpt, aa as BaseAgentParserOpt, G as CallAIFn, aE as CodeGenerationChunk, a9 as Color, K as DumpMeta, Q as ElementById, H as EnsureObject, ad as ExecutionRecorderItem, av as ExecutionTaskAction, au as ExecutionTaskActionApply, ag as ExecutionTaskHitBy, at as ExecutionTaskInsightAssertion, as as ExecutionTaskInsightAssertionApply, ar as ExecutionTaskInsightAssertionParam, ak as ExecutionTaskInsightDumpLog, am as ExecutionTaskInsightLocate, al as ExecutionTaskInsightLocateApply, aj as ExecutionTaskInsightLocateOutput, ai as ExecutionTaskInsightLocateParam, aq as ExecutionTaskInsightQuery, ap as ExecutionTaskInsightQueryApply, ao as ExecutionTaskInsightQueryOutput, an as ExecutionTaskInsightQueryParam, ax as ExecutionTaskLog, aw as ExecutionTaskLogApply, az as ExecutionTaskPlanning, ay as ExecutionTaskPlanningApply, ah as ExecutionTaskReturn, ae as ExecutionTaskType, af as ExecutorContext, b1 as FreeFn, aA as GroupedActionDump, N as InsightDump, O as LiteUISection, p as LocateOption, J as LocateResultElement, C as LocateValidatorResult, B as LocatorValidatorOption, b4 as MidsceneYamlConfig, b5 as MidsceneYamlConfigOutput, o as MidsceneYamlConfigResult, m as MidsceneYamlFlowItem, aM as MidsceneYamlFlowItemAIAction, aR as MidsceneYamlFlowItemAIAsk, aN as MidsceneYamlFlowItemAIAssert, aS as MidsceneYamlFlowItemAIBoolean, aW as MidsceneYamlFlowItemAIHover, aX as MidsceneYamlFlowItemAIInput, aY as MidsceneYamlFlowItemAIKeyboardPress, aT as MidsceneYamlFlowItemAILocate, aP as MidsceneYamlFlowItemAINumber, aO as MidsceneYamlFlowItemAIQuery, n as MidsceneYamlFlowItemAIRightClick, aZ as MidsceneYamlFlowItemAIScroll, aQ as MidsceneYamlFlowItemAIString, aV as MidsceneYamlFlowItemAITap, aU as MidsceneYamlFlowItemAIWaitFor, a_ as MidsceneYamlFlowItemEvaluateJavaScript, b0 as MidsceneYamlFlowItemLogScreenshot, a$ as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aK as MidsceneYamlScriptAndroidEnv, aL as MidsceneYamlScriptEnv, aI as MidsceneYamlScriptEnvBase, aJ as MidsceneYamlScriptWebEnv, l as MidsceneYamlTask, S as OnTaskStartTip, aB as PageType, P as PartialInsightDumpFromSDK, Z as PlanningAIResponse, Y as PlanningAction, a7 as PlanningActionParamAndroidLongPress, a8 as PlanningActionParamAndroidPull, a3 as PlanningActionParamAssert, a5 as PlanningActionParamError, $ as PlanningActionParamHover, a1 as PlanningActionParamInputOrKeyPress, a0 as PlanningActionParamRightClick, a2 as PlanningActionParamScroll, a4 as PlanningActionParamSleep, _ as PlanningActionParamTap, a6 as PlanningActionParamWaitFor, X as PlanningLocateParam, ac as PlaywrightParserOpt, ab as PuppeteerParserOpt, aG as ReferenceImage, R as ReportDumpWithAttributes, b3 as ScriptPlayerStatusValue, b2 as ScriptPlayerTaskStatus, aF as StreamingAIResponse, aD as StreamingCallback, aC as StreamingCodeGenerationOptions, aH as scrollParam } from './types-b4a208c6.js';
3
- import { c as callAiFn } from './llm-planning-92cec090.js';
4
- export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-92cec090.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractParam, h as InsightExtractOption, T as TMultimodalPrompt, A as AIUsageInfo, i as TUserPrompt, j as InsightAssertionResponse, k as AIDescribeElementResponse } from './types-8a6be57c.js';
2
+ export { z as AIAssertionResponse, x as AIDataExtractionResponse, v as AIElementCoordinatesResponse, u as AIElementLocatorResponse, w as AIElementResponse, q as AIResponseFormat, y as AISectionLocatorResponse, t as AISingleElementResponse, r as AISingleElementResponseById, s as AISingleElementResponseByPosition, W as AgentAssertOpt, F as AgentDescribeElementAtPointResult, V as AgentWaitForOpt, aa as BaseAgentParserOpt, G as CallAIFn, aE as CodeGenerationChunk, a9 as Color, aG as DeviceAction, K as DumpMeta, Q as ElementById, H as EnsureObject, ad as ExecutionRecorderItem, av as ExecutionTaskAction, au as ExecutionTaskActionApply, ag as ExecutionTaskHitBy, at as ExecutionTaskInsightAssertion, as as ExecutionTaskInsightAssertionApply, ar as ExecutionTaskInsightAssertionParam, ak as ExecutionTaskInsightDumpLog, am as ExecutionTaskInsightLocate, al as ExecutionTaskInsightLocateApply, aj as ExecutionTaskInsightLocateOutput, ai as ExecutionTaskInsightLocateParam, aq as ExecutionTaskInsightQuery, ap as ExecutionTaskInsightQueryApply, ao as ExecutionTaskInsightQueryOutput, an as ExecutionTaskInsightQueryParam, ax as ExecutionTaskLog, aw as ExecutionTaskLogApply, az as ExecutionTaskPlanning, ay as ExecutionTaskPlanningApply, ah as ExecutionTaskReturn, ae as ExecutionTaskType, af as ExecutorContext, b2 as FreeFn, aA as GroupedActionDump, N as InsightDump, O as LiteUISection, p as LocateOption, J as LocateResultElement, C as LocateValidatorResult, B as LocatorValidatorOption, b5 as MidsceneYamlConfig, b6 as MidsceneYamlConfigOutput, o as MidsceneYamlConfigResult, m as MidsceneYamlFlowItem, aN as MidsceneYamlFlowItemAIAction, aS as MidsceneYamlFlowItemAIAsk, aO as MidsceneYamlFlowItemAIAssert, aT as MidsceneYamlFlowItemAIBoolean, aX as MidsceneYamlFlowItemAIHover, aY as MidsceneYamlFlowItemAIInput, aZ as MidsceneYamlFlowItemAIKeyboardPress, aU as MidsceneYamlFlowItemAILocate, aQ as MidsceneYamlFlowItemAINumber, aP as MidsceneYamlFlowItemAIQuery, n as MidsceneYamlFlowItemAIRightClick, a_ as MidsceneYamlFlowItemAIScroll, aR as MidsceneYamlFlowItemAIString, aW as MidsceneYamlFlowItemAITap, aV as MidsceneYamlFlowItemAIWaitFor, a$ as MidsceneYamlFlowItemEvaluateJavaScript, b1 as MidsceneYamlFlowItemLogScreenshot, b0 as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aL as MidsceneYamlScriptAndroidEnv, aM as MidsceneYamlScriptEnv, aJ as MidsceneYamlScriptEnvBase, aK as MidsceneYamlScriptWebEnv, l as MidsceneYamlTask, S as OnTaskStartTip, aB as PageType, P as PartialInsightDumpFromSDK, Z as PlanningAIResponse, Y as PlanningAction, a7 as PlanningActionParamAndroidLongPress, a8 as PlanningActionParamAndroidPull, a3 as PlanningActionParamAssert, a5 as PlanningActionParamError, $ as PlanningActionParamHover, a1 as PlanningActionParamInputOrKeyPress, a0 as PlanningActionParamRightClick, a2 as PlanningActionParamScroll, a4 as PlanningActionParamSleep, _ as PlanningActionParamTap, a6 as PlanningActionParamWaitFor, X as PlanningLocateParam, ac as PlaywrightParserOpt, ab as PuppeteerParserOpt, aH as ReferenceImage, R as ReportDumpWithAttributes, b4 as ScriptPlayerStatusValue, b3 as ScriptPlayerTaskStatus, aF as StreamingAIResponse, aD as StreamingCallback, aC as StreamingCodeGenerationOptions, aI as scrollParam } from './types-8a6be57c.js';
3
+ import { c as callAiFn } from './llm-planning-4e0c16fe.js';
4
+ export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-4e0c16fe.js';
5
5
  import { BaseElement, Rect } from '@midscene/shared/types';
6
6
  export { BaseElement, ElementTreeNode, Point, Rect, Size } from '@midscene/shared/types';
7
7
  export { getVersion } from './utils.js';
package/dist/lib/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  "use strict";Object.defineProperty(exports, "__esModule", {value: true}); function _optionalChain(ops) { let lastAccessLHS = undefined; let value = ops[0]; let i = 1; while (i < ops.length) { const op = ops[i]; const fn = ops[i + 1]; i += 2; if ((op === 'optionalAccess' || op === 'optionalCall') && value == null) { return undefined; } if (op === 'access' || op === 'optionalAccess') { lastAccessLHS = value; value = fn(value); } else if (op === 'call' || op === 'optionalCall') { value = fn((...args) => value.call(lastAccessLHS, ...args)); lastAccessLHS = undefined; } } return value; }
2
2
 
3
- var _chunkKAYSYGXRjs = require('./chunk-KAYSYGXR.js');
3
+ var _chunkO3KUKF2Ajs = require('./chunk-O3KUKF2A.js');
4
4
 
5
5
 
6
6
 
@@ -11,7 +11,7 @@ var _chunkKAYSYGXRjs = require('./chunk-KAYSYGXR.js');
11
11
 
12
12
 
13
13
 
14
- var _chunkI5LBWOQAjs = require('./chunk-I5LBWOQA.js');
14
+ var _chunkDDYIQHOAjs = require('./chunk-DDYIQHOA.js');
15
15
 
16
16
  // src/ai-model/action-executor.ts
17
17
 
@@ -176,7 +176,7 @@ ${_optionalChain([this, 'access', _7 => _7.latestErrorTask, 'call', _8 => _8(),
176
176
  }
177
177
  }
178
178
  const dumpData = {
179
- sdkVersion: _chunkKAYSYGXRjs.getVersion.call(void 0, ),
179
+ sdkVersion: _chunkO3KUKF2Ajs.getVersion.call(void 0, ),
180
180
  model_name: _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME) || "",
181
181
  model_description: modelDescription,
182
182
  logTime: Date.now(),
@@ -248,7 +248,7 @@ var _logger = require('@midscene/shared/logger');
248
248
 
249
249
  function emitInsightDump(data, dumpSubscriber) {
250
250
  const baseData = {
251
- sdkVersion: _chunkKAYSYGXRjs.getVersion.call(void 0, ),
251
+ sdkVersion: _chunkO3KUKF2Ajs.getVersion.call(void 0, ),
252
252
  logTime: Date.now(),
253
253
  model_name: _env.getAIConfig.call(void 0, _env.MIDSCENE_MODEL_NAME) || ""
254
254
  };
@@ -264,7 +264,7 @@ function emitInsightDump(data, dumpSubscriber) {
264
264
  var debug = _logger.getDebug.call(void 0, "ai:insight");
265
265
  var Insight = class {
266
266
  constructor(context, opt) {
267
- this.aiVendorFn = _chunkI5LBWOQAjs.callAiFn;
267
+ this.aiVendorFn = _chunkDDYIQHOAjs.callAiFn;
268
268
  _utils.assert.call(void 0, context, "context is required for Insight");
269
269
  if (typeof context === "function") {
270
270
  this.contextRetrieverFn = context;
@@ -307,7 +307,7 @@ var Insight = class {
307
307
  let searchAreaUsage = void 0;
308
308
  let searchAreaResponse = void 0;
309
309
  if (searchAreaPrompt) {
310
- searchAreaResponse = await _chunkI5LBWOQAjs.AiLocateSection.call(void 0, {
310
+ searchAreaResponse = await _chunkDDYIQHOAjs.AiLocateSection.call(void 0, {
311
311
  context,
312
312
  sectionDescription: searchAreaPrompt
313
313
  });
@@ -327,7 +327,7 @@ var Insight = class {
327
327
  rawResponse,
328
328
  usage,
329
329
  isOrderSensitive
330
- } = await _chunkI5LBWOQAjs.AiLocateElement.call(void 0, {
330
+ } = await _chunkDDYIQHOAjs.AiLocateElement.call(void 0, {
331
331
  callAI: callAI || this.aiVendorFn,
332
332
  context,
333
333
  targetElementDescription: queryPrompt,
@@ -416,7 +416,7 @@ ${parseResult.errors.join("\n")}`;
416
416
  this.onceDumpUpdatedFn = void 0;
417
417
  const context = await this.contextRetrieverFn("extract");
418
418
  const startTime = Date.now();
419
- const { parseResult, usage } = await _chunkI5LBWOQAjs.AiExtractElementInfo.call(void 0, {
419
+ const { parseResult, usage } = await _chunkDDYIQHOAjs.AiExtractElementInfo.call(void 0, {
420
420
  context,
421
421
  dataQuery: dataDemand,
422
422
  multimodalPrompt,
@@ -465,7 +465,7 @@ ${parseResult.errors.join("\n")}`;
465
465
  this.onceDumpUpdatedFn = void 0;
466
466
  const context = await this.contextRetrieverFn("assert");
467
467
  const startTime = Date.now();
468
- const assertResult = await _chunkI5LBWOQAjs.AiAssert.call(void 0, {
468
+ const assertResult = await _chunkDDYIQHOAjs.AiAssert.call(void 0, {
469
469
  assertion,
470
470
  context
471
471
  });
@@ -519,7 +519,7 @@ ${parseResult.errors.join("\n")}`;
519
519
  borderThickness: 3
520
520
  });
521
521
  if (_optionalChain([opt, 'optionalAccess', _22 => _22.deepThink])) {
522
- const searchArea = _chunkI5LBWOQAjs.expandSearchArea.call(void 0, targetRect, context.size);
522
+ const searchArea = _chunkDDYIQHOAjs.expandSearchArea.call(void 0, targetRect, context.size);
523
523
  debug("describe: set searchArea", searchArea);
524
524
  imagePayload = await _img.cropByRect.call(void 0,
525
525
  imagePayload,
@@ -542,7 +542,7 @@ ${parseResult.errors.join("\n")}`;
542
542
  ]
543
543
  }
544
544
  ];
545
- const callAIFn = this.aiVendorFn || _chunkI5LBWOQAjs.callToGetJSONObject;
545
+ const callAIFn = this.aiVendorFn || _chunkDDYIQHOAjs.callToGetJSONObject;
546
546
  const res = await callAIFn(msgs, 4 /* DESCRIBE_ELEMENT */);
547
547
  const { content } = res;
548
548
  _utils.assert.call(void 0, !content.error, `describe failed: ${content.error}`);
@@ -565,6 +565,6 @@ var src_default = Insight;
565
565
 
566
566
 
567
567
 
568
- exports.AiAssert = _chunkI5LBWOQAjs.AiAssert; exports.AiLocateElement = _chunkI5LBWOQAjs.AiLocateElement; exports.Executor = Executor; exports.Insight = Insight; exports.MIDSCENE_MODEL_NAME = _env.MIDSCENE_MODEL_NAME; exports.default = src_default; exports.describeUserPage = _chunkI5LBWOQAjs.describeUserPage; exports.getAIConfig = _env.getAIConfig; exports.getVersion = _chunkKAYSYGXRjs.getVersion; exports.plan = _chunkI5LBWOQAjs.plan;
568
+ exports.AiAssert = _chunkDDYIQHOAjs.AiAssert; exports.AiLocateElement = _chunkDDYIQHOAjs.AiLocateElement; exports.Executor = Executor; exports.Insight = Insight; exports.MIDSCENE_MODEL_NAME = _env.MIDSCENE_MODEL_NAME; exports.default = src_default; exports.describeUserPage = _chunkDDYIQHOAjs.describeUserPage; exports.getAIConfig = _env.getAIConfig; exports.getVersion = _chunkO3KUKF2Ajs.getVersion; exports.plan = _chunkDDYIQHOAjs.plan;
569
569
 
570
570
  //# sourceMappingURL=index.js.map
@@ -1,4 +1,4 @@
1
- import { A as AIUsageInfo, U as UIContext, i as TUserPrompt, aG as ReferenceImage, u as AIElementLocatorResponse, Q as ElementById, T as TMultimodalPrompt, h as InsightExtractOption, x as AIDataExtractionResponse, z as AIAssertionResponse, aB as PageType, Z as PlanningAIResponse } from './types-b4a208c6.js';
1
+ import { A as AIUsageInfo, U as UIContext, i as TUserPrompt, aH as ReferenceImage, u as AIElementLocatorResponse, Q as ElementById, T as TMultimodalPrompt, h as InsightExtractOption, x as AIDataExtractionResponse, z as AIAssertionResponse, aB as PageType, aG as DeviceAction, Z as PlanningAIResponse } from './types-8a6be57c.js';
2
2
  import { Rect, ElementTreeNode, BaseElement } from '@midscene/shared/types';
3
3
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
4
4
 
@@ -97,6 +97,7 @@ declare function AiAssert<ElementType extends BaseElement = BaseElement>(options
97
97
  declare function plan(userInstruction: string, opts: {
98
98
  context: UIContext;
99
99
  pageType: PageType;
100
+ actionSpace: DeviceAction[];
100
101
  callAI?: typeof callAiFn<PlanningAIResponse>;
101
102
  log?: string;
102
103
  actionContext?: string;
@@ -359,7 +359,7 @@ interface PlanningLocateParam extends DetailedLocateParam {
359
359
  }
360
360
  interface PlanningAction<ParamType = any> {
361
361
  thought?: string;
362
- type: 'Locate' | 'Tap' | 'RightClick' | 'Hover' | 'Drag' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'ExpectedFalsyCondition' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished' | 'AndroidBackButton' | 'AndroidHomeButton' | 'AndroidRecentAppsButton' | 'AndroidLongPress' | 'AndroidPull';
362
+ type: 'Locate' | 'Tap' | 'RightClick' | 'Hover' | 'Drag' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished' | 'AndroidBackButton' | 'AndroidHomeButton' | 'AndroidRecentAppsButton' | 'AndroidLongPress' | 'AndroidPull';
363
363
  param: ParamType;
364
364
  locate?: PlanningLocateParam | null;
365
365
  }
@@ -564,5 +564,14 @@ type TMultimodalPrompt = {
564
564
  type TUserPrompt = string | ({
565
565
  prompt: string;
566
566
  } & Partial<TMultimodalPrompt>);
567
+ interface DeviceAction<ParamType = any> {
568
+ name: string;
569
+ description?: string;
570
+ paramSchema?: string;
571
+ paramDescription?: string;
572
+ location?: 'required' | 'optional' | false;
573
+ whatToLocate?: string;
574
+ call: (param: ParamType) => Promise<void> | void;
575
+ }
567
576
 
568
- export { type PlanningActionParamHover as $, type AIUsageInfo as A, type LocatorValidatorOption as B, type LocateValidatorResult as C, type DumpSubscriber as D, type ExecutionTask as E, type AgentDescribeElementAtPointResult as F, type CallAIFn as G, type EnsureObject as H, type InsightAction as I, type LocateResultElement as J, type DumpMeta as K, type LocateResult as L, type MidsceneYamlScript as M, type InsightDump as N, type LiteUISection as O, type PartialInsightDumpFromSDK as P, type ElementById as Q, type ReportDumpWithAttributes as R, type OnTaskStartTip as S, type TMultimodalPrompt as T, UIContext as U, type AgentWaitForOpt as V, type AgentAssertOpt as W, type PlanningLocateParam as X, type PlanningAction as Y, type PlanningAIResponse as Z, type PlanningActionParamTap as _, type ExecutionTaskProgressOptions as a, type MidsceneYamlFlowItemSleep as a$, type PlanningActionParamRightClick as a0, type PlanningActionParamInputOrKeyPress as a1, type PlanningActionParamScroll as a2, type PlanningActionParamAssert as a3, type PlanningActionParamSleep as a4, type PlanningActionParamError as a5, type PlanningActionParamWaitFor as a6, type PlanningActionParamAndroidLongPress as a7, type PlanningActionParamAndroidPull as a8, type Color as a9, type GroupedActionDump as aA, type PageType as aB, type StreamingCodeGenerationOptions as aC, type StreamingCallback as aD, type CodeGenerationChunk as aE, type StreamingAIResponse as aF, type ReferenceImage as aG, type scrollParam as aH, type MidsceneYamlScriptEnvBase as aI, type MidsceneYamlScriptWebEnv as aJ, type MidsceneYamlScriptAndroidEnv as aK, type MidsceneYamlScriptEnv as aL, type MidsceneYamlFlowItemAIAction as aM, type MidsceneYamlFlowItemAIAssert as aN, type MidsceneYamlFlowItemAIQuery as aO, type MidsceneYamlFlowItemAINumber as aP, type MidsceneYamlFlowItemAIString as aQ, type MidsceneYamlFlowItemAIAsk as aR, type MidsceneYamlFlowItemAIBoolean as aS, type MidsceneYamlFlowItemAILocate as aT, type MidsceneYamlFlowItemAIWaitFor as aU, type MidsceneYamlFlowItemAITap as aV, type MidsceneYamlFlowItemAIHover as aW, type MidsceneYamlFlowItemAIInput as aX, type MidsceneYamlFlowItemAIKeyboardPress as aY, type MidsceneYamlFlowItemAIScroll as aZ, type MidsceneYamlFlowItemEvaluateJavaScript as a_, type BaseAgentParserOpt as aa, type PuppeteerParserOpt as ab, type PlaywrightParserOpt as ac, type ExecutionRecorderItem as ad, type ExecutionTaskType as ae, type ExecutorContext as af, type ExecutionTaskHitBy as ag, type ExecutionTaskReturn as ah, type ExecutionTaskInsightLocateParam as ai, type ExecutionTaskInsightLocateOutput as aj, type ExecutionTaskInsightDumpLog as ak, type ExecutionTaskInsightLocateApply as al, type ExecutionTaskInsightLocate as am, type ExecutionTaskInsightQueryParam as an, type ExecutionTaskInsightQueryOutput as ao, type ExecutionTaskInsightQueryApply as ap, type ExecutionTaskInsightQuery as aq, type ExecutionTaskInsightAssertionParam as ar, type ExecutionTaskInsightAssertionApply as as, type ExecutionTaskInsightAssertion as at, type ExecutionTaskActionApply as au, type ExecutionTaskAction as av, type ExecutionTaskLogApply as aw, type ExecutionTaskLog as ax, type ExecutionTaskPlanningApply as ay, type ExecutionTaskPlanning as az, type ExecutionTaskApply as b, type MidsceneYamlFlowItemLogScreenshot as b0, type FreeFn as b1, type ScriptPlayerTaskStatus as b2, type ScriptPlayerStatusValue as b3, type MidsceneYamlConfig as b4, type MidsceneYamlConfigOutput as b5, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractParam as g, type InsightExtractOption as h, type TUserPrompt as i, type InsightAssertionResponse as j, type AIDescribeElementResponse as k, type MidsceneYamlTask as l, type MidsceneYamlFlowItem as m, type MidsceneYamlFlowItemAIRightClick as n, type MidsceneYamlConfigResult as o, type LocateOption as p, AIResponseFormat as q, type AISingleElementResponseById as r, type AISingleElementResponseByPosition as s, type AISingleElementResponse as t, type AIElementLocatorResponse as u, type AIElementCoordinatesResponse as v, type AIElementResponse as w, type AIDataExtractionResponse as x, type AISectionLocatorResponse as y, type AIAssertionResponse as z };
577
+ export { type PlanningActionParamHover as $, type AIUsageInfo as A, type LocatorValidatorOption as B, type LocateValidatorResult as C, type DumpSubscriber as D, type ExecutionTask as E, type AgentDescribeElementAtPointResult as F, type CallAIFn as G, type EnsureObject as H, type InsightAction as I, type LocateResultElement as J, type DumpMeta as K, type LocateResult as L, type MidsceneYamlScript as M, type InsightDump as N, type LiteUISection as O, type PartialInsightDumpFromSDK as P, type ElementById as Q, type ReportDumpWithAttributes as R, type OnTaskStartTip as S, type TMultimodalPrompt as T, UIContext as U, type AgentWaitForOpt as V, type AgentAssertOpt as W, type PlanningLocateParam as X, type PlanningAction as Y, type PlanningAIResponse as Z, type PlanningActionParamTap as _, type ExecutionTaskProgressOptions as a, type MidsceneYamlFlowItemEvaluateJavaScript as a$, type PlanningActionParamRightClick as a0, type PlanningActionParamInputOrKeyPress as a1, type PlanningActionParamScroll as a2, type PlanningActionParamAssert as a3, type PlanningActionParamSleep as a4, type PlanningActionParamError as a5, type PlanningActionParamWaitFor as a6, type PlanningActionParamAndroidLongPress as a7, type PlanningActionParamAndroidPull as a8, type Color as a9, type GroupedActionDump as aA, type PageType as aB, type StreamingCodeGenerationOptions as aC, type StreamingCallback as aD, type CodeGenerationChunk as aE, type StreamingAIResponse as aF, type DeviceAction as aG, type ReferenceImage as aH, type scrollParam as aI, type MidsceneYamlScriptEnvBase as aJ, type MidsceneYamlScriptWebEnv as aK, type MidsceneYamlScriptAndroidEnv as aL, type MidsceneYamlScriptEnv as aM, type MidsceneYamlFlowItemAIAction as aN, type MidsceneYamlFlowItemAIAssert as aO, type MidsceneYamlFlowItemAIQuery as aP, type MidsceneYamlFlowItemAINumber as aQ, type MidsceneYamlFlowItemAIString as aR, type MidsceneYamlFlowItemAIAsk as aS, type MidsceneYamlFlowItemAIBoolean as aT, type MidsceneYamlFlowItemAILocate as aU, type MidsceneYamlFlowItemAIWaitFor as aV, type MidsceneYamlFlowItemAITap as aW, type MidsceneYamlFlowItemAIHover as aX, type MidsceneYamlFlowItemAIInput as aY, type MidsceneYamlFlowItemAIKeyboardPress as aZ, type MidsceneYamlFlowItemAIScroll as a_, type BaseAgentParserOpt as aa, type PuppeteerParserOpt as ab, type PlaywrightParserOpt as ac, type ExecutionRecorderItem as ad, type ExecutionTaskType as ae, type ExecutorContext as af, type ExecutionTaskHitBy as ag, type ExecutionTaskReturn as ah, type ExecutionTaskInsightLocateParam as ai, type ExecutionTaskInsightLocateOutput as aj, type ExecutionTaskInsightDumpLog as ak, type ExecutionTaskInsightLocateApply as al, type ExecutionTaskInsightLocate as am, type ExecutionTaskInsightQueryParam as an, type ExecutionTaskInsightQueryOutput as ao, type ExecutionTaskInsightQueryApply as ap, type ExecutionTaskInsightQuery as aq, type ExecutionTaskInsightAssertionParam as ar, type ExecutionTaskInsightAssertionApply as as, type ExecutionTaskInsightAssertion as at, type ExecutionTaskActionApply as au, type ExecutionTaskAction as av, type ExecutionTaskLogApply as aw, type ExecutionTaskLog as ax, type ExecutionTaskPlanningApply as ay, type ExecutionTaskPlanning as az, type ExecutionTaskApply as b, type MidsceneYamlFlowItemSleep as b0, type MidsceneYamlFlowItemLogScreenshot as b1, type FreeFn as b2, type ScriptPlayerTaskStatus as b3, type ScriptPlayerStatusValue as b4, type MidsceneYamlConfig as b5, type MidsceneYamlConfigOutput as b6, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractParam as g, type InsightExtractOption as h, type TUserPrompt as i, type InsightAssertionResponse as j, type AIDescribeElementResponse as k, type MidsceneYamlTask as l, type MidsceneYamlFlowItem as m, type MidsceneYamlFlowItemAIRightClick as n, type MidsceneYamlConfigResult as o, type LocateOption as p, AIResponseFormat as q, type AISingleElementResponseById as r, type AISingleElementResponseByPosition as s, type AISingleElementResponse as t, type AIElementLocatorResponse as u, type AIElementCoordinatesResponse as v, type AIElementResponse as w, type AIDataExtractionResponse as x, type AISectionLocatorResponse as y, type AIAssertionResponse as z };
@@ -1,4 +1,4 @@
1
- import { R as ReportDumpWithAttributes } from './types-b4a208c6.js';
1
+ import { R as ReportDumpWithAttributes } from './types-8a6be57c.js';
2
2
  import { Rect } from '@midscene/shared/types';
3
3
  import '@midscene/shared/constants';
4
4
  import 'openai/resources';
package/dist/lib/utils.js CHANGED
@@ -12,7 +12,7 @@
12
12
 
13
13
 
14
14
 
15
- var _chunkKAYSYGXRjs = require('./chunk-KAYSYGXR.js');
15
+ var _chunkO3KUKF2Ajs = require('./chunk-O3KUKF2A.js');
16
16
 
17
17
 
18
18
 
@@ -27,4 +27,4 @@ var _chunkKAYSYGXRjs = require('./chunk-KAYSYGXR.js');
27
27
 
28
28
 
29
29
 
30
- exports.getTmpDir = _chunkKAYSYGXRjs.getTmpDir; exports.getTmpFile = _chunkKAYSYGXRjs.getTmpFile; exports.getVersion = _chunkKAYSYGXRjs.getVersion; exports.groupedActionDumpFileExt = _chunkKAYSYGXRjs.groupedActionDumpFileExt; exports.insertScriptBeforeClosingHtml = _chunkKAYSYGXRjs.insertScriptBeforeClosingHtml; exports.overlapped = _chunkKAYSYGXRjs.overlapped; exports.replacerForPageObject = _chunkKAYSYGXRjs.replacerForPageObject; exports.reportHTMLContent = _chunkKAYSYGXRjs.reportHTMLContent; exports.sleep = _chunkKAYSYGXRjs.sleep; exports.stringifyDumpData = _chunkKAYSYGXRjs.stringifyDumpData; exports.uploadTestInfoToServer = _chunkKAYSYGXRjs.uploadTestInfoToServer; exports.writeDumpReport = _chunkKAYSYGXRjs.writeDumpReport; exports.writeLogFile = _chunkKAYSYGXRjs.writeLogFile;
30
+ exports.getTmpDir = _chunkO3KUKF2Ajs.getTmpDir; exports.getTmpFile = _chunkO3KUKF2Ajs.getTmpFile; exports.getVersion = _chunkO3KUKF2Ajs.getVersion; exports.groupedActionDumpFileExt = _chunkO3KUKF2Ajs.groupedActionDumpFileExt; exports.insertScriptBeforeClosingHtml = _chunkO3KUKF2Ajs.insertScriptBeforeClosingHtml; exports.overlapped = _chunkO3KUKF2Ajs.overlapped; exports.replacerForPageObject = _chunkO3KUKF2Ajs.replacerForPageObject; exports.reportHTMLContent = _chunkO3KUKF2Ajs.reportHTMLContent; exports.sleep = _chunkO3KUKF2Ajs.sleep; exports.stringifyDumpData = _chunkO3KUKF2Ajs.stringifyDumpData; exports.uploadTestInfoToServer = _chunkO3KUKF2Ajs.uploadTestInfoToServer; exports.writeDumpReport = _chunkO3KUKF2Ajs.writeDumpReport; exports.writeLogFile = _chunkO3KUKF2Ajs.writeLogFile;
@@ -1,9 +1,9 @@
1
- import { aD as StreamingCallback, A as AIUsageInfo, aC as StreamingCodeGenerationOptions, aF as StreamingAIResponse, Y as PlanningAction, m as MidsceneYamlFlowItem } from './types-b4a208c6.js';
1
+ import { aD as StreamingCallback, A as AIUsageInfo, aC as StreamingCodeGenerationOptions, aF as StreamingAIResponse, Y as PlanningAction, m as MidsceneYamlFlowItem } from './types-8a6be57c.js';
2
2
  import OpenAI from 'openai';
3
3
  import { ChatCompletionMessageParam } from 'openai/resources';
4
4
  export { ChatCompletionMessageParam } from 'openai/resources';
5
- import { b as AIActionType, e as AIArgs } from './llm-planning-92cec090.js';
6
- export { a as AiAssert, g as AiExtractElementInfo, A as AiLocateElement, h as AiLocateSection, i as adaptBboxToRect, c as callAiFn, d as describeUserPage, f as elementByPositionWithElementInfo, p as plan } from './llm-planning-92cec090.js';
5
+ import { b as AIActionType, e as AIArgs } from './llm-planning-4e0c16fe.js';
6
+ export { a as AiAssert, g as AiExtractElementInfo, A as AiLocateElement, h as AiLocateSection, i as adaptBboxToRect, c as callAiFn, d as describeUserPage, f as elementByPositionWithElementInfo, p as plan } from './llm-planning-4e0c16fe.js';
7
7
  import { vlLocateMode } from '@midscene/shared/env';
8
8
  import { actionParser } from '@ui-tars/action-parser';
9
9
  import { Size } from '@midscene/shared/types';
@@ -1,7 +1,7 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractParam, h as InsightExtractOption, T as TMultimodalPrompt, A as AIUsageInfo, i as TUserPrompt, j as InsightAssertionResponse, k as AIDescribeElementResponse } from './types-b4a208c6.js';
2
- export { z as AIAssertionResponse, x as AIDataExtractionResponse, v as AIElementCoordinatesResponse, u as AIElementLocatorResponse, w as AIElementResponse, q as AIResponseFormat, y as AISectionLocatorResponse, t as AISingleElementResponse, r as AISingleElementResponseById, s as AISingleElementResponseByPosition, W as AgentAssertOpt, F as AgentDescribeElementAtPointResult, V as AgentWaitForOpt, aa as BaseAgentParserOpt, G as CallAIFn, aE as CodeGenerationChunk, a9 as Color, K as DumpMeta, Q as ElementById, H as EnsureObject, ad as ExecutionRecorderItem, av as ExecutionTaskAction, au as ExecutionTaskActionApply, ag as ExecutionTaskHitBy, at as ExecutionTaskInsightAssertion, as as ExecutionTaskInsightAssertionApply, ar as ExecutionTaskInsightAssertionParam, ak as ExecutionTaskInsightDumpLog, am as ExecutionTaskInsightLocate, al as ExecutionTaskInsightLocateApply, aj as ExecutionTaskInsightLocateOutput, ai as ExecutionTaskInsightLocateParam, aq as ExecutionTaskInsightQuery, ap as ExecutionTaskInsightQueryApply, ao as ExecutionTaskInsightQueryOutput, an as ExecutionTaskInsightQueryParam, ax as ExecutionTaskLog, aw as ExecutionTaskLogApply, az as ExecutionTaskPlanning, ay as ExecutionTaskPlanningApply, ah as ExecutionTaskReturn, ae as ExecutionTaskType, af as ExecutorContext, b1 as FreeFn, aA as GroupedActionDump, N as InsightDump, O as LiteUISection, p as LocateOption, J as LocateResultElement, C as LocateValidatorResult, B as LocatorValidatorOption, b4 as MidsceneYamlConfig, b5 as MidsceneYamlConfigOutput, o as MidsceneYamlConfigResult, m as MidsceneYamlFlowItem, aM as MidsceneYamlFlowItemAIAction, aR as MidsceneYamlFlowItemAIAsk, aN as MidsceneYamlFlowItemAIAssert, aS as MidsceneYamlFlowItemAIBoolean, aW as MidsceneYamlFlowItemAIHover, aX as MidsceneYamlFlowItemAIInput, aY as MidsceneYamlFlowItemAIKeyboardPress, aT as MidsceneYamlFlowItemAILocate, aP as MidsceneYamlFlowItemAINumber, aO as MidsceneYamlFlowItemAIQuery, n as MidsceneYamlFlowItemAIRightClick, aZ as MidsceneYamlFlowItemAIScroll, aQ as MidsceneYamlFlowItemAIString, aV as MidsceneYamlFlowItemAITap, aU as MidsceneYamlFlowItemAIWaitFor, a_ as MidsceneYamlFlowItemEvaluateJavaScript, b0 as MidsceneYamlFlowItemLogScreenshot, a$ as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aK as MidsceneYamlScriptAndroidEnv, aL as MidsceneYamlScriptEnv, aI as MidsceneYamlScriptEnvBase, aJ as MidsceneYamlScriptWebEnv, l as MidsceneYamlTask, S as OnTaskStartTip, aB as PageType, P as PartialInsightDumpFromSDK, Z as PlanningAIResponse, Y as PlanningAction, a7 as PlanningActionParamAndroidLongPress, a8 as PlanningActionParamAndroidPull, a3 as PlanningActionParamAssert, a5 as PlanningActionParamError, $ as PlanningActionParamHover, a1 as PlanningActionParamInputOrKeyPress, a0 as PlanningActionParamRightClick, a2 as PlanningActionParamScroll, a4 as PlanningActionParamSleep, _ as PlanningActionParamTap, a6 as PlanningActionParamWaitFor, X as PlanningLocateParam, ac as PlaywrightParserOpt, ab as PuppeteerParserOpt, aG as ReferenceImage, R as ReportDumpWithAttributes, b3 as ScriptPlayerStatusValue, b2 as ScriptPlayerTaskStatus, aF as StreamingAIResponse, aD as StreamingCallback, aC as StreamingCodeGenerationOptions, aH as scrollParam } from './types-b4a208c6.js';
3
- import { c as callAiFn } from './llm-planning-92cec090.js';
4
- export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-92cec090.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightTaskInfo, e as InsightOptions, f as DetailedLocateParam, L as LocateResult, g as InsightExtractParam, h as InsightExtractOption, T as TMultimodalPrompt, A as AIUsageInfo, i as TUserPrompt, j as InsightAssertionResponse, k as AIDescribeElementResponse } from './types-8a6be57c.js';
2
+ export { z as AIAssertionResponse, x as AIDataExtractionResponse, v as AIElementCoordinatesResponse, u as AIElementLocatorResponse, w as AIElementResponse, q as AIResponseFormat, y as AISectionLocatorResponse, t as AISingleElementResponse, r as AISingleElementResponseById, s as AISingleElementResponseByPosition, W as AgentAssertOpt, F as AgentDescribeElementAtPointResult, V as AgentWaitForOpt, aa as BaseAgentParserOpt, G as CallAIFn, aE as CodeGenerationChunk, a9 as Color, aG as DeviceAction, K as DumpMeta, Q as ElementById, H as EnsureObject, ad as ExecutionRecorderItem, av as ExecutionTaskAction, au as ExecutionTaskActionApply, ag as ExecutionTaskHitBy, at as ExecutionTaskInsightAssertion, as as ExecutionTaskInsightAssertionApply, ar as ExecutionTaskInsightAssertionParam, ak as ExecutionTaskInsightDumpLog, am as ExecutionTaskInsightLocate, al as ExecutionTaskInsightLocateApply, aj as ExecutionTaskInsightLocateOutput, ai as ExecutionTaskInsightLocateParam, aq as ExecutionTaskInsightQuery, ap as ExecutionTaskInsightQueryApply, ao as ExecutionTaskInsightQueryOutput, an as ExecutionTaskInsightQueryParam, ax as ExecutionTaskLog, aw as ExecutionTaskLogApply, az as ExecutionTaskPlanning, ay as ExecutionTaskPlanningApply, ah as ExecutionTaskReturn, ae as ExecutionTaskType, af as ExecutorContext, b2 as FreeFn, aA as GroupedActionDump, N as InsightDump, O as LiteUISection, p as LocateOption, J as LocateResultElement, C as LocateValidatorResult, B as LocatorValidatorOption, b5 as MidsceneYamlConfig, b6 as MidsceneYamlConfigOutput, o as MidsceneYamlConfigResult, m as MidsceneYamlFlowItem, aN as MidsceneYamlFlowItemAIAction, aS as MidsceneYamlFlowItemAIAsk, aO as MidsceneYamlFlowItemAIAssert, aT as MidsceneYamlFlowItemAIBoolean, aX as MidsceneYamlFlowItemAIHover, aY as MidsceneYamlFlowItemAIInput, aZ as MidsceneYamlFlowItemAIKeyboardPress, aU as MidsceneYamlFlowItemAILocate, aQ as MidsceneYamlFlowItemAINumber, aP as MidsceneYamlFlowItemAIQuery, n as MidsceneYamlFlowItemAIRightClick, a_ as MidsceneYamlFlowItemAIScroll, aR as MidsceneYamlFlowItemAIString, aW as MidsceneYamlFlowItemAITap, aV as MidsceneYamlFlowItemAIWaitFor, a$ as MidsceneYamlFlowItemEvaluateJavaScript, b1 as MidsceneYamlFlowItemLogScreenshot, b0 as MidsceneYamlFlowItemSleep, M as MidsceneYamlScript, aL as MidsceneYamlScriptAndroidEnv, aM as MidsceneYamlScriptEnv, aJ as MidsceneYamlScriptEnvBase, aK as MidsceneYamlScriptWebEnv, l as MidsceneYamlTask, S as OnTaskStartTip, aB as PageType, P as PartialInsightDumpFromSDK, Z as PlanningAIResponse, Y as PlanningAction, a7 as PlanningActionParamAndroidLongPress, a8 as PlanningActionParamAndroidPull, a3 as PlanningActionParamAssert, a5 as PlanningActionParamError, $ as PlanningActionParamHover, a1 as PlanningActionParamInputOrKeyPress, a0 as PlanningActionParamRightClick, a2 as PlanningActionParamScroll, a4 as PlanningActionParamSleep, _ as PlanningActionParamTap, a6 as PlanningActionParamWaitFor, X as PlanningLocateParam, ac as PlaywrightParserOpt, ab as PuppeteerParserOpt, aH as ReferenceImage, R as ReportDumpWithAttributes, b4 as ScriptPlayerStatusValue, b3 as ScriptPlayerTaskStatus, aF as StreamingAIResponse, aD as StreamingCallback, aC as StreamingCodeGenerationOptions, aI as scrollParam } from './types-8a6be57c.js';
3
+ import { c as callAiFn } from './llm-planning-4e0c16fe.js';
4
+ export { a as AiAssert, A as AiLocateElement, d as describeUserPage, p as plan } from './llm-planning-4e0c16fe.js';
5
5
  import { BaseElement, Rect } from '@midscene/shared/types';
6
6
  export { BaseElement, ElementTreeNode, Point, Rect, Size } from '@midscene/shared/types';
7
7
  export { getVersion } from './utils.js';
@@ -1,4 +1,4 @@
1
- import { A as AIUsageInfo, U as UIContext, i as TUserPrompt, aG as ReferenceImage, u as AIElementLocatorResponse, Q as ElementById, T as TMultimodalPrompt, h as InsightExtractOption, x as AIDataExtractionResponse, z as AIAssertionResponse, aB as PageType, Z as PlanningAIResponse } from './types-b4a208c6.js';
1
+ import { A as AIUsageInfo, U as UIContext, i as TUserPrompt, aH as ReferenceImage, u as AIElementLocatorResponse, Q as ElementById, T as TMultimodalPrompt, h as InsightExtractOption, x as AIDataExtractionResponse, z as AIAssertionResponse, aB as PageType, aG as DeviceAction, Z as PlanningAIResponse } from './types-8a6be57c.js';
2
2
  import { Rect, ElementTreeNode, BaseElement } from '@midscene/shared/types';
3
3
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
4
4
 
@@ -97,6 +97,7 @@ declare function AiAssert<ElementType extends BaseElement = BaseElement>(options
97
97
  declare function plan(userInstruction: string, opts: {
98
98
  context: UIContext;
99
99
  pageType: PageType;
100
+ actionSpace: DeviceAction[];
100
101
  callAI?: typeof callAiFn<PlanningAIResponse>;
101
102
  log?: string;
102
103
  actionContext?: string;
@@ -359,7 +359,7 @@ interface PlanningLocateParam extends DetailedLocateParam {
359
359
  }
360
360
  interface PlanningAction<ParamType = any> {
361
361
  thought?: string;
362
- type: 'Locate' | 'Tap' | 'RightClick' | 'Hover' | 'Drag' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'ExpectedFalsyCondition' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished' | 'AndroidBackButton' | 'AndroidHomeButton' | 'AndroidRecentAppsButton' | 'AndroidLongPress' | 'AndroidPull';
362
+ type: 'Locate' | 'Tap' | 'RightClick' | 'Hover' | 'Drag' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'Assert' | 'AssertWithoutThrow' | 'Sleep' | 'Finished' | 'AndroidBackButton' | 'AndroidHomeButton' | 'AndroidRecentAppsButton' | 'AndroidLongPress' | 'AndroidPull';
363
363
  param: ParamType;
364
364
  locate?: PlanningLocateParam | null;
365
365
  }
@@ -564,5 +564,14 @@ type TMultimodalPrompt = {
564
564
  type TUserPrompt = string | ({
565
565
  prompt: string;
566
566
  } & Partial<TMultimodalPrompt>);
567
+ interface DeviceAction<ParamType = any> {
568
+ name: string;
569
+ description?: string;
570
+ paramSchema?: string;
571
+ paramDescription?: string;
572
+ location?: 'required' | 'optional' | false;
573
+ whatToLocate?: string;
574
+ call: (param: ParamType) => Promise<void> | void;
575
+ }
567
576
 
568
- export { type PlanningActionParamHover as $, type AIUsageInfo as A, type LocatorValidatorOption as B, type LocateValidatorResult as C, type DumpSubscriber as D, type ExecutionTask as E, type AgentDescribeElementAtPointResult as F, type CallAIFn as G, type EnsureObject as H, type InsightAction as I, type LocateResultElement as J, type DumpMeta as K, type LocateResult as L, type MidsceneYamlScript as M, type InsightDump as N, type LiteUISection as O, type PartialInsightDumpFromSDK as P, type ElementById as Q, type ReportDumpWithAttributes as R, type OnTaskStartTip as S, type TMultimodalPrompt as T, UIContext as U, type AgentWaitForOpt as V, type AgentAssertOpt as W, type PlanningLocateParam as X, type PlanningAction as Y, type PlanningAIResponse as Z, type PlanningActionParamTap as _, type ExecutionTaskProgressOptions as a, type MidsceneYamlFlowItemSleep as a$, type PlanningActionParamRightClick as a0, type PlanningActionParamInputOrKeyPress as a1, type PlanningActionParamScroll as a2, type PlanningActionParamAssert as a3, type PlanningActionParamSleep as a4, type PlanningActionParamError as a5, type PlanningActionParamWaitFor as a6, type PlanningActionParamAndroidLongPress as a7, type PlanningActionParamAndroidPull as a8, type Color as a9, type GroupedActionDump as aA, type PageType as aB, type StreamingCodeGenerationOptions as aC, type StreamingCallback as aD, type CodeGenerationChunk as aE, type StreamingAIResponse as aF, type ReferenceImage as aG, type scrollParam as aH, type MidsceneYamlScriptEnvBase as aI, type MidsceneYamlScriptWebEnv as aJ, type MidsceneYamlScriptAndroidEnv as aK, type MidsceneYamlScriptEnv as aL, type MidsceneYamlFlowItemAIAction as aM, type MidsceneYamlFlowItemAIAssert as aN, type MidsceneYamlFlowItemAIQuery as aO, type MidsceneYamlFlowItemAINumber as aP, type MidsceneYamlFlowItemAIString as aQ, type MidsceneYamlFlowItemAIAsk as aR, type MidsceneYamlFlowItemAIBoolean as aS, type MidsceneYamlFlowItemAILocate as aT, type MidsceneYamlFlowItemAIWaitFor as aU, type MidsceneYamlFlowItemAITap as aV, type MidsceneYamlFlowItemAIHover as aW, type MidsceneYamlFlowItemAIInput as aX, type MidsceneYamlFlowItemAIKeyboardPress as aY, type MidsceneYamlFlowItemAIScroll as aZ, type MidsceneYamlFlowItemEvaluateJavaScript as a_, type BaseAgentParserOpt as aa, type PuppeteerParserOpt as ab, type PlaywrightParserOpt as ac, type ExecutionRecorderItem as ad, type ExecutionTaskType as ae, type ExecutorContext as af, type ExecutionTaskHitBy as ag, type ExecutionTaskReturn as ah, type ExecutionTaskInsightLocateParam as ai, type ExecutionTaskInsightLocateOutput as aj, type ExecutionTaskInsightDumpLog as ak, type ExecutionTaskInsightLocateApply as al, type ExecutionTaskInsightLocate as am, type ExecutionTaskInsightQueryParam as an, type ExecutionTaskInsightQueryOutput as ao, type ExecutionTaskInsightQueryApply as ap, type ExecutionTaskInsightQuery as aq, type ExecutionTaskInsightAssertionParam as ar, type ExecutionTaskInsightAssertionApply as as, type ExecutionTaskInsightAssertion as at, type ExecutionTaskActionApply as au, type ExecutionTaskAction as av, type ExecutionTaskLogApply as aw, type ExecutionTaskLog as ax, type ExecutionTaskPlanningApply as ay, type ExecutionTaskPlanning as az, type ExecutionTaskApply as b, type MidsceneYamlFlowItemLogScreenshot as b0, type FreeFn as b1, type ScriptPlayerTaskStatus as b2, type ScriptPlayerStatusValue as b3, type MidsceneYamlConfig as b4, type MidsceneYamlConfigOutput as b5, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractParam as g, type InsightExtractOption as h, type TUserPrompt as i, type InsightAssertionResponse as j, type AIDescribeElementResponse as k, type MidsceneYamlTask as l, type MidsceneYamlFlowItem as m, type MidsceneYamlFlowItemAIRightClick as n, type MidsceneYamlConfigResult as o, type LocateOption as p, AIResponseFormat as q, type AISingleElementResponseById as r, type AISingleElementResponseByPosition as s, type AISingleElementResponse as t, type AIElementLocatorResponse as u, type AIElementCoordinatesResponse as v, type AIElementResponse as w, type AIDataExtractionResponse as x, type AISectionLocatorResponse as y, type AIAssertionResponse as z };
577
+ export { type PlanningActionParamHover as $, type AIUsageInfo as A, type LocatorValidatorOption as B, type LocateValidatorResult as C, type DumpSubscriber as D, type ExecutionTask as E, type AgentDescribeElementAtPointResult as F, type CallAIFn as G, type EnsureObject as H, type InsightAction as I, type LocateResultElement as J, type DumpMeta as K, type LocateResult as L, type MidsceneYamlScript as M, type InsightDump as N, type LiteUISection as O, type PartialInsightDumpFromSDK as P, type ElementById as Q, type ReportDumpWithAttributes as R, type OnTaskStartTip as S, type TMultimodalPrompt as T, UIContext as U, type AgentWaitForOpt as V, type AgentAssertOpt as W, type PlanningLocateParam as X, type PlanningAction as Y, type PlanningAIResponse as Z, type PlanningActionParamTap as _, type ExecutionTaskProgressOptions as a, type MidsceneYamlFlowItemEvaluateJavaScript as a$, type PlanningActionParamRightClick as a0, type PlanningActionParamInputOrKeyPress as a1, type PlanningActionParamScroll as a2, type PlanningActionParamAssert as a3, type PlanningActionParamSleep as a4, type PlanningActionParamError as a5, type PlanningActionParamWaitFor as a6, type PlanningActionParamAndroidLongPress as a7, type PlanningActionParamAndroidPull as a8, type Color as a9, type GroupedActionDump as aA, type PageType as aB, type StreamingCodeGenerationOptions as aC, type StreamingCallback as aD, type CodeGenerationChunk as aE, type StreamingAIResponse as aF, type DeviceAction as aG, type ReferenceImage as aH, type scrollParam as aI, type MidsceneYamlScriptEnvBase as aJ, type MidsceneYamlScriptWebEnv as aK, type MidsceneYamlScriptAndroidEnv as aL, type MidsceneYamlScriptEnv as aM, type MidsceneYamlFlowItemAIAction as aN, type MidsceneYamlFlowItemAIAssert as aO, type MidsceneYamlFlowItemAIQuery as aP, type MidsceneYamlFlowItemAINumber as aQ, type MidsceneYamlFlowItemAIString as aR, type MidsceneYamlFlowItemAIAsk as aS, type MidsceneYamlFlowItemAIBoolean as aT, type MidsceneYamlFlowItemAILocate as aU, type MidsceneYamlFlowItemAIWaitFor as aV, type MidsceneYamlFlowItemAITap as aW, type MidsceneYamlFlowItemAIHover as aX, type MidsceneYamlFlowItemAIInput as aY, type MidsceneYamlFlowItemAIKeyboardPress as aZ, type MidsceneYamlFlowItemAIScroll as a_, type BaseAgentParserOpt as aa, type PuppeteerParserOpt as ab, type PlaywrightParserOpt as ac, type ExecutionRecorderItem as ad, type ExecutionTaskType as ae, type ExecutorContext as af, type ExecutionTaskHitBy as ag, type ExecutionTaskReturn as ah, type ExecutionTaskInsightLocateParam as ai, type ExecutionTaskInsightLocateOutput as aj, type ExecutionTaskInsightDumpLog as ak, type ExecutionTaskInsightLocateApply as al, type ExecutionTaskInsightLocate as am, type ExecutionTaskInsightQueryParam as an, type ExecutionTaskInsightQueryOutput as ao, type ExecutionTaskInsightQueryApply as ap, type ExecutionTaskInsightQuery as aq, type ExecutionTaskInsightAssertionParam as ar, type ExecutionTaskInsightAssertionApply as as, type ExecutionTaskInsightAssertion as at, type ExecutionTaskActionApply as au, type ExecutionTaskAction as av, type ExecutionTaskLogApply as aw, type ExecutionTaskLog as ax, type ExecutionTaskPlanningApply as ay, type ExecutionTaskPlanning as az, type ExecutionTaskApply as b, type MidsceneYamlFlowItemSleep as b0, type MidsceneYamlFlowItemLogScreenshot as b1, type FreeFn as b2, type ScriptPlayerTaskStatus as b3, type ScriptPlayerStatusValue as b4, type MidsceneYamlConfig as b5, type MidsceneYamlConfigOutput as b6, type ExecutionDump as c, type InsightTaskInfo as d, type InsightOptions as e, type DetailedLocateParam as f, type InsightExtractParam as g, type InsightExtractOption as h, type TUserPrompt as i, type InsightAssertionResponse as j, type AIDescribeElementResponse as k, type MidsceneYamlTask as l, type MidsceneYamlFlowItem as m, type MidsceneYamlFlowItemAIRightClick as n, type MidsceneYamlConfigResult as o, type LocateOption as p, AIResponseFormat as q, type AISingleElementResponseById as r, type AISingleElementResponseByPosition as s, type AISingleElementResponse as t, type AIElementLocatorResponse as u, type AIElementCoordinatesResponse as v, type AIElementResponse as w, type AIDataExtractionResponse as x, type AISectionLocatorResponse as y, type AIAssertionResponse as z };
@@ -1,4 +1,4 @@
1
- import { R as ReportDumpWithAttributes } from './types-b4a208c6.js';
1
+ import { R as ReportDumpWithAttributes } from './types-8a6be57c.js';
2
2
  import { Rect } from '@midscene/shared/types';
3
3
  import '@midscene/shared/constants';
4
4
  import 'openai/resources';
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "Automate browser actions, extract data, and perform assertions using AI. It offers JavaScript SDK, Chrome extension, and support for scripting in YAML. See https://midscenejs.com/ for details.",
4
- "version": "0.26.2-beta-20250812035614.0",
4
+ "version": "0.26.2",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "jsnext:source": "./src/index.ts",
@@ -44,8 +44,8 @@
44
44
  "langsmith": "0.3.7",
45
45
  "openai": "4.81.0",
46
46
  "socks-proxy-agent": "8.0.4",
47
- "@midscene/recorder": "0.26.2-beta-20250812035614.0",
48
- "@midscene/shared": "0.26.2-beta-20250812035614.0"
47
+ "@midscene/recorder": "0.26.2",
48
+ "@midscene/shared": "0.26.2"
49
49
  },
50
50
  "devDependencies": {
51
51
  "@modern-js/module-tools": "2.60.6",