npm - @midscene/core - Versions diffs - 0.30.10 → 1.0.0 - Mend

@midscene/core 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (203) hide show

package/dist/es/agent/agent.mjs +233 -144
package/dist/es/agent/agent.mjs.map +1 -1
package/dist/es/agent/execution-session.mjs +41 -0
package/dist/es/agent/execution-session.mjs.map +1 -0
package/dist/es/agent/index.mjs +3 -3
package/dist/es/agent/task-builder.mjs +319 -0
package/dist/es/agent/task-builder.mjs.map +1 -0
package/dist/es/agent/task-cache.mjs +4 -4
package/dist/es/agent/task-cache.mjs.map +1 -1
package/dist/es/agent/tasks.mjs +197 -504
package/dist/es/agent/tasks.mjs.map +1 -1
package/dist/es/agent/ui-utils.mjs +54 -35
package/dist/es/agent/ui-utils.mjs.map +1 -1
package/dist/es/agent/utils.mjs +16 -58
package/dist/es/agent/utils.mjs.map +1 -1
package/dist/es/ai-model/conversation-history.mjs +25 -13
package/dist/es/ai-model/conversation-history.mjs.map +1 -1
package/dist/es/ai-model/index.mjs +4 -4
package/dist/es/ai-model/inspect.mjs +45 -54
package/dist/es/ai-model/inspect.mjs.map +1 -1
package/dist/es/ai-model/llm-planning.mjs +47 -65
package/dist/es/ai-model/llm-planning.mjs.map +1 -1
package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
package/dist/es/ai-model/prompt/common.mjs.map +1 -1
package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
package/dist/es/ai-model/prompt/llm-locator.mjs +11 -235
package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
package/dist/es/ai-model/prompt/llm-planning.mjs +76 -322
package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
package/dist/es/ai-model/prompt/playwright-generator.mjs +2 -2
package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -1
package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -1
package/dist/es/ai-model/prompt/util.mjs +3 -88
package/dist/es/ai-model/prompt/util.mjs.map +1 -1
package/dist/es/ai-model/prompt/yaml-generator.mjs +10 -10
package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
package/dist/es/ai-model/service-caller/index.mjs +182 -274
package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
package/dist/es/ai-model/ui-tars-planning.mjs +69 -8
package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
package/dist/es/{ai-model/common.mjs → common.mjs} +18 -30
package/dist/es/common.mjs.map +1 -0
package/dist/es/device/device-options.mjs +0 -0
package/dist/es/device/index.mjs +29 -12
package/dist/es/device/index.mjs.map +1 -1
package/dist/es/index.mjs +5 -4
package/dist/es/index.mjs.map +1 -1
package/dist/es/report.mjs.map +1 -1
package/dist/es/{insight → service}/index.mjs +38 -51
package/dist/es/service/index.mjs.map +1 -0
package/dist/es/{insight → service}/utils.mjs +3 -3
package/dist/es/service/utils.mjs.map +1 -0
package/dist/es/task-runner.mjs +264 -0
package/dist/es/task-runner.mjs.map +1 -0
package/dist/es/tree.mjs +13 -2
package/dist/es/tree.mjs.map +1 -0
package/dist/es/types.mjs +18 -1
package/dist/es/types.mjs.map +1 -1
package/dist/es/utils.mjs +6 -7
package/dist/es/utils.mjs.map +1 -1
package/dist/es/yaml/builder.mjs.map +1 -1
package/dist/es/yaml/player.mjs +121 -98
package/dist/es/yaml/player.mjs.map +1 -1
package/dist/es/yaml/utils.mjs +1 -1
package/dist/es/yaml/utils.mjs.map +1 -1
package/dist/lib/agent/agent.js +231 -142
package/dist/lib/agent/agent.js.map +1 -1
package/dist/lib/agent/common.js +1 -1
package/dist/lib/agent/execution-session.js +75 -0
package/dist/lib/agent/execution-session.js.map +1 -0
package/dist/lib/agent/index.js +14 -14
package/dist/lib/agent/index.js.map +1 -1
package/dist/lib/agent/task-builder.js +356 -0
package/dist/lib/agent/task-builder.js.map +1 -0
package/dist/lib/agent/task-cache.js +8 -8
package/dist/lib/agent/task-cache.js.map +1 -1
package/dist/lib/agent/tasks.js +202 -506
package/dist/lib/agent/tasks.js.map +1 -1
package/dist/lib/agent/ui-utils.js +58 -36
package/dist/lib/agent/ui-utils.js.map +1 -1
package/dist/lib/agent/utils.js +26 -68
package/dist/lib/agent/utils.js.map +1 -1
package/dist/lib/ai-model/conversation-history.js +27 -15
package/dist/lib/ai-model/conversation-history.js.map +1 -1
package/dist/lib/ai-model/index.js +27 -27
package/dist/lib/ai-model/index.js.map +1 -1
package/dist/lib/ai-model/inspect.js +51 -57
package/dist/lib/ai-model/inspect.js.map +1 -1
package/dist/lib/ai-model/llm-planning.js +49 -67
package/dist/lib/ai-model/llm-planning.js.map +1 -1
package/dist/lib/ai-model/prompt/assertion.js +2 -2
package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
package/dist/lib/ai-model/prompt/common.js +2 -2
package/dist/lib/ai-model/prompt/common.js.map +1 -1
package/dist/lib/ai-model/prompt/describe.js +2 -2
package/dist/lib/ai-model/prompt/describe.js.map +1 -1
package/dist/lib/ai-model/prompt/extraction.js +2 -2
package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
package/dist/lib/ai-model/prompt/llm-locator.js +14 -241
package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
package/dist/lib/ai-model/prompt/llm-planning.js +79 -328
package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
package/dist/lib/ai-model/prompt/llm-section-locator.js +17 -16
package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
package/dist/lib/ai-model/prompt/playwright-generator.js +11 -11
package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -1
package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -1
package/dist/lib/ai-model/prompt/util.js +7 -95
package/dist/lib/ai-model/prompt/util.js.map +1 -1
package/dist/lib/ai-model/prompt/yaml-generator.js +18 -18
package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
package/dist/lib/ai-model/service-caller/index.js +288 -401
package/dist/lib/ai-model/service-caller/index.js.map +1 -1
package/dist/lib/ai-model/ui-tars-planning.js +71 -10
package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
package/dist/lib/{ai-model/common.js → common.js} +40 -55
package/dist/lib/common.js.map +1 -0
package/dist/lib/device/device-options.js +20 -0
package/dist/lib/device/device-options.js.map +1 -0
package/dist/lib/device/index.js +63 -40
package/dist/lib/device/index.js.map +1 -1
package/dist/lib/image/index.js +5 -5
package/dist/lib/image/index.js.map +1 -1
package/dist/lib/index.js +24 -20
package/dist/lib/index.js.map +1 -1
package/dist/lib/report.js +2 -2
package/dist/lib/report.js.map +1 -1
package/dist/lib/{insight → service}/index.js +41 -54
package/dist/lib/service/index.js.map +1 -0
package/dist/lib/{insight → service}/utils.js +7 -7
package/dist/lib/service/utils.js.map +1 -0
package/dist/lib/task-runner.js +301 -0
package/dist/lib/task-runner.js.map +1 -0
package/dist/lib/tree.js +13 -4
package/dist/lib/tree.js.map +1 -1
package/dist/lib/types.js +31 -12
package/dist/lib/types.js.map +1 -1
package/dist/lib/utils.js +16 -17
package/dist/lib/utils.js.map +1 -1
package/dist/lib/yaml/builder.js +2 -2
package/dist/lib/yaml/builder.js.map +1 -1
package/dist/lib/yaml/index.js +16 -22
package/dist/lib/yaml/index.js.map +1 -1
package/dist/lib/yaml/player.js +123 -100
package/dist/lib/yaml/player.js.map +1 -1
package/dist/lib/yaml/utils.js +6 -6
package/dist/lib/yaml/utils.js.map +1 -1
package/dist/lib/yaml.js +1 -1
package/dist/lib/yaml.js.map +1 -1
package/dist/types/agent/agent.d.ts +62 -17
package/dist/types/agent/execution-session.d.ts +36 -0
package/dist/types/agent/index.d.ts +3 -2
package/dist/types/agent/task-builder.d.ts +35 -0
package/dist/types/agent/tasks.d.ts +32 -23
package/dist/types/agent/ui-utils.d.ts +9 -2
package/dist/types/agent/utils.d.ts +9 -35
package/dist/types/ai-model/conversation-history.d.ts +8 -4
package/dist/types/ai-model/index.d.ts +5 -5
package/dist/types/ai-model/inspect.d.ts +20 -12
package/dist/types/ai-model/llm-planning.d.ts +3 -1
package/dist/types/ai-model/prompt/llm-locator.d.ts +1 -6
package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -3
package/dist/types/ai-model/prompt/llm-section-locator.d.ts +1 -3
package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
package/dist/types/ai-model/prompt/util.d.ts +2 -34
package/dist/types/ai-model/service-caller/index.d.ts +2 -3
package/dist/types/ai-model/ui-tars-planning.d.ts +15 -2
package/dist/types/{ai-model/common.d.ts → common.d.ts} +6 -6
package/dist/types/device/device-options.d.ts +57 -0
package/dist/types/device/index.d.ts +55 -39
package/dist/types/index.d.ts +7 -6
package/dist/types/service/index.d.ts +26 -0
package/dist/types/service/utils.d.ts +2 -0
package/dist/types/task-runner.d.ts +49 -0
package/dist/types/tree.d.ts +4 -1
package/dist/types/types.d.ts +103 -66
package/dist/types/yaml/utils.d.ts +1 -1
package/dist/types/yaml.d.ts +68 -43
package/package.json +9 -12
package/dist/es/ai-model/action-executor.mjs +0 -129
package/dist/es/ai-model/action-executor.mjs.map +0 -1
package/dist/es/ai-model/common.mjs.map +0 -1
package/dist/es/insight/index.mjs.map +0 -1
package/dist/es/insight/utils.mjs.map +0 -1
package/dist/lib/ai-model/action-executor.js +0 -163
package/dist/lib/ai-model/action-executor.js.map +0 -1
package/dist/lib/ai-model/common.js.map +0 -1
package/dist/lib/insight/index.js.map +0 -1
package/dist/lib/insight/utils.js.map +0 -1
package/dist/types/ai-model/action-executor.d.ts +0 -19
package/dist/types/insight/index.d.ts +0 -31
package/dist/types/insight/utils.d.ts +0 -2

package/dist/types/agent/ui-utils.d.ts CHANGED Viewed

@@ -1,7 +1,14 @@
-import type { AndroidPullParam, DetailedLocateParam, ExecutionTask, ScrollParam } from '../types';
+import type { DetailedLocateParam, ExecutionTask, PullParam, ScrollParam } from '../types';
 export declare function typeStr(task: ExecutionTask): any;
 export declare function locateParamStr(locate?: DetailedLocateParam | string): string;
 export declare function scrollParamStr(scrollParam?: ScrollParam): string;
-export declare function pullParamStr(pullParam?: AndroidPullParam): string;
+export declare function pullParamStr(pullParam?: PullParam): string;
+export declare function extractInsightParam(taskParam: any): {
+    content: string;
+    images?: Array<{
+        name: string;
+        url: string;
+    }>;
+};
 export declare function taskTitleStr(type: 'Tap' | 'Hover' | 'Input' | 'RightClick' | 'KeyboardPress' | 'Scroll' | 'Action' | 'Query' | 'Assert' | 'WaitFor' | 'Locate' | 'Boolean' | 'Number' | 'String', prompt: string): string;
 export declare function paramStr(task: ExecutionTask): string;

package/dist/types/agent/utils.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
-import type { TMultimodalPrompt, TUserPrompt } from '../ai-model/common';
+import type { TMultimodalPrompt, TUserPrompt } from '../common';
 import type { AbstractInterface } from '../device';
-import type { BaseElement, ElementCacheFeature, ElementTreeNode, ExecutionDump, ExecutorContext, LocateResultElement, PlanningLocateParam, UIContext } from '../types';
-import type { TaskExecutor } from './tasks';
+import type { ElementCacheFeature, LocateResultElement, PlanningLocateParam, UIContext } from '../types';
+import type { TaskCache } from './task-cache';
 export declare function commonContextParser(interfaceInstance: AbstractInterface, _opt: {
     uploadServerUrl?: string;
 }): Promise<UIContext>;
@@ -13,38 +13,12 @@ export declare function printReportMsg(filepath: string): void;
  */
 export declare function getCurrentExecutionFile(trace?: string): string | false;
 export declare function generateCacheId(fileName?: string): string;
-export declare function matchElementFromPlan(planLocateParam: PlanningLocateParam, tree: ElementTreeNode<BaseElement>): any;
-export declare function matchElementFromCache(taskExecutor: TaskExecutor, cacheEntry: ElementCacheFeature | undefined, cachePrompt: TUserPrompt, cacheable: boolean | undefined): Promise<LocateResultElement | undefined>;
-export declare function trimContextByViewport(execution: ExecutionDump): {
-    tasks: {
-        type: any;
-        subType?: string;
-        param?: any;
-        thought?: string;
-        locate?: PlanningLocateParam | null;
-        uiContext?: UIContext;
-        executor: (param: any, context: ExecutorContext) => void | Promise<void | import("../types").ExecutionTaskReturn<any, any> | undefined> | undefined;
-        output?: any;
-        log?: any;
-        recorder?: import("../types").ExecutionRecorderItem[];
-        hitBy?: import("../types").ExecutionTaskHitBy;
-        status: "pending" | "running" | "finished" | "failed" | "cancelled";
-        error?: Error;
-        errorMessage?: string;
-        errorStack?: string;
-        timing?: {
-            start: number;
-            end?: number;
-            cost?: number;
-        };
-        usage?: import("../types").AIUsageInfo;
-        searchAreaUsage?: import("../types").AIUsageInfo;
-    }[];
-    name: string;
-    description?: string;
-    aiActionContext?: string;
-    logTime: number;
-};
+export declare function ifPlanLocateParamIsBbox(planLocateParam: PlanningLocateParam): boolean;
+export declare function matchElementFromPlan(planLocateParam: PlanningLocateParam): LocateResultElement | undefined;
+export declare function matchElementFromCache(context: {
+    taskCache?: TaskCache;
+    interfaceInstance: AbstractInterface;
+}, cacheEntry: ElementCacheFeature | undefined, cachePrompt: TUserPrompt, cacheable: boolean | undefined): Promise<LocateResultElement | undefined>;
 export declare const getMidsceneVersion: () => string;
 export declare const parsePrompt: (prompt: TUserPrompt) => {
     textPrompt: string;

package/dist/types/ai-model/conversation-history.d.ts CHANGED Viewed

@@ -1,18 +1,22 @@
 import type { ChatCompletionMessageParam } from 'openai/resources/index';
 export interface ConversationHistoryOptions {
-    maxUserImageMessages?: number;
     initialMessages?: ChatCompletionMessageParam[];
 }
 export declare class ConversationHistory {
-    private readonly maxUserImageMessages;
     private readonly messages;
+    pendingFeedbackMessage: string;
     constructor(options?: ConversationHistoryOptions);
+    resetPendingFeedbackMessageIfExists(): void;
     append(message: ChatCompletionMessageParam): void;
     seed(messages: ChatCompletionMessageParam[]): void;
     reset(): void;
-    snapshot(): ChatCompletionMessageParam[];
+    /**
+     * Snapshot the conversation history, and replace the images with text if the number of images exceeds the limit.
+     * @param maxImages - The maximum number of images to include in the snapshot. Undefined means no limit.
+     * @returns The snapshot of the conversation history.
+     */
+    snapshot(maxImages?: number): ChatCompletionMessageParam[];
     get length(): number;
     [Symbol.iterator](): IterableIterator<ChatCompletionMessageParam>;
     toJSON(): ChatCompletionMessageParam[];
-    private pruneOldestUserMessageIfNecessary;
 }

package/dist/types/ai-model/index.d.ts CHANGED Viewed

@@ -1,13 +1,13 @@
 export { callAIWithStringResponse, callAIWithObjectResponse, callAI, } from './service-caller/index';
 export { systemPromptToLocateElement } from './prompt/llm-locator';
-export { describeUserPage, elementByPositionWithElementInfo, } from './prompt/util';
+export { describeUserPage } from './prompt/util';
 export { generatePlaywrightTest, generatePlaywrightTestStream, } from './prompt/playwright-generator';
 export { generateYamlTest, generateYamlTestStream, } from './prompt/yaml-generator';
 export type { ChatCompletionMessageParam } from 'openai/resources/index';
-export { AiLocateElement, AiExtractElementInfo, AiLocateSection, } from './inspect';
+export { AiLocateElement, AiExtractElementInfo, AiLocateSection, AiJudgeOrderSensitive, } from './inspect';
 export { plan } from './llm-planning';
-export { adaptBboxToRect } from './common';
+export { adaptBboxToRect } from '../common';
 export { uiTarsPlanning, resizeImageForUiTars } from './ui-tars-planning';
 export { ConversationHistory, type ConversationHistoryOptions, } from './conversation-history';
-export { AIActionType, type AIArgs } from './common';
-export { getMidsceneLocationSchema, type MidsceneLocationResultType, PointSchema, SizeSchema, RectSchema, TMultimodalPromptSchema, TUserPromptSchema, type TMultimodalPrompt, type TUserPrompt, findAllMidsceneLocatorField, dumpActionParam, loadActionParam, parseActionParam, } from './common';
+export { AIActionType, type AIArgs } from '../common';
+export { getMidsceneLocationSchema, type MidsceneLocationResultType, PointSchema, SizeSchema, RectSchema, TMultimodalPromptSchema, TUserPromptSchema, type TMultimodalPrompt, type TUserPrompt, findAllMidsceneLocatorField, dumpActionParam, loadActionParam, parseActionParam, } from '../common';

package/dist/types/ai-model/inspect.d.ts CHANGED Viewed

@@ -1,29 +1,31 @@
-import type { AIDataExtractionResponse, AIElementLocatorResponse, AIElementResponse, AIUsageInfo, BaseElement, ElementById, InsightExtractOption, Rect, ReferenceImage, UIContext } from '../types';
+import type { AIDataExtractionResponse, AIElementResponse, AIUsageInfo, Rect, ReferenceImage, ServiceExtractOption, UIContext } from '../types';
 import type { IModelConfig } from '@midscene/shared/env';
+import type { LocateResultElement } from '@midscene/shared/types';
 import type { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources/index';
-import type { TMultimodalPrompt, TUserPrompt } from './common';
+import type { TMultimodalPrompt, TUserPrompt } from '../common';
 import { callAIWithObjectResponse } from './service-caller/index';
 export type AIArgs = [
     ChatCompletionSystemMessageParam,
     ...ChatCompletionUserMessageParam[]
 ];
-export declare function AiLocateElement<ElementType extends BaseElement = BaseElement>(options: {
-    context: UIContext<ElementType>;
+export declare function AiLocateElement(options: {
+    context: UIContext;
     targetElementDescription: TUserPrompt;
     referenceImage?: ReferenceImage;
     callAIFn: typeof callAIWithObjectResponse<AIElementResponse | [number, number]>;
     searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;
     modelConfig: IModelConfig;
 }): Promise<{
-    parseResult: AIElementLocatorResponse;
+    parseResult: {
+        elements: LocateResultElement[];
+        errors?: string[];
+    };
     rect?: Rect;
     rawResponse: string;
-    elementById: ElementById;
     usage?: AIUsageInfo;
-    isOrderSensitive?: boolean;
 }>;
 export declare function AiLocateSection(options: {
-    context: UIContext<BaseElement>;
+    context: UIContext;
     sectionDescription: TUserPrompt;
     modelConfig: IModelConfig;
 }): Promise<{
@@ -33,14 +35,20 @@ export declare function AiLocateSection(options: {
     rawResponse: string;
     usage?: AIUsageInfo;
 }>;
-export declare function AiExtractElementInfo<T, ElementType extends BaseElement = BaseElement>(options: {
+export declare function AiExtractElementInfo<T>(options: {
     dataQuery: string | Record<string, string>;
     multimodalPrompt?: TMultimodalPrompt;
-    context: UIContext<ElementType>;
-    extractOption?: InsightExtractOption;
+    context: UIContext;
+    pageDescription?: string;
+    extractOption?: ServiceExtractOption;
     modelConfig: IModelConfig;
 }): Promise<{
     parseResult: AIDataExtractionResponse<T>;
-    elementById: (idOrIndexId: string) => ElementType;
     usage: AIUsageInfo | undefined;
 }>;
+export declare function AiJudgeOrderSensitive(description: string, callAIFn: typeof callAIWithObjectResponse<{
+    isOrderSensitive: boolean;
+}>, modelConfig: IModelConfig): Promise<{
+    isOrderSensitive: boolean;
+    usage?: AIUsageInfo;
+}>;

package/dist/types/ai-model/llm-planning.d.ts CHANGED Viewed

@@ -7,5 +7,7 @@ export declare function plan(userInstruction: string, opts: {
     actionSpace: DeviceAction<any>[];
     actionContext?: string;
     modelConfig: IModelConfig;
-    conversationHistory?: ConversationHistory;
+    conversationHistory: ConversationHistory;
+    includeBbox: boolean;
+    imagesIncludeCount?: number;
 }): Promise<PlanningAIResponse>;

package/dist/types/ai-model/prompt/llm-locator.d.ts CHANGED Viewed

@@ -1,8 +1,3 @@
 import type { TVlModeTypes } from '@midscene/shared/env';
-import type { ResponseFormatJSONSchema } from 'openai/resources/index';
 export declare function systemPromptToLocateElement(vlMode: TVlModeTypes | undefined): string;
-export declare const locatorSchema: ResponseFormatJSONSchema;
-export declare const findElementPrompt: ({ pageDescription, targetElementDescription, }: {
-    pageDescription: string;
-    targetElementDescription: string;
-}) => string;
+export declare const findElementPrompt: (targetElementDescription: string) => string;

package/dist/types/ai-model/prompt/llm-planning.d.ts CHANGED Viewed

@@ -1,9 +1,8 @@
 import type { DeviceAction } from '../../types';
 import type { TVlModeTypes } from '@midscene/shared/env';
-import type { ResponseFormatJSONSchema } from 'openai/resources/index';
 export declare const descriptionForAction: (action: DeviceAction<any>, locatorSchemaTypeDescription: string) => string;
-export declare function systemPromptToTaskPlanning({ actionSpace, vlMode, }: {
+export declare function systemPromptToTaskPlanning({ actionSpace, vlMode, includeBbox, }: {
     actionSpace: DeviceAction<any>[];
     vlMode: TVlModeTypes | undefined;
+    includeBbox: boolean;
 }): Promise<string>;
-export declare const planSchema: ResponseFormatJSONSchema;

package/dist/types/ai-model/prompt/llm-section-locator.d.ts CHANGED Viewed

@@ -1,5 +1,3 @@
 import type { TVlModeTypes } from '@midscene/shared/env';
 export declare function systemPromptToLocateSection(vlMode: TVlModeTypes | undefined): string;
-export declare const sectionLocatorInstruction: ({ sectionDescription, }: {
-    sectionDescription: string;
-}) => string;
+export declare const sectionLocatorInstruction: (sectionDescription: string) => string;

package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+export declare function systemPromptToJudgeOrderSensitive(): string;
+export declare const orderSensitiveJudgePrompt: (description: string) => string;

package/dist/types/ai-model/prompt/util.d.ts CHANGED Viewed

@@ -1,17 +1,9 @@
-import type { BaseElement, ElementTreeNode, Size, UIContext } from '../../types';
-import type { TVlModeTypes } from '@midscene/shared/env';
+import type { BaseElement, Size, UIContext } from '../../types';
 export declare function describeSize(size: Size): string;
 export declare function describeElement(elements: (Pick<BaseElement, 'rect' | 'content'> & {
     id: string;
 })[]): string;
 export declare const distanceThreshold = 16;
-export declare function elementByPositionWithElementInfo(treeRoot: ElementTreeNode<BaseElement>, position: {
-    x: number;
-    y: number;
-}, options?: {
-    requireStrictDistance?: boolean;
-    filterPositionElements?: boolean;
-}): BaseElement | undefined;
 export declare function distance(point1: {
     x: number;
     y: number;
@@ -20,28 +12,4 @@ export declare function distance(point1: {
     y: number;
 }): number;
 export declare const samplePageDescription = "\nAnd the page is described as follows:\n====================\nThe size of the page: 1280 x 720\nSome of the elements are marked with a rectangle in the screenshot corresponding to the markerId, some are not.\n\nDescription of all the elements in screenshot:\n<div id=\"969f1637\" markerId=\"1\" left=\"100\" top=\"100\" width=\"100\" height=\"100\"> // The markerId indicated by the rectangle label in the screenshot\n  <h4 id=\"b211ecb2\" markerId=\"5\" left=\"150\" top=\"150\" width=\"90\" height=\"60\">\n    The username is accepted\n  </h4>\n  ...many more\n</div>\n====================\n";
-export declare function describeUserPage<ElementType extends BaseElement = BaseElement>(context: Omit<UIContext<ElementType>, 'describer'>, opt: {
-    truncateTextLength?: number;
-    filterNonTextContent?: boolean;
-    domIncluded?: boolean | 'visible-only';
-    visibleOnly?: boolean;
-    vlMode: TVlModeTypes | undefined;
-}): Promise<{
-    description: string;
-    elementById(idOrIndexId: string): ElementType;
-    elementByPosition(position: {
-        x: number;
-        y: number;
-    }, size: {
-        width: number;
-        height: number;
-    }): BaseElement | undefined;
-    insertElementByPosition(position: {
-        x: number;
-        y: number;
-    }): ElementType;
-    size: {
-        width: number;
-        height: number;
-    };
-}>;
+export declare function describeUserPage(context: UIContext): Promise<string>;

package/dist/types/ai-model/service-caller/index.d.ts CHANGED Viewed

@@ -1,9 +1,8 @@
 import { type AIUsageInfo } from '../../types';
 import type { StreamingCallback } from '../../types';
 import { type IModelConfig, type TVlModeTypes } from '@midscene/shared/env';
-import OpenAI from 'openai';
 import type { ChatCompletionMessageParam } from 'openai/resources/index';
-import { AIActionType, type AIArgs } from '../common';
+import type { AIActionType, AIArgs } from '../../common';
 export declare function callAI(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType, modelConfig: IModelConfig, options?: {
     stream?: boolean;
     onChunk?: StreamingCallback;
@@ -12,9 +11,9 @@ export declare function callAI(messages: ChatCompletionMessageParam[], AIActionT
     usage?: AIUsageInfo;
     isStreamed: boolean;
 }>;
-export declare const getResponseFormat: (modelName: string, AIActionTypeValue: AIActionType) => OpenAI.ChatCompletionCreateParams["response_format"] | OpenAI.ResponseFormatJSONObject;
 export declare function callAIWithObjectResponse<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType, modelConfig: IModelConfig): Promise<{
     content: T;
+    contentString: string;
     usage?: AIUsageInfo;
 }>;
 export declare function callAIWithStringResponse(msgs: AIArgs, AIActionTypeValue: AIActionType, modelConfig: IModelConfig): Promise<{

package/dist/types/ai-model/ui-tars-planning.d.ts CHANGED Viewed

@@ -1,11 +1,12 @@
 import type { PlanningAIResponse, Size, UIContext } from '../types';
 import { type IModelConfig, UITarsModelVersion } from '@midscene/shared/env';
 import type { ConversationHistory } from './conversation-history';
-type ActionType = 'click' | 'drag' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait';
+type ActionType = 'click' | 'left_double' | 'right_single' | 'drag' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait';
 export declare function uiTarsPlanning(userInstruction: string, options: {
     conversationHistory: ConversationHistory;
     context: UIContext;
     modelConfig: IModelConfig;
+    actionContext?: string;
 }): Promise<PlanningAIResponse>;
 interface BaseAction {
     action_type: ActionType;
@@ -32,6 +33,18 @@ interface WaitAction extends BaseAction {
         time: string;
     };
 }
+interface LeftDoubleAction extends BaseAction {
+    action_type: 'left_double';
+    action_inputs: {
+        start_box: string;
+    };
+}
+interface RightSingleAction extends BaseAction {
+    action_type: 'right_single';
+    action_inputs: {
+        start_box: string;
+    };
+}
 interface TypeAction extends BaseAction {
     action_type: 'type';
     action_inputs: {
@@ -54,6 +67,6 @@ interface FinishedAction extends BaseAction {
     action_type: 'finished';
     action_inputs: Record<string, never>;
 }
-export type Action = ClickAction | DragAction | TypeAction | HotkeyAction | ScrollAction | FinishedAction | WaitAction;
+export type Action = ClickAction | LeftDoubleAction | RightSingleAction | DragAction | TypeAction | HotkeyAction | ScrollAction | FinishedAction | WaitAction;
 export declare function resizeImageForUiTars(imageBase64: string, size: Size, uiTarsVersion: UITarsModelVersion | undefined): Promise<string>;
 export {};

package/dist/types/{ai-model/common.d.ts → common.d.ts} RENAMED Viewed

@@ -1,6 +1,6 @@
-import type { BaseElement, DeviceAction, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from '../types';
+import type { BaseElement, DeviceAction, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from './types';
 import type { ChatCompletionMessageParam } from 'openai/resources/index';
-import type { PlanningLocateParam } from '../types';
+import type { PlanningLocateParam } from './types';
 import type { TVlModeTypes } from '@midscene/shared/env';
 import { z } from 'zod';
 export type AIArgs = ChatCompletionMessageParam[];
@@ -12,14 +12,14 @@ export declare enum AIActionType {
     DESCRIBE_ELEMENT = 4,
     TEXT = 5
 }
+type AdaptBboxInput = number[] | string[] | string | (number[] | string[])[];
 export declare function fillBboxParam(locate: PlanningLocateParam, width: number, height: number, rightLimit: number, bottomLimit: number, vlMode: TVlModeTypes | undefined): PlanningLocateParam;
-export declare function adaptQwenBbox(bbox: number[]): [number, number, number, number];
+export declare function adaptQwen2_5Bbox(bbox: number[]): [number, number, number, number];
 export declare function adaptDoubaoBbox(bbox: string[] | number[] | string, width: number, height: number): [number, number, number, number];
-export declare function adaptBbox(bbox: number[], width: number, height: number, rightLimit: number, bottomLimit: number, vlMode: TVlModeTypes | undefined): [number, number, number, number];
+export declare function adaptBbox(bbox: AdaptBboxInput, width: number, height: number, rightLimit: number, bottomLimit: number, vlMode: TVlModeTypes | undefined): [number, number, number, number];
 export declare function normalized01000(bbox: number[], width: number, height: number): [number, number, number, number];
 export declare function adaptGeminiBbox(bbox: number[], width: number, height: number): [number, number, number, number];
 export declare function adaptBboxToRect(bbox: number[], width: number, height: number, offsetX?: number, offsetY?: number, rightLimit?: number, bottomLimit?: number, vlMode?: TVlModeTypes | undefined): Rect;
-export declare function warnGPT4oSizeLimit(size: Size, modelName: string): void;
 export declare function mergeRects(rects: Rect[]): {
     left: number;
     top: number;
@@ -565,5 +565,5 @@ export declare const loadActionParam: (jsonObject: Record<string, any>, zodSchem
  * Locator fields are special business logic fields with complex validation requirements,
  * so they are intentionally excluded from Zod parsing and use existing validation logic.
  */
-export declare const parseActionParam: (rawParam: Record<string, any>, zodSchema: z.ZodType<any>) => Record<string, any>;
+export declare const parseActionParam: (rawParam: Record<string, any> | undefined, zodSchema?: z.ZodType<any>) => Record<string, any> | undefined;
 export {};

package/dist/types/device/device-options.d.ts ADDED Viewed

@@ -0,0 +1,57 @@
+import type { DeviceAction } from '../types';
+/**
+ * Android device input options
+ */
+export type AndroidDeviceInputOpt = {
+    /** Automatically dismiss the keyboard after input is completed */
+    autoDismissKeyboard?: boolean;
+    /** Strategy for dismissing the keyboard: 'esc-first' tries ESC before BACK, 'back-first' tries BACK before ESC */
+    keyboardDismissStrategy?: 'esc-first' | 'back-first';
+};
+/**
+ * Android device options
+ */
+export type AndroidDeviceOpt = {
+    /** Path to the ADB executable */
+    androidAdbPath?: string;
+    /** Remote ADB host address */
+    remoteAdbHost?: string;
+    /** Remote ADB port */
+    remoteAdbPort?: number;
+    /** Input method editor strategy: 'always-yadb' always uses yadb, 'yadb-for-non-ascii' uses yadb only for non-ASCII characters */
+    imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
+    /** Display ID to use for this device */
+    displayId?: number;
+    /** Use physical display ID for screenshot operations */
+    usePhysicalDisplayIdForScreenshot?: boolean;
+    /** Use physical display ID when looking up display information */
+    usePhysicalDisplayIdForDisplayLookup?: boolean;
+    /** Custom device actions to register */
+    customActions?: DeviceAction<any>[];
+    /** Screenshot resize scale factor */
+    screenshotResizeScale?: number;
+    /** Always fetch screen info on each call; if false, cache the first result */
+    alwaysRefreshScreenInfo?: boolean;
+} & AndroidDeviceInputOpt;
+/**
+ * iOS device input options
+ */
+export type IOSDeviceInputOpt = {
+    /** Automatically dismiss the keyboard after input is completed */
+    autoDismissKeyboard?: boolean;
+};
+/**
+ * iOS device options
+ */
+export type IOSDeviceOpt = {
+    /** Device ID (UDID) to connect to */
+    deviceId?: string;
+    /** Custom device actions to register */
+    customActions?: DeviceAction<any>[];
+    /** WebDriverAgent port (default: 8100) */
+    wdaPort?: number;
+    /** WebDriverAgent host (default: 'localhost') */
+    wdaHost?: string;
+    /** Whether to use WebDriverAgent */
+    useWDA?: boolean;
+} & IOSDeviceInputOpt;