npm - @donggui/core - Versions diffs - 1.5.4-donggui.3 - Mend

@donggui/core 1.5.4-donggui.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (269) hide show

package/LICENSE +21 -0
package/README.md +9 -0
package/dist/es/agent/agent.mjs +709 -0
package/dist/es/agent/agent.mjs.map +1 -0
package/dist/es/agent/common.mjs +0 -0
package/dist/es/agent/execution-session.mjs +41 -0
package/dist/es/agent/execution-session.mjs.map +1 -0
package/dist/es/agent/index.mjs +6 -0
package/dist/es/agent/task-builder.mjs +330 -0
package/dist/es/agent/task-builder.mjs.map +1 -0
package/dist/es/agent/task-cache.mjs +186 -0
package/dist/es/agent/task-cache.mjs.map +1 -0
package/dist/es/agent/tasks.mjs +422 -0
package/dist/es/agent/tasks.mjs.map +1 -0
package/dist/es/agent/ui-utils.mjs +91 -0
package/dist/es/agent/ui-utils.mjs.map +1 -0
package/dist/es/agent/utils.mjs +198 -0
package/dist/es/agent/utils.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/actions.mjs +224 -0
package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/index.mjs +6 -0
package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/planning.mjs +71 -0
package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/util.mjs +9 -0
package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
package/dist/es/ai-model/conversation-history.mjs +195 -0
package/dist/es/ai-model/conversation-history.mjs.map +1 -0
package/dist/es/ai-model/index.mjs +11 -0
package/dist/es/ai-model/inspect.mjs +386 -0
package/dist/es/ai-model/inspect.mjs.map +1 -0
package/dist/es/ai-model/llm-planning.mjs +233 -0
package/dist/es/ai-model/llm-planning.mjs.map +1 -0
package/dist/es/ai-model/prompt/common.mjs +7 -0
package/dist/es/ai-model/prompt/common.mjs.map +1 -0
package/dist/es/ai-model/prompt/describe.mjs +66 -0
package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
package/dist/es/ai-model/prompt/extraction.mjs +129 -0
package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
package/dist/es/ai-model/prompt/llm-locator.mjs +51 -0
package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
package/dist/es/ai-model/prompt/llm-planning.mjs +364 -0
package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
package/dist/es/ai-model/prompt/llm-section-locator.mjs +44 -0
package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
package/dist/es/ai-model/prompt/util.mjs +59 -0
package/dist/es/ai-model/prompt/util.mjs.map +1 -0
package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
package/dist/es/ai-model/service-caller/index.mjs +466 -0
package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
package/dist/es/ai-model/ui-tars-planning.mjs +249 -0
package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
package/dist/es/common.mjs +371 -0
package/dist/es/common.mjs.map +1 -0
package/dist/es/device/device-options.mjs +0 -0
package/dist/es/device/index.mjs +300 -0
package/dist/es/device/index.mjs.map +1 -0
package/dist/es/dump/html-utils.mjs +211 -0
package/dist/es/dump/html-utils.mjs.map +1 -0
package/dist/es/dump/image-restoration.mjs +43 -0
package/dist/es/dump/image-restoration.mjs.map +1 -0
package/dist/es/dump/index.mjs +3 -0
package/dist/es/index.mjs +15 -0
package/dist/es/index.mjs.map +1 -0
package/dist/es/report-generator.mjs +134 -0
package/dist/es/report-generator.mjs.map +1 -0
package/dist/es/report.mjs +111 -0
package/dist/es/report.mjs.map +1 -0
package/dist/es/screenshot-item.mjs +105 -0
package/dist/es/screenshot-item.mjs.map +1 -0
package/dist/es/service/index.mjs +256 -0
package/dist/es/service/index.mjs.map +1 -0
package/dist/es/service/utils.mjs +15 -0
package/dist/es/service/utils.mjs.map +1 -0
package/dist/es/skill/index.mjs +38 -0
package/dist/es/skill/index.mjs.map +1 -0
package/dist/es/task-runner.mjs +258 -0
package/dist/es/task-runner.mjs.map +1 -0
package/dist/es/task-timing.mjs +12 -0
package/dist/es/task-timing.mjs.map +1 -0
package/dist/es/tree.mjs +13 -0
package/dist/es/tree.mjs.map +1 -0
package/dist/es/types.mjs +196 -0
package/dist/es/types.mjs.map +1 -0
package/dist/es/utils.mjs +218 -0
package/dist/es/utils.mjs.map +1 -0
package/dist/es/yaml/builder.mjs +13 -0
package/dist/es/yaml/builder.mjs.map +1 -0
package/dist/es/yaml/index.mjs +4 -0
package/dist/es/yaml/player.mjs +418 -0
package/dist/es/yaml/player.mjs.map +1 -0
package/dist/es/yaml/utils.mjs +73 -0
package/dist/es/yaml/utils.mjs.map +1 -0
package/dist/es/yaml.mjs +0 -0
package/dist/lib/agent/agent.js +757 -0
package/dist/lib/agent/agent.js.map +1 -0
package/dist/lib/agent/common.js +5 -0
package/dist/lib/agent/execution-session.js +75 -0
package/dist/lib/agent/execution-session.js.map +1 -0
package/dist/lib/agent/index.js +81 -0
package/dist/lib/agent/index.js.map +1 -0
package/dist/lib/agent/task-builder.js +367 -0
package/dist/lib/agent/task-builder.js.map +1 -0
package/dist/lib/agent/task-cache.js +238 -0
package/dist/lib/agent/task-cache.js.map +1 -0
package/dist/lib/agent/tasks.js +465 -0
package/dist/lib/agent/tasks.js.map +1 -0
package/dist/lib/agent/ui-utils.js +143 -0
package/dist/lib/agent/ui-utils.js.map +1 -0
package/dist/lib/agent/utils.js +275 -0
package/dist/lib/agent/utils.js.map +1 -0
package/dist/lib/ai-model/auto-glm/actions.js +258 -0
package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
package/dist/lib/ai-model/auto-glm/index.js +66 -0
package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
package/dist/lib/ai-model/auto-glm/parser.js +282 -0
package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
package/dist/lib/ai-model/auto-glm/planning.js +105 -0
package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
package/dist/lib/ai-model/auto-glm/util.js +46 -0
package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
package/dist/lib/ai-model/conversation-history.js +229 -0
package/dist/lib/ai-model/conversation-history.js.map +1 -0
package/dist/lib/ai-model/index.js +125 -0
package/dist/lib/ai-model/index.js.map +1 -0
package/dist/lib/ai-model/inspect.js +429 -0
package/dist/lib/ai-model/inspect.js.map +1 -0
package/dist/lib/ai-model/llm-planning.js +270 -0
package/dist/lib/ai-model/llm-planning.js.map +1 -0
package/dist/lib/ai-model/prompt/common.js +41 -0
package/dist/lib/ai-model/prompt/common.js.map +1 -0
package/dist/lib/ai-model/prompt/describe.js +100 -0
package/dist/lib/ai-model/prompt/describe.js.map +1 -0
package/dist/lib/ai-model/prompt/extraction.js +169 -0
package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
package/dist/lib/ai-model/prompt/llm-locator.js +88 -0
package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
package/dist/lib/ai-model/prompt/llm-planning.js +401 -0
package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
package/dist/lib/ai-model/prompt/llm-section-locator.js +81 -0
package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
package/dist/lib/ai-model/prompt/util.js +105 -0
package/dist/lib/ai-model/prompt/util.js.map +1 -0
package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
package/dist/lib/ai-model/service-caller/index.js +531 -0
package/dist/lib/ai-model/service-caller/index.js.map +1 -0
package/dist/lib/ai-model/ui-tars-planning.js +283 -0
package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
package/dist/lib/common.js +480 -0
package/dist/lib/common.js.map +1 -0
package/dist/lib/device/device-options.js +20 -0
package/dist/lib/device/device-options.js.map +1 -0
package/dist/lib/device/index.js +418 -0
package/dist/lib/device/index.js.map +1 -0
package/dist/lib/dump/html-utils.js +281 -0
package/dist/lib/dump/html-utils.js.map +1 -0
package/dist/lib/dump/image-restoration.js +77 -0
package/dist/lib/dump/image-restoration.js.map +1 -0
package/dist/lib/dump/index.js +60 -0
package/dist/lib/dump/index.js.map +1 -0
package/dist/lib/index.js +146 -0
package/dist/lib/index.js.map +1 -0
package/dist/lib/report-generator.js +172 -0
package/dist/lib/report-generator.js.map +1 -0
package/dist/lib/report.js +145 -0
package/dist/lib/report.js.map +1 -0
package/dist/lib/screenshot-item.js +139 -0
package/dist/lib/screenshot-item.js.map +1 -0
package/dist/lib/service/index.js +290 -0
package/dist/lib/service/index.js.map +1 -0
package/dist/lib/service/utils.js +49 -0
package/dist/lib/service/utils.js.map +1 -0
package/dist/lib/skill/index.js +72 -0
package/dist/lib/skill/index.js.map +1 -0
package/dist/lib/task-runner.js +295 -0
package/dist/lib/task-runner.js.map +1 -0
package/dist/lib/task-timing.js +46 -0
package/dist/lib/task-timing.js.map +1 -0
package/dist/lib/tree.js +53 -0
package/dist/lib/tree.js.map +1 -0
package/dist/lib/types.js +285 -0
package/dist/lib/types.js.map +1 -0
package/dist/lib/utils.js +297 -0
package/dist/lib/utils.js.map +1 -0
package/dist/lib/yaml/builder.js +57 -0
package/dist/lib/yaml/builder.js.map +1 -0
package/dist/lib/yaml/index.js +81 -0
package/dist/lib/yaml/index.js.map +1 -0
package/dist/lib/yaml/player.js +452 -0
package/dist/lib/yaml/player.js.map +1 -0
package/dist/lib/yaml/utils.js +126 -0
package/dist/lib/yaml/utils.js.map +1 -0
package/dist/lib/yaml.js +20 -0
package/dist/lib/yaml.js.map +1 -0
package/dist/types/agent/agent.d.ts +190 -0
package/dist/types/agent/common.d.ts +0 -0
package/dist/types/agent/execution-session.d.ts +36 -0
package/dist/types/agent/index.d.ts +10 -0
package/dist/types/agent/task-builder.d.ts +34 -0
package/dist/types/agent/task-cache.d.ts +48 -0
package/dist/types/agent/tasks.d.ts +70 -0
package/dist/types/agent/ui-utils.d.ts +14 -0
package/dist/types/agent/utils.d.ts +29 -0
package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
package/dist/types/ai-model/auto-glm/planning.d.ts +10 -0
package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
package/dist/types/ai-model/conversation-history.d.ts +105 -0
package/dist/types/ai-model/index.d.ts +14 -0
package/dist/types/ai-model/inspect.d.ts +58 -0
package/dist/types/ai-model/llm-planning.d.ts +19 -0
package/dist/types/ai-model/prompt/common.d.ts +2 -0
package/dist/types/ai-model/prompt/describe.d.ts +1 -0
package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
package/dist/types/ai-model/prompt/util.d.ts +33 -0
package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
package/dist/types/ai-model/service-caller/index.d.ts +49 -0
package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
package/dist/types/common.d.ts +288 -0
package/dist/types/device/device-options.d.ts +142 -0
package/dist/types/device/index.d.ts +2315 -0
package/dist/types/dump/html-utils.d.ts +52 -0
package/dist/types/dump/image-restoration.d.ts +6 -0
package/dist/types/dump/index.d.ts +5 -0
package/dist/types/index.d.ts +17 -0
package/dist/types/report-generator.d.ts +48 -0
package/dist/types/report.d.ts +15 -0
package/dist/types/screenshot-item.d.ts +66 -0
package/dist/types/service/index.d.ts +23 -0
package/dist/types/service/utils.d.ts +2 -0
package/dist/types/skill/index.d.ts +25 -0
package/dist/types/task-runner.d.ts +48 -0
package/dist/types/task-timing.d.ts +8 -0
package/dist/types/tree.d.ts +4 -0
package/dist/types/types.d.ts +645 -0
package/dist/types/utils.d.ts +40 -0
package/dist/types/yaml/builder.d.ts +2 -0
package/dist/types/yaml/index.d.ts +4 -0
package/dist/types/yaml/player.d.ts +34 -0
package/dist/types/yaml/utils.d.ts +9 -0
package/dist/types/yaml.d.ts +203 -0
package/package.json +111 -0

package/dist/types/ai-model/prompt/yaml-generator.d.ts ADDED Viewed

@@ -0,0 +1,100 @@
+import type { StreamingAIResponse, StreamingCodeGenerationOptions } from '../../types';
+import type { IModelConfig } from '@midscene/shared/env';
+export interface EventCounts {
+    navigation: number;
+    click: number;
+    input: number;
+    scroll: number;
+    total: number;
+}
+export interface InputDescription {
+    description: string;
+    value: string;
+}
+export interface ProcessedEvent {
+    type: string;
+    timestamp: number;
+    url?: string;
+    title?: string;
+    elementDescription?: string;
+    value?: string;
+    pageInfo?: any;
+    elementRect?: any;
+}
+export interface EventSummary {
+    testName: string;
+    startUrl: string;
+    eventCounts: EventCounts;
+    urls: string[];
+    clickDescriptions: string[];
+    inputDescriptions: InputDescription[];
+    events: ProcessedEvent[];
+}
+export interface ChromeRecordedEvent {
+    type: string;
+    timestamp: number;
+    url?: string;
+    title?: string;
+    elementDescription?: string;
+    value?: string;
+    pageInfo?: any;
+    elementRect?: any;
+    screenshotBefore?: string;
+    screenshotAfter?: string;
+    screenshotWithBox?: string;
+}
+export interface YamlGenerationOptions {
+    testName?: string;
+    includeTimestamps?: boolean;
+    maxScreenshots?: number;
+    description?: string;
+}
+export interface FilteredEvents {
+    navigationEvents: ChromeRecordedEvent[];
+    clickEvents: ChromeRecordedEvent[];
+    inputEvents: ChromeRecordedEvent[];
+    scrollEvents: ChromeRecordedEvent[];
+}
+/**
+ * Get screenshots from events for LLM context
+ */
+export declare const getScreenshotsForLLM: (events: ChromeRecordedEvent[], maxScreenshots?: number) => string[];
+/**
+ * Filter events by type for easier processing
+ */
+export declare const filterEventsByType: (events: ChromeRecordedEvent[]) => FilteredEvents;
+/**
+ * Create event counts summary
+ */
+export declare const createEventCounts: (filteredEvents: FilteredEvents, totalEvents: number) => EventCounts;
+/**
+ * Extract input descriptions from input events
+ */
+export declare const extractInputDescriptions: (inputEvents: ChromeRecordedEvent[]) => InputDescription[];
+/**
+ * Process events for LLM consumption
+ */
+export declare const processEventsForLLM: (events: ChromeRecordedEvent[]) => ProcessedEvent[];
+/**
+ * Prepare comprehensive event summary for LLM
+ */
+export declare const prepareEventSummary: (events: ChromeRecordedEvent[], options?: {
+    testName?: string;
+    maxScreenshots?: number;
+}) => EventSummary;
+/**
+ * Create message content for LLM with optional screenshots
+ */
+export declare const createMessageContent: (promptText: string, screenshots?: string[], includeScreenshots?: boolean) => any[];
+/**
+ * Validate events before processing
+ */
+export declare const validateEvents: (events: ChromeRecordedEvent[]) => void;
+/**
+ * Generates YAML test configuration from recorded events using AI
+ */
+export declare const generateYamlTest: (events: ChromeRecordedEvent[], options: YamlGenerationOptions, modelConfig: IModelConfig) => Promise<string>;
+/**
+ * Generates YAML test configuration from recorded events using AI with streaming support
+ */
+export declare const generateYamlTestStream: (events: ChromeRecordedEvent[], options: YamlGenerationOptions & StreamingCodeGenerationOptions, modelConfig: IModelConfig) => Promise<StreamingAIResponse>;

package/dist/types/ai-model/service-caller/index.d.ts ADDED Viewed

@@ -0,0 +1,49 @@
+import type { AIUsageInfo, DeepThinkOption } from '../../types';
+import type { StreamingCallback } from '../../types';
+export declare class AIResponseParseError extends Error {
+    usage?: AIUsageInfo;
+    rawResponse: string;
+    constructor(message: string, rawResponse: string, usage?: AIUsageInfo);
+}
+import { type IModelConfig, type TModelFamily } from '@midscene/shared/env';
+import type { ChatCompletionMessageParam } from 'openai/resources/index';
+import type { AIArgs } from '../../common';
+export declare function callAI(messages: ChatCompletionMessageParam[], modelConfig: IModelConfig, options?: {
+    stream?: boolean;
+    onChunk?: StreamingCallback;
+    deepThink?: DeepThinkOption;
+    abortSignal?: AbortSignal;
+}): Promise<{
+    content: string;
+    reasoning_content?: string;
+    usage?: AIUsageInfo;
+    isStreamed: boolean;
+}>;
+export declare function callAIWithObjectResponse<T>(messages: ChatCompletionMessageParam[], modelConfig: IModelConfig, options?: {
+    deepThink?: DeepThinkOption;
+    abortSignal?: AbortSignal;
+}): Promise<{
+    content: T;
+    contentString: string;
+    usage?: AIUsageInfo;
+    reasoning_content?: string;
+}>;
+export declare function callAIWithStringResponse(msgs: AIArgs, modelConfig: IModelConfig, options?: {
+    abortSignal?: AbortSignal;
+}): Promise<{
+    content: string;
+    usage?: AIUsageInfo;
+}>;
+export declare function extractJSONFromCodeBlock(response: string): string;
+export declare function preprocessDoubaoBboxJson(input: string): string;
+export declare function resolveReasoningConfig({ reasoningEnabled, reasoningEffort, reasoningBudget, modelFamily, }: {
+    reasoningEnabled?: boolean;
+    reasoningEffort?: string;
+    reasoningBudget?: number;
+    modelFamily?: TModelFamily;
+}): {
+    config: Record<string, unknown>;
+    debugMessage?: string;
+    warningMessage?: string;
+};
+export declare function safeParseJson(input: string, modelFamily: TModelFamily | undefined): any;

package/dist/types/ai-model/ui-tars-planning.d.ts ADDED Viewed

@@ -0,0 +1,72 @@
+import type { PlanningAIResponse, UIContext } from '../types';
+import { type IModelConfig } from '@midscene/shared/env';
+import type { ConversationHistory } from './conversation-history';
+type ActionType = 'click' | 'left_double' | 'right_single' | 'drag' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait';
+export declare function uiTarsPlanning(userInstruction: string, options: {
+    conversationHistory: ConversationHistory;
+    context: UIContext;
+    modelConfig: IModelConfig;
+    actionContext?: string;
+    abortSignal?: AbortSignal;
+}): Promise<PlanningAIResponse>;
+interface BaseAction {
+    action_type: ActionType;
+    action_inputs: Record<string, any>;
+    reflection: string | null;
+    thought: string | null;
+}
+interface ClickAction extends BaseAction {
+    action_type: 'click';
+    action_inputs: {
+        start_box: string;
+    };
+}
+interface DragAction extends BaseAction {
+    action_type: 'drag';
+    action_inputs: {
+        start_box: string;
+        end_box: string;
+    };
+}
+interface WaitAction extends BaseAction {
+    action_type: 'wait';
+    action_inputs: {
+        time: string;
+    };
+}
+interface LeftDoubleAction extends BaseAction {
+    action_type: 'left_double';
+    action_inputs: {
+        start_box: string;
+    };
+}
+interface RightSingleAction extends BaseAction {
+    action_type: 'right_single';
+    action_inputs: {
+        start_box: string;
+    };
+}
+interface TypeAction extends BaseAction {
+    action_type: 'type';
+    action_inputs: {
+        content: string;
+    };
+}
+interface HotkeyAction extends BaseAction {
+    action_type: 'hotkey';
+    action_inputs: {
+        key: string;
+    };
+}
+interface ScrollAction extends BaseAction {
+    action_type: 'scroll';
+    action_inputs: {
+        direction: 'up' | 'down';
+    };
+}
+interface FinishedAction extends BaseAction {
+    action_type: 'finished';
+    action_inputs: Record<string, never>;
+}
+export type Action = ClickAction | LeftDoubleAction | RightSingleAction | DragAction | TypeAction | HotkeyAction | ScrollAction | FinishedAction | WaitAction;
+export {};

package/dist/types/common.d.ts ADDED Viewed

@@ -0,0 +1,288 @@
+import type { BaseElement, DeviceAction, ElementTreeNode, MidsceneYamlFlowItem, PlanningAction, Rect, Size } from './types';
+import type { ChatCompletionMessageParam } from 'openai/resources/index';
+import type { PlanningLocateParam } from './types';
+import type { TModelFamily } from '@midscene/shared/env';
+import { z } from 'zod';
+export type AIArgs = ChatCompletionMessageParam[];
+type AdaptBboxInput = number[] | string[] | string | (number[] | string[])[];
+/**
+ * Convert a point coordinate [0, 1000] to a small bbox [0, 1000]
+ * Creates a small bbox around the center point in the same coordinate space
+ *
+ * @param x - X coordinate in [0, 1000] range
+ * @param y - Y coordinate in [0, 1000] range
+ * @param bboxSize - Size of the bbox to create (default: 20)
+ * @returns [x1, y1, x2, y2] bbox in [0, 1000] coordinate space
+ */
+export declare function pointToBbox(x: number, y: number, bboxSize?: number): [number, number, number, number];
+export declare function fillBboxParam(locate: PlanningLocateParam, width: number, height: number, modelFamily: TModelFamily | undefined): PlanningLocateParam;
+export declare function adaptQwen2_5Bbox(bbox: number[]): [number, number, number, number];
+export declare function adaptGpt5Bbox(bbox: number[] | string[] | string): [number, number, number, number];
+export declare function adaptDoubaoBbox(bbox: string[] | number[] | string, width: number, height: number): [number, number, number, number];
+export declare function adaptBbox(bbox: AdaptBboxInput, width: number, height: number, modelFamily: TModelFamily | undefined): [number, number, number, number];
+export declare function normalized01000(bbox: number[], width: number, height: number): [number, number, number, number];
+export declare function adaptGeminiBbox(bbox: number[], width: number, height: number): [number, number, number, number];
+export declare function adaptBboxToRect(bbox: number[], width: number, height: number, offsetX?: number, offsetY?: number, rightLimit?: number, bottomLimit?: number, modelFamily?: TModelFamily | undefined, scale?: number): Rect;
+export declare function mergeRects(rects: Rect[]): {
+    left: number;
+    top: number;
+    width: number;
+    height: number;
+};
+/**
+ * Expand the search area to at least 400 x 400 pixels
+ *
+ * Step 1: Extend 100px on each side (top, right, bottom, left)
+ * - If the element is near a boundary, expansion on that side will be limited
+ * - No compensation is made for boundary limitations (this is intentional)
+ *
+ * Step 2: Ensure the area is at least 400x400 pixels
+ * - Scale up proportionally from the center if needed
+ * - Final result is clamped to screen boundaries
+ */
+export declare function expandSearchArea(rect: Rect, screenSize: Size): Rect;
+export declare function markupImageForLLM(screenshotBase64: string, tree: ElementTreeNode<BaseElement>, size: Size): Promise<string>;
+export declare function buildYamlFlowFromPlans(plans: PlanningAction[], actionSpace: DeviceAction<any>[]): MidsceneYamlFlowItem[];
+export declare const PointSchema: z.ZodObject<{
+    left: z.ZodNumber;
+    top: z.ZodNumber;
+}, "strip", z.ZodTypeAny, {
+    left: number;
+    top: number;
+}, {
+    left: number;
+    top: number;
+}>;
+export declare const SizeSchema: z.ZodObject<{
+    width: z.ZodNumber;
+    height: z.ZodNumber;
+}, "strip", z.ZodTypeAny, {
+    width: number;
+    height: number;
+}, {
+    width: number;
+    height: number;
+}>;
+export declare const RectSchema: z.ZodIntersection<z.ZodIntersection<z.ZodObject<{
+    left: z.ZodNumber;
+    top: z.ZodNumber;
+}, "strip", z.ZodTypeAny, {
+    left: number;
+    top: number;
+}, {
+    left: number;
+    top: number;
+}>, z.ZodObject<{
+    width: z.ZodNumber;
+    height: z.ZodNumber;
+}, "strip", z.ZodTypeAny, {
+    width: number;
+    height: number;
+}, {
+    width: number;
+    height: number;
+}>>, z.ZodObject<{
+    zoom: z.ZodOptional<z.ZodNumber>;
+}, "strip", z.ZodTypeAny, {
+    zoom?: number | undefined;
+}, {
+    zoom?: number | undefined;
+}>>;
+export declare const TMultimodalPromptSchema: z.ZodObject<{
+    images: z.ZodOptional<z.ZodArray<z.ZodObject<{
+        name: z.ZodString;
+        url: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        name: string;
+        url: string;
+    }, {
+        name: string;
+        url: string;
+    }>, "many">>;
+    convertHttpImage2Base64: z.ZodOptional<z.ZodBoolean>;
+}, "strip", z.ZodTypeAny, {
+    images?: {
+        name: string;
+        url: string;
+    }[] | undefined;
+    convertHttpImage2Base64?: boolean | undefined;
+}, {
+    images?: {
+        name: string;
+        url: string;
+    }[] | undefined;
+    convertHttpImage2Base64?: boolean | undefined;
+}>;
+export declare const TUserPromptSchema: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
+    prompt: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    prompt: string;
+}, {
+    prompt: string;
+}>, z.ZodObject<{
+    images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
+        name: z.ZodString;
+        url: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        name: string;
+        url: string;
+    }, {
+        name: string;
+        url: string;
+    }>, "many">>>;
+    convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
+}, "strip", z.ZodTypeAny, {
+    images?: {
+        name: string;
+        url: string;
+    }[] | undefined;
+    convertHttpImage2Base64?: boolean | undefined;
+}, {
+    images?: {
+        name: string;
+        url: string;
+    }[] | undefined;
+    convertHttpImage2Base64?: boolean | undefined;
+}>>]>;
+export type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;
+export type TUserPrompt = z.infer<typeof TUserPromptSchema>;
+/**
+ * Returns the schema for locator fields.
+ * This now returns the input schema which is more permissive and suitable for validation.
+ */
+export declare const getMidsceneLocationSchema: () => z.ZodObject<{
+    prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
+        prompt: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        prompt: string;
+    }, {
+        prompt: string;
+    }>, z.ZodObject<{
+        images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
+            name: z.ZodString;
+            url: z.ZodString;
+        }, "strip", z.ZodTypeAny, {
+            name: string;
+            url: string;
+        }, {
+            name: string;
+            url: string;
+        }>, "many">>>;
+        convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
+    }, "strip", z.ZodTypeAny, {
+        images?: {
+            name: string;
+            url: string;
+        }[] | undefined;
+        convertHttpImage2Base64?: boolean | undefined;
+    }, {
+        images?: {
+            name: string;
+            url: string;
+        }[] | undefined;
+        convertHttpImage2Base64?: boolean | undefined;
+    }>>]>;
+    deepLocate: z.ZodOptional<z.ZodBoolean>;
+    deepThink: z.ZodOptional<z.ZodBoolean>;
+    cacheable: z.ZodOptional<z.ZodBoolean>;
+    xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
+}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
+    prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
+        prompt: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        prompt: string;
+    }, {
+        prompt: string;
+    }>, z.ZodObject<{
+        images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
+            name: z.ZodString;
+            url: z.ZodString;
+        }, "strip", z.ZodTypeAny, {
+            name: string;
+            url: string;
+        }, {
+            name: string;
+            url: string;
+        }>, "many">>>;
+        convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
+    }, "strip", z.ZodTypeAny, {
+        images?: {
+            name: string;
+            url: string;
+        }[] | undefined;
+        convertHttpImage2Base64?: boolean | undefined;
+    }, {
+        images?: {
+            name: string;
+            url: string;
+        }[] | undefined;
+        convertHttpImage2Base64?: boolean | undefined;
+    }>>]>;
+    deepLocate: z.ZodOptional<z.ZodBoolean>;
+    deepThink: z.ZodOptional<z.ZodBoolean>;
+    cacheable: z.ZodOptional<z.ZodBoolean>;
+    xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
+}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
+    prompt: z.ZodUnion<[z.ZodString, z.ZodIntersection<z.ZodObject<{
+        prompt: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        prompt: string;
+    }, {
+        prompt: string;
+    }>, z.ZodObject<{
+        images: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
+            name: z.ZodString;
+            url: z.ZodString;
+        }, "strip", z.ZodTypeAny, {
+            name: string;
+            url: string;
+        }, {
+            name: string;
+            url: string;
+        }>, "many">>>;
+        convertHttpImage2Base64: z.ZodOptional<z.ZodOptional<z.ZodBoolean>>;
+    }, "strip", z.ZodTypeAny, {
+        images?: {
+            name: string;
+            url: string;
+        }[] | undefined;
+        convertHttpImage2Base64?: boolean | undefined;
+    }, {
+        images?: {
+            name: string;
+            url: string;
+        }[] | undefined;
+        convertHttpImage2Base64?: boolean | undefined;
+    }>>]>;
+    deepLocate: z.ZodOptional<z.ZodBoolean>;
+    deepThink: z.ZodOptional<z.ZodBoolean>;
+    cacheable: z.ZodOptional<z.ZodBoolean>;
+    xpath: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodBoolean]>>;
+}, z.ZodTypeAny, "passthrough">>;
+export declare const ifMidsceneLocatorField: (field: any) => boolean;
+export declare const dumpMidsceneLocatorField: (field: any) => string;
+export declare const findAllMidsceneLocatorField: (zodType?: z.ZodType<any>, requiredOnly?: boolean) => string[];
+export declare const dumpActionParam: (jsonObject: Record<string, any>, zodSchema: z.ZodType<any>) => Record<string, any>;
+/**
+ * Parse and validate action parameters using Zod schema.
+ * All fields are validated through Zod, EXCEPT locator fields which are skipped.
+ * Default values defined in the schema are automatically applied.
+ *
+ * Locator fields are special business logic fields with complex validation requirements,
+ * so they are intentionally excluded from Zod parsing and use existing validation logic.
+ *
+ * When shrunkShotToLogicalRatio is provided and !== 1, coordinates in locate fields
+ * are transformed from screenshot space to logical space.
+ */
+export declare const parseActionParam: (rawParam: Record<string, any> | undefined, zodSchema?: z.ZodType<any>, options?: {
+    shrunkShotToLogicalRatio?: number;
+}) => Record<string, any> | undefined;
+export declare const finalizeActionName = "Finalize";
+/**
+ * Get a readable time string for a given timestamp or the current time
+ * @param format - Optional format string. Supports: YYYY, MM, DD, HH, mm, ss. Default: 'YYYY-MM-DD HH:mm:ss'
+ * @param timestamp - Optional timestamp in milliseconds. If not provided, uses current system time.
+ * @returns A formatted time string with format label
+ */
+export declare const getReadableTimeString: (format?: string, timestamp?: number) => string;
+export {};

package/dist/types/device/device-options.d.ts ADDED Viewed

@@ -0,0 +1,142 @@
+import type { DeviceAction } from '../types';
+/**
+ * Android device input options.
+ * These fields are also part of `AndroidDeviceOpt`, which intersects with this type.
+ */
+export type AndroidDeviceInputOpt = {
+    /** Automatically dismiss the keyboard after input is completed */
+    autoDismissKeyboard?: boolean;
+    /**
+     * Strategy for dismissing the keyboard:
+     * - 'esc-first' tries ESC before BACK
+     * - 'back-first' tries BACK before ESC
+     */
+    keyboardDismissStrategy?: 'esc-first' | 'back-first';
+};
+/**
+ * Android device options.
+ * Also includes every field of `AndroidDeviceInputOpt` (via intersection).
+ */
+export type AndroidDeviceOpt = {
+    /** Path to the ADB executable */
+    androidAdbPath?: string;
+    /** Remote ADB host address */
+    remoteAdbHost?: string;
+    /** Remote ADB port */
+    remoteAdbPort?: number;
+    /**
+     * Input method editor strategy:
+     * - 'always-yadb' always uses yadb
+     * - 'yadb-for-non-ascii' uses yadb only for non-ASCII characters
+     */
+    imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
+    /** Display ID to use for this device */
+    displayId?: number;
+    /** Use physical display ID for screenshot operations */
+    usePhysicalDisplayIdForScreenshot?: boolean;
+    /** Use physical display ID when looking up display information */
+    usePhysicalDisplayIdForDisplayLookup?: boolean;
+    /** Custom device actions to register */
+    customActions?: DeviceAction<any>[];
+    /**
+     * @deprecated Use `screenshotShrinkFactor` in AgentOpt instead.
+     * This option no longer affects the size of the screenshot sent to the AI model.
+     */
+    screenshotResizeScale?: number;
+    /** Always fetch screen info on each call; if false, cache the first result */
+    alwaysRefreshScreenInfo?: boolean;
+    /** Minimum screenshot buffer size in bytes (default: 10240 = 10KB). Set to 0 to disable validation. */
+    minScreenshotBufferSize?: number;
+    /**
+     * Scrcpy screenshot configuration for high-performance screen capture.
+     *
+     * Scrcpy provides 6-8x faster screenshots by streaming H.264 video from the device.
+     * When enabled, scrcpy will:
+     * 1. Start a video stream from the device on first screenshot request
+     * 2. Keep the connection alive for subsequent screenshots (16-50ms each)
+     * 3. Automatically disconnect after idle timeout to save resources
+     * 4. Fall back to standard ADB mode if scrcpy is unavailable
+     *
+     * @example
+     * ```typescript
+     * // Enable scrcpy for high-performance screenshots
+     * const device = new AndroidDevice(deviceId, {
+     *   scrcpyConfig: {
+     *     enabled: true,
+     *   },
+     * });
+     *
+     * // Custom configuration
+     * const tunedDevice = new AndroidDevice(deviceId, {
+     *   scrcpyConfig: {
+     *     enabled: true,
+     *     maxSize: 0,        // 0 = no scaling
+     *     idleTimeoutMs: 30000,
+     *     videoBitRate: 8_000_000,
+     *   },
+     * });
+     * ```
+     */
+    scrcpyConfig?: {
+        /**
+         * Enable scrcpy for high-performance screenshots.
+         * @default false
+         */
+        enabled?: boolean;
+        /**
+         * Maximum video dimension (width or height).
+         * Video stream will be scaled down if device resolution exceeds this value.
+         * Lower values reduce bandwidth but may affect image quality.
+         *
+         * @default 0 (no scaling, use original resolution)
+         * @example
+         * { maxSize: 1024 } // Scale down only when the device resolution exceeds 1024
+         */
+        maxSize?: number;
+        /**
+         * Idle timeout in milliseconds before disconnecting scrcpy.
+         * Connection auto-closes after this period of inactivity to save resources.
+         * Set to 0 to disable auto-disconnect.
+         * @default 30000 (30 seconds)
+         */
+        idleTimeoutMs?: number;
+        /**
+         * Video bit rate for H.264 encoding in bits per second.
+         * Higher values improve quality but increase bandwidth usage.
+         * @default 2000000 (2 Mbps)
+         */
+        videoBitRate?: number;
+    };
+} & AndroidDeviceInputOpt;
+/**
+ * iOS device input options.
+ * These fields are also part of `IOSDeviceOpt`, which intersects with this type.
+ */
+export type IOSDeviceInputOpt = {
+    /** Automatically dismiss the keyboard after input is completed */
+    autoDismissKeyboard?: boolean;
+};
+/**
+ * iOS device options.
+ * Also includes every field of `IOSDeviceInputOpt` (via intersection).
+ */
+export type IOSDeviceOpt = {
+    /** Device ID (UDID) to connect to */
+    deviceId?: string;
+    /** Custom device actions to register */
+    customActions?: DeviceAction<any>[];
+    /** WebDriverAgent (WDA) port (default: 8100) */
+    wdaPort?: number;
+    /** WebDriverAgent (WDA) host (default: 'localhost') */
+    wdaHost?: string;
+    /** Whether to use WebDriverAgent (WDA) */
+    useWDA?: boolean;
+    /** WDA MJPEG server port for real-time screen streaming (default: 9100) */
+    wdaMjpegPort?: number;
+} & IOSDeviceInputOpt;
+/**
+ * HarmonyOS device input options.
+ * These fields are also part of `HarmonyDeviceOpt`, which intersects with this type.
+ */
+export type HarmonyDeviceInputOpt = {
+    /** Automatically dismiss the keyboard after input is completed */
+    autoDismissKeyboard?: boolean;
+};
+/**
+ * HarmonyOS device options.
+ * Also includes every field of `HarmonyDeviceInputOpt` (via intersection).
+ */
+export type HarmonyDeviceOpt = {
+    /** Path to the HDC executable */
+    hdcPath?: string;
+    /** Custom device actions to register */
+    customActions?: DeviceAction<any>[];
+    /**
+     * Screenshot resize scale factor.
+     * NOTE(review): the same-named option on `AndroidDeviceOpt` is marked `@deprecated`
+     * in favor of `screenshotShrinkFactor` in AgentOpt — confirm whether that applies here too.
+     */
+    screenshotResizeScale?: number;
+} & HarmonyDeviceInputOpt;