npm - @aiscene/core - Versions diffs - 1.1.1 - Mend

@aiscene/core 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (299)

package/LICENSE +21 -0
package/README.md +9 -0
package/dist/es/agent/agent.mjs +753 -0
package/dist/es/agent/agent.mjs.map +1 -0
package/dist/es/agent/common.mjs +0 -0
package/dist/es/agent/execution-session.mjs +41 -0
package/dist/es/agent/execution-session.mjs.map +1 -0
package/dist/es/agent/index.mjs +6 -0
package/dist/es/agent/task-builder.mjs +332 -0
package/dist/es/agent/task-builder.mjs.map +1 -0
package/dist/es/agent/task-cache.mjs +214 -0
package/dist/es/agent/task-cache.mjs.map +1 -0
package/dist/es/agent/tasks.mjs +423 -0
package/dist/es/agent/tasks.mjs.map +1 -0
package/dist/es/agent/ui-utils.mjs +91 -0
package/dist/es/agent/ui-utils.mjs.map +1 -0
package/dist/es/agent/utils.mjs +169 -0
package/dist/es/agent/utils.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/actions.mjs +239 -0
package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/index.mjs +6 -0
package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/planning.mjs +71 -0
package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
package/dist/es/ai-model/auto-glm/util.mjs +9 -0
package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
package/dist/es/ai-model/connectivity.mjs +138 -0
package/dist/es/ai-model/connectivity.mjs.map +1 -0
package/dist/es/ai-model/conversation-history.mjs +195 -0
package/dist/es/ai-model/conversation-history.mjs.map +1 -0
package/dist/es/ai-model/index.mjs +12 -0
package/dist/es/ai-model/inspect.mjs +397 -0
package/dist/es/ai-model/inspect.mjs.map +1 -0
package/dist/es/ai-model/llm-planning.mjs +233 -0
package/dist/es/ai-model/llm-planning.mjs.map +1 -0
package/dist/es/ai-model/prompt/common.mjs +7 -0
package/dist/es/ai-model/prompt/common.mjs.map +1 -0
package/dist/es/ai-model/prompt/describe.mjs +66 -0
package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
package/dist/es/ai-model/prompt/extraction.mjs +131 -0
package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
package/dist/es/ai-model/prompt/llm-locator.mjs +51 -0
package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
package/dist/es/ai-model/prompt/llm-planning.mjs +568 -0
package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
package/dist/es/ai-model/prompt/llm-section-locator.mjs +44 -0
package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
package/dist/es/ai-model/prompt/util.mjs +59 -0
package/dist/es/ai-model/prompt/util.mjs.map +1 -0
package/dist/es/ai-model/prompt/yaml-generator.mjs +203 -0
package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
package/dist/es/ai-model/service-caller/codex-app-server.mjs +575 -0
package/dist/es/ai-model/service-caller/codex-app-server.mjs.map +1 -0
package/dist/es/ai-model/service-caller/image-detail.mjs +6 -0
package/dist/es/ai-model/service-caller/image-detail.mjs.map +1 -0
package/dist/es/ai-model/service-caller/index.mjs +475 -0
package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
package/dist/es/ai-model/ui-tars-planning.mjs +249 -0
package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
package/dist/es/common.mjs +371 -0
package/dist/es/common.mjs.map +1 -0
package/dist/es/device/device-options.mjs +0 -0
package/dist/es/device/index.mjs +341 -0
package/dist/es/device/index.mjs.map +1 -0
package/dist/es/dump/html-utils.mjs +292 -0
package/dist/es/dump/html-utils.mjs.map +1 -0
package/dist/es/dump/index.mjs +3 -0
package/dist/es/dump/screenshot-restoration.mjs +32 -0
package/dist/es/dump/screenshot-restoration.mjs.map +1 -0
package/dist/es/dump/screenshot-store.mjs +126 -0
package/dist/es/dump/screenshot-store.mjs.map +1 -0
package/dist/es/index.mjs +19 -0
package/dist/es/index.mjs.map +1 -0
package/dist/es/report-cli.mjs +151 -0
package/dist/es/report-cli.mjs.map +1 -0
package/dist/es/report-generator.mjs +205 -0
package/dist/es/report-generator.mjs.map +1 -0
package/dist/es/report-markdown.mjs +218 -0
package/dist/es/report-markdown.mjs.map +1 -0
package/dist/es/report.mjs +270 -0
package/dist/es/report.mjs.map +1 -0
package/dist/es/screenshot-item.mjs +122 -0
package/dist/es/screenshot-item.mjs.map +1 -0
package/dist/es/service/index.mjs +274 -0
package/dist/es/service/index.mjs.map +1 -0
package/dist/es/service/utils.mjs +15 -0
package/dist/es/service/utils.mjs.map +1 -0
package/dist/es/skill/index.mjs +38 -0
package/dist/es/skill/index.mjs.map +1 -0
package/dist/es/task-runner.mjs +263 -0
package/dist/es/task-runner.mjs.map +1 -0
package/dist/es/task-timing.mjs +12 -0
package/dist/es/task-timing.mjs.map +1 -0
package/dist/es/tree.mjs +13 -0
package/dist/es/tree.mjs.map +1 -0
package/dist/es/types.mjs +204 -0
package/dist/es/types.mjs.map +1 -0
package/dist/es/utils.mjs +234 -0
package/dist/es/utils.mjs.map +1 -0
package/dist/es/yaml/builder.mjs +13 -0
package/dist/es/yaml/builder.mjs.map +1 -0
package/dist/es/yaml/index.mjs +4 -0
package/dist/es/yaml/player.mjs +442 -0
package/dist/es/yaml/player.mjs.map +1 -0
package/dist/es/yaml/utils.mjs +102 -0
package/dist/es/yaml/utils.mjs.map +1 -0
package/dist/es/yaml.mjs +0 -0
package/dist/lib/agent/agent.js +801 -0
package/dist/lib/agent/agent.js.map +1 -0
package/dist/lib/agent/common.js +5 -0
package/dist/lib/agent/execution-session.js +75 -0
package/dist/lib/agent/execution-session.js.map +1 -0
package/dist/lib/agent/index.js +78 -0
package/dist/lib/agent/index.js.map +1 -0
package/dist/lib/agent/task-builder.js +369 -0
package/dist/lib/agent/task-builder.js.map +1 -0
package/dist/lib/agent/task-cache.js +266 -0
package/dist/lib/agent/task-cache.js.map +1 -0
package/dist/lib/agent/tasks.js +466 -0
package/dist/lib/agent/tasks.js.map +1 -0
package/dist/lib/agent/ui-utils.js +143 -0
package/dist/lib/agent/ui-utils.js.map +1 -0
package/dist/lib/agent/utils.js +240 -0
package/dist/lib/agent/utils.js.map +1 -0
package/dist/lib/ai-model/auto-glm/actions.js +273 -0
package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
package/dist/lib/ai-model/auto-glm/index.js +66 -0
package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
package/dist/lib/ai-model/auto-glm/parser.js +282 -0
package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
package/dist/lib/ai-model/auto-glm/planning.js +105 -0
package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
package/dist/lib/ai-model/auto-glm/util.js +46 -0
package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
package/dist/lib/ai-model/connectivity.js +182 -0
package/dist/lib/ai-model/connectivity.js.map +1 -0
package/dist/lib/ai-model/conversation-history.js +229 -0
package/dist/lib/ai-model/conversation-history.js.map +1 -0
package/dist/lib/ai-model/index.js +129 -0
package/dist/lib/ai-model/index.js.map +1 -0
package/dist/lib/ai-model/inspect.js +443 -0
package/dist/lib/ai-model/inspect.js.map +1 -0
package/dist/lib/ai-model/llm-planning.js +270 -0
package/dist/lib/ai-model/llm-planning.js.map +1 -0
package/dist/lib/ai-model/prompt/common.js +41 -0
package/dist/lib/ai-model/prompt/common.js.map +1 -0
package/dist/lib/ai-model/prompt/describe.js +100 -0
package/dist/lib/ai-model/prompt/describe.js.map +1 -0
package/dist/lib/ai-model/prompt/extraction.js +171 -0
package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
package/dist/lib/ai-model/prompt/llm-locator.js +88 -0
package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
package/dist/lib/ai-model/prompt/llm-planning.js +605 -0
package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
package/dist/lib/ai-model/prompt/llm-section-locator.js +81 -0
package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
package/dist/lib/ai-model/prompt/util.js +105 -0
package/dist/lib/ai-model/prompt/util.js.map +1 -0
package/dist/lib/ai-model/prompt/yaml-generator.js +264 -0
package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
package/dist/lib/ai-model/service-caller/codex-app-server.js +624 -0
package/dist/lib/ai-model/service-caller/codex-app-server.js.map +1 -0
package/dist/lib/ai-model/service-caller/image-detail.js +40 -0
package/dist/lib/ai-model/service-caller/image-detail.js.map +1 -0
package/dist/lib/ai-model/service-caller/index.js +540 -0
package/dist/lib/ai-model/service-caller/index.js.map +1 -0
package/dist/lib/ai-model/ui-tars-planning.js +283 -0
package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
package/dist/lib/common.js +480 -0
package/dist/lib/common.js.map +1 -0
package/dist/lib/device/device-options.js +20 -0
package/dist/lib/device/device-options.js.map +1 -0
package/dist/lib/device/index.js +468 -0
package/dist/lib/device/index.js.map +1 -0
package/dist/lib/dump/html-utils.js +368 -0
package/dist/lib/dump/html-utils.js.map +1 -0
package/dist/lib/dump/index.js +60 -0
package/dist/lib/dump/index.js.map +1 -0
package/dist/lib/dump/screenshot-restoration.js +66 -0
package/dist/lib/dump/screenshot-restoration.js.map +1 -0
package/dist/lib/dump/screenshot-store.js +166 -0
package/dist/lib/dump/screenshot-store.js.map +1 -0
package/dist/lib/index.js +186 -0
package/dist/lib/index.js.map +1 -0
package/dist/lib/report-cli.js +191 -0
package/dist/lib/report-cli.js.map +1 -0
package/dist/lib/report-generator.js +246 -0
package/dist/lib/report-generator.js.map +1 -0
package/dist/lib/report-markdown.js +255 -0
package/dist/lib/report-markdown.js.map +1 -0
package/dist/lib/report.js +316 -0
package/dist/lib/report.js.map +1 -0
package/dist/lib/screenshot-item.js +156 -0
package/dist/lib/screenshot-item.js.map +1 -0
package/dist/lib/service/index.js +308 -0
package/dist/lib/service/index.js.map +1 -0
package/dist/lib/service/utils.js +49 -0
package/dist/lib/service/utils.js.map +1 -0
package/dist/lib/skill/index.js +72 -0
package/dist/lib/skill/index.js.map +1 -0
package/dist/lib/task-runner.js +300 -0
package/dist/lib/task-runner.js.map +1 -0
package/dist/lib/task-timing.js +46 -0
package/dist/lib/task-timing.js.map +1 -0
package/dist/lib/tree.js +53 -0
package/dist/lib/tree.js.map +1 -0
package/dist/lib/types.js +300 -0
package/dist/lib/types.js.map +1 -0
package/dist/lib/utils.js +316 -0
package/dist/lib/utils.js.map +1 -0
package/dist/lib/yaml/builder.js +57 -0
package/dist/lib/yaml/builder.js.map +1 -0
package/dist/lib/yaml/index.js +81 -0
package/dist/lib/yaml/index.js.map +1 -0
package/dist/lib/yaml/player.js +476 -0
package/dist/lib/yaml/player.js.map +1 -0
package/dist/lib/yaml/utils.js +155 -0
package/dist/lib/yaml/utils.js.map +1 -0
package/dist/lib/yaml.js +20 -0
package/dist/lib/yaml.js.map +1 -0
package/dist/types/agent/agent.d.ts +216 -0
package/dist/types/agent/common.d.ts +0 -0
package/dist/types/agent/execution-session.d.ts +36 -0
package/dist/types/agent/index.d.ts +9 -0
package/dist/types/agent/task-builder.d.ts +34 -0
package/dist/types/agent/task-cache.d.ts +49 -0
package/dist/types/agent/tasks.d.ts +69 -0
package/dist/types/agent/ui-utils.d.ts +14 -0
package/dist/types/agent/utils.d.ts +25 -0
package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
package/dist/types/ai-model/connectivity.d.ts +20 -0
package/dist/types/ai-model/conversation-history.d.ts +105 -0
package/dist/types/ai-model/index.d.ts +16 -0
package/dist/types/ai-model/inspect.d.ts +67 -0
package/dist/types/ai-model/llm-planning.d.ts +19 -0
package/dist/types/ai-model/prompt/common.d.ts +2 -0
package/dist/types/ai-model/prompt/describe.d.ts +1 -0
package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
package/dist/types/ai-model/prompt/util.d.ts +33 -0
package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
package/dist/types/ai-model/service-caller/index.d.ts +49 -0
package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
package/dist/types/common.d.ts +288 -0
package/dist/types/device/device-options.d.ts +145 -0
package/dist/types/device/index.d.ts +2528 -0
package/dist/types/dump/html-utils.d.ts +75 -0
package/dist/types/dump/index.d.ts +5 -0
package/dist/types/dump/screenshot-restoration.d.ts +8 -0
package/dist/types/dump/screenshot-store.d.ts +49 -0
package/dist/types/index.d.ts +21 -0
package/dist/types/report-cli.d.ts +36 -0
package/dist/types/report-generator.d.ts +81 -0
package/dist/types/report-markdown.d.ts +24 -0
package/dist/types/report.d.ts +52 -0
package/dist/types/screenshot-item.d.ts +67 -0
package/dist/types/service/index.d.ts +24 -0
package/dist/types/service/utils.d.ts +2 -0
package/dist/types/skill/index.d.ts +25 -0
package/dist/types/task-runner.d.ts +50 -0
package/dist/types/task-timing.d.ts +8 -0
package/dist/types/tree.d.ts +4 -0
package/dist/types/types.d.ts +681 -0
package/dist/types/utils.d.ts +45 -0
package/dist/types/yaml/builder.d.ts +2 -0
package/dist/types/yaml/index.d.ts +4 -0
package/dist/types/yaml/player.d.ts +34 -0
package/dist/types/yaml/utils.d.ts +9 -0
package/dist/types/yaml.d.ts +215 -0
package/package.json +111 -0

package/dist/types/types.d.ts ADDED

@@ -0,0 +1,681 @@
+import type { NodeType } from '@midscene/shared/constants';
+import type { CreateOpenAIClientFn, TModelConfig } from '@midscene/shared/env';
+import type { BaseElement, LocateResultElement, Rect, Size } from '@midscene/shared/types';
+import type { z } from 'zod';
+import type { TUserPrompt } from './common';
+import { ScreenshotItem } from './screenshot-item';
+import type { DetailedLocateParam, MidsceneYamlFlowItem, ServiceExtractOption } from './yaml';
+export type { ElementTreeNode, BaseElement, Rect, Size, Point, } from '@midscene/shared/types';
+export * from './yaml';
+export type AIUsageInfo = Record<string, any> & {
+    prompt_tokens: number | undefined;
+    completion_tokens: number | undefined;
+    total_tokens: number | undefined;
+    cached_input: number | undefined;
+    time_cost: number | undefined;
+    model_name: string | undefined;
+    model_description: string | undefined;
+    intent: string | undefined;
+    request_id: string | undefined;
+};
+export type { LocateResultElement };
+export type AISingleElementResponseByPosition = {
+    position?: {
+        x: number;
+        y: number;
+    };
+    bbox?: [number, number, number, number];
+    reason: string;
+    text: string;
+};
+export interface AIElementCoordinatesResponse {
+    bbox: [number, number, number, number];
+    errors?: string[];
+}
+export type AIElementResponse = AIElementCoordinatesResponse;
+export interface AIDataExtractionResponse<DataDemand> {
+    data: DataDemand;
+    errors?: string[];
+    thought?: string;
+}
+export interface AISectionLocatorResponse {
+    bbox: [number, number, number, number];
+    references_bbox?: [number, number, number, number][];
+    error?: string;
+}
+export interface AIAssertionResponse {
+    pass: boolean;
+    thought: string;
+}
+export interface AIDescribeElementResponse {
+    description: string;
+    error?: string;
+}
+export interface LocatorValidatorOption {
+    centerDistanceThreshold?: number;
+}
+export interface LocateValidatorResult {
+    pass: boolean;
+    rect: Rect;
+    center: [number, number];
+    centerDistance?: number;
+}
+export interface AgentDescribeElementAtPointResult {
+    prompt: string;
+    deepLocate: boolean;
+    verifyResult?: LocateValidatorResult;
+}
+/**
+ * context
+ */
+export declare abstract class UIContext {
+    /**
+     * Screenshot of the current UI state. Its size is shotSize, i.e. it has already been shrunk by screenshotShrinkFactor.
+     */
+    abstract screenshot: ScreenshotItem;
+    /**
+     * screenshot size after shrinking
+     */
+    abstract shotSize: Size;
+    /**
+     * The ratio for converting shrunk screenshot coordinates to logical coordinates.
+     *
+     * Example:
+     * - Physical screen width: 3000px, dpr=6
+     * - Logical width: 500px
+     * - User-defined screenshotShrinkFactor: 2
+     * - Actual shrunk screenshot width: 3000 / 2 = 1500px
+     * - shrunkShotToLogicalRatio: dpr / screenshotShrinkFactor = 6 / 2 = 3
+     * - To map back to logical coordinates: 1500 / shrunkShotToLogicalRatio = 500px
+     */
+    abstract shrunkShotToLogicalRatio: number;
+    abstract _isFrozen?: boolean;
+    abstract deprecatedDpr?: number;
+}
+export type EnsureObject<T> = {
+    [K in keyof T]: any;
+};
+export type ServiceAction = 'locate' | 'extract' | 'assert' | 'describe';
+export type ServiceExtractParam = string | Record<string, string>;
+export type ElementCacheFeature = Record<string, unknown>;
+export interface LocateResult {
+    element: LocateResultElement | null;
+    rect?: Rect;
+}
+export type ThinkingLevel = 'off' | 'medium' | 'high';
+export type DeepThinkOption = 'unset' | true | false;
+export interface ServiceTaskInfo {
+    durationMs: number;
+    formatResponse?: string;
+    rawResponse?: string;
+    usage?: AIUsageInfo;
+    searchArea?: Rect;
+    searchAreaRawResponse?: string;
+    searchAreaUsage?: AIUsageInfo;
+    reasoning_content?: string;
+}
+export interface DumpMeta {
+    logTime: number;
+}
+export type ReportAttributes = Record<string, string | number | boolean | null | undefined>;
+export interface ReportDumpWithAttributes {
+    dumpString: string;
+    attributes?: ReportAttributes;
+}
+export interface ServiceDump extends DumpMeta {
+    type: 'locate' | 'extract' | 'assert';
+    logId: string;
+    userQuery: {
+        element?: TUserPrompt;
+        dataDemand?: ServiceExtractParam;
+        assertion?: TUserPrompt;
+    };
+    matchedElement: LocateResultElement[];
+    matchedRect?: Rect;
+    deepLocate?: boolean;
+    data: any;
+    assertionPass?: boolean;
+    assertionThought?: string;
+    taskInfo: ServiceTaskInfo;
+    error?: string;
+    output?: any;
+}
+export type PartialServiceDumpFromSDK = Omit<ServiceDump, 'logTime' | 'logId' | 'model_name'>;
+export interface ServiceResultBase {
+    dump: ServiceDump;
+}
+export type LocateResultWithDump = LocateResult & ServiceResultBase;
+export interface ServiceExtractResult<T> extends ServiceResultBase {
+    data: T;
+    thought?: string;
+    usage?: AIUsageInfo;
+    reasoning_content?: string;
+}
+export declare class ServiceError extends Error {
+    dump: ServiceDump;
+    constructor(message: string, dump: ServiceDump);
+}
+export interface LiteUISection {
+    name: string;
+    description: string;
+    sectionCharacteristics: string;
+    textIds: string[];
+}
+export type ElementById = (id: string) => BaseElement | null;
+export type ServiceAssertionResponse = AIAssertionResponse & {
+    usage?: AIUsageInfo;
+};
+/**
+ * agent
+ */
+export type OnTaskStartTip = (tip: string) => Promise<void> | void;
+export interface AgentWaitForOpt extends ServiceExtractOption {
+    checkIntervalMs?: number;
+    timeoutMs?: number;
+}
+export interface AgentAssertOpt {
+    keepRawResponse?: boolean;
+}
+/**
+ * planning
+ *
+ */
+export interface PlanningLocateParam extends DetailedLocateParam {
+    bbox?: [number, number, number, number];
+}
+export interface PlanningAction<ParamType = any> {
+    thought?: string;
+    log?: string;
+    type: string;
+    param: ParamType;
+}
+export type SubGoalStatus = 'pending' | 'running' | 'finished';
+export interface SubGoal {
+    index: number;
+    status: SubGoalStatus;
+    description: string;
+    logs?: string[];
+}
+export interface RawResponsePlanningAIResponse {
+    action: PlanningAction;
+    thought?: string;
+    log: string;
+    memory?: string;
+    error?: string;
+    finalizeMessage?: string;
+    finalizeSuccess?: boolean;
+    updateSubGoals?: SubGoal[];
+    markFinishedIndexes?: number[];
+}
+export interface PlanningAIResponse extends Omit<RawResponsePlanningAIResponse, 'action'> {
+    actions?: PlanningAction[];
+    usage?: AIUsageInfo;
+    rawResponse?: string;
+    yamlFlow?: MidsceneYamlFlowItem[];
+    yamlString?: string;
+    error?: string;
+    reasoning_content?: string;
+    shouldContinuePlanning: boolean;
+    output?: string;
+}
+export interface PlanningActionParamSleep {
+    timeMs: number;
+}
+export interface PlanningActionParamError {
+    thought: string;
+}
+export type PlanningActionParamWaitFor = AgentWaitForOpt & {};
+export interface LongPressParam {
+    duration?: number;
+}
+export interface PullParam {
+    direction: 'up' | 'down';
+    distance?: number;
+    duration?: number;
+}
+/**
+ * misc
+ */
+export interface Color {
+    name: string;
+    hex: string;
+}
+export interface BaseAgentParserOpt {
+    selector?: string;
+}
+export interface PuppeteerParserOpt extends BaseAgentParserOpt {
+}
+export interface PlaywrightParserOpt extends BaseAgentParserOpt {
+}
+export interface ExecutionTaskProgressOptions {
+    onTaskStart?: (task: ExecutionTask) => Promise<void> | void;
+}
+export interface ExecutionRecorderItem {
+    type: 'screenshot';
+    ts: number;
+    screenshot?: ScreenshotItem;
+    timing?: string;
+}
+export type ExecutionTaskType = 'Planning' | 'Insight' | 'Action Space' | 'Log';
+export interface ExecutorContext {
+    task: ExecutionTask;
+    element?: LocateResultElement | null;
+    uiContext?: UIContext;
+}
+export interface ExecutionTaskApply<Type extends ExecutionTaskType = any, TaskParam = any, TaskOutput = any, TaskLog = any> {
+    type: Type;
+    subType?: string;
+    param?: TaskParam;
+    thought?: string;
+    uiContext?: UIContext;
+    executor: (param: TaskParam, context: ExecutorContext) => Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void> | undefined | void;
+}
+export interface ExecutionTaskHitBy {
+    from: string;
+    context: Record<string, any>;
+}
+export interface ExecutionTaskReturn<TaskOutput = unknown, TaskLog = unknown> {
+    output?: TaskOutput;
+    log?: TaskLog;
+    recorder?: ExecutionRecorderItem[];
+    hitBy?: ExecutionTaskHitBy;
+}
+export type ExecutionTask<E extends ExecutionTaskApply<any, any, any> = ExecutionTaskApply<any, any, any>> = E & ExecutionTaskReturn<E extends ExecutionTaskApply<any, any, infer TaskOutput, any> ? TaskOutput : unknown, E extends ExecutionTaskApply<any, any, any, infer TaskLog> ? TaskLog : unknown> & {
+    taskId: string;
+    status: 'pending' | 'running' | 'finished' | 'failed' | 'cancelled';
+    error?: Error;
+    errorMessage?: string;
+    errorStack?: string;
+    timing?: {
+        start: number;
+        getUiContextStart?: number;
+        getUiContextEnd?: number;
+        callAiStart?: number;
+        callAiEnd?: number;
+        beforeInvokeActionHookStart?: number;
+        beforeInvokeActionHookEnd?: number;
+        callActionStart?: number;
+        callActionEnd?: number;
+        afterInvokeActionHookStart?: number;
+        afterInvokeActionHookEnd?: number;
+        captureAfterCallingSnapshotStart?: number;
+        captureAfterCallingSnapshotEnd?: number;
+        end?: number;
+        cost?: number;
+    };
+    usage?: AIUsageInfo;
+    searchAreaUsage?: AIUsageInfo;
+    reasoning_content?: string;
+};
+export interface IExecutionDump extends DumpMeta {
+    /** Stable unique identifier for this execution run */
+    id?: string;
+    name: string;
+    description?: string;
+    tasks: ExecutionTask[];
+    aiActContext?: string;
+}
+/**
+ * ExecutionDump class for serializing and deserializing execution dumps
+ */
+export declare class ExecutionDump implements IExecutionDump {
+    id?: string;
+    logTime: number;
+    name: string;
+    description?: string;
+    tasks: ExecutionTask[];
+    aiActContext?: string;
+    constructor(data: IExecutionDump);
+    /**
+     * Serialize the ExecutionDump to a JSON string
+     */
+    serialize(indents?: number): string;
+    /**
+     * Convert to a plain object for JSON serialization
+     */
+    toJSON(): IExecutionDump;
+    /**
+     * Create an ExecutionDump instance from a serialized JSON string
+     */
+    static fromSerializedString(serialized: string): ExecutionDump;
+    /**
+     * Create an ExecutionDump instance from a plain object
+     */
+    static fromJSON(data: IExecutionDump): ExecutionDump;
+    /**
+     * Collect all ScreenshotItem instances from tasks.
+     * Scans through uiContext and recorder items to find screenshots.
+     *
+     * @returns Array of ScreenshotItem instances
+     */
+    collectScreenshots(): ScreenshotItem[];
+}
+export type ExecutionTaskInsightLocateParam = PlanningLocateParam;
+export interface ExecutionTaskInsightLocateOutput {
+    element: LocateResultElement | null;
+}
+export type ExecutionTaskInsightDump = ServiceDump;
+export type ExecutionTaskInsightLocateApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightLocateParam, ExecutionTaskInsightLocateOutput, ExecutionTaskInsightDump>;
+export type ExecutionTaskInsightLocate = ExecutionTask<ExecutionTaskInsightLocateApply>;
+export interface ExecutionTaskInsightQueryParam {
+    dataDemand: ServiceExtractParam;
+    domIncluded?: boolean | 'visible-only';
+}
+export interface ExecutionTaskInsightQueryOutput {
+    data: any;
+}
+export type ExecutionTaskInsightQueryApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightQueryParam, any, ExecutionTaskInsightDump>;
+export type ExecutionTaskInsightQuery = ExecutionTask<ExecutionTaskInsightQueryApply>;
+export interface ExecutionTaskInsightAssertionParam {
+    assertion: string;
+}
+export type ExecutionTaskInsightAssertionApply = ExecutionTaskApply<'Insight', ExecutionTaskInsightAssertionParam, ServiceAssertionResponse, ExecutionTaskInsightDump>;
+export type ExecutionTaskInsightAssertion = ExecutionTask<ExecutionTaskInsightAssertionApply>;
+export type ExecutionTaskActionApply<ActionParam = any> = ExecutionTaskApply<'Action Space', ActionParam, void, void>;
+export type ExecutionTaskAction = ExecutionTask<ExecutionTaskActionApply>;
+export type ExecutionTaskLogApply<LogParam = {
+    content: string;
+}> = ExecutionTaskApply<'Log', LogParam, void, void>;
+export type ExecutionTaskLog = ExecutionTask<ExecutionTaskLogApply>;
+export type ExecutionTaskPlanningApply = ExecutionTaskApply<'Planning', {
+    userInstruction: string;
+    aiActContext?: string;
+}, PlanningAIResponse>;
+export type ExecutionTaskPlanning = ExecutionTask<ExecutionTaskPlanningApply>;
+export type ExecutionTaskPlanningLocateParam = PlanningLocateParam;
+export interface ExecutionTaskPlanningLocateOutput {
+    element: LocateResultElement | null;
+}
+export type ExecutionTaskPlanningDump = ServiceDump;
+export type ExecutionTaskPlanningLocateApply = ExecutionTaskApply<'Planning', ExecutionTaskPlanningLocateParam, ExecutionTaskPlanningLocateOutput, ExecutionTaskPlanningDump>;
+export type ExecutionTaskPlanningLocate = ExecutionTask<ExecutionTaskPlanningLocateApply>;
+export interface ReportMeta {
+    groupName: string;
+    groupDescription?: string;
+    sdkVersion: string;
+    modelBriefs: ModelBrief[];
+    deviceType?: string;
+}
+export type GroupMeta = ReportMeta;
+export interface IReportActionDump {
+    sdkVersion: string;
+    groupName: string;
+    groupDescription?: string;
+    modelBriefs: ModelBrief[];
+    executions: IExecutionDump[];
+    deviceType?: string;
+}
+export type IGroupedActionDump = IReportActionDump;
+export interface ModelBrief {
+    /**
+     * The intent/category of the model call, for example "planning" or "insight".
+     */
+    intent?: string;
+    /**
+     * The model name returned by usage metadata, for example "gpt-4o".
+     */
+    name?: string;
+    /**
+     * Optional human-readable model description, for example "qwen2.5-vl mode".
+     */
+    modelDescription?: string;
+}
+/**
+ * ReportActionDump class for serializing and deserializing report action dumps
+ */
+export declare class ReportActionDump implements IReportActionDump {
+    sdkVersion: string;
+    groupName: string;
+    groupDescription?: string;
+    modelBriefs: ModelBrief[];
+    executions: ExecutionDump[];
+    deviceType?: string;
+    constructor(data: IReportActionDump);
+    /**
+     * Serialize the ReportActionDump to a JSON string
+     * using the compact { $screenshot: id } reference format.
+     */
+    serialize(indents?: number): string;
+    /**
+     * Serialize the ReportActionDump with inline screenshots to a JSON string.
+     * Each ScreenshotItem is replaced with { base64: "...", capturedAt }.
+     */
+    serializeWithInlineScreenshots(indents?: number): string;
+    /**
+     * Convert to a plain object for JSON serialization
+     */
+    toJSON(): IReportActionDump;
+    /**
+     * Create a ReportActionDump instance from a serialized JSON string
+     */
+    static fromSerializedString(serialized: string): ReportActionDump;
+    /**
+     * Create a ReportActionDump instance from a plain object
+     */
+    static fromJSON(data: IReportActionDump): ReportActionDump;
+    /**
+     * Collect all ScreenshotItem instances from all executions.
+     *
+     * @returns Array of all ScreenshotItem instances across all executions
+     */
+    collectAllScreenshots(): ScreenshotItem[];
+    /**
+     * Serialize the dump to files with screenshots as separate PNG files.
+     * Creates:
+     * - {basePath} - dump JSON with { $screenshot: id } references
+     * - {basePath}.screenshots/ - PNG files
+     *
+     * @param basePath - Base path for the dump file
+     */
+    serializeToFiles(basePath: string): void;
+    /**
+     * Read dump from files and return JSON string with inline screenshots.
+     * Reads the dump JSON and screenshot files, then inlines the base64 data.
+     *
+     * @param basePath - Base path for the dump file
+     * @returns JSON string with inline screenshots ({ base64: "..." } format)
+     */
+    static fromFilesAsInlineJson(basePath: string): string;
+    /**
+     * Clean up all files associated with a serialized dump.
+     *
+     * @param basePath - Base path for the dump file
+     */
+    static cleanupFiles(basePath: string): void;
+    /**
+     * Get all file paths associated with a serialized dump.
+     *
+     * @param basePath - Base path for the dump file
+     * @returns Array of all associated file paths (presumably the dump JSON
+     *   plus its screenshot files; confirm whether non-existent paths are included)
+     */
+    static getFilePaths(basePath: string): string[];
+}
+export type GroupedActionDump = ReportActionDump; // type-level alias: GroupedActionDump is another name for ReportActionDump
+export declare const GroupedActionDump: typeof ReportActionDump; // value-level alias: same name, typed as the ReportActionDump class itself
+export type InterfaceType = 'puppeteer' | 'playwright' | 'static' | 'chrome-extension-proxy' | 'android' | string; // known interface kinds; the trailing `| string` means any other string is accepted as well
+export interface StreamingCodeGenerationOptions { // options for streamed code generation; all fields are optional
+    /** Whether to enable streaming output */
+    stream?: boolean;
+    /** Callback invoked with each streaming chunk (see StreamingCallback) */
+    onChunk?: StreamingCallback;
+    /** Callback invoked on streaming completion; receives the final code as a string */
+    onComplete?: (finalCode: string) => void;
+    /** Callback invoked with the Error when streaming fails */
+    onError?: (error: Error) => void;
+}
+export type StreamingCallback = (chunk: CodeGenerationChunk) => void; // handler that receives a single CodeGenerationChunk and returns nothing
+export interface CodeGenerationChunk { // payload type passed to a StreamingCallback
+    /** The incremental content chunk */
+    content: string;
+    /** The reasoning content (declared as a required string on every chunk) */
+    reasoning_content: string;
+    /** The accumulated content so far */
+    accumulated: string;
+    /** Whether this is the final chunk */
+    isComplete: boolean;
+    /** Token usage information, if available */
+    usage?: AIUsageInfo;
+}
+export interface StreamingAIResponse { // final result of an AI response that may or may not have been streamed
+    /** The final accumulated content */
+    content: string;
+    /** Token usage information, if available */
+    usage?: AIUsageInfo;
+    /** Whether the response was streamed */
+    isStreamed: boolean;
+}
+export interface DeviceAction<TParam = any, TReturn = any> { // an action definition; TParam is the argument type of `call`, TReturn its result type
+    name: string; // required action name
+    description?: string; // optional description text
+    interfaceAlias?: string; // NOTE(review): purpose not visible here - presumably an alternative name for the action; confirm
+    paramSchema?: z.ZodType<TParam>; // optional Zod schema typed to TParam
+    call: (param: TParam, context: ExecutorContext) => Promise<TReturn> | TReturn; // the action body; may return its result directly or as a Promise
+    delayAfterRunner?: number; // NOTE(review): presumably a delay applied after the action runs (unit not shown here) - confirm
+    /**
+     * An example param object for this action.
+     * Every key of TParam may be supplied or omitted, and the values are untyped (`any`).
+     * Locate fields with { prompt } will automatically get bbox injected when needed.
+     */
+    sample?: {
+        [K in keyof TParam]?: any;
+    };
+}
+/**
+ * Type utilities for extracting types from DeviceAction definitions
+ */
+/**
+ * Extract the parameter type of a DeviceAction: resolves to the `TParam`
+ * type argument of the given Action (inferred as `P`), or `never` if the
+ * conditional match fails.
+ */
+export type ActionParam<Action extends DeviceAction<any, any>> = Action extends DeviceAction<infer P, any> ? P : never;
+/**
+ * Extract the return type of a DeviceAction: resolves to the `TReturn`
+ * type argument of the given Action (inferred as `R`), or `never` if the
+ * conditional match fails.
+ */
+export type ActionReturn<Action extends DeviceAction<any, any>> = Action extends DeviceAction<any, infer R> ? R : never;
+/**
+ * Web-specific types
+ */
+export interface WebElementInfo extends BaseElement { // BaseElement extended with an id and an attributes map
+    id: string; // required string identifier of the element
+    attributes: { // a nodeType entry plus arbitrary string-valued attributes
+        nodeType: NodeType; // must be compatible with the string index signature below; NodeType is declared elsewhere - presumably string-based, confirm
+        [key: string]: string;
+    };
+}
+/**
+ * Agent
+ */
+export type CacheConfig = { // explicit cache settings (the object member of the Cache union)
+    strategy?: 'read-only' | 'read-write' | 'write-only'; // optional mode; NOTE(review): the default when omitted is not shown here - confirm
+    id: string; // required cache identifier
+};
+export type Cache = false | true | CacheConfig; // either a boolean switch or an explicit CacheConfig object
+export interface AgentOpt { // agent options bag; every field is optional
+    testId?: string;
+    cacheId?: string;
+    groupName?: string;
+    groupDescription?: string;
+    generateReport?: boolean;
+    persistExecutionDump?: boolean;
+    autoPrintReportMsg?: boolean;
+    /**
+     * Report output format.
+     *
+     * `'html-and-external-assets'` selects a directory-based report format
+     * with separate image files:
+     * - Screenshots are saved as PNG files in a `screenshots/` subdirectory
+     * - Report is generated as `index.html` with relative image paths
+     * - Reduces memory usage and report file size
+     *
+     * IMPORTANT: 'html-and-external-assets' reports must be served via HTTP server
+     * (e.g., `npx serve ./report-dir`). The file:// protocol will not
+     * work due to browser CORS restrictions.
+     *
+     * @default 'single-html'
+     */
+    outputFormat?: 'single-html' | 'html-and-external-assets';
+    onTaskStartTip?: OnTaskStartTip;
+    aiActContext?: string;
+    aiActionContext?: string; // NOTE(review): overlaps with aiActContext by name - presumably a legacy alias; confirm
+    reportFileName?: string;
+    modelConfig?: TModelConfig;
+    cache?: Cache;
+    /**
+     * Maximum number of replanning cycles for aiAct.
+     * Defaults to 20 (40 for `vlm-ui-tars`) when not provided.
+     * If omitted, the agent will also read `MIDSCENE_REPLANNING_CYCLE_LIMIT` for backward compatibility.
+     */
+    replanningCycleLimit?: number;
+    /**
+     * Wait time in milliseconds after each action execution.
+     * This allows the UI to settle and stabilize before the next action.
+     * Defaults to 300ms when not provided.
+     */
+    waitAfterAction?: number;
+    /**
+     * When set to true, Midscene will use the target device's time (Android/iOS)
+     * instead of the system time. Useful when the device time differs from the
+     * host machine. Default: false
+     */
+    useDeviceTimestamp?: boolean;
+    /**
+     * Custom screenshot shrink factor to reduce AI token usage.
+     * When set, the screenshot will be scaled down by this factor from the physical resolution.
+     *
+     * Example:
+     * - Physical screen width: 3000px, dpr=6
+     * - Logical width: 500px
+     * - screenshotShrinkFactor: 2
+     * - Actual shrunk screenshot width: 3000 / 2 = 1500px
+     * - AI analyzes the 1500px screenshot
+     * - Coordinates are transformed back to logical (500px) before actions execute
+     *
+     * Benefits:
+     * - Reduces token usage for high-resolution screenshots
+     * - Maintains accuracy by scaling coordinates appropriately
+     *
+     * Must be >= 1 (shrinking only, enlarging is not supported).
+     *
+     * @default 1 (no shrinking, uses original physical screenshot)
+     */
+    screenshotShrinkFactor?: number;
+    /**
+     * Custom OpenAI client factory function
+     *
+     * If provided, this function will be called to create OpenAI client instances
+     * for each AI call, allowing you to:
+     * - Wrap clients with observability tools (langsmith, langfuse)
+     * - Use custom OpenAI-compatible clients
+     * - Apply different configurations based on intent
+     *
+     * As the example below shows, the function is given a client (`openai`)
+     * and an options object (`opts`, from which e.g. `baseURL` can be read),
+     * and yields the client to use - either the one it was given or a wrapped
+     * one. The exact signature is defined by `CreateOpenAIClientFn`.
+     *
+     * @example
+     * ```typescript
+     * createOpenAIClient: async (openai, opts) => {
+     *   // Wrap with langsmith for planning tasks
+     *   if (opts.baseURL?.includes('planning')) {
+     *     return wrapOpenAI(openai, { metadata: { task: 'planning' } });
+     *   }
+     *
+     *   return openai;
+     * }
+     * ```
+     */
+    createOpenAIClient?: CreateOpenAIClientFn;
+}
+export type TestStatus = 'passed' | 'failed' | 'timedOut' | 'skipped' | 'interrupted'; // outcome of a test; NOTE(review): the values match Playwright's test result statuses - presumably intentional, confirm
+export interface ReportFileAttributes { // per-test metadata attached to a report file; all fields are required
+    testDuration: number; // NOTE(review): unit is not shown here - presumably milliseconds; confirm
+    testStatus: TestStatus; // one of the TestStatus literals
+    testTitle: string;
+    testId: string;
+    testDescription: string;
+}
+export type ReportFileWithAttributes = { // a report's attributes together with the path of its report file (path required in this variant)
+    reportFilePath: string;
+    reportAttributes: ReportFileAttributes;
+} | { // variant for skipped tests: the file path may be omitted
+    reportFilePath?: string;
+    reportAttributes: ReportFileAttributes & {
+        testStatus: 'skipped'; // narrows testStatus to the single literal 'skipped'
+    };
+};